From: Kevin Lim Date: Tue, 30 May 2006 18:17:41 +0000 (-0400) Subject: Merge ktlim@zizzer:/bk/m5 X-Git-Tag: m5_2.0_beta1~36^2~112^2 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=4a5b51b516853c9fcaabc44caacdd7e8e93dc0ef;p=gem5.git Merge ktlim@zizzer:/bk/m5 into zamp.eecs.umich.edu:/z/ktlim2/clean/newmem SConstruct: src/SConscript: src/arch/SConscript: src/arch/alpha/faults.cc: src/arch/alpha/tlb.cc: src/base/traceflags.py: src/cpu/SConscript: src/cpu/base.cc: src/cpu/base.hh: src/cpu/base_dyn_inst.cc: src/cpu/cpu_exec_context.cc: src/cpu/cpu_exec_context.hh: src/cpu/exec_context.hh: src/cpu/o3/alpha_cpu.hh: src/cpu/o3/alpha_cpu_impl.hh: src/cpu/o3/alpha_dyn_inst.hh: src/cpu/o3/cpu.cc: src/cpu/o3/cpu.hh: src/cpu/o3/regfile.hh: src/cpu/ozone/cpu.hh: src/cpu/simple/base.cc: src/cpu/base_dyn_inst.hh: src/cpu/o3/2bit_local_pred.cc: src/cpu/o3/2bit_local_pred.hh: src/cpu/o3/alpha_cpu.cc: src/cpu/o3/alpha_cpu_builder.cc: src/cpu/o3/alpha_dyn_inst.cc: src/cpu/o3/alpha_dyn_inst_impl.hh: src/cpu/o3/alpha_impl.hh: src/cpu/o3/alpha_params.hh: src/cpu/o3/bpred_unit.cc: src/cpu/o3/bpred_unit.hh: src/cpu/o3/bpred_unit_impl.hh: src/cpu/o3/btb.cc: src/cpu/o3/btb.hh: src/cpu/o3/comm.hh: src/cpu/o3/commit.cc: src/cpu/o3/commit.hh: src/cpu/o3/commit_impl.hh: src/cpu/o3/cpu_policy.hh: src/cpu/o3/decode.cc: src/cpu/o3/decode.hh: src/cpu/o3/decode_impl.hh: src/cpu/o3/fetch.cc: src/cpu/o3/fetch.hh: src/cpu/o3/fetch_impl.hh: src/cpu/o3/free_list.cc: src/cpu/o3/free_list.hh: src/cpu/o3/iew.cc: src/cpu/o3/iew.hh: src/cpu/o3/iew_impl.hh: src/cpu/o3/inst_queue.cc: src/cpu/o3/inst_queue.hh: src/cpu/o3/inst_queue_impl.hh: src/cpu/o3/mem_dep_unit.cc: src/cpu/o3/mem_dep_unit.hh: src/cpu/o3/mem_dep_unit_impl.hh: src/cpu/o3/ras.cc: src/cpu/o3/ras.hh: src/cpu/o3/rename.cc: src/cpu/o3/rename.hh: src/cpu/o3/rename_impl.hh: src/cpu/o3/rename_map.cc: src/cpu/o3/rename_map.hh: src/cpu/o3/rob.cc: src/cpu/o3/rob.hh: src/cpu/o3/rob_impl.hh: src/cpu/o3/sat_counter.cc: src/cpu/o3/sat_counter.hh: 
src/cpu/o3/store_set.cc: src/cpu/o3/store_set.hh: src/cpu/o3/tournament_pred.cc: src/cpu/o3/tournament_pred.hh: Hand merges. --HG-- rename : build/SConstruct => SConstruct rename : SConscript => src/SConscript rename : arch/alpha/ev5.cc => src/arch/alpha/ev5.cc rename : arch/alpha/isa/decoder.isa => src/arch/alpha/isa/decoder.isa rename : arch/alpha/isa/pal.isa => src/arch/alpha/isa/pal.isa rename : base/traceflags.py => src/base/traceflags.py rename : cpu/SConscript => src/cpu/SConscript rename : cpu/base.cc => src/cpu/base.cc rename : cpu/base.hh => src/cpu/base.hh rename : cpu/base_dyn_inst.cc => src/cpu/base_dyn_inst.cc rename : cpu/base_dyn_inst.hh => src/cpu/base_dyn_inst.hh rename : cpu/cpu_exec_context.cc => src/cpu/cpu_exec_context.cc rename : cpu/cpu_exec_context.hh => src/cpu/cpu_exec_context.hh rename : cpu/cpu_models.py => src/cpu/cpu_models.py rename : cpu/exec_context.hh => src/cpu/exec_context.hh rename : cpu/exetrace.cc => src/cpu/exetrace.cc rename : cpu/exetrace.hh => src/cpu/exetrace.hh rename : cpu/inst_seq.hh => src/cpu/inst_seq.hh rename : cpu/o3/2bit_local_pred.cc => src/cpu/o3/2bit_local_pred.cc rename : cpu/o3/2bit_local_pred.hh => src/cpu/o3/2bit_local_pred.hh rename : cpu/o3/alpha_cpu.hh => src/cpu/o3/alpha_cpu.hh rename : cpu/o3/alpha_cpu_builder.cc => src/cpu/o3/alpha_cpu_builder.cc rename : cpu/o3/alpha_cpu_impl.hh => src/cpu/o3/alpha_cpu_impl.hh rename : cpu/o3/alpha_dyn_inst.hh => src/cpu/o3/alpha_dyn_inst.hh rename : cpu/o3/alpha_dyn_inst_impl.hh => src/cpu/o3/alpha_dyn_inst_impl.hh rename : cpu/o3/alpha_impl.hh => src/cpu/o3/alpha_impl.hh rename : cpu/o3/alpha_params.hh => src/cpu/o3/alpha_params.hh rename : cpu/o3/bpred_unit.cc => src/cpu/o3/bpred_unit.cc rename : cpu/o3/bpred_unit.hh => src/cpu/o3/bpred_unit.hh rename : cpu/o3/bpred_unit_impl.hh => src/cpu/o3/bpred_unit_impl.hh rename : cpu/o3/btb.cc => src/cpu/o3/btb.cc rename : cpu/o3/btb.hh => src/cpu/o3/btb.hh rename : cpu/o3/comm.hh => src/cpu/o3/comm.hh rename : 
cpu/o3/commit.cc => src/cpu/o3/commit.cc rename : cpu/o3/commit.hh => src/cpu/o3/commit.hh rename : cpu/o3/commit_impl.hh => src/cpu/o3/commit_impl.hh rename : cpu/o3/cpu.cc => src/cpu/o3/cpu.cc rename : cpu/o3/cpu.hh => src/cpu/o3/cpu.hh rename : cpu/o3/cpu_policy.hh => src/cpu/o3/cpu_policy.hh rename : cpu/o3/decode.cc => src/cpu/o3/decode.cc rename : cpu/o3/decode.hh => src/cpu/o3/decode.hh rename : cpu/o3/decode_impl.hh => src/cpu/o3/decode_impl.hh rename : cpu/o3/fetch.cc => src/cpu/o3/fetch.cc rename : cpu/o3/fetch.hh => src/cpu/o3/fetch.hh rename : cpu/o3/fetch_impl.hh => src/cpu/o3/fetch_impl.hh rename : cpu/o3/free_list.cc => src/cpu/o3/free_list.cc rename : cpu/o3/free_list.hh => src/cpu/o3/free_list.hh rename : cpu/o3/iew.cc => src/cpu/o3/iew.cc rename : cpu/o3/iew.hh => src/cpu/o3/iew.hh rename : cpu/o3/iew_impl.hh => src/cpu/o3/iew_impl.hh rename : cpu/o3/inst_queue.cc => src/cpu/o3/inst_queue.cc rename : cpu/o3/inst_queue.hh => src/cpu/o3/inst_queue.hh rename : cpu/o3/inst_queue_impl.hh => src/cpu/o3/inst_queue_impl.hh rename : cpu/o3/mem_dep_unit.cc => src/cpu/o3/mem_dep_unit.cc rename : cpu/o3/mem_dep_unit.hh => src/cpu/o3/mem_dep_unit.hh rename : cpu/o3/mem_dep_unit_impl.hh => src/cpu/o3/mem_dep_unit_impl.hh rename : cpu/o3/ras.cc => src/cpu/o3/ras.cc rename : cpu/o3/ras.hh => src/cpu/o3/ras.hh rename : cpu/o3/regfile.hh => src/cpu/o3/regfile.hh rename : cpu/o3/rename.cc => src/cpu/o3/rename.cc rename : cpu/o3/rename.hh => src/cpu/o3/rename.hh rename : cpu/o3/rename_impl.hh => src/cpu/o3/rename_impl.hh rename : cpu/o3/rename_map.cc => src/cpu/o3/rename_map.cc rename : cpu/o3/rename_map.hh => src/cpu/o3/rename_map.hh rename : cpu/o3/rob.hh => src/cpu/o3/rob.hh rename : cpu/o3/rob_impl.hh => src/cpu/o3/rob_impl.hh rename : cpu/o3/sat_counter.hh => src/cpu/o3/sat_counter.hh rename : cpu/o3/store_set.cc => src/cpu/o3/store_set.cc rename : cpu/o3/store_set.hh => src/cpu/o3/store_set.hh rename : cpu/o3/tournament_pred.cc => src/cpu/o3/tournament_pred.cc 
rename : cpu/o3/tournament_pred.hh => src/cpu/o3/tournament_pred.hh rename : cpu/ozone/cpu.cc => src/cpu/ozone/cpu.cc rename : cpu/ozone/cpu.hh => src/cpu/ozone/cpu.hh rename : cpu/ozone/cpu_impl.hh => src/cpu/ozone/cpu_impl.hh rename : cpu/static_inst.hh => src/cpu/static_inst.hh rename : kern/system_events.cc => src/kern/system_events.cc rename : kern/tru64/tru64.hh => src/kern/tru64/tru64.hh rename : python/m5/objects/AlphaFullCPU.py => src/python/m5/objects/AlphaFullCPU.py rename : sim/pseudo_inst.cc => src/sim/pseudo_inst.cc extra : convert_revision : ff351fc0e3a7c0f23e59fdbec33d8209eb9280be --- 4a5b51b516853c9fcaabc44caacdd7e8e93dc0ef diff --cc SConstruct index cbbcb07a6,000000000..f2a41400c mode 100644,000000..100644 --- a/SConstruct +++ b/SConstruct @@@ -1,437 -1,0 +1,438 @@@ +# -*- mode:python -*- + +# Copyright (c) 2004-2005 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +################################################### +# +# SCons top-level build description (SConstruct) file. +# +# While in this directory ('m5'), just type 'scons' to build the default +# configuration (see below), or type 'scons build//' +# to build some other configuration (e.g., 'build/ALPHA_FS/m5.opt' for +# the optimized full-system version). +# +# You can build M5 in a different directory as long as there is a +# 'build/' somewhere along the target path. The build system +# expdects that all configs under the same build directory are being +# built for the same host system. +# +# Examples: +# These two commands are equivalent. The '-u' option tells scons to +# search up the directory tree for this SConstruct file. +# % cd /m5 ; scons build/ALPHA_FS/m5.debug +# % cd /m5/build/ALPHA_FS; scons -u m5.debug +# These two commands are equivalent and demonstrate building in a +# directory outside of the source tree. The '-C' option tells scons +# to chdir to the specified directory to find this SConstruct file. +# % cd /m5 ; scons /local/foo/build/ALPHA_FS/m5.debug +# % cd /local/foo/build/ALPHA_FS; scons -C /m5 m5.debug +# +# You can use 'scons -H' to print scons options. If you're in this +# 'm5' directory (or use -u or -C to tell scons where to find this +# file), you can use 'scons -h' to print all the M5-specific build +# options as well. 
+# +################################################### + +# Python library imports +import sys +import os + +# Check for recent-enough Python and SCons versions +EnsurePythonVersion(2,3) + +# Ironically, SCons 0.96 dies if you give EnsureSconsVersion a +# 3-element version number. +min_scons_version = (0,96,91) +try: + EnsureSConsVersion(*min_scons_version) +except: + print "Error checking current SCons version." + print "SCons", ".".join(map(str,min_scons_version)), "or greater required." + Exit(2) + + +# The absolute path to the current directory (where this file lives). +ROOT = Dir('.').abspath + +# Paths to the M5 and external source trees. +SRCDIR = os.path.join(ROOT, 'src') + +# tell python where to find m5 python code +sys.path.append(os.path.join(ROOT, 'src/python')) + +################################################### +# +# Figure out which configurations to set up based on the path(s) of +# the target(s). +# +################################################### + +# Find default configuration & binary. +Default(os.environ.get('M5_DEFAULT_BINARY', 'build/ALPHA_SE/m5.debug')) + +# Ask SCons which directory it was invoked from. +launch_dir = GetLaunchDir() + +# Make targets relative to invocation directory +abs_targets = map(lambda x: os.path.normpath(os.path.join(launch_dir, str(x))), + BUILD_TARGETS) + +# helper function: find last occurrence of element in list +def rfind(l, elt, offs = -1): + for i in range(len(l)+offs, 0, -1): + if l[i] == elt: + return i + raise ValueError, "element not found" + +# Each target must have 'build' in the interior of the path; the +# directory below this will determine the build parameters. For +# example, for target 'foo/bar/build/ALPHA_SE/arch/alpha/blah.do' we +# recognize that ALPHA_SE specifies the configuration because it +# follow 'build' in the bulid path. + +# Generate a list of the unique build roots and configs that the +# collected targets reference. 
+build_paths = [] +build_root = None +for t in abs_targets: + path_dirs = t.split('/') + try: + build_top = rfind(path_dirs, 'build', -2) + except: + print "Error: no non-leaf 'build' dir found on target path", t + Exit(1) + this_build_root = os.path.join('/',*path_dirs[:build_top+1]) + if not build_root: + build_root = this_build_root + else: + if this_build_root != build_root: + print "Error: build targets not under same build root\n"\ + " %s\n %s" % (build_root, this_build_root) + Exit(1) + build_path = os.path.join('/',*path_dirs[:build_top+2]) + if build_path not in build_paths: + build_paths.append(build_path) + +################################################### +# +# Set up the default build environment. This environment is copied +# and modified according to each selected configuration. +# +################################################### + +env = Environment(ENV = os.environ, # inherit user's environment vars + ROOT = ROOT, + SRCDIR = SRCDIR) + +env.SConsignFile("sconsign") + +# I waffle on this setting... it does avoid a few painful but +# unnecessary builds, but it also seems to make trivial builds take +# noticeably longer. +if False: + env.TargetSignatures('content') + +# M5_PLY is used by isa_parser.py to find the PLY package. +env.Append(ENV = { 'M5_PLY' : Dir('ext/ply') }) + +# Set up default C++ compiler flags +env.Append(CCFLAGS='-pipe') +env.Append(CCFLAGS='-fno-strict-aliasing') +env.Append(CCFLAGS=Split('-Wall -Wno-sign-compare -Werror -Wundef')) +if sys.platform == 'cygwin': + # cygwin has some header file issues... + env.Append(CCFLAGS=Split("-Wno-uninitialized")) +env.Append(CPPPATH=[Dir('ext/dnet')]) + +# Default libraries +env.Append(LIBS=['z']) + +# Platform-specific configuration. Note again that we assume that all +# builds under a given build root run on the same host platform. 
+conf = Configure(env, + conf_dir = os.path.join(build_root, '.scons_config'), + log_file = os.path.join(build_root, 'scons_config.log')) + +# Check for (C99 FP environment control) +have_fenv = conf.CheckHeader('fenv.h', '<>') +if not have_fenv: + print "Warning: Header file not found." + print " This host has no IEEE FP rounding mode control." + +# Check for mysql. +mysql_config = WhereIs('mysql_config') +have_mysql = mysql_config != None + +# Check MySQL version. +if have_mysql: + mysql_version = os.popen(mysql_config + ' --version').read() + mysql_version = mysql_version.split('.') + mysql_major = int(mysql_version[0]) + mysql_minor = int(mysql_version[1]) + # This version check is probably overly conservative, but it deals + # with the versions we have installed. + if mysql_major < 4 or (mysql_major == 4 and mysql_minor < 1): + print "Warning: MySQL v4.1 or newer required." + have_mysql = False + +# Set up mysql_config commands. +if have_mysql: + mysql_config_include = mysql_config + ' --include' + if os.system(mysql_config_include + ' > /dev/null') != 0: + # older mysql_config versions don't support --include, use + # --cflags instead + mysql_config_include = mysql_config + ' --cflags | sed s/\\\'//g' + # This seems to work in all versions + mysql_config_libs = mysql_config + ' --libs' + +env = conf.Finish() + +# Define the universe of supported ISAs +env['ALL_ISA_LIST'] = ['alpha', 'sparc', 'mips'] + +# Define the universe of supported CPU models +env['ALL_CPU_LIST'] = ['AtomicSimpleCPU', 'TimingSimpleCPU', - 'FullCPU', 'AlphaFullCPU'] ++ 'FullCPU', 'AlphaFullCPU', ++ 'OzoneSimpleCPU', 'OzoneCPU', 'CheckerCPU'] + +# Sticky options get saved in the options file so they persist from +# one invocation to the next (unless overridden, in which case the new +# value becomes sticky). 
+sticky_opts = Options(args=ARGUMENTS) +sticky_opts.AddOptions( + EnumOption('TARGET_ISA', 'Target ISA', 'alpha', env['ALL_ISA_LIST']), + BoolOption('FULL_SYSTEM', 'Full-system support', False), + # There's a bug in scons 0.96.1 that causes ListOptions with list + # values (more than one value) not to be able to be restored from + # a saved option file. If this causes trouble then upgrade to + # scons 0.96.90 or later. + ListOption('CPU_MODELS', 'CPU models', 'AtomicSimpleCPU,TimingSimpleCPU', + env['ALL_CPU_LIST']), + BoolOption('ALPHA_TLASER', + 'Model Alpha TurboLaser platform (vs. Tsunami)', False), + BoolOption('NO_FAST_ALLOC', 'Disable fast object allocator', False), + BoolOption('EFENCE', 'Link with Electric Fence malloc debugger', + False), + BoolOption('SS_COMPATIBLE_FP', + 'Make floating-point results compatible with SimpleScalar', + False), + BoolOption('USE_SSE2', + 'Compile for SSE2 (-msse2) to get IEEE FP on x86 hosts', + False), + BoolOption('STATS_BINNING', 'Bin statistics by CPU mode', have_mysql), + BoolOption('USE_MYSQL', 'Use MySQL for stats output', have_mysql), + BoolOption('USE_FENV', 'Use IEEE mode control', have_fenv), + ('CC', 'C compiler', os.environ.get('CC', env['CC'])), + ('CXX', 'C++ compiler', os.environ.get('CXX', env['CXX'])), + BoolOption('BATCH', 'Use batch pool for build and tests', False), + ('BATCH_CMD', 'Batch pool submission command name', 'qdo') + ) + +# Non-sticky options only apply to the current build. +nonsticky_opts = Options(args=ARGUMENTS) +nonsticky_opts.AddOptions( + BoolOption('update_ref', 'Update test reference outputs', False) + ) + +# These options get exported to #defines in config/*.hh (see m5/SConscript). 
+env.ExportOptions = ['FULL_SYSTEM', 'ALPHA_TLASER', 'USE_FENV', \ + 'USE_MYSQL', 'NO_FAST_ALLOC', 'SS_COMPATIBLE_FP', \ + 'STATS_BINNING'] + +# Define a handy 'no-op' action +def no_action(target, source, env): + return 0 + +env.NoAction = Action(no_action, None) + +################################################### +# +# Define a SCons builder for configuration flag headers. +# +################################################### + +# This function generates a config header file that #defines the +# option symbol to the current option setting (0 or 1). The source +# operands are the name of the option and a Value node containing the +# value of the option. +def build_config_file(target, source, env): + (option, value) = [s.get_contents() for s in source] + f = file(str(target[0]), 'w') + print >> f, '#define', option, value + f.close() + return None + +# Generate the message to be printed when building the config file. +def build_config_file_string(target, source, env): + (option, value) = [s.get_contents() for s in source] + return "Defining %s as %s in %s." % (option, value, target[0]) + +# Combine the two functions into a scons Action object. +config_action = Action(build_config_file, build_config_file_string) + +# The emitter munges the source & target node lists to reflect what +# we're really doing. 
+def config_emitter(target, source, env): + # extract option name from Builder arg + option = str(target[0]) + # True target is config header file + target = os.path.join('config', option.lower() + '.hh') + # Force value to 0/1 even if it's a Python bool + val = int(eval(str(env[option]))) + # Sources are option name & value (packaged in SCons Value nodes) + return ([target], [Value(option), Value(val)]) + +config_builder = Builder(emitter = config_emitter, action = config_action) + +env.Append(BUILDERS = { 'ConfigFile' : config_builder }) + +# base help text +help_text = ''' +Usage: scons [scons options] [build options] [target(s)] + +''' + +# libelf build is shared across all configs in the build root. +env.SConscript('ext/libelf/SConscript', + build_dir = os.path.join(build_root, 'libelf'), + exports = 'env') + +################################################### +# +# Define build environments for selected configurations. +# +################################################### + +# rename base env +base_env = env + +for build_path in build_paths: + print "Building in", build_path + # build_dir is the tail component of build path, and is used to + # determine the build parameters (e.g., 'ALPHA_SE') + (build_root, build_dir) = os.path.split(build_path) + # Make a copy of the build-root environment to use for this config. + env = base_env.Copy() + + # Set env options according to the build directory config. + sticky_opts.files = [] + # Options for $BUILD_ROOT/$BUILD_DIR are stored in + # $BUILD_ROOT/options/$BUILD_DIR so you can nuke + # $BUILD_ROOT/$BUILD_DIR without losing your options settings. + current_opts_file = os.path.join(build_root, 'options', build_dir) + if os.path.isfile(current_opts_file): + sticky_opts.files.append(current_opts_file) + print "Using saved options file %s" % current_opts_file + else: + # Build dir-specific options file doesn't exist. 
+ + # Make sure the directory is there so we can create it later + opt_dir = os.path.dirname(current_opts_file) + if not os.path.isdir(opt_dir): + os.mkdir(opt_dir) + + # Get default build options from source tree. Options are + # normally determined by name of $BUILD_DIR, but can be + # overriden by 'default=' arg on command line. + default_opts_file = os.path.join('build_opts', + ARGUMENTS.get('default', build_dir)) + if os.path.isfile(default_opts_file): + sticky_opts.files.append(default_opts_file) + print "Options file %s not found,\n using defaults in %s" \ + % (current_opts_file, default_opts_file) + else: + print "Error: cannot find options file %s or %s" \ + % (current_opts_file, default_opts_file) + Exit(1) + + # Apply current option settings to env + sticky_opts.Update(env) + nonsticky_opts.Update(env) + + help_text += "Sticky options for %s:\n" % build_dir \ + + sticky_opts.GenerateHelpText(env) \ + + "\nNon-sticky options for %s:\n" % build_dir \ + + nonsticky_opts.GenerateHelpText(env) + + # Process option settings. + + if not have_fenv and env['USE_FENV']: + print "Warning: not available; " \ + "forcing USE_FENV to False in", build_dir + "." + env['USE_FENV'] = False + + if not env['USE_FENV']: + print "Warning: No IEEE FP rounding mode control in", build_dir + "." + print " FP results may deviate slightly from other platforms." + + if env['EFENCE']: + env.Append(LIBS=['efence']) + + if env['USE_MYSQL']: + if not have_mysql: + print "Warning: MySQL not available; " \ + "forcing USE_MYSQL to False in", build_dir + "." + env['USE_MYSQL'] = False + else: + print "Compiling in", build_dir, "with MySQL support." + env.ParseConfig(mysql_config_libs) + env.ParseConfig(mysql_config_include) + + # Save sticky option settings back to current options file + sticky_opts.Save(current_opts_file, env) + + # Do this after we save setting back, or else we'll tack on an + # extra 'qdo' every time we run scons. 
+ if env['BATCH']: + env['CC'] = env['BATCH_CMD'] + ' ' + env['CC'] + env['CXX'] = env['BATCH_CMD'] + ' ' + env['CXX'] + + if env['USE_SSE2']: + env.Append(CCFLAGS='-msse2') + + # The m5/SConscript file sets up the build rules in 'env' according + # to the configured options. It returns a list of environments, + # one for each variant build (debug, opt, etc.) + envList = SConscript('src/SConscript', build_dir = build_path, + exports = 'env', duplicate = False) + + # Set up the regression tests for each build. +# for e in envList: +# SConscript('m5-test/SConscript', +# build_dir = os.path.join(build_dir, 'test', e.Label), +# exports = { 'env' : e }, duplicate = False) + +Help(help_text) + +################################################### +# +# Let SCons do its thing. At this point SCons will use the defined +# build environments to build the requested targets. +# +################################################### + diff --cc src/SConscript index 43bd5d102,000000000..268bcc745 mode 100644,000000..100644 --- a/src/SConscript +++ b/src/SConscript @@@ -1,398 -1,0 +1,400 @@@ +# -*- mode:python -*- + +# Copyright (c) 2004-2005 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os +import sys +from os.path import isdir + +# This file defines how to build a particular configuration of M5 +# based on variable settings in the 'env' build environment. + +# Import build environment variable from SConstruct. +Import('env') + +################################################### +# +# Define needed sources. +# +################################################### + +# Base sources used by all configurations. 
+ +base_sources = Split(''' + base/circlebuf.cc + base/copyright.cc + base/cprintf.cc + base/embedfile.cc + base/fast_alloc.cc + base/fifo_buffer.cc + base/hostinfo.cc + base/hybrid_pred.cc + base/inifile.cc + base/intmath.cc + base/match.cc + base/misc.cc + base/output.cc + base/pollevent.cc + base/range.cc + base/random.cc + base/sat_counter.cc + base/serializer.cc + base/socket.cc + base/statistics.cc + base/str.cc + base/time.cc + base/trace.cc + base/traceflags.cc + base/userinfo.cc + base/compression/lzss_compression.cc + base/loader/aout_object.cc + base/loader/ecoff_object.cc + base/loader/elf_object.cc + base/loader/object_file.cc + base/loader/symtab.cc + base/stats/events.cc + base/stats/statdb.cc + base/stats/visit.cc + base/stats/text.cc + ++ cpu/activity.cc + cpu/base.cc + cpu/cpu_exec_context.cc + cpu/cpuevent.cc + cpu/exetrace.cc + cpu/op_class.cc + cpu/pc_event.cc ++ cpu/quiesce_event.cc + cpu/static_inst.cc + cpu/sampler/sampler.cc + + mem/bridge.cc + mem/bus.cc + mem/connector.cc + mem/mem_object.cc + mem/packet.cc + mem/physical.cc + mem/port.cc + mem/request.cc + + python/pyconfig.cc + python/embedded_py.cc + + sim/builder.cc + sim/configfile.cc + sim/debug.cc + sim/eventq.cc + sim/faults.cc + sim/main.cc + sim/param.cc + sim/profile.cc + sim/root.cc + sim/serialize.cc + sim/sim_events.cc + sim/sim_exit.cc + sim/sim_object.cc + sim/startup.cc + sim/stat_context.cc + sim/stat_control.cc + sim/system.cc + sim/trace_context.cc + ''') + +# Old FullCPU sources +full_cpu_sources = Split(''' + encumbered/cpu/full/bpred.cc + encumbered/cpu/full/commit.cc + encumbered/cpu/full/cpu.cc + encumbered/cpu/full/create_vector.cc + encumbered/cpu/full/cv_spec_state.cc + encumbered/cpu/full/dd_queue.cc + encumbered/cpu/full/dep_link.cc + encumbered/cpu/full/dispatch.cc + encumbered/cpu/full/dyn_inst.cc + encumbered/cpu/full/execute.cc + encumbered/cpu/full/fetch.cc + encumbered/cpu/full/floss_reasons.cc + encumbered/cpu/full/fu_pool.cc + 
encumbered/cpu/full/inst_fifo.cc + encumbered/cpu/full/instpipe.cc + encumbered/cpu/full/issue.cc + encumbered/cpu/full/ls_queue.cc + encumbered/cpu/full/machine_queue.cc + encumbered/cpu/full/pipetrace.cc + encumbered/cpu/full/readyq.cc + encumbered/cpu/full/reg_info.cc + encumbered/cpu/full/rob_station.cc + encumbered/cpu/full/spec_memory.cc + encumbered/cpu/full/spec_state.cc + encumbered/cpu/full/storebuffer.cc + encumbered/cpu/full/writeback.cc + encumbered/cpu/full/iq/iq_station.cc + encumbered/cpu/full/iq/iqueue.cc + encumbered/cpu/full/iq/segmented/chain_info.cc + encumbered/cpu/full/iq/segmented/chain_wire.cc + encumbered/cpu/full/iq/segmented/iq_seg.cc + encumbered/cpu/full/iq/segmented/iq_segmented.cc + encumbered/cpu/full/iq/segmented/seg_chain.cc + encumbered/cpu/full/iq/seznec/iq_seznec.cc + encumbered/cpu/full/iq/standard/iq_standard.cc + ''') + +trace_reader_sources = Split(''' + cpu/trace/reader/mem_trace_reader.cc + cpu/trace/reader/ibm_reader.cc + cpu/trace/reader/itx_reader.cc + cpu/trace/reader/m5_reader.cc + cpu/trace/opt_cpu.cc + cpu/trace/trace_cpu.cc + ''') + + + +# MySql sources +mysql_sources = Split(''' + base/mysql.cc + base/stats/mysql.cc + ''') + +# Full-system sources +full_system_sources = Split(''' + base/crc.cc + base/inet.cc + base/remote_gdb.cc + + cpu/intr_control.cc + cpu/profile.cc + + dev/alpha_console.cc + dev/baddev.cc + dev/disk_image.cc + dev/etherbus.cc + dev/etherdump.cc + dev/etherint.cc + dev/etherlink.cc + dev/etherpkt.cc + dev/ethertap.cc + dev/ide_ctrl.cc + dev/ide_disk.cc + dev/io_device.cc + dev/isa_fake.cc + dev/ns_gige.cc + dev/pciconfigall.cc + dev/pcidev.cc + dev/pcifake.cc + dev/pktfifo.cc + dev/platform.cc + dev/simconsole.cc + dev/simple_disk.cc + dev/sinic.cc + dev/tsunami.cc + dev/tsunami_cchip.cc + dev/tsunami_io.cc + dev/tsunami_fake.cc + dev/tsunami_pchip.cc + + dev/uart.cc + dev/uart8250.cc + + kern/kernel_binning.cc + kern/kernel_stats.cc + kern/system_events.cc + kern/linux/events.cc + 
kern/linux/linux_syscalls.cc + kern/linux/printk.cc + + mem/vport.cc + + sim/pseudo_inst.cc + ''') + + +if env['TARGET_ISA'] == 'alpha': + full_system_sources += Split(''' + kern/tru64/dump_mbuf.cc + kern/tru64/printf.cc + kern/tru64/tru64_events.cc + kern/tru64/tru64_syscalls.cc + ''') + +# turbolaser encumbered sources +turbolaser_sources = Split(''' + encumbered/dev/dma.cc + encumbered/dev/etherdev.cc + encumbered/dev/scsi.cc + encumbered/dev/scsi_ctrl.cc + encumbered/dev/scsi_disk.cc + encumbered/dev/scsi_none.cc + encumbered/dev/tlaser_clock.cc + encumbered/dev/tlaser_ipi.cc + encumbered/dev/tlaser_mbox.cc + encumbered/dev/tlaser_mc146818.cc + encumbered/dev/tlaser_node.cc + encumbered/dev/tlaser_pcia.cc + encumbered/dev/tlaser_pcidev.cc + encumbered/dev/tlaser_serial.cc + encumbered/dev/turbolaser.cc + encumbered/dev/uart8530.cc + ''') + +# Syscall emulation (non-full-system) sources +syscall_emulation_sources = Split(''' + mem/translating_port.cc + mem/page_table.cc + sim/process.cc + sim/syscall_emul.cc + ''') + +#if env['TARGET_ISA'] == 'alpha': +# syscall_emulation_sources += Split(''' +# kern/tru64/tru64.cc +# ''') + +alpha_eio_sources = Split(''' + encumbered/eio/exolex.cc + encumbered/eio/libexo.cc + encumbered/eio/eio.cc + ''') + +if env['TARGET_ISA'] == 'ALPHA_ISA': + syscall_emulation_sources += alpha_eio_sources + +memtest_sources = Split(''' + cpu/memtest/memtest.cc + ''') + +# Add a flag defining what THE_ISA should be for all compilation +env.Append(CPPDEFINES=[('THE_ISA','%s_ISA' % env['TARGET_ISA'].upper())]) + +arch_sources = SConscript('arch/SConscript', + exports = 'env', duplicate = False) + +cpu_sources = SConscript('cpu/SConscript', + exports = 'env', duplicate = False) + +# This is outside of cpu/SConscript since the source directory isn't +# underneath 'cpu'. +if 'FullCPU' in env['CPU_MODELS']: + cpu_sources += full_cpu_sources + +# Set up complete list of sources based on configuration. 
+sources = base_sources + arch_sources + cpu_sources + +if env['FULL_SYSTEM']: + sources += full_system_sources + if env['ALPHA_TLASER']: + sources += turbolaser_sources +else: + sources += syscall_emulation_sources + +if env['USE_MYSQL']: + sources += mysql_sources + +for opt in env.ExportOptions: + env.ConfigFile(opt) + +################################################### +# +# Special build rules. +# +################################################### + +# base/traceflags.{cc,hh} are generated from base/traceflags.py. +# $TARGET.base will expand to "/base/traceflags". +env.Command(Split('base/traceflags.hh base/traceflags.cc'), + 'base/traceflags.py', + 'python $SOURCE $TARGET.base') + +SConscript('python/SConscript', exports = ['env'], duplicate=0) + +# This function adds the specified sources to the given build +# environment, and returns a list of all the corresponding SCons +# Object nodes (including an extra one for date.cc). We explicitly +# add the Object nodes so we can set up special dependencies for +# date.cc. +def make_objs(sources, env): + objs = [env.Object(s) for s in sources] + # make date.cc depend on all other objects so it always gets + # recompiled whenever anything else does + date_obj = env.Object('base/date.cc') + env.Depends(date_obj, objs) + objs.append(date_obj) + return objs + +################################################### +# +# Define binaries. Each different build type (debug, opt, etc.) gets +# a slightly different build environment. +# +################################################### + +# Include file paths are rooted in this directory. SCons will +# automatically expand '.' to refer to both the source directory and +# the corresponding build directory to pick up generated include +# files. 
+env.Append(CPPPATH='.') + +# Debug binary +debugEnv = env.Copy(OBJSUFFIX='.do') +debugEnv.Label = 'debug' +debugEnv.Append(CCFLAGS=Split('-g3 -gdwarf-2 -O0')) +debugEnv.Append(CPPDEFINES='DEBUG') +tlist = debugEnv.Program(target = 'm5.debug', + source = make_objs(sources, debugEnv)) +debugEnv.M5Binary = tlist[0] + +# Optimized binary +optEnv = env.Copy() +optEnv.Label = 'opt' +optEnv.Append(CCFLAGS=Split('-g -O3')) +tlist = optEnv.Program(target = 'm5.opt', + source = make_objs(sources, optEnv)) +optEnv.M5Binary = tlist[0] + +# "Fast" binary +fastEnv = env.Copy(OBJSUFFIX='.fo') +fastEnv.Label = 'fast' +fastEnv.Append(CCFLAGS=Split('-O3')) +fastEnv.Append(CPPDEFINES='NDEBUG') +fastEnv.Program(target = 'm5.fast.unstripped', + source = make_objs(sources, fastEnv)) +tlist = fastEnv.Command(target = 'm5.fast', + source = 'm5.fast.unstripped', + action = 'strip $SOURCE -o $TARGET') +fastEnv.M5Binary = tlist[0] + +# Profiled binary +profEnv = env.Copy(OBJSUFFIX='.po') +profEnv.Label = 'prof' +profEnv.Append(CCFLAGS=Split('-O3 -g -pg'), LINKFLAGS='-pg') +tlist = profEnv.Program(target = 'm5.prof', + source = make_objs(sources, profEnv)) +profEnv.M5Binary = tlist[0] + +envList = [debugEnv, optEnv, fastEnv, profEnv] + +Return('envList') diff --cc src/arch/alpha/ev5.cc index 12f7659e6,000000000..a242282ec mode 100644,000000..100644 --- a/src/arch/alpha/ev5.cc +++ b/src/arch/alpha/ev5.cc @@@ -1,577 -1,0 +1,584 @@@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "arch/alpha/tlb.hh" +#include "arch/alpha/isa_traits.hh" +#include "arch/alpha/osfpal.hh" +#include "base/kgdb.h" +#include "base/remote_gdb.hh" +#include "base/stats/events.hh" +#include "config/full_system.hh" +#include "cpu/base.hh" +#include "cpu/cpu_exec_context.hh" +#include "cpu/exec_context.hh" +#include "kern/kernel_stats.hh" +#include "sim/debug.hh" +#include "sim/sim_events.hh" + +#if FULL_SYSTEM + +using namespace EV5; + +//////////////////////////////////////////////////////////////////////// +// +// Machine dependent functions +// +void +AlphaISA::initCPU(ExecContext *xc, int cpuId) +{ + initIPRs(xc, cpuId); + + xc->setIntReg(16, cpuId); + xc->setIntReg(0, cpuId); + + xc->setPC(xc->readMiscReg(IPR_PAL_BASE) + (new ResetFault)->vect()); + xc->setNextPC(xc->readPC() + sizeof(MachInst)); +} + +//////////////////////////////////////////////////////////////////////// +// +// +// +void +AlphaISA::initIPRs(ExecContext *xc, int cpuId) +{ + for (int i = 0; i < NumInternalProcRegs; ++i) { + xc->setMiscReg(i, 0); + } + + xc->setMiscReg(IPR_PAL_BASE, PalBase); + xc->setMiscReg(IPR_MCSR, 0x6); + xc->setMiscReg(IPR_PALtemp16, cpuId); +} + + +template +void +AlphaISA::processInterrupts(CPU *cpu) +{ + //Check if there are any outstanding interrupts + //Handle the interrupts + int ipl = 0; + int summary = 0; + + cpu->checkInterrupts = false; + + if (cpu->readMiscReg(IPR_ASTRR)) + panic("asynchronous traps not implemented\n"); + + if (cpu->readMiscReg(IPR_SIRR)) { + for (int i = INTLEVEL_SOFTWARE_MIN; + i < INTLEVEL_SOFTWARE_MAX; i++) { + if (cpu->readMiscReg(IPR_SIRR) & (ULL(1) << i)) { + // See table 4-19 of the 21164 hardware reference + ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1; + summary |= (ULL(1) << i); + } + } + } + + uint64_t interrupts = cpu->intr_status(); + + if (interrupts) { + for (int i = INTLEVEL_EXTERNAL_MIN; + i < INTLEVEL_EXTERNAL_MAX; i++) { + if (interrupts & (ULL(1) << i)) { + // See table 4-19 of the 21164 hardware reference + ipl = 
i; + summary |= (ULL(1) << i); + } + } + } + + if (ipl && ipl > cpu->readMiscReg(IPR_IPLR)) { + cpu->setMiscReg(IPR_ISR, summary); + cpu->setMiscReg(IPR_INTID, ipl); + cpu->trap(new InterruptFault); + DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", + cpu->readMiscReg(IPR_IPLR), ipl, summary); + } + +} + +template +void +AlphaISA::zeroRegisters(CPU *cpu) +{ + // Insure ISA semantics + // (no longer very clean due to the change in setIntReg() in the + // cpu model. Consider changing later.) + cpu->cpuXC->setIntReg(ZeroReg, 0); + cpu->cpuXC->setFloatReg(ZeroReg, 0.0); +} + +Fault +CPUExecContext::hwrei() +{ + if (!inPalMode()) + return new UnimplementedOpcodeFault; + + setNextPC(readMiscReg(AlphaISA::IPR_EXC_ADDR)); + + if (!misspeculating()) { - cpu->kernelStats->hwrei(); ++ if (kernelStats) ++ kernelStats->hwrei(); + + cpu->checkInterrupts = true; + } + + // FIXME: XXX check for interrupts? XXX + return NoFault; +} + +int +AlphaISA::MiscRegFile::getInstAsid() +{ + return EV5::ITB_ASN_ASN(ipr[IPR_ITB_ASN]); +} + +int +AlphaISA::MiscRegFile::getDataAsid() +{ + return EV5::DTB_ASN_ASN(ipr[IPR_DTB_ASN]); +} + +AlphaISA::MiscReg +AlphaISA::MiscRegFile::readIpr(int idx, Fault &fault, ExecContext *xc) +{ + uint64_t retval = 0; // return value, default 0 + + switch (idx) { + case AlphaISA::IPR_PALtemp0: + case AlphaISA::IPR_PALtemp1: + case AlphaISA::IPR_PALtemp2: + case AlphaISA::IPR_PALtemp3: + case AlphaISA::IPR_PALtemp4: + case AlphaISA::IPR_PALtemp5: + case AlphaISA::IPR_PALtemp6: + case AlphaISA::IPR_PALtemp7: + case AlphaISA::IPR_PALtemp8: + case AlphaISA::IPR_PALtemp9: + case AlphaISA::IPR_PALtemp10: + case AlphaISA::IPR_PALtemp11: + case AlphaISA::IPR_PALtemp12: + case AlphaISA::IPR_PALtemp13: + case AlphaISA::IPR_PALtemp14: + case AlphaISA::IPR_PALtemp15: + case AlphaISA::IPR_PALtemp16: + case AlphaISA::IPR_PALtemp17: + case AlphaISA::IPR_PALtemp18: + case AlphaISA::IPR_PALtemp19: + case AlphaISA::IPR_PALtemp20: + case AlphaISA::IPR_PALtemp21: + case 
AlphaISA::IPR_PALtemp22: + case AlphaISA::IPR_PALtemp23: + case AlphaISA::IPR_PAL_BASE: + + case AlphaISA::IPR_IVPTBR: + case AlphaISA::IPR_DC_MODE: + case AlphaISA::IPR_MAF_MODE: + case AlphaISA::IPR_ISR: + case AlphaISA::IPR_EXC_ADDR: + case AlphaISA::IPR_IC_PERR_STAT: + case AlphaISA::IPR_DC_PERR_STAT: + case AlphaISA::IPR_MCSR: + case AlphaISA::IPR_ASTRR: + case AlphaISA::IPR_ASTER: + case AlphaISA::IPR_SIRR: + case AlphaISA::IPR_ICSR: + case AlphaISA::IPR_ICM: + case AlphaISA::IPR_DTB_CM: + case AlphaISA::IPR_IPLR: + case AlphaISA::IPR_INTID: + case AlphaISA::IPR_PMCTR: + // no side-effect + retval = ipr[idx]; + break; + + case AlphaISA::IPR_CC: + retval |= ipr[idx] & ULL(0xffffffff00000000); + retval |= xc->getCpuPtr()->curCycle() & ULL(0x00000000ffffffff); + break; + + case AlphaISA::IPR_VA: + retval = ipr[idx]; + break; + + case AlphaISA::IPR_VA_FORM: + case AlphaISA::IPR_MM_STAT: + case AlphaISA::IPR_IFAULT_VA_FORM: + case AlphaISA::IPR_EXC_MASK: + case AlphaISA::IPR_EXC_SUM: + retval = ipr[idx]; + break; + + case AlphaISA::IPR_DTB_PTE: + { + AlphaISA::PTE &pte = xc->getDTBPtr()->index(!xc->misspeculating()); + + retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32; + retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8; + retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12; + retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1; + retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2; + retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4; + retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57; + } + break; + + // write only registers + case AlphaISA::IPR_HWINT_CLR: + case AlphaISA::IPR_SL_XMIT: + case AlphaISA::IPR_DC_FLUSH: + case AlphaISA::IPR_IC_FLUSH: + case AlphaISA::IPR_ALT_MODE: + case AlphaISA::IPR_DTB_IA: + case AlphaISA::IPR_DTB_IAP: + case AlphaISA::IPR_ITB_IA: + case AlphaISA::IPR_ITB_IAP: + fault = new UnimplementedOpcodeFault; + break; + + default: + // invalid IPR + fault = new UnimplementedOpcodeFault; + break; + } + + return retval; +} + +#ifdef DEBUG +// Cause the 
simulator to break when changing to the following IPL +int break_ipl = -1; +#endif + +Fault +AlphaISA::MiscRegFile::setIpr(int idx, uint64_t val, ExecContext *xc) +{ + uint64_t old; + + if (xc->misspeculating()) + return NoFault; + + switch (idx) { + case AlphaISA::IPR_PALtemp0: + case AlphaISA::IPR_PALtemp1: + case AlphaISA::IPR_PALtemp2: + case AlphaISA::IPR_PALtemp3: + case AlphaISA::IPR_PALtemp4: + case AlphaISA::IPR_PALtemp5: + case AlphaISA::IPR_PALtemp6: + case AlphaISA::IPR_PALtemp7: + case AlphaISA::IPR_PALtemp8: + case AlphaISA::IPR_PALtemp9: + case AlphaISA::IPR_PALtemp10: + case AlphaISA::IPR_PALtemp11: + case AlphaISA::IPR_PALtemp12: + case AlphaISA::IPR_PALtemp13: + case AlphaISA::IPR_PALtemp14: + case AlphaISA::IPR_PALtemp15: + case AlphaISA::IPR_PALtemp16: + case AlphaISA::IPR_PALtemp17: + case AlphaISA::IPR_PALtemp18: + case AlphaISA::IPR_PALtemp19: + case AlphaISA::IPR_PALtemp20: + case AlphaISA::IPR_PALtemp21: + case AlphaISA::IPR_PALtemp22: + case AlphaISA::IPR_PAL_BASE: + case AlphaISA::IPR_IC_PERR_STAT: + case AlphaISA::IPR_DC_PERR_STAT: + case AlphaISA::IPR_PMCTR: + // write entire quad w/ no side-effect + ipr[idx] = val; + break; + + case AlphaISA::IPR_CC_CTL: + // This IPR resets the cycle counter. We assume this only + // happens once... let's verify that. + assert(ipr[idx] == 0); + ipr[idx] = 1; + break; + + case AlphaISA::IPR_CC: + // This IPR only writes the upper 64 bits. It's ok to write + // all 64 here since we mask out the lower 32 in rpcc (see + // isa_desc). 
+ ipr[idx] = val; + break; + + case AlphaISA::IPR_PALtemp23: + // write entire quad w/ no side-effect + old = ipr[idx]; + ipr[idx] = val; - xc->getCpuPtr()->kernelStats->context(old, val, xc); ++ if (xc->getKernelStats()) ++ xc->getKernelStats()->context(old, val, xc); + break; + + case AlphaISA::IPR_DTB_PTE: + // write entire quad w/ no side-effect, tag is forthcoming + ipr[idx] = val; + break; + + case AlphaISA::IPR_EXC_ADDR: + // second least significant bit in PC is always zero + ipr[idx] = val & ~2; + break; + + case AlphaISA::IPR_ASTRR: + case AlphaISA::IPR_ASTER: + // only write least significant four bits - privilege mask + ipr[idx] = val & 0xf; + break; + + case AlphaISA::IPR_IPLR: +#ifdef DEBUG + if (break_ipl != -1 && break_ipl == (val & 0x1f)) + debug_break(); +#endif + + // only write least significant five bits - interrupt level + ipr[idx] = val & 0x1f; - xc->getCpuPtr()->kernelStats->swpipl(ipr[idx]); ++ if (xc->getKernelStats()) ++ xc->getKernelStats()->swpipl(ipr[idx]); + break; + + case AlphaISA::IPR_DTB_CM: - if (val & 0x18) - xc->getCpuPtr()->kernelStats->mode(Kernel::user, xc); - else - xc->getCpuPtr()->kernelStats->mode(Kernel::kernel, xc); ++ if (val & 0x18) { ++ if (xc->getKernelStats()) ++ xc->getKernelStats()->mode(Kernel::user, xc); ++ } else { ++ if (xc->getKernelStats()) ++ xc->getKernelStats()->mode(Kernel::kernel, xc); ++ } + + case AlphaISA::IPR_ICM: + // only write two mode bits - processor mode + ipr[idx] = val & 0x18; + break; + + case AlphaISA::IPR_ALT_MODE: + // only write two mode bits - processor mode + ipr[idx] = val & 0x18; + break; + + case AlphaISA::IPR_MCSR: + // more here after optimization... 
+ ipr[idx] = val; + break; + + case AlphaISA::IPR_SIRR: + // only write software interrupt mask + ipr[idx] = val & 0x7fff0; + break; + + case AlphaISA::IPR_ICSR: + ipr[idx] = val & ULL(0xffffff0300); + break; + + case AlphaISA::IPR_IVPTBR: + case AlphaISA::IPR_MVPTBR: + ipr[idx] = val & ULL(0xffffffffc0000000); + break; + + case AlphaISA::IPR_DC_TEST_CTL: + ipr[idx] = val & 0x1ffb; + break; + + case AlphaISA::IPR_DC_MODE: + case AlphaISA::IPR_MAF_MODE: + ipr[idx] = val & 0x3f; + break; + + case AlphaISA::IPR_ITB_ASN: + ipr[idx] = val & 0x7f0; + break; + + case AlphaISA::IPR_DTB_ASN: + ipr[idx] = val & ULL(0xfe00000000000000); + break; + + case AlphaISA::IPR_EXC_SUM: + case AlphaISA::IPR_EXC_MASK: + // any write to this register clears it + ipr[idx] = 0; + break; + + case AlphaISA::IPR_INTID: + case AlphaISA::IPR_SL_RCV: + case AlphaISA::IPR_MM_STAT: + case AlphaISA::IPR_ITB_PTE_TEMP: + case AlphaISA::IPR_DTB_PTE_TEMP: + // read-only registers + return new UnimplementedOpcodeFault; + + case AlphaISA::IPR_HWINT_CLR: + case AlphaISA::IPR_SL_XMIT: + case AlphaISA::IPR_DC_FLUSH: + case AlphaISA::IPR_IC_FLUSH: + // the following are write only + ipr[idx] = val; + break; + + case AlphaISA::IPR_DTB_IA: + // really a control write + ipr[idx] = 0; + + xc->getDTBPtr()->flushAll(); + break; + + case AlphaISA::IPR_DTB_IAP: + // really a control write + ipr[idx] = 0; + + xc->getDTBPtr()->flushProcesses(); + break; + + case AlphaISA::IPR_DTB_IS: + // really a control write + ipr[idx] = val; + + xc->getDTBPtr()->flushAddr(val, + DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN])); + break; + + case AlphaISA::IPR_DTB_TAG: { + struct AlphaISA::PTE pte; + + // FIXME: granularity hints NYI... 
+ if (DTB_PTE_GH(ipr[AlphaISA::IPR_DTB_PTE]) != 0) + panic("PTE GH field != 0"); + + // write entire quad + ipr[idx] = val; + + // construct PTE for new entry + pte.ppn = DTB_PTE_PPN(ipr[AlphaISA::IPR_DTB_PTE]); + pte.xre = DTB_PTE_XRE(ipr[AlphaISA::IPR_DTB_PTE]); + pte.xwe = DTB_PTE_XWE(ipr[AlphaISA::IPR_DTB_PTE]); + pte.fonr = DTB_PTE_FONR(ipr[AlphaISA::IPR_DTB_PTE]); + pte.fonw = DTB_PTE_FONW(ipr[AlphaISA::IPR_DTB_PTE]); + pte.asma = DTB_PTE_ASMA(ipr[AlphaISA::IPR_DTB_PTE]); + pte.asn = DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]); + + // insert new TAG/PTE value into data TLB + xc->getDTBPtr()->insert(val, pte); + } + break; + + case AlphaISA::IPR_ITB_PTE: { + struct AlphaISA::PTE pte; + + // FIXME: granularity hints NYI... + if (ITB_PTE_GH(val) != 0) + panic("PTE GH field != 0"); + + // write entire quad + ipr[idx] = val; + + // construct PTE for new entry + pte.ppn = ITB_PTE_PPN(val); + pte.xre = ITB_PTE_XRE(val); + pte.xwe = 0; + pte.fonr = ITB_PTE_FONR(val); + pte.fonw = ITB_PTE_FONW(val); + pte.asma = ITB_PTE_ASMA(val); + pte.asn = ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]); + + // insert new TAG/PTE value into data TLB + xc->getITBPtr()->insert(ipr[AlphaISA::IPR_ITB_TAG], pte); + } + break; + + case AlphaISA::IPR_ITB_IA: + // really a control write + ipr[idx] = 0; + + xc->getITBPtr()->flushAll(); + break; + + case AlphaISA::IPR_ITB_IAP: + // really a control write + ipr[idx] = 0; + + xc->getITBPtr()->flushProcesses(); + break; + + case AlphaISA::IPR_ITB_IS: + // really a control write + ipr[idx] = val; + + xc->getITBPtr()->flushAddr(val, + ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN])); + break; + + default: + // invalid IPR + return new UnimplementedOpcodeFault; + } + + // no error... + return NoFault; +} + +void +AlphaISA::copyIprs(ExecContext *src, ExecContext *dest) +{ + for (int i = IPR_Base_DepTag; i < NumInternalProcRegs; ++i) { + dest->setMiscReg(i, src->readMiscReg(i)); + } +} + +/** + * Check for special simulator handling of specific PAL calls. 
+ * If return value is false, actual PAL call will be suppressed. + */ +bool +CPUExecContext::simPalCheck(int palFunc) +{ - cpu->kernelStats->callpal(palFunc, proxy); ++ if (kernelStats) ++ kernelStats->callpal(palFunc, proxy); + + switch (palFunc) { + case PAL::halt: + halt(); + if (--System::numSystemsRunning == 0) + new SimExitEvent("all cpus halted"); + break; + + case PAL::bpt: + case PAL::bugchk: + if (system->breakpoint()) + return false; + break; + } + + return true; +} + +#endif // FULL_SYSTEM diff --cc src/arch/alpha/isa/decoder.isa index f41c46ac1,000000000..e6b4c234f mode 100644,000000..100644 --- a/src/arch/alpha/isa/decoder.isa +++ b/src/arch/alpha/isa/decoder.isa @@@ -1,824 -1,0 +1,824 @@@ +// -*- mode:c++ -*- + +// Copyright (c) 2003-2006 The Regents of The University of Michigan +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +//////////////////////////////////////////////////////////////////// +// +// The actual decoder specification +// + +decode OPCODE default Unknown::unknown() { + + format LoadAddress { + 0x08: lda({{ Ra = Rb + disp; }}); + 0x09: ldah({{ Ra = Rb + (disp << 16); }}); + } + + format LoadOrNop { + 0x0a: ldbu({{ Ra.uq = Mem.ub; }}); + 0x0c: ldwu({{ Ra.uq = Mem.uw; }}); + 0x0b: ldq_u({{ Ra = Mem.uq; }}, ea_code = {{ EA = (Rb + disp) & ~7; }}); + 0x23: ldt({{ Fa = Mem.df; }}); + 0x2a: ldl_l({{ Ra.sl = Mem.sl; }}, mem_flags = LOCKED); + 0x2b: ldq_l({{ Ra.uq = Mem.uq; }}, mem_flags = LOCKED); + 0x20: MiscPrefetch::copy_load({{ EA = Ra; }}, + {{ fault = xc->copySrcTranslate(EA); }}, + inst_flags = [IsMemRef, IsLoad, IsCopy]); + } + + format LoadOrPrefetch { + 0x28: ldl({{ Ra.sl = Mem.sl; }}); + 0x29: ldq({{ Ra.uq = Mem.uq; }}, pf_flags = EVICT_NEXT); + // IsFloating flag on lds gets the prefetch to disassemble + // using f31 instead of r31... 
functionally it's unnecessary + 0x22: lds({{ Fa.uq = s_to_t(Mem.ul); }}, + pf_flags = PF_EXCLUSIVE, inst_flags = IsFloating); + } + + format Store { + 0x0e: stb({{ Mem.ub = Ra<7:0>; }}); + 0x0d: stw({{ Mem.uw = Ra<15:0>; }}); + 0x2c: stl({{ Mem.ul = Ra<31:0>; }}); + 0x2d: stq({{ Mem.uq = Ra.uq; }}); + 0x0f: stq_u({{ Mem.uq = Ra.uq; }}, {{ EA = (Rb + disp) & ~7; }}); + 0x26: sts({{ Mem.ul = t_to_s(Fa.uq); }}); + 0x27: stt({{ Mem.df = Fa; }}); + 0x24: MiscPrefetch::copy_store({{ EA = Rb; }}, + {{ fault = xc->copy(EA); }}, + inst_flags = [IsMemRef, IsStore, IsCopy]); + } + + format StoreCond { + 0x2e: stl_c({{ Mem.ul = Ra<31:0>; }}, + {{ + uint64_t tmp = write_result; + // see stq_c + Ra = (tmp == 0 || tmp == 1) ? tmp : Ra; - }}, mem_flags = LOCKED); ++ }}, mem_flags = LOCKED, inst_flags = IsStoreConditional); + 0x2f: stq_c({{ Mem.uq = Ra; }}, + {{ + uint64_t tmp = write_result; + // If the write operation returns 0 or 1, then + // this was a conventional store conditional, + // and the value indicates the success/failure + // of the operation. If another value is + // returned, then this was a Turbolaser + // mailbox access, and we don't update the + // result register at all. + Ra = (tmp == 0 || tmp == 1) ? tmp : Ra; - }}, mem_flags = LOCKED); ++ }}, mem_flags = LOCKED, inst_flags = IsStoreConditional); + } + + format IntegerOperate { + + 0x10: decode INTFUNC { // integer arithmetic operations + + 0x00: addl({{ Rc.sl = Ra.sl + Rb_or_imm.sl; }}); + 0x40: addlv({{ + uint32_t tmp = Ra.sl + Rb_or_imm.sl; + // signed overflow occurs when operands have same sign + // and sign of result does not match. 
+ if (Ra.sl<31:> == Rb_or_imm.sl<31:> && tmp<31:> != Ra.sl<31:>) + fault = new IntegerOverflowFault; + Rc.sl = tmp; + }}); + 0x02: s4addl({{ Rc.sl = (Ra.sl << 2) + Rb_or_imm.sl; }}); + 0x12: s8addl({{ Rc.sl = (Ra.sl << 3) + Rb_or_imm.sl; }}); + + 0x20: addq({{ Rc = Ra + Rb_or_imm; }}); + 0x60: addqv({{ + uint64_t tmp = Ra + Rb_or_imm; + // signed overflow occurs when operands have same sign + // and sign of result does not match. + if (Ra<63:> == Rb_or_imm<63:> && tmp<63:> != Ra<63:>) + fault = new IntegerOverflowFault; + Rc = tmp; + }}); + 0x22: s4addq({{ Rc = (Ra << 2) + Rb_or_imm; }}); + 0x32: s8addq({{ Rc = (Ra << 3) + Rb_or_imm; }}); + + 0x09: subl({{ Rc.sl = Ra.sl - Rb_or_imm.sl; }}); + 0x49: sublv({{ + uint32_t tmp = Ra.sl - Rb_or_imm.sl; + // signed overflow detection is same as for add, + // except we need to look at the *complemented* + // sign bit of the subtrahend (Rb), i.e., if the initial + // signs are the *same* then no overflow can occur + if (Ra.sl<31:> != Rb_or_imm.sl<31:> && tmp<31:> != Ra.sl<31:>) + fault = new IntegerOverflowFault; + Rc.sl = tmp; + }}); + 0x0b: s4subl({{ Rc.sl = (Ra.sl << 2) - Rb_or_imm.sl; }}); + 0x1b: s8subl({{ Rc.sl = (Ra.sl << 3) - Rb_or_imm.sl; }}); + + 0x29: subq({{ Rc = Ra - Rb_or_imm; }}); + 0x69: subqv({{ + uint64_t tmp = Ra - Rb_or_imm; + // signed overflow detection is same as for add, + // except we need to look at the *complemented* + // sign bit of the subtrahend (Rb), i.e., if the initial + // signs are the *same* then no overflow can occur + if (Ra<63:> != Rb_or_imm<63:> && tmp<63:> != Ra<63:>) + fault = new IntegerOverflowFault; + Rc = tmp; + }}); + 0x2b: s4subq({{ Rc = (Ra << 2) - Rb_or_imm; }}); + 0x3b: s8subq({{ Rc = (Ra << 3) - Rb_or_imm; }}); + + 0x2d: cmpeq({{ Rc = (Ra == Rb_or_imm); }}); + 0x6d: cmple({{ Rc = (Ra.sq <= Rb_or_imm.sq); }}); + 0x4d: cmplt({{ Rc = (Ra.sq < Rb_or_imm.sq); }}); + 0x3d: cmpule({{ Rc = (Ra.uq <= Rb_or_imm.uq); }}); + 0x1d: cmpult({{ Rc = (Ra.uq < Rb_or_imm.uq); }}); + + 0x0f: 
cmpbge({{ + int hi = 7; + int lo = 0; + uint64_t tmp = 0; + for (int i = 0; i < 8; ++i) { + tmp |= (Ra.uq<hi:lo> >= Rb_or_imm.uq<hi:lo>) << i; + hi += 8; + lo += 8; + } + Rc = tmp; + }}); + } + + 0x11: decode INTFUNC { // integer logical operations + + 0x00: and({{ Rc = Ra & Rb_or_imm; }}); + 0x08: bic({{ Rc = Ra & ~Rb_or_imm; }}); + 0x20: bis({{ Rc = Ra | Rb_or_imm; }}); + 0x28: ornot({{ Rc = Ra | ~Rb_or_imm; }}); + 0x40: xor({{ Rc = Ra ^ Rb_or_imm; }}); + 0x48: eqv({{ Rc = Ra ^ ~Rb_or_imm; }}); + + // conditional moves + 0x14: cmovlbs({{ Rc = ((Ra & 1) == 1) ? Rb_or_imm : Rc; }}); + 0x16: cmovlbc({{ Rc = ((Ra & 1) == 0) ? Rb_or_imm : Rc; }}); + 0x24: cmoveq({{ Rc = (Ra == 0) ? Rb_or_imm : Rc; }}); + 0x26: cmovne({{ Rc = (Ra != 0) ? Rb_or_imm : Rc; }}); + 0x44: cmovlt({{ Rc = (Ra.sq < 0) ? Rb_or_imm : Rc; }}); + 0x46: cmovge({{ Rc = (Ra.sq >= 0) ? Rb_or_imm : Rc; }}); + 0x64: cmovle({{ Rc = (Ra.sq <= 0) ? Rb_or_imm : Rc; }}); + 0x66: cmovgt({{ Rc = (Ra.sq > 0) ? Rb_or_imm : Rc; }}); + + // For AMASK, RA must be R31. + 0x61: decode RA { + 31: amask({{ Rc = Rb_or_imm & ~ULL(0x17); }}); + } + + // For IMPLVER, RA must be R31 and the B operand + // must be the immediate value 1. + 0x6c: decode RA { + 31: decode IMM { + 1: decode INTIMM { + // return EV5 for FULL_SYSTEM and EV6 otherwise + 1: implver({{ +#if FULL_SYSTEM + Rc = 1; +#else + Rc = 2; +#endif + }}); + } + } + } + +#if FULL_SYSTEM + // The mysterious 11.25... + 0x25: WarnUnimpl::eleven25(); +#endif + } + + 0x12: decode INTFUNC { + 0x39: sll({{ Rc = Ra << Rb_or_imm<5:0>; }}); + 0x34: srl({{ Rc = Ra.uq >> Rb_or_imm<5:0>; }}); + 0x3c: sra({{ Rc = Ra.sq >> Rb_or_imm<5:0>; }}); + + 0x02: mskbl({{ Rc = Ra & ~(mask( 8) << (Rb_or_imm<2:0> * 8)); }}); + 0x12: mskwl({{ Rc = Ra & ~(mask(16) << (Rb_or_imm<2:0> * 8)); }}); + 0x22: mskll({{ Rc = Ra & ~(mask(32) << (Rb_or_imm<2:0> * 8)); }}); + 0x32: mskql({{ Rc = Ra & ~(mask(64) << (Rb_or_imm<2:0> * 8)); }}); + + 0x52: mskwh({{ + int bv = Rb_or_imm<2:0>; + Rc = bv ? 
(Ra & ~(mask(16) >> (64 - 8 * bv))) : Ra; + }}); + 0x62: msklh({{ + int bv = Rb_or_imm<2:0>; + Rc = bv ? (Ra & ~(mask(32) >> (64 - 8 * bv))) : Ra; + }}); + 0x72: mskqh({{ + int bv = Rb_or_imm<2:0>; + Rc = bv ? (Ra & ~(mask(64) >> (64 - 8 * bv))) : Ra; + }}); + + 0x06: extbl({{ Rc = (Ra.uq >> (Rb_or_imm<2:0> * 8))< 7:0>; }}); + 0x16: extwl({{ Rc = (Ra.uq >> (Rb_or_imm<2:0> * 8))<15:0>; }}); + 0x26: extll({{ Rc = (Ra.uq >> (Rb_or_imm<2:0> * 8))<31:0>; }}); + 0x36: extql({{ Rc = (Ra.uq >> (Rb_or_imm<2:0> * 8)); }}); + + 0x5a: extwh({{ + Rc = (Ra << (64 - (Rb_or_imm<2:0> * 8))<5:0>)<15:0>; }}); + 0x6a: extlh({{ + Rc = (Ra << (64 - (Rb_or_imm<2:0> * 8))<5:0>)<31:0>; }}); + 0x7a: extqh({{ + Rc = (Ra << (64 - (Rb_or_imm<2:0> * 8))<5:0>); }}); + + 0x0b: insbl({{ Rc = Ra< 7:0> << (Rb_or_imm<2:0> * 8); }}); + 0x1b: inswl({{ Rc = Ra<15:0> << (Rb_or_imm<2:0> * 8); }}); + 0x2b: insll({{ Rc = Ra<31:0> << (Rb_or_imm<2:0> * 8); }}); + 0x3b: insql({{ Rc = Ra << (Rb_or_imm<2:0> * 8); }}); + + 0x57: inswh({{ + int bv = Rb_or_imm<2:0>; + Rc = bv ? (Ra.uq<15:0> >> (64 - 8 * bv)) : 0; + }}); + 0x67: inslh({{ + int bv = Rb_or_imm<2:0>; + Rc = bv ? (Ra.uq<31:0> >> (64 - 8 * bv)) : 0; + }}); + 0x77: insqh({{ + int bv = Rb_or_imm<2:0>; + Rc = bv ? 
(Ra.uq >> (64 - 8 * bv)) : 0; + }}); + + 0x30: zap({{ + uint64_t zapmask = 0; + for (int i = 0; i < 8; ++i) { + if (Rb_or_imm<i:>) + zapmask |= (mask(8) << (i * 8)); + } + Rc = Ra & ~zapmask; + }}); + 0x31: zapnot({{ + uint64_t zapmask = 0; + for (int i = 0; i < 8; ++i) { + if (!(Rb_or_imm<i:>)) + zapmask |= (mask(8) << (i * 8)); + } + Rc = Ra & ~zapmask; + }}); + } + + 0x13: decode INTFUNC { // integer multiplies + 0x00: mull({{ Rc.sl = Ra.sl * Rb_or_imm.sl; }}, IntMultOp); + 0x20: mulq({{ Rc = Ra * Rb_or_imm; }}, IntMultOp); + 0x30: umulh({{ + uint64_t hi, lo; + mul128(Ra, Rb_or_imm, hi, lo); + Rc = hi; + }}, IntMultOp); + 0x40: mullv({{ + // 32-bit multiply with trap on overflow + int64_t Rax = Ra.sl; // sign extended version of Ra.sl + int64_t Rbx = Rb_or_imm.sl; + int64_t tmp = Rax * Rbx; + // To avoid overflow, all the upper 32 bits must match + // the sign bit of the lower 32. We code this as + // checking the upper 33 bits for all 0s or all 1s. + uint64_t sign_bits = tmp<63:31>; + if (sign_bits != 0 && sign_bits != mask(33)) + fault = new IntegerOverflowFault; + Rc.sl = tmp<31:0>; + }}, IntMultOp); + 0x60: mulqv({{ + // 64-bit multiply with trap on overflow + uint64_t hi, lo; + mul128(Ra, Rb_or_imm, hi, lo); + // all the upper 64 bits must match the sign bit of + // the lower 64 + if (!((hi == 0 && lo<63:> == 0) || + (hi == mask(64) && lo<63:> == 1))) + fault = new IntegerOverflowFault; + Rc = lo; + }}, IntMultOp); + } + + 0x1c: decode INTFUNC { + 0x00: decode RA { 31: sextb({{ Rc.sb = Rb_or_imm< 7:0>; }}); } + 0x01: decode RA { 31: sextw({{ Rc.sw = Rb_or_imm<15:0>; }}); } + 0x32: ctlz({{ + uint64_t count = 0; + uint64_t temp = Rb; + if (temp<63:32>) temp >>= 32; else count += 32; + if (temp<31:16>) temp >>= 16; else count += 16; + if (temp<15:8>) temp >>= 8; else count += 8; + if (temp<7:4>) temp >>= 4; else count += 4; + if (temp<3:2>) temp >>= 2; else count += 2; + if (temp<1:1>) temp >>= 1; else count += 1; + if ((temp<0:0>) != 0x1) count += 1; + Rc = count; + 
}}, IntAluOp); + + 0x33: cttz({{ + uint64_t count = 0; + uint64_t temp = Rb; + if (!(temp<31:0>)) { temp >>= 32; count += 32; } + if (!(temp<15:0>)) { temp >>= 16; count += 16; } + if (!(temp<7:0>)) { temp >>= 8; count += 8; } + if (!(temp<3:0>)) { temp >>= 4; count += 4; } + if (!(temp<1:0>)) { temp >>= 2; count += 2; } + if (!(temp<0:0> & ULL(0x1))) count += 1; + Rc = count; + }}, IntAluOp); + + format FailUnimpl { + 0x30: ctpop(); + 0x31: perr(); + 0x34: unpkbw(); + 0x35: unpkbl(); + 0x36: pkwb(); + 0x37: pklb(); + 0x38: minsb8(); + 0x39: minsw4(); + 0x3a: minub8(); + 0x3b: minuw4(); + 0x3c: maxub8(); + 0x3d: maxuw4(); + 0x3e: maxsb8(); + 0x3f: maxsw4(); + } + + format BasicOperateWithNopCheck { + 0x70: decode RB { + 31: ftoit({{ Rc = Fa.uq; }}, FloatCvtOp); + } + 0x78: decode RB { + 31: ftois({{ Rc.sl = t_to_s(Fa.uq); }}, + FloatCvtOp); + } + } + } + } + + // Conditional branches. + format CondBranch { + 0x39: beq({{ cond = (Ra == 0); }}); + 0x3d: bne({{ cond = (Ra != 0); }}); + 0x3e: bge({{ cond = (Ra.sq >= 0); }}); + 0x3f: bgt({{ cond = (Ra.sq > 0); }}); + 0x3b: ble({{ cond = (Ra.sq <= 0); }}); + 0x3a: blt({{ cond = (Ra.sq < 0); }}); + 0x38: blbc({{ cond = ((Ra & 1) == 0); }}); + 0x3c: blbs({{ cond = ((Ra & 1) == 1); }}); + + 0x31: fbeq({{ cond = (Fa == 0); }}); + 0x35: fbne({{ cond = (Fa != 0); }}); + 0x36: fbge({{ cond = (Fa >= 0); }}); + 0x37: fbgt({{ cond = (Fa > 0); }}); + 0x33: fble({{ cond = (Fa <= 0); }}); + 0x32: fblt({{ cond = (Fa < 0); }}); + } + + // unconditional branches + format UncondBranch { + 0x30: br(); + 0x34: bsr(IsCall); + } + + // indirect branches + 0x1a: decode JMPFUNC { + format Jump { + 0: jmp(); + 1: jsr(IsCall); + 2: ret(IsReturn); + 3: jsr_coroutine(IsCall, IsReturn); + } + } + + // Square root and integer-to-FP moves + 0x14: decode FP_SHORTFUNC { + // Integer to FP register moves must have RB == 31 + 0x4: decode RB { + 31: decode FP_FULLFUNC { + format BasicOperateWithNopCheck { + 0x004: itofs({{ Fc.uq = s_to_t(Ra.ul); }}, 
FloatCvtOp); + 0x024: itoft({{ Fc.uq = Ra.uq; }}, FloatCvtOp); + 0x014: FailUnimpl::itoff(); // VAX-format conversion + } + } + } + + // Square root instructions must have FA == 31 + 0xb: decode FA { + 31: decode FP_TYPEFUNC { + format FloatingPointOperate { +#if SS_COMPATIBLE_FP + 0x0b: sqrts({{ + if (Fb < 0.0) + fault = new ArithmeticFault; + Fc = sqrt(Fb); + }}, FloatSqrtOp); +#else + 0x0b: sqrts({{ + if (Fb.sf < 0.0) + fault = new ArithmeticFault; + Fc.sf = sqrt(Fb.sf); + }}, FloatSqrtOp); +#endif + 0x2b: sqrtt({{ + if (Fb < 0.0) + fault = new ArithmeticFault; + Fc = sqrt(Fb); + }}, FloatSqrtOp); + } + } + } + + // VAX-format sqrtf and sqrtg are not implemented + 0xa: FailUnimpl::sqrtfg(); + } + + // IEEE floating point + 0x16: decode FP_SHORTFUNC_TOP2 { + // The top two bits of the short function code break this + // space into four groups: binary ops, compares, reserved, and + // conversions. See Table 4-12 of AHB. There are different + // special cases in these different groups, so we decode on + // these top two bits first just to select a decode strategy. + // Most of these instructions may have various trapping and + // rounding mode flags set; these are decoded in the + // FloatingPointDecode template used by the + // FloatingPointOperate format. + + // add/sub/mul/div: just decode on the short function code + // and source type. All valid trapping and rounding modes apply. 
+ 0: decode FP_TRAPMODE { + // check for valid trapping modes here + 0,1,5,7: decode FP_TYPEFUNC { + format FloatingPointOperate { +#if SS_COMPATIBLE_FP + 0x00: adds({{ Fc = Fa + Fb; }}); + 0x01: subs({{ Fc = Fa - Fb; }}); + 0x02: muls({{ Fc = Fa * Fb; }}, FloatMultOp); + 0x03: divs({{ Fc = Fa / Fb; }}, FloatDivOp); +#else + 0x00: adds({{ Fc.sf = Fa.sf + Fb.sf; }}); + 0x01: subs({{ Fc.sf = Fa.sf - Fb.sf; }}); + 0x02: muls({{ Fc.sf = Fa.sf * Fb.sf; }}, FloatMultOp); + 0x03: divs({{ Fc.sf = Fa.sf / Fb.sf; }}, FloatDivOp); +#endif + + 0x20: addt({{ Fc = Fa + Fb; }}); + 0x21: subt({{ Fc = Fa - Fb; }}); + 0x22: mult({{ Fc = Fa * Fb; }}, FloatMultOp); + 0x23: divt({{ Fc = Fa / Fb; }}, FloatDivOp); + } + } + } + + // Floating-point compare instructions must have the default + // rounding mode, and may use the default trapping mode or + // /SU. Both trapping modes are treated the same by M5; the + // only difference on the real hardware (as far a I can tell) + // is that without /SU you'd get an imprecise trap if you + // tried to compare a NaN with something else (instead of an + // "unordered" result). + 1: decode FP_FULLFUNC { + format BasicOperateWithNopCheck { + 0x0a5, 0x5a5: cmpteq({{ Fc = (Fa == Fb) ? 2.0 : 0.0; }}, + FloatCmpOp); + 0x0a7, 0x5a7: cmptle({{ Fc = (Fa <= Fb) ? 2.0 : 0.0; }}, + FloatCmpOp); + 0x0a6, 0x5a6: cmptlt({{ Fc = (Fa < Fb) ? 2.0 : 0.0; }}, + FloatCmpOp); + 0x0a4, 0x5a4: cmptun({{ // unordered + Fc = (!(Fa < Fb) && !(Fa == Fb) && !(Fa > Fb)) ? 2.0 : 0.0; + }}, FloatCmpOp); + } + } + + // The FP-to-integer and integer-to-FP conversion insts + // require that FA be 31. + 3: decode FA { + 31: decode FP_TYPEFUNC { + format FloatingPointOperate { + 0x2f: decode FP_ROUNDMODE { + format FPFixedRounding { + // "chopped" i.e. 
round toward zero + 0: cvttq({{ Fc.sq = (int64_t)trunc(Fb); }}, + Chopped); + // round to minus infinity + 1: cvttq({{ Fc.sq = (int64_t)floor(Fb); }}, + MinusInfinity); + } + default: cvttq({{ Fc.sq = (int64_t)nearbyint(Fb); }}); + } + + // The cvtts opcode is overloaded to be cvtst if the trap + // mode is 2 or 6 (which are not valid otherwise) + 0x2c: decode FP_FULLFUNC { + format BasicOperateWithNopCheck { + // trap on denorm version "cvtst/s" is + // simulated same as cvtst + 0x2ac, 0x6ac: cvtst({{ Fc = Fb.sf; }}); + } + default: cvtts({{ Fc.sf = Fb; }}); + } + + // The trapping mode for integer-to-FP conversions + // must be /SUI or nothing; /U and /SU are not + // allowed. The full set of rounding modes are + // supported though. + 0x3c: decode FP_TRAPMODE { + 0,7: cvtqs({{ Fc.sf = Fb.sq; }}); + } + 0x3e: decode FP_TRAPMODE { + 0,7: cvtqt({{ Fc = Fb.sq; }}); + } + } + } + } + } + + // misc FP operate + 0x17: decode FP_FULLFUNC { + format BasicOperateWithNopCheck { + 0x010: cvtlq({{ + Fc.sl = (Fb.uq<63:62> << 30) | Fb.uq<58:29>; + }}); + 0x030: cvtql({{ + Fc.uq = (Fb.uq<31:30> << 62) | (Fb.uq<29:0> << 29); + }}); + + // We treat the precise & imprecise trapping versions of + // cvtql identically. + 0x130, 0x530: cvtqlv({{ + // To avoid overflow, all the upper 32 bits must match + // the sign bit of the lower 32. We code this as + // checking the upper 33 bits for all 0s or all 1s. + uint64_t sign_bits = Fb.uq<63:31>; + if (sign_bits != 0 && sign_bits != mask(33)) + fault = new IntegerOverflowFault; + Fc.uq = (Fb.uq<31:30> << 62) | (Fb.uq<29:0> << 29); + }}); + + 0x020: cpys({{ // copy sign + Fc.uq = (Fa.uq<63:> << 63) | Fb.uq<62:0>; + }}); + 0x021: cpysn({{ // copy sign negated + Fc.uq = (~Fa.uq<63:> << 63) | Fb.uq<62:0>; + }}); + 0x022: cpyse({{ // copy sign and exponent + Fc.uq = (Fa.uq<63:52> << 52) | Fb.uq<51:0>; + }}); + + 0x02a: fcmoveq({{ Fc = (Fa == 0) ? Fb : Fc; }}); + 0x02b: fcmovne({{ Fc = (Fa != 0) ? 
Fb : Fc; }}); + 0x02c: fcmovlt({{ Fc = (Fa < 0) ? Fb : Fc; }}); + 0x02d: fcmovge({{ Fc = (Fa >= 0) ? Fb : Fc; }}); + 0x02e: fcmovle({{ Fc = (Fa <= 0) ? Fb : Fc; }}); + 0x02f: fcmovgt({{ Fc = (Fa > 0) ? Fb : Fc; }}); + - 0x024: mt_fpcr({{ FPCR = Fa.uq; }}); - 0x025: mf_fpcr({{ Fa.uq = FPCR; }}); ++ 0x024: mt_fpcr({{ FPCR = Fa.uq; }}, IsIprAccess); ++ 0x025: mf_fpcr({{ Fa.uq = FPCR; }}, IsIprAccess); + } + } + + // miscellaneous mem-format ops + 0x18: decode MEMFUNC { + format WarnUnimpl { + 0x8000: fetch(); + 0xa000: fetch_m(); + 0xe800: ecb(); + } + + format MiscPrefetch { + 0xf800: wh64({{ EA = Rb & ~ULL(63); }}, + {{ xc->writeHint(EA, 64, memAccessFlags); }}, + mem_flags = NO_FAULT, + inst_flags = [IsMemRef, IsDataPrefetch, + IsStore, MemWriteOp]); + } + + format BasicOperate { + 0xc000: rpcc({{ +#if FULL_SYSTEM + /* Rb is a fake dependency so here is a fun way to get + * the parser to understand that. + */ + Ra = xc->readMiscRegWithEffect(AlphaISA::IPR_CC, fault) + (Rb & 0); + +#else + Ra = curTick; +#endif - }}); ++ }}, IsUnverifiable); + + // All of the barrier instructions below do nothing in + // their execute() methods (hence the empty code blocks). + // All of their functionality is hard-coded in the + // pipeline based on the flags IsSerializing, + // IsMemBarrier, and IsWriteBarrier. In the current + // detailed CPU model, the execute() function only gets + // called at fetch, so there's no way to generate pipeline + // behavior at any other stage. Once we go to an + // exec-in-exec CPU model we should be able to get rid of + // these flags and implement this behavior via the + // execute() methods. + + // trapb is just a barrier on integer traps, where excb is + // a barrier on integer and FP traps. "EXCB is thus a + // superset of TRAPB." (Alpha ARM, Sec 4.11.4) We treat + // them the same though. 
- 0x0000: trapb({{ }}, IsSerializing, No_OpClass); - 0x0400: excb({{ }}, IsSerializing, No_OpClass); ++ 0x0000: trapb({{ }}, IsSerializing, IsSerializeBefore, No_OpClass); ++ 0x0400: excb({{ }}, IsSerializing, IsSerializeBefore, No_OpClass); + 0x4000: mb({{ }}, IsMemBarrier, MemReadOp); + 0x4400: wmb({{ }}, IsWriteBarrier, MemWriteOp); + } + +#if FULL_SYSTEM + format BasicOperate { + 0xe000: rc({{ + Ra = xc->readIntrFlag(); + xc->setIntrFlag(0); + }}, IsNonSpeculative); + 0xf000: rs({{ + Ra = xc->readIntrFlag(); + xc->setIntrFlag(1); + }}, IsNonSpeculative); + } +#else + format FailUnimpl { + 0xe000: rc(); + 0xf000: rs(); + } +#endif + } + +#if FULL_SYSTEM + 0x00: CallPal::call_pal({{ + if (!palValid || + (palPriv + && xc->readMiscRegWithEffect(AlphaISA::IPR_ICM, fault) != AlphaISA::mode_kernel)) { + // invalid pal function code, or attempt to do privileged + // PAL call in non-kernel mode + fault = new UnimplementedOpcodeFault; + } + else { + // check to see if simulator wants to do something special + // on this PAL call (including maybe suppress it) + bool dopal = xc->simPalCheck(palFunc); + + if (dopal) { + xc->setMiscRegWithEffect(AlphaISA::IPR_EXC_ADDR, NPC); + NPC = xc->readMiscRegWithEffect(AlphaISA::IPR_PAL_BASE, fault) + palOffset; + } + } + }}, IsNonSpeculative); +#else + 0x00: decode PALFUNC { + format EmulatedCallPal { + 0x00: halt ({{ + SimExit(curTick, "halt instruction encountered"); + }}, IsNonSpeculative); + 0x83: callsys({{ + xc->syscall(R0); + }}, IsNonSpeculative); + // Read uniq reg into ABI return value register (r0) - 0x9e: rduniq({{ R0 = Runiq; }}); ++ 0x9e: rduniq({{ R0 = Runiq; }}, IsIprAccess); + // Write uniq reg with value from ABI arg register (r16) - 0x9f: wruniq({{ Runiq = R16; }}); ++ 0x9f: wruniq({{ Runiq = R16; }}, IsIprAccess); + } + } +#endif + +#if FULL_SYSTEM + 0x1b: decode PALMODE { + 0: OpcdecFault::hw_st_quad(); + 1: decode HW_LDST_QUAD { + format HwLoad { + 0: hw_ld({{ EA = (Rb + disp) & ~3; }}, {{ Ra = Mem.ul; }}, L); + 
1: hw_ld({{ EA = (Rb + disp) & ~7; }}, {{ Ra = Mem.uq; }}, Q); + } + } + } + + 0x1f: decode PALMODE { + 0: OpcdecFault::hw_st_cond(); + format HwStore { + 1: decode HW_LDST_COND { + 0: decode HW_LDST_QUAD { + 0: hw_st({{ EA = (Rb + disp) & ~3; }}, + {{ Mem.ul = Ra<31:0>; }}, L); + 1: hw_st({{ EA = (Rb + disp) & ~7; }}, + {{ Mem.uq = Ra.uq; }}, Q); + } + + 1: FailUnimpl::hw_st_cond(); + } + } + } + + 0x19: decode PALMODE { + 0: OpcdecFault::hw_mfpr(); + format HwMoveIPR { + 1: hw_mfpr({{ + Ra = xc->readMiscRegWithEffect(ipr_index, fault); - }}); ++ }}, IsIprAccess); + } + } + + 0x1d: decode PALMODE { + 0: OpcdecFault::hw_mtpr(); + format HwMoveIPR { + 1: hw_mtpr({{ + xc->setMiscRegWithEffect(ipr_index, Ra); + if (traceData) { traceData->setData(Ra); } - }}); ++ }}, IsIprAccess); + } + } + + format BasicOperate { + 0x1e: decode PALMODE { + 0: OpcdecFault::hw_rei(); - 1:hw_rei({{ xc->hwrei(); }}, IsSerializing); ++ 1:hw_rei({{ xc->hwrei(); }}, IsSerializing, IsSerializeBefore); + } + + // M5 special opcodes use the reserved 0x01 opcode space + 0x01: decode M5FUNC { + 0x00: arm({{ + AlphaPseudo::arm(xc->xcBase()); + }}, IsNonSpeculative); + 0x01: quiesce({{ + AlphaPseudo::quiesce(xc->xcBase()); - }}, IsNonSpeculative); ++ }}, IsNonSpeculative, IsQuiesce); + 0x02: quiesceNs({{ + AlphaPseudo::quiesceNs(xc->xcBase(), R16); - }}, IsNonSpeculative); ++ }}, IsNonSpeculative, IsQuiesce); + 0x03: quiesceCycles({{ + AlphaPseudo::quiesceCycles(xc->xcBase(), R16); - }}, IsNonSpeculative); ++ }}, IsNonSpeculative, IsQuiesce); + 0x04: quiesceTime({{ + R0 = AlphaPseudo::quiesceTime(xc->xcBase()); + }}, IsNonSpeculative); + 0x10: ivlb({{ + AlphaPseudo::ivlb(xc->xcBase()); + }}, No_OpClass, IsNonSpeculative); + 0x11: ivle({{ + AlphaPseudo::ivle(xc->xcBase()); + }}, No_OpClass, IsNonSpeculative); + 0x20: m5exit_old({{ + AlphaPseudo::m5exit_old(xc->xcBase()); + }}, No_OpClass, IsNonSpeculative); + 0x21: m5exit({{ + AlphaPseudo::m5exit(xc->xcBase(), R16); + }}, No_OpClass, 
IsNonSpeculative); + 0x30: initparam({{ Ra = xc->xcBase()->getCpuPtr()->system->init_param; }}); + 0x40: resetstats({{ + AlphaPseudo::resetstats(xc->xcBase(), R16, R17); + }}, IsNonSpeculative); + 0x41: dumpstats({{ + AlphaPseudo::dumpstats(xc->xcBase(), R16, R17); + }}, IsNonSpeculative); + 0x42: dumpresetstats({{ + AlphaPseudo::dumpresetstats(xc->xcBase(), R16, R17); + }}, IsNonSpeculative); + 0x43: m5checkpoint({{ + AlphaPseudo::m5checkpoint(xc->xcBase(), R16, R17); + }}, IsNonSpeculative); + 0x50: m5readfile({{ + R0 = AlphaPseudo::readfile(xc->xcBase(), R16, R17, R18); + }}, IsNonSpeculative); + 0x51: m5break({{ + AlphaPseudo::debugbreak(xc->xcBase()); + }}, IsNonSpeculative); + 0x52: m5switchcpu({{ + AlphaPseudo::switchcpu(xc->xcBase()); + }}, IsNonSpeculative); + 0x53: m5addsymbol({{ + AlphaPseudo::addsymbol(xc->xcBase(), R16, R17); + }}, IsNonSpeculative); + 0x54: m5panic({{ + panic("M5 panic instruction called at pc=%#x.", xc->readPC()); + }}, IsNonSpeculative); + + } + } +#endif +} diff --cc src/arch/alpha/isa/pal.isa index 93640f274,000000000..37de20617 mode 100644,000000..100644 --- a/src/arch/alpha/isa/pal.isa +++ b/src/arch/alpha/isa/pal.isa @@@ -1,276 -1,0 +1,278 @@@ +// -*- mode:c++ -*- + +// Copyright (c) 2003-2005 The Regents of The University of Michigan +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +//////////////////////////////////////////////////////////////////// +// +// PAL calls & PAL-specific instructions +// + +output header {{ + /** + * Base class for emulated call_pal calls (used only in + * non-full-system mode). + */ + class EmulatedCallPal : public AlphaStaticInst + { + protected: + + /// Constructor. 
+ EmulatedCallPal(const char *mnem, ExtMachInst _machInst, + OpClass __opClass) + : AlphaStaticInst(mnem, _machInst, __opClass) + { + } + + std::string + generateDisassembly(Addr pc, const SymbolTable *symtab) const; + }; +}}; + +output decoder {{ + std::string + EmulatedCallPal::generateDisassembly(Addr pc, + const SymbolTable *symtab) const + { +#ifdef SS_COMPATIBLE_DISASSEMBLY + return csprintf("%s %s", "call_pal", mnemonic); +#else + return csprintf("%-10s %s", "call_pal", mnemonic); +#endif + } +}}; + +def format EmulatedCallPal(code, *flags) {{ + iop = InstObjParams(name, Name, 'EmulatedCallPal', CodeBlock(code), flags) + header_output = BasicDeclare.subst(iop) + decoder_output = BasicConstructor.subst(iop) + decode_block = BasicDecode.subst(iop) + exec_output = BasicExecute.subst(iop) +}}; + +output header {{ + /** + * Base class for full-system-mode call_pal instructions. + * Probably could turn this into a leaf class and get rid of the + * parser template. + */ + class CallPalBase : public AlphaStaticInst + { + protected: + int palFunc; ///< Function code part of instruction + int palOffset; ///< Target PC, offset from IPR_PAL_BASE + bool palValid; ///< is the function code valid? + bool palPriv; ///< is this call privileged? + + /// Constructor. + CallPalBase(const char *mnem, ExtMachInst _machInst, + OpClass __opClass); + + std::string + generateDisassembly(Addr pc, const SymbolTable *symtab) const; + }; +}}; + +output decoder {{ + inline + CallPalBase::CallPalBase(const char *mnem, ExtMachInst _machInst, + OpClass __opClass) + : AlphaStaticInst(mnem, _machInst, __opClass), + palFunc(PALFUNC) + { + // From the 21164 HRM (paraphrased): + // Bit 7 of the function code (mask 0x80) indicates + // whether the call is privileged (bit 7 == 0) or + // unprivileged (bit 7 == 1). The privileged call table + // starts at 0x2000, the unprivielged call table starts at + // 0x3000. Bits 5-0 (mask 0x3f) are used to calculate the + // offset. 
+ const int palPrivMask = 0x80; + const int palOffsetMask = 0x3f; + + // Pal call is invalid unless all other bits are 0 + palValid = ((machInst & ~(palPrivMask | palOffsetMask)) == 0); + palPriv = ((machInst & palPrivMask) == 0); + int shortPalFunc = (machInst & palOffsetMask); + // Add 1 to base to set pal-mode bit + palOffset = (palPriv ? 0x2001 : 0x3001) + (shortPalFunc << 6); + } + + std::string + CallPalBase::generateDisassembly(Addr pc, const SymbolTable *symtab) const + { + return csprintf("%-10s %#x", "call_pal", palFunc); + } +}}; + +def format CallPal(code, *flags) {{ + iop = InstObjParams(name, Name, 'CallPalBase', CodeBlock(code), flags) + header_output = BasicDeclare.subst(iop) + decoder_output = BasicConstructor.subst(iop) + decode_block = BasicDecode.subst(iop) + exec_output = BasicExecute.subst(iop) +}}; + +//////////////////////////////////////////////////////////////////// +// +// hw_ld, hw_st +// + +output header {{ + /** + * Base class for hw_ld and hw_st. + */ + class HwLoadStore : public Memory + { + protected: + + /// Displacement for EA calculation (signed). 
+ int16_t disp; + + /// Constructor + HwLoadStore(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + StaticInstPtr _eaCompPtr = nullStaticInstPtr, + StaticInstPtr _memAccPtr = nullStaticInstPtr); + + std::string + generateDisassembly(Addr pc, const SymbolTable *symtab) const; + }; +}}; + + +output decoder {{ + inline + HwLoadStore::HwLoadStore(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, + StaticInstPtr _eaCompPtr, + StaticInstPtr _memAccPtr) + : Memory(mnem, _machInst, __opClass, _eaCompPtr, _memAccPtr), + disp(HW_LDST_DISP) + { + memAccessFlags = 0; + if (HW_LDST_PHYS) memAccessFlags |= PHYSICAL; + if (HW_LDST_ALT) memAccessFlags |= ALTMODE; + if (HW_LDST_VPTE) memAccessFlags |= VPTE; + if (HW_LDST_LOCK) memAccessFlags |= LOCKED; + } + + std::string + HwLoadStore::generateDisassembly(Addr pc, const SymbolTable *symtab) const + { +#ifdef SS_COMPATIBLE_DISASSEMBLY + return csprintf("%-10s r%d,%d(r%d)", mnemonic, RA, disp, RB); +#else + // HW_LDST_LOCK and HW_LDST_COND are the same bit. + const char *lock_str = + (HW_LDST_LOCK) ? (flags[IsLoad] ? ",LOCK" : ",COND") : ""; + + return csprintf("%-10s r%d,%d(r%d)%s%s%s%s%s", + mnemonic, RA, disp, RB, + HW_LDST_PHYS ? ",PHYS" : "", + HW_LDST_ALT ? ",ALT" : "", + HW_LDST_QUAD ? ",QUAD" : "", + HW_LDST_VPTE ? 
",VPTE" : "", + lock_str); +#endif + } +}}; + +def format HwLoad(ea_code, memacc_code, class_ext, *flags) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + LoadStoreBase(name, Name + class_ext, ea_code, memacc_code, + mem_flags = [], inst_flags = flags, + base_class = 'HwLoadStore', exec_template_base = 'Load') +}}; + + +def format HwStore(ea_code, memacc_code, class_ext, *flags) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + LoadStoreBase(name, Name + class_ext, ea_code, memacc_code, + mem_flags = [], inst_flags = flags, + base_class = 'HwLoadStore', exec_template_base = 'Store') +}}; + + +def format HwStoreCond(ea_code, memacc_code, postacc_code, class_ext, + *flags) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + LoadStoreBase(name, Name + class_ext, ea_code, memacc_code, + postacc_code, mem_flags = [], inst_flags = flags, + base_class = 'HwLoadStore') +}}; + + +output header {{ + /** + * Base class for hw_mfpr and hw_mtpr. + */ + class HwMoveIPR : public AlphaStaticInst + { + protected: + /// Index of internal processor register. 
+ int ipr_index; + + /// Constructor + HwMoveIPR(const char *mnem, ExtMachInst _machInst, OpClass __opClass) + : AlphaStaticInst(mnem, _machInst, __opClass), + ipr_index(HW_IPR_IDX) + { + } + + std::string + generateDisassembly(Addr pc, const SymbolTable *symtab) const; + }; +}}; + +output decoder {{ + std::string + HwMoveIPR::generateDisassembly(Addr pc, const SymbolTable *symtab) const + { + if (_numSrcRegs > 0) { + // must be mtpr + return csprintf("%-10s r%d,IPR(%#x)", + mnemonic, RA, ipr_index); + } + else { + // must be mfpr + return csprintf("%-10s IPR(%#x),r%d", + mnemonic, ipr_index, RA); + } + } +}}; + - def format HwMoveIPR(code) {{ ++def format HwMoveIPR(code, *flags) {{ ++ all_flags = ['IprAccessOp'] ++ all_flags += flags + iop = InstObjParams(name, Name, 'HwMoveIPR', CodeBlock(code), - ['IprAccessOp']) ++ all_flags) + header_output = BasicDeclare.subst(iop) + decoder_output = BasicConstructor.subst(iop) + decode_block = BasicDecode.subst(iop) + exec_output = BasicExecute.subst(iop) +}}; + + diff --cc src/base/traceflags.py index 3b7dd0f81,000000000..9797e4cb7 mode 100644,000000..100644 --- a/src/base/traceflags.py +++ b/src/base/traceflags.py @@@ -1,318 -1,0 +1,289 @@@ +#!/usr/bin/env python + +# Copyright (c) 2004-2005 The Regents of The University of Michigan +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# +# This file generates the header and source files for the flags +# that control the tracing facility. +# + +import sys + +if len(sys.argv) != 2: + print "%s: Need argument (basename of cc/hh files)" % sys.argv[0] + sys.exit(1) + +hhfilename = sys.argv[1] + '.hh' +ccfilename = sys.argv[1] + '.cc' + +# +# The list of trace flags that can be used to condition DPRINTFs etc. +# To define a new flag, simply add it to this list. 
+# +baseFlags = [ + 'AlphaConsole', + 'BADADDR', + 'BPredRAS', + 'Bus', + 'BusAddrRanges', + 'BusBridge', + 'Cache', + 'Chains', + 'Clock', + 'Commit', + 'CommitRate', + 'Config', + 'Console', + 'ConsolePoll', + 'ConsoleVerbose', + 'Context', + 'Cycle', + 'DMA', + 'DMAReadVerbose', + 'DMAWriteVerbose', + 'DebugPrintf', + 'Decode', + 'DiskImage', + 'DiskImageRead', + 'DiskImageWrite', + 'DynInst', + 'Ethernet', + 'EthernetCksum', + 'EthernetDMA', + 'EthernetData', + 'EthernetDesc', + 'EthernetIntr', + 'EthernetPIO', + 'EthernetSM', + 'Event', + 'Fault', + 'Fetch', + 'Flow', + 'FreeList', + 'FullCPU', + 'GDBAcc', + 'GDBExtra', + 'GDBMisc', + 'GDBRead', + 'GDBRecv', + 'GDBSend', + 'GDBWrite', + 'HWPrefetch', + 'IEW', + 'IIC', + 'IICMore', + 'IPI', + 'IQ', + 'ISP', + 'IdeCtrl', + 'IdeDisk', + 'InstExec', + 'Interrupt', - 'LDSTQ', ++ 'LSQ', ++ 'LSQUnit', + 'Loader', + 'MC146818', + 'MMU', + 'MSHR', + 'Mbox', + 'MemDepUnit', - 'OoOCPU', - 'PCEvent', - 'PCIA', - 'PCIDEV', - 'PciConfigAll', - 'Pipeline', - 'Printf', - 'ROB', - 'Regs', - 'Rename', - 'RenameMap', - 'SQL', - 'Sampler', - 'ScsiCtrl', - 'ScsiDisk', - 'ScsiNone', - 'Serialize', - 'SimpleCPU', - 'SimpleDisk', - 'SimpleDiskData', - 'Sparc', - 'Split', - 'Stack', - 'StatEvents', - 'Stats', - 'StoreSet', - 'Syscall', - 'SyscallVerbose', - 'TCPIP', - 'TLB', - 'Thread', - 'Timer', - 'Tsunami', - 'Uart', - 'VtoPhys', - 'WriteBarrier', ++ 'OzoneCPU', ++ 'FE', ++ 'IBE', ++ 'BE', ++ 'OzoneLSQ', + ] + +# +# "Compound" flags correspond to a set of base flags. These exist +# solely for convenience in setting them via the command line: if a +# compound flag is specified, all of the corresponding base flags are +# set. Compound flags cannot be used directly in DPRINTFs etc. +# To define a new compound flag, add a new entry to this hash +# following the existing examples. 
+# +compoundFlagMap = { + 'GDBAll' : [ 'GDBMisc', 'GDBAcc', 'GDBRead', 'GDBWrite', 'GDBSend', 'GDBRecv', 'GDBExtra' ], + 'ScsiAll' : [ 'ScsiDisk', 'ScsiCtrl', 'ScsiNone' ], + 'DiskImageAll' : [ 'DiskImage', 'DiskImageRead', 'DiskImageWrite' ], + 'EthernetAll' : [ 'Ethernet', 'EthernetPIO', 'EthernetDMA', 'EthernetData' , 'EthernetDesc', 'EthernetIntr', 'EthernetSM', 'EthernetCksum' ], + 'EthernetNoData' : [ 'Ethernet', 'EthernetPIO', 'EthernetDesc', 'EthernetIntr', 'EthernetSM', 'EthernetCksum' ], + 'IdeAll' : [ 'IdeCtrl', 'IdeDisk' ], - 'FullCPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'LDSTQ', 'StoreSet', 'MemDepUnit', 'DynInst', 'FullCPU'] ++ 'FullCPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'LSQ', 'LSQUnit', 'StoreSet', 'MemDepUnit', 'DynInst', 'FullCPU', 'Activity','Scoreboard','Writeback'], ++ 'OzoneCPUAll' : [ 'BE', 'FE', 'IBE', 'OzoneLSQ', 'OzoneCPU'] +} + +############################################################# +# +# Everything below this point generates the appropriate C++ +# declarations and definitions for the trace flags. If you are simply +# adding or modifying flag definitions, you should not have to change +# anything below. +# + +import sys + +# extract just the compound flag names into a list +compoundFlags = [] +compoundFlags.extend(compoundFlagMap.keys()) +compoundFlags.sort() + +# +# First generate the header file. This defines the Flag enum +# and some extern declarations for the .cc file. +# +try: + hhfile = file(hhfilename, 'w') +except IOError, e: + sys.exit("can't open %s: %s" % (hhfilename, e)) + +# file header boilerplate +print >>hhfile, ''' +/* + * DO NOT EDIT THIS FILE! + * + * Automatically generated from traceflags.py + */ + +#ifndef __BASE_TRACE_FLAGS_HH__ +#define __BASE_TRACE_FLAGS_HH__ + +namespace Trace { + +enum Flags { +''', + +# Generate the enum. Base flags come first, then compound flags. 
+idx = 0 +for flag in baseFlags: + print >>hhfile, ' %s = %d,' % (flag, idx) + idx += 1 + +numBaseFlags = idx +print >>hhfile, ' NumFlags = %d,' % idx + +# put a comment in here to separate base from compound flags +print >>hhfile, ''' + // The remaining enum values are *not* valid indices for Trace::flags. + // They are "compound" flags, which correspond to sets of base + // flags, and are used only by TraceParamContext::setFlags(). +''', + +for flag in compoundFlags: + print >>hhfile, ' %s = %d,' % (flag, idx) + idx += 1 + +numCompoundFlags = idx - numBaseFlags +print >>hhfile, ' NumCompoundFlags = %d' % numCompoundFlags + +# trailer boilerplate +print >>hhfile, '''\ +}; // enum Flags + +// Array of strings for SimpleEnumParam +extern const char *flagStrings[]; +extern const int numFlagStrings; + +// Array of arraay pointers: for each compound flag, gives the list of +// base flags to set. Inidividual flag arrays are terminated by -1. +extern const Flags *compoundFlags[]; + +/* namespace Trace */ } + +#endif // __BASE_TRACE_FLAGS_HH__ +''', + +hhfile.close() + +# +# +# Print out .cc file with array definitions. +# +# +try: + ccfile = file(ccfilename, 'w') +except OSError, e: + sys.exit("can't open %s: %s" % (ccfilename, e)) + +# file header +print >>ccfile, ''' +/* + * DO NOT EDIT THIS FILE! + * + * Automatically generated from traceflags.pl. + */ + +#include "base/traceflags.hh" + +using namespace Trace; + +const char *Trace::flagStrings[] = +{ +''', + +# The string array is used by SimpleEnumParam to map the strings +# provided by the user to enum values. +for flag in baseFlags: + print >>ccfile, ' "%s",' % flag + +for flag in compoundFlags: + print >>ccfile, ' "%s",' % flag + +print >>ccfile, '};\n' + +numFlagStrings = len(baseFlags) + len(compoundFlags); + +print >>ccfile, 'const int Trace::numFlagStrings = %d;' % numFlagStrings +print >>ccfile + +# +# Now define the individual compound flag arrays. 
There is an array +# for each compound flag listing the component base flags. +# + +for flag in compoundFlags: + flags = compoundFlagMap[flag] + flags.append('(Flags)-1') + print >>ccfile, 'static const Flags %sMap[] =' % flag + print >>ccfile, '{ %s };' % (', '.join(flags)) + print >>ccfile + +# +# Finally the compoundFlags[] array maps the compound flags +# to their individual arrays/ +# +print >>ccfile, 'const Flags *Trace::compoundFlags[] =' +print >>ccfile, '{' + +for flag in compoundFlags: + print >>ccfile, ' %sMap,' % flag + +# file trailer +print >>ccfile, '};' + +ccfile.close() + diff --cc src/cpu/SConscript index 34fb6df78,000000000..a4cbe2aa6 mode 100644,000000..100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@@ -1,143 -1,0 +1,179 @@@ +# -*- mode:python -*- + +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os +import os.path + +# Import build environment variable from SConstruct. +Import('env') + +################################################################# +# +# Generate StaticInst execute() method signatures. +# +# There must be one signature for each CPU model compiled in. +# Since the set of compiled-in models is flexible, we generate a +# header containing the appropriate set of signatures on the fly. +# +################################################################# + +# CPU model-specific data is contained in cpu_models.py +# Convert to SCons File node to get path handling +models_db = File('cpu_models.py') +# slurp in contents of file +execfile(models_db.srcnode().abspath) + +# Template for execute() signature. +exec_sig_template = ''' +virtual Fault execute(%s *xc, Trace::InstRecord *traceData) const = 0; +virtual Fault initiateAcc(%s *xc, Trace::InstRecord *traceData) const +{ panic("initiateAcc not defined!"); }; +virtual Fault completeAcc(Packet *pkt, %s *xc, + Trace::InstRecord *traceData) const +{ panic("completeAcc not defined!"); }; +''' + ++mem_ini_sig_template = ''' ++virtual Fault initiateAcc(%s *xc, Trace::InstRecord *traceData) const { panic("Not defined!"); }; ++''' ++ ++mem_comp_sig_template = ''' ++virtual Fault completeAcc(uint8_t *data, %s *xc, Trace::InstRecord *traceData) const { panic("Not defined!"); return NoFault; }; ++''' ++ +# Generate header. 
+def gen_cpu_exec_signatures(target, source, env): + f = open(str(target[0]), 'w') + print >> f, ''' +#ifndef __CPU_STATIC_INST_EXEC_SIGS_HH__ +#define __CPU_STATIC_INST_EXEC_SIGS_HH__ +''' + for cpu in env['CPU_MODELS']: + xc_type = CpuModel.dict[cpu].strings['CPU_exec_context'] + print >> f, exec_sig_template % (xc_type, xc_type, xc_type) + print >> f, ''' +#endif // __CPU_STATIC_INST_EXEC_SIGS_HH__ +''' + +# Generate string that gets printed when header is rebuilt +def gen_sigs_string(target, source, env): + return "Generating static_inst_exec_sigs.hh: " \ + + ', '.join(env['CPU_MODELS']) + +# Add command to generate header to environment. +env.Command('static_inst_exec_sigs.hh', models_db, + Action(gen_cpu_exec_signatures, gen_sigs_string, + varlist = ['CPU_MODELS'])) + +################################################################# +# +# Include CPU-model-specific files based on set of models +# specified in CPU_MODELS build option. +# +################################################################# + +sources = [] + +need_simple_base = False +if 'AtomicSimpleCPU' in env['CPU_MODELS']: + need_simple_base = True + sources += Split('simple/atomic.cc') + +if 'TimingSimpleCPU' in env['CPU_MODELS']: + need_simple_base = True + sources += Split('simple/timing.cc') + +if need_simple_base: + sources += Split('simple/base.cc') + +if 'FastCPU' in env['CPU_MODELS']: + sources += Split('fast/cpu.cc') + +if 'AlphaFullCPU' in env['CPU_MODELS']: + sources += Split(''' + o3/2bit_local_pred.cc + o3/alpha_dyn_inst.cc + o3/alpha_cpu.cc + o3/alpha_cpu_builder.cc + o3/bpred_unit.cc + o3/btb.cc + o3/commit.cc + o3/decode.cc + o3/fetch.cc + o3/free_list.cc ++ o3/fu_pool.cc + o3/cpu.cc + o3/iew.cc + o3/inst_queue.cc - o3/ldstq.cc ++ o3/lsq_unit.cc ++ o3/lsq.cc + o3/mem_dep_unit.cc + o3/ras.cc + o3/rename.cc + o3/rename_map.cc + o3/rob.cc - o3/sat_counter.cc ++ o3/scoreboard.cc + o3/store_set.cc + o3/tournament_pred.cc + ''') + ++if 'OzoneSimpleCPU' in env['CPU_MODELS']: ++ 
sources += Split(''' ++ ozone/cpu.cc ++ ozone/cpu_builder.cc ++ ozone/dyn_inst.cc ++ ozone/front_end.cc ++ ozone/inorder_back_end.cc ++ ozone/inst_queue.cc ++ ozone/rename_table.cc ++ ''') ++ ++if 'OzoneCPU' in env['CPU_MODELS']: ++ sources += Split(''' ++ ozone/back_end.cc ++ ozone/lsq_unit.cc ++ ozone/lw_back_end.cc ++ ozone/lw_lsq.cc ++ ''') ++ ++if 'CheckerCPU' in env['CPU_MODELS']: ++ sources += Split(''' ++ checker/cpu.cc ++ checker/cpu_builder.cc ++ checker/o3_cpu_builder.cc ++ ''') ++ +# FullCPU sources are included from m5/SConscript since they're not +# below this point in the file hierarchy. + +# Convert file names to SCons File objects. This takes care of the +# path relative to the top of the directory tree. +sources = [File(s) for s in sources] + +Return('sources') + diff --cc src/cpu/base.cc index fb6116fd4,000000000..8641d987d mode 100644,000000..100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@@ -1,402 -1,0 +1,381 @@@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include "base/cprintf.hh" +#include "base/loader/symtab.hh" +#include "base/misc.hh" +#include "base/output.hh" +#include "cpu/base.hh" +#include "cpu/cpuevent.hh" +#include "cpu/exec_context.hh" +#include "cpu/profile.hh" +#include "cpu/sampler/sampler.hh" +#include "sim/param.hh" +#include "sim/process.hh" +#include "sim/sim_events.hh" +#include "sim/system.hh" + +#include "base/trace.hh" + - #if FULL_SYSTEM - #include "kern/kernel_stats.hh" - #endif - +using namespace std; + +vector BaseCPU::cpuList; + +// This variable reflects the max number of threads in any CPU. 
Be +// careful to only use it once all the CPUs that you care about have +// been initialized +int maxThreadsPerCPU = 1; + +#if FULL_SYSTEM +BaseCPU::BaseCPU(Params *p) + : SimObject(p->name), clock(p->clock), checkInterrupts(true), + params(p), number_of_threads(p->numberOfThreads), system(p->system) +#else +BaseCPU::BaseCPU(Params *p) + : SimObject(p->name), clock(p->clock), params(p), + number_of_threads(p->numberOfThreads), system(p->system) +#endif +{ + DPRINTF(FullCPU, "BaseCPU: Creating object, mem address %#x.\n", this); + + // add self to global list of CPUs + cpuList.push_back(this); + + DPRINTF(FullCPU, "BaseCPU: CPU added to cpuList, mem address %#x.\n", + this); + + if (number_of_threads > maxThreadsPerCPU) + maxThreadsPerCPU = number_of_threads; + + // allocate per-thread instruction-based event queues + comInstEventQueue = new EventQueue *[number_of_threads]; + for (int i = 0; i < number_of_threads; ++i) + comInstEventQueue[i] = new EventQueue("instruction-based event queue"); + + // + // set up instruction-count-based termination events, if any + // + if (p->max_insts_any_thread != 0) + for (int i = 0; i < number_of_threads; ++i) + new SimExitEvent(comInstEventQueue[i], p->max_insts_any_thread, + "a thread reached the max instruction count"); + + if (p->max_insts_all_threads != 0) { + // allocate & initialize shared downcounter: each event will + // decrement this when triggered; simulation will terminate + // when counter reaches 0 + int *counter = new int; + *counter = number_of_threads; + for (int i = 0; i < number_of_threads; ++i) + new CountedExitEvent(comInstEventQueue[i], + "all threads reached the max instruction count", + p->max_insts_all_threads, *counter); + } + + // allocate per-thread load-based event queues + comLoadEventQueue = new EventQueue *[number_of_threads]; + for (int i = 0; i < number_of_threads; ++i) + comLoadEventQueue[i] = new EventQueue("load-based event queue"); + + // + // set up instruction-count-based termination 
events, if any + // + if (p->max_loads_any_thread != 0) + for (int i = 0; i < number_of_threads; ++i) + new SimExitEvent(comLoadEventQueue[i], p->max_loads_any_thread, + "a thread reached the max load count"); + + if (p->max_loads_all_threads != 0) { + // allocate & initialize shared downcounter: each event will + // decrement this when triggered; simulation will terminate + // when counter reaches 0 + int *counter = new int; + *counter = number_of_threads; + for (int i = 0; i < number_of_threads; ++i) + new CountedExitEvent(comLoadEventQueue[i], + "all threads reached the max load count", + p->max_loads_all_threads, *counter); + } + +#if FULL_SYSTEM + memset(interrupts, 0, sizeof(interrupts)); + intstatus = 0; +#endif + + functionTracingEnabled = false; + if (p->functionTrace) { + functionTraceStream = simout.find(csprintf("ftrace.%s", name())); + currentFunctionStart = currentFunctionEnd = 0; + functionEntryTick = p->functionTraceStart; + + if (p->functionTraceStart == 0) { + functionTracingEnabled = true; + } else { + Event *e = + new EventWrapper(this, + true); + e->schedule(p->functionTraceStart); + } + } +#if FULL_SYSTEM + profileEvent = NULL; + if (params->profile) + profileEvent = new ProfileEvent(this, params->profile); - - kernelStats = new Kernel::Statistics(system); +#endif + +} + +BaseCPU::Params::Params() +{ +#if FULL_SYSTEM + profile = false; +#endif ++ checker = NULL; +} + +void +BaseCPU::enableFunctionTrace() +{ + functionTracingEnabled = true; +} + +BaseCPU::~BaseCPU() +{ - #if FULL_SYSTEM - if (kernelStats) - delete kernelStats; - #endif +} + +void +BaseCPU::init() +{ + if (!params->deferRegistration) + registerExecContexts(); +} + +void +BaseCPU::startup() +{ +#if FULL_SYSTEM + if (!params->deferRegistration && profileEvent) + profileEvent->schedule(curTick); +#endif +} + + +void +BaseCPU::regStats() +{ + using namespace Stats; + + numCycles + .name(name() + ".numCycles") + .desc("number of cpu cycles simulated") + ; + + int size = 
execContexts.size(); + if (size > 1) { + for (int i = 0; i < size; ++i) { + stringstream namestr; + ccprintf(namestr, "%s.ctx%d", name(), i); + execContexts[i]->regStats(namestr.str()); + } + } else if (size == 1) + execContexts[0]->regStats(name()); + +#if FULL_SYSTEM - if (kernelStats) - kernelStats->regStats(name() + ".kern"); +#endif +} + + +void +BaseCPU::registerExecContexts() +{ + for (int i = 0; i < execContexts.size(); ++i) { + ExecContext *xc = execContexts[i]; + +#if FULL_SYSTEM + int id = params->cpu_id; + if (id != -1) + id += i; + + xc->setCpuId(system->registerExecContext(xc, id)); +#else + xc->setCpuId(xc->getProcessPtr()->registerExecContext(xc)); +#endif ++ } + } +} + + +void +BaseCPU::switchOut(Sampler *sampler) +{ + panic("This CPU doesn't support sampling!"); +} + +void +BaseCPU::takeOverFrom(BaseCPU *oldCPU) +{ + assert(execContexts.size() == oldCPU->execContexts.size()); + + for (int i = 0; i < execContexts.size(); ++i) { + ExecContext *newXC = execContexts[i]; + ExecContext *oldXC = oldCPU->execContexts[i]; + + newXC->takeOverFrom(oldXC); + + CpuEvent::replaceExecContext(oldXC, newXC); + + assert(newXC->readCpuId() == oldXC->readCpuId()); +#if FULL_SYSTEM + system->replaceExecContext(newXC, newXC->readCpuId()); +#else + assert(newXC->getProcessPtr() == oldXC->getProcessPtr()); + newXC->getProcessPtr()->replaceExecContext(newXC, newXC->readCpuId()); +#endif + } + +#if FULL_SYSTEM + for (int i = 0; i < TheISA::NumInterruptLevels; ++i) + interrupts[i] = oldCPU->interrupts[i]; + intstatus = oldCPU->intstatus; + + for (int i = 0; i < execContexts.size(); ++i) + execContexts[i]->profileClear(); + + if (profileEvent) + profileEvent->schedule(curTick); +#endif +} + + +#if FULL_SYSTEM +BaseCPU::ProfileEvent::ProfileEvent(BaseCPU *_cpu, int _interval) + : Event(&mainEventQueue), cpu(_cpu), interval(_interval) +{ } + +void +BaseCPU::ProfileEvent::process() +{ + for (int i = 0, size = cpu->execContexts.size(); i < size; ++i) { + ExecContext *xc = 
cpu->execContexts[i]; + xc->profileSample(); + } + + schedule(curTick + interval); +} + +void +BaseCPU::post_interrupt(int int_num, int index) +{ + DPRINTF(Interrupt, "Interrupt %d:%d posted\n", int_num, index); + + if (int_num < 0 || int_num >= TheISA::NumInterruptLevels) + panic("int_num out of bounds\n"); + + if (index < 0 || index >= sizeof(uint64_t) * 8) + panic("int_num out of bounds\n"); + + checkInterrupts = true; + interrupts[int_num] |= 1 << index; + intstatus |= (ULL(1) << int_num); +} + +void +BaseCPU::clear_interrupt(int int_num, int index) +{ + DPRINTF(Interrupt, "Interrupt %d:%d cleared\n", int_num, index); + + if (int_num < 0 || int_num >= TheISA::NumInterruptLevels) + panic("int_num out of bounds\n"); + + if (index < 0 || index >= sizeof(uint64_t) * 8) + panic("int_num out of bounds\n"); + + interrupts[int_num] &= ~(1 << index); + if (interrupts[int_num] == 0) + intstatus &= ~(ULL(1) << int_num); +} + +void +BaseCPU::clear_interrupts() +{ + DPRINTF(Interrupt, "Interrupts all cleared\n"); + + memset(interrupts, 0, sizeof(interrupts)); + intstatus = 0; +} + + +void +BaseCPU::serialize(std::ostream &os) +{ + SERIALIZE_ARRAY(interrupts, TheISA::NumInterruptLevels); + SERIALIZE_SCALAR(intstatus); - - #if FULL_SYSTEM - if (kernelStats) - kernelStats->serialize(os); - #endif - +} + +void +BaseCPU::unserialize(Checkpoint *cp, const std::string §ion) +{ + UNSERIALIZE_ARRAY(interrupts, TheISA::NumInterruptLevels); + UNSERIALIZE_SCALAR(intstatus); - - #if FULL_SYSTEM - if (kernelStats) - kernelStats->unserialize(cp, section); - #endif +} + +#endif // FULL_SYSTEM + +void +BaseCPU::traceFunctionsInternal(Addr pc) +{ + if (!debugSymbolTable) + return; + + // if pc enters different function, print new function symbol and + // update saved range. Otherwise do nothing. 
+ if (pc < currentFunctionStart || pc >= currentFunctionEnd) { + string sym_str; + bool found = debugSymbolTable->findNearestSymbol(pc, sym_str, + currentFunctionStart, + currentFunctionEnd); + + if (!found) { + // no symbol found: use addr as label + sym_str = csprintf("0x%x", pc); + currentFunctionStart = pc; + currentFunctionEnd = pc + 1; + } + + ccprintf(*functionTraceStream, " (%d)\n%d: %s", + curTick - functionEntryTick, curTick, sym_str); + functionEntryTick = curTick; + } +} + + +DEFINE_SIM_OBJECT_CLASS_NAME("BaseCPU", BaseCPU) diff --cc src/cpu/base.hh index 79700c117,000000000..f14ace750 mode 100644,000000..100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@@ -1,241 -1,0 +1,238 @@@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_BASE_HH__ +#define __CPU_BASE_HH__ + +#include + +#include "base/statistics.hh" +#include "config/full_system.hh" +#include "cpu/sampler/sampler.hh" +#include "sim/eventq.hh" +#include "sim/sim_object.hh" +#include "arch/isa_traits.hh" + - class System; - namespace Kernel { class Statistics; } +class BranchPred; ++class CheckerCPU; +class ExecContext; ++class System; + +class BaseCPU : public SimObject +{ + protected: + // CPU's clock period in terms of the number of ticks of curTime. 
+ Tick clock; + + public: + inline Tick frequency() const { return Clock::Frequency / clock; } + inline Tick cycles(int numCycles) const { return clock * numCycles; } + inline Tick curCycle() const { return curTick / clock; } + +#if FULL_SYSTEM + protected: + uint64_t interrupts[TheISA::NumInterruptLevels]; + uint64_t intstatus; + + public: + virtual void post_interrupt(int int_num, int index); + virtual void clear_interrupt(int int_num, int index); + virtual void clear_interrupts(); + bool checkInterrupts; + + bool check_interrupt(int int_num) const { + if (int_num > TheISA::NumInterruptLevels) + panic("int_num out of bounds\n"); + + return interrupts[int_num] != 0; + } + + bool check_interrupts() const { return intstatus != 0; } + uint64_t intr_status() const { return intstatus; } + + class ProfileEvent : public Event + { + private: + BaseCPU *cpu; + int interval; + + public: + ProfileEvent(BaseCPU *cpu, int interval); + void process(); + }; + ProfileEvent *profileEvent; +#endif + + protected: + std::vector execContexts; + + public: + + /// Notify the CPU that the indicated context is now active. The + /// delay parameter indicates the number of ticks to wait before + /// executing (typically 0 or 1). + virtual void activateContext(int thread_num, int delay) {} + + /// Notify the CPU that the indicated context is now suspended. + virtual void suspendContext(int thread_num) {} + + /// Notify the CPU that the indicated context is now deallocated. + virtual void deallocateContext(int thread_num) {} + + /// Notify the CPU that the indicated context is now halted. 
+ virtual void haltContext(int thread_num) {} + + public: + struct Params + { + std::string name; + int numberOfThreads; + bool deferRegistration; + Counter max_insts_any_thread; + Counter max_insts_all_threads; + Counter max_loads_any_thread; + Counter max_loads_all_threads; + Tick clock; + bool functionTrace; + Tick functionTraceStart; + System *system; +#if FULL_SYSTEM + int cpu_id; + Tick profile; +#endif ++ BaseCPU *checker; + + Params(); + }; + + const Params *params; + + BaseCPU(Params *params); + virtual ~BaseCPU(); + + virtual void init(); + virtual void startup(); + virtual void regStats(); + + virtual void activateWhenReady(int tid) {}; + + void registerExecContexts(); + + /// Prepare for another CPU to take over execution. When it is + /// is ready (drained pipe) it signals the sampler. + virtual void switchOut(Sampler *); + + /// Take over execution from the given CPU. Used for warm-up and + /// sampling. + virtual void takeOverFrom(BaseCPU *); + + /** + * Number of threads we're actually simulating (<= SMT_MAX_THREADS). + * This is a constant for the duration of the simulation. + */ + int number_of_threads; + + /** + * Vector of per-thread instruction-based event queues. Used for + * scheduling events based on number of instructions committed by + * a particular thread. + */ + EventQueue **comInstEventQueue; + + /** + * Vector of per-thread load-based event queues. Used for + * scheduling events based on number of loads committed by + *a particular thread. + */ + EventQueue **comLoadEventQueue; + + System *system; + +#if FULL_SYSTEM + /** + * Serialize this object to the given output stream. + * @param os The stream to serialize to. + */ + virtual void serialize(std::ostream &os); + + /** + * Reconstruct the state of this object from a checkpoint. + * @param cp The checkpoint use. 
+ * @param section The section name of this object + */ + virtual void unserialize(Checkpoint *cp, const std::string §ion); + +#endif + + /** + * Return pointer to CPU's branch predictor (NULL if none). + * @return Branch predictor pointer. + */ + virtual BranchPred *getBranchPred() { return NULL; }; + + virtual Counter totalInstructions() const { return 0; } + + // Function tracing + private: + bool functionTracingEnabled; + std::ostream *functionTraceStream; + Addr currentFunctionStart; + Addr currentFunctionEnd; + Tick functionEntryTick; + void enableFunctionTrace(); + void traceFunctionsInternal(Addr pc); + + protected: + void traceFunctions(Addr pc) + { + if (functionTracingEnabled) + traceFunctionsInternal(pc); + } + + private: + static std::vector cpuList; //!< Static global cpu list + + public: + static int numSimulatedCPUs() { return cpuList.size(); } + static Counter numSimulatedInstructions() + { + Counter total = 0; + + int size = cpuList.size(); + for (int i = 0; i < size; ++i) + total += cpuList[i]->totalInstructions(); + + return total; + } + + public: + // Number of CPU cycles simulated + Stats::Scalar<> numCycles; - - #if FULL_SYSTEM - Kernel::Statistics *kernelStats; - #endif +}; + +#endif // __CPU_BASE_HH__ diff --cc src/cpu/base_dyn_inst.cc index bf7c35cad,000000000..7ab760ae3 mode 100644,000000..100644 --- a/src/cpu/base_dyn_inst.cc +++ b/src/cpu/base_dyn_inst.cc @@@ -1,364 -1,0 +1,448 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + - #ifndef __CPU_BASE_DYN_INST_CC__ - #define __CPU_BASE_DYN_INST_CC__ - +#include ++#include +#include +#include + +#include "base/cprintf.hh" +#include "base/trace.hh" + +#include "arch/faults.hh" +#include "cpu/exetrace.hh" +#include "mem/mem_req.hh" + +#include "cpu/base_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/alpha_cpu.hh" ++#include "cpu/ozone/simple_impl.hh" ++#include "cpu/ozone/ozone_impl.hh" + +using namespace std; +using namespace TheISA; + +#define NOHASH +#ifndef NOHASH + +#include "base/hashmap.hh" + +unsigned int MyHashFunc(const BaseDynInst *addr) +{ - unsigned a = (unsigned)addr; - unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF; ++ unsigned a = (unsigned)addr; ++ unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF; + - return hash; ++ return hash; +} + - typedef m5::hash_map my_hash_t; ++typedef m5::hash_map ++my_hash_t; ++ +my_hash_t thishash; +#endif + +template - BaseDynInst::BaseDynInst(MachInst machInst, Addr inst_PC, ++BaseDynInst::BaseDynInst(ExtMachInst machInst, Addr inst_PC, + Addr pred_PC, InstSeqNum seq_num, + FullCPU *cpu) - : staticInst(machInst), traceData(NULL), cpu(cpu), cpuXC(cpu->cpuXCBase()) ++ : staticInst(machInst), traceData(NULL), cpu(cpu)/*, xc(cpu->xcBase())*/ +{ + seqNum = seq_num; + + PC = inst_PC; + nextPC = PC + sizeof(MachInst); + predPC = pred_PC; + + initVars(); +} + +template +BaseDynInst::BaseDynInst(StaticInstPtr &_staticInst) + : staticInst(_staticInst), traceData(NULL) +{ ++ seqNum = 0; + initVars(); +} + +template +void +BaseDynInst::initVars() +{ ++ req = NULL; + effAddr = MemReq::inval_addr; + physEffAddr = MemReq::inval_addr; ++ storeSize = 0; + + readyRegs = 0; + + completed = false; ++ resultReady = false; + canIssue = false; + issued = false; + executed = false; + canCommit = false; ++ committed = false; + squashed = false; + squashedInIQ = false; ++ squashedInLSQ = false; ++ squashedInROB = false; + eaCalcDone = false; ++ memOpDone = false; ++ 
lqIdx = -1; ++ sqIdx = -1; ++ reachedCommit = false; + + blockingInst = false; + recoverInst = false; + ++ iqEntry = false; ++ robEntry = false; ++ ++ serializeBefore = false; ++ serializeAfter = false; ++ serializeHandled = false; ++ + // Eventually make this a parameter. + threadNumber = 0; + + // Also make this a parameter, or perhaps get it from xc or cpu. + asid = 0; + + // Initialize the fault to be unimplemented opcode. - fault = new UnimplementedOpcodeFault; ++// fault = new UnimplementedOpcodeFault; ++ fault = NoFault; + + ++instcount; + - DPRINTF(FullCPU, "DynInst: Instruction created. Instcount=%i\n", - instcount); ++ if (instcount > 1500) { ++ cpu->dumpInsts(); ++#ifdef DEBUG ++ dumpSNList(); ++#endif ++ assert(instcount <= 1500); ++ } ++ ++ DPRINTF(DynInst, "DynInst: [sn:%lli] Instruction created. Instcount=%i\n", ++ seqNum, instcount); ++ ++#ifdef DEBUG ++ cpu->snList.insert(seqNum); ++#endif +} + +template +BaseDynInst::~BaseDynInst() +{ ++ if (req) { ++ req = NULL; ++ } ++ ++ if (traceData) { ++ delete traceData; ++ } ++ + --instcount; - DPRINTF(FullCPU, "DynInst: Instruction destroyed. Instcount=%i\n", - instcount); ++ ++ DPRINTF(DynInst, "DynInst: [sn:%lli] Instruction destroyed. Instcount=%i\n", ++ seqNum, instcount); ++#ifdef DEBUG ++ cpu->snList.erase(seqNum); ++#endif +} + ++#ifdef DEBUG ++template ++void ++BaseDynInst::dumpSNList() ++{ ++ std::set::iterator sn_it = cpu->snList.begin(); ++ ++ int count = 0; ++ while (sn_it != cpu->snList.end()) { ++ cprintf("%i: [sn:%lli] not destroyed\n", count, (*sn_it)); ++ count++; ++ sn_it++; ++ } ++} ++#endif ++ +template +void +BaseDynInst::prefetch(Addr addr, unsigned flags) +{ + // This is the "functional" implementation of prefetch. Not much + // happens here since prefetches don't affect the architectural + // state. + + // Generate a MemReq so we can translate the effective address. 
- MemReqPtr req = new MemReq(addr, cpuXC->getProxy(), 1, flags); ++ MemReqPtr req = new MemReq(addr, thread->getXCProxy(), 1, flags); + req->asid = asid; + + // Prefetches never cause faults. + fault = NoFault; + + // note this is a local, not BaseDynInst::fault - Fault trans_fault = cpuXC->translateDataReadReq(req); ++ Fault trans_fault = cpu->translateDataReadReq(req); + + if (trans_fault == NoFault && !(req->flags & UNCACHEABLE)) { + // It's a valid address to cacheable space. Record key MemReq + // parameters so we can generate another one just like it for + // the timing access without calling translate() again (which + // might mess up the TLB). + effAddr = req->vaddr; + physEffAddr = req->paddr; + memReqFlags = req->flags; + } else { + // Bogus address (invalid or uncacheable space). Mark it by + // setting the eff_addr to InvalidAddr. + effAddr = physEffAddr = MemReq::inval_addr; + } + - /** - * @todo - * Replace the disjoint functional memory with a unified one and remove - * this hack. - */ - #if !FULL_SYSTEM - req->paddr = req->vaddr; - #endif - + if (traceData) { + traceData->setAddr(addr); + } +} + +template +void +BaseDynInst::writeHint(Addr addr, int size, unsigned flags) +{ + // Need to create a MemReq here so we can do a translation. This + // will casue a TLB miss trap if necessary... not sure whether + // that's the best thing to do or not. We don't really need the + // MemReq otherwise, since wh64 has no functional effect. - MemReqPtr req = new MemReq(addr, cpuXC->getProxy(), size, flags); ++ MemReqPtr req = new MemReq(addr, thread->getXCProxy(), size, flags); + req->asid = asid; + - fault = cpuXC->translateDataWriteReq(req); ++ fault = cpu->translateDataWriteReq(req); + + if (fault == NoFault && !(req->flags & UNCACHEABLE)) { + // Record key MemReq parameters so we can generate another one + // just like it for the timing access without calling translate() + // again (which might mess up the TLB). 
+ effAddr = req->vaddr; + physEffAddr = req->paddr; + memReqFlags = req->flags; + } else { + // ignore faults & accesses to uncacheable space... treat as no-op + effAddr = physEffAddr = MemReq::inval_addr; + } + + storeSize = size; + storeData = 0; +} + +/** + * @todo Need to find a way to get the cache block size here. + */ +template +Fault +BaseDynInst::copySrcTranslate(Addr src) +{ - MemReqPtr req = new MemReq(src, cpuXC->getProxy(), 64); ++ MemReqPtr req = new MemReq(src, thread->getXCProxy(), 64); + req->asid = asid; + + // translate to physical address - Fault fault = cpuXC->translateDataReadReq(req); ++ Fault fault = cpu->translateDataReadReq(req); + + if (fault == NoFault) { - cpuXC->copySrcAddr = src; - cpuXC->copySrcPhysAddr = req->paddr; ++ thread->copySrcAddr = src; ++ thread->copySrcPhysAddr = req->paddr; + } else { - cpuXC->copySrcAddr = 0; - cpuXC->copySrcPhysAddr = 0; ++ thread->copySrcAddr = 0; ++ thread->copySrcPhysAddr = 0; + } + return fault; +} + +/** + * @todo Need to find a way to get the cache block size here. + */ +template +Fault +BaseDynInst::copy(Addr dest) +{ + uint8_t data[64]; - FunctionalMemory *mem = cpuXC->mem; - assert(cpuXC->copySrcPhysAddr || cpuXC->misspeculating()); - MemReqPtr req = new MemReq(dest, cpuXC->getProxy(), 64); ++ FunctionalMemory *mem = thread->mem; ++ assert(thread->copySrcPhysAddr || thread->misspeculating()); ++ MemReqPtr req = new MemReq(dest, thread->getXCProxy(), 64); + req->asid = asid; + + // translate to physical address - Fault fault = cpuXC->translateDataWriteReq(req); ++ Fault fault = cpu->translateDataWriteReq(req); + + if (fault == NoFault) { + Addr dest_addr = req->paddr; + // Need to read straight from memory since we have more than 8 bytes. 
- req->paddr = cpuXC->copySrcPhysAddr; ++ req->paddr = thread->copySrcPhysAddr; + mem->read(req, data); + req->paddr = dest_addr; + mem->write(req, data); + } + return fault; +} + +template +void +BaseDynInst::dump() +{ + cprintf("T%d : %#08d `", threadNumber, PC); + cout << staticInst->disassemble(PC); + cprintf("'\n"); +} + +template +void +BaseDynInst::dump(std::string &outstring) +{ + std::ostringstream s; + s << "T" << threadNumber << " : 0x" << PC << " " + << staticInst->disassemble(PC); + + outstring = s.str(); +} + - +#if 0 +template +Fault +BaseDynInst::mem_access(mem_cmd cmd, Addr addr, void *p, int nbytes) +{ + Fault fault; + + // check alignments, even speculative this test should always pass + if ((nbytes & nbytes - 1) != 0 || (addr & nbytes - 1) != 0) { + for (int i = 0; i < nbytes; i++) + ((char *) p)[i] = 0; + + // I added the following because according to the comment above, + // we should never get here. The comment lies +#if 0 + panic("unaligned access. Cycle = %n", curTick); +#endif + return NoFault; + } + + MemReqPtr req = new MemReq(addr, thread, nbytes); + switch(cmd) { + case Read: + fault = spec_mem->read(req, (uint8_t *)p); + break; + + case Write: + fault = spec_mem->write(req, (uint8_t *)p); + if (fault != NoFault) + break; + + specMemWrite = true; + storeSize = nbytes; + switch(nbytes) { + case sizeof(uint8_t): + *(uint8_t)&storeData = (uint8_t *)p; + break; + case sizeof(uint16_t): + *(uint16_t)&storeData = (uint16_t *)p; + break; + case sizeof(uint32_t): + *(uint32_t)&storeData = (uint32_t *)p; + break; + case sizeof(uint64_t): + *(uint64_t)&storeData = (uint64_t *)p; + break; + } + break; + + default: + fault = genMachineCheckFault(); + break; + } + + trace_mem(fault, cmd, addr, p, nbytes); + + return fault; +} + +#endif + ++template ++void ++BaseDynInst::markSrcRegReady() ++{ ++ if (++readyRegs == numSrcRegs()) { ++ canIssue = true; ++ } ++} ++ ++template ++void ++BaseDynInst::markSrcRegReady(RegIndex src_idx) ++{ ++ ++readyRegs; ++ 
++ _readySrcRegIdx[src_idx] = true; ++ ++ if (readyRegs == numSrcRegs()) { ++ canIssue = true; ++ } ++} ++ +template +bool +BaseDynInst::eaSrcsReady() +{ + // For now I am assuming that src registers 1..n-1 are the ones that the + // EA calc depends on. (i.e. src reg 0 is the source of the data to be + // stored) + - for (int i = 1; i < numSrcRegs(); ++i) - { ++ for (int i = 1; i < numSrcRegs(); ++i) { + if (!_readySrcRegIdx[i]) + return false; + } + + return true; +} + +// Forward declaration +template class BaseDynInst; + +template <> +int +BaseDynInst::instcount = 0; + - #endif // __CPU_BASE_DYN_INST_CC__ ++// Forward declaration ++template class BaseDynInst; ++ ++template <> ++int ++BaseDynInst::instcount = 0; ++ ++// Forward declaration ++template class BaseDynInst; ++ ++template <> ++int ++BaseDynInst::instcount = 0; diff --cc src/cpu/base_dyn_inst.hh index 3a7852f79,000000000..388ea4a8d mode 100644,000000..100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@@ -1,530 -1,0 +1,738 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_BASE_DYN_INST_HH__ +#define __CPU_BASE_DYN_INST_HH__ + ++#include +#include - #include + +#include "base/fast_alloc.hh" +#include "base/trace.hh" +#include "config/full_system.hh" +#include "cpu/exetrace.hh" +#include "cpu/inst_seq.hh" - #include "cpu/o3/comm.hh" +#include "cpu/static_inst.hh" - #include "encumbered/cpu/full/bpred_update.hh" +#include "encumbered/cpu/full/op_class.hh" ++#include "mem/functional/memory_control.hh" ++#include "sim/system.hh" ++/* ++#include "encumbered/cpu/full/bpred_update.hh" +#include "encumbered/cpu/full/spec_memory.hh" +#include "encumbered/cpu/full/spec_state.hh" +#include "encumbered/mem/functional/main.hh" ++*/ + +/** + * @file + * Defines a dynamic instruction context. + */ + +// Forward declaration. +class StaticInstPtr; + +template +class BaseDynInst : public FastAlloc, public RefCounted +{ + public: + // Typedef for the CPU. + typedef typename Impl::FullCPU FullCPU; ++ typedef typename FullCPU::ImplState ImplState; + - /// Binary machine instruction type. ++ // Binary machine instruction type. + typedef TheISA::MachInst MachInst; - /// Logical register index type. 
++ // Extended machine instruction type ++ typedef TheISA::ExtMachInst ExtMachInst; ++ // Logical register index type. + typedef TheISA::RegIndex RegIndex; - /// Integer register index type. ++ // Integer register index type. + typedef TheISA::IntReg IntReg; + ++ // The DynInstPtr type. ++ typedef typename Impl::DynInstPtr DynInstPtr; ++ ++ // The list of instructions iterator type. ++ typedef typename std::list::iterator ListIt; ++ + enum { - MaxInstSrcRegs = TheISA::MaxInstSrcRegs, //< Max source regs - MaxInstDestRegs = TheISA::MaxInstDestRegs, //< Max dest regs ++ MaxInstSrcRegs = TheISA::MaxInstSrcRegs, /// Max source regs ++ MaxInstDestRegs = TheISA::MaxInstDestRegs, /// Max dest regs + }; + - /** The static inst used by this dyn inst. */ ++ /** The StaticInst used by this BaseDynInst. */ + StaticInstPtr staticInst; + + //////////////////////////////////////////// + // + // INSTRUCTION EXECUTION + // + //////////////////////////////////////////// ++ /** InstRecord that tracks this instructions. */ + Trace::InstRecord *traceData; + ++ /** ++ * Does a read to a given address. ++ * @param addr The address to read. ++ * @param data The read's data is written into this parameter. ++ * @param flags The request's flags. ++ * @return Returns any fault due to the read. ++ */ + template + Fault read(Addr addr, T &data, unsigned flags); + ++ /** ++ * Does a write to a given address. ++ * @param data The data to be written. ++ * @param addr The address to write to. ++ * @param flags The request's flags. ++ * @param res The result of the write (for load locked/store conditionals). ++ * @return Returns any fault due to the write. ++ */ + template + Fault write(T data, Addr addr, unsigned flags, + uint64_t *res); + + void prefetch(Addr addr, unsigned flags); + void writeHint(Addr addr, int size, unsigned flags); + Fault copySrcTranslate(Addr src); + Fault copy(Addr dest); + + /** @todo: Consider making this private. */ + public: - /** Is this instruction valid. 
*/ - bool valid; - + /** The sequence number of the instruction. */ + InstSeqNum seqNum; + - /** How many source registers are ready. */ - unsigned readyRegs; ++ /** Is the instruction in the IQ */ ++ bool iqEntry; ++ ++ /** Is the instruction in the ROB */ ++ bool robEntry; ++ ++ /** Is the instruction in the LSQ */ ++ bool lsqEntry; + + /** Is the instruction completed. */ + bool completed; + ++ /** Is the instruction's result ready. */ ++ bool resultReady; ++ + /** Can this instruction issue. */ + bool canIssue; + + /** Has this instruction issued. */ + bool issued; + + /** Has this instruction executed (or made it through execute) yet. */ + bool executed; + + /** Can this instruction commit. */ + bool canCommit; + ++ /** Is this instruction committed. */ ++ bool committed; ++ + /** Is this instruction squashed. */ + bool squashed; + + /** Is this instruction squashed in the instruction queue. */ + bool squashedInIQ; + ++ /** Is this instruction squashed in the instruction queue. */ ++ bool squashedInLSQ; ++ ++ /** Is this instruction squashed in the instruction queue. */ ++ bool squashedInROB; ++ + /** Is this a recover instruction. */ + bool recoverInst; + + /** Is this a thread blocking instruction. */ + bool blockingInst; /* this inst has called thread_block() */ + + /** Is this a thread syncrhonization instruction. */ + bool threadsyncWait; + + /** The thread this instruction is from. */ + short threadNumber; + + /** data address space ID, for loads & stores. */ + short asid; + ++ /** How many source registers are ready. */ ++ unsigned readyRegs; ++ + /** Pointer to the FullCPU object. */ + FullCPU *cpu; + - /** Pointer to the exec context. Will not exist in the final version. */ - CPUExecContext *cpuXC; ++ /** Pointer to the exec context. */ ++ ImplState *thread; + + /** The kind of fault this instruction has generated. */ + Fault fault; + ++ /** The memory request. */ ++ MemReqPtr req; ++ + /** The effective virtual address (lds & stores only). 
*/ + Addr effAddr; + + /** The effective physical address. */ + Addr physEffAddr; + + /** Effective virtual address for a copy source. */ + Addr copySrcEffAddr; + + /** Effective physical address for a copy source. */ + Addr copySrcPhysEffAddr; + + /** The memory request flags (from translation). */ + unsigned memReqFlags; + + /** The size of the data to be stored. */ + int storeSize; + + /** The data to be stored. */ + IntReg storeData; + + union Result { + uint64_t integer; + float fp; + double dbl; + }; + + /** The result of the instruction; assumes for now that there's only one + * destination register. + */ + Result instResult; + + /** PC of this instruction. */ + Addr PC; + + /** Next non-speculative PC. It is not filled in at fetch, but rather + * once the target of the branch is truly known (either decode or + * execute). + */ + Addr nextPC; + + /** Predicted next PC. */ + Addr predPC; + + /** Count of total number of dynamic instructions. */ + static int instcount; + - /** Whether or not the source register is ready. Not sure this should be - * here vs. the derived class. ++#ifdef DEBUG ++ void dumpSNList(); ++#endif ++ ++ /** Whether or not the source register is ready. ++ * @todo: Not sure this should be here vs the derived class. + */ + bool _readySrcRegIdx[MaxInstSrcRegs]; + + public: - /** BaseDynInst constructor given a binary instruction. */ - BaseDynInst(MachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, ++ /** BaseDynInst constructor given a binary instruction. ++ * @param inst The binary instruction. ++ * @param PC The PC of the instruction. ++ * @param pred_PC The predicted next PC. ++ * @param seq_num The sequence number of the instruction. ++ * @param cpu Pointer to the instruction's CPU. ++ */ ++ BaseDynInst(ExtMachInst inst, Addr PC, Addr pred_PC, InstSeqNum seq_num, + FullCPU *cpu); + - /** BaseDynInst constructor given a static inst pointer. */ ++ /** BaseDynInst constructor given a StaticInst pointer. 
++ * @param _staticInst The StaticInst for this BaseDynInst. ++ */ + BaseDynInst(StaticInstPtr &_staticInst); + + /** BaseDynInst destructor. */ + ~BaseDynInst(); + + private: + /** Function to initialize variables in the constructors. */ + void initVars(); + + public: ++ /** ++ * @todo: Make this function work; currently it is a dummy function. ++ * @param fault Last fault. ++ * @param cmd Last command. ++ * @param addr Virtual address of access. ++ * @param p Memory accessed. ++ * @param nbytes Access size. ++ */ + void - trace_mem(Fault fault, // last fault - MemCmd cmd, // last command - Addr addr, // virtual address of access - void *p, // memory accessed - int nbytes); // access size ++ trace_mem(Fault fault, ++ MemCmd cmd, ++ Addr addr, ++ void *p, ++ int nbytes); + + /** Dumps out contents of this BaseDynInst. */ + void dump(); + + /** Dumps out contents of this BaseDynInst into given string. */ + void dump(std::string &outstring); + + /** Returns the fault type. */ + Fault getFault() { return fault; } + + /** Checks whether or not this instruction has had its branch target + * calculated yet. For now it is not utilized and is hacked to be + * always false. ++ * @todo: Actually use this instruction. + */ + bool doneTargCalc() { return false; } + + /** Returns the next PC. This could be the speculative next PC if it is + * called prior to the actual branch target being calculated. + */ + Addr readNextPC() { return nextPC; } + + /** Set the predicted target of this current instruction. */ + void setPredTarg(Addr predicted_PC) { predPC = predicted_PC; } + + /** Returns the predicted target of the branch. */ + Addr readPredTarg() { return predPC; } + + /** Returns whether the instruction was predicted taken or not. */ - bool predTaken() { - return( predPC != (PC + sizeof(MachInst) ) ); - } ++ bool predTaken() { return predPC != (PC + sizeof(MachInst)); } + + /** Returns whether the instruction mispredicted. 
*/ - bool mispredicted() { return (predPC != nextPC); } ++ bool mispredicted() { return predPC != nextPC; } + + // + // Instruction types. Forward checks to StaticInst object. + // + bool isNop() const { return staticInst->isNop(); } + bool isMemRef() const { return staticInst->isMemRef(); } + bool isLoad() const { return staticInst->isLoad(); } + bool isStore() const { return staticInst->isStore(); } ++ bool isStoreConditional() const ++ { return staticInst->isStoreConditional(); } + bool isInstPrefetch() const { return staticInst->isInstPrefetch(); } + bool isDataPrefetch() const { return staticInst->isDataPrefetch(); } + bool isCopy() const { return staticInst->isCopy(); } + bool isInteger() const { return staticInst->isInteger(); } + bool isFloating() const { return staticInst->isFloating(); } + bool isControl() const { return staticInst->isControl(); } + bool isCall() const { return staticInst->isCall(); } + bool isReturn() const { return staticInst->isReturn(); } + bool isDirectCtrl() const { return staticInst->isDirectCtrl(); } + bool isIndirectCtrl() const { return staticInst->isIndirectCtrl(); } + bool isCondCtrl() const { return staticInst->isCondCtrl(); } + bool isUncondCtrl() const { return staticInst->isUncondCtrl(); } + bool isThreadSync() const { return staticInst->isThreadSync(); } + bool isSerializing() const { return staticInst->isSerializing(); } ++ bool isSerializeBefore() const ++ { return staticInst->isSerializeBefore() || serializeBefore; } ++ bool isSerializeAfter() const ++ { return staticInst->isSerializeAfter() || serializeAfter; } + bool isMemBarrier() const { return staticInst->isMemBarrier(); } + bool isWriteBarrier() const { return staticInst->isWriteBarrier(); } + bool isNonSpeculative() const { return staticInst->isNonSpeculative(); } ++ bool isQuiesce() const { return staticInst->isQuiesce(); } ++ bool isIprAccess() const { return staticInst->isIprAccess(); } ++ bool isUnverifiable() const { return staticInst->isUnverifiable(); } 
++ ++ /** Temporarily sets this instruction as a serialize before instruction. */ ++ void setSerializeBefore() { serializeBefore = true; } ++ ++ /** Clears the serializeBefore part of this instruction. */ ++ void clearSerializeBefore() { serializeBefore = false; } ++ ++ /** Checks if this serializeBefore is only temporarily set. */ ++ bool isTempSerializeBefore() { return serializeBefore; } ++ ++ /** Tracks if instruction has been externally set as serializeBefore. */ ++ bool serializeBefore; ++ ++ /** Temporarily sets this instruction as a serialize after instruction. */ ++ void setSerializeAfter() { serializeAfter = true; } ++ ++ /** Clears the serializeAfter part of this instruction.*/ ++ void clearSerializeAfter() { serializeAfter = false; } ++ ++ /** Checks if this serializeAfter is only temporarily set. */ ++ bool isTempSerializeAfter() { return serializeAfter; } ++ ++ /** Tracks if instruction has been externally set as serializeAfter. */ ++ bool serializeAfter; ++ ++ /** Checks if the serialization part of this instruction has been ++ * handled. This does not apply to the temporary serializing ++ * state; it only applies to this instruction's own permanent ++ * serializing state. ++ */ ++ bool isSerializeHandled() { return serializeHandled; } ++ ++ /** Sets the serialization part of this instruction as handled. */ ++ void setSerializeHandled() { serializeHandled = true; } ++ ++ /** Whether or not the serialization of this instruction has been handled. */ ++ bool serializeHandled; + + /** Returns the opclass of this instruction. */ + OpClass opClass() const { return staticInst->opClass(); } + + /** Returns the branch target address. */ + Addr branchTarget() const { return staticInst->branchTarget(PC); } + - /** Number of source registers. */ - int8_t numSrcRegs() const { return staticInst->numSrcRegs(); } ++ /** Returns the number of source registers. 
*/ ++ int8_t numSrcRegs() const { return staticInst->numSrcRegs(); } + - /** Number of destination registers. */ ++ /** Returns the number of destination registers. */ + int8_t numDestRegs() const { return staticInst->numDestRegs(); } + + // the following are used to track physical register usage + // for machines with separate int & FP reg files + int8_t numFPDestRegs() const { return staticInst->numFPDestRegs(); } + int8_t numIntDestRegs() const { return staticInst->numIntDestRegs(); } + + /** Returns the logical register index of the i'th destination register. */ - RegIndex destRegIdx(int i) const - { - return staticInst->destRegIdx(i); - } ++ RegIndex destRegIdx(int i) const { return staticInst->destRegIdx(i); } + + /** Returns the logical register index of the i'th source register. */ - RegIndex srcRegIdx(int i) const - { - return staticInst->srcRegIdx(i); - } ++ RegIndex srcRegIdx(int i) const { return staticInst->srcRegIdx(i); } + + /** Returns the result of an integer instruction. */ + uint64_t readIntResult() { return instResult.integer; } + + /** Returns the result of a floating point instruction. */ + float readFloatResult() { return instResult.fp; } + + /** Returns the result of a floating point (double) instruction. */ + double readDoubleResult() { return instResult.dbl; } + - //Push to .cc file. - /** Records that one of the source registers is ready. */ - void markSrcRegReady() ++ void setIntReg(const StaticInst *si, int idx, uint64_t val) + { - ++readyRegs; - if(readyRegs == numSrcRegs()) { - canIssue = true; - } ++ instResult.integer = val; + } + - /** Marks a specific register as ready. - * @todo: Move this to .cc file. 
- */ - void markSrcRegReady(RegIndex src_idx) ++ void setFloatRegSingle(const StaticInst *si, int idx, float val) + { - ++readyRegs; ++ instResult.fp = val; ++ } + - _readySrcRegIdx[src_idx] = 1; ++ void setFloatRegDouble(const StaticInst *si, int idx, double val) ++ { ++ instResult.dbl = val; ++ } + - if(readyRegs == numSrcRegs()) { - canIssue = true; - } ++ void setFloatRegInt(const StaticInst *si, int idx, uint64_t val) ++ { ++ instResult.integer = val; + } + ++ /** Records that one of the source registers is ready. */ ++ void markSrcRegReady(); ++ ++ /** Marks a specific register as ready. */ ++ void markSrcRegReady(RegIndex src_idx); ++ + /** Returns if a source register is ready. */ + bool isReadySrcRegIdx(int idx) const + { + return this->_readySrcRegIdx[idx]; + } + + /** Sets this instruction as completed. */ + void setCompleted() { completed = true; } + - /** Returns whethe or not this instruction is completed. */ ++ /** Returns whether or not this instruction is completed. */ + bool isCompleted() const { return completed; } + ++ void setResultReady() { resultReady = true; } ++ ++ bool isResultReady() const { return resultReady; } ++ + /** Sets this instruction as ready to issue. */ + void setCanIssue() { canIssue = true; } + + /** Returns whether or not this instruction is ready to issue. */ + bool readyToIssue() const { return canIssue; } + + /** Sets this instruction as issued from the IQ. */ + void setIssued() { issued = true; } + + /** Returns whether or not this instruction has issued. */ + bool isIssued() const { return issued; } + + /** Sets this instruction as executed. */ + void setExecuted() { executed = true; } + + /** Returns whether or not this instruction has executed. */ + bool isExecuted() const { return executed; } + + /** Sets this instruction as ready to commit. */ + void setCanCommit() { canCommit = true; } + + /** Clears this instruction as being ready to commit. 
*/ + void clearCanCommit() { canCommit = false; } + + /** Returns whether or not this instruction is ready to commit. */ + bool readyToCommit() const { return canCommit; } + ++ /** Sets this instruction as committed. */ ++ void setCommitted() { committed = true; } ++ ++ /** Returns whether or not this instruction is committed. */ ++ bool isCommitted() const { return committed; } ++ + /** Sets this instruction as squashed. */ + void setSquashed() { squashed = true; } + + /** Returns whether or not this instruction is squashed. */ + bool isSquashed() const { return squashed; } + ++ //Instruction Queue Entry ++ //----------------------- ++ /** Sets this instruction as a entry the IQ. */ ++ void setInIQ() { iqEntry = true; } ++ ++ /** Sets this instruction as a entry the IQ. */ ++ void removeInIQ() { iqEntry = false; } ++ + /** Sets this instruction as squashed in the IQ. */ - void setSquashedInIQ() { squashedInIQ = true; } ++ void setSquashedInIQ() { squashedInIQ = true; squashed = true;} + + /** Returns whether or not this instruction is squashed in the IQ. */ + bool isSquashedInIQ() const { return squashedInIQ; } + ++ /** Returns whether or not this instruction has issued. */ ++ bool isInIQ() const { return iqEntry; } ++ ++ ++ //Load / Store Queue Functions ++ //----------------------- ++ /** Sets this instruction as a entry the LSQ. */ ++ void setInLSQ() { lsqEntry = true; } ++ ++ /** Sets this instruction as a entry the LSQ. */ ++ void removeInLSQ() { lsqEntry = false; } ++ ++ /** Sets this instruction as squashed in the LSQ. */ ++ void setSquashedInLSQ() { squashedInLSQ = true;} ++ ++ /** Returns whether or not this instruction is squashed in the LSQ. */ ++ bool isSquashedInLSQ() const { return squashedInLSQ; } ++ ++ /** Returns whether or not this instruction is in the LSQ. */ ++ bool isInLSQ() const { return lsqEntry; } ++ ++ ++ //Reorder Buffer Functions ++ //----------------------- ++ /** Sets this instruction as a entry the ROB. 
*/ ++ void setInROB() { robEntry = true; } ++ ++ /** Sets this instruction as a entry the ROB. */ ++ void removeInROB() { robEntry = false; } ++ ++ /** Sets this instruction as squashed in the ROB. */ ++ void setSquashedInROB() { squashedInROB = true; } ++ ++ /** Returns whether or not this instruction is squashed in the ROB. */ ++ bool isSquashedInROB() const { return squashedInROB; } ++ ++ /** Returns whether or not this instruction is in the ROB. */ ++ bool isInROB() const { return robEntry; } ++ + /** Read the PC of this instruction. */ + const Addr readPC() const { return PC; } + + /** Set the next PC of this instruction (its actual target). */ - void setNextPC(uint64_t val) { nextPC = val; } ++ void setNextPC(uint64_t val) ++ { ++ nextPC = val; ++// instResult.integer = val; ++ } ++ ++ void setASID(short addr_space_id) { asid = addr_space_id; } ++ ++ void setThread(unsigned tid) { threadNumber = tid; } ++ ++ void setState(ImplState *state) { thread = state; } + + /** Returns the exec context. + * @todo: Remove this once the ExecContext is no longer used. + */ - ExecContext *xcBase() { return cpuXC->getProxy(); } ++ ExecContext *xcBase() { return thread->getXCProxy(); } + + private: + /** Instruction effective address. + * @todo: Consider if this is necessary or not. + */ + Addr instEffAddr; ++ + /** Whether or not the effective address calculation is completed. + * @todo: Consider if this is necessary or not. + */ + bool eaCalcDone; + + public: + /** Sets the effective address. */ + void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; } + + /** Returns the effective address. */ - const Addr &getEA() const { return instEffAddr; } ++ const Addr &getEA() const { return req->vaddr; } + + /** Returns whether or not the eff. addr. calculation has been completed. */ + bool doneEACalc() { return eaCalcDone; } + + /** Returns whether or not the eff. addr. source registers are ready. */ + bool eaSrcsReady(); + ++ /** Whether or not the memory operation is done. 
*/ ++ bool memOpDone; ++ + public: + /** Load queue index. */ + int16_t lqIdx; + + /** Store queue index. */ + int16_t sqIdx; ++ ++ bool reachedCommit; ++ ++ /** Iterator pointing to this BaseDynInst in the list of all insts. */ ++ ListIt instListIt; ++ ++ /** Returns iterator to this instruction in the list of all insts. */ ++ ListIt &getInstListIt() { return instListIt; } ++ ++ /** Sets iterator for this instruction in the list of all insts. */ ++ void setInstListIt(ListIt _instListIt) { instListIt = _instListIt; } +}; + +template +template +inline Fault +BaseDynInst::read(Addr addr, T &data, unsigned flags) +{ - MemReqPtr req = new MemReq(addr, cpuXC->getProxy(), sizeof(T), flags); ++ if (executed) { ++ fault = cpu->read(req, data, lqIdx); ++ return fault; ++ } ++ ++ req = new MemReq(addr, thread->getXCProxy(), sizeof(T), flags); + req->asid = asid; ++ req->thread_num = threadNumber; ++ req->pc = this->PC; ++ ++ if ((req->vaddr & (TheISA::VMPageSize - 1)) + req->size > ++ TheISA::VMPageSize) { ++ return TheISA::genAlignmentFault(); ++ } + + fault = cpu->translateDataReadReq(req); + - // Record key MemReq parameters so we can generate another one - // just like it for the timing access without calling translate() - // again (which might mess up the TLB). - // Do I ever really need this? -KTL 3/05 + effAddr = req->vaddr; + physEffAddr = req->paddr; + memReqFlags = req->flags; + - /** - * @todo - * Replace the disjoint functional memory with a unified one and remove - * this hack. - */ - #if !FULL_SYSTEM - req->paddr = req->vaddr; - #endif - + if (fault == NoFault) { ++#if FULL_SYSTEM ++ if (cpu->system->memctrl->badaddr(physEffAddr)) { ++ fault = TheISA::genMachineCheckFault(); ++ data = (T)-1; ++ this->setExecuted(); ++ } else { ++ fault = cpu->read(req, data, lqIdx); ++ } ++#else + fault = cpu->read(req, data, lqIdx); ++#endif + } else { + // Return a fixed value to keep simulation deterministic even + // along misspeculated paths. 
+ data = (T)-1; ++ ++ // Commit will have to clean up whatever happened. Set this ++ // instruction as executed. ++ this->setExecuted(); + } + + if (traceData) { + traceData->setAddr(addr); + traceData->setData(data); + } + + return fault; +} + +template +template +inline Fault +BaseDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) +{ + if (traceData) { + traceData->setAddr(addr); + traceData->setData(data); + } + - MemReqPtr req = new MemReq(addr, cpuXC->getProxy(), sizeof(T), flags); ++ req = new MemReq(addr, thread->getXCProxy(), sizeof(T), flags); + + req->asid = asid; ++ req->thread_num = threadNumber; ++ req->pc = this->PC; ++ ++ if ((req->vaddr & (TheISA::VMPageSize - 1)) + req->size > ++ TheISA::VMPageSize) { ++ return TheISA::genAlignmentFault(); ++ } + + fault = cpu->translateDataWriteReq(req); + - // Record key MemReq parameters so we can generate another one - // just like it for the timing access without calling translate() - // again (which might mess up the TLB). + effAddr = req->vaddr; + physEffAddr = req->paddr; + memReqFlags = req->flags; + - /** - * @todo - * Replace the disjoint functional memory with a unified one and remove - * this hack. - */ - #if !FULL_SYSTEM - req->paddr = req->vaddr; - #endif - + if (fault == NoFault) { ++#if FULL_SYSTEM ++ if (cpu->system->memctrl->badaddr(physEffAddr)) { ++ fault = TheISA::genMachineCheckFault(); ++ } else { ++ fault = cpu->write(req, data, sqIdx); ++ } ++#else + fault = cpu->write(req, data, sqIdx); ++#endif + } + + if (res) { + // always return some result to keep misspeculated paths + // (which will ignore faults) deterministic + *res = (fault == NoFault) ? 
req->result : 0; + } + + return fault; +} + +#endif // __CPU_BASE_DYN_INST_HH__ diff --cc src/cpu/cpu_exec_context.cc index b8aa9a67e,000000000..7c2b32a93 mode 100644,000000..100644 --- a/src/cpu/cpu_exec_context.cc +++ b/src/cpu/cpu_exec_context.cc @@@ -1,324 -1,0 +1,340 @@@ +/* + * Copyright (c) 2001-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include "arch/isa_traits.hh" +#include "cpu/base.hh" +#include "cpu/cpu_exec_context.hh" +#include "cpu/exec_context.hh" + +#if FULL_SYSTEM +#include "base/callback.hh" +#include "base/cprintf.hh" +#include "base/output.hh" +#include "base/trace.hh" +#include "cpu/profile.hh" ++#include "cpu/quiesce_event.hh" +#include "kern/kernel_stats.hh" +#include "sim/serialize.hh" +#include "sim/sim_exit.hh" +#include "arch/stacktrace.hh" +#else +#include "sim/process.hh" +#include "sim/system.hh" +#include "mem/translating_port.hh" +#endif + +using namespace std; + +// constructor +#if FULL_SYSTEM +CPUExecContext::CPUExecContext(BaseCPU *_cpu, int _thread_num, System *_sys, - AlphaITB *_itb, AlphaDTB *_dtb) ++ AlphaITB *_itb, AlphaDTB *_dtb, ++ bool use_kernel_stats) + : _status(ExecContext::Unallocated), cpu(_cpu), thread_num(_thread_num), + cpu_id(-1), lastActivate(0), lastSuspend(0), system(_sys), itb(_itb), - dtb(_dtb), profile(NULL), quiesceEvent(this), func_exe_inst(0), - storeCondFailures(0) ++ dtb(_dtb), profile(NULL), func_exe_inst(0), storeCondFailures(0) + +{ + proxy = new ProxyExecContext(this); + ++ quiesceEvent = new EndQuiesceEvent(proxy); ++ + regs.clear(); + + if (cpu->params->profile) { + profile = new FunctionProfile(system->kernelSymtab); + Callback *cb = + new MakeCallback(this); + registerExitCallback(cb); + } + + // let's fill with a dummy node for now so we don't get a segfault + // on the first cycle when there's no node available. 
+ static ProfileNode dummyNode; + profileNode = &dummyNode; + profilePC = 3; + ++ ++ if (use_kernel_stats) { ++ kernelStats = new Kernel::Statistics(system); ++ } else { ++ kernelStats = NULL; ++ } + Port *mem_port; + physPort = new FunctionalPort(csprintf("%s-%d-funcport", + cpu->name(), thread_num)); + mem_port = system->physmem->getPort("functional"); + mem_port->setPeer(physPort); + physPort->setPeer(mem_port); + + virtPort = new VirtualPort(csprintf("%s-%d-vport", + cpu->name(), thread_num)); + mem_port = system->physmem->getPort("functional"); + mem_port->setPeer(virtPort); + virtPort->setPeer(mem_port); +} +#else +CPUExecContext::CPUExecContext(BaseCPU *_cpu, int _thread_num, + Process *_process, int _asid, MemObject* memobj) + : _status(ExecContext::Unallocated), + cpu(_cpu), thread_num(_thread_num), cpu_id(-1), lastActivate(0), + lastSuspend(0), process(_process), asid(_asid), + func_exe_inst(0), storeCondFailures(0) +{ + /* Use this port to for syscall emulation writes to memory. 
*/ + Port *mem_port; + port = new TranslatingPort(csprintf("%s-%d-funcport", + cpu->name(), thread_num), + process->pTable, false); + mem_port = memobj->getPort("functional"); + mem_port->setPeer(port); + port->setPeer(mem_port); + + regs.clear(); + proxy = new ProxyExecContext(this); +} + +CPUExecContext::CPUExecContext(RegFile *regFile) + : cpu(NULL), thread_num(-1), process(NULL), asid(-1), + func_exe_inst(0), storeCondFailures(0) +{ + regs = *regFile; + proxy = new ProxyExecContext(this); +} + +#endif + +CPUExecContext::~CPUExecContext() +{ + delete proxy; +} + +#if FULL_SYSTEM +void +CPUExecContext::dumpFuncProfile() +{ + std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name())); + profile->dump(proxy, *os); +} + - CPUExecContext::EndQuiesceEvent::EndQuiesceEvent(CPUExecContext *_cpuXC) - : Event(&mainEventQueue), cpuXC(_cpuXC) - { - } - - void - CPUExecContext::EndQuiesceEvent::process() - { - cpuXC->activate(); - } - - const char* - CPUExecContext::EndQuiesceEvent::description() - { - return "End Quiesce Event."; - } - +void +CPUExecContext::profileClear() +{ + if (profile) + profile->clear(); +} + +void +CPUExecContext::profileSample() +{ + if (profile) + profile->sample(profileNode, profilePC); +} + +#endif + +void +CPUExecContext::takeOverFrom(ExecContext *oldContext) +{ + // some things should already be set up +#if FULL_SYSTEM + assert(system == oldContext->getSystemPtr()); +#else + assert(process == oldContext->getProcessPtr()); +#endif + + // copy over functional state + _status = oldContext->status(); + copyArchRegs(oldContext); + cpu_id = oldContext->readCpuId(); +#if !FULL_SYSTEM + func_exe_inst = oldContext->readFuncExeInst(); ++#else ++ EndQuiesceEvent *quiesce = oldContext->getQuiesceEvent(); ++ if (quiesce) { ++ // Point the quiesce event's XC at this XC so that it wakes up ++ // the proper CPU. 
++ quiesce->xc = proxy; ++ } ++ if (quiesceEvent) { ++ quiesceEvent->xc = proxy; ++ } +#endif + + storeCondFailures = 0; + + oldContext->setStatus(ExecContext::Unallocated); +} + +void +CPUExecContext::serialize(ostream &os) +{ + SERIALIZE_ENUM(_status); + regs.serialize(os); + // thread_num and cpu_id are deterministic from the config + SERIALIZE_SCALAR(func_exe_inst); + SERIALIZE_SCALAR(inst); + +#if FULL_SYSTEM + Tick quiesceEndTick = 0; - if (quiesceEvent.scheduled()) - quiesceEndTick = quiesceEvent.when(); ++ if (quiesceEvent->scheduled()) ++ quiesceEndTick = quiesceEvent->when(); + SERIALIZE_SCALAR(quiesceEndTick); - ++ if (kernelStats) ++ kernelStats->serialize(os); +#endif +} + + +void +CPUExecContext::unserialize(Checkpoint *cp, const std::string §ion) +{ + UNSERIALIZE_ENUM(_status); + regs.unserialize(cp, section); + // thread_num and cpu_id are deterministic from the config + UNSERIALIZE_SCALAR(func_exe_inst); + UNSERIALIZE_SCALAR(inst); + +#if FULL_SYSTEM + Tick quiesceEndTick; + UNSERIALIZE_SCALAR(quiesceEndTick); + if (quiesceEndTick) - quiesceEvent.schedule(quiesceEndTick); ++ quiesceEvent->schedule(quiesceEndTick); ++ if (kernelStats) ++ kernelStats->unserialize(cp, section); +#endif +} + + +void +CPUExecContext::activate(int delay) +{ + if (status() == ExecContext::Active) + return; + + lastActivate = curTick; + ++ if (status() == ExecContext::Unallocated) { ++ cpu->activateWhenReady(thread_num); ++ return; ++ } ++ + _status = ExecContext::Active; ++ ++ // status() == Suspended + cpu->activateContext(thread_num, delay); +} + +void +CPUExecContext::suspend() +{ + if (status() == ExecContext::Suspended) + return; + + lastActivate = curTick; + lastSuspend = curTick; +/* +#if FULL_SYSTEM + // Don't change the status from active if there are pending interrupts + if (cpu->check_interrupts()) { + assert(status() == ExecContext::Active); + return; + } +#endif +*/ + _status = ExecContext::Suspended; + cpu->suspendContext(thread_num); +} + +void 
+CPUExecContext::deallocate() +{ + if (status() == ExecContext::Unallocated) + return; + + _status = ExecContext::Unallocated; + cpu->deallocateContext(thread_num); +} + +void +CPUExecContext::halt() +{ + if (status() == ExecContext::Halted) + return; + + _status = ExecContext::Halted; + cpu->haltContext(thread_num); +} + + +void +CPUExecContext::regStats(const string &name) +{ ++#if FULL_SYSTEM ++ if (kernelStats) ++ kernelStats->regStats(name + ".kern"); ++#endif +} + +void +CPUExecContext::copyArchRegs(ExecContext *xc) +{ + TheISA::copyRegs(xc, proxy); +} + +#if FULL_SYSTEM +VirtualPort* +CPUExecContext::getVirtPort(ExecContext *xc) +{ + if (!xc) + return virtPort; + + VirtualPort *vp; + Port *mem_port; + + vp = new VirtualPort("xc-vport", xc); + mem_port = system->physmem->getPort("functional"); + mem_port->setPeer(vp); + vp->setPeer(mem_port); + return vp; +} + +void +CPUExecContext::delVirtPort(VirtualPort *vp) +{ +// assert(!vp->nullExecContext()); + delete vp->getPeer(); + delete vp; +} + + +#endif + diff --cc src/cpu/cpu_exec_context.hh index 2c06a7b3b,000000000..61e6550af mode 100644,000000..100644 --- a/src/cpu/cpu_exec_context.hh +++ b/src/cpu/cpu_exec_context.hh @@@ -1,546 -1,0 +1,540 @@@ +/* + * Copyright (c) 2001-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __CPU_CPU_EXEC_CONTEXT_HH__ +#define __CPU_CPU_EXEC_CONTEXT_HH__ + +#include "arch/isa_traits.hh" +#include "config/full_system.hh" +#include "cpu/exec_context.hh" +#include "mem/physical.hh" +#include "mem/request.hh" +#include "sim/byteswap.hh" +#include "sim/eventq.hh" +#include "sim/host.hh" +#include "sim/serialize.hh" + +class BaseCPU; + +#if FULL_SYSTEM + +#include "sim/system.hh" +#include "arch/tlb.hh" + +class FunctionProfile; +class ProfileNode; +class FunctionalPort; +class PhysicalPort; + + ++namespace Kernel { ++ class Statistics; ++}; ++ +#else // !FULL_SYSTEM + +#include "sim/process.hh" +#include "mem/page_table.hh" +class TranslatingPort; + + +#endif // FULL_SYSTEM + +// +// The CPUExecContext object represents a functional context for +// instruction execution. It incorporates everything required for +// architecture-level functional simulation of a single thread. +// + +class CPUExecContext +{ + protected: + typedef TheISA::RegFile RegFile; + typedef TheISA::MachInst MachInst; + typedef TheISA::MiscRegFile MiscRegFile; + typedef TheISA::MiscReg MiscReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + public: + typedef ExecContext::Status Status; + + private: + Status _status; + + public: + Status status() const { return _status; } + + void setStatus(Status newStatus) { _status = newStatus; } + + /// Set the status to Active. Optional delay indicates number of + /// cycles to wait before beginning execution. + void activate(int delay = 1); + + /// Set the status to Suspended. + void suspend(); + + /// Set the status to Unallocated. + void deallocate(); + + /// Set the status to Halted. + void halt(); + + protected: + RegFile regs; // correct-path register context + + public: + // pointer to CPU associated with this context + BaseCPU *cpu; + + ProxyExecContext *proxy; + + // Current instruction + MachInst inst; + + // Index of hardware thread context on the CPU that this represents. 
+ int thread_num; + + // ID of this context w.r.t. the System or Process object to which + // it belongs. For full-system mode, this is the system CPU ID. + int cpu_id; + + Tick lastActivate; + Tick lastSuspend; + + System *system; + + +#if FULL_SYSTEM + AlphaITB *itb; + AlphaDTB *dtb; + + /** A functional port outgoing only for functional accesses to physical + * addresses.*/ + FunctionalPort *physPort; + + /** A functional port, outgoing only, for functional accesse to virtual + * addresses. That doen't require execution context information */ + VirtualPort *virtPort; + + FunctionProfile *profile; + ProfileNode *profileNode; + Addr profilePC; + void dumpFuncProfile(); + - /** Event for timing out quiesce instruction */ - struct EndQuiesceEvent : public Event - { - /** A pointer to the execution context that is quiesced */ - CPUExecContext *cpuXC; - - EndQuiesceEvent(CPUExecContext *_cpuXC); - - /** Event process to occur at interrupt*/ - virtual void process(); ++ EndQuiesceEvent *quiesceEvent; + - /** Event description */ - virtual const char *description(); - }; - EndQuiesceEvent quiesceEvent; - - Event *getQuiesceEvent() { return &quiesceEvent; } ++ EndQuiesceEvent *getQuiesceEvent() { return quiesceEvent; } + + Tick readLastActivate() { return lastActivate; } + + Tick readLastSuspend() { return lastSuspend; } + + void profileClear(); + + void profileSample(); + ++ Kernel::Statistics *getKernelStats() { return kernelStats; } ++ ++ Kernel::Statistics *kernelStats; +#else + /// Port that syscalls can use to access memory (provides translation step). + TranslatingPort *port; + + Process *process; + + // Address space ID. Note that this is used for TIMING cache + // simulation only; all functional memory accesses should use + // one of the FunctionalMemory pointers above. + short asid; + +#endif + + /** + * Temporary storage to pass the source address from copy_load to + * copy_store. + * @todo Remove this temporary when we have a better way to do it. 
+ */ + Addr copySrcAddr; + /** + * Temp storage for the physical source address of a copy. + * @todo Remove this temporary when we have a better way to do it. + */ + Addr copySrcPhysAddr; + + + /* + * number of executed instructions, for matching with syscall trace + * points in EIO files. + */ + Counter func_exe_inst; + + // + // Count failed store conditionals so we can warn of apparent + // application deadlock situations. + unsigned storeCondFailures; + + // constructor: initialize context from given process structure +#if FULL_SYSTEM + CPUExecContext(BaseCPU *_cpu, int _thread_num, System *_system, - AlphaITB *_itb, AlphaDTB *_dtb); ++ AlphaITB *_itb, AlphaDTB *_dtb, ++ bool use_kernel_stats = true); +#else + CPUExecContext(BaseCPU *_cpu, int _thread_num, Process *_process, int _asid, + MemObject *memobj); + // Constructor to use XC to pass reg file around. Not used for anything + // else. + CPUExecContext(RegFile *regFile); +#endif + virtual ~CPUExecContext(); + + virtual void takeOverFrom(ExecContext *oldContext); + + void regStats(const std::string &name); + + void serialize(std::ostream &os); + void unserialize(Checkpoint *cp, const std::string §ion); + + BaseCPU *getCpuPtr() { return cpu; } + + ExecContext *getProxy() { return proxy; } + + int getThreadNum() { return thread_num; } + +#if FULL_SYSTEM + System *getSystemPtr() { return system; } + + AlphaITB *getITBPtr() { return itb; } + + AlphaDTB *getDTBPtr() { return dtb; } + + int getInstAsid() { return regs.instAsid(); } + int getDataAsid() { return regs.dataAsid(); } + + Fault translateInstReq(RequestPtr &req) + { + return itb->translate(req, proxy); + } + + Fault translateDataReadReq(RequestPtr &req) + { + return dtb->translate(req, proxy, false); + } + + Fault translateDataWriteReq(RequestPtr &req) + { + return dtb->translate(req, proxy, true); + } + + FunctionalPort *getPhysPort() { return physPort; } + + /** Return a virtual port. If no exec context is specified then a static + * port is returned. 
Otherwise a port is created and returned. It must be + * deleted by deleteVirtPort(). */ + VirtualPort *getVirtPort(ExecContext *xc); + + void delVirtPort(VirtualPort *vp); + +#else + TranslatingPort *getMemPort() { return port; } + + Process *getProcessPtr() { return process; } + + int getInstAsid() { return asid; } + int getDataAsid() { return asid; } + + Fault translateInstReq(RequestPtr &req) + { + return process->pTable->translate(req); + } + + Fault translateDataReadReq(RequestPtr &req) + { + return process->pTable->translate(req); + } + + Fault translateDataWriteReq(RequestPtr &req) + { + return process->pTable->translate(req); + } + +#endif + +/* + template + Fault read(RequestPtr &req, T &data) + { +#if FULL_SYSTEM && THE_ISA == ALPHA_ISA + if (req->flags & LOCKED) { + req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr); + req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true); + } +#endif + + Fault error; + error = mem->prot_read(req->paddr, data, req->size); + data = LittleEndianGuest::gtoh(data); + return error; + } + + template + Fault write(RequestPtr &req, T &data) + { +#if FULL_SYSTEM && THE_ISA == ALPHA_ISA + ExecContext *xc; + + // If this is a store conditional, act appropriately + if (req->flags & LOCKED) { + xc = req->xc; + + if (req->flags & UNCACHEABLE) { + // Don't update result register (see stq_c in isa_desc) + req->result = 2; + xc->setStCondFailures(0);//Needed? 
[RGD] + } else { + bool lock_flag = xc->readMiscReg(TheISA::Lock_Flag_DepTag); + Addr lock_addr = xc->readMiscReg(TheISA::Lock_Addr_DepTag); + req->result = lock_flag; + if (!lock_flag || + ((lock_addr & ~0xf) != (req->paddr & ~0xf))) { + xc->setMiscReg(TheISA::Lock_Flag_DepTag, false); + xc->setStCondFailures(xc->readStCondFailures() + 1); + if (((xc->readStCondFailures()) % 100000) == 0) { + std::cerr << "Warning: " + << xc->readStCondFailures() + << " consecutive store conditional failures " + << "on cpu " << req->xc->readCpuId() + << std::endl; + } + return NoFault; + } + else xc->setStCondFailures(0); + } + } + + // Need to clear any locked flags on other proccessors for + // this address. Only do this for succsful Store Conditionals + // and all other stores (WH64?). Unsuccessful Store + // Conditionals would have returned above, and wouldn't fall + // through. + for (int i = 0; i < system->execContexts.size(); i++){ + xc = system->execContexts[i]; + if ((xc->readMiscReg(TheISA::Lock_Addr_DepTag) & ~0xf) == + (req->paddr & ~0xf)) { + xc->setMiscReg(TheISA::Lock_Flag_DepTag, false); + } + } + +#endif + return mem->prot_write(req->paddr, (T)htog(data), req->size); + } +*/ + virtual bool misspeculating(); + + + MachInst getInst() { return inst; } + + void setInst(MachInst new_inst) + { + inst = new_inst; + } + + Fault instRead(RequestPtr &req) + { + panic("instRead not implemented"); + // return funcPhysMem->read(req, inst); + return NoFault; + } + + void setCpuId(int id) { cpu_id = id; } + + int readCpuId() { return cpu_id; } + + void copyArchRegs(ExecContext *xc); + + // + // New accessors for new decoder. 
+ // + uint64_t readIntReg(int reg_idx) + { + return regs.readIntReg(reg_idx); + } + + FloatReg readFloatReg(int reg_idx, int width) + { + return regs.readFloatReg(reg_idx, width); + } + + FloatReg readFloatReg(int reg_idx) + { + return regs.readFloatReg(reg_idx); + } + + FloatRegBits readFloatRegBits(int reg_idx, int width) + { + return regs.readFloatRegBits(reg_idx, width); + } + + FloatRegBits readFloatRegBits(int reg_idx) + { + return regs.readFloatRegBits(reg_idx); + } + + void setIntReg(int reg_idx, uint64_t val) + { + regs.setIntReg(reg_idx, val); + } + + void setFloatReg(int reg_idx, FloatReg val, int width) + { + regs.setFloatReg(reg_idx, val, width); + } + + void setFloatReg(int reg_idx, FloatReg val) + { + regs.setFloatReg(reg_idx, val); + } + + void setFloatRegBits(int reg_idx, FloatRegBits val, int width) + { + regs.setFloatRegBits(reg_idx, val, width); + } + + void setFloatRegBits(int reg_idx, FloatRegBits val) + { + regs.setFloatRegBits(reg_idx, val); + } + + uint64_t readPC() + { + return regs.readPC(); + } + + void setPC(uint64_t val) + { + regs.setPC(val); + } + + uint64_t readNextPC() + { + return regs.readNextPC(); + } + + void setNextPC(uint64_t val) + { + regs.setNextPC(val); + } + + uint64_t readNextNPC() + { + return regs.readNextNPC(); + } + + void setNextNPC(uint64_t val) + { + regs.setNextNPC(val); + } + + + MiscReg readMiscReg(int misc_reg) + { + return regs.readMiscReg(misc_reg); + } + + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { + return regs.readMiscRegWithEffect(misc_reg, fault, proxy); + } + + Fault setMiscReg(int misc_reg, const MiscReg &val) + { + return regs.setMiscReg(misc_reg, val); + } + + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) + { + return regs.setMiscRegWithEffect(misc_reg, val, proxy); + } + + unsigned readStCondFailures() { return storeCondFailures; } + + void setStCondFailures(unsigned sc_failures) + { storeCondFailures = sc_failures; } + + void clearArchRegs() { regs.clear(); } + 
+#if FULL_SYSTEM + int readIntrFlag() { return regs.intrflag; } + void setIntrFlag(int val) { regs.intrflag = val; } + Fault hwrei(); + bool inPalMode() { return AlphaISA::PcPAL(regs.readPC()); } + bool simPalCheck(int palFunc); +#endif + +#if !FULL_SYSTEM + TheISA::IntReg getSyscallArg(int i) + { + return regs.readIntReg(TheISA::ArgumentReg0 + i); + } + + // used to shift args for indirect syscall + void setSyscallArg(int i, TheISA::IntReg val) + { + regs.setIntReg(TheISA::ArgumentReg0 + i, val); + } + + void setSyscallReturn(SyscallReturn return_value) + { + TheISA::setSyscallReturn(return_value, ®s); + } + + void syscall(int64_t callnum) + { + process->syscall(callnum, proxy); + } + + Counter readFuncExeInst() { return func_exe_inst; } + + void setFuncExeInst(Counter new_val) { func_exe_inst = new_val; } +#endif + + void changeRegFileContext(RegFile::ContextParam param, + RegFile::ContextVal val) + { + regs.changeContext(param, val); + } +}; + + +// for non-speculative execution context, spec_mode is always false +inline bool +CPUExecContext::misspeculating() +{ + return false; +} + +#endif // __CPU_CPU_EXEC_CONTEXT_HH__ diff --cc src/cpu/cpu_models.py index 8d0a15f61,000000000..c3de03948 mode 100644,000000..100644 --- a/src/cpu/cpu_models.py +++ b/src/cpu/cpu_models.py @@@ -1,71 -1,0 +1,80 @@@ +# Copyright (c) 2003-2006 The Regents of The University of Michigan +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +################ +# CpuModel class +# +# The CpuModel class encapsulates everything the ISA parser needs to +# know about a particular CPU model. + +class CpuModel: + # Dict of available CPU model objects. Accessible as CpuModel.dict. + dict = {} + + # Constructor. Automatically adds models to CpuModel.dict. 
+ def __init__(self, name, filename, includes, strings): + self.name = name + self.filename = filename # filename for output exec code + self.includes = includes # include files needed in exec file + # The 'strings' dict holds all the per-CPU symbols we can + # substitute into templates etc. + self.strings = strings + # Add self to dict + CpuModel.dict[name] = self + + +# +# Define CPU models. +# +# Parameters are: +# - name of model +# - filename for generated ISA execution file +# - includes needed for generated ISA execution file +# - substitution strings for ISA description templates +# + +CpuModel('AtomicSimpleCPU', 'atomic_simple_cpu_exec.cc', + '#include "cpu/simple/atomic.hh"', + { 'CPU_exec_context': 'AtomicSimpleCPU' }) +CpuModel('TimingSimpleCPU', 'timing_simple_cpu_exec.cc', + '#include "cpu/simple/timing.hh"', + { 'CPU_exec_context': 'TimingSimpleCPU' }) +CpuModel('FullCPU', 'full_cpu_exec.cc', + '#include "encumbered/cpu/full/dyn_inst.hh"', + { 'CPU_exec_context': 'DynInst' }) +CpuModel('AlphaFullCPU', 'alpha_o3_exec.cc', + '#include "cpu/o3/alpha_dyn_inst.hh"', + { 'CPU_exec_context': 'AlphaDynInst' }) ++CpuModel('OzoneSimpleCPU', 'ozone_simple_exec.cc', ++ '#include "cpu/ozone/dyn_inst.hh"', ++ { 'CPU_exec_context': 'OzoneDynInst' }) ++CpuModel('OzoneCPU', 'ozone_exec.cc', ++ '#include "cpu/ozone/dyn_inst.hh"', ++ { 'CPU_exec_context': 'OzoneDynInst' }) ++CpuModel('CheckerCPU', 'checker_cpu_exec.cc', ++ '#include "cpu/checker/cpu.hh"', ++ { 'CPU_exec_context': 'CheckerCPU' }) + diff --cc src/cpu/exec_context.hh index 1f26183ab,000000000..5b601bb30 mode 100644,000000..100644 --- a/src/cpu/exec_context.hh +++ b/src/cpu/exec_context.hh @@@ -1,450 -1,0 +1,417 @@@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_EXEC_CONTEXT_HH__ +#define __CPU_EXEC_CONTEXT_HH__ + +#include "config/full_system.hh" +#include "mem/request.hh" +#include "sim/faults.hh" +#include "sim/host.hh" +#include "sim/serialize.hh" +#include "sim/byteswap.hh" + +// @todo: Figure out a more architecture independent way to obtain the ITB and +// DTB pointers. 
+class AlphaDTB; +class AlphaITB; +class BaseCPU; ++class EndQuiesceEvent; +class Event; +class TranslatingPort; +class FunctionalPort; +class VirtualPort; +class Process; +class System; ++namespace Kernel { ++ class Statistics; ++}; + +class ExecContext +{ + protected: + typedef TheISA::RegFile RegFile; + typedef TheISA::MachInst MachInst; + typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + typedef TheISA::MiscRegFile MiscRegFile; + typedef TheISA::MiscReg MiscReg; + public: + enum Status + { + /// Initialized but not running yet. All CPUs start in + /// this state, but most transition to Active on cycle 1. + /// In MP or SMT systems, non-primary contexts will stay + /// in this state until a thread is assigned to them. + Unallocated, + + /// Running. Instructions should be executed only when + /// the context is in this state. + Active, + + /// Temporarily inactive. Entered while waiting for + /// synchronization, etc. + Suspended, + + /// Permanently shut down. Entered when target executes + /// m5exit pseudo-instruction. When all contexts enter + /// this state, the simulation will terminate. + Halted + }; + + virtual ~ExecContext() { }; + + virtual BaseCPU *getCpuPtr() = 0; + + virtual void setCpuId(int id) = 0; + + virtual int readCpuId() = 0; + +#if FULL_SYSTEM + virtual System *getSystemPtr() = 0; + + virtual AlphaITB *getITBPtr() = 0; + + virtual AlphaDTB * getDTBPtr() = 0; + ++ virtual Kernel::Statistics *getKernelStats() = 0; ++ + virtual FunctionalPort *getPhysPort() = 0; + + virtual VirtualPort *getVirtPort(ExecContext *xc = NULL) = 0; + + virtual void delVirtPort(VirtualPort *vp) = 0; +#else + virtual TranslatingPort *getMemPort() = 0; + + virtual Process *getProcessPtr() = 0; +#endif + + virtual Status status() const = 0; + + virtual void setStatus(Status new_status) = 0; + + /// Set the status to Active. 
Optional delay indicates number of + /// cycles to wait before beginning execution. + virtual void activate(int delay = 1) = 0; + + /// Set the status to Suspended. + virtual void suspend() = 0; + + /// Set the status to Unallocated. + virtual void deallocate() = 0; + + /// Set the status to Halted. + virtual void halt() = 0; + +#if FULL_SYSTEM + virtual void dumpFuncProfile() = 0; +#endif + + virtual void takeOverFrom(ExecContext *old_context) = 0; + + virtual void regStats(const std::string &name) = 0; + + virtual void serialize(std::ostream &os) = 0; + virtual void unserialize(Checkpoint *cp, const std::string §ion) = 0; + +#if FULL_SYSTEM - virtual Event *getQuiesceEvent() = 0; ++ virtual EndQuiesceEvent *getQuiesceEvent() = 0; + + // Not necessarily the best location for these... + // Having an extra function just to read these is obnoxious + virtual Tick readLastActivate() = 0; + virtual Tick readLastSuspend() = 0; + + virtual void profileClear() = 0; + virtual void profileSample() = 0; +#endif + + virtual int getThreadNum() = 0; + - virtual int getInstAsid() = 0; - virtual int getDataAsid() = 0; - - virtual Fault translateInstReq(RequestPtr &req) = 0; - - virtual Fault translateDataReadReq(RequestPtr &req) = 0; - - virtual Fault translateDataWriteReq(RequestPtr &req) = 0; - + // Also somewhat obnoxious. Really only used for the TLB fault. + // However, may be quite useful in SPARC. + virtual TheISA::MachInst getInst() = 0; + + virtual void copyArchRegs(ExecContext *xc) = 0; + + virtual void clearArchRegs() = 0; + + // + // New accessors for new decoder. 
+ // + virtual uint64_t readIntReg(int reg_idx) = 0; + + virtual FloatReg readFloatReg(int reg_idx, int width) = 0; + + virtual FloatReg readFloatReg(int reg_idx) = 0; + + virtual FloatRegBits readFloatRegBits(int reg_idx, int width) = 0; + + virtual FloatRegBits readFloatRegBits(int reg_idx) = 0; + + virtual void setIntReg(int reg_idx, uint64_t val) = 0; + + virtual void setFloatReg(int reg_idx, FloatReg val, int width) = 0; + + virtual void setFloatReg(int reg_idx, FloatReg val) = 0; + + virtual void setFloatRegBits(int reg_idx, FloatRegBits val) = 0; + + virtual void setFloatRegBits(int reg_idx, FloatRegBits val, int width) = 0; + + virtual uint64_t readPC() = 0; + + virtual void setPC(uint64_t val) = 0; + + virtual uint64_t readNextPC() = 0; + + virtual void setNextPC(uint64_t val) = 0; + + virtual uint64_t readNextNPC() = 0; + + virtual void setNextNPC(uint64_t val) = 0; + + virtual MiscReg readMiscReg(int misc_reg) = 0; + + virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) = 0; + + virtual Fault setMiscReg(int misc_reg, const MiscReg &val) = 0; + + virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) = 0; + + // Also not necessarily the best location for these two. Hopefully will go + // away once we decide upon where st cond failures goes. + virtual unsigned readStCondFailures() = 0; + + virtual void setStCondFailures(unsigned sc_failures) = 0; + +#if FULL_SYSTEM - virtual int readIntrFlag() = 0; - virtual void setIntrFlag(int val) = 0; - virtual Fault hwrei() = 0; + virtual bool inPalMode() = 0; - virtual bool simPalCheck(int palFunc) = 0; +#endif + + // Only really makes sense for old CPU model. Still could be useful though. 
+ virtual bool misspeculating() = 0; + +#if !FULL_SYSTEM + virtual IntReg getSyscallArg(int i) = 0; + + // used to shift args for indirect syscall + virtual void setSyscallArg(int i, IntReg val) = 0; + + virtual void setSyscallReturn(SyscallReturn return_value) = 0; + - virtual void syscall(int64_t callnum) = 0; + + // Same with st cond failures. + virtual Counter readFuncExeInst() = 0; - - virtual void setFuncExeInst(Counter new_val) = 0; +#endif + + virtual void changeRegFileContext(RegFile::ContextParam param, + RegFile::ContextVal val) = 0; +}; + +template +class ProxyExecContext : public ExecContext +{ + public: + ProxyExecContext(XC *actual_xc) + { actualXC = actual_xc; } + + private: + XC *actualXC; + + public: + + BaseCPU *getCpuPtr() { return actualXC->getCpuPtr(); } + + void setCpuId(int id) { actualXC->setCpuId(id); } + + int readCpuId() { return actualXC->readCpuId(); } + +#if FULL_SYSTEM + System *getSystemPtr() { return actualXC->getSystemPtr(); } + + AlphaITB *getITBPtr() { return actualXC->getITBPtr(); } + + AlphaDTB *getDTBPtr() { return actualXC->getDTBPtr(); } + ++ Kernel::Statistics *getKernelStats() { return actualXC->getKernelStats(); } ++ + FunctionalPort *getPhysPort() { return actualXC->getPhysPort(); } + + VirtualPort *getVirtPort(ExecContext *xc = NULL) { return actualXC->getVirtPort(xc); } + + void delVirtPort(VirtualPort *vp) { return actualXC->delVirtPort(vp); } +#else + TranslatingPort *getMemPort() { return actualXC->getMemPort(); } + + Process *getProcessPtr() { return actualXC->getProcessPtr(); } +#endif + + Status status() const { return actualXC->status(); } + + void setStatus(Status new_status) { actualXC->setStatus(new_status); } + + /// Set the status to Active. Optional delay indicates number of + /// cycles to wait before beginning execution. + void activate(int delay = 1) { actualXC->activate(delay); } + + /// Set the status to Suspended. + void suspend() { actualXC->suspend(); } + + /// Set the status to Unallocated. 
+ void deallocate() { actualXC->deallocate(); } + + /// Set the status to Halted. + void halt() { actualXC->halt(); } + +#if FULL_SYSTEM + void dumpFuncProfile() { actualXC->dumpFuncProfile(); } +#endif + + void takeOverFrom(ExecContext *oldContext) + { actualXC->takeOverFrom(oldContext); } + + void regStats(const std::string &name) { actualXC->regStats(name); } + + void serialize(std::ostream &os) { actualXC->serialize(os); } + void unserialize(Checkpoint *cp, const std::string §ion) + { actualXC->unserialize(cp, section); } + +#if FULL_SYSTEM - Event *getQuiesceEvent() { return actualXC->getQuiesceEvent(); } ++ EndQuiesceEvent *getQuiesceEvent() { return actualXC->getQuiesceEvent(); } + + Tick readLastActivate() { return actualXC->readLastActivate(); } + Tick readLastSuspend() { return actualXC->readLastSuspend(); } + + void profileClear() { return actualXC->profileClear(); } + void profileSample() { return actualXC->profileSample(); } +#endif + + int getThreadNum() { return actualXC->getThreadNum(); } + - int getInstAsid() { return actualXC->getInstAsid(); } - int getDataAsid() { return actualXC->getDataAsid(); } - - Fault translateInstReq(RequestPtr &req) - { return actualXC->translateInstReq(req); } - - Fault translateDataReadReq(RequestPtr &req) - { return actualXC->translateDataReadReq(req); } - - Fault translateDataWriteReq(RequestPtr &req) - { return actualXC->translateDataWriteReq(req); } - + // @todo: Do I need this? + MachInst getInst() { return actualXC->getInst(); } + + // @todo: Do I need this? + void copyArchRegs(ExecContext *xc) { actualXC->copyArchRegs(xc); } + + void clearArchRegs() { actualXC->clearArchRegs(); } + + // + // New accessors for new decoder. 
+ // + uint64_t readIntReg(int reg_idx) + { return actualXC->readIntReg(reg_idx); } + + FloatReg readFloatReg(int reg_idx, int width) + { return actualXC->readFloatReg(reg_idx, width); } + + FloatReg readFloatReg(int reg_idx) + { return actualXC->readFloatReg(reg_idx); } + + FloatRegBits readFloatRegBits(int reg_idx, int width) + { return actualXC->readFloatRegBits(reg_idx, width); } + + FloatRegBits readFloatRegBits(int reg_idx) + { return actualXC->readFloatRegBits(reg_idx); } + + void setIntReg(int reg_idx, uint64_t val) + { actualXC->setIntReg(reg_idx, val); } + + void setFloatReg(int reg_idx, FloatReg val, int width) + { actualXC->setFloatReg(reg_idx, val, width); } + + void setFloatReg(int reg_idx, FloatReg val) + { actualXC->setFloatReg(reg_idx, val); } + + void setFloatRegBits(int reg_idx, FloatRegBits val, int width) + { actualXC->setFloatRegBits(reg_idx, val, width); } + + void setFloatRegBits(int reg_idx, FloatRegBits val) + { actualXC->setFloatRegBits(reg_idx, val); } + + uint64_t readPC() { return actualXC->readPC(); } + + void setPC(uint64_t val) { actualXC->setPC(val); } + + uint64_t readNextPC() { return actualXC->readNextPC(); } + + void setNextPC(uint64_t val) { actualXC->setNextPC(val); } + + uint64_t readNextNPC() { return actualXC->readNextNPC(); } + + void setNextNPC(uint64_t val) { actualXC->setNextNPC(val); } + + MiscReg readMiscReg(int misc_reg) + { return actualXC->readMiscReg(misc_reg); } + + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { return actualXC->readMiscRegWithEffect(misc_reg, fault); } + + Fault setMiscReg(int misc_reg, const MiscReg &val) + { return actualXC->setMiscReg(misc_reg, val); } + + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) + { return actualXC->setMiscRegWithEffect(misc_reg, val); } + + unsigned readStCondFailures() + { return actualXC->readStCondFailures(); } + + void setStCondFailures(unsigned sc_failures) + { actualXC->setStCondFailures(sc_failures); } - +#if FULL_SYSTEM - int 
readIntrFlag() { return actualXC->readIntrFlag(); } - - void setIntrFlag(int val) { actualXC->setIntrFlag(val); } - - Fault hwrei() { return actualXC->hwrei(); } - + bool inPalMode() { return actualXC->inPalMode(); } - - bool simPalCheck(int palFunc) { return actualXC->simPalCheck(palFunc); } +#endif + + // @todo: Fix this! + bool misspeculating() { return actualXC->misspeculating(); } + +#if !FULL_SYSTEM + IntReg getSyscallArg(int i) { return actualXC->getSyscallArg(i); } + + // used to shift args for indirect syscall + void setSyscallArg(int i, IntReg val) + { actualXC->setSyscallArg(i, val); } + + void setSyscallReturn(SyscallReturn return_value) + { actualXC->setSyscallReturn(return_value); } + - void syscall(int64_t callnum) { actualXC->syscall(callnum); } + + Counter readFuncExeInst() { return actualXC->readFuncExeInst(); } - - void setFuncExeInst(Counter new_val) - { return actualXC->setFuncExeInst(new_val); } +#endif + + void changeRegFileContext(RegFile::ContextParam param, + RegFile::ContextVal val) + { + actualXC->changeRegFileContext(param, val); + } +}; + +#endif diff --cc src/cpu/exetrace.cc index 0ed3b43c4,000000000..5ec05ea72 mode 100644,000000..100644 --- a/src/cpu/exetrace.cc +++ b/src/cpu/exetrace.cc @@@ -1,226 -1,0 +1,230 @@@ +/* + * Copyright (c) 2001-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +#include "base/loader/symtab.hh" +#include "cpu/base.hh" +#include "cpu/exetrace.hh" +#include "cpu/static_inst.hh" +#include "sim/param.hh" +#include "sim/system.hh" + +using namespace std; + + +//////////////////////////////////////////////////////////////////////// +// +// Methods for the InstRecord object +// + + +void +Trace::InstRecord::dump(ostream &outs) +{ + if (flags[INTEL_FORMAT]) { +#if FULL_SYSTEM + bool is_trace_system = (cpu->system->name() == trace_system); +#else + bool is_trace_system = true; +#endif + if (is_trace_system) { + ccprintf(outs, "%7d ) ", cycle); + outs << "0x" << hex << PC << ":\t"; + if (staticInst->isLoad()) { + outs << ""; + } else if (staticInst->isStore()) { + outs << ""; + } + outs << endl; + } + } else { + if (flags[PRINT_CYCLE]) + ccprintf(outs, "%7d: ", cycle); + + outs << cpu->name() << " "; + + if (flags[TRACE_MISSPEC]) + outs << (misspeculating ? "-" : "+") << " "; + + if (flags[PRINT_THREAD_NUM]) + outs << "T" << thread << " : "; + + + std::string sym_str; + Addr sym_addr; + if (debugSymbolTable - && debugSymbolTable->findNearestSymbol(PC, sym_str, sym_addr)) { ++ && debugSymbolTable->findNearestSymbol(PC, sym_str, sym_addr) ++ && flags[PC_SYMBOL]) { + if (PC != sym_addr) + sym_str += csprintf("+%d", PC - sym_addr); + outs << "@" << sym_str << " : "; + } + else { + outs << "0x" << hex << PC << " : "; + } + + // + // Print decoded instruction + // + +#if defined(__GNUC__) && (__GNUC__ < 3) + // There's a bug in gcc 2.x library that prevents setw() + // from working properly on strings + string mc(staticInst->disassemble(PC, debugSymbolTable)); + while (mc.length() < 26) + mc += " "; + outs << mc; +#else + outs << setw(26) << left << staticInst->disassemble(PC, debugSymbolTable); +#endif + + outs << " : "; + + if (flags[PRINT_OP_CLASS]) { + outs << opClassStrings[staticInst->opClass()] << " : "; + } + + if (flags[PRINT_RESULT_DATA] && data_status != DataInvalid) { + outs << " D="; +#if 0 + if 
(data_status == DataDouble) + ccprintf(outs, "%f", data.as_double); + else + ccprintf(outs, "%#018x", data.as_int); +#else + ccprintf(outs, "%#018x", data.as_int); +#endif + } + + if (flags[PRINT_EFF_ADDR] && addr_valid) + outs << " A=0x" << hex << addr; + + if (flags[PRINT_INT_REGS] && regs_valid) { + for (int i = 0; i < TheISA::NumIntRegs;) + for (int j = i + 1; i <= j; i++) + ccprintf(outs, "r%02d = %#018x%s", i, + iregs->regs.readReg(i), + ((i == j) ? "\n" : " ")); + outs << "\n"; + } + + if (flags[PRINT_FETCH_SEQ] && fetch_seq_valid) + outs << " FetchSeq=" << dec << fetch_seq; + + if (flags[PRINT_CP_SEQ] && cp_seq_valid) + outs << " CPSeq=" << dec << cp_seq; + + // + // End of line... + // + outs << endl; + } +} + + +vector Trace::InstRecord::flags(NUM_BITS); +string Trace::InstRecord::trace_system; + +//////////////////////////////////////////////////////////////////////// +// +// Parameter space for per-cycle execution address tracing options. +// Derive from ParamContext so we can override checkParams() function. 
+// +class ExecutionTraceParamContext : public ParamContext +{ + public: + ExecutionTraceParamContext(const string &_iniSection) + : ParamContext(_iniSection) + { + } + + void checkParams(); // defined at bottom of file +}; + +ExecutionTraceParamContext exeTraceParams("exetrace"); + +Param exe_trace_spec(&exeTraceParams, "speculative", + "capture speculative instructions", true); + +Param exe_trace_print_cycle(&exeTraceParams, "print_cycle", + "print cycle number", true); +Param exe_trace_print_opclass(&exeTraceParams, "print_opclass", + "print op class", true); +Param exe_trace_print_thread(&exeTraceParams, "print_thread", + "print thread number", true); +Param exe_trace_print_effaddr(&exeTraceParams, "print_effaddr", + "print effective address", true); +Param exe_trace_print_data(&exeTraceParams, "print_data", + "print result data", true); +Param exe_trace_print_iregs(&exeTraceParams, "print_iregs", + "print all integer regs", false); +Param exe_trace_print_fetchseq(&exeTraceParams, "print_fetchseq", + "print fetch sequence number", false); +Param exe_trace_print_cp_seq(&exeTraceParams, "print_cpseq", + "print correct-path sequence number", false); ++Param exe_trace_pc_symbol(&exeTraceParams, "pc_symbol", ++ "Use symbols for the PC if available", true); +Param exe_trace_intel_format(&exeTraceParams, "intel_format", + "print trace in intel compatible format", false); +Param exe_trace_system(&exeTraceParams, "trace_system", + "print trace of which system (client or server)", + "client"); + + +// +// Helper function for ExecutionTraceParamContext::checkParams() just +// to get us into the InstRecord namespace +// +void +Trace::InstRecord::setParams() +{ + flags[TRACE_MISSPEC] = exe_trace_spec; + + flags[PRINT_CYCLE] = exe_trace_print_cycle; + flags[PRINT_OP_CLASS] = exe_trace_print_opclass; + flags[PRINT_THREAD_NUM] = exe_trace_print_thread; + flags[PRINT_RESULT_DATA] = exe_trace_print_effaddr; + flags[PRINT_EFF_ADDR] = exe_trace_print_data; + flags[PRINT_INT_REGS] 
= exe_trace_print_iregs; + flags[PRINT_FETCH_SEQ] = exe_trace_print_fetchseq; + flags[PRINT_CP_SEQ] = exe_trace_print_cp_seq; ++ flags[PC_SYMBOL] = exe_trace_pc_symbol; + flags[INTEL_FORMAT] = exe_trace_intel_format; + trace_system = exe_trace_system; +} + +void +ExecutionTraceParamContext::checkParams() +{ + Trace::InstRecord::setParams(); +} + diff --cc src/cpu/exetrace.hh index a26cdc517,000000000..7b86a9344 mode 100644,000000..100644 --- a/src/cpu/exetrace.hh +++ b/src/cpu/exetrace.hh @@@ -1,188 -1,0 +1,189 @@@ +/* + * Copyright (c) 2001-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __EXETRACE_HH__ +#define __EXETRACE_HH__ + +#include +#include + +#include "sim/host.hh" +#include "cpu/inst_seq.hh" // for InstSeqNum +#include "base/trace.hh" +#include "cpu/exec_context.hh" +#include "cpu/static_inst.hh" + +class BaseCPU; + + +namespace Trace { + +class InstRecord : public Record +{ + protected: + typedef TheISA::IntRegFile IntRegFile; + + // The following fields are initialized by the constructor and + // thus guaranteed to be valid. + BaseCPU *cpu; + // need to make this ref-counted so it doesn't go away before we + // dump the record + StaticInstPtr staticInst; + Addr PC; + bool misspeculating; + unsigned thread; + + // The remaining fields are only valid for particular instruction + // types (e.g, addresses for memory ops) or when particular + // options are enabled (e.g., tracing full register contents). + // Each data field has an associated valid flag to indicate + // whether the data field is valid. 
+ Addr addr; + bool addr_valid; + + union { + uint64_t as_int; + double as_double; + } data; + enum { + DataInvalid = 0, + DataInt8 = 1, // set to equal number of bytes + DataInt16 = 2, + DataInt32 = 4, + DataInt64 = 8, + DataDouble = 3 + } data_status; + + InstSeqNum fetch_seq; + bool fetch_seq_valid; + + InstSeqNum cp_seq; + bool cp_seq_valid; + + struct iRegFile { + IntRegFile regs; + }; + iRegFile *iregs; + bool regs_valid; + + public: + InstRecord(Tick _cycle, BaseCPU *_cpu, + const StaticInstPtr &_staticInst, + Addr _pc, bool spec, int _thread) + : Record(_cycle), cpu(_cpu), staticInst(_staticInst), PC(_pc), + misspeculating(spec), thread(_thread) + { + data_status = DataInvalid; + addr_valid = false; + regs_valid = false; + + fetch_seq_valid = false; + cp_seq_valid = false; + } + + virtual ~InstRecord() { } + + virtual void dump(std::ostream &outs); + + void setAddr(Addr a) { addr = a; addr_valid = true; } + + void setData(uint64_t d) { data.as_int = d; data_status = DataInt64; } + void setData(uint32_t d) { data.as_int = d; data_status = DataInt32; } + void setData(uint16_t d) { data.as_int = d; data_status = DataInt16; } + void setData(uint8_t d) { data.as_int = d; data_status = DataInt8; } + + void setData(int64_t d) { setData((uint64_t)d); } + void setData(int32_t d) { setData((uint32_t)d); } + void setData(int16_t d) { setData((uint16_t)d); } + void setData(int8_t d) { setData((uint8_t)d); } + + void setData(double d) { data.as_double = d; data_status = DataDouble; } + + void setFetchSeq(InstSeqNum seq) + { fetch_seq = seq; fetch_seq_valid = true; } + + void setCPSeq(InstSeqNum seq) + { cp_seq = seq; cp_seq_valid = true; } + + void setRegs(const IntRegFile ®s); + + void finalize() { theLog.append(this); } + + enum InstExecFlagBits { + TRACE_MISSPEC = 0, + PRINT_CYCLE, + PRINT_OP_CLASS, + PRINT_THREAD_NUM, + PRINT_RESULT_DATA, + PRINT_EFF_ADDR, + PRINT_INT_REGS, + PRINT_FETCH_SEQ, + PRINT_CP_SEQ, ++ PC_SYMBOL, + INTEL_FORMAT, + NUM_BITS + }; + + static 
std::vector flags; + static std::string trace_system; + + static void setParams(); + + static bool traceMisspec() { return flags[TRACE_MISSPEC]; } +}; + + +inline void +InstRecord::setRegs(const IntRegFile ®s) +{ + if (!iregs) + iregs = new iRegFile; + + memcpy(&iregs->regs, ®s, sizeof(IntRegFile)); + regs_valid = true; +} + +inline +InstRecord * +getInstRecord(Tick cycle, ExecContext *xc, BaseCPU *cpu, + const StaticInstPtr staticInst, + Addr pc, int thread = 0) +{ + if (DTRACE(InstExec) && + (InstRecord::traceMisspec() || !xc->misspeculating())) { + return new InstRecord(cycle, cpu, staticInst, pc, + xc->misspeculating(), thread); + } + + return NULL; +} + + +} + +#endif // __EXETRACE_HH__ diff --cc src/cpu/inst_seq.hh index 8de047af7,000000000..356d19df0 mode 100644,000000..100644 --- a/src/cpu/inst_seq.hh +++ b/src/cpu/inst_seq.hh @@@ -1,40 -1,0 +1,42 @@@ +/* + * Copyright (c) 2001, 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __STD_TYPES_HH__ +#define __STD_TYPES_HH__ + ++#include ++ +// inst sequence type, used to order instructions in the ready list, +// if this rolls over the ready list order temporarily will get messed +// up, but execution will continue and complete correctly +typedef uint64_t InstSeqNum; + +// inst tag type, used to tag an operation instance in the IQ +typedef unsigned int InstTag; + +#endif // __STD_TYPES_HH__ diff --cc src/cpu/o3/2bit_local_pred.cc index d9744eec7,000000000..c3fb2fdb8 mode 100644,000000..100644 --- a/src/cpu/o3/2bit_local_pred.cc +++ b/src/cpu/o3/2bit_local_pred.cc @@@ -1,129 -1,0 +1,142 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + ++#include "base/intmath.hh" +#include "base/trace.hh" +#include "cpu/o3/2bit_local_pred.hh" + +DefaultBP::DefaultBP(unsigned _localPredictorSize, + unsigned _localCtrBits, + unsigned _instShiftAmt) + : localPredictorSize(_localPredictorSize), + localCtrBits(_localCtrBits), + instShiftAmt(_instShiftAmt) +{ - // Should do checks here to make sure sizes are correct (powers of 2). 
++ if (!isPowerOf2(localPredictorSize)) { ++ fatal("Invalid local predictor size!\n"); ++ } ++ ++ localPredictorSets = localPredictorSize / localCtrBits; ++ ++ if (!isPowerOf2(localPredictorSets)) { ++ fatal("Invalid number of local predictor sets! Check localCtrBits.\n"); ++ } + + // Setup the index mask. - indexMask = localPredictorSize - 1; ++ indexMask = localPredictorSets - 1; + + DPRINTF(Fetch, "Branch predictor: index mask: %#x\n", indexMask); + + // Setup the array of counters for the local predictor. - localCtrs = new SatCounter[localPredictorSize]; ++ localCtrs.resize(localPredictorSets); + - for (int i = 0; i < localPredictorSize; ++i) ++ for (int i = 0; i < localPredictorSets; ++i) + localCtrs[i].setBits(_localCtrBits); + + DPRINTF(Fetch, "Branch predictor: local predictor size: %i\n", + localPredictorSize); + + DPRINTF(Fetch, "Branch predictor: local counter bits: %i\n", localCtrBits); + + DPRINTF(Fetch, "Branch predictor: instruction shift amount: %i\n", + instShiftAmt); +} + ++void ++DefaultBP::reset() ++{ ++ for (int i = 0; i < localPredictorSets; ++i) { ++ localCtrs[i].reset(); ++ } ++} ++ +bool +DefaultBP::lookup(Addr &branch_addr) +{ + bool taken; + uint8_t local_prediction; + unsigned local_predictor_idx = getLocalIndex(branch_addr); + + DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n", + local_predictor_idx); + - assert(local_predictor_idx < localPredictorSize); - + local_prediction = localCtrs[local_predictor_idx].read(); + + DPRINTF(Fetch, "Branch predictor: prediction is %i.\n", + (int)local_prediction); + + taken = getPrediction(local_prediction); + +#if 0 + // Speculative update. 
+ if (taken) { + DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n"); + localCtrs[local_predictor_idx].increment(); + } else { + DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n"); + localCtrs[local_predictor_idx].decrement(); + } +#endif + + return taken; +} + +void +DefaultBP::update(Addr &branch_addr, bool taken) +{ + unsigned local_predictor_idx; + + // Update the local predictor. + local_predictor_idx = getLocalIndex(branch_addr); + + DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n", + local_predictor_idx); + - assert(local_predictor_idx < localPredictorSize); - + if (taken) { + DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n"); + localCtrs[local_predictor_idx].increment(); + } else { + DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n"); + localCtrs[local_predictor_idx].decrement(); + } +} + +inline +bool +DefaultBP::getPrediction(uint8_t &count) +{ + // Get the MSB of the count + return (count >> (localCtrBits - 1)); +} + +inline +unsigned +DefaultBP::getLocalIndex(Addr &branch_addr) +{ + return (branch_addr >> instShiftAmt) & indexMask; +} diff --cc src/cpu/o3/2bit_local_pred.hh index 97433e542,000000000..cd65978ca mode 100644,000000..100644 --- a/src/cpu/o3/2bit_local_pred.hh +++ b/src/cpu/o3/2bit_local_pred.hh @@@ -1,86 -1,0 +1,99 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - #ifndef __CPU_O3_CPU_2BIT_LOCAL_PRED_HH__ - #define __CPU_O3_CPU_2BIT_LOCAL_PRED_HH__ ++#ifndef __CPU_O3_2BIT_LOCAL_PRED_HH__ ++#define __CPU_O3_2BIT_LOCAL_PRED_HH__ + +// For Addr type. +#include "arch/isa_traits.hh" +#include "cpu/o3/sat_counter.hh" + ++#include ++ +class DefaultBP +{ + public: + /** + * Default branch predictor constructor. ++ * @param localPredictorSize Size of the local predictor. ++ * @param localCtrBits Number of bits per counter. 
++ * @param instShiftAmt Offset amount for instructions to ignore alignment. + */ + DefaultBP(unsigned localPredictorSize, unsigned localCtrBits, + unsigned instShiftAmt); + + /** + * Looks up the given address in the branch predictor and returns + * a true/false value as to whether it is taken. + * @param branch_addr The address of the branch to look up. + * @return Whether or not the branch is taken. + */ + bool lookup(Addr &branch_addr); + + /** + * Updates the branch predictor with the actual result of a branch. + * @param branch_addr The address of the branch to update. + * @param taken Whether or not the branch was taken. + */ + void update(Addr &branch_addr, bool taken); + ++ void reset(); ++ + private: + - /** Returns the taken/not taken prediction given the value of the ++ /** ++ * Returns the taken/not taken prediction given the value of the + * counter. ++ * @param count The value of the counter. ++ * @return The prediction based on the counter value. + */ + inline bool getPrediction(uint8_t &count); + + /** Calculates the local index based on the PC. */ + inline unsigned getLocalIndex(Addr &PC); + + /** Array of counters that make up the local predictor. */ - SatCounter *localCtrs; ++ std::vector localCtrs; + + /** Size of the local predictor. */ + unsigned localPredictorSize; + ++ /** Number of sets. */ ++ unsigned localPredictorSets; ++ + /** Number of bits of the local predictor's counters. */ + unsigned localCtrBits; + + /** Number of bits to shift the PC when calculating index. */ + unsigned instShiftAmt; + + /** Mask to get index bits. 
*/ + unsigned indexMask; +}; + - #endif // __CPU_O3_CPU_2BIT_LOCAL_PRED_HH__ ++#endif // __CPU_O3_2BIT_LOCAL_PRED_HH__ diff --cc src/cpu/o3/alpha_cpu.hh index 8e1e0f42a,000000000..1bab0703e mode 100644,000000..100644 --- a/src/cpu/o3/alpha_cpu.hh +++ b/src/cpu/o3/alpha_cpu.hh @@@ -1,291 -1,0 +1,430 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + - // Todo: Find all the stuff in ExecContext and ev5 that needs to be - // specifically designed for this CPU. ++#ifndef __CPU_O3_ALPHA_FULL_CPU_HH__ ++#define __CPU_O3_ALPHA_FULL_CPU_HH__ + - #ifndef __CPU_O3_CPU_ALPHA_FULL_CPU_HH__ - #define __CPU_O3_CPU_ALPHA_FULL_CPU_HH__ - - #include "cpu/o3/cpu.hh" +#include "arch/isa_traits.hh" ++#include "cpu/exec_context.hh" ++#include "cpu/o3/cpu.hh" +#include "sim/byteswap.hh" + ++class EndQuiesceEvent; ++namespace Kernel { ++ class Statistics; ++}; ++ +template +class AlphaFullCPU : public FullO3CPU +{ + protected: + typedef TheISA::IntReg IntReg; + typedef TheISA::MiscReg MiscReg; + typedef TheISA::RegFile RegFile; + typedef TheISA::MiscRegFile MiscRegFile; + + public: ++ typedef O3ThreadState ImplState; ++ typedef O3ThreadState Thread; + typedef typename Impl::Params Params; + - public: - AlphaFullCPU(Params ¶ms); ++ /** Constructs an AlphaFullCPU with the given parameters. */ ++ AlphaFullCPU(Params *params); ++ ++ class AlphaXC : public ExecContext ++ { ++ public: ++ AlphaFullCPU *cpu; ++ ++ O3ThreadState *thread; ++ ++ virtual BaseCPU *getCpuPtr() { return cpu; } ++ ++ virtual void setCpuId(int id) { cpu->cpu_id = id; } ++ ++ virtual int readCpuId() { return cpu->cpu_id; } ++ ++ virtual FunctionalMemory *getMemPtr() { return thread->mem; } ++ ++#if FULL_SYSTEM ++ virtual System *getSystemPtr() { return cpu->system; } ++ ++ virtual PhysicalMemory *getPhysMemPtr() { return cpu->physmem; } ++ ++ virtual AlphaITB *getITBPtr() { return cpu->itb; } ++ ++ virtual AlphaDTB * getDTBPtr() { return cpu->dtb; } ++ ++ virtual Kernel::Statistics *getKernelStats() ++ { return thread->kernelStats; } ++#else ++ virtual Process *getProcessPtr() { return thread->process; } ++#endif ++ ++ virtual Status status() const { return thread->status(); } ++ ++ virtual void setStatus(Status new_status) ++ { thread->setStatus(new_status); } ++ ++ /// Set the status to Active. 
Optional delay indicates number of ++ /// cycles to wait before beginning execution. ++ virtual void activate(int delay = 1); ++ ++ /// Set the status to Suspended. ++ virtual void suspend(); ++ ++ /// Set the status to Unallocated. ++ virtual void deallocate(); ++ ++ /// Set the status to Halted. ++ virtual void halt(); ++ ++#if FULL_SYSTEM ++ virtual void dumpFuncProfile(); ++#endif ++ ++ virtual void takeOverFrom(ExecContext *old_context); ++ ++ virtual void regStats(const std::string &name); ++ ++ virtual void serialize(std::ostream &os); ++ virtual void unserialize(Checkpoint *cp, const std::string §ion); ++ ++#if FULL_SYSTEM ++ virtual EndQuiesceEvent *getQuiesceEvent(); ++ ++ virtual Tick readLastActivate(); ++ virtual Tick readLastSuspend(); ++ ++ virtual void profileClear(); ++ virtual void profileSample(); ++#endif ++ ++ virtual int getThreadNum() { return thread->tid; } ++ ++ virtual TheISA::MachInst getInst(); ++ ++ virtual void copyArchRegs(ExecContext *xc); ++ ++ virtual void clearArchRegs(); ++ ++ virtual uint64_t readIntReg(int reg_idx); ++ ++ virtual float readFloatRegSingle(int reg_idx); ++ ++ virtual double readFloatRegDouble(int reg_idx); ++ ++ virtual uint64_t readFloatRegInt(int reg_idx); ++ ++ virtual void setIntReg(int reg_idx, uint64_t val); ++ ++ virtual void setFloatRegSingle(int reg_idx, float val); ++ ++ virtual void setFloatRegDouble(int reg_idx, double val); ++ ++ virtual void setFloatRegInt(int reg_idx, uint64_t val); ++ ++ virtual uint64_t readPC() ++ { return cpu->readPC(thread->tid); } ++ ++ virtual void setPC(uint64_t val); ++ ++ virtual uint64_t readNextPC() ++ { return cpu->readNextPC(thread->tid); } ++ ++ virtual void setNextPC(uint64_t val); ++ ++ virtual MiscReg readMiscReg(int misc_reg) ++ { return cpu->readMiscReg(misc_reg, thread->tid); } ++ ++ virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) ++ { return cpu->readMiscRegWithEffect(misc_reg, fault, thread->tid); } ++ ++ virtual Fault setMiscReg(int 
misc_reg, const MiscReg &val); ++ ++ virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); ++ ++ // @todo: Figure out where these store cond failures should go. ++ virtual unsigned readStCondFailures() ++ { return thread->storeCondFailures; } ++ ++ virtual void setStCondFailures(unsigned sc_failures) ++ { thread->storeCondFailures = sc_failures; } ++ ++#if FULL_SYSTEM ++ virtual bool inPalMode() ++ { return TheISA::PcPAL(cpu->readPC(thread->tid)); } ++#endif ++ ++ // Only really makes sense for old CPU model. Lots of code ++ // outside the CPU still checks this function, so it will ++ // always return false to keep everything working. ++ virtual bool misspeculating() { return false; } ++ ++#if !FULL_SYSTEM ++ virtual IntReg getSyscallArg(int i); ++ ++ virtual void setSyscallArg(int i, IntReg val); ++ ++ virtual void setSyscallReturn(SyscallReturn return_value); ++ ++ virtual void syscall() { return cpu->syscall(thread->tid); } ++ ++ virtual Counter readFuncExeInst() { return thread->funcExeInst; } ++#endif ++ }; + +#if FULL_SYSTEM ++ /** ITB pointer. */ + AlphaITB *itb; ++ /** DTB pointer. */ + AlphaDTB *dtb; +#endif + - public: ++ /** Registers statistics. */ + void regStats(); + +#if FULL_SYSTEM - //Note that the interrupt stuff from the base CPU might be somewhat - //ISA specific (ie NumInterruptLevels). These functions might not - //be needed in FullCPU though. - // void post_interrupt(int int_num, int index); - // void clear_interrupt(int int_num, int index); - // void clear_interrupts(); - ++ /** Translates instruction requestion. */ + Fault translateInstReq(MemReqPtr &req) + { + return itb->translate(req); + } + ++ /** Translates data read request. */ + Fault translateDataReadReq(MemReqPtr &req) + { + return dtb->translate(req, false); + } + ++ /** Translates data write request. 
*/ + Fault translateDataWriteReq(MemReqPtr &req) + { + return dtb->translate(req, true); + } + +#else + Fault dummyTranslation(MemReqPtr &req) + { +#if 0 + assert((req->vaddr >> 48 & 0xffff) == 0); +#endif + + // put the asid in the upper 16 bits of the paddr + req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16); + req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16; + return NoFault; + } + ++ /** Translates instruction requestion in syscall emulation mode. */ + Fault translateInstReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + ++ /** Translates data read request in syscall emulation mode. */ + Fault translateDataReadReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + ++ /** Translates data write request in syscall emulation mode. */ + Fault translateDataWriteReq(MemReqPtr &req) + { + return dummyTranslation(req); + } + +#endif ++ MiscReg readMiscReg(int misc_reg, unsigned tid); + - // Later on may want to remove this misc stuff from the regfile and - // have it handled at this level. Might prove to be an issue when - // trying to rename source/destination registers... - MiscReg readMiscReg(int misc_reg) - { - // Dummy function for now. - // @todo: Fix this once reg file gets fixed. - return 0; - } ++ MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid); + - Fault setMiscReg(int misc_reg, const MiscReg &val) - { - // Dummy function for now. - // @todo: Fix this once reg file gets fixed. - return NoFault; - } ++ Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid); ++ ++ Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid); ++ ++ void squashFromXC(unsigned tid); + - // Most of the full system code and syscall emulation is not yet - // implemented. These functions do show what the final interface will - // look like. +#if FULL_SYSTEM ++ void post_interrupt(int int_num, int index); ++ + int readIntrFlag(); ++ /** Sets the interrupt flags. 
*/ + void setIntrFlag(int val); - Fault hwrei(); - bool inPalMode() { return AlphaISA::PcPAL(this->regFile.readPC()); } ++ /** HW return from error interrupt. */ ++ Fault hwrei(unsigned tid); ++ /** Returns if a specific PC is a PAL mode PC. */ + bool inPalMode(uint64_t PC) + { return AlphaISA::PcPAL(PC); } + - void trap(Fault fault); - bool simPalCheck(int palFunc); ++ /** Traps to handle given fault. */ ++ void trap(Fault fault, unsigned tid); ++ bool simPalCheck(int palFunc, unsigned tid); + ++ /** Processes any interrupts. */ + void processInterrupts(); - #endif - - - #if !FULL_SYSTEM - // Need to change these into regfile calls that directly set a certain - // register. Actually, these functions should handle most of this - // functionality by themselves; should look up the rename and then - // set the register. - IntReg getSyscallArg(int i) - { - return this->cpuXC->readIntReg(AlphaISA::ArgumentReg0 + i); - } - - // used to shift args for indirect syscall - void setSyscallArg(int i, IntReg val) - { - this->cpuXC->setIntReg(AlphaISA::ArgumentReg0 + i, val); - } - - void setSyscallReturn(int64_t return_value) - { - // check for error condition. Alpha syscall convention is to - // indicate success/failure in reg a3 (r19) and put the - // return value itself in the standard return value reg (v0). - const int RegA3 = 19; // only place this is used - if (return_value >= 0) { - // no error - this->cpuXC->setIntReg(RegA3, 0); - this->cpuXC->setIntReg(AlphaISA::ReturnValueReg, return_value); - } else { - // got an error, return details - this->cpuXC->setIntReg(RegA3, (IntReg) -1); - this->cpuXC->setIntReg(AlphaISA::ReturnValueReg, -return_value); - } - } - - void syscall(short thread_num); - void squashStages(); + ++ /** Halts the CPU. */ ++ void halt() { panic("Halt not implemented!\n"); } +#endif + - void copyToXC(); - void copyFromXC(); - - public: - #if FULL_SYSTEM - bool palShadowEnabled; - - // Not sure this is used anywhere. 
- void intr_post(RegFile *regs, Fault fault, Addr pc); - // Actually used within exec files. Implement properly. - void swapPALShadow(bool use_shadow); - // Called by CPU constructor. Can implement as I please. - void initCPU(RegFile *regs); - // Called by initCPU. Implement as I please. - void initIPRs(RegFile *regs); + - void halt() { panic("Halt not implemented!\n"); } ++#if !FULL_SYSTEM ++ /** Executes a syscall. ++ * @todo: Determine if this needs to be virtual. ++ */ ++ void syscall(int thread_num); ++ /** Gets a syscall argument. */ ++ IntReg getSyscallArg(int i, int tid); ++ ++ /** Used to shift args for indirect syscall. */ ++ void setSyscallArg(int i, IntReg val, int tid); ++ ++ /** Sets the return value of a syscall. */ ++ void setSyscallReturn(SyscallReturn return_value, int tid); +#endif + - ++ /** Read from memory function. */ + template + Fault read(MemReqPtr &req, T &data) + { ++#if 0 +#if FULL_SYSTEM && THE_ISA == ALPHA_ISA + if (req->flags & LOCKED) { + req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr); + req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true); + } +#endif - ++#endif + Fault error; ++ ++#if FULL_SYSTEM ++ // @todo: Fix this LL/SC hack. ++ if (req->flags & LOCKED) { ++ lockAddr = req->paddr; ++ lockFlag = true; ++ } ++#endif ++ + error = this->mem->read(req, data); + data = gtoh(data); + return error; + } + ++ /** CPU read function, forwards read to LSQ. */ + template + Fault read(MemReqPtr &req, T &data, int load_idx) + { + return this->iew.ldstQueue.read(req, data, load_idx); + } + ++ /** Write to memory function. */ + template + Fault write(MemReqPtr &req, T &data) + { ++#if 0 +#if FULL_SYSTEM && THE_ISA == ALPHA_ISA + ExecContext *xc; + + // If this is a store conditional, act appropriately + if (req->flags & LOCKED) { + xc = req->xc; + + if (req->flags & UNCACHEABLE) { + // Don't update result register (see stq_c in isa_desc) + req->result = 2; + xc->setStCondFailures(0);//Needed? 
[RGD] + } else { + bool lock_flag = xc->readMiscReg(TheISA::Lock_Flag_DepTag); + Addr lock_addr = xc->readMiscReg(TheISA::Lock_Addr_DepTag); + req->result = lock_flag; + if (!lock_flag || + ((lock_addr & ~0xf) != (req->paddr & ~0xf))) { + xc->setMiscReg(TheISA::Lock_Flag_DepTag, false); + xc->setStCondFailures(xc->readStCondFailures() + 1); + if (((xc->readStCondFailures()) % 100000) == 0) { + std::cerr << "Warning: " + << xc->readStCondFailures() + << " consecutive store conditional failures " + << "on cpu " << req->xc->readCpuId() + << std::endl; + } + return NoFault; + } + else xc->setStCondFailures(0); + } + } + + // Need to clear any locked flags on other proccessors for + // this address. Only do this for succsful Store Conditionals + // and all other stores (WH64?). Unsuccessful Store + // Conditionals would have returned above, and wouldn't fall + // through. + for (int i = 0; i < this->system->execContexts.size(); i++){ + xc = this->system->execContexts[i]; + if ((xc->readMiscReg(TheISA::Lock_Addr_DepTag) & ~0xf) == + (req->paddr & ~0xf)) { + xc->setMiscReg(TheISA::Lock_Flag_DepTag, false); + } + } + ++#endif ++#endif ++ ++#if FULL_SYSTEM ++ // @todo: Fix this LL/SC hack. ++ if (req->flags & LOCKED) { ++ if (req->flags & UNCACHEABLE) { ++ req->result = 2; ++ } else { ++ if (this->lockFlag) { ++ req->result = 1; ++ } else { ++ req->result = 0; ++ return NoFault; ++ } ++ } ++ } +#endif + + return this->mem->write(req, (T)htog(data)); + } + ++ /** CPU write function, forwards write to LSQ. 
*/ + template + Fault write(MemReqPtr &req, T &data, int store_idx) + { + return this->iew.ldstQueue.write(req, data, store_idx); + } + ++ Addr lockAddr; ++ ++ bool lockFlag; +}; + - #endif // __CPU_O3_CPU_ALPHA_FULL_CPU_HH__ ++#endif // __CPU_O3_ALPHA_FULL_CPU_HH__ diff --cc src/cpu/o3/alpha_cpu_builder.cc index 6025b8ef2,000000000..b0d812edc mode 100644,000000..100644 --- a/src/cpu/o3/alpha_cpu_builder.cc +++ b/src/cpu/o3/alpha_cpu_builder.cc @@@ -1,392 -1,0 +1,417 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - #include "base/inifile.hh" - #include "base/loader/symtab.hh" - #include "base/misc.hh" ++#include ++ +#include "cpu/base.hh" - #include "cpu/exetrace.hh" +#include "cpu/o3/alpha_cpu.hh" +#include "cpu/o3/alpha_impl.hh" - #include "mem/base_mem.hh" ++#include "cpu/o3/alpha_params.hh" ++#include "cpu/o3/fu_pool.hh" +#include "mem/cache/base_cache.hh" - #include "mem/mem_interface.hh" +#include "sim/builder.hh" - #include "sim/debug.hh" - #include "sim/host.hh" - #include "sim/process.hh" - #include "sim/sim_events.hh" - #include "sim/sim_object.hh" - #include "sim/stats.hh" - - #if FULL_SYSTEM - #include "base/remote_gdb.hh" - #include "mem/functional/memory_control.hh" - #include "mem/functional/physical.hh" - #include "sim/system.hh" - #include "arch/tlb.hh" - #include "arch/vtophys.hh" - #else // !FULL_SYSTEM - #include "mem/functional/functional.hh" - #endif // FULL_SYSTEM + +class DerivAlphaFullCPU : public AlphaFullCPU +{ + public: - DerivAlphaFullCPU(AlphaSimpleParams p) ++ DerivAlphaFullCPU(AlphaSimpleParams *p) + : AlphaFullCPU(p) + { } +}; + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) + + Param clock; + Param numThreads; ++Param activity; + +#if FULL_SYSTEM +SimObjectParam system; +Param cpu_id; +SimObjectParam itb; +SimObjectParam dtb; +#else +SimObjectVectorParam workload; ++//SimObjectParam page_table; +#endif // FULL_SYSTEM ++ +SimObjectParam mem; + ++SimObjectParam checker; ++ +Param 
max_insts_any_thread; +Param max_insts_all_threads; +Param max_loads_any_thread; +Param max_loads_all_threads; + +SimObjectParam icache; +SimObjectParam dcache; + ++Param cachePorts; ++ +Param decodeToFetchDelay; +Param renameToFetchDelay; +Param iewToFetchDelay; +Param commitToFetchDelay; +Param fetchWidth; + +Param renameToDecodeDelay; +Param iewToDecodeDelay; +Param commitToDecodeDelay; +Param fetchToDecodeDelay; +Param decodeWidth; + +Param iewToRenameDelay; +Param commitToRenameDelay; +Param decodeToRenameDelay; +Param renameWidth; + +Param commitToIEWDelay; +Param renameToIEWDelay; +Param issueToExecuteDelay; +Param issueWidth; +Param executeWidth; +Param executeIntWidth; +Param executeFloatWidth; +Param executeBranchWidth; +Param executeMemoryWidth; ++SimObjectParam fuPool; + +Param iewToCommitDelay; +Param renameToROBDelay; +Param commitWidth; +Param squashWidth; ++Param trapLatency; ++Param fetchTrapLatency; + - #if 0 +Param localPredictorSize; - Param localPredictorCtrBits; - #endif - Param local_predictor_size; - Param local_ctr_bits; - Param local_history_table_size; - Param local_history_bits; - Param global_predictor_size; - Param global_ctr_bits; - Param global_history_bits; - Param choice_predictor_size; - Param choice_ctr_bits; ++Param localCtrBits; ++Param localHistoryTableSize; ++Param localHistoryBits; ++Param globalPredictorSize; ++Param globalCtrBits; ++Param globalHistoryBits; ++Param choicePredictorSize; ++Param choiceCtrBits; + +Param BTBEntries; +Param BTBTagSize; + +Param RASSize; + +Param LQEntries; +Param SQEntries; +Param LFSTSize; +Param SSITSize; + +Param numPhysIntRegs; +Param numPhysFloatRegs; +Param numIQEntries; +Param numROBEntries; + ++Param smtNumFetchingThreads; ++Param smtFetchPolicy; ++Param smtLSQPolicy; ++Param smtLSQThreshold; ++Param smtIQPolicy; ++Param smtIQThreshold; ++Param smtROBPolicy; ++Param smtROBThreshold; ++Param smtCommitPolicy; ++ +Param instShiftAmt; + +Param defer_registration; + +Param function_trace; 
+Param function_trace_start; + +END_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) + + INIT_PARAM(clock, "clock speed"), + INIT_PARAM(numThreads, "number of HW thread contexts"), ++ INIT_PARAM_DFLT(activity, "Initial activity count", 0), + +#if FULL_SYSTEM + INIT_PARAM(system, "System object"), + INIT_PARAM(cpu_id, "processor ID"), + INIT_PARAM(itb, "Instruction translation buffer"), + INIT_PARAM(dtb, "Data translation buffer"), +#else + INIT_PARAM(workload, "Processes to run"), ++// INIT_PARAM(page_table, "Page table"), +#endif // FULL_SYSTEM + + INIT_PARAM_DFLT(mem, "Memory", NULL), + ++ INIT_PARAM_DFLT(checker, "Checker CPU", NULL), ++ + INIT_PARAM_DFLT(max_insts_any_thread, + "Terminate when any thread reaches this inst count", + 0), + INIT_PARAM_DFLT(max_insts_all_threads, + "Terminate when all threads have reached" + "this inst count", + 0), + INIT_PARAM_DFLT(max_loads_any_thread, + "Terminate when any thread reaches this load count", + 0), + INIT_PARAM_DFLT(max_loads_all_threads, + "Terminate when all threads have reached this load" + "count", + 0), + + INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), + INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), + ++ INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), ++ + INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), + INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), + INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" + "delay"), + INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), + INIT_PARAM(fetchWidth, "Fetch width"), - + INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), + INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" + "delay"), + INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), + INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), + INIT_PARAM(decodeWidth, "Decode width"), + + INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename" + "delay"), + 
INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), + INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), + INIT_PARAM(renameWidth, "Rename width"), + + INIT_PARAM(commitToIEWDelay, "Commit to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(renameToIEWDelay, "Rename to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" + "to the IEW stage)"), + INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(executeWidth, "Execute width"), + INIT_PARAM(executeIntWidth, "Integer execute width"), + INIT_PARAM(executeFloatWidth, "Floating point execute width"), + INIT_PARAM(executeBranchWidth, "Branch execute width"), + INIT_PARAM(executeMemoryWidth, "Memory execute width"), ++ INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL), + + INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " + "delay"), + INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), + INIT_PARAM(commitWidth, "Commit width"), + INIT_PARAM(squashWidth, "Squash width"), - - #if 0 - INIT_PARAM(localPredictorSize, "Size of the local predictor in entries. 
" - "Must be a power of 2."), - INIT_PARAM(localPredictorCtrBits, "Number of bits per counter for bpred"), - #endif - INIT_PARAM(local_predictor_size, "Size of local predictor"), - INIT_PARAM(local_ctr_bits, "Bits per counter"), - INIT_PARAM(local_history_table_size, "Size of local history table"), - INIT_PARAM(local_history_bits, "Bits for the local history"), - INIT_PARAM(global_predictor_size, "Size of global predictor"), - INIT_PARAM(global_ctr_bits, "Bits per counter"), - INIT_PARAM(global_history_bits, "Bits of history"), - INIT_PARAM(choice_predictor_size, "Size of choice predictor"), - INIT_PARAM(choice_ctr_bits, "Bits of choice counters"), ++ INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6), ++ INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12), ++ ++ INIT_PARAM(localPredictorSize, "Size of local predictor"), ++ INIT_PARAM(localCtrBits, "Bits per counter"), ++ INIT_PARAM(localHistoryTableSize, "Size of local history table"), ++ INIT_PARAM(localHistoryBits, "Bits for the local history"), ++ INIT_PARAM(globalPredictorSize, "Size of global predictor"), ++ INIT_PARAM(globalCtrBits, "Bits per counter"), ++ INIT_PARAM(globalHistoryBits, "Bits of history"), ++ INIT_PARAM(choicePredictorSize, "Size of choice predictor"), ++ INIT_PARAM(choiceCtrBits, "Bits of choice counters"), + + INIT_PARAM(BTBEntries, "Number of BTB entries"), + INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), + + INIT_PARAM(RASSize, "RAS size"), + + INIT_PARAM(LQEntries, "Number of load queue entries"), + INIT_PARAM(SQEntries, "Number of store queue entries"), + INIT_PARAM(LFSTSize, "Last fetched store table size"), + INIT_PARAM(SSITSize, "Store set ID table size"), + + INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), + INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " + "registers"), + INIT_PARAM(numIQEntries, "Number of instruction queue entries"), + INIT_PARAM(numROBEntries, 
"Number of reorder buffer entries"), + ++ INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1), ++ INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"), ++ INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"), ++ INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100), ++ INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"), ++ INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100), ++ INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"), ++ INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100), ++ INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"), ++ + INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace") + +END_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) + +CREATE_SIM_OBJECT(DerivAlphaFullCPU) +{ + DerivAlphaFullCPU *cpu; + +#if FULL_SYSTEM + // Full-system only supports a single thread for the moment. + int actual_num_threads = 1; +#else + // In non-full-system mode, we infer the number of threads from + // the workload if it's not explicitly specified. + int actual_num_threads = + numThreads.isValid() ? 
numThreads : workload.size(); + + if (workload.size() == 0) { + fatal("Must specify at least one workload!"); + } + +#endif + - AlphaSimpleParams params; ++ AlphaSimpleParams *params = new AlphaSimpleParams; + - params.clock = clock; ++ params->clock = clock; + - params.name = getInstanceName(); - params.numberOfThreads = actual_num_threads; ++ params->name = getInstanceName(); ++ params->numberOfThreads = actual_num_threads; ++ params->activity = activity; + +#if FULL_SYSTEM - params.system = system; - params.cpu_id = cpu_id; - params.itb = itb; - params.dtb = dtb; ++ params->system = system; ++ params->cpu_id = cpu_id; ++ params->itb = itb; ++ params->dtb = dtb; +#else - params.workload = workload; ++ params->workload = workload; ++// params->pTable = page_table; +#endif // FULL_SYSTEM + - params.mem = mem; ++ params->mem = mem; + - params.max_insts_any_thread = max_insts_any_thread; - params.max_insts_all_threads = max_insts_all_threads; - params.max_loads_any_thread = max_loads_any_thread; - params.max_loads_all_threads = max_loads_all_threads; ++ params->checker = checker; ++ ++ params->max_insts_any_thread = max_insts_any_thread; ++ params->max_insts_all_threads = max_insts_all_threads; ++ params->max_loads_any_thread = max_loads_any_thread; ++ params->max_loads_all_threads = max_loads_all_threads; + + // + // Caches + // - params.icacheInterface = icache ? icache->getInterface() : NULL; - params.dcacheInterface = dcache ? 
dcache->getInterface() : NULL; - - params.decodeToFetchDelay = decodeToFetchDelay; - params.renameToFetchDelay = renameToFetchDelay; - params.iewToFetchDelay = iewToFetchDelay; - params.commitToFetchDelay = commitToFetchDelay; - params.fetchWidth = fetchWidth; - - params.renameToDecodeDelay = renameToDecodeDelay; - params.iewToDecodeDelay = iewToDecodeDelay; - params.commitToDecodeDelay = commitToDecodeDelay; - params.fetchToDecodeDelay = fetchToDecodeDelay; - params.decodeWidth = decodeWidth; - - params.iewToRenameDelay = iewToRenameDelay; - params.commitToRenameDelay = commitToRenameDelay; - params.decodeToRenameDelay = decodeToRenameDelay; - params.renameWidth = renameWidth; - - params.commitToIEWDelay = commitToIEWDelay; - params.renameToIEWDelay = renameToIEWDelay; - params.issueToExecuteDelay = issueToExecuteDelay; - params.issueWidth = issueWidth; - params.executeWidth = executeWidth; - params.executeIntWidth = executeIntWidth; - params.executeFloatWidth = executeFloatWidth; - params.executeBranchWidth = executeBranchWidth; - params.executeMemoryWidth = executeMemoryWidth; - - params.iewToCommitDelay = iewToCommitDelay; - params.renameToROBDelay = renameToROBDelay; - params.commitWidth = commitWidth; - params.squashWidth = squashWidth; - #if 0 - params.localPredictorSize = localPredictorSize; - params.localPredictorCtrBits = localPredictorCtrBits; - #endif - params.local_predictor_size = local_predictor_size; - params.local_ctr_bits = local_ctr_bits; - params.local_history_table_size = local_history_table_size; - params.local_history_bits = local_history_bits; - params.global_predictor_size = global_predictor_size; - params.global_ctr_bits = global_ctr_bits; - params.global_history_bits = global_history_bits; - params.choice_predictor_size = choice_predictor_size; - params.choice_ctr_bits = choice_ctr_bits; - - params.BTBEntries = BTBEntries; - params.BTBTagSize = BTBTagSize; - - params.RASSize = RASSize; - - params.LQEntries = LQEntries; - params.SQEntries 
= SQEntries; - params.SSITSize = SSITSize; - params.LFSTSize = LFSTSize; - - params.numPhysIntRegs = numPhysIntRegs; - params.numPhysFloatRegs = numPhysFloatRegs; - params.numIQEntries = numIQEntries; - params.numROBEntries = numROBEntries; - - params.instShiftAmt = 2; - - params.defReg = defer_registration; - - params.functionTrace = function_trace; - params.functionTraceStart = function_trace_start; ++ params->icacheInterface = icache ? icache->getInterface() : NULL; ++ params->dcacheInterface = dcache ? dcache->getInterface() : NULL; ++ params->cachePorts = cachePorts; ++ ++ params->decodeToFetchDelay = decodeToFetchDelay; ++ params->renameToFetchDelay = renameToFetchDelay; ++ params->iewToFetchDelay = iewToFetchDelay; ++ params->commitToFetchDelay = commitToFetchDelay; ++ params->fetchWidth = fetchWidth; ++ ++ params->renameToDecodeDelay = renameToDecodeDelay; ++ params->iewToDecodeDelay = iewToDecodeDelay; ++ params->commitToDecodeDelay = commitToDecodeDelay; ++ params->fetchToDecodeDelay = fetchToDecodeDelay; ++ params->decodeWidth = decodeWidth; ++ ++ params->iewToRenameDelay = iewToRenameDelay; ++ params->commitToRenameDelay = commitToRenameDelay; ++ params->decodeToRenameDelay = decodeToRenameDelay; ++ params->renameWidth = renameWidth; ++ ++ params->commitToIEWDelay = commitToIEWDelay; ++ params->renameToIEWDelay = renameToIEWDelay; ++ params->issueToExecuteDelay = issueToExecuteDelay; ++ params->issueWidth = issueWidth; ++ params->executeWidth = executeWidth; ++ params->executeIntWidth = executeIntWidth; ++ params->executeFloatWidth = executeFloatWidth; ++ params->executeBranchWidth = executeBranchWidth; ++ params->executeMemoryWidth = executeMemoryWidth; ++ params->fuPool = fuPool; ++ ++ params->iewToCommitDelay = iewToCommitDelay; ++ params->renameToROBDelay = renameToROBDelay; ++ params->commitWidth = commitWidth; ++ params->squashWidth = squashWidth; ++ params->trapLatency = trapLatency; ++ params->fetchTrapLatency = fetchTrapLatency; ++ ++ 
params->localPredictorSize = localPredictorSize; ++ params->localCtrBits = localCtrBits; ++ params->localHistoryTableSize = localHistoryTableSize; ++ params->localHistoryBits = localHistoryBits; ++ params->globalPredictorSize = globalPredictorSize; ++ params->globalCtrBits = globalCtrBits; ++ params->globalHistoryBits = globalHistoryBits; ++ params->choicePredictorSize = choicePredictorSize; ++ params->choiceCtrBits = choiceCtrBits; ++ ++ params->BTBEntries = BTBEntries; ++ params->BTBTagSize = BTBTagSize; ++ ++ params->RASSize = RASSize; ++ ++ params->LQEntries = LQEntries; ++ params->SQEntries = SQEntries; ++ ++ params->SSITSize = SSITSize; ++ params->LFSTSize = LFSTSize; ++ ++ params->numPhysIntRegs = numPhysIntRegs; ++ params->numPhysFloatRegs = numPhysFloatRegs; ++ params->numIQEntries = numIQEntries; ++ params->numROBEntries = numROBEntries; ++ ++ params->smtNumFetchingThreads = smtNumFetchingThreads; ++ params->smtFetchPolicy = smtFetchPolicy; ++ params->smtIQPolicy = smtIQPolicy; ++ params->smtLSQPolicy = smtLSQPolicy; ++ params->smtLSQThreshold = smtLSQThreshold; ++ params->smtROBPolicy = smtROBPolicy; ++ params->smtROBThreshold = smtROBThreshold; ++ params->smtCommitPolicy = smtCommitPolicy; ++ ++ params->instShiftAmt = 2; ++ ++ params->deferRegistration = defer_registration; ++ ++ params->functionTrace = function_trace; ++ params->functionTraceStart = function_trace_start; + + cpu = new DerivAlphaFullCPU(params); + + return cpu; +} + +REGISTER_SIM_OBJECT("DerivAlphaFullCPU", DerivAlphaFullCPU) + diff --cc src/cpu/o3/alpha_cpu_impl.hh index 7c4c2b969,000000000..f7f0a3842 mode 100644,000000..100644 --- a/src/cpu/o3/alpha_cpu_impl.hh +++ b/src/cpu/o3/alpha_cpu_impl.hh @@@ -1,371 -1,0 +1,775 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "arch/alpha/faults.hh" +#include "base/cprintf.hh" +#include "base/statistics.hh" +#include "base/timebuf.hh" - #include "mem/cache/cache.hh" // for dynamic cast ++#include "cpu/checker/exec_context.hh" +#include "mem/mem_interface.hh" - #include "sim/builder.hh" +#include "sim/sim_events.hh" +#include "sim/stats.hh" + +#include "cpu/o3/alpha_cpu.hh" +#include "cpu/o3/alpha_params.hh" +#include "cpu/o3/comm.hh" ++#include "cpu/o3/thread_state.hh" + +#if FULL_SYSTEM +#include "arch/alpha/osfpal.hh" - #include "arch/alpha/isa_traits.hh" ++#include "arch/isa_traits.hh" ++#include "cpu/quiesce_event.hh" ++#include "kern/kernel_stats.hh" +#endif + ++using namespace TheISA; ++ +template - AlphaFullCPU::AlphaFullCPU(Params ¶ms) ++AlphaFullCPU::AlphaFullCPU(Params *params) ++#if FULL_SYSTEM ++ : FullO3CPU(params), itb(params->itb), dtb(params->dtb) ++#else + : FullO3CPU(params) ++#endif +{ + DPRINTF(FullCPU, "AlphaFullCPU: Creating AlphaFullCPU object.\n"); + ++ this->thread.resize(this->numThreads); ++ ++ for (int i = 0; i < this->numThreads; ++i) { ++#if FULL_SYSTEM ++ assert(this->numThreads == 1); ++ this->thread[i] = new Thread(this, 0, params->mem); ++ this->thread[i]->setStatus(ExecContext::Suspended); ++#else ++ if (i < params->workload.size()) { ++ DPRINTF(FullCPU, "FullCPU: Workload[%i]'s starting PC is %#x, " ++ "process is %#x", ++ i, params->workload[i]->prog_entry, this->thread[i]); ++ this->thread[i] = new Thread(this, i, params->workload[i], i); ++ assert(params->workload[i]->getMemory() != NULL); ++ ++ this->thread[i]->setStatus(ExecContext::Suspended); ++ //usedTids[i] = true; ++ //threadMap[i] = i; ++ } else { ++ //Allocate Empty execution context so M5 can use later ++ //when scheduling threads to CPU ++ Process* dummy_proc = NULL; ++ ++ this->thread[i] = new Thread(this, i, dummy_proc, i); ++ //usedTids[i] = false; ++ } ++#endif // !FULL_SYSTEM ++ ++ this->thread[i]->numInst = 0; ++ ++ ExecContext *xc_proxy; ++ ++ AlphaXC 
*alpha_xc_proxy = new AlphaXC; ++ ++ if (params->checker) { ++ xc_proxy = new CheckerExecContext(alpha_xc_proxy, this->checker); ++ } else { ++ xc_proxy = alpha_xc_proxy; ++ } ++ ++ alpha_xc_proxy->cpu = this; ++ alpha_xc_proxy->thread = this->thread[i]; ++ ++#if FULL_SYSTEM ++ this->thread[i]->quiesceEvent = ++ new EndQuiesceEvent(xc_proxy); ++ this->thread[i]->lastActivate = 0; ++ this->thread[i]->lastSuspend = 0; ++#endif ++ this->thread[i]->xcProxy = xc_proxy; ++ ++ this->execContexts.push_back(xc_proxy); ++ } ++ ++ ++ for (int i=0; i < this->numThreads; i++) { ++ this->thread[i]->funcExeInst = 0; ++ } ++ ++ // Sets CPU pointers. These must be set at this level because the CPU ++ // pointers are defined to be the highest level of CPU class. + this->fetch.setCPU(this); + this->decode.setCPU(this); + this->rename.setCPU(this); + this->iew.setCPU(this); + this->commit.setCPU(this); + + this->rob.setCPU(this); ++ this->regFile.setCPU(this); ++ ++ lockAddr = 0; ++ lockFlag = false; +} + +template +void +AlphaFullCPU::regStats() +{ + // Register stats for everything that has stats. + this->fullCPURegStats(); + this->fetch.regStats(); + this->decode.regStats(); + this->rename.regStats(); + this->iew.regStats(); + this->commit.regStats(); +} + - #if !FULL_SYSTEM ++#if FULL_SYSTEM ++template ++void ++AlphaFullCPU::AlphaXC::dumpFuncProfile() ++{ ++ // Currently not supported ++} ++#endif + - // Will probably need to know which thread is calling syscall - // Will need to pass that information in to the DynInst when it is constructed, - // so that this call can be made with the proper thread number. 
+template +void - AlphaFullCPU::syscall(short thread_num) ++AlphaFullCPU::AlphaXC::takeOverFrom(ExecContext *old_context) +{ - DPRINTF(FullCPU, "AlphaFullCPU: Syscall() called.\n\n"); ++ // some things should already be set up ++ assert(getMemPtr() == old_context->getMemPtr()); ++#if FULL_SYSTEM ++ assert(getSystemPtr() == old_context->getSystemPtr()); ++#else ++ assert(getProcessPtr() == old_context->getProcessPtr()); ++#endif + - // Commit stage needs to run as well. - this->commit.tick(); ++ // copy over functional state ++ setStatus(old_context->status()); ++ copyArchRegs(old_context); ++ setCpuId(old_context->readCpuId()); ++#if !FULL_SYSTEM ++ thread->funcExeInst = old_context->readFuncExeInst(); ++#else ++ EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent(); ++ if (other_quiesce) { ++ // Point the quiesce event's XC at this XC so that it wakes up ++ // the proper CPU. ++ other_quiesce->xc = this; ++ } ++ if (thread->quiesceEvent) { ++ thread->quiesceEvent->xc = this; ++ } + - squashStages(); ++ // Transfer kernel stats from one CPU to the other. ++ thread->kernelStats = old_context->getKernelStats(); ++// storeCondFailures = 0; ++ cpu->lockFlag = false; ++#endif + - // Temporarily increase this by one to account for the syscall - // instruction. - ++(this->funcExeInst); ++ old_context->setStatus(ExecContext::Unallocated); ++ ++ thread->inSyscall = false; ++ thread->trapPending = false; ++} ++ ++template ++void ++AlphaFullCPU::AlphaXC::activate(int delay) ++{ ++ DPRINTF(FullCPU, "Calling activate on AlphaXC\n"); + - // Copy over all important state to xc once all the unrolling is done. - copyToXC(); ++ if (thread->status() == ExecContext::Active) ++ return; + - // This is hardcoded to thread 0 while the CPU is only single threaded. - this->thread[0]->syscall(); ++#if FULL_SYSTEM ++ thread->lastActivate = curTick; ++#endif + - // Copy over all important state back to CPU. 
- copyFromXC(); ++ if (thread->status() == ExecContext::Unallocated) { ++ cpu->activateWhenReady(thread->tid); ++ return; ++ } + - // Decrease funcExeInst by one as the normal commit will handle - // incrememnting it. - --(this->funcExeInst); ++ thread->setStatus(ExecContext::Active); ++ ++ // status() == Suspended ++ cpu->activateContext(thread->tid, delay); +} + - // This is not a pretty function, and should only be used if it is necessary - // to fake having everything squash all at once (ie for non-full system - // syscalls). Maybe put this at the FullCPU level? +template +void - AlphaFullCPU::squashStages() ++AlphaFullCPU::AlphaXC::suspend() +{ - InstSeqNum rob_head = this->rob.readHeadSeqNum(); ++ DPRINTF(FullCPU, "Calling suspend on AlphaXC\n"); + - // Now hack the time buffer to put this sequence number in the places - // where the stages might read it. - for (int i = 0; i < 5; ++i) - { - this->timeBuffer.access(-i)->commitInfo.doneSeqNum = rob_head; ++ if (thread->status() == ExecContext::Suspended) ++ return; ++ ++#if FULL_SYSTEM ++ thread->lastActivate = curTick; ++ thread->lastSuspend = curTick; ++#endif ++/* ++#if FULL_SYSTEM ++ // Don't change the status from active if there are pending interrupts ++ if (cpu->check_interrupts()) { ++ assert(status() == ExecContext::Active); ++ return; + } ++#endif ++*/ ++ thread->setStatus(ExecContext::Suspended); ++ cpu->suspendContext(thread->tid); ++} + - this->fetch.squash(this->rob.readHeadNextPC()); - this->fetchQueue.advance(); ++template ++void ++AlphaFullCPU::AlphaXC::deallocate() ++{ ++ DPRINTF(FullCPU, "Calling deallocate on AlphaXC\n"); + - this->decode.squash(); - this->decodeQueue.advance(); ++ if (thread->status() == ExecContext::Unallocated) ++ return; + - this->rename.squash(); - this->renameQueue.advance(); - this->renameQueue.advance(); ++ thread->setStatus(ExecContext::Unallocated); ++ cpu->deallocateContext(thread->tid); ++} + - // Be sure to advance the IEW queues so that the commit stage doesn't 
- // try to set an instruction as completed at the same time that it - // might be deleting it. - this->iew.squash(); - this->iewQueue.advance(); - this->iewQueue.advance(); - // Needs to tell the LSQ to write back all of its data - this->iew.lsqWriteback(); ++template ++void ++AlphaFullCPU::AlphaXC::halt() ++{ ++ DPRINTF(FullCPU, "Calling halt on AlphaXC\n"); + - this->rob.squash(rob_head); - this->commit.setSquashing(); ++ if (thread->status() == ExecContext::Halted) ++ return; + - // Now hack the time buffer to clear the sequence numbers in the places - // where the stages might read it.? - for (int i = 0; i < 5; ++i) - { - this->timeBuffer.access(-i)->commitInfo.doneSeqNum = 0; - } ++ thread->setStatus(ExecContext::Halted); ++ cpu->haltContext(thread->tid); ++} + ++template ++void ++AlphaFullCPU::AlphaXC::regStats(const std::string &name) ++{ ++#if FULL_SYSTEM ++ thread->kernelStats = new Kernel::Statistics(cpu->system); ++ thread->kernelStats->regStats(name + ".kern"); ++#endif +} + - #endif // FULL_SYSTEM ++template ++void ++AlphaFullCPU::AlphaXC::serialize(std::ostream &os) ++{ ++#if FULL_SYSTEM ++ if (thread->kernelStats) ++ thread->kernelStats->serialize(os); ++#endif ++ ++} + +template +void - AlphaFullCPU::copyToXC() ++AlphaFullCPU::AlphaXC::unserialize(Checkpoint *cp, const std::string §ion) +{ - PhysRegIndex renamed_reg; ++#if FULL_SYSTEM ++ if (thread->kernelStats) ++ thread->kernelStats->unserialize(cp, section); ++#endif + - // First loop through the integer registers. - for (int i = 0; i < AlphaISA::NumIntRegs; ++i) - { - renamed_reg = this->renameMap.lookup(i); - this->cpuXC->setIntReg(i, this->regFile.readIntReg(renamed_reg)); - DPRINTF(FullCPU, "FullCPU: Copying register %i, has data %lli.\n", - renamed_reg, this->regFile.intRegFile[renamed_reg]); - } ++} + - // Then loop through the floating point registers. 
- for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) - { - renamed_reg = this->renameMap.lookup(i + AlphaISA::FP_Base_DepTag); - this->cpuXC->setFloatRegBits(i, - this->regFile.readFloatRegBits(renamed_reg)); - } ++#if FULL_SYSTEM ++template ++EndQuiesceEvent * ++AlphaFullCPU::AlphaXC::getQuiesceEvent() ++{ ++ return thread->quiesceEvent; ++} + - this->cpuXC->setMiscReg(AlphaISA::Fpcr_DepTag, - this->regFile.readMiscReg(AlphaISA::Fpcr_DepTag)); - this->cpuXC->setMiscReg(AlphaISA::Uniq_DepTag, - this->regFile.readMiscReg(AlphaISA::Uniq_DepTag)); - this->cpuXC->setMiscReg(AlphaISA::Lock_Flag_DepTag, - this->regFile.readMiscReg(AlphaISA::Lock_Flag_DepTag)); - this->cpuXC->setMiscReg(AlphaISA::Lock_Addr_DepTag, - this->regFile.readMiscReg(AlphaISA::Lock_Addr_DepTag)); ++template ++Tick ++AlphaFullCPU::AlphaXC::readLastActivate() ++{ ++ return thread->lastActivate; ++} + - this->cpuXC->setPC(this->rob.readHeadPC()); - this->cpuXC->setNextPC(this->cpuXC->readPC()+4); ++template ++Tick ++AlphaFullCPU::AlphaXC::readLastSuspend() ++{ ++ return thread->lastSuspend; ++} + - #if !FULL_SYSTEM - this->cpuXC->setFuncExeInst(this->funcExeInst); ++template ++void ++AlphaFullCPU::AlphaXC::profileClear() ++{} ++ ++template ++void ++AlphaFullCPU::AlphaXC::profileSample() ++{} +#endif ++ ++template ++TheISA::MachInst ++AlphaFullCPU::AlphaXC:: getInst() ++{ ++ return thread->inst; +} + - // This function will probably mess things up unless the ROB is empty and - // there are no instructions in the pipeline. +template +void - AlphaFullCPU::copyFromXC() ++AlphaFullCPU::AlphaXC::copyArchRegs(ExecContext *xc) +{ ++ // This function will mess things up unless the ROB is empty and ++ // there are no instructions in the pipeline. ++ unsigned tid = thread->tid; + PhysRegIndex renamed_reg; + + // First loop through the integer registers. 
- for (int i = 0; i < AlphaISA::NumIntRegs; ++i) - { - renamed_reg = this->renameMap.lookup(i); ++ for (int i = 0; i < AlphaISA::NumIntRegs; ++i) { ++ renamed_reg = cpu->renameMap[tid].lookup(i); + + DPRINTF(FullCPU, "FullCPU: Copying over register %i, had data %lli, " + "now has data %lli.\n", - renamed_reg, this->regFile.intRegFile[renamed_reg], - this->cpuXC->readIntReg(i)); ++ renamed_reg, cpu->readIntReg(renamed_reg), ++ xc->readIntReg(i)); + - this->regFile.setIntReg(renamed_reg, this->cpuXC->readIntReg(i)); ++ cpu->setIntReg(renamed_reg, xc->readIntReg(i)); + } + + // Then loop through the floating point registers. + for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) + { + renamed_reg = this->renameMap.lookup(i + AlphaISA::FP_Base_DepTag); - this->regFile.setFloatRegBits(renamed_reg, - this->cpuXC->readFloatRegBits(i)); ++ this->cpuXC->setFloatRegBits(i, ++ this->regFile.readFloatRegBits(renamed_reg)); + } + - // Then loop through the misc registers. - this->regFile.setMiscReg(AlphaISA::Fpcr_DepTag, - this->cpuXC->readMiscReg(AlphaISA::Fpcr_DepTag)); - this->regFile.setMiscReg(AlphaISA::Uniq_DepTag, - this->cpuXC->readMiscReg(AlphaISA::Uniq_DepTag)); - this->regFile.setMiscReg(AlphaISA::Lock_Flag_DepTag, - this->cpuXC->readMiscReg(AlphaISA::Lock_Flag_DepTag)); - this->regFile.setMiscReg(AlphaISA::Lock_Addr_DepTag, - this->cpuXC->readMiscReg(AlphaISA::Lock_Addr_DepTag)); ++ // Copy the misc regs. ++ cpu->regFile.miscRegs[tid].copyMiscRegs(xc); + + // Then finally set the PC and the next PC. 
- // regFile.pc = cpuXC->regs.pc; - // regFile.npc = cpuXC->regs.npc; ++ cpu->setPC(xc->readPC(), tid); ++ cpu->setNextPC(xc->readNextPC(), tid); +#if !FULL_SYSTEM - this->funcExeInst = this->cpuXC->readFuncExeInst(); ++ this->thread->funcExeInst = xc->readFuncExeInst(); +#endif +} + ++template ++void ++AlphaFullCPU::AlphaXC::clearArchRegs() ++{} ++ ++template ++uint64_t ++AlphaFullCPU::AlphaXC::readIntReg(int reg_idx) ++{ ++ DPRINTF(Fault, "Reading int register through the XC!\n"); ++ return cpu->readArchIntReg(reg_idx, thread->tid); ++} ++ ++template ++float ++AlphaFullCPU::AlphaXC::readFloatRegSingle(int reg_idx) ++{ ++ DPRINTF(Fault, "Reading float register through the XC!\n"); ++ return cpu->readArchFloatRegSingle(reg_idx, thread->tid); ++} ++ ++template ++double ++AlphaFullCPU::AlphaXC::readFloatRegDouble(int reg_idx) ++{ ++ DPRINTF(Fault, "Reading float register through the XC!\n"); ++ return cpu->readArchFloatRegDouble(reg_idx, thread->tid); ++} ++ ++template ++uint64_t ++AlphaFullCPU::AlphaXC::readFloatRegInt(int reg_idx) ++{ ++ DPRINTF(Fault, "Reading floatint register through the XC!\n"); ++ return cpu->readArchFloatRegInt(reg_idx, thread->tid); ++} ++ ++template ++void ++AlphaFullCPU::AlphaXC::setIntReg(int reg_idx, uint64_t val) ++{ ++ DPRINTF(Fault, "Setting int register through the XC!\n"); ++ cpu->setArchIntReg(reg_idx, val, thread->tid); ++ ++ if (!thread->trapPending && !thread->inSyscall) { ++ cpu->squashFromXC(thread->tid); ++ } ++} ++ ++template ++void ++AlphaFullCPU::AlphaXC::setFloatRegSingle(int reg_idx, float val) ++{ ++ DPRINTF(Fault, "Setting float register through the XC!\n"); ++ cpu->setArchFloatRegSingle(reg_idx, val, thread->tid); ++ ++ if (!thread->trapPending && !thread->inSyscall) { ++ cpu->squashFromXC(thread->tid); ++ } ++} ++ ++template ++void ++AlphaFullCPU::AlphaXC::setFloatRegDouble(int reg_idx, double val) ++{ ++ DPRINTF(Fault, "Setting float register through the XC!\n"); ++ cpu->setArchFloatRegDouble(reg_idx, val, 
thread->tid); ++ ++ if (!thread->trapPending && !thread->inSyscall) { ++ cpu->squashFromXC(thread->tid); ++ } ++} ++ ++template ++void ++AlphaFullCPU::AlphaXC::setFloatRegInt(int reg_idx, uint64_t val) ++{ ++ DPRINTF(Fault, "Setting floatint register through the XC!\n"); ++ cpu->setArchFloatRegInt(reg_idx, val, thread->tid); ++ ++ if (!thread->trapPending && !thread->inSyscall) { ++ cpu->squashFromXC(thread->tid); ++ } ++} ++ ++template ++void ++AlphaFullCPU::AlphaXC::setPC(uint64_t val) ++{ ++ cpu->setPC(val, thread->tid); ++ ++ if (!thread->trapPending && !thread->inSyscall) { ++ cpu->squashFromXC(thread->tid); ++ } ++} ++ ++template ++void ++AlphaFullCPU::AlphaXC::setNextPC(uint64_t val) ++{ ++ cpu->setNextPC(val, thread->tid); ++ ++ if (!thread->trapPending && !thread->inSyscall) { ++ cpu->squashFromXC(thread->tid); ++ } ++} ++ ++template ++Fault ++AlphaFullCPU::AlphaXC::setMiscReg(int misc_reg, const MiscReg &val) ++{ ++ DPRINTF(Fault, "Setting misc register through the XC!\n"); ++ ++ Fault ret_fault = cpu->setMiscReg(misc_reg, val, thread->tid); ++ ++ if (!thread->trapPending && !thread->inSyscall) { ++ cpu->squashFromXC(thread->tid); ++ } ++ ++ return ret_fault; ++} ++ ++template ++Fault ++AlphaFullCPU::AlphaXC::setMiscRegWithEffect(int misc_reg, const MiscReg &val) ++{ ++ DPRINTF(Fault, "Setting misc register through the XC!\n"); ++ ++ Fault ret_fault = cpu->setMiscRegWithEffect(misc_reg, val, thread->tid); ++ ++ if (!thread->trapPending && !thread->inSyscall) { ++ cpu->squashFromXC(thread->tid); ++ } ++ ++ return ret_fault; ++} ++ ++#if !FULL_SYSTEM ++ ++template ++TheISA::IntReg ++AlphaFullCPU::AlphaXC::getSyscallArg(int i) ++{ ++ return cpu->getSyscallArg(i, thread->tid); ++} ++ ++template ++void ++AlphaFullCPU::AlphaXC::setSyscallArg(int i, IntReg val) ++{ ++ cpu->setSyscallArg(i, val, thread->tid); ++} ++ ++template ++void ++AlphaFullCPU::AlphaXC::setSyscallReturn(SyscallReturn return_value) ++{ ++ cpu->setSyscallReturn(return_value, thread->tid); ++} 
++ ++#endif // FULL_SYSTEM ++ ++template ++MiscReg ++AlphaFullCPU::readMiscReg(int misc_reg, unsigned tid) ++{ ++ return this->regFile.readMiscReg(misc_reg, tid); ++} ++ ++template ++MiscReg ++AlphaFullCPU::readMiscRegWithEffect(int misc_reg, Fault &fault, ++ unsigned tid) ++{ ++ return this->regFile.readMiscRegWithEffect(misc_reg, fault, tid); ++} ++ ++template ++Fault ++AlphaFullCPU::setMiscReg(int misc_reg, const MiscReg &val, unsigned tid) ++{ ++ return this->regFile.setMiscReg(misc_reg, val, tid); ++} ++ ++template ++Fault ++AlphaFullCPU::setMiscRegWithEffect(int misc_reg, const MiscReg &val, ++ unsigned tid) ++{ ++ return this->regFile.setMiscRegWithEffect(misc_reg, val, tid); ++} ++ ++template ++void ++AlphaFullCPU::squashFromXC(unsigned tid) ++{ ++ this->thread[tid]->inSyscall = true; ++ this->commit.generateXCEvent(tid); ++} ++ +#if FULL_SYSTEM + ++template ++void ++AlphaFullCPU::post_interrupt(int int_num, int index) ++{ ++ BaseCPU::post_interrupt(int_num, index); ++ ++ if (this->thread[0]->status() == ExecContext::Suspended) { ++ DPRINTF(IPI,"Suspended Processor awoke\n"); ++// xcProxies[0]->activate(); ++ this->execContexts[0]->activate(); ++ } ++} ++ +template +int +AlphaFullCPU::readIntrFlag() +{ + return this->regFile.readIntrFlag(); +} + +template +void +AlphaFullCPU::setIntrFlag(int val) +{ + this->regFile.setIntrFlag(val); +} + - // Can force commit stage to squash and stuff. +template +Fault - AlphaFullCPU::hwrei() ++AlphaFullCPU::hwrei(unsigned tid) +{ - if (!inPalMode()) - return new AlphaISA::UnimplementedOpcodeFault; - - this->setNextPC(this->regFile.miscRegs.readReg(AlphaISA::IPR_EXC_ADDR)); ++ // Need to clear the lock flag upon returning from an interrupt. 
++ this->lockFlag = false; + - // kernelStats.hwrei(); - - if ((this->regFile.miscRegs.readReg(AlphaISA::IPR_EXC_ADDR) & 1) == 0) - // AlphaISA::swap_palshadow(®s, false); ++ this->thread[tid]->kernelStats->hwrei(); + + this->checkInterrupts = true; + + // FIXME: XXX check for interrupts? XXX + return NoFault; +} + +template +bool - AlphaFullCPU::simPalCheck(int palFunc) ++AlphaFullCPU::simPalCheck(int palFunc, unsigned tid) +{ - // kernelStats.callpal(palFunc); ++ if (this->thread[tid]->kernelStats) ++ this->thread[tid]->kernelStats->callpal(palFunc, ++ this->execContexts[tid]); + + switch (palFunc) { + case PAL::halt: + halt(); + if (--System::numSystemsRunning == 0) + new SimExitEvent("all cpus halted"); + break; + + case PAL::bpt: + case PAL::bugchk: + if (this->system->breakpoint()) + return false; + break; + } + + return true; +} + - // Probably shouldn't be able to switch to the trap handler as quickly as - // this. Also needs to get the exception restart address from the commit - // stage. +template +void - AlphaFullCPU::trap(Fault fault) ++AlphaFullCPU::trap(Fault fault, unsigned tid) +{ - /* // Keep in mind that a trap may be initiated by fetch if there's a TLB - // miss - uint64_t PC = this->commit.readCommitPC(); ++ fault->invoke(this->execContexts[tid]); ++} + - DPRINTF(Fault, "Fault %s\n", fault->name()); - this->recordEvent(csprintf("Fault %s", fault->name())); ++template ++void ++AlphaFullCPU::processInterrupts() ++{ ++ // Check for interrupts here. For now can copy the code that ++ // exists within isa_fullsys_traits.hh. Also assume that thread 0 ++ // is the one that handles the interrupts. ++ // @todo: Possibly consolidate the interrupt checking code. ++ // @todo: Allow other threads to handle interrupts. 
++ ++ // Check if there are any outstanding interrupts ++ //Handle the interrupts ++ int ipl = 0; ++ int summary = 0; ++ ++ this->checkInterrupts = false; ++ ++ if (this->readMiscReg(IPR_ASTRR, 0)) ++ panic("asynchronous traps not implemented\n"); ++ ++ if (this->readMiscReg(IPR_SIRR, 0)) { ++ for (int i = INTLEVEL_SOFTWARE_MIN; ++ i < INTLEVEL_SOFTWARE_MAX; i++) { ++ if (this->readMiscReg(IPR_SIRR, 0) & (ULL(1) << i)) { ++ // See table 4-19 of the 21164 hardware reference ++ ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1; ++ summary |= (ULL(1) << i); ++ } ++ } ++ } + - //kernelStats.fault(fault); ++ uint64_t interrupts = this->intr_status(); ++ ++ if (interrupts) { ++ for (int i = INTLEVEL_EXTERNAL_MIN; ++ i < INTLEVEL_EXTERNAL_MAX; i++) { ++ if (interrupts & (ULL(1) << i)) { ++ // See table 4-19 of the 21164 hardware reference ++ ipl = i; ++ summary |= (ULL(1) << i); ++ } ++ } ++ } + - if (fault->isA()) - panic("Arithmetic traps are unimplemented!"); ++ if (ipl && ipl > this->readMiscReg(IPR_IPLR, 0)) { ++ this->setMiscReg(IPR_ISR, summary, 0); ++ this->setMiscReg(IPR_INTID, ipl, 0); ++ if (this->checker) { ++ this->checker->cpuXCBase()->setMiscReg(IPR_ISR, summary); ++ this->checker->cpuXCBase()->setMiscReg(IPR_INTID, ipl); ++ } ++ this->trap(Fault(new InterruptFault), 0); ++ DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", ++ this->readMiscReg(IPR_IPLR, 0), ipl, summary); ++ } ++} + - // exception restart address - Get the commit PC - if (!fault->isA() || !inPalMode(PC)) - this->regFile.miscRegs.setReg(AlphaISA::IPR_EXC_ADDR, PC); ++#endif // FULL_SYSTEM + - if (fault->isA() || fault->isA()) - // || fault == InterruptFault && !PC_PAL(regs.pc) - { - // traps... 
skip faulting instruction - AlphaISA::MiscReg ipr_exc_addr = - this->regFile.miscRegs.readReg(AlphaISA::IPR_EXC_ADDR); - this->regFile.miscRegs.setReg(AlphaISA::IPR_EXC_ADDR, - ipr_exc_addr + 4); - } ++#if !FULL_SYSTEM ++ ++template ++void ++AlphaFullCPU::syscall(int tid) ++{ ++ DPRINTF(FullCPU, "AlphaFullCPU: [tid:%i] Executing syscall().\n\n", tid); + - if (!inPalMode(PC)) - swapPALShadow(true); ++ DPRINTF(Activity,"Activity: syscall() called.\n"); + - this->regFile.setPC(this->regFile.miscRegs.readReg(AlphaISA::IPR_PAL_BASE) + - (dynamic_cast(fault.get()))->vect()); - this->regFile.setNextPC(PC + sizeof(MachInst));*/ ++ // Temporarily increase this by one to account for the syscall ++ // instruction. ++ ++(this->thread[tid]->funcExeInst); ++ ++ // Execute the actual syscall. ++ this->thread[tid]->syscall(); ++ ++ // Decrease funcExeInst by one as the normal commit will handle ++ // incrementing it. ++ --(this->thread[tid]->funcExeInst); +} + +template - void - AlphaFullCPU::processInterrupts() ++TheISA::IntReg ++AlphaFullCPU::getSyscallArg(int i, int tid) +{ - // Check for interrupts here. For now can copy the code that exists - // within isa_fullsys_traits.hh. ++ return this->readArchIntReg(AlphaISA::ArgumentReg0 + i, tid); +} + - // swap_palshadow swaps in the values of the shadow registers and - // swaps them with the values of the physical registers that map to the - // same logical index. +template +void - AlphaFullCPU::swapPALShadow(bool use_shadow) ++AlphaFullCPU::setSyscallArg(int i, IntReg val, int tid) +{ - if (palShadowEnabled == use_shadow) - panic("swap_palshadow: wrong PAL shadow state"); - - palShadowEnabled = use_shadow; - - // Will have to lookup in rename map to get physical registers, then - // swap. ++ this->setArchIntReg(AlphaISA::ArgumentReg0 + i, val, tid); +} + - #endif // FULL_SYSTEM ++template ++void ++AlphaFullCPU::setSyscallReturn(SyscallReturn return_value, int tid) ++{ ++ // check for error condition. 
Alpha syscall convention is to ++ // indicate success/failure in reg a3 (r19) and put the ++ // return value itself in the standard return value reg (v0). ++ if (return_value.successful()) { ++ // no error ++ this->setArchIntReg(SyscallSuccessReg, 0, tid); ++ this->setArchIntReg(ReturnValueReg, return_value.value(), tid); ++ } else { ++ // got an error, return details ++ this->setArchIntReg(SyscallSuccessReg, (IntReg) -1, tid); ++ this->setArchIntReg(ReturnValueReg, -return_value.value(), tid); ++ } ++} ++#endif diff --cc src/cpu/o3/alpha_dyn_inst.hh index 5b8a05e5c,000000000..b03c8c337 mode 100644,000000..100644 --- a/src/cpu/o3/alpha_dyn_inst.hh +++ b/src/cpu/o3/alpha_dyn_inst.hh @@@ -1,264 -1,0 +1,280 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - #ifndef __CPU_O3_CPU_ALPHA_DYN_INST_HH__ - #define __CPU_O3_CPU_ALPHA_DYN_INST_HH__ ++#ifndef __CPU_O3_ALPHA_DYN_INST_HH__ ++#define __CPU_O3_ALPHA_DYN_INST_HH__ + +#include "cpu/base_dyn_inst.hh" ++#include "cpu/inst_seq.hh" +#include "cpu/o3/alpha_cpu.hh" +#include "cpu/o3/alpha_impl.hh" - #include "cpu/inst_seq.hh" + +/** - * Mostly implementation specific AlphaDynInst. It is templated in case there - * are other implementations that are similar enough to be able to use this - * class without changes. This is mainly useful if there are multiple similar - * CPU implementations of the same ISA. ++ * Mostly implementation & ISA specific AlphaDynInst. As with most ++ * other classes in the new CPU model, it is templated on the Impl to ++ * allow for passing in of all types, such as the CPU type and the ISA ++ * type. The AlphaDynInst serves as the primary interface to the CPU ++ * for instructions that are executing. + */ - +template +class AlphaDynInst : public BaseDynInst +{ + public: + /** Typedef for the CPU. */ + typedef typename Impl::FullCPU FullCPU; + + /** Binary machine instruction type. */ + typedef TheISA::MachInst MachInst; ++ /** Extended machine instruction type. */ ++ typedef TheISA::ExtMachInst ExtMachInst; + /** Logical register index type. */ + typedef TheISA::RegIndex RegIndex; + /** Integer register index type. */ + typedef TheISA::IntReg IntReg; + /** Misc register index type. 
*/ + typedef TheISA::MiscReg MiscReg; + + enum { + MaxInstSrcRegs = TheISA::MaxInstSrcRegs, //< Max source regs + MaxInstDestRegs = TheISA::MaxInstDestRegs, //< Max dest regs + }; + + public: + /** BaseDynInst constructor given a binary instruction. */ - AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, ++ AlphaDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num, + FullCPU *cpu); + + /** BaseDynInst constructor given a static inst pointer. */ + AlphaDynInst(StaticInstPtr &_staticInst); + + /** Executes the instruction.*/ - Fault execute() - { - return this->fault = this->staticInst->execute(this, this->traceData); - } ++ Fault execute(); ++ ++ /** Initiates the access. Only valid for memory operations. */ ++ Fault initiateAcc(); ++ ++ /** Completes the access. Only valid for memory operations. */ ++ Fault completeAcc(); ++ ++ private: ++ /** Initializes variables. */ ++ void initVars(); + + public: + MiscReg readMiscReg(int misc_reg) + { - // Dummy function for now. - // @todo: Fix this once reg file gets fixed. - return 0; ++ return this->cpu->readMiscReg(misc_reg, this->threadNumber); + } + + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { - // Dummy function for now. - // @todo: Fix this once reg file gets fixed. - return 0; ++ return this->cpu->readMiscRegWithEffect(misc_reg, fault, ++ this->threadNumber); + } + + Fault setMiscReg(int misc_reg, const MiscReg &val) + { - // Dummy function for now. - // @todo: Fix this once reg file gets fixed. - return NoFault; ++ this->instResult.integer = val; ++ return this->cpu->setMiscReg(misc_reg, val, this->threadNumber); + } + + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) + { - // Dummy function for now. - // @todo: Fix this once reg file gets fixed. - return NoFault; ++ return this->cpu->setMiscRegWithEffect(misc_reg, val, ++ this->threadNumber); + } + +#if FULL_SYSTEM ++ /** Calls hardware return from error interrupt. 
*/ + Fault hwrei(); ++ /** Reads interrupt flag. */ + int readIntrFlag(); ++ /** Sets interrupt flag. */ + void setIntrFlag(int val); ++ /** Checks if system is in PAL mode. */ + bool inPalMode(); ++ /** Traps to handle specified fault. */ + void trap(Fault fault); + bool simPalCheck(int palFunc); +#else ++ /** Calls a syscall. */ + void syscall(); +#endif + - - + private: + /** Physical register index of the destination registers of this + * instruction. + */ + PhysRegIndex _destRegIdx[MaxInstDestRegs]; + + /** Physical register index of the source registers of this + * instruction. + */ + PhysRegIndex _srcRegIdx[MaxInstSrcRegs]; + + /** Physical register index of the previous producers of the + * architected destinations. + */ + PhysRegIndex _prevDestRegIdx[MaxInstDestRegs]; + + public: + + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to redice overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). 
+ + uint64_t readIntReg(const StaticInst *si, int idx) + { + return this->cpu->readIntReg(_srcRegIdx[idx]); + } + + FloatReg readFloatReg(const StaticInst *si, int idx, int width) + { + return this->cpu->readFloatReg(_srcRegIdx[idx], width); + } + + FloatReg readFloatReg(const StaticInst *si, int idx) + { + return this->cpu->readFloatReg(_srcRegIdx[idx]); + } + + FloatRegBits readFloatRegBits(const StaticInst *si, int idx, int width) + { + return this->cpu->readFloatRegBits(_srcRegIdx[idx], width); + } + + FloatRegBits readFloatRegBits(const StaticInst *si, int idx) + { + return this->cpu->readFloatRegBits(_srcRegIdx[idx]); + } + + /** @todo: Make results into arrays so they can handle multiple dest + * registers. + */ + void setIntReg(const StaticInst *si, int idx, uint64_t val) + { + this->cpu->setIntReg(_destRegIdx[idx], val); - this->instResult.integer = val; ++ BaseDynInst::setIntReg(si, idx, val); + } + + void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width) + { + this->cpu->setFloatReg(_destRegIdx[idx], val, width); - this->instResult.fp = val; ++ BaseDynInst::setFloatRegSingle(si, idx, val); + } + + void setFloatReg(const StaticInst *si, int idx, FloatReg val) + { + this->cpu->setFloatReg(_destRegIdx[idx], val); - this->instResult.dbl = val; ++ BaseDynInst::setFloatRegDouble(si, idx, val); + } + + void setFloatRegBits(const StaticInst *si, int idx, + FloatRegBits val, int width) + { + this->cpu->setFloatRegBits(_destRegIdx[idx], val, width); + this->instResult.integer = val; + } + + void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val) + { + this->cpu->setFloatRegBits(_destRegIdx[idx], val); - this->instResult.integer = val; ++ BaseDynInst::setFloatRegInt(si, idx, val); + } + + /** Returns the physical register index of the i'th destination + * register. + */ + PhysRegIndex renamedDestRegIdx(int idx) const + { + return _destRegIdx[idx]; + } + + /** Returns the physical register index of the i'th source register. 
*/ + PhysRegIndex renamedSrcRegIdx(int idx) const + { + return _srcRegIdx[idx]; + } + + /** Returns the physical register index of the previous physical register + * that remapped to the same logical register index. + */ + PhysRegIndex prevDestRegIdx(int idx) const + { + return _prevDestRegIdx[idx]; + } + + /** Renames a destination register to a physical register. Also records + * the previous physical register that the logical register mapped to. + */ + void renameDestReg(int idx, + PhysRegIndex renamed_dest, + PhysRegIndex previous_rename) + { + _destRegIdx[idx] = renamed_dest; + _prevDestRegIdx[idx] = previous_rename; + } + + /** Renames a source logical register to the physical register which + * has/will produce that logical register's result. + * @todo: add in whether or not the source register is ready. + */ + void renameSrcReg(int idx, PhysRegIndex renamed_src) + { + _srcRegIdx[idx] = renamed_src; + } + + public: ++ /** Calculates EA part of a memory instruction. Currently unused, ++ * though it may be useful in the future if we want to split ++ * memory operations into EA calculation and memory access parts. ++ */ + Fault calcEA() + { + return this->staticInst->eaCompInst()->execute(this, this->traceData); + } + ++ /** Does the memory access part of a memory instruction. Currently unused, ++ * though it may be useful in the future if we want to split ++ * memory operations into EA calculation and memory access parts. 
++ */ + Fault memAccess() + { + return this->staticInst->memAccInst()->execute(this, this->traceData); + } +}; + - #endif // __CPU_O3_CPU_ALPHA_DYN_INST_HH__ ++#endif // __CPU_O3_ALPHA_DYN_INST_HH__ + diff --cc src/cpu/o3/alpha_dyn_inst_impl.hh index 96b7d3430,000000000..541d5ab82 mode 100644,000000..100644 --- a/src/cpu/o3/alpha_dyn_inst_impl.hh +++ b/src/cpu/o3/alpha_dyn_inst_impl.hh @@@ -1,120 -1,0 +1,176 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" + +template - AlphaDynInst::AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC, ++AlphaDynInst::AlphaDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, + InstSeqNum seq_num, FullCPU *cpu) + : BaseDynInst(inst, PC, Pred_PC, seq_num, cpu) ++{ ++ initVars(); ++} ++ ++template ++AlphaDynInst::AlphaDynInst(StaticInstPtr &_staticInst) ++ : BaseDynInst(_staticInst) ++{ ++ initVars(); ++} ++ ++template ++void ++AlphaDynInst::initVars() +{ + // Make sure to have the renamed register entries set to the same + // as the normal register entries. It will allow the IQ to work + // without any modifications. - for (int i = 0; i < this->staticInst->numDestRegs(); i++) - { ++ for (int i = 0; i < this->staticInst->numDestRegs(); i++) { + _destRegIdx[i] = this->staticInst->destRegIdx(i); + } + - for (int i = 0; i < this->staticInst->numSrcRegs(); i++) - { ++ for (int i = 0; i < this->staticInst->numSrcRegs(); i++) { + _srcRegIdx[i] = this->staticInst->srcRegIdx(i); + this->_readySrcRegIdx[i] = 0; + } ++} ++ ++template ++Fault ++AlphaDynInst::execute() ++{ ++ // @todo: Pretty convoluted way to avoid squashing from happening when using ++ // the XC during an instruction's execution (specifically for instructions ++ // that have sideeffects that use the XC). Fix this. 
++ bool in_syscall = this->thread->inSyscall; ++ this->thread->inSyscall = true; ++ ++ this->fault = this->staticInst->execute(this, this->traceData); ++ ++ this->thread->inSyscall = in_syscall; + ++ return this->fault; +} + +template - AlphaDynInst::AlphaDynInst(StaticInstPtr &_staticInst) - : BaseDynInst(_staticInst) ++Fault ++AlphaDynInst::initiateAcc() +{ - // Make sure to have the renamed register entries set to the same - // as the normal register entries. It will allow the IQ to work - // without any modifications. - for (int i = 0; i < _staticInst->numDestRegs(); i++) - { - _destRegIdx[i] = _staticInst->destRegIdx(i); - } ++ // @todo: Pretty convoluted way to avoid squashing from happening when using ++ // the XC during an instruction's execution (specifically for instructions ++ // that have sideeffects that use the XC). Fix this. ++ bool in_syscall = this->thread->inSyscall; ++ this->thread->inSyscall = true; ++ ++ this->fault = this->staticInst->initiateAcc(this, this->traceData); ++ ++ this->thread->inSyscall = in_syscall; + - for (int i = 0; i < _staticInst->numSrcRegs(); i++) - { - _srcRegIdx[i] = _staticInst->srcRegIdx(i); ++ return this->fault; ++} ++ ++template ++Fault ++AlphaDynInst::completeAcc() ++{ ++ if (this->isLoad()) { ++ this->fault = this->staticInst->completeAcc(this->req->data, ++ this, ++ this->traceData); ++ } else if (this->isStore()) { ++ this->fault = this->staticInst->completeAcc((uint8_t*)&this->req->result, ++ this, ++ this->traceData); ++ } else { ++ panic("Unknown type!"); + } ++ ++ return this->fault; +} + +#if FULL_SYSTEM +template +Fault +AlphaDynInst::hwrei() +{ - return this->cpu->hwrei(); ++ if (!this->cpu->inPalMode(this->readPC())) ++ return new AlphaISA::UnimplementedOpcodeFault; ++ ++ this->setNextPC(this->cpu->readMiscReg(AlphaISA::IPR_EXC_ADDR, ++ this->threadNumber)); ++ ++ // Tell CPU to clear any state it needs to if a hwrei is taken. 
++ this->cpu->hwrei(this->threadNumber); ++ ++ // FIXME: XXX check for interrupts? XXX ++ return NoFault; +} + +template +int +AlphaDynInst::readIntrFlag() +{ - return this->cpu->readIntrFlag(); ++ return this->cpu->readIntrFlag(); +} + +template +void +AlphaDynInst::setIntrFlag(int val) +{ + this->cpu->setIntrFlag(val); +} + +template +bool +AlphaDynInst::inPalMode() +{ - return this->cpu->inPalMode(); ++ return this->cpu->inPalMode(this->PC); +} + +template +void +AlphaDynInst::trap(Fault fault) +{ - this->cpu->trap(fault); ++ this->cpu->trap(fault, this->threadNumber); +} + +template +bool +AlphaDynInst::simPalCheck(int palFunc) +{ - return this->cpu->simPalCheck(palFunc); ++ return this->cpu->simPalCheck(palFunc, this->threadNumber); +} +#else +template +void +AlphaDynInst::syscall() +{ + this->cpu->syscall(this->threadNumber); +} +#endif + diff --cc src/cpu/o3/alpha_impl.hh index 5e39fcb37,000000000..f404bd3ec mode 100644,000000..100644 --- a/src/cpu/o3/alpha_impl.hh +++ b/src/cpu/o3/alpha_impl.hh @@@ -1,79 -1,0 +1,80 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - #ifndef __CPU_O3_CPU_ALPHA_IMPL_HH__ - #define __CPU_O3_CPU_ALPHA_IMPL_HH__ ++#ifndef __CPU_O3_ALPHA_IMPL_HH__ ++#define __CPU_O3_ALPHA_IMPL_HH__ + +#include "arch/alpha/isa_traits.hh" + +#include "cpu/o3/alpha_params.hh" +#include "cpu/o3/cpu_policy.hh" + +// Forward declarations. +template +class AlphaDynInst; + +template +class AlphaFullCPU; + - /** Implementation specific struct that defines several key things to the ++/** Implementation specific struct that defines several key types to the + * CPU, the stages within the CPU, the time buffers, and the DynInst. + * The struct defines the ISA, the CPU policy, the specific DynInst, the + * specific FullCPU, and all of the structs from the time buffers to do + * communication. + * This is one of the key things that must be defined for each hardware + * specific CPU implementation. + */ +struct AlphaSimpleImpl +{ + /** The type of MachInst. */ + typedef TheISA::MachInst MachInst; + - /** The CPU policy to be used (ie fetch, decode, etc.). */ ++ /** The CPU policy to be used, which defines all of the CPU stages. */ + typedef SimpleCPUPolicy CPUPol; + - /** The DynInst to be used. */ ++ /** The DynInst type to be used. 
*/ + typedef AlphaDynInst DynInst; + + /** The refcounted DynInst pointer to be used. In most cases this is + * what should be used, and not DynInst *. + */ + typedef RefCountingPtr DynInstPtr; + - /** The FullCPU to be used. */ ++ /** The FullCPU type to be used. */ + typedef AlphaFullCPU FullCPU; + + /** The Params to be passed to each stage. */ + typedef AlphaSimpleParams Params; + + enum { - MaxWidth = 8 ++ MaxWidth = 8, ++ MaxThreads = 4 + }; +}; + - #endif // __CPU_O3_CPU_ALPHA_IMPL_HH__ ++#endif // __CPU_O3_ALPHA_IMPL_HH__ diff --cc src/cpu/o3/alpha_params.hh index 79b0937e3,000000000..e3acf2c05 mode 100644,000000..100644 --- a/src/cpu/o3/alpha_params.hh +++ b/src/cpu/o3/alpha_params.hh @@@ -1,163 -1,0 +1,185 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - #ifndef __CPU_O3_CPU_ALPHA_SIMPLE_PARAMS_HH__ - #define __CPU_O3_CPU_ALPHA_SIMPLE_PARAMS_HH__ ++#ifndef __CPU_O3_ALPHA_PARAMS_HH__ ++#define __CPU_O3_ALPHA_PARAMS_HH__ + +#include "cpu/o3/cpu.hh" + +//Forward declarations - class System; - class AlphaITB; +class AlphaDTB; ++class AlphaITB; ++class FUPool; +class FunctionalMemory; - class Process; +class MemInterface; ++class Process; ++class System; + +/** + * This file defines the parameters that will be used for the AlphaFullCPU. + * This must be defined externally so that the Impl can have a params class + * defined that it can pass to all of the individual stages. 
+ */ + +class AlphaSimpleParams : public BaseFullCPU::Params +{ + public: + +#if FULL_SYSTEM + AlphaITB *itb; AlphaDTB *dtb; +#else + std::vector workload; + Process *process; +#endif // FULL_SYSTEM + ++ //Page Table ++// PageTable *pTable; ++ + FunctionalMemory *mem; + ++ BaseCPU *checker; ++ ++ unsigned activity; ++ + // + // Caches + // + MemInterface *icacheInterface; + MemInterface *dcacheInterface; + ++ unsigned cachePorts; ++ + // + // Fetch + // + unsigned decodeToFetchDelay; + unsigned renameToFetchDelay; + unsigned iewToFetchDelay; + unsigned commitToFetchDelay; + unsigned fetchWidth; + + // + // Decode + // + unsigned renameToDecodeDelay; + unsigned iewToDecodeDelay; + unsigned commitToDecodeDelay; + unsigned fetchToDecodeDelay; + unsigned decodeWidth; + + // + // Rename + // + unsigned iewToRenameDelay; + unsigned commitToRenameDelay; + unsigned decodeToRenameDelay; + unsigned renameWidth; + + // + // IEW + // + unsigned commitToIEWDelay; + unsigned renameToIEWDelay; + unsigned issueToExecuteDelay; + unsigned issueWidth; + unsigned executeWidth; + unsigned executeIntWidth; + unsigned executeFloatWidth; + unsigned executeBranchWidth; + unsigned executeMemoryWidth; ++ FUPool *fuPool; + + // + // Commit + // + unsigned iewToCommitDelay; + unsigned renameToROBDelay; + unsigned commitWidth; + unsigned squashWidth; ++ Tick trapLatency; ++ Tick fetchTrapLatency; + + // + // Branch predictor (BP & BTB) + // - /* + unsigned localPredictorSize; - unsigned localPredictorCtrBits; - */ - - unsigned local_predictor_size; - unsigned local_ctr_bits; - unsigned local_history_table_size; - unsigned local_history_bits; - unsigned global_predictor_size; - unsigned global_ctr_bits; - unsigned global_history_bits; - unsigned choice_predictor_size; - unsigned choice_ctr_bits; ++ unsigned localCtrBits; ++ unsigned localHistoryTableSize; ++ unsigned localHistoryBits; ++ unsigned globalPredictorSize; ++ unsigned globalCtrBits; ++ unsigned globalHistoryBits; ++ unsigned 
choicePredictorSize; ++ unsigned choiceCtrBits; + + unsigned BTBEntries; + unsigned BTBTagSize; + + unsigned RASSize; + + // + // Load store queue + // + unsigned LQEntries; + unsigned SQEntries; + + // + // Memory dependence + // + unsigned SSITSize; + unsigned LFSTSize; + + // + // Miscellaneous + // + unsigned numPhysIntRegs; + unsigned numPhysFloatRegs; + unsigned numIQEntries; + unsigned numROBEntries; + ++ //SMT Parameters ++ unsigned smtNumFetchingThreads; ++ ++ std::string smtFetchPolicy; ++ ++ std::string smtIQPolicy; ++ unsigned smtIQThreshold; ++ ++ std::string smtLSQPolicy; ++ unsigned smtLSQThreshold; ++ ++ std::string smtCommitPolicy; ++ ++ std::string smtROBPolicy; ++ unsigned smtROBThreshold; ++ + // Probably can get this from somewhere. + unsigned instShiftAmt; - - bool defReg; +}; + - #endif // __CPU_O3_CPU_ALPHA_PARAMS_HH__ ++#endif // __CPU_O3_ALPHA_PARAMS_HH__ diff --cc src/cpu/o3/bpred_unit.cc index 85bd6f0a6,000000000..92344111f mode 100644,000000..100644 --- a/src/cpu/o3/bpred_unit.cc +++ b/src/cpu/o3/bpred_unit.cc @@@ -1,33 -1,0 +1,37 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/bpred_unit_impl.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/alpha_dyn_inst.hh" ++#include "cpu/ozone/ozone_impl.hh" ++#include "cpu/ozone/simple_impl.hh" + +template class TwobitBPredUnit; ++template class TwobitBPredUnit; ++template class TwobitBPredUnit; diff --cc src/cpu/o3/bpred_unit.hh index 2725684f7,000000000..b7814b2e9 mode 100644,000000..100644 --- a/src/cpu/o3/bpred_unit.hh +++ b/src/cpu/o3/bpred_unit.hh @@@ -1,133 -1,0 +1,226 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - #ifndef __BPRED_UNIT_HH__ - #define __BPRED_UNIT_HH__ ++#ifndef __CPU_O3_BPRED_UNIT_HH__ ++#define __CPU_O3_BPRED_UNIT_HH__ + +// For Addr type. 
+#include "arch/isa_traits.hh" +#include "base/statistics.hh" +#include "cpu/inst_seq.hh" + +#include "cpu/o3/2bit_local_pred.hh" - #include "cpu/o3/tournament_pred.hh" +#include "cpu/o3/btb.hh" +#include "cpu/o3/ras.hh" ++#include "cpu/o3/tournament_pred.hh" + +#include + +/** + * Basically a wrapper class to hold both the branch predictor - * and the BTB. Right now I'm unsure of the implementation; it would - * be nicer to have something closer to the CPUPolicy or the Impl where - * this is just typedefs, but it forces the upper level stages to be - * aware of the constructors of the BP and the BTB. The nicer thing - * to do is have this templated on the Impl, accept the usual Params - * object, and be able to call the constructors on the BP and BTB. ++ * and the BTB. + */ +template +class TwobitBPredUnit +{ + public: + typedef typename Impl::Params Params; + typedef typename Impl::DynInstPtr DynInstPtr; + - TwobitBPredUnit(Params ¶ms); ++ /** ++ * @param params The params object, that has the size of the BP and BTB. ++ */ ++ TwobitBPredUnit(Params *params); + ++ /** ++ * Registers statistics. ++ */ + void regStats(); + - bool predict(DynInstPtr &inst, Addr &PC); - - void update(const InstSeqNum &done_sn); - - void squash(const InstSeqNum &squashed_sn); - ++ void switchOut(); ++ ++ void takeOverFrom(); ++ ++ /** ++ * Predicts whether or not the instruction is a taken branch, and the ++ * target of the branch if it is taken. ++ * @param inst The branch instruction. ++ * @param PC The predicted PC is passed back through this parameter. ++ * @param tid The thread id. ++ * @return Returns if the branch is taken or not. ++ */ ++ bool predict(DynInstPtr &inst, Addr &PC, unsigned tid); ++ ++ /** ++ * Tells the branch predictor to commit any updates until the given ++ * sequence number. ++ * @param done_sn The sequence number to commit any older updates up until. ++ * @param tid The thread id. 
++ */ ++ void update(const InstSeqNum &done_sn, unsigned tid); ++ ++ /** ++ * Squashes all outstanding updates until a given sequence number. ++ * @param squashed_sn The sequence number to squash any younger updates up ++ * until. ++ * @param tid The thread id. ++ */ ++ void squash(const InstSeqNum &squashed_sn, unsigned tid); ++ ++ /** ++ * Squashes all outstanding updates until a given sequence number, and ++ * corrects that sn's update with the proper address and taken/not taken. ++ * @param squashed_sn The sequence number to squash any younger updates up ++ * until. ++ * @param corr_target The correct branch target. ++ * @param actually_taken The correct branch direction. ++ * @param tid The thread id. ++ */ + void squash(const InstSeqNum &squashed_sn, const Addr &corr_target, - bool actually_taken); ++ bool actually_taken, unsigned tid); + ++ /** ++ * Looks up a given PC in the BP to see if it is taken or not taken. ++ * @param inst_PC The PC to look up. ++ * @return Whether the branch is taken or not taken. ++ */ + bool BPLookup(Addr &inst_PC) + { return BP.lookup(inst_PC); } + ++ /** ++ * Looks up a given PC in the BTB to see if a matching entry exists. ++ * @param inst_PC The PC to look up. ++ * @return Whether the BTB contains the given PC. ++ */ + bool BTBValid(Addr &inst_PC) - { return BTB.valid(inst_PC); } ++ { return BTB.valid(inst_PC, 0); } + ++ /** ++ * Looks up a given PC in the BTB to get the predicted target. ++ * @param inst_PC The PC to look up. ++ * @return The address of the target of the branch. ++ */ + Addr BTBLookup(Addr &inst_PC) - { return BTB.lookup(inst_PC); } - - // Will want to include global history. ++ { return BTB.lookup(inst_PC, 0); } ++ ++ /** ++ * Updates the BP with taken/not taken information. ++ * @param inst_PC The branch's PC that will be updated. ++ * @param taken Whether the branch was taken or not taken. ++ * @todo Make this update flexible enough to handle a global predictor. 
++ */ + void BPUpdate(Addr &inst_PC, bool taken) + { BP.update(inst_PC, taken); } + ++ /** ++ * Updates the BTB with the target of a branch. ++ * @param inst_PC The branch's PC that will be updated. ++ * @param target_PC The branch's target that will be added to the BTB. ++ */ + void BTBUpdate(Addr &inst_PC, Addr &target_PC) - { BTB.update(inst_PC, target_PC); } ++ { BTB.update(inst_PC, target_PC,0); } + + private: + struct PredictorHistory { ++ /** ++ * Makes a predictor history struct that contains a sequence number, ++ * the PC of its instruction, and whether or not it was predicted ++ * taken. ++ */ + PredictorHistory(const InstSeqNum &seq_num, const Addr &inst_PC, - const bool pred_taken) - : seqNum(seq_num), PC(inst_PC), predTaken(pred_taken), - globalHistory(0), usedRAS(0), wasCall(0), RASIndex(0), - RASTarget(0) ++ const bool pred_taken, const unsigned _tid) ++ : seqNum(seq_num), PC(inst_PC), RASTarget(0), globalHistory(0), ++ RASIndex(0), tid(_tid), predTaken(pred_taken), usedRAS(0), ++ wasCall(0) + { } + ++ /** The sequence number for the predictor history entry. */ + InstSeqNum seqNum; + ++ /** The PC associated with the sequence number. */ + Addr PC; + - bool predTaken; ++ /** The RAS target (only valid if a return). */ ++ Addr RASTarget; + ++ /** The global history at the time this entry was created. */ + unsigned globalHistory; + - bool usedRAS; ++ /** The RAS index of the instruction (only valid if a call). */ ++ unsigned RASIndex; + - bool wasCall; ++ /** The thread id. */ ++ unsigned tid; + - unsigned RASIndex; ++ /** Whether or not it was predicted taken. */ ++ bool predTaken; + - Addr RASTarget; ++ /** Whether or not the RAS was used. */ ++ bool usedRAS; ++ ++ /** Whether or not the instruction was a call. */ ++ bool wasCall; + }; + - std::list predHist; ++ typedef std::list History; ++ ++ /** ++ * The per-thread predictor history. 
This is used to update the predictor ++ * as instructions are committed, or restore it to the proper state after ++ * a squash. ++ */ ++ History predHist[Impl::MaxThreads]; + ++ /** The branch predictor. */ + DefaultBP BP; + ++ /** The BTB. */ + DefaultBTB BTB; + - ReturnAddrStack RAS; ++ /** The per-thread return address stack. */ ++ ReturnAddrStack RAS[Impl::MaxThreads]; + ++ /** Stat for number of BP lookups. */ + Stats::Scalar<> lookups; ++ /** Stat for number of conditional branches predicted. */ + Stats::Scalar<> condPredicted; ++ /** Stat for number of conditional branches predicted incorrectly. */ + Stats::Scalar<> condIncorrect; ++ /** Stat for number of BTB lookups. */ + Stats::Scalar<> BTBLookups; ++ /** Stat for number of BTB hits. */ + Stats::Scalar<> BTBHits; ++ /** Stat for number of times the BTB is correct. */ + Stats::Scalar<> BTBCorrect; ++ /** Stat for number of times the RAS is used to get a target. */ + Stats::Scalar<> usedRAS; ++ /** Stat for number of times the RAS is incorrect. */ + Stats::Scalar<> RASIncorrect; +}; + - #endif // __BPRED_UNIT_HH__ ++#endif // __CPU_O3_BPRED_UNIT_HH__ diff --cc src/cpu/o3/bpred_unit_impl.hh index 8d16a0cdf,000000000..c37df606b mode 100644,000000..100644 --- a/src/cpu/o3/bpred_unit_impl.hh +++ b/src/cpu/o3/bpred_unit_impl.hh @@@ -1,276 -1,0 +1,324 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + ++#include ++#include ++ +#include "base/trace.hh" +#include "base/traceflags.hh" +#include "cpu/o3/bpred_unit.hh" + ++using namespace std; ++ +template - TwobitBPredUnit::TwobitBPredUnit(Params ¶ms) - : BP(params.local_predictor_size, - params.local_ctr_bits, - params.instShiftAmt), - BTB(params.BTBEntries, - params.BTBTagSize, - params.instShiftAmt), - RAS(params.RASSize) ++TwobitBPredUnit::TwobitBPredUnit(Params *params) ++ : BP(params->localPredictorSize, ++ params->localCtrBits, ++ params->instShiftAmt), ++ BTB(params->BTBEntries, ++ params->BTBTagSize, ++ params->instShiftAmt) +{ ++ for (int i=0; i < Impl::MaxThreads; i++) ++ RAS[i].init(params->RASSize); +} + +template +void +TwobitBPredUnit::regStats() +{ + lookups + .name(name() + ".BPredUnit.lookups") + .desc("Number of BP lookups") + ; + + condPredicted + .name(name() + ".BPredUnit.condPredicted") + .desc("Number of conditional branches predicted") + ; + + condIncorrect + .name(name() + ".BPredUnit.condIncorrect") + .desc("Number of conditional branches incorrect") + ; + + BTBLookups + .name(name() + ".BPredUnit.BTBLookups") + .desc("Number of BTB lookups") + ; + + BTBHits + .name(name() + ".BPredUnit.BTBHits") + .desc("Number of BTB hits") + ; + + BTBCorrect + .name(name() + ".BPredUnit.BTBCorrect") + .desc("Number of correct BTB predictions (this stat may not " + "work properly.") + ; + + usedRAS + .name(name() + ".BPredUnit.usedRAS") - .desc("Number of times the RAS was used.") ++ .desc("Number of times the RAS was used to get a target.") + ; + + RASIncorrect + .name(name() + ".BPredUnit.RASInCorrect") + .desc("Number of incorrect RAS predictions.") + ; +} + ++template ++void ++TwobitBPredUnit::switchOut() ++{ ++ for (int i = 0; i < Impl::MaxThreads; ++i) { ++ predHist[i].clear(); ++ } ++} ++ ++template ++void ++TwobitBPredUnit::takeOverFrom() ++{ ++/* ++ for (int i = 0; i < Impl::MaxThreads; ++i) ++ RAS[i].reset(); ++ ++ BP.reset(); ++ BTB.reset(); ++*/ ++} ++ +template +bool - 
TwobitBPredUnit::predict(DynInstPtr &inst, Addr &PC) ++TwobitBPredUnit::predict(DynInstPtr &inst, Addr &PC, unsigned tid) +{ + // See if branch predictor predicts taken. + // If so, get its target addr either from the BTB or the RAS. + // Once that's done, speculatively update the predictor? + // Save off record of branch stuff so the RAS can be fixed + // up once it's done. + + using TheISA::MachInst; + + bool pred_taken = false; + Addr target; + + ++lookups; + + if (inst->isUncondCtrl()) { - DPRINTF(Fetch, "BranchPred: Unconditional control.\n"); ++ DPRINTF(Fetch, "BranchPred: [tid:%i] Unconditional control.\n", tid); + pred_taken = true; + } else { + ++condPredicted; + + pred_taken = BPLookup(PC); + - DPRINTF(Fetch, "BranchPred: Branch predictor predicted %i for PC %#x" - "\n", pred_taken, inst->readPC()); ++ DPRINTF(Fetch, "BranchPred: [tid:%i]: Branch predictor predicted %i " ++ "for PC %#x\n", ++ tid, pred_taken, inst->readPC()); + } + - PredictorHistory predict_record(inst->seqNum, PC, pred_taken); ++ PredictorHistory predict_record(inst->seqNum, PC, pred_taken, tid); + + // Now lookup in the BTB or RAS. + if (pred_taken) { + if (inst->isReturn()) { + ++usedRAS; + + // If it's a function return call, then look up the address + // in the RAS. - target = RAS.top(); ++ target = RAS[tid].top(); + + // Record the top entry of the RAS, and its index. 
+ predict_record.usedRAS = true; - predict_record.RASIndex = RAS.topIdx(); ++ predict_record.RASIndex = RAS[tid].topIdx(); + predict_record.RASTarget = target; + - RAS.pop(); ++ assert(predict_record.RASIndex < 16); + - DPRINTF(Fetch, "BranchPred: Instruction %#x is a return, RAS " - "predicted target: %#x, RAS index: %i.\n", - inst->readPC(), target, predict_record.RASIndex); ++ RAS[tid].pop(); ++ ++ DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %#x is a return, " ++ "RAS predicted target: %#x, RAS index: %i.\n", ++ tid, inst->readPC(), target, predict_record.RASIndex); + } else { + ++BTBLookups; + + if (inst->isCall()) { - RAS.push(PC+sizeof(MachInst)); ++ RAS[tid].push(PC + sizeof(MachInst)); + + // Record that it was a call so that the top RAS entry can + // be popped off if the speculation is incorrect. + predict_record.wasCall = true; + - DPRINTF(Fetch, "BranchPred: Instruction %#x was a call, " - "adding %#x to the RAS.\n", - inst->readPC(), PC+sizeof(MachInst)); ++ DPRINTF(Fetch, "BranchPred: [tid:%i] Instruction %#x was a call" ++ ", adding %#x to the RAS.\n", ++ tid, inst->readPC(), PC + sizeof(MachInst)); + } + - if (BTB.valid(PC)) { ++ if (BTB.valid(PC, tid)) { + ++BTBHits; + + //If it's anything else, use the BTB to get the target addr. - target = BTB.lookup(PC); ++ target = BTB.lookup(PC, tid); + - DPRINTF(Fetch, "BranchPred: Instruction %#x predicted target " - "is %#x.\n", inst->readPC(), target); ++ DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %#x predicted" ++ " target is %#x.\n", ++ tid, inst->readPC(), target); + + } else { - DPRINTF(Fetch, "BranchPred: BTB doesn't have a valid entry." - "\n"); ++ DPRINTF(Fetch, "BranchPred: [tid:%i]: BTB doesn't have a " ++ "valid entry.\n",tid); + pred_taken = false; + } + + } + } + + if (pred_taken) { + // Set the PC and the instruction's predicted target. 
+ PC = target; + inst->setPredTarg(target); + } else { + PC = PC + sizeof(MachInst); + inst->setPredTarg(PC); + } + - predHist.push_front(predict_record); ++ predHist[tid].push_front(predict_record); + - assert(!predHist.empty()); ++ DPRINTF(Fetch, "[tid:%i] predHist.size(): %i\n", tid, predHist[tid].size()); + + return pred_taken; +} + +template +void - TwobitBPredUnit::update(const InstSeqNum &done_sn) ++TwobitBPredUnit::update(const InstSeqNum &done_sn, unsigned tid) +{ - DPRINTF(Fetch, "BranchPred: Commiting branches until sequence number " - "%i.\n", done_sn); - - while (!predHist.empty() && predHist.back().seqNum <= done_sn) { - assert(!predHist.empty()); ++ DPRINTF(Fetch, "BranchPred: [tid:%i]: Commiting branches until sequence" ++ "number %lli.\n", tid, done_sn); + - // Update the branch predictor with the correct results of branches. - BP.update(predHist.back().PC, predHist.back().predTaken); ++ while (!predHist[tid].empty() && ++ predHist[tid].back().seqNum <= done_sn) { ++ // Update the branch predictor with the correct results. 
++ BP.update(predHist[tid].back().PC, ++ predHist[tid].back().predTaken); + - predHist.pop_back(); ++ predHist[tid].pop_back(); + } +} + +template +void - TwobitBPredUnit::squash(const InstSeqNum &squashed_sn) ++TwobitBPredUnit::squash(const InstSeqNum &squashed_sn, unsigned tid) +{ - while (!predHist.empty() && predHist.front().seqNum > squashed_sn) { - if (predHist.front().usedRAS) { - DPRINTF(Fetch, "BranchPred: Restoring top of RAS to: %i, " - "target: %#x.\n", - predHist.front().RASIndex, - predHist.front().RASTarget); ++ History &pred_hist = predHist[tid]; ++ ++ while (!pred_hist.empty() && ++ pred_hist.front().seqNum > squashed_sn) { ++ if (pred_hist.front().usedRAS) { ++ DPRINTF(Fetch, "BranchPred: [tid:%i]: Restoring top of RAS to: %i," ++ " target: %#x.\n", ++ tid, ++ pred_hist.front().RASIndex, ++ pred_hist.front().RASTarget); ++ ++ RAS[tid].restore(pred_hist.front().RASIndex, ++ pred_hist.front().RASTarget); + - RAS.restore(predHist.front().RASIndex, - predHist.front().RASTarget); - } else if (predHist.front().wasCall) { - DPRINTF(Fetch, "BranchPred: Removing speculative entry added " - "to the RAS.\n"); ++ } else if (pred_hist.front().wasCall) { ++ DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing speculative entry added " ++ "to the RAS.\n",tid); + - RAS.pop(); ++ RAS[tid].pop(); + } + - predHist.pop_front(); ++ pred_hist.pop_front(); + } ++ +} + +template +void +TwobitBPredUnit::squash(const InstSeqNum &squashed_sn, + const Addr &corr_target, - const bool actually_taken) ++ const bool actually_taken, ++ unsigned tid) +{ + // Now that we know that a branch was mispredicted, we need to undo + // all the branches that have been seen up until this branch and + // fix up everything. 
+ ++ History &pred_hist = predHist[tid]; ++ + ++condIncorrect; + - DPRINTF(Fetch, "BranchPred: Squashing from sequence number %i, " ++ DPRINTF(Fetch, "BranchPred: [tid:%i]: Squashing from sequence number %i, " + "setting target to %#x.\n", - squashed_sn, corr_target); - - while (!predHist.empty() && predHist.front().seqNum > squashed_sn) { ++ tid, squashed_sn, corr_target); + - if (predHist.front().usedRAS) { - DPRINTF(Fetch, "BranchPred: Restoring top of RAS to: %i, " ++ while (!pred_hist.empty() && ++ pred_hist.front().seqNum > squashed_sn) { ++ if (pred_hist.front().usedRAS) { ++ DPRINTF(Fetch, "BranchPred: [tid:%i]: Restoring top of RAS to: %i, " + "target: %#x.\n", - predHist.front().RASIndex, - predHist.front().RASTarget); ++ tid, ++ pred_hist.front().RASIndex, ++ pred_hist.front().RASTarget); + - RAS.restore(predHist.front().RASIndex, - predHist.front().RASTarget); - } else if (predHist.front().wasCall) { - DPRINTF(Fetch, "BranchPred: Removing speculative entry added " - "to the RAS.\n"); ++ RAS[tid].restore(pred_hist.front().RASIndex, ++ pred_hist.front().RASTarget); ++ } else if (pred_hist.front().wasCall) { ++ DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing speculative entry" ++ " added to the RAS.\n", tid); + - RAS.pop(); ++ RAS[tid].pop(); + } + - predHist.pop_front(); ++ pred_hist.pop_front(); + } + - predHist.front().predTaken = actually_taken; ++ // If there's a squash due to a syscall, there may not be an entry ++ // corresponding to the squash. In that case, don't bother trying to ++ // fix up the entry. 
++ if (!pred_hist.empty()) { ++ pred_hist.front().predTaken = actually_taken; + - if (predHist.front().usedRAS) { - ++RASIncorrect; - } ++ if (pred_hist.front().usedRAS) { ++ ++RASIncorrect; ++ } + - BP.update(predHist.front().PC, actually_taken); ++ BP.update(pred_hist.front().PC, actually_taken); + - BTB.update(predHist.front().PC, corr_target); ++ BTB.update(pred_hist.front().PC, corr_target, tid); ++ pred_hist.pop_front(); ++ } +} diff --cc src/cpu/o3/btb.cc index 2d39c3856,000000000..e5f69043a mode 100644,000000..100644 --- a/src/cpu/o3/btb.cc +++ b/src/cpu/o3/btb.cc @@@ -1,120 -1,0 +1,134 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "base/intmath.hh" +#include "base/trace.hh" +#include "cpu/o3/btb.hh" + +using namespace TheISA; + +DefaultBTB::DefaultBTB(unsigned _numEntries, + unsigned _tagBits, + unsigned _instShiftAmt) + : numEntries(_numEntries), + tagBits(_tagBits), + instShiftAmt(_instShiftAmt) +{ - // @todo Check to make sure num_entries is valid (a power of 2) - + DPRINTF(Fetch, "BTB: Creating BTB object.\n"); + - btb = new BTBEntry[numEntries]; ++ if (!isPowerOf2(numEntries)) { ++ fatal("BTB entries is not a power of 2!"); ++ } ++ ++ btb.resize(numEntries); + - for (int i = 0; i < numEntries; ++i) - { ++ for (int i = 0; i < numEntries; ++i) { + btb[i].valid = false; + } + + idxMask = numEntries - 1; + + tagMask = (1 << tagBits) - 1; + + tagShiftAmt = instShiftAmt + floorLog2(numEntries); +} + ++void ++DefaultBTB::reset() ++{ ++ for (int i = 0; i < numEntries; ++i) { ++ btb[i].valid = false; ++ } ++} ++ +inline +unsigned +DefaultBTB::getIndex(const Addr &inst_PC) +{ + // Need to shift PC over by the word offset. 
+ return (inst_PC >> instShiftAmt) & idxMask; +} + +inline +Addr +DefaultBTB::getTag(const Addr &inst_PC) +{ + return (inst_PC >> tagShiftAmt) & tagMask; +} + +bool - DefaultBTB::valid(const Addr &inst_PC) ++DefaultBTB::valid(const Addr &inst_PC, unsigned tid) +{ + unsigned btb_idx = getIndex(inst_PC); + + Addr inst_tag = getTag(inst_PC); + + assert(btb_idx < numEntries); + - if (btb[btb_idx].valid && inst_tag == btb[btb_idx].tag) { ++ if (btb[btb_idx].valid ++ && inst_tag == btb[btb_idx].tag ++ && btb[btb_idx].tid == tid) { + return true; + } else { + return false; + } +} + +// @todo Create some sort of return struct that has both whether or not the +// address is valid, and also the address. For now will just use addr = 0 to +// represent invalid entry. +Addr - DefaultBTB::lookup(const Addr &inst_PC) ++DefaultBTB::lookup(const Addr &inst_PC, unsigned tid) +{ + unsigned btb_idx = getIndex(inst_PC); + + Addr inst_tag = getTag(inst_PC); + + assert(btb_idx < numEntries); + - if (btb[btb_idx].valid && inst_tag == btb[btb_idx].tag) { ++ if (btb[btb_idx].valid ++ && inst_tag == btb[btb_idx].tag ++ && btb[btb_idx].tid == tid) { + return btb[btb_idx].target; + } else { + return 0; + } +} + +void - DefaultBTB::update(const Addr &inst_PC, const Addr &target) ++DefaultBTB::update(const Addr &inst_PC, const Addr &target, unsigned tid) +{ + unsigned btb_idx = getIndex(inst_PC); + + assert(btb_idx < numEntries); + ++ btb[btb_idx].tid = tid; + btb[btb_idx].valid = true; + btb[btb_idx].target = target; + btb[btb_idx].tag = getTag(inst_PC); +} diff --cc src/cpu/o3/btb.hh index 77bdc32ea,000000000..b9ff42573 mode 100644,000000..100644 --- a/src/cpu/o3/btb.hh +++ b/src/cpu/o3/btb.hh @@@ -1,80 -1,0 +1,127 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - #ifndef __CPU_O3_CPU_BTB_HH__ - #define __CPU_O3_CPU_BTB_HH__ ++#ifndef __CPU_O3_BTB_HH__ ++#define __CPU_O3_BTB_HH__ + +// For Addr type. +#include "arch/isa_traits.hh" + +class DefaultBTB +{ + private: + struct BTBEntry + { + BTBEntry() + : tag(0), target(0), valid(false) + { + } + ++ /** The entry's tag. */ + Addr tag; ++ ++ /** The entry's target. */ + Addr target; ++ ++ /** The entry's thread id. */ ++ unsigned tid; ++ ++ /** Whether or not the entry is valid. 
*/ + bool valid; + }; + + public: ++ /** Creates a BTB with the given number of entries, number of bits per ++ * tag, and instruction offset amount. ++ * @param numEntries Number of entries for the BTB. ++ * @param tagBits Number of bits for each tag in the BTB. ++ * @param instShiftAmt Offset amount for instructions to ignore alignment. ++ */ + DefaultBTB(unsigned numEntries, unsigned tagBits, + unsigned instShiftAmt); + - Addr lookup(const Addr &inst_PC); - - bool valid(const Addr &inst_PC); - - void update(const Addr &inst_PC, const Addr &target_PC); ++ void reset(); ++ ++ /** Looks up an address in the BTB. Must call valid() first on the address. ++ * @param inst_PC The address of the branch to look up. ++ * @param tid The thread id. ++ * @return Returns the target of the branch. ++ */ ++ Addr lookup(const Addr &inst_PC, unsigned tid); ++ ++ /** Checks if a branch is in the BTB. ++ * @param inst_PC The address of the branch to look up. ++ * @param tid The thread id. ++ * @return Whether or not the branch exists in the BTB. ++ */ ++ bool valid(const Addr &inst_PC, unsigned tid); ++ ++ /** Updates the BTB with the target of a branch. ++ * @param inst_PC The address of the branch being updated. ++ * @param target_PC The target address of the branch. ++ * @param tid The thread id. ++ */ ++ void update(const Addr &inst_PC, const Addr &target_PC, ++ unsigned tid); + + private: ++ /** Returns the index into the BTB, based on the branch's PC. ++ * @param inst_PC The branch to look up. ++ * @return Returns the index into the BTB. ++ */ + inline unsigned getIndex(const Addr &inst_PC); + ++ /** Returns the tag bits of a given address. ++ * @param inst_PC The branch's address. ++ * @return Returns the tag bits. ++ */ + inline Addr getTag(const Addr &inst_PC); + - BTBEntry *btb; ++ /** The actual BTB. */ ++ std::vector btb; + ++ /** The number of entries in the BTB. */ + unsigned numEntries; + ++ /** The index mask. 
*/ + unsigned idxMask; + ++ /** The number of tag bits per entry. */ + unsigned tagBits; + ++ /** The tag mask. */ + unsigned tagMask; + ++ /** Number of bits to shift PC when calculating index. */ + unsigned instShiftAmt; + ++ /** Number of bits to shift PC when calculating tag. */ + unsigned tagShiftAmt; +}; + - #endif // __CPU_O3_CPU_BTB_HH__ ++#endif // __CPU_O3_BTB_HH__ diff --cc src/cpu/o3/comm.hh index c74c77ddf,000000000..c36c58d3d mode 100644,000000..100644 --- a/src/cpu/o3/comm.hh +++ b/src/cpu/o3/comm.hh @@@ -1,163 -1,0 +1,197 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - #ifndef __CPU_O3_CPU_COMM_HH__ - #define __CPU_O3_CPU_COMM_HH__ ++#ifndef __CPU_O3_COMM_HH__ ++#define __CPU_O3_COMM_HH__ + +#include + ++#include "arch/faults.hh" +#include "arch/isa_traits.hh" +#include "cpu/inst_seq.hh" +#include "sim/host.hh" + - // Find better place to put this typedef. - // The impl might be the best place for this. ++// Typedef for physical register index type. Although the Impl would be the ++// most likely location for this, there are a few classes that need this ++// typedef yet are not templated on the Impl. For now it will be defined here. 
+typedef short int PhysRegIndex; + +template - struct SimpleFetchSimpleDecode { ++struct DefaultFetchDefaultDecode { + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth]; ++ Fault fetchFault; ++ InstSeqNum fetchFaultSN; ++ bool clearFetchFault; +}; + +template - struct SimpleDecodeSimpleRename { ++struct DefaultDecodeDefaultRename { + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth]; +}; + +template - struct SimpleRenameSimpleIEW { ++struct DefaultRenameDefaultIEW { + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth]; +}; + +template - struct SimpleIEWSimpleCommit { ++struct DefaultIEWDefaultCommit { + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth]; + - bool squash; - bool branchMispredict; - bool branchTaken; - uint64_t mispredPC; - uint64_t nextPC; - InstSeqNum squashedSeqNum; ++ bool squash[Impl::MaxThreads]; ++ bool branchMispredict[Impl::MaxThreads]; ++ bool branchTaken[Impl::MaxThreads]; ++ uint64_t mispredPC[Impl::MaxThreads]; ++ uint64_t nextPC[Impl::MaxThreads]; ++ InstSeqNum squashedSeqNum[Impl::MaxThreads]; ++ ++ bool includeSquashInst[Impl::MaxThreads]; +}; + +template +struct IssueStruct { + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxWidth]; +}; + ++template +struct TimeBufStruct { + struct decodeComm { + bool squash; - bool stall; + bool predIncorrect; + uint64_t branchAddr; + + InstSeqNum doneSeqNum; + - // Might want to package this kind of branch stuff into a single ++ // @todo: Might want to package this kind of branch stuff into a single + // struct as it is used pretty frequently. 
+ bool branchMispredict; + bool branchTaken; + uint64_t mispredPC; + uint64_t nextPC; ++ ++ unsigned branchCount; + }; + - decodeComm decodeInfo; ++ decodeComm decodeInfo[Impl::MaxThreads]; + + // Rename can't actually tell anything to squash or send a new PC back + // because it doesn't do anything along those lines. But maybe leave + // these fields in here to keep the stages mostly orthagonal. + struct renameComm { + bool squash; - bool stall; + + uint64_t nextPC; + }; + - renameComm renameInfo; ++ renameComm renameInfo[Impl::MaxThreads]; + + struct iewComm { - bool stall; - + // Also eventually include skid buffer space. ++ bool usedIQ; + unsigned freeIQEntries; ++ bool usedLSQ; ++ unsigned freeLSQEntries; ++ ++ unsigned iqCount; ++ unsigned ldstqCount; ++ ++ unsigned dispatched; ++ unsigned dispatchedToLSQ; + }; + - iewComm iewInfo; ++ iewComm iewInfo[Impl::MaxThreads]; + + struct commitComm { - bool squash; - bool stall; ++ bool usedROB; + unsigned freeROBEntries; ++ bool emptyROB; ++ ++ bool squash; ++ bool robSquashing; + + bool branchMispredict; + bool branchTaken; + uint64_t mispredPC; + uint64_t nextPC; + - bool robSquashing; - + // Represents the instruction that has either been retired or + // squashed. Similar to having a single bus that broadcasts the + // retired or squashed sequence number. + InstSeqNum doneSeqNum; + - // Extra bit of information so that the LDSTQ only updates when it - // needs to. - bool commitIsLoad; ++ //Just in case we want to do a commit/squash on a cycle ++ //(necessary for multiple ROBs?) ++ bool commitInsts; ++ InstSeqNum squashSeqNum; + + // Communication specifically to the IQ to tell the IQ that it can + // schedule a non-speculative instruction. + InstSeqNum nonSpecSeqNum; ++ ++ // Hack for now to send back an uncached access to the IEW stage. 
++ typedef typename Impl::DynInstPtr DynInstPtr; ++ bool uncached; ++ DynInstPtr uncachedLoad; ++ ++ bool interruptPending; ++ bool clearInterrupt; + }; + - commitComm commitInfo; ++ commitComm commitInfo[Impl::MaxThreads]; ++ ++ bool decodeBlock[Impl::MaxThreads]; ++ bool decodeUnblock[Impl::MaxThreads]; ++ bool renameBlock[Impl::MaxThreads]; ++ bool renameUnblock[Impl::MaxThreads]; ++ bool iewBlock[Impl::MaxThreads]; ++ bool iewUnblock[Impl::MaxThreads]; ++ bool commitBlock[Impl::MaxThreads]; ++ bool commitUnblock[Impl::MaxThreads]; +}; + - #endif //__CPU_O3_CPU_COMM_HH__ ++#endif //__CPU_O3_COMM_HH__ diff --cc src/cpu/o3/commit.cc index cf33d7f8b,000000000..fe5e9c1de mode 100644,000000..100644 --- a/src/cpu/o3/commit.cc +++ b/src/cpu/o3/commit.cc @@@ -1,33 -1,0 +1,33 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/commit_impl.hh" + - template class SimpleCommit; ++template class DefaultCommit; diff --cc src/cpu/o3/commit.hh index 580c1a316,000000000..66abf8dc6 mode 100644,000000..100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@@ -1,180 -1,0 +1,424 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - // Todo: Maybe have a special method for handling interrupts/traps. - // - // Traps: Have IEW send a signal to commit saying that there's a trap to - // be handled. Have commit send the PC back to the fetch stage, along - // with the current commit PC. Fetch will directly access the IPR and save - // off all the proper stuff. Commit can send out a squash, or something - // close to it. - // Do the same for hwrei(). However, requires that commit be specifically - // built to support that kind of stuff. Probably not horrible to have - // commit support having the CPU tell it to squash the other stages and - // restart at a given address. The IPR register does become an issue. - // Probably not a big deal if the IPR stuff isn't cycle accurate. Can just - // have the original function handle writing to the IPR register. 
- - #ifndef __CPU_O3_CPU_SIMPLE_COMMIT_HH__ - #define __CPU_O3_CPU_SIMPLE_COMMIT_HH__ ++#ifndef __CPU_O3_COMMIT_HH__ ++#define __CPU_O3_COMMIT_HH__ + ++#include "arch/faults.hh" +#include "base/statistics.hh" +#include "base/timebuf.hh" ++#include "cpu/exetrace.hh" ++#include "cpu/inst_seq.hh" +#include "mem/memory_interface.hh" + ++template ++class O3ThreadState; ++ ++/** ++ * DefaultCommit handles single threaded and SMT commit. Its width is ++ * specified by the parameters; each cycle it tries to commit that ++ * many instructions. The SMT policy decides which thread it tries to ++ * commit instructions from. Non- speculative instructions must reach ++ * the head of the ROB before they are ready to execute; once they ++ * reach the head, commit will broadcast the instruction's sequence ++ * number to the previous stages so that they can issue/ execute the ++ * instruction. Only one non-speculative instruction is handled per ++ * cycle. Commit is responsible for handling all back-end initiated ++ * redirects. It receives the redirect, and then broadcasts it to all ++ * stages, indicating the sequence number they should squash until, ++ * and any necessary branch misprediction information as well. It ++ * priortizes redirects by instruction's age, only broadcasting a ++ * redirect if it corresponds to an instruction that should currently ++ * be in the ROB. This is done by tracking the sequence number of the ++ * youngest instruction in the ROB, which gets updated to any ++ * squashing instruction's sequence number, and only broadcasting a ++ * redirect if it corresponds to an older instruction. Commit also ++ * supports multiple cycle squashing, to model a ROB that can only ++ * remove a certain number of instructions per cycle. ++ */ +template - class SimpleCommit ++class DefaultCommit +{ + public: + // Typedefs from the Impl. 
+ typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::Params Params; + typedef typename Impl::CPUPol CPUPol; + ++ typedef typename CPUPol::RenameMap RenameMap; + typedef typename CPUPol::ROB ROB; + + typedef typename CPUPol::TimeStruct TimeStruct; ++ typedef typename CPUPol::FetchStruct FetchStruct; + typedef typename CPUPol::IEWStruct IEWStruct; + typedef typename CPUPol::RenameStruct RenameStruct; + - public: - // I don't believe commit can block, so it will only have two - // statuses for now. - // Actually if there's a cache access that needs to block (ie - // uncachable load or just a mem access in commit) then the stage - // may have to wait. - enum Status { ++ typedef typename CPUPol::Fetch Fetch; ++ typedef typename CPUPol::IEW IEW; ++ ++ typedef O3ThreadState Thread; ++ ++ class TrapEvent : public Event { ++ private: ++ DefaultCommit *commit; ++ unsigned tid; ++ ++ public: ++ TrapEvent(DefaultCommit *_commit, unsigned _tid); ++ ++ void process(); ++ const char *description(); ++ }; ++ ++ /** Overall commit status. Used to determine if the CPU can deschedule ++ * itself due to a lack of activity. ++ */ ++ enum CommitStatus{ ++ Active, ++ Inactive ++ }; ++ ++ /** Individual thread status. */ ++ enum ThreadStatus { + Running, + Idle, + ROBSquashing, - DcacheMissStall, - DcacheMissComplete ++ TrapPending, ++ FetchTrapPending ++ }; ++ ++ /** Commit policy for SMT mode. */ ++ enum CommitPolicy { ++ Aggressive, ++ RoundRobin, ++ OldestReady + }; + + private: - Status _status; ++ /** Overall commit status. */ ++ CommitStatus _status; ++ /** Next commit status, to be set at the end of the cycle. */ ++ CommitStatus _nextStatus; ++ /** Per-thread status. */ ++ ThreadStatus commitStatus[Impl::MaxThreads]; ++ /** Commit policy used in SMT mode. */ ++ CommitPolicy commitPolicy; + + public: - SimpleCommit(Params ¶ms); ++ /** Construct a DefaultCommit with the given parameters. 
*/ ++ DefaultCommit(Params *params); ++ ++ /** Returns the name of the DefaultCommit. */ ++ std::string name() const; + ++ /** Registers statistics. */ + void regStats(); + ++ /** Sets the CPU pointer. */ + void setCPU(FullCPU *cpu_ptr); + ++ /** Sets the list of threads. */ ++ void setThreads(std::vector &threads); ++ ++ /** Sets the main time buffer pointer, used for backwards communication. */ + void setTimeBuffer(TimeBuffer *tb_ptr); + ++ void setFetchQueue(TimeBuffer *fq_ptr); ++ ++ /** Sets the pointer to the queue coming from rename. */ + void setRenameQueue(TimeBuffer *rq_ptr); + ++ /** Sets the pointer to the queue coming from IEW. */ + void setIEWQueue(TimeBuffer *iq_ptr); + ++ void setFetchStage(Fetch *fetch_stage); ++ ++ Fetch *fetchStage; ++ ++ /** Sets the poitner to the IEW stage. */ ++ void setIEWStage(IEW *iew_stage); ++ ++ /** The pointer to the IEW stage. Used solely to ensure that ++ * various events (traps, interrupts, syscalls) do not occur until ++ * all stores have written back. ++ */ ++ IEW *iewStage; ++ ++ /** Sets pointer to list of active threads. */ ++ void setActiveThreads(std::list *at_ptr); ++ ++ /** Sets pointer to the commited state rename map. */ ++ void setRenameMap(RenameMap rm_ptr[Impl::MaxThreads]); ++ ++ /** Sets pointer to the ROB. */ + void setROB(ROB *rob_ptr); + ++ /** Initializes stage by sending back the number of free entries. */ ++ void initStage(); ++ ++ void switchOut(); ++ ++ void doSwitchOut(); ++ ++ void takeOverFrom(); ++ ++ /** Ticks the commit stage, which tries to commit instructions. */ + void tick(); + ++ /** Handles any squashes that are sent from IEW, and adds instructions ++ * to the ROB and tries to commit instructions. ++ */ + void commit(); + ++ /** Returns the number of free ROB entries for a specific thread. 
*/ ++ unsigned numROBFreeEntries(unsigned tid); ++ ++ void generateXCEvent(unsigned tid); ++ + private: ++ /** Updates the overall status of commit with the nextStatus, and ++ * tell the CPU if commit is active/inactive. */ ++ void updateStatus(); ++ ++ /** Sets the next status based on threads' statuses, which becomes the ++ * current status at the end of the cycle. ++ */ ++ void setNextStatus(); ++ ++ /** Checks if the ROB is completed with squashing. This is for the case ++ * where the ROB can take multiple cycles to complete squashing. ++ */ ++ bool robDoneSquashing(); ++ ++ /** Returns if any of the threads have the number of ROB entries changed ++ * on this cycle. Used to determine if the number of free ROB entries needs ++ * to be sent back to previous stages. ++ */ ++ bool changedROBEntries(); + ++ void squashAll(unsigned tid); ++ ++ void squashFromTrap(unsigned tid); ++ ++ void squashFromXC(unsigned tid); ++ ++ /** Commits as many instructions as possible. */ + void commitInsts(); + ++ /** Tries to commit the head ROB instruction passed in. ++ * @param head_inst The instruction to be committed. ++ */ + bool commitHead(DynInstPtr &head_inst, unsigned inst_num); + ++ void generateTrapEvent(unsigned tid); ++ ++ /** Gets instructions from rename and inserts them into the ROB. */ + void getInsts(); + ++ /** Marks completed instructions using information sent from IEW. */ + void markCompletedInsts(); + ++ /** Gets the thread to commit, based on the SMT policy. */ ++ int getCommittingThread(); ++ ++ /** Returns the thread ID to use based on a round robin policy. */ ++ int roundRobin(); ++ ++ /** Returns the thread ID to use based on an oldest instruction policy. */ ++ int oldestReady(); ++ + public: - uint64_t readCommitPC(); ++ /** Returns the PC of the head instruction of the ROB. ++ * @todo: Probably remove this function as it returns only thread 0. 
++ */ ++ uint64_t readPC() { return PC[0]; } ++ ++ uint64_t readPC(unsigned tid) { return PC[tid]; } ++ ++ void setPC(uint64_t val, unsigned tid) { PC[tid] = val; } + - void setSquashing() { _status = ROBSquashing; } ++ uint64_t readNextPC(unsigned tid) { return nextPC[tid]; } ++ ++ void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; } + + private: + /** Time buffer interface. */ + TimeBuffer *timeBuffer; + + /** Wire to write information heading to previous stages. */ + typename TimeBuffer::wire toIEW; + + /** Wire to read information from IEW (for ROB). */ + typename TimeBuffer::wire robInfoFromIEW; + ++ TimeBuffer *fetchQueue; ++ ++ typename TimeBuffer::wire fromFetch; ++ + /** IEW instruction queue interface. */ + TimeBuffer *iewQueue; + + /** Wire to read information from IEW queue. */ + typename TimeBuffer::wire fromIEW; + + /** Rename instruction queue interface, for ROB. */ + TimeBuffer *renameQueue; + + /** Wire to read information from rename queue. */ + typename TimeBuffer::wire fromRename; + ++ public: + /** ROB interface. */ + ROB *rob; + ++ private: + /** Pointer to FullCPU. */ + FullCPU *cpu; + + /** Memory interface. Used for d-cache accesses. */ + MemInterface *dcacheInterface; + - private: ++ std::vector thread; ++ ++ Fault fetchFault; ++ ++ int fetchTrapWait; ++ ++ /** Records that commit has written to the time buffer this cycle. Used for ++ * the CPU to determine if it can deschedule itself if there is no activity. ++ */ ++ bool wroteToTimeBuffer; ++ ++ /** Records if the number of ROB entries has changed this cycle. If it has, ++ * then the number of free entries must be re-broadcast. ++ */ ++ bool changedROBNumEntries[Impl::MaxThreads]; ++ ++ /** A counter of how many threads are currently squashing. */ ++ int squashCounter; ++ ++ /** Records if a thread has to squash this cycle due to a trap. */ ++ bool trapSquash[Impl::MaxThreads]; ++ ++ /** Records if a thread has to squash this cycle due to an XC write. 
*/ ++ bool xcSquash[Impl::MaxThreads]; ++ ++ /** Priority List used for Commit Policy */ ++ std::list priority_list; ++ + /** IEW to Commit delay, in ticks. */ + unsigned iewToCommitDelay; + ++ /** Commit to IEW delay, in ticks. */ ++ unsigned commitToIEWDelay; ++ + /** Rename to ROB delay, in ticks. */ + unsigned renameToROBDelay; + ++ unsigned fetchToCommitDelay; ++ + /** Rename width, in instructions. Used so ROB knows how many + * instructions to get from the rename instruction queue. + */ + unsigned renameWidth; + + /** IEW width, in instructions. Used so ROB knows how many + * instructions to get from the IEW instruction queue. + */ + unsigned iewWidth; + + /** Commit width, in instructions. */ + unsigned commitWidth; + ++ /** Number of Reorder Buffers */ ++ unsigned numRobs; ++ ++ /** Number of Active Threads */ ++ unsigned numThreads; ++ ++ bool switchPending; ++ bool switchedOut; ++ ++ Tick trapLatency; ++ ++ Tick fetchTrapLatency; ++ ++ Tick fetchFaultTick; ++ ++ Addr PC[Impl::MaxThreads]; ++ ++ Addr nextPC[Impl::MaxThreads]; ++ ++ /** The sequence number of the youngest valid instruction in the ROB. */ ++ InstSeqNum youngestSeqNum[Impl::MaxThreads]; ++ ++ /** Pointer to the list of active threads. */ ++ std::list *activeThreads; ++ ++ /** Rename map interface. */ ++ RenameMap *renameMap[Impl::MaxThreads]; ++ ++ void updateComInstStats(DynInstPtr &inst); ++ ++ /** Stat for the total number of committed instructions. */ + Stats::Scalar<> commitCommittedInsts; ++ /** Stat for the total number of squashed instructions discarded by commit. ++ */ + Stats::Scalar<> commitSquashedInsts; ++ /** Stat for the total number of times commit is told to squash. ++ * @todo: Actually increment this stat. ++ */ + Stats::Scalar<> commitSquashEvents; ++ /** Stat for the total number of times commit has had to stall due to a non- ++ * speculative instruction reaching the head of the ROB. 
++ */ + Stats::Scalar<> commitNonSpecStalls; - Stats::Scalar<> commitCommittedBranches; - Stats::Scalar<> commitCommittedLoads; - Stats::Scalar<> commitCommittedMemRefs; ++ /** Stat for the total number of branch mispredicts that caused a squash. */ + Stats::Scalar<> branchMispredicts; - - Stats::Distribution<> n_committed_dist; ++ /** Distribution of the number of committed instructions each cycle. */ ++ Stats::Distribution<> numCommittedDist; ++ ++ /** Total number of instructions committed. */ ++ Stats::Vector<> statComInst; ++ /** Total number of software prefetches committed. */ ++ Stats::Vector<> statComSwp; ++ /** Stat for the total number of committed memory references. */ ++ Stats::Vector<> statComRefs; ++ /** Stat for the total number of committed loads. */ ++ Stats::Vector<> statComLoads; ++ /** Total number of committed memory barriers. */ ++ Stats::Vector<> statComMembars; ++ /** Total number of committed branches. */ ++ Stats::Vector<> statComBranches; ++ ++ Stats::Scalar<> commitEligibleSamples; ++ Stats::Vector<> commitEligible; +}; + - #endif // __CPU_O3_CPU_SIMPLE_COMMIT_HH__ ++#endif // __CPU_O3_COMMIT_HH__ diff --cc src/cpu/o3/commit_impl.hh index e289bc0c0,000000000..346a8bc1c mode 100644,000000..100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@@ -1,502 -1,0 +1,1307 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + ++#include ++#include ++ ++#include "base/loader/symtab.hh" +#include "base/timebuf.hh" - #include "cpu/o3/commit.hh" ++#include "cpu/checker/cpu.hh" +#include "cpu/exetrace.hh" ++#include "cpu/o3/commit.hh" ++#include "cpu/o3/thread_state.hh" ++ ++using namespace std; + +template - SimpleCommit::SimpleCommit(Params ¶ms) - : dcacheInterface(params.dcacheInterface), - iewToCommitDelay(params.iewToCommitDelay), - renameToROBDelay(params.renameToROBDelay), - renameWidth(params.renameWidth), - iewWidth(params.executeWidth), - commitWidth(params.commitWidth) ++DefaultCommit::TrapEvent::TrapEvent(DefaultCommit *_commit, ++ unsigned _tid) ++ : Event(&mainEventQueue, CPU_Tick_Pri), commit(_commit), tid(_tid) +{ - _status = Idle; ++ this->setFlags(Event::AutoDelete); +} + +template +void - SimpleCommit::regStats() ++DefaultCommit::TrapEvent::process() ++{ ++ // This will get reset by commit if it was switched out at the ++ // time of this event processing. ++ commit->trapSquash[tid] = true; ++} ++ ++template ++const char * ++DefaultCommit::TrapEvent::description() ++{ ++ return "Trap event"; ++} ++ ++template ++DefaultCommit::DefaultCommit(Params *params) ++ : dcacheInterface(params->dcacheInterface), ++ squashCounter(0), ++ iewToCommitDelay(params->iewToCommitDelay), ++ commitToIEWDelay(params->commitToIEWDelay), ++ renameToROBDelay(params->renameToROBDelay), ++ fetchToCommitDelay(params->commitToFetchDelay), ++ renameWidth(params->renameWidth), ++ iewWidth(params->executeWidth), ++ commitWidth(params->commitWidth), ++ numThreads(params->numberOfThreads), ++ switchedOut(false), ++ trapLatency(params->trapLatency), ++ fetchTrapLatency(params->fetchTrapLatency) +{ ++ _status = Active; ++ _nextStatus = Inactive; ++ string policy = params->smtCommitPolicy; ++ ++ //Convert string to lowercase ++ std::transform(policy.begin(), policy.end(), policy.begin(), ++ (int(*)(int)) tolower); ++ ++ //Assign commit policy ++ if (policy == "aggressive"){ ++ commitPolicy = Aggressive; 
++ ++ DPRINTF(Commit,"Commit Policy set to Aggressive."); ++ } else if (policy == "roundrobin"){ ++ commitPolicy = RoundRobin; ++ ++ //Set-Up Priority List ++ for (int tid=0; tid < numThreads; tid++) { ++ priority_list.push_back(tid); ++ } ++ ++ DPRINTF(Commit,"Commit Policy set to Round Robin."); ++ } else if (policy == "oldestready"){ ++ commitPolicy = OldestReady; ++ ++ DPRINTF(Commit,"Commit Policy set to Oldest Ready."); ++ } else { ++ assert(0 && "Invalid SMT Commit Policy. Options Are: {Aggressive," ++ "RoundRobin,OldestReady}"); ++ } ++ ++ for (int i=0; i < numThreads; i++) { ++ commitStatus[i] = Idle; ++ changedROBNumEntries[i] = false; ++ trapSquash[i] = false; ++ xcSquash[i] = false; ++ } ++ ++ fetchFaultTick = 0; ++ fetchTrapWait = 0; ++} ++ ++template ++std::string ++DefaultCommit::name() const ++{ ++ return cpu->name() + ".commit"; ++} ++ ++template ++void ++DefaultCommit::regStats() ++{ ++ using namespace Stats; + commitCommittedInsts + .name(name() + ".commitCommittedInsts") + .desc("The number of committed instructions") + .prereq(commitCommittedInsts); + commitSquashedInsts + .name(name() + ".commitSquashedInsts") + .desc("The number of squashed insts skipped by commit") + .prereq(commitSquashedInsts); + commitSquashEvents + .name(name() + ".commitSquashEvents") + .desc("The number of times commit is told to squash") + .prereq(commitSquashEvents); + commitNonSpecStalls + .name(name() + ".commitNonSpecStalls") + .desc("The number of times commit has been forced to stall to " + "communicate backwards") + .prereq(commitNonSpecStalls); - commitCommittedBranches - .name(name() + ".commitCommittedBranches") - .desc("The number of committed branches") - .prereq(commitCommittedBranches); - commitCommittedLoads - .name(name() + ".commitCommittedLoads") - .desc("The number of committed loads") - .prereq(commitCommittedLoads); - commitCommittedMemRefs - .name(name() + ".commitCommittedMemRefs") - .desc("The number of committed memory references") - 
.prereq(commitCommittedMemRefs); + branchMispredicts + .name(name() + ".branchMispredicts") + .desc("The number of times a branch was mispredicted") + .prereq(branchMispredicts); - n_committed_dist ++ numCommittedDist + .init(0,commitWidth,1) + .name(name() + ".COM:committed_per_cycle") + .desc("Number of insts commited each cycle") + .flags(Stats::pdf) + ; ++ ++ statComInst ++ .init(cpu->number_of_threads) ++ .name(name() + ".COM:count") ++ .desc("Number of instructions committed") ++ .flags(total) ++ ; ++ ++ statComSwp ++ .init(cpu->number_of_threads) ++ .name(name() + ".COM:swp_count") ++ .desc("Number of s/w prefetches committed") ++ .flags(total) ++ ; ++ ++ statComRefs ++ .init(cpu->number_of_threads) ++ .name(name() + ".COM:refs") ++ .desc("Number of memory references committed") ++ .flags(total) ++ ; ++ ++ statComLoads ++ .init(cpu->number_of_threads) ++ .name(name() + ".COM:loads") ++ .desc("Number of loads committed") ++ .flags(total) ++ ; ++ ++ statComMembars ++ .init(cpu->number_of_threads) ++ .name(name() + ".COM:membars") ++ .desc("Number of memory barriers committed") ++ .flags(total) ++ ; ++ ++ statComBranches ++ .init(cpu->number_of_threads) ++ .name(name() + ".COM:branches") ++ .desc("Number of branches committed") ++ .flags(total) ++ ; ++ ++ // ++ // Commit-Eligible instructions... ++ // ++ // -> The number of instructions eligible to commit in those ++ // cycles where we reached our commit BW limit (less the number ++ // actually committed) ++ // ++ // -> The average value is computed over ALL CYCLES... 
not just ++ // the BW limited cycles ++ // ++ // -> The standard deviation is computed only over cycles where ++ // we reached the BW limit ++ // ++ commitEligible ++ .init(cpu->number_of_threads) ++ .name(name() + ".COM:bw_limited") ++ .desc("number of insts not committed due to BW limits") ++ .flags(total) ++ ; ++ ++ commitEligibleSamples ++ .name(name() + ".COM:bw_lim_events") ++ .desc("number cycles where commit BW limit reached") ++ ; +} + +template +void - SimpleCommit::setCPU(FullCPU *cpu_ptr) ++DefaultCommit::setCPU(FullCPU *cpu_ptr) +{ + DPRINTF(Commit, "Commit: Setting CPU pointer.\n"); + cpu = cpu_ptr; ++ ++ // Commit must broadcast the number of free entries it has at the start of ++ // the simulation, so it starts as active. ++ cpu->activateStage(FullCPU::CommitIdx); ++ ++ trapLatency = cpu->cycles(trapLatency); ++ fetchTrapLatency = cpu->cycles(fetchTrapLatency); ++} ++ ++template ++void ++DefaultCommit::setThreads(vector &threads) ++{ ++ thread = threads; +} + +template +void - SimpleCommit::setTimeBuffer(TimeBuffer *tb_ptr) ++DefaultCommit::setTimeBuffer(TimeBuffer *tb_ptr) +{ + DPRINTF(Commit, "Commit: Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to send information back to IEW. + toIEW = timeBuffer->getWire(0); + + // Setup wire to read data from IEW (for the ROB). + robInfoFromIEW = timeBuffer->getWire(-iewToCommitDelay); +} + +template +void - SimpleCommit::setRenameQueue(TimeBuffer *rq_ptr) ++DefaultCommit::setFetchQueue(TimeBuffer *fq_ptr) ++{ ++ DPRINTF(Commit, "Commit: Setting fetch queue pointer.\n"); ++ fetchQueue = fq_ptr; ++ ++ // Setup wire to get instructions from rename (for the ROB). ++ fromFetch = fetchQueue->getWire(-fetchToCommitDelay); ++} ++ ++template ++void ++DefaultCommit::setRenameQueue(TimeBuffer *rq_ptr) +{ + DPRINTF(Commit, "Commit: Setting rename queue pointer.\n"); + renameQueue = rq_ptr; + + // Setup wire to get instructions from rename (for the ROB). 
+ fromRename = renameQueue->getWire(-renameToROBDelay); +} + +template +void - SimpleCommit::setIEWQueue(TimeBuffer *iq_ptr) ++DefaultCommit::setIEWQueue(TimeBuffer *iq_ptr) +{ + DPRINTF(Commit, "Commit: Setting IEW queue pointer.\n"); + iewQueue = iq_ptr; + + // Setup wire to get instructions from IEW. + fromIEW = iewQueue->getWire(-iewToCommitDelay); +} + +template +void - SimpleCommit::setROB(ROB *rob_ptr) ++DefaultCommit::setFetchStage(Fetch *fetch_stage) ++{ ++ fetchStage = fetch_stage; ++} ++ ++template ++void ++DefaultCommit::setIEWStage(IEW *iew_stage) ++{ ++ iewStage = iew_stage; ++} ++ ++template ++void ++DefaultCommit::setActiveThreads(list *at_ptr) ++{ ++ DPRINTF(Commit, "Commit: Setting active threads list pointer.\n"); ++ activeThreads = at_ptr; ++} ++ ++template ++void ++DefaultCommit::setRenameMap(RenameMap rm_ptr[]) ++{ ++ DPRINTF(Commit, "Setting rename map pointers.\n"); ++ ++ for (int i=0; i < numThreads; i++) { ++ renameMap[i] = &rm_ptr[i]; ++ } ++} ++ ++template ++void ++DefaultCommit::setROB(ROB *rob_ptr) +{ + DPRINTF(Commit, "Commit: Setting ROB pointer.\n"); + rob = rob_ptr; +} + +template +void - SimpleCommit::tick() - { - // If the ROB is currently in its squash sequence, then continue - // to squash. In this case, commit does not do anything. Otherwise - // run commit. - if (_status == ROBSquashing) { - if (rob->isDoneSquashing()) { - _status = Running; - } else { - rob->doSquash(); - - // Send back sequence number of tail of ROB, so other stages - // can squash younger instructions. Note that really the only - // stage that this is important for is the IEW stage; other - // stages can just clear all their state as long as selective - // replay isn't used. - toIEW->commitInfo.doneSeqNum = rob->readTailSeqNum(); - toIEW->commitInfo.robSquashing = true; ++DefaultCommit::initStage() ++{ ++ rob->setActiveThreads(activeThreads); ++ rob->resetEntries(); ++ ++ // Broadcast the number of free entries. 
++ for (int i=0; i < numThreads; i++) { ++ toIEW->commitInfo[i].usedROB = true; ++ toIEW->commitInfo[i].freeROBEntries = rob->numFreeEntries(i); ++ } ++ ++ cpu->activityThisCycle(); ++} ++ ++template ++void ++DefaultCommit::switchOut() ++{ ++ switchPending = true; ++} ++ ++template ++void ++DefaultCommit::doSwitchOut() ++{ ++ switchedOut = true; ++ switchPending = false; ++ rob->switchOut(); ++} ++ ++template ++void ++DefaultCommit::takeOverFrom() ++{ ++ switchedOut = false; ++ _status = Active; ++ _nextStatus = Inactive; ++ for (int i=0; i < numThreads; i++) { ++ commitStatus[i] = Idle; ++ changedROBNumEntries[i] = false; ++ trapSquash[i] = false; ++ xcSquash[i] = false; ++ } ++ squashCounter = 0; ++ rob->takeOverFrom(); ++} ++ ++template ++void ++DefaultCommit::updateStatus() ++{ ++ // reset ROB changed variable ++ list::iterator threads = (*activeThreads).begin(); ++ while (threads != (*activeThreads).end()) { ++ unsigned tid = *threads++; ++ changedROBNumEntries[tid] = false; ++ ++ // Also check if any of the threads has a trap pending ++ if (commitStatus[tid] == TrapPending || ++ commitStatus[tid] == FetchTrapPending) { ++ _nextStatus = Active; + } - } else { - commit(); + } + ++ if (_nextStatus == Inactive && _status == Active) { ++ DPRINTF(Activity, "Deactivating stage.\n"); ++ cpu->deactivateStage(FullCPU::CommitIdx); ++ } else if (_nextStatus == Active && _status == Inactive) { ++ DPRINTF(Activity, "Activating stage.\n"); ++ cpu->activateStage(FullCPU::CommitIdx); ++ } ++ ++ _status = _nextStatus; ++} ++ ++template ++void ++DefaultCommit::setNextStatus() ++{ ++ int squashes = 0; ++ ++ list::iterator threads = (*activeThreads).begin(); ++ ++ while (threads != (*activeThreads).end()) { ++ unsigned tid = *threads++; ++ ++ if (commitStatus[tid] == ROBSquashing) { ++ squashes++; ++ } ++ } ++ ++ assert(squashes == squashCounter); ++ ++ // If commit is currently squashing, then it will have activity for the ++ // next cycle. Set its next status as active. 
++ if (squashCounter) { ++ _nextStatus = Active; ++ } ++} ++ ++template ++bool ++DefaultCommit::changedROBEntries() ++{ ++ list::iterator threads = (*activeThreads).begin(); ++ ++ while (threads != (*activeThreads).end()) { ++ unsigned tid = *threads++; ++ ++ if (changedROBNumEntries[tid]) { ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++template ++unsigned ++DefaultCommit::numROBFreeEntries(unsigned tid) ++{ ++ return rob->numFreeEntries(tid); ++} ++ ++template ++void ++DefaultCommit::generateTrapEvent(unsigned tid) ++{ ++ DPRINTF(Commit, "Generating trap event for [tid:%i]\n", tid); ++ ++ TrapEvent *trap = new TrapEvent(this, tid); ++ ++ trap->schedule(curTick + trapLatency); ++ ++ thread[tid]->trapPending = true; ++} ++ ++template ++void ++DefaultCommit::generateXCEvent(unsigned tid) ++{ ++ DPRINTF(Commit, "Generating XC squash event for [tid:%i]\n", tid); ++ ++ xcSquash[tid] = true; ++} ++ ++template ++void ++DefaultCommit::squashAll(unsigned tid) ++{ ++ // If we want to include the squashing instruction in the squash, ++ // then use one older sequence number. ++ // Hopefully this doesn't mess things up. Basically I want to squash ++ // all instructions of this thread. ++ InstSeqNum squashed_inst = rob->isEmpty() ? ++ 0 : rob->readHeadInst(tid)->seqNum - 1;; ++ ++ // All younger instructions will be squashed. Set the sequence ++ // number as the youngest instruction in the ROB (0 in this case. ++ // Hopefully nothing breaks.) ++ youngestSeqNum[tid] = 0; ++ ++ rob->squash(squashed_inst, tid); ++ changedROBNumEntries[tid] = true; ++ ++ // Send back the sequence number of the squashed instruction. ++ toIEW->commitInfo[tid].doneSeqNum = squashed_inst; ++ ++ // Send back the squash signal to tell stages that they should ++ // squash. ++ toIEW->commitInfo[tid].squash = true; ++ ++ // Send back the rob squashing signal so other stages know that ++ // the ROB is in the process of squashing. 
++ toIEW->commitInfo[tid].robSquashing = true; ++ ++ toIEW->commitInfo[tid].branchMispredict = false; ++ ++ toIEW->commitInfo[tid].nextPC = PC[tid]; ++} ++ ++template ++void ++DefaultCommit::squashFromTrap(unsigned tid) ++{ ++ squashAll(tid); ++ ++ DPRINTF(Commit, "Squashing from trap, restarting at PC %#x\n", PC[tid]); ++ ++ thread[tid]->trapPending = false; ++ thread[tid]->inSyscall = false; ++ ++ trapSquash[tid] = false; ++ ++ commitStatus[tid] = ROBSquashing; ++ cpu->activityThisCycle(); ++ ++ ++squashCounter; ++} ++ ++template ++void ++DefaultCommit::squashFromXC(unsigned tid) ++{ ++ squashAll(tid); ++ ++ DPRINTF(Commit, "Squashing from XC, restarting at PC %#x\n", PC[tid]); ++ ++ thread[tid]->inSyscall = false; ++ assert(!thread[tid]->trapPending); ++ ++ commitStatus[tid] = ROBSquashing; ++ cpu->activityThisCycle(); ++ ++ xcSquash[tid] = false; ++ ++ ++squashCounter; ++} ++ ++template ++void ++DefaultCommit::tick() ++{ ++ wroteToTimeBuffer = false; ++ _nextStatus = Inactive; ++ ++ if (switchPending && rob->isEmpty() && !iewStage->hasStoresToWB()) { ++ cpu->signalSwitched(); ++ return; ++ } ++ ++ list::iterator threads = (*activeThreads).begin(); ++ ++ // Check if any of the threads are done squashing. Change the ++ // status if they are done. ++ while (threads != (*activeThreads).end()) { ++ unsigned tid = *threads++; ++ ++ if (commitStatus[tid] == ROBSquashing) { ++ ++ if (rob->isDoneSquashing(tid)) { ++ commitStatus[tid] = Running; ++ --squashCounter; ++ } else { ++ DPRINTF(Commit,"[tid:%u]: Still Squashing, cannot commit any" ++ "insts this cycle.\n", tid); ++ } ++ } ++ } ++ ++ commit(); ++ + markCompletedInsts(); + - // Writeback number of free ROB entries here. 
- DPRINTF(Commit, "Commit: ROB has %d free entries.\n", - rob->numFreeEntries()); - toIEW->commitInfo.freeROBEntries = rob->numFreeEntries(); ++ threads = (*activeThreads).begin(); ++ ++ while (threads != (*activeThreads).end()) { ++ unsigned tid = *threads++; ++ ++ if (!rob->isEmpty(tid) && rob->readHeadInst(tid)->readyToCommit()) { ++ // The ROB has more instructions it can commit. Its next status ++ // will be active. ++ _nextStatus = Active; ++ ++ DynInstPtr inst = rob->readHeadInst(tid); ++ ++ DPRINTF(Commit,"[tid:%i]: Instruction [sn:%lli] PC %#x is head of" ++ " ROB and ready to commit\n", ++ tid, inst->seqNum, inst->readPC()); ++ ++ } else if (!rob->isEmpty(tid)) { ++ DynInstPtr inst = rob->readHeadInst(tid); ++ ++ DPRINTF(Commit,"[tid:%i]: Can't commit, Instruction [sn:%lli] PC " ++ "%#x is head of ROB and not ready\n", ++ tid, inst->seqNum, inst->readPC()); ++ } ++ ++ DPRINTF(Commit, "[tid:%i]: ROB has %d insts & %d free entries.\n", ++ tid, rob->countInsts(tid), rob->numFreeEntries(tid)); ++ } ++ ++ ++ if (wroteToTimeBuffer) { ++ DPRINTF(Activity, "Activity This Cycle.\n"); ++ cpu->activityThisCycle(); ++ } ++ ++ updateStatus(); +} + +template +void - SimpleCommit::commit() ++DefaultCommit::commit() +{ ++ + ////////////////////////////////////// + // Check for interrupts + ////////////////////////////////////// + - // Process interrupts if interrupts are enabled and not in PAL mode. - // Take the PC from commit and write it to the IPR, then squash. The - // interrupt completing will take care of restoring the PC from that value - // in the IPR. Look at IPR[EXC_ADDR]; - // hwrei() is what resets the PC to the place where instruction execution - // beings again. +#if FULL_SYSTEM - if (//checkInterrupts && ++ // Process interrupts if interrupts are enabled, not in PAL mode, ++ // and no other traps or external squashes are currently pending. ++ // @todo: Allow other threads to handle interrupts. 
++ if (cpu->checkInterrupts && + cpu->check_interrupts() && - !cpu->inPalMode(readCommitPC())) { - // Will need to squash all instructions currently in flight and have - // the interrupt handler restart at the last non-committed inst. - // Most of that can be handled through the trap() function. The - // processInterrupts() function really just checks for interrupts - // and then calls trap() if there is an interrupt present. ++ !cpu->inPalMode(readPC()) && ++ !trapSquash[0] && ++ !xcSquash[0]) { ++ // Tell fetch that there is an interrupt pending. This will ++ // make fetch wait until it sees a non PAL-mode PC, at which ++ // point it stops fetching instructions. ++ toIEW->commitInfo[0].interruptPending = true; ++ ++ // Wait until the ROB is empty and all stores have drained in ++ // order to enter the interrupt. ++ if (rob->isEmpty() && !iewStage->hasStoresToWB()) { ++ // Not sure which thread should be the one to interrupt. For now ++ // always do thread 0. ++ assert(!thread[0]->inSyscall); ++ thread[0]->inSyscall = true; ++ ++ // CPU will handle implementation of the interrupt. ++ cpu->processInterrupts(); ++ ++ // Now squash or record that I need to squash this cycle. ++ commitStatus[0] = TrapPending; ++ ++ // Exit state update mode to avoid accidental updating. ++ thread[0]->inSyscall = false; ++ ++ // Generate trap squash event. ++ generateTrapEvent(0); + - // CPU will handle implementation of the interrupt. - cpu->processInterrupts(); ++ toIEW->commitInfo[0].clearInterrupt = true; ++ ++ DPRINTF(Commit, "Interrupt detected.\n"); ++ } else { ++ DPRINTF(Commit, "Interrupt pending, waiting for ROB to empty.\n"); ++ } + } +#endif // FULL_SYSTEM + + //////////////////////////////////// - // Check for squash signal, handle that first ++ // Check for any possible squashes, handle them first + //////////////////////////////////// + - // Want to mainly check if the IEW stage is telling the ROB to squash. 
- // Should I also check if the commit stage is telling the ROB to squah? - // This might be necessary to keep the same timing between the IQ and - // the ROB... - if (fromIEW->squash) { - DPRINTF(Commit, "Commit: Squashing instructions in the ROB.\n"); ++ list::iterator threads = (*activeThreads).begin(); ++ ++ while (threads != (*activeThreads).end()) { ++ unsigned tid = *threads++; ++ ++ if (fromFetch->fetchFault && commitStatus[0] != TrapPending) { ++ // Record the fault. Wait until it's empty in the ROB. ++ // Then handle the trap. Ignore it if there's already a ++ // trap pending as fetch will be redirected. ++ fetchFault = fromFetch->fetchFault; ++ fetchFaultTick = curTick + fetchTrapLatency; ++ commitStatus[0] = FetchTrapPending; ++ DPRINTF(Commit, "Fault from fetch recorded. Will trap if the " ++ "ROB empties without squashing the fault.\n"); ++ fetchTrapWait = 0; ++ } ++ ++ // Fetch may tell commit to clear the trap if it's been squashed. ++ if (fromFetch->clearFetchFault) { ++ DPRINTF(Commit, "Received clear fetch fault signal\n"); ++ fetchTrapWait = 0; ++ if (commitStatus[0] == FetchTrapPending) { ++ DPRINTF(Commit, "Clearing fault from fetch\n"); ++ commitStatus[0] = Running; ++ } ++ } ++ ++ // Not sure which one takes priority. I think if we have ++ // both, that's a bad sign. ++ if (trapSquash[tid] == true) { ++ assert(!xcSquash[tid]); ++ squashFromTrap(tid); ++ } else if (xcSquash[tid] == true) { ++ squashFromXC(tid); ++ } ++ ++ // Squashed sequence number must be older than youngest valid ++ // instruction in the ROB. This prevents squashes from younger ++ // instructions overriding squashes from older instructions. 
++ if (fromIEW->squash[tid] && ++ commitStatus[tid] != TrapPending && ++ fromIEW->squashedSeqNum[tid] <= youngestSeqNum[tid]) { ++ ++ DPRINTF(Commit, "[tid:%i]: Squashing due to PC %#x [sn:%i]\n", ++ tid, ++ fromIEW->mispredPC[tid], ++ fromIEW->squashedSeqNum[tid]); + - _status = ROBSquashing; ++ DPRINTF(Commit, "[tid:%i]: Redirecting to PC %#x\n", ++ tid, ++ fromIEW->nextPC[tid]); + - InstSeqNum squashed_inst = fromIEW->squashedSeqNum; ++ commitStatus[tid] = ROBSquashing; + - rob->squash(squashed_inst); ++ ++squashCounter; + - // Send back the sequence number of the squashed instruction. - toIEW->commitInfo.doneSeqNum = squashed_inst; ++ // If we want to include the squashing instruction in the squash, ++ // then use one older sequence number. ++ InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid]; + - // Send back the squash signal to tell stages that they should squash. - toIEW->commitInfo.squash = true; ++ if (fromIEW->includeSquashInst[tid] == true) ++ squashed_inst--; + - // Send back the rob squashing signal so other stages know that the - // ROB is in the process of squashing. - toIEW->commitInfo.robSquashing = true; ++ // All younger instructions will be squashed. Set the sequence ++ // number as the youngest instruction in the ROB. ++ youngestSeqNum[tid] = squashed_inst; + - toIEW->commitInfo.branchMispredict = fromIEW->branchMispredict; ++ rob->squash(squashed_inst, tid); ++ changedROBNumEntries[tid] = true; + - toIEW->commitInfo.branchTaken = fromIEW->branchTaken; ++ toIEW->commitInfo[tid].doneSeqNum = squashed_inst; + - toIEW->commitInfo.nextPC = fromIEW->nextPC; ++ toIEW->commitInfo[tid].squash = true; + - toIEW->commitInfo.mispredPC = fromIEW->mispredPC; ++ // Send back the rob squashing signal so other stages know that ++ // the ROB is in the process of squashing. 
++ toIEW->commitInfo[tid].robSquashing = true; + - if (toIEW->commitInfo.branchMispredict) { - ++branchMispredicts; ++ toIEW->commitInfo[tid].branchMispredict = ++ fromIEW->branchMispredict[tid]; ++ ++ toIEW->commitInfo[tid].branchTaken = ++ fromIEW->branchTaken[tid]; ++ ++ toIEW->commitInfo[tid].nextPC = fromIEW->nextPC[tid]; ++ ++ toIEW->commitInfo[tid].mispredPC = fromIEW->mispredPC[tid]; ++ ++ if (toIEW->commitInfo[tid].branchMispredict) { ++ ++branchMispredicts; ++ } + } ++ + } + - if (_status != ROBSquashing) { ++ setNextStatus(); ++ ++ if (squashCounter != numThreads) { + // If we're not currently squashing, then get instructions. + getInsts(); + + // Try to commit any instructions. + commitInsts(); + } + - // If the ROB is empty, we can set this stage to idle. Use this - // in the future when the Idle status will actually be utilized. - #if 0 - if (rob->isEmpty()) { - DPRINTF(Commit, "Commit: ROB is empty. Status changed to idle.\n"); - _status = Idle; - // Schedule an event so that commit will actually wake up - // once something gets put in the ROB. ++ //Check for any activity ++ threads = (*activeThreads).begin(); ++ ++ while (threads != (*activeThreads).end()) { ++ unsigned tid = *threads++; ++ ++ if (changedROBNumEntries[tid]) { ++ toIEW->commitInfo[tid].usedROB = true; ++ toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid); ++ ++ if (rob->isEmpty(tid)) { ++ toIEW->commitInfo[tid].emptyROB = true; ++ } ++ ++ wroteToTimeBuffer = true; ++ changedROBNumEntries[tid] = false; ++ } + } - #endif +} + - // Loop that goes through as many instructions in the ROB as possible and - // tries to commit them. The actual work for committing is done by the - // commitHead() function. 
+template +void - SimpleCommit::commitInsts() ++DefaultCommit::commitInsts() +{ + //////////////////////////////////// + // Handle commit - // Note that commit will be handled prior to the ROB so that the ROB - // only tries to commit instructions it has in this current cycle, and - // not instructions it is writing in during this cycle. - // Can't commit and squash things at the same time... ++ // Note that commit will be handled prior to putting new ++ // instructions in the ROB so that the ROB only tries to commit ++ // instructions it has in this current cycle, and not instructions ++ // it is writing in during this cycle. Can't commit and squash ++ // things at the same time... + //////////////////////////////////// + - if (rob->isEmpty()) - return; - - DynInstPtr head_inst = rob->readHeadInst(); ++ DPRINTF(Commit, "Trying to commit instructions in the ROB.\n"); + + unsigned num_committed = 0; + ++ DynInstPtr head_inst; ++ + // Commit as many instructions as possible until the commit bandwidth + // limit is reached, or it becomes impossible to commit any more. - while (!rob->isEmpty() && - head_inst->readyToCommit() && - num_committed < commitWidth) - { - DPRINTF(Commit, "Commit: Trying to commit head instruction.\n"); ++ while (num_committed < commitWidth) { ++ int commit_thread = getCommittingThread(); ++ ++ if (commit_thread == -1 || !rob->isHeadReady(commit_thread)) ++ break; ++ ++ head_inst = rob->readHeadInst(commit_thread); ++ ++ int tid = head_inst->threadNumber; ++ ++ assert(tid == commit_thread); ++ ++ DPRINTF(Commit, "Trying to commit head instruction, [sn:%i] [tid:%i]\n", ++ head_inst->seqNum, tid); + - // If the head instruction is squashed, it is ready to retire at any - // time. However, we need to avoid updating any other state - // incorrectly if it's already been squashed. ++ // If the head instruction is squashed, it is ready to retire ++ // (be removed from the ROB) at any time. 
+ if (head_inst->isSquashed()) { + - DPRINTF(Commit, "Commit: Retiring squashed instruction from " ++ DPRINTF(Commit, "Retiring squashed instruction from " + "ROB.\n"); + - // Tell ROB to retire head instruction. This retires the head - // inst in the ROB without affecting any other stages. - rob->retireHead(); ++ rob->retireHead(commit_thread); + + ++commitSquashedInsts; + ++ // Record that the number of ROB entries has changed. ++ changedROBNumEntries[tid] = true; + } else { ++ PC[tid] = head_inst->readPC(); ++ nextPC[tid] = head_inst->readNextPC(); ++ + // Increment the total number of non-speculative instructions + // executed. + // Hack for now: it really shouldn't happen until after the + // commit is deemed to be successful, but this count is needed + // for syscalls. - cpu->funcExeInst++; ++ thread[tid]->funcExeInst++; + + // Try to commit the head instruction. + bool commit_success = commitHead(head_inst, num_committed); + - // Update what instruction we are looking at if the commit worked. + if (commit_success) { + ++num_committed; + - // Send back which instruction has been committed. - // @todo: Update this later when a wider pipeline is used. - // Hmm, can't really give a pointer here...perhaps the - // sequence number instead (copy). - toIEW->commitInfo.doneSeqNum = head_inst->seqNum; ++ changedROBNumEntries[tid] = true; ++ ++ // Set the doneSeqNum to the youngest committed instruction. ++ toIEW->commitInfo[tid].doneSeqNum = head_inst->seqNum; + + ++commitCommittedInsts; + - if (!head_inst->isNop()) { - cpu->instDone(); ++ // To match the old model, don't count nops and instruction ++ // prefetches towards the total commit count. ++ if (!head_inst->isNop() && !head_inst->isInstPrefetch()) { ++ cpu->instDone(tid); ++ } ++ ++ PC[tid] = nextPC[tid]; ++ nextPC[tid] = nextPC[tid] + sizeof(TheISA::MachInst); ++#if FULL_SYSTEM ++ int count = 0; ++ Addr oldpc; ++ do { ++ // Debug statement. 
Checks to make sure we're not ++ // currently updating state while handling PC events. ++ if (count == 0) ++ assert(!thread[tid]->inSyscall && ++ !thread[tid]->trapPending); ++ oldpc = PC[tid]; ++ cpu->system->pcEventQueue.service( ++ thread[tid]->getXCProxy()); ++ count++; ++ } while (oldpc != PC[tid]); ++ if (count > 1) { ++ DPRINTF(Commit, "PC skip function event, stopping commit\n"); ++ break; + } ++#endif + } else { ++ DPRINTF(Commit, "Unable to commit head instruction PC:%#x " ++ "[tid:%i] [sn:%i].\n", ++ head_inst->readPC(), tid ,head_inst->seqNum); + break; + } + } - - // Update the pointer to read the next instruction in the ROB. - head_inst = rob->readHeadInst(); + } + + DPRINTF(CommitRate, "%i\n", num_committed); - n_committed_dist.sample(num_committed); ++ numCommittedDist.sample(num_committed); ++ ++ if (num_committed == commitWidth) { ++ commitEligible[0]++; ++ } +} + +template +bool - SimpleCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) ++DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) +{ - // Make sure instruction is valid + assert(head_inst); + - // If the instruction is not executed yet, then it is a non-speculative - // or store inst. Signal backwards that it should be executed. ++ int tid = head_inst->threadNumber; ++ ++ // If the instruction is not executed yet, then it will need extra ++ // handling. Signal backwards that it should be executed. + if (!head_inst->isExecuted()) { + // Keep this number correct. We have not yet actually executed + // and committed this instruction. 
- cpu->funcExeInst--; - - if (head_inst->isNonSpeculative()) { - DPRINTF(Commit, "Commit: Encountered a store or non-speculative " - "instruction at the head of the ROB, PC %#x.\n", - head_inst->readPC()); ++ thread[tid]->funcExeInst--; ++ ++ head_inst->reachedCommit = true; ++ ++ if (head_inst->isNonSpeculative() || ++ head_inst->isMemBarrier() || ++ head_inst->isWriteBarrier()) { ++ ++ DPRINTF(Commit, "Encountered a barrier or non-speculative " ++ "instruction [sn:%lli] at the head of the ROB, PC %#x.\n", ++ head_inst->seqNum, head_inst->readPC()); ++ ++#if !FULL_SYSTEM ++ // Hack to make sure syscalls/memory barriers/quiesces ++ // aren't executed until all stores write back their data. ++ // This direct communication shouldn't be used for ++ // anything other than this. ++ if (inst_num > 0 || iewStage->hasStoresToWB()) ++#else ++ if ((head_inst->isMemBarrier() || head_inst->isWriteBarrier() || ++ head_inst->isQuiesce()) && ++ iewStage->hasStoresToWB()) ++#endif ++ { ++ DPRINTF(Commit, "Waiting for all stores to writeback.\n"); ++ return false; ++ } + - toIEW->commitInfo.nonSpecSeqNum = head_inst->seqNum; ++ toIEW->commitInfo[tid].nonSpecSeqNum = head_inst->seqNum; + + // Change the instruction so it won't try to commit again until + // it is executed. + head_inst->clearCanCommit(); + + ++commitNonSpecStalls; + ++ return false; ++ } else if (head_inst->isLoad()) { ++ DPRINTF(Commit, "[sn:%lli]: Uncached load, PC %#x.\n", ++ head_inst->seqNum, head_inst->readPC()); ++ ++ // Send back the non-speculative instruction's sequence ++ // number. Tell the lsq to re-execute the load. 
++ toIEW->commitInfo[tid].nonSpecSeqNum = head_inst->seqNum; ++ toIEW->commitInfo[tid].uncached = true; ++ toIEW->commitInfo[tid].uncachedLoad = head_inst; ++ ++ head_inst->clearCanCommit(); ++ + return false; + } else { - panic("Commit: Trying to commit un-executed instruction " ++ panic("Trying to commit un-executed instruction " + "of unknown type!\n"); + } + } + - // Now check if it's one of the special trap or barrier or - // serializing instructions. - if (head_inst->isThreadSync() || - head_inst->isSerializing() || - head_inst->isMemBarrier() || - head_inst->isWriteBarrier() ) - { - // Not handled for now. Mem barriers and write barriers are safe - // to simply let commit as memory accesses only happen once they - // reach the head of commit. Not sure about the other two. - panic("Serializing or barrier instructions" - " are not handled yet.\n"); ++ if (head_inst->isThreadSync()) { ++ // Not handled for now. ++ panic("Thread sync instructions are not handled yet.\n"); ++ } ++ ++ // Stores mark themselves as completed. ++ if (!head_inst->isStore()) { ++ head_inst->setCompleted(); ++ } ++ ++ // Use checker prior to updating anything due to traps or PC ++ // based events. ++ if (cpu->checker) { ++ cpu->checker->tick(head_inst); + } + + // Check if the instruction caused a fault. If so, trap. + Fault inst_fault = head_inst->getFault(); + + if (inst_fault != NoFault) { - if (!head_inst->isNop()) { ++ head_inst->setCompleted(); +#if FULL_SYSTEM - cpu->trap(inst_fault); - #else // !FULL_SYSTEM - panic("fault (%d) detected @ PC %08p", inst_fault, - head_inst->PC); - #endif // FULL_SYSTEM ++ DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n", ++ head_inst->seqNum, head_inst->readPC()); ++ ++ if (iewStage->hasStoresToWB() || inst_num > 0) { ++ DPRINTF(Commit, "Stores outstanding, fault must wait.\n"); ++ return false; + } - } + - // Check if we're really ready to commit. If not then return false. 
- // I'm pretty sure all instructions should be able to commit if they've - // reached this far. For now leave this in as a check. - if (!rob->isHeadReady()) { - panic("Commit: Unable to commit head instruction!\n"); - return false; - } ++ if (cpu->checker && head_inst->isStore()) { ++ cpu->checker->tick(head_inst); ++ } + - // If it's a branch, then send back branch prediction update info - // to the fetch stage. - // This should be handled in the iew stage if a mispredict happens... ++ assert(!thread[tid]->inSyscall); + - if (head_inst->isControl()) { ++ // Mark that we're in state update mode so that the trap's ++ // execution doesn't generate extra squashes. ++ thread[tid]->inSyscall = true; + - #if 0 - toIEW->nextPC = head_inst->readPC(); - //Maybe switch over to BTB incorrect. - toIEW->btbMissed = head_inst->btbMiss(); - toIEW->target = head_inst->nextPC; - //Maybe also include global history information. - //This simple version will have no branch prediction however. - #endif ++ // DTB will sometimes need the machine instruction for when ++ // faults happen. So we will set it here, prior to the DTB ++ // possibly needing it for its fault. ++ thread[tid]->setInst( ++ static_cast(head_inst->staticInst->machInst)); ++ ++ // Execute the trap. Although it's slightly unrealistic in ++ // terms of timing (as it doesn't wait for the full timing of ++ // the trap event to complete before updating state), it's ++ // needed to update the state as soon as possible. This ++ // prevents external agents from changing any specific state ++ // that the trap need. ++ cpu->trap(inst_fault, tid); ++ ++ // Exit state update mode to avoid accidental updating. ++ thread[tid]->inSyscall = false; ++ ++ commitStatus[tid] = TrapPending; + - ++commitCommittedBranches; ++ // Generate trap squash event. 
++ generateTrapEvent(tid); ++ ++ return false; ++#else // !FULL_SYSTEM ++ panic("fault (%d) detected @ PC %08p", inst_fault, ++ head_inst->PC); ++#endif // FULL_SYSTEM + } + - // Now that the instruction is going to be committed, finalize its - // trace data. ++ updateComInstStats(head_inst); ++ + if (head_inst->traceData) { ++ head_inst->traceData->setFetchSeq(head_inst->seqNum); ++ head_inst->traceData->setCPSeq(thread[tid]->numInst); + head_inst->traceData->finalize(); ++ head_inst->traceData = NULL; ++ } ++ ++ // Update the commit rename map ++ for (int i = 0; i < head_inst->numDestRegs(); i++) { ++ renameMap[tid]->setEntry(head_inst->destRegIdx(i), ++ head_inst->renamedDestRegIdx(i)); + } + - //Finally clear the head ROB entry. - rob->retireHead(); ++ // Finally clear the head ROB entry. ++ rob->retireHead(tid); + + // Return true to indicate that we have committed an instruction. + return true; +} + +template +void - SimpleCommit::getInsts() ++DefaultCommit::getInsts() +{ - ////////////////////////////////////// - // Handle ROB functions - ////////////////////////////////////// - - // Read any issued instructions and place them into the ROB. Do this - // prior to squashing to avoid having instructions in the ROB that - // don't get squashed properly. ++ // Read any renamed instructions and place them into the ROB. 
+ int insts_to_process = min((int)renameWidth, fromRename->size); + - for (int inst_num = 0; - inst_num < insts_to_process; - ++inst_num) ++ for (int inst_num = 0; inst_num < insts_to_process; ++inst_num) + { - if (!fromRename->insts[inst_num]->isSquashed()) { - DPRINTF(Commit, "Commit: Inserting PC %#x into ROB.\n", - fromRename->insts[inst_num]->readPC()); - rob->insertInst(fromRename->insts[inst_num]); ++ DynInstPtr inst = fromRename->insts[inst_num]; ++ int tid = inst->threadNumber; ++ ++ if (!inst->isSquashed() && ++ commitStatus[tid] != ROBSquashing) { ++ changedROBNumEntries[tid] = true; ++ ++ DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ROB.\n", ++ inst->readPC(), inst->seqNum, tid); ++ ++ rob->insertInst(inst); ++ ++ assert(rob->getThreadEntries(tid) <= rob->getMaxEntries(tid)); ++ ++ youngestSeqNum[tid] = inst->seqNum; + } else { - DPRINTF(Commit, "Commit: Instruction %i PC %#x was " ++ DPRINTF(Commit, "Instruction PC %#x [sn:%i] [tid:%i] was " + "squashed, skipping.\n", - fromRename->insts[inst_num]->seqNum, - fromRename->insts[inst_num]->readPC()); ++ inst->readPC(), inst->seqNum, tid); + } + } +} + +template +void - SimpleCommit::markCompletedInsts() ++DefaultCommit::markCompletedInsts() +{ + // Grab completed insts out of the IEW instruction queue, and mark + // instructions completed within the ROB. + for (int inst_num = 0; + inst_num < fromIEW->size && fromIEW->insts[inst_num]; + ++inst_num) + { - DPRINTF(Commit, "Commit: Marking PC %#x, SN %i ready within ROB.\n", - fromIEW->insts[inst_num]->readPC(), - fromIEW->insts[inst_num]->seqNum); ++ if (!fromIEW->insts[inst_num]->isSquashed()) { ++ DPRINTF(Commit, "[tid:%i]: Marking PC %#x, [sn:%lli] ready " ++ "within ROB.\n", ++ fromIEW->insts[inst_num]->threadNumber, ++ fromIEW->insts[inst_num]->readPC(), ++ fromIEW->insts[inst_num]->seqNum); ++ ++ // Mark the instruction as ready to commit. 
++ fromIEW->insts[inst_num]->setCanCommit(); ++ } ++ } ++} ++ ++template ++bool ++DefaultCommit::robDoneSquashing() ++{ ++ list::iterator threads = (*activeThreads).begin(); + - // Mark the instruction as ready to commit. - fromIEW->insts[inst_num]->setCanCommit(); ++ while (threads != (*activeThreads).end()) { ++ unsigned tid = *threads++; ++ ++ if (!rob->isDoneSquashing(tid)) ++ return false; + } ++ ++ return true; +} + +template - uint64_t - SimpleCommit::readCommitPC() ++void ++DefaultCommit::updateComInstStats(DynInstPtr &inst) +{ - return rob->readHeadPC(); ++ unsigned thread = inst->threadNumber; ++ ++ // ++ // Pick off the software prefetches ++ // ++#ifdef TARGET_ALPHA ++ if (inst->isDataPrefetch()) { ++ statComSwp[thread]++; ++ } else { ++ statComInst[thread]++; ++ } ++#else ++ statComInst[thread]++; ++#endif ++ ++ // ++ // Control Instructions ++ // ++ if (inst->isControl()) ++ statComBranches[thread]++; ++ ++ // ++ // Memory references ++ // ++ if (inst->isMemRef()) { ++ statComRefs[thread]++; ++ ++ if (inst->isLoad()) { ++ statComLoads[thread]++; ++ } ++ } ++ ++ if (inst->isMemBarrier()) { ++ statComMembars[thread]++; ++ } ++} ++ ++//////////////////////////////////////// ++// // ++// SMT COMMIT POLICY MAINTAINED HERE // ++// // ++//////////////////////////////////////// ++template ++int ++DefaultCommit::getCommittingThread() ++{ ++ if (numThreads > 1) { ++ switch (commitPolicy) { ++ ++ case Aggressive: ++ //If Policy is Aggressive, commit will call ++ //this function multiple times per ++ //cycle ++ return oldestReady(); ++ ++ case RoundRobin: ++ return roundRobin(); ++ ++ case OldestReady: ++ return oldestReady(); ++ ++ default: ++ return -1; ++ } ++ } else { ++ int tid = (*activeThreads).front(); ++ ++ if (commitStatus[tid] == Running || ++ commitStatus[tid] == Idle || ++ commitStatus[tid] == FetchTrapPending) { ++ return tid; ++ } else { ++ return -1; ++ } ++ } ++} ++ ++template ++int ++DefaultCommit::roundRobin() ++{ ++ list::iterator pri_iter = 
priority_list.begin(); ++ list::iterator end = priority_list.end(); ++ ++ while (pri_iter != end) { ++ unsigned tid = *pri_iter; ++ ++ if (commitStatus[tid] == Running || ++ commitStatus[tid] == Idle) { ++ ++ if (rob->isHeadReady(tid)) { ++ priority_list.erase(pri_iter); ++ priority_list.push_back(tid); ++ ++ return tid; ++ } ++ } ++ ++ pri_iter++; ++ } ++ ++ return -1; ++} ++ ++template ++int ++DefaultCommit::oldestReady() ++{ ++ unsigned oldest = 0; ++ bool first = true; ++ ++ list::iterator threads = (*activeThreads).begin(); ++ ++ while (threads != (*activeThreads).end()) { ++ unsigned tid = *threads++; ++ ++ if (!rob->isEmpty(tid) && ++ (commitStatus[tid] == Running || ++ commitStatus[tid] == Idle || ++ commitStatus[tid] == FetchTrapPending)) { ++ ++ if (rob->isHeadReady(tid)) { ++ ++ DynInstPtr head_inst = rob->readHeadInst(tid); ++ ++ if (first) { ++ oldest = tid; ++ first = false; ++ } else if (head_inst->seqNum < oldest) { ++ oldest = tid; ++ } ++ } ++ } ++ } ++ ++ if (!first) { ++ return oldest; ++ } else { ++ return -1; ++ } +} diff --cc src/cpu/o3/cpu.cc index a268dbc23,000000000..ed02a845b mode 100644,000000..100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@@ -1,566 -1,0 +1,1196 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config/full_system.hh" + +#if FULL_SYSTEM +#include "sim/system.hh" +#else +#include "sim/process.hh" +#endif - #include "sim/root.hh" + ++#include "cpu/activity.hh" ++#include "cpu/checker/cpu.hh" +#include "cpu/cpu_exec_context.hh" +#include "cpu/exec_context.hh" +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/cpu.hh" + ++#include "sim/root.hh" ++#include "sim/stat_control.hh" ++ +using namespace std; + - BaseFullCPU::BaseFullCPU(Params ¶ms) - : BaseCPU(¶ms), cpu_id(0) ++BaseFullCPU::BaseFullCPU(Params *params) ++ : BaseCPU(params), cpu_id(0) ++{ ++} ++ ++void ++BaseFullCPU::regStats() +{ ++ BaseCPU::regStats(); +} + +template +FullO3CPU::TickEvent::TickEvent(FullO3CPU *c) + : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c) +{ +} + +template +void +FullO3CPU::TickEvent::process() +{ + cpu->tick(); +} + +template +const char * +FullO3CPU::TickEvent::description() +{ + return "FullO3CPU tick event"; +} + - //Call constructor to all the pipeline stages here +template - FullO3CPU::FullO3CPU(Params ¶ms) - #if FULL_SYSTEM - : BaseFullCPU(params), - #else ++FullO3CPU::FullO3CPU(Params *params) + : BaseFullCPU(params), - #endif // FULL_SYSTEM + tickEvent(this), ++ removeInstsThisCycle(false), + fetch(params), + decode(params), + rename(params), + iew(params), + commit(params), + - regFile(params.numPhysIntRegs, params.numPhysFloatRegs), ++ regFile(params->numPhysIntRegs, params->numPhysFloatRegs), + - freeList(TheISA::NumIntRegs, params.numPhysIntRegs, - TheISA::NumFloatRegs, params.numPhysFloatRegs), ++ freeList(params->numberOfThreads,//number of activeThreads ++ TheISA::NumIntRegs, params->numPhysIntRegs, ++ TheISA::NumFloatRegs, params->numPhysFloatRegs), + - renameMap(TheISA::NumIntRegs, params.numPhysIntRegs, - TheISA::NumFloatRegs, params.numPhysFloatRegs, - TheISA::NumMiscRegs, - TheISA::ZeroReg, - TheISA::ZeroReg + TheISA::NumIntRegs), ++ rob(params->numROBEntries, params->squashWidth, ++ 
params->smtROBPolicy, params->smtROBThreshold, ++ params->numberOfThreads), + - rob(params.numROBEntries, params.squashWidth), ++ scoreboard(params->numberOfThreads,//number of activeThreads ++ TheISA::NumIntRegs, params->numPhysIntRegs, ++ TheISA::NumFloatRegs, params->numPhysFloatRegs, ++ TheISA::NumMiscRegs * number_of_threads, ++ TheISA::ZeroReg), + - // What to pass to these time buffers? + // For now just have these time buffers be pretty big. ++ // @todo: Make these time buffer sizes parameters or derived ++ // from latencies + timeBuffer(5, 5), + fetchQueue(5, 5), + decodeQueue(5, 5), + renameQueue(5, 5), + iewQueue(5, 5), - - cpuXC(NULL), ++ activityRec(NumStages, 10, params->activity), + + globalSeqNum(1), + +#if FULL_SYSTEM - system(params.system), ++ system(params->system), + memCtrl(system->memctrl), + physmem(system->physmem), - itb(params.itb), - dtb(params.dtb), - mem(params.mem), ++ mem(params->mem), +#else - // Hardcoded for a single thread!! - mem(params.workload[0]->getMemory()), ++// pTable(params->pTable), ++ mem(params->workload[0]->getMemory()), +#endif // FULL_SYSTEM - - icacheInterface(params.icacheInterface), - dcacheInterface(params.dcacheInterface), - deferRegistration(params.defReg), - numInsts(0), - funcExeInst(0) ++ switchCount(0), ++ icacheInterface(params->icacheInterface), ++ dcacheInterface(params->dcacheInterface), ++ deferRegistration(params->deferRegistration), ++ numThreads(number_of_threads) +{ + _status = Idle; + - #if !FULL_SYSTEM - thread.resize(this->number_of_threads); - #endif - - for (int i = 0; i < this->number_of_threads; ++i) { ++ if (params->checker) { ++ BaseCPU *temp_checker = params->checker; ++ checker = dynamic_cast *>(temp_checker); ++ checker->setMemory(mem); +#if FULL_SYSTEM - assert(i == 0); - thread[i] = new CPUExecContext(this, 0, system, itb, dtb, mem); - system->execContexts[i] = thread[i]->getProxy(); - - execContexts.push_back(system->execContexts[i]); - #else - if (i < params.workload.size()) { - 
DPRINTF(FullCPU, "FullCPU: Workload[%i]'s starting PC is %#x, " - "process is %#x", - i, params.workload[i]->prog_entry, thread[i]); - thread[i] = new CPUExecContext(this, i, params.workload[i], i); - } - assert(params.workload[i]->getMemory() != NULL); - assert(mem != NULL); - execContexts.push_back(thread[i]->getProxy()); - #endif // !FULL_SYSTEM ++ checker->setSystem(params->system); ++#endif ++ } else { ++ checker = NULL; + } + - // Note that this is a hack so that my code which still uses xc-> will - // still work. I should remove this eventually - cpuXC = thread[0]; ++#if !FULL_SYSTEM ++ thread.resize(number_of_threads); ++ tids.resize(number_of_threads); ++#endif + - // The stages also need their CPU pointer setup. However this must be - // done at the upper level CPU because they have pointers to the upper - // level CPU, and not this FullO3CPU. ++ // The stages also need their CPU pointer setup. However this ++ // must be done at the upper level CPU because they have pointers ++ // to the upper level CPU, and not this FullO3CPU. ++ ++ // Set up Pointers to the activeThreads list for each stage ++ fetch.setActiveThreads(&activeThreads); ++ decode.setActiveThreads(&activeThreads); ++ rename.setActiveThreads(&activeThreads); ++ iew.setActiveThreads(&activeThreads); ++ commit.setActiveThreads(&activeThreads); + + // Give each of the stages the time buffer they will use. + fetch.setTimeBuffer(&timeBuffer); + decode.setTimeBuffer(&timeBuffer); + rename.setTimeBuffer(&timeBuffer); + iew.setTimeBuffer(&timeBuffer); + commit.setTimeBuffer(&timeBuffer); + + // Also setup each of the stages' queues. 
+ fetch.setFetchQueue(&fetchQueue); + decode.setFetchQueue(&fetchQueue); ++ commit.setFetchQueue(&fetchQueue); + decode.setDecodeQueue(&decodeQueue); + rename.setDecodeQueue(&decodeQueue); + rename.setRenameQueue(&renameQueue); + iew.setRenameQueue(&renameQueue); + iew.setIEWQueue(&iewQueue); + commit.setIEWQueue(&iewQueue); + commit.setRenameQueue(&renameQueue); + ++ commit.setFetchStage(&fetch); ++ commit.setIEWStage(&iew); ++ rename.setIEWStage(&iew); ++ rename.setCommitStage(&commit); ++ ++#if !FULL_SYSTEM ++ int active_threads = params->workload.size(); ++#else ++ int active_threads = 1; ++#endif ++ ++ //Make Sure That this a Valid Architeture ++ assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs); ++ assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs); ++ ++ rename.setScoreboard(&scoreboard); ++ iew.setScoreboard(&scoreboard); ++ + // Setup the rename map for whichever stages need it. - rename.setRenameMap(&renameMap); - iew.setRenameMap(&renameMap); ++ PhysRegIndex lreg_idx = 0; ++ PhysRegIndex freg_idx = params->numPhysIntRegs; //Index to 1 after int regs ++ ++ for (int tid=0; tid < numThreads; tid++) { ++ bool bindRegs = (tid <= active_threads - 1); ++ ++ commitRenameMap[tid].init(TheISA::NumIntRegs, ++ params->numPhysIntRegs, ++ lreg_idx, //Index for Logical. Regs ++ ++ TheISA::NumFloatRegs, ++ params->numPhysFloatRegs, ++ freg_idx, //Index for Float Regs ++ ++ TheISA::NumMiscRegs, ++ ++ TheISA::ZeroReg, ++ TheISA::ZeroReg, ++ ++ tid, ++ false); ++ ++ renameMap[tid].init(TheISA::NumIntRegs, ++ params->numPhysIntRegs, ++ lreg_idx, //Index for Logical. Regs ++ ++ TheISA::NumFloatRegs, ++ params->numPhysFloatRegs, ++ freg_idx, //Index for Float Regs + - // Setup the free list for whichever stages need it. 
++ TheISA::NumMiscRegs, ++ ++ TheISA::ZeroReg, ++ TheISA::ZeroReg, ++ ++ tid, ++ bindRegs); ++ } ++ ++ rename.setRenameMap(renameMap); ++ commit.setRenameMap(commitRenameMap); ++ ++ // Give renameMap & rename stage access to the freeList; ++ for (int i=0; i < numThreads; i++) { ++ renameMap[i].setFreeList(&freeList); ++ } + rename.setFreeList(&freeList); - renameMap.setFreeList(&freeList); ++ ++ // Setup the page table for whichever stages need it. ++#if !FULL_SYSTEM ++// fetch.setPageTable(pTable); ++// iew.setPageTable(pTable); ++#endif + + // Setup the ROB for whichever stages need it. + commit.setROB(&rob); ++ ++ lastRunningCycle = curTick; ++ ++ contextSwitch = false; +} + +template +FullO3CPU::~FullO3CPU() +{ +} + +template +void +FullO3CPU::fullCPURegStats() +{ ++ BaseFullCPU::regStats(); ++ + // Register any of the FullCPU's stats here. ++ timesIdled ++ .name(name() + ".timesIdled") ++ .desc("Number of times that the entire CPU went into an idle state and" ++ " unscheduled itself") ++ .prereq(timesIdled); ++ ++ idleCycles ++ .name(name() + ".idleCycles") ++ .desc("Total number of cycles that the CPU has spent unscheduled due " ++ "to idling") ++ .prereq(idleCycles); ++ ++ // Number of Instructions simulated ++ // -------------------------------- ++ // Should probably be in Base CPU but need templated ++ // MaxThreads so put in here instead ++ committedInsts ++ .init(numThreads) ++ .name(name() + ".committedInsts") ++ .desc("Number of Instructions Simulated"); ++ ++ totalCommittedInsts ++ .name(name() + ".committedInsts_total") ++ .desc("Number of Instructions Simulated"); ++ ++ cpi ++ .name(name() + ".cpi") ++ .desc("CPI: Cycles Per Instruction") ++ .precision(6); ++ cpi = simTicks / committedInsts; ++ ++ totalCpi ++ .name(name() + ".cpi_total") ++ .desc("CPI: Total CPI of All Threads") ++ .precision(6); ++ totalCpi = simTicks / totalCommittedInsts; ++ ++ ipc ++ .name(name() + ".ipc") ++ .desc("IPC: Instructions Per Cycle") ++ .precision(6); ++ ipc = 
committedInsts / simTicks; ++ ++ totalIpc ++ .name(name() + ".ipc_total") ++ .desc("IPC: Total IPC of All Threads") ++ .precision(6); ++ totalIpc = totalCommittedInsts / simTicks; ++ +} + +template +void +FullO3CPU::tick() +{ + DPRINTF(FullCPU, "\n\nFullCPU: Ticking main, FullO3CPU.\n"); + - //Tick each of the stages if they're actually running. - //Will want to figure out a way to unschedule itself if they're all - //going to be idle for a long time. ++ ++numCycles; ++ ++// activity = false; ++ ++ //Tick each of the stages + fetch.tick(); + + decode.tick(); + + rename.tick(); + + iew.tick(); + + commit.tick(); + - // Now advance the time buffers, unless the stage is stalled. ++#if !FULL_SYSTEM ++ doContextSwitch(); ++#endif ++ ++ // Now advance the time buffers + timeBuffer.advance(); + + fetchQueue.advance(); + decodeQueue.advance(); + renameQueue.advance(); + iewQueue.advance(); + - if (_status == Running && !tickEvent.scheduled()) - tickEvent.schedule(curTick + 1); ++ activityRec.advance(); ++ ++ if (removeInstsThisCycle) { ++ cleanUpRemovedInsts(); ++ } ++ ++ if (!tickEvent.scheduled()) { ++ if (_status == SwitchedOut) { ++ // increment stat ++ lastRunningCycle = curTick; ++ } else if (!activityRec.active()) { ++ lastRunningCycle = curTick; ++ timesIdled++; ++ } else { ++ tickEvent.schedule(curTick + cycles(1)); ++ } ++ } ++ ++#if !FULL_SYSTEM ++ updateThreadPriority(); ++#endif ++ +} + +template +void +FullO3CPU::init() +{ - if(!deferRegistration) - { - this->registerExecContexts(); ++ if (!deferRegistration) { ++ registerExecContexts(); ++ } + - // Need to do a copy of the xc->regs into the CPU's regfile so - // that it can start properly. ++ // Set inSyscall so that the CPU doesn't squash when initially ++ // setting up registers. 
++ for (int i = 0; i < number_of_threads; ++i) ++ thread[i]->inSyscall = true; ++ ++ for (int tid=0; tid < number_of_threads; tid++) { +#if FULL_SYSTEM - ExecContext *src_xc = system->execContexts[0]; - TheISA::initCPU(src_xc, src_xc->readCpuId()); ++ ExecContext *src_xc = execContexts[tid]; +#else - ExecContext *src_xc = thread[0]->getProxy(); ++ ExecContext *src_xc = thread[tid]->getXCProxy(); +#endif - // First loop through the integer registers. - for (int i = 0; i < TheISA::NumIntRegs; ++i) - { - regFile.intRegFile[i] = src_xc->readIntReg(i); ++ // Threads start in the Suspended State ++ if (src_xc->status() != ExecContext::Suspended) { ++ continue; + } + - // Then loop through the floating point registers. - for (int i = 0; i < TheISA::NumFloatRegs; ++i) - { - regFile.floatRegFile.setRegBits(i, src_xc->readRegBits(i)) - } - /* - // Then loop through the misc registers. - regFile.miscRegs.fpcr = src_xc->regs.miscRegs.fpcr; - regFile.miscRegs.uniq = src_xc->regs.miscRegs.uniq; - regFile.miscRegs.lock_flag = src_xc->regs.miscRegs.lock_flag; - regFile.miscRegs.lock_addr = src_xc->regs.miscRegs.lock_addr; - */ - // Then finally set the PC and the next PC. - regFile.pc = src_xc->readPC(); - regFile.npc = src_xc->readNextPC(); ++#if FULL_SYSTEM ++ TheISA::initCPU(src_xc, src_xc->readCpuId()); ++#endif ++ } ++ ++ // Clear inSyscall. ++ for (int i = 0; i < number_of_threads; ++i) ++ thread[i]->inSyscall = false; ++ ++ // Initialize stages. ++ fetch.initStage(); ++ iew.initStage(); ++ rename.initStage(); ++ commit.initStage(); ++ ++ commit.setThreads(thread); ++} ++ ++template ++void ++FullO3CPU::insertThread(unsigned tid) ++{ ++ DPRINTF(FullCPU,"[tid:%i] Initializing thread data"); ++ // Will change now that the PC and thread state is internal to the CPU ++ // and not in the CPUExecContext. 
++#if 0 ++#if FULL_SYSTEM ++ ExecContext *src_xc = system->execContexts[tid]; ++#else ++ CPUExecContext *src_xc = thread[tid]; ++#endif ++ ++ //Bind Int Regs to Rename Map ++ for (int ireg = 0; ireg < TheISA::NumIntRegs; ireg++) { ++ PhysRegIndex phys_reg = freeList.getIntReg(); ++ ++ renameMap[tid].setEntry(ireg,phys_reg); ++ scoreboard.setReg(phys_reg); ++ } ++ ++ //Bind Float Regs to Rename Map ++ for (int freg = 0; freg < TheISA::NumFloatRegs; freg++) { ++ PhysRegIndex phys_reg = freeList.getFloatReg(); ++ ++ renameMap[tid].setEntry(freg,phys_reg); ++ scoreboard.setReg(phys_reg); ++ } ++ ++ //Copy Thread Data Into RegFile ++ this->copyFromXC(tid); ++ ++ //Set PC/NPC ++ regFile.pc[tid] = src_xc->readPC(); ++ regFile.npc[tid] = src_xc->readNextPC(); ++ ++ src_xc->setStatus(ExecContext::Active); ++ ++ activateContext(tid,1); ++ ++ //Reset ROB/IQ/LSQ Entries ++ commit.rob->resetEntries(); ++ iew.resetEntries(); ++#endif ++} ++ ++template ++void ++FullO3CPU::removeThread(unsigned tid) ++{ ++ DPRINTF(FullCPU,"[tid:%i] Removing thread data"); ++#if 0 ++ //Unbind Int Regs from Rename Map ++ for (int ireg = 0; ireg < TheISA::NumIntRegs; ireg++) { ++ PhysRegIndex phys_reg = renameMap[tid].lookup(ireg); ++ ++ scoreboard.unsetReg(phys_reg); ++ freeList.addReg(phys_reg); ++ } ++ ++ //Unbind Float Regs from Rename Map ++ for (int freg = 0; freg < TheISA::NumFloatRegs; freg++) { ++ PhysRegIndex phys_reg = renameMap[tid].lookup(freg); ++ ++ scoreboard.unsetReg(phys_reg); ++ freeList.addReg(phys_reg); ++ } ++ ++ //Copy Thread Data From RegFile ++ /* Fix Me: ++ * Do we really need to do this if we are removing a thread ++ * in the sense that it's finished (exiting)? If the thread is just ++ * being suspended we might... 
++ */ ++// this->copyToXC(tid); ++ ++ //Squash Throughout Pipeline ++ fetch.squash(0,tid); ++ decode.squash(tid); ++ rename.squash(tid); ++ ++ assert(iew.ldstQueue.getCount(tid) == 0); ++ ++ //Reset ROB/IQ/LSQ Entries ++ if (activeThreads.size() >= 1) { ++ commit.rob->resetEntries(); ++ iew.resetEntries(); ++ } ++#endif ++} ++ ++ ++template ++void ++FullO3CPU::activateWhenReady(int tid) ++{ ++ DPRINTF(FullCPU,"[tid:%i]: Checking if resources are available for incoming" ++ "(e.g. PhysRegs/ROB/IQ/LSQ) \n", ++ tid); ++ ++ bool ready = true; ++ ++ if (freeList.numFreeIntRegs() >= TheISA::NumIntRegs) { ++ DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " ++ "Phys. Int. Regs.\n", ++ tid); ++ ready = false; ++ } else if (freeList.numFreeFloatRegs() >= TheISA::NumFloatRegs) { ++ DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " ++ "Phys. Float. Regs.\n", ++ tid); ++ ready = false; ++ } else if (commit.rob->numFreeEntries() >= ++ commit.rob->entryAmount(activeThreads.size() + 1)) { ++ DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " ++ "ROB entries.\n", ++ tid); ++ ready = false; ++ } else if (iew.instQueue.numFreeEntries() >= ++ iew.instQueue.entryAmount(activeThreads.size() + 1)) { ++ DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " ++ "IQ entries.\n", ++ tid); ++ ready = false; ++ } else if (iew.ldstQueue.numFreeEntries() >= ++ iew.ldstQueue.entryAmount(activeThreads.size() + 1)) { ++ DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough " ++ "LSQ entries.\n", ++ tid); ++ ready = false; ++ } ++ ++ if (ready) { ++ insertThread(tid); ++ ++ contextSwitch = false; ++ ++ cpuWaitList.remove(tid); ++ } else { ++ suspendContext(tid); ++ ++ //blocks fetch ++ contextSwitch = true; ++ ++ //do waitlist ++ cpuWaitList.push_back(tid); + } +} + +template +void - FullO3CPU::activateContext(int thread_num, int delay) ++FullO3CPU::activateContext(int tid, int delay) +{ + // Needs to set each stage to running as well. 
++ list::iterator isActive = find( ++ activeThreads.begin(), activeThreads.end(), tid); ++ ++ if (isActive == activeThreads.end()) { ++ //May Need to Re-code this if the delay variable is the ++ //delay needed for thread to activate ++ DPRINTF(FullCPU, "Adding Thread %i to active threads list\n", ++ tid); ++ ++ activeThreads.push_back(tid); ++ } ++ ++ assert(_status == Idle || _status == SwitchedOut); + + scheduleTickEvent(delay); + ++ // Be sure to signal that there's some activity so the CPU doesn't ++ // deschedule itself. ++ activityRec.activity(); ++ fetch.wakeFromQuiesce(); ++ + _status = Running; +} + +template +void - FullO3CPU::suspendContext(int thread_num) ++FullO3CPU::suspendContext(int tid) ++{ ++ DPRINTF(FullCPU,"[tid: %i]: Suspended ...\n", tid); ++ unscheduleTickEvent(); ++ _status = Idle; ++/* ++ //Remove From Active List, if Active ++ list::iterator isActive = find( ++ activeThreads.begin(), activeThreads.end(), tid); ++ ++ if (isActive != activeThreads.end()) { ++ DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n", ++ tid); ++ activeThreads.erase(isActive); ++ } ++*/ ++} ++ ++template ++void ++FullO3CPU::deallocateContext(int tid) +{ - panic("suspendContext unimplemented!"); ++ DPRINTF(FullCPU,"[tid:%i]: Deallocating ...", tid); ++/* ++ //Remove From Active List, if Active ++ list::iterator isActive = find( ++ activeThreads.begin(), activeThreads.end(), tid); ++ ++ if (isActive != activeThreads.end()) { ++ DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n", ++ tid); ++ activeThreads.erase(isActive); ++ ++ removeThread(tid); ++ } ++*/ +} + +template +void - FullO3CPU::deallocateContext(int thread_num) ++FullO3CPU::haltContext(int tid) +{ - panic("deallocateContext unimplemented!"); ++ DPRINTF(FullCPU,"[tid:%i]: Halted ...", tid); ++/* ++ //Remove From Active List, if Active ++ list::iterator isActive = find( ++ activeThreads.begin(), activeThreads.end(), tid); ++ ++ if (isActive != activeThreads.end()) { ++ 
DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n", ++ tid); ++ activeThreads.erase(isActive); ++ ++ removeThread(tid); ++ } ++*/ +} + +template +void - FullO3CPU::haltContext(int thread_num) ++FullO3CPU::switchOut(Sampler *_sampler) +{ - panic("haltContext unimplemented!"); ++ sampler = _sampler; ++ switchCount = 0; ++ fetch.switchOut(); ++ decode.switchOut(); ++ rename.switchOut(); ++ iew.switchOut(); ++ commit.switchOut(); ++ ++ // Wake the CPU and record activity so everything can drain out if ++ // the CPU is currently idle. ++ wakeCPU(); ++ activityRec.activity(); +} + +template +void - FullO3CPU::switchOut() ++FullO3CPU::signalSwitched() +{ - panic("FullO3CPU does not have a switch out function.\n"); ++ if (++switchCount == NumStages) { ++ fetch.doSwitchOut(); ++ rename.doSwitchOut(); ++ commit.doSwitchOut(); ++ instList.clear(); ++ while (!removeList.empty()) { ++ removeList.pop(); ++ } ++ ++ if (checker) ++ checker->switchOut(sampler); ++ ++ if (tickEvent.scheduled()) ++ tickEvent.squash(); ++ sampler->signalSwitched(); ++ _status = SwitchedOut; ++ } ++ assert(switchCount <= 5); +} + +template +void +FullO3CPU::takeOverFrom(BaseCPU *oldCPU) +{ ++ // Flush out any old data from the time buffers. ++ for (int i = 0; i < 10; ++i) { ++ timeBuffer.advance(); ++ fetchQueue.advance(); ++ decodeQueue.advance(); ++ renameQueue.advance(); ++ iewQueue.advance(); ++ } ++ ++ activityRec.reset(); ++ + BaseCPU::takeOverFrom(oldCPU); + ++ fetch.takeOverFrom(); ++ decode.takeOverFrom(); ++ rename.takeOverFrom(); ++ iew.takeOverFrom(); ++ commit.takeOverFrom(); ++ + assert(!tickEvent.scheduled()); + - // Set all status's to active, schedule the - // CPU's tick event. ++ // @todo: Figure out how to properly select the tid to put onto ++ // the active threads list. 
++ int tid = 0; ++ ++ list::iterator isActive = find( ++ activeThreads.begin(), activeThreads.end(), tid); ++ ++ if (isActive == activeThreads.end()) { ++ //May Need to Re-code this if the delay variable is the delay ++ //needed for thread to activate ++ DPRINTF(FullCPU, "Adding Thread %i to active threads list\n", ++ tid); ++ ++ activeThreads.push_back(tid); ++ } ++ ++ // Set all statuses to active, schedule the CPU's tick event. ++ // @todo: Fix up statuses so this is handled properly + for (int i = 0; i < execContexts.size(); ++i) { + ExecContext *xc = execContexts[i]; + if (xc->status() == ExecContext::Active && _status != Running) { + _status = Running; + tickEvent.schedule(curTick); + } + } - } - - template - InstSeqNum - FullO3CPU::getAndIncrementInstSeq() - { - // Hopefully this works right. - return globalSeqNum++; ++ if (!tickEvent.scheduled()) ++ tickEvent.schedule(curTick); +} + +template +uint64_t +FullO3CPU::readIntReg(int reg_idx) +{ + return regFile.readIntReg(reg_idx); +} + +template +FloatReg +FullO3CPU::readFloatReg(int reg_idx, int width) +{ + return regFile.readFloatReg(reg_idx, width); +} + +template +FloatReg +FullO3CPU::readFloatReg(int reg_idx) +{ + return regFile.readFloatReg(reg_idx); +} + +template +FloatRegBits +FullO3CPU::readFloatRegBits(int reg_idx, int width) - { + return regFile.readFloatRegBits(reg_idx, width); +} + +template +FloatRegBits +FullO3CPU::readFloatRegBits(int reg_idx) +{ + return regFile.readFloatRegBits(reg_idx); +} + +template +void +FullO3CPU::setIntReg(int reg_idx, uint64_t val) +{ + regFile.setIntReg(reg_idx, val); +} + +template +void +FullO3CPU::setFloatReg(int reg_idx, FloatReg val, int width) +{ + regFile.setFloatReg(reg_idx, val, width); +} + +template +void +FullO3CPU::setFloatReg(int reg_idx, FloatReg val) +{ + regFile.setFloatReg(reg_idx, val); +} + +template +void +FullO3CPU::setFloatRegBits(int reg_idx, FloatRegBits val, int width) +{ + regFile.setFloatRegBits(reg_idx, val, width); +} + +template +void 
+FullO3CPU::setFloatRegBits(int reg_idx, FloatRegBits val) +{ + regFile.setFloatRegBits(reg_idx, val); +} + +template +uint64_t - FullO3CPU::readPC() ++FullO3CPU::readArchIntReg(int reg_idx, unsigned tid) ++{ ++ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); ++ ++ return regFile.readIntReg(phys_reg); ++} ++ ++template ++float ++FullO3CPU::readArchFloatRegSingle(int reg_idx, unsigned tid) ++{ ++ int idx = reg_idx + TheISA::FP_Base_DepTag; ++ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); ++ ++ return regFile.readFloatRegSingle(phys_reg); ++} ++ ++template ++double ++FullO3CPU::readArchFloatRegDouble(int reg_idx, unsigned tid) ++{ ++ int idx = reg_idx + TheISA::FP_Base_DepTag; ++ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); ++ ++ return regFile.readFloatRegDouble(phys_reg); ++} ++ ++template ++uint64_t ++FullO3CPU::readArchFloatRegInt(int reg_idx, unsigned tid) +{ - return regFile.readPC(); ++ int idx = reg_idx + TheISA::FP_Base_DepTag; ++ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); ++ ++ return regFile.readFloatRegInt(phys_reg); +} + +template +void - FullO3CPU::setNextPC(uint64_t val) ++FullO3CPU::setArchIntReg(int reg_idx, uint64_t val, unsigned tid) +{ - regFile.setNextPC(val); ++ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); ++ ++ regFile.setIntReg(phys_reg, val); +} + +template +void - FullO3CPU::setPC(Addr new_PC) ++FullO3CPU::setArchFloatRegSingle(int reg_idx, float val, unsigned tid) +{ - regFile.setPC(new_PC); ++ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); ++ ++ regFile.setFloatRegSingle(phys_reg, val); +} + +template +void - FullO3CPU::addInst(DynInstPtr &inst) ++FullO3CPU::setArchFloatRegDouble(int reg_idx, double val, unsigned tid) +{ - instList.push_back(inst); ++ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); ++ ++ regFile.setFloatRegDouble(phys_reg, val); +} + +template +void - FullO3CPU::instDone() ++FullO3CPU::setArchFloatRegInt(int reg_idx, 
uint64_t val, unsigned tid) +{ - // Keep an instruction count. - numInsts++; ++ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); + - // Check for instruction-count-based events. - comInstEventQueue[0]->serviceEvents(numInsts); ++ regFile.setFloatRegInt(phys_reg, val); ++} ++ ++template ++uint64_t ++FullO3CPU::readPC(unsigned tid) ++{ ++ return commit.readPC(tid); +} + +template +void - FullO3CPU::removeBackInst(DynInstPtr &inst) ++FullO3CPU::setPC(Addr new_PC,unsigned tid) +{ - DynInstPtr inst_to_delete; ++ commit.setPC(new_PC, tid); ++} + - // Walk through the instruction list, removing any instructions - // that were inserted after the given instruction, inst. - while (instList.back() != inst) - { - assert(!instList.empty()); ++template ++uint64_t ++FullO3CPU::readNextPC(unsigned tid) ++{ ++ return commit.readNextPC(tid); ++} ++ ++template ++void ++FullO3CPU::setNextPC(uint64_t val,unsigned tid) ++{ ++ commit.setNextPC(val, tid); ++} ++ ++template ++typename FullO3CPU::ListIt ++FullO3CPU::addInst(DynInstPtr &inst) ++{ ++ instList.push_back(inst); + - // Obtain the pointer to the instruction. - inst_to_delete = instList.back(); ++ return --(instList.end()); ++} + - DPRINTF(FullCPU, "FullCPU: Removing instruction %i, PC %#x\n", - inst_to_delete->seqNum, inst_to_delete->readPC()); ++template ++void ++FullO3CPU::instDone(unsigned tid) ++{ ++ // Keep an instruction count. ++ thread[tid]->numInst++; ++ thread[tid]->numInsts++; ++ committedInsts[tid]++; ++ totalCommittedInsts++; + - // Remove the instruction from the list. - instList.pop_back(); ++ // Check for instruction-count-based events. ++ comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst); ++} + - // Mark it as squashed. 
- inst_to_delete->setSquashed(); - } ++template ++void ++FullO3CPU::addToRemoveList(DynInstPtr &inst) ++{ ++ removeInstsThisCycle = true; ++ ++ removeList.push(inst->getInstListIt()); +} + +template +void +FullO3CPU::removeFrontInst(DynInstPtr &inst) +{ - DynInstPtr inst_to_remove; ++ DPRINTF(FullCPU, "FullCPU: Removing committed instruction [tid:%i] PC %#x " ++ "[sn:%lli]\n", ++ inst->threadNumber, inst->readPC(), inst->seqNum); + - // The front instruction should be the same one being asked to be removed. - assert(instList.front() == inst); ++ removeInstsThisCycle = true; + + // Remove the front instruction. - inst_to_remove = inst; - instList.pop_front(); - - DPRINTF(FullCPU, "FullCPU: Removing committed instruction %#x, PC %#x\n", - inst_to_remove, inst_to_remove->readPC()); ++ removeList.push(inst->getInstListIt()); +} + +template +void - FullO3CPU::removeInstsNotInROB() ++FullO3CPU::removeInstsNotInROB(unsigned tid) +{ - DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction " - "list.\n"); ++ DPRINTF(FullCPU, "FullCPU: Thread %i: Deleting instructions from instruction" ++ " list.\n", tid); ++ ++ ListIt end_it; ++ ++ bool rob_empty = false; ++ ++ if (instList.empty()) { ++ return; ++ } else if (rob.isEmpty(/*tid*/)) { ++ DPRINTF(FullCPU, "FullCPU: ROB is empty, squashing all insts.\n"); ++ end_it = instList.begin(); ++ rob_empty = true; ++ } else { ++ end_it = (rob.readTailInst(tid))->getInstListIt(); ++ DPRINTF(FullCPU, "FullCPU: ROB is not empty, squashing insts not in ROB.\n"); ++ } ++ ++ removeInstsThisCycle = true; ++ ++ ListIt inst_it = instList.end(); ++ ++ inst_it--; ++ ++ // Walk through the instruction list, removing any instructions ++ // that were inserted after the given instruction iterator, end_it. 
++ while (inst_it != end_it) { ++ assert(!instList.empty()); + - DynInstPtr rob_tail = rob.readTailInst(); ++ squashInstIt(inst_it, tid); + - removeBackInst(rob_tail); ++ inst_it--; ++ } ++ ++ // If the ROB was empty, then we actually need to remove the first ++ // instruction as well. ++ if (rob_empty) { ++ squashInstIt(inst_it, tid); ++ } +} + +template +void - FullO3CPU::removeInstsUntil(const InstSeqNum &seq_num) ++FullO3CPU::removeInstsUntil(const InstSeqNum &seq_num, ++ unsigned tid) +{ ++ assert(!instList.empty()); ++ ++ removeInstsThisCycle = true; ++ ++ ListIt inst_iter = instList.end(); ++ ++ inst_iter--; ++ + DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction " - "list.\n"); ++ "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n", ++ tid, seq_num, (*inst_iter)->seqNum); + - DynInstPtr inst_to_delete; ++ while ((*inst_iter)->seqNum > seq_num) { + - while (instList.back()->seqNum > seq_num) { - assert(!instList.empty()); ++ bool break_loop = (inst_iter == instList.begin()); + - // Obtain the pointer to the instruction. - inst_to_delete = instList.back(); ++ squashInstIt(inst_iter, tid); + - DPRINTF(FullCPU, "FullCPU: Removing instruction %i, PC %#x\n", - inst_to_delete->seqNum, inst_to_delete->readPC()); ++ inst_iter--; + - // Remove the instruction from the list. - instList.back() = NULL; - instList.pop_back(); ++ if (break_loop) ++ break; ++ } ++} ++ ++template ++inline void ++FullO3CPU::squashInstIt(const ListIt &instIt, const unsigned &tid) ++{ ++ if ((*instIt)->threadNumber == tid) { ++ DPRINTF(FullCPU, "FullCPU: Squashing instruction, " ++ "[tid:%i] [sn:%lli] PC %#x\n", ++ (*instIt)->threadNumber, ++ (*instIt)->seqNum, ++ (*instIt)->readPC()); + + // Mark it as squashed. - inst_to_delete->setSquashed(); - } ++ (*instIt)->setSquashed(); + ++ // @todo: Formulate a consistent method for deleting ++ // instructions from the instruction list ++ // Remove the instruction from the list. 
++ removeList.push(instIt); ++ } +} + ++template ++void ++FullO3CPU::cleanUpRemovedInsts() ++{ ++ while (!removeList.empty()) { ++ DPRINTF(FullCPU, "FullCPU: Removing instruction, " ++ "[tid:%i] [sn:%lli] PC %#x\n", ++ (*removeList.front())->threadNumber, ++ (*removeList.front())->seqNum, ++ (*removeList.front())->readPC()); ++ ++ instList.erase(removeList.front()); ++ ++ removeList.pop(); ++ } ++ ++ removeInstsThisCycle = false; ++} ++/* +template +void +FullO3CPU::removeAllInsts() +{ + instList.clear(); +} - ++*/ +template +void +FullO3CPU::dumpInsts() +{ + int num = 0; - typename list::iterator inst_list_it = instList.begin(); + - while (inst_list_it != instList.end()) - { - cprintf("Instruction:%i\nPC:%#x\nSN:%lli\nIssued:%i\nSquashed:%i\n\n", - num, (*inst_list_it)->readPC(), (*inst_list_it)->seqNum, - (*inst_list_it)->isIssued(), (*inst_list_it)->isSquashed()); ++ ListIt inst_list_it = instList.begin(); ++ ++ cprintf("Dumping Instruction List\n"); ++ ++ while (inst_list_it != instList.end()) { ++ cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n" ++ "Squashed:%i\n\n", ++ num, (*inst_list_it)->readPC(), (*inst_list_it)->threadNumber, ++ (*inst_list_it)->seqNum, (*inst_list_it)->isIssued(), ++ (*inst_list_it)->isSquashed()); + inst_list_it++; + ++num; + } +} - ++/* +template +void +FullO3CPU::wakeDependents(DynInstPtr &inst) +{ + iew.wakeDependents(inst); +} ++*/ ++template ++void ++FullO3CPU::wakeCPU() ++{ ++ if (activityRec.active() || tickEvent.scheduled()) { ++ DPRINTF(Activity, "CPU already running.\n"); ++ return; ++ } ++ ++ DPRINTF(Activity, "Waking up CPU\n"); ++ ++ idleCycles += (curTick - 1) - lastRunningCycle; ++ ++ tickEvent.schedule(curTick); ++} ++ ++template ++int ++FullO3CPU::getFreeTid() ++{ ++ for (int i=0; i < numThreads; i++) { ++ if (!tids[i]) { ++ tids[i] = true; ++ return i; ++ } ++ } ++ ++ return -1; ++} ++ ++template ++void ++FullO3CPU::doContextSwitch() ++{ ++ if (contextSwitch) { ++ ++ //ADD CODE TO DEACTIVE THREAD 
HERE (???) ++ ++ for (int tid=0; tid < cpuWaitList.size(); tid++) { ++ activateWhenReady(tid); ++ } ++ ++ if (cpuWaitList.size() == 0) ++ contextSwitch = true; ++ } ++} ++ ++template ++void ++FullO3CPU::updateThreadPriority() ++{ ++ if (activeThreads.size() > 1) ++ { ++ //DEFAULT TO ROUND ROBIN SCHEME ++ //e.g. Move highest priority to end of thread list ++ list::iterator list_begin = activeThreads.begin(); ++ list::iterator list_end = activeThreads.end(); ++ ++ unsigned high_thread = *list_begin; ++ ++ activeThreads.erase(list_begin); ++ ++ activeThreads.push_back(high_thread); ++ } ++} + +// Forward declaration of FullO3CPU. +template class FullO3CPU; diff --cc src/cpu/o3/cpu.hh index f7c80e8a1,000000000..bed95ad54 mode 100644,000000..100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@@ -1,363 -1,0 +1,525 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - //Todo: Add in a lot of the functions that are ISA specific. Also define - //the functions that currently exist within the base cpu class. Define - //everything for the simobject stuff so it can be serialized and - //instantiated, add in debugging statements everywhere. Have CPU schedule - //itself properly. Threads! - // Avoid running stages and advancing queues if idle/stalled. - - #ifndef __CPU_O3_CPU_FULL_CPU_HH__ - #define __CPU_O3_CPU_FULL_CPU_HH__ ++#ifndef __CPU_O3_CPU_HH__ ++#define __CPU_O3_CPU_HH__ + +#include +#include ++#include ++#include +#include + +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "config/full_system.hh" ++#include "cpu/activity.hh" +#include "cpu/base.hh" +#include "cpu/cpu_exec_context.hh" +#include "cpu/o3/comm.hh" +#include "cpu/o3/cpu_policy.hh" ++#include "cpu/o3/scoreboard.hh" ++#include "cpu/o3/thread_state.hh" +#include "sim/process.hh" + ++template ++class Checker; +class ExecContext; - class FunctionalMemory; ++class MemInterface; +class Process; + +class BaseFullCPU : public BaseCPU +{ + //Stuff that's pretty ISA independent will go here. 
+ public: + typedef BaseCPU::Params Params; + - #if FULL_SYSTEM - BaseFullCPU(Params ¶ms); - #else - BaseFullCPU(Params ¶ms); - #endif // FULL_SYSTEM ++ BaseFullCPU(Params *params); ++ ++ void regStats(); + + protected: + int cpu_id; +}; + +template +class FullO3CPU : public BaseFullCPU +{ + public: - //Put typedefs from the Impl here. ++ // Typedefs from the Impl here. + typedef typename Impl::CPUPol CPUPolicy; + typedef typename Impl::Params Params; + typedef typename Impl::DynInstPtr DynInstPtr; + ++ typedef O3ThreadState Thread; ++ ++ typedef typename std::list::iterator ListIt; ++ + public: + enum Status { + Running, + Idle, + Halted, - Blocked // ? ++ Blocked, ++ SwitchedOut + }; + ++ /** Overall CPU status. */ + Status _status; + + private: + class TickEvent : public Event + { + private: ++ /** Pointer to the CPU. */ + FullO3CPU *cpu; + + public: ++ /** Constructs a tick event. */ + TickEvent(FullO3CPU *c); ++ ++ /** Processes a tick event, calling tick() on the CPU. */ + void process(); ++ /** Returns the description of the tick event. */ + const char *description(); + }; + ++ /** The tick event used for scheduling CPU ticks. */ + TickEvent tickEvent; + - /// Schedule tick event, regardless of its current state. ++ /** Schedule tick event, regardless of its current state. */ + void scheduleTickEvent(int delay) + { + if (tickEvent.squashed()) - tickEvent.reschedule(curTick + delay); ++ tickEvent.reschedule(curTick + cycles(delay)); + else if (!tickEvent.scheduled()) - tickEvent.schedule(curTick + delay); ++ tickEvent.schedule(curTick + cycles(delay)); + } + - /// Unschedule tick event, regardless of its current state. ++ /** Unschedule tick event, regardless of its current state. */ + void unscheduleTickEvent() + { + if (tickEvent.scheduled()) + tickEvent.squash(); + } + + public: - FullO3CPU(Params ¶ms); ++ /** Constructs a CPU with the given parameters. */ ++ FullO3CPU(Params *params); ++ /** Destructor. */ + ~FullO3CPU(); + ++ /** Registers statistics. 
*/ + void fullCPURegStats(); + ++ /** Ticks CPU, calling tick() on each stage, and checking the overall ++ * activity to see if the CPU should deschedule itself. ++ */ + void tick(); + ++ /** Initialize the CPU */ + void init(); + - void activateContext(int thread_num, int delay); - void suspendContext(int thread_num); - void deallocateContext(int thread_num); - void haltContext(int thread_num); ++ /** Setup CPU to insert a thread's context */ ++ void insertThread(unsigned tid); ++ ++ /** Remove all of a thread's context from CPU */ ++ void removeThread(unsigned tid); ++ ++ /** Count the Total Instructions Committed in the CPU. */ ++ virtual Counter totalInstructions() const ++ { ++ Counter total(0); ++ ++ for (int i=0; i < thread.size(); i++) ++ total += thread[i]->numInst; ++ ++ return total; ++ } ++ ++ /** Add Thread to Active Threads List. */ ++ void activateContext(int tid, int delay); ++ ++ /** Remove Thread from Active Threads List */ ++ void suspendContext(int tid); ++ ++ /** Remove Thread from Active Threads List && ++ * Remove Thread Context from CPU. ++ */ ++ void deallocateContext(int tid); ++ ++ /** Remove Thread from Active Threads List && ++ * Remove Thread Context from CPU. ++ */ ++ void haltContext(int tid); ++ ++ /** Activate a Thread When CPU Resources are Available. */ ++ void activateWhenReady(int tid); + - void switchOut(); ++ /** Add or Remove a Thread Context in the CPU. */ ++ void doContextSwitch(); ++ ++ /** Update The Order In Which We Process Threads. */ ++ void updateThreadPriority(); ++ ++ /** Executes a syscall on this cycle. ++ * --------------------------------------- ++ * Note: this is a virtual function. CPU-Specific ++ * functionality defined in derived classes ++ */ ++ virtual void syscall(int tid) { panic("Unimplemented!"); } ++ ++ /** Check if there are any system calls pending. */ ++ void checkSyscalls(); ++ ++ /** Switches out this CPU. 
++ */ ++ void switchOut(Sampler *sampler); ++ ++ void signalSwitched(); ++ ++ /** Takes over from another CPU. ++ */ + void takeOverFrom(BaseCPU *oldCPU); + + /** Get the current instruction sequence number, and increment it. */ - InstSeqNum getAndIncrementInstSeq(); ++ InstSeqNum getAndIncrementInstSeq() ++ { return globalSeqNum++; } + +#if FULL_SYSTEM + /** Check if this address is a valid instruction address. */ + bool validInstAddr(Addr addr) { return true; } + + /** Check if this address is a valid data address. */ + bool validDataAddr(Addr addr) { return true; } + + /** Get instruction asid. */ - int getInstAsid() - { return regFile.miscRegs.getInstAsid(); } ++ int getInstAsid(unsigned tid) ++ { return regFile.miscRegs[tid].getInstAsid(); } + + /** Get data asid. */ - int getDataAsid() - { return regFile.miscRegs.getDataAsid(); } ++ int getDataAsid(unsigned tid) ++ { return regFile.miscRegs[tid].getDataAsid(); } +#else - bool validInstAddr(Addr addr) - { return thread[0]->validInstAddr(addr); } ++ /** Check if this address is a valid instruction address. */ ++ bool validInstAddr(Addr addr,unsigned tid) ++ { return thread[tid]->validInstAddr(addr); } + - bool validDataAddr(Addr addr) - { return thread[0]->validDataAddr(addr); } ++ /** Check if this address is a valid data address. */ ++ bool validDataAddr(Addr addr,unsigned tid) ++ { return thread[tid]->validDataAddr(addr); } + - int getInstAsid() { return thread[0]->getInstAsid(); } - int getDataAsid() { return thread[0]->getDataAsid(); } ++ /** Get instruction asid. */ ++ int getInstAsid(unsigned tid) ++ { return thread[tid]->asid; } ++ ++ /** Get data asid. */ ++ int getDataAsid(unsigned tid) ++ { return thread[tid]->asid; } + +#endif + + // + // New accessors for new decoder. 
+ // + uint64_t readIntReg(int reg_idx); + + FloatReg readFloatReg(int reg_idx); + + FloatReg readFloatReg(int reg_idx, int width); + + FloatRegBits readFloatRegBits(int reg_idx); + + FloatRegBits readFloatRegBits(int reg_idx, int width); + + void setIntReg(int reg_idx, uint64_t val); + + void setFloatReg(int reg_idx, FloatReg val, int width); + + void setFloatReg(int reg_idx, FloatReg val, int width); + + void setFloatRegBits(int reg_idx, FloatRegBits val); + + void setFloatRegBits(int reg_idx, FloatRegBits val); + - uint64_t readPC(); ++ uint64_t readArchIntReg(int reg_idx, unsigned tid); ++ ++ float readArchFloatRegSingle(int reg_idx, unsigned tid); ++ ++ double readArchFloatRegDouble(int reg_idx, unsigned tid); ++ ++ uint64_t readArchFloatRegInt(int reg_idx, unsigned tid); ++ ++ void setArchIntReg(int reg_idx, uint64_t val, unsigned tid); ++ ++ void setArchFloatRegSingle(int reg_idx, float val, unsigned tid); ++ ++ void setArchFloatRegDouble(int reg_idx, double val, unsigned tid); ++ ++ void setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid); + - void setNextPC(uint64_t val); ++ uint64_t readPC(unsigned tid); + - void setPC(Addr new_PC); ++ void setPC(Addr new_PC,unsigned tid); ++ ++ uint64_t readNextPC(unsigned tid); ++ ++ void setNextPC(uint64_t val,unsigned tid); + + /** Function to add instruction onto the head of the list of the + * instructions. Used when new instructions are fetched. + */ - void addInst(DynInstPtr &inst); ++ ListIt addInst(DynInstPtr &inst); + + /** Function to tell the CPU that an instruction has completed. */ - void instDone(); - - /** Remove all instructions in back of the given instruction, but leave - * that instruction in the list. This is useful in a squash, when there - * are instructions in this list that don't exist in structures such as - * the ROB. The instruction doesn't have to be the last instruction in - * the list, but will be once this function completes. - * @todo: Remove only up until that inst? 
Squashed inst is most likely - * valid. - */ - void removeBackInst(DynInstPtr &inst); - - /** Remove an instruction from the front of the list. It is expected - * that there are no instructions in front of it (that is, none are older - * than the instruction being removed). Used when retiring instructions. - * @todo: Remove the argument to this function, and just have it remove - * last instruction once it's verified that commit has the same ordering - * as the instruction list. ++ void instDone(unsigned tid); ++ ++ /** Add Instructions to the CPU Remove List*/ ++ void addToRemoveList(DynInstPtr &inst); ++ ++ /** Remove an instruction from the front end of the list. There's ++ * no restriction on location of the instruction. + */ + void removeFrontInst(DynInstPtr &inst); + + /** Remove all instructions that are not currently in the ROB. */ - void removeInstsNotInROB(); ++ void removeInstsNotInROB(unsigned tid); + + /** Remove all instructions younger than the given sequence number. */ - void removeInstsUntil(const InstSeqNum &seq_num); ++ void removeInstsUntil(const InstSeqNum &seq_num,unsigned tid); ++ ++ inline void squashInstIt(const ListIt &instIt, const unsigned &tid); ++ ++ void cleanUpRemovedInsts(); + + /** Remove all instructions from the list. */ - void removeAllInsts(); ++// void removeAllInsts(); + + void dumpInsts(); + + /** Basically a wrapper function so that instructions executed at - * commit can tell the instruction queue that they have completed. - * Eventually this hack should be removed. ++ * commit can tell the instruction queue that they have ++ * completed. Eventually this hack should be removed. + */ - void wakeDependents(DynInstPtr &inst); ++// void wakeDependents(DynInstPtr &inst); + + public: + /** List of all the instructions in flight. */ - list instList; ++ std::list instList; ++ ++ /** List of all the instructions that will be removed at the end of this ++ * cycle. 
++ */ ++ std::queue removeList; ++ ++#ifdef DEBUG ++ std::set snList; ++#endif ++ ++ /** Records if instructions need to be removed this cycle due to ++ * being retired or squashed. ++ */ ++ bool removeInstsThisCycle; + - //not sure these should be private. + protected: + /** The fetch stage. */ + typename CPUPolicy::Fetch fetch; + - /** The fetch stage's status. */ - typename CPUPolicy::Fetch::Status fetchStatus; - + /** The decode stage. */ + typename CPUPolicy::Decode decode; + - /** The decode stage's status. */ - typename CPUPolicy::Decode::Status decodeStatus; - + /** The dispatch stage. */ + typename CPUPolicy::Rename rename; + - /** The dispatch stage's status. */ - typename CPUPolicy::Rename::Status renameStatus; - + /** The issue/execute/writeback stages. */ + typename CPUPolicy::IEW iew; + - /** The issue/execute/writeback stage's status. */ - typename CPUPolicy::IEW::Status iewStatus; - + /** The commit stage. */ + typename CPUPolicy::Commit commit; + - /** The fetch stage's status. */ - typename CPUPolicy::Commit::Status commitStatus; - - //Might want to just pass these objects in to the constructors of the - //appropriate stage. regFile is in iew, freeList in dispatch, renameMap - //in dispatch, and the rob in commit. + /** The register file. */ + typename CPUPolicy::RegFile regFile; + + /** The free list. */ + typename CPUPolicy::FreeList freeList; + + /** The rename map. */ - typename CPUPolicy::RenameMap renameMap; ++ typename CPUPolicy::RenameMap renameMap[Impl::MaxThreads]; ++ ++ /** The commit rename map. */ ++ typename CPUPolicy::RenameMap commitRenameMap[Impl::MaxThreads]; + + /** The re-order buffer. */ + typename CPUPolicy::ROB rob; + ++ /** Active Threads List */ ++ std::list activeThreads; ++ ++ /** Integer Register Scoreboard */ ++ Scoreboard scoreboard; ++ + public: ++ /** Enum to give each stage a specific index, so when calling ++ * activateStage() or deactivateStage(), they can specify which stage ++ * is being activated/deactivated. 
++ */ ++ enum StageIdx { ++ FetchIdx, ++ DecodeIdx, ++ RenameIdx, ++ IEWIdx, ++ CommitIdx, ++ NumStages }; ++ + /** Typedefs from the Impl to get the structs that each of the + * time buffers should use. + */ + typedef typename CPUPolicy::TimeStruct TimeStruct; + + typedef typename CPUPolicy::FetchStruct FetchStruct; + + typedef typename CPUPolicy::DecodeStruct DecodeStruct; + + typedef typename CPUPolicy::RenameStruct RenameStruct; + + typedef typename CPUPolicy::IEWStruct IEWStruct; + + /** The main time buffer to do backwards communication. */ + TimeBuffer timeBuffer; + + /** The fetch stage's instruction queue. */ + TimeBuffer fetchQueue; + + /** The decode stage's instruction queue. */ + TimeBuffer decodeQueue; + + /** The rename stage's instruction queue. */ + TimeBuffer renameQueue; + + /** The IEW stage's instruction queue. */ + TimeBuffer iewQueue; + + public: - /** The temporary exec context to support older accessors. */ - CPUExecContext *cpuXC; ++ ActivityRecorder activityRec; + - /** Temporary function to get pointer to exec context. */ - ExecContext *xcBase() - { - return thread[0]->getProxy(); - } ++ void activityThisCycle() { activityRec.activity(); } ++ ++ void activateStage(const StageIdx idx) ++ { activityRec.activateStage(idx); } + - CPUExecContext *cpuXCBase() ++ void deactivateStage(const StageIdx idx) ++ { activityRec.deactivateStage(idx); } ++ ++ /** Wakes the CPU, rescheduling the CPU if it's not already active. */ ++ void wakeCPU(); ++ ++ /** Gets a free thread id. Use if thread ids change across system. */ ++ int getFreeTid(); ++ ++ public: ++ /** Temporary function to get pointer to exec context. */ ++ ExecContext *xcBase(unsigned tid) + { - return thread[0]; ++ return thread[tid]->getXCProxy(); + } + ++ /** The global sequence number counter. */ + InstSeqNum globalSeqNum; + ++ Checker *checker; ++ +#if FULL_SYSTEM ++ /** Pointer to the system. */ + System *system; + ++ /** Pointer to the memory controller. 
*/ + MemoryController *memCtrl; ++ /** Pointer to physical memory. */ + PhysicalMemory *physmem; - - AlphaITB *itb; - AlphaDTB *dtb; - - // SWContext *swCtx; +#endif - std::vector thread; + ++ /** Pointer to memory. */ + FunctionalMemory *mem; + ++ Sampler *sampler; ++ ++ int switchCount; ++ ++ // List of all ExecContexts. ++ std::vector thread; ++ ++#if 0 ++ /** Page table pointer. */ ++ PageTable *pTable; ++#endif ++ ++ /** Pointer to the icache interface. */ + MemInterface *icacheInterface; ++ /** Pointer to the dcache interface. */ + MemInterface *dcacheInterface; + ++ /** Whether or not the CPU should defer its registration. */ + bool deferRegistration; + - Counter numInsts; - - Counter funcExeInst; ++ /** Is there a context switch pending? */ ++ bool contextSwitch; ++ ++ /** Threads Scheduled to Enter CPU */ ++ std::list cpuWaitList; ++ ++ /** The cycle that the CPU was last running, used for statistics. */ ++ Tick lastRunningCycle; ++ ++ /** Number of Threads CPU can process */ ++ unsigned numThreads; ++ ++ /** Mapping for system thread id to cpu id */ ++ std::map threadMap; ++ ++ /** Available thread ids in the cpu*/ ++ std::vector tids; ++ ++ /** Stat for total number of times the CPU is descheduled. */ ++ Stats::Scalar<> timesIdled; ++ /** Stat for total number of cycles the CPU spends descheduled. */ ++ Stats::Scalar<> idleCycles; ++ /** Stat for the number of committed instructions per thread. */ ++ Stats::Vector<> committedInsts; ++ /** Stat for the total number of committed instructions. */ ++ Stats::Scalar<> totalCommittedInsts; ++ /** Stat for the CPI per thread. */ ++ Stats::Formula cpi; ++ /** Stat for the total CPI. */ ++ Stats::Formula totalCpi; ++ /** Stat for the IPC per thread. */ ++ Stats::Formula ipc; ++ /** Stat for the total IPC. 
*/ ++ Stats::Formula totalIpc; +}; + - #endif ++#endif // __CPU_O3_CPU_HH__ diff --cc src/cpu/o3/cpu_policy.hh index 41f06f81b,000000000..52227013e mode 100644,000000..100644 --- a/src/cpu/o3/cpu_policy.hh +++ b/src/cpu/o3/cpu_policy.hh @@@ -1,88 -1,0 +1,91 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + - #ifndef __CPU_O3_CPU_CPU_POLICY_HH__ - #define __CPU_O3_CPU_CPU_POLICY_HH__ ++#ifndef __CPU_O3_CPU_POLICY_HH__ ++#define __CPU_O3_CPU_POLICY_HH__ + +#include "cpu/o3/bpred_unit.hh" +#include "cpu/o3/free_list.hh" +#include "cpu/o3/inst_queue.hh" - #include "cpu/o3/ldstq.hh" ++#include "cpu/o3/lsq.hh" ++#include "cpu/o3/lsq_unit.hh" +#include "cpu/o3/mem_dep_unit.hh" +#include "cpu/o3/regfile.hh" +#include "cpu/o3/rename_map.hh" +#include "cpu/o3/rob.hh" +#include "cpu/o3/store_set.hh" + +#include "cpu/o3/commit.hh" +#include "cpu/o3/decode.hh" +#include "cpu/o3/fetch.hh" +#include "cpu/o3/iew.hh" +#include "cpu/o3/rename.hh" + +#include "cpu/o3/comm.hh" + +template +struct SimpleCPUPolicy +{ + typedef TwobitBPredUnit BPredUnit; + typedef PhysRegFile RegFile; + typedef SimpleFreeList FreeList; + typedef SimpleRenameMap RenameMap; + typedef ROB ROB; + typedef InstructionQueue IQ; + typedef MemDepUnit MemDepUnit; - typedef LDSTQ LDSTQ; ++ typedef LSQ LSQ; ++ typedef LSQUnit LSQUnit; + - typedef SimpleFetch Fetch; - typedef SimpleDecode Decode; - typedef SimpleRename Rename; - typedef SimpleIEW IEW; - typedef SimpleCommit Commit; ++ ++ typedef DefaultFetch Fetch; ++ typedef DefaultDecode Decode; ++ typedef DefaultRename Rename; ++ typedef DefaultIEW IEW; ++ typedef DefaultCommit Commit; + + /** The struct for communication between fetch and decode. */ - typedef SimpleFetchSimpleDecode FetchStruct; ++ typedef DefaultFetchDefaultDecode FetchStruct; + + /** The struct for communication between decode and rename. */ - typedef SimpleDecodeSimpleRename DecodeStruct; ++ typedef DefaultDecodeDefaultRename DecodeStruct; + + /** The struct for communication between rename and IEW. */ - typedef SimpleRenameSimpleIEW RenameStruct; ++ typedef DefaultRenameDefaultIEW RenameStruct; + + /** The struct for communication between IEW and commit. 
*/ - typedef SimpleIEWSimpleCommit IEWStruct; ++ typedef DefaultIEWDefaultCommit IEWStruct; + + /** The struct for communication within the IEW stage. */ + typedef IssueStruct IssueStruct; + + /** The struct for all backwards communication. */ - typedef TimeBufStruct TimeStruct; ++ typedef TimeBufStruct TimeStruct; + +}; + - #endif //__CPU_O3_CPU_CPU_POLICY_HH__ ++#endif //__CPU_O3_CPU_POLICY_HH__ diff --cc src/cpu/o3/decode.cc index 290648318,000000000..b14fbb7a3 mode 100644,000000..100644 --- a/src/cpu/o3/decode.cc +++ b/src/cpu/o3/decode.cc @@@ -1,33 -1,0 +1,33 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/decode_impl.hh" + - template class SimpleDecode; ++template class DefaultDecode; diff --cc src/cpu/o3/decode.hh index 5b9a0f822,000000000..3035b3387 mode 100644,000000..100644 --- a/src/cpu/o3/decode.hh +++ b/src/cpu/o3/decode.hh @@@ -1,165 -1,0 +1,292 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - #ifndef __CPU_O3_CPU_SIMPLE_DECODE_HH__ - #define __CPU_O3_CPU_SIMPLE_DECODE_HH__ ++#ifndef __CPU_O3_DECODE_HH__ ++#define __CPU_O3_DECODE_HH__ + +#include + +#include "base/statistics.hh" +#include "base/timebuf.hh" + ++/** ++ * DefaultDecode class handles both single threaded and SMT ++ * decode. Its width is specified by the parameters; each cycles it ++ * tries to decode that many instructions. Because instructions are ++ * actually decoded when the StaticInst is created, this stage does ++ * not do much other than check any PC-relative branches. ++ */ +template - class SimpleDecode ++class DefaultDecode +{ + private: + // Typedefs from the Impl. + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::Params Params; + typedef typename Impl::CPUPol CPUPol; + + // Typedefs from the CPU policy. + typedef typename CPUPol::FetchStruct FetchStruct; + typedef typename CPUPol::DecodeStruct DecodeStruct; + typedef typename CPUPol::TimeStruct TimeStruct; + + public: - // The only time decode will become blocked is if dispatch becomes - // blocked, which means IQ or ROB is probably full. 
- enum Status { ++ /** Overall decode stage status. Used to determine if the CPU can ++ * deschedule itself due to a lack of activity. ++ */ ++ enum DecodeStatus { ++ Active, ++ Inactive ++ }; ++ ++ /** Individual thread status. */ ++ enum ThreadStatus { + Running, + Idle, ++ StartSquash, + Squashing, + Blocked, + Unblocking + }; + + private: - // May eventually need statuses on a per thread basis. - Status _status; ++ /** Decode status. */ ++ DecodeStatus _status; ++ ++ /** Per-thread status. */ ++ ThreadStatus decodeStatus[Impl::MaxThreads]; + + public: - SimpleDecode(Params ¶ms); ++ /** DefaultDecode constructor. */ ++ DefaultDecode(Params *params); + ++ /** Returns the name of decode. */ ++ std::string name() const; ++ ++ /** Registers statistics. */ + void regStats(); + ++ /** Sets CPU pointer. */ + void setCPU(FullCPU *cpu_ptr); + ++ /** Sets the main backwards communication time buffer pointer. */ + void setTimeBuffer(TimeBuffer *tb_ptr); + ++ /** Sets pointer to time buffer used to communicate to the next stage. */ + void setDecodeQueue(TimeBuffer *dq_ptr); + ++ /** Sets pointer to time buffer coming from fetch. */ + void setFetchQueue(TimeBuffer *fq_ptr); + ++ /** Sets pointer to list of active threads. */ ++ void setActiveThreads(std::list *at_ptr); ++ ++ void switchOut(); ++ ++ void takeOverFrom(); ++ /** Ticks decode, processing all input signals and decoding as many ++ * instructions as possible. ++ */ + void tick(); + - void decode(); ++ /** Determines what to do based on decode's current status. ++ * @param status_change decode() sets this variable if there was a status ++ * change (ie switching from from blocking to unblocking). ++ * @param tid Thread id to decode instructions from. ++ */ ++ void decode(bool &status_change, unsigned tid); ++ ++ /** Processes instructions from fetch and passes them on to rename. 
++ * Decoding of instructions actually happens when they are created in ++ * fetch, so this function mostly checks if PC-relative branches are ++ * correct. ++ */ ++ void decodeInsts(unsigned tid); + + private: ++ /** Inserts a thread's instructions into the skid buffer, to be decoded ++ * once decode unblocks. ++ */ ++ void skidInsert(unsigned tid); ++ ++ /** Returns if all of the skid buffers are empty. */ ++ bool skidsEmpty(); ++ ++ /** Updates overall decode status based on all of the threads' statuses. */ ++ void updateStatus(); ++ ++ /** Separates instructions from fetch into individual lists of instructions ++ * sorted by thread. ++ */ ++ void sortInsts(); ++ ++ /** Reads all stall signals from the backwards communication timebuffer. */ ++ void readStallSignals(unsigned tid); ++ ++ /** Checks all input signals and updates decode's status appropriately. */ ++ bool checkSignalsAndUpdate(unsigned tid); ++ ++ /** Checks all stall signals, and returns if any are true. */ ++ bool checkStall(unsigned tid) const; ++ ++ /** Returns if there any instructions from fetch on this cycle. */ + inline bool fetchInstsValid(); + - void block(); ++ /** Switches decode to blocking, and signals back that decode has ++ * become blocked. ++ * @return Returns true if there is a status change. ++ */ ++ bool block(unsigned tid); + - inline void unblock(); ++ /** Switches decode to unblocking if the skid buffer is empty, and ++ * signals back that decode has unblocked. ++ * @return Returns true if there is a status change. ++ */ ++ bool unblock(unsigned tid); + - void squash(DynInstPtr &inst); ++ /** Squashes if there is a PC-relative branch that was predicted ++ * incorrectly. Sends squash information back to fetch. ++ */ ++ void squash(DynInstPtr &inst, unsigned tid); + + public: - // Might want to make squash a friend function. - void squash(); ++ /** Squashes due to commit signalling a squash. Changes status to ++ * squashing and clears block/unblock signals as needed. 
++ */ ++ unsigned squash(unsigned tid); + + private: + // Interfaces to objects outside of decode. + /** CPU interface. */ + FullCPU *cpu; + + /** Time buffer interface. */ + TimeBuffer *timeBuffer; + + /** Wire to get rename's output from backwards time buffer. */ + typename TimeBuffer::wire fromRename; + + /** Wire to get iew's information from backwards time buffer. */ + typename TimeBuffer::wire fromIEW; + + /** Wire to get commit's information from backwards time buffer. */ + typename TimeBuffer::wire fromCommit; + + /** Wire to write information heading to previous stages. */ + // Might not be the best name as not only fetch will read it. + typename TimeBuffer::wire toFetch; + + /** Decode instruction queue. */ + TimeBuffer *decodeQueue; + + /** Wire used to write any information heading to rename. */ + typename TimeBuffer::wire toRename; + + /** Fetch instruction queue interface. */ + TimeBuffer *fetchQueue; + + /** Wire to get fetch's output from fetch queue. */ + typename TimeBuffer::wire fromFetch; + ++ /** Queue of all instructions coming from fetch this cycle. */ ++ std::queue insts[Impl::MaxThreads]; ++ + /** Skid buffer between fetch and decode. */ - std::queue skidBuffer; ++ std::queue skidBuffer[Impl::MaxThreads]; ++ ++ /** Variable that tracks if decode has written to the time buffer this ++ * cycle. Used to tell CPU if there is activity this cycle. ++ */ ++ bool wroteToTimeBuffer; ++ ++ /** Source of possible stalls. */ ++ struct Stalls { ++ bool rename; ++ bool iew; ++ bool commit; ++ }; ++ ++ /** Tracks which stages are telling decode to stall. */ ++ Stalls stalls[Impl::MaxThreads]; + - //Consider making these unsigned to avoid any confusion. + /** Rename to decode delay, in ticks. */ + unsigned renameToDecodeDelay; + + /** IEW to decode delay, in ticks. */ + unsigned iewToDecodeDelay; + + /** Commit to decode delay, in ticks. */ + unsigned commitToDecodeDelay; + + /** Fetch to decode delay, in ticks. 
*/ + unsigned fetchToDecodeDelay; + + /** The width of decode, in instructions. */ + unsigned decodeWidth; + - /** The instruction that decode is currently on. It needs to have - * persistent state so that when a stall occurs in the middle of a - * group of instructions, it can restart at the proper instruction. - */ - unsigned numInst; ++ /** Index of instructions being sent to rename. */ ++ unsigned toRenameIndex; ++ ++ /** number of Active Threads*/ ++ unsigned numThreads; + ++ /** List of active thread ids */ ++ std::list *activeThreads; ++ ++ /** Number of branches in flight. */ ++ unsigned branchCount[Impl::MaxThreads]; ++ ++ /** Maximum size of the skid buffer. */ ++ unsigned skidBufferMax; ++ ++ /** Stat for total number of idle cycles. */ + Stats::Scalar<> decodeIdleCycles; ++ /** Stat for total number of blocked cycles. */ + Stats::Scalar<> decodeBlockedCycles; ++ /** Stat for total number of normal running cycles. */ ++ Stats::Scalar<> decodeRunCycles; ++ /** Stat for total number of unblocking cycles. */ + Stats::Scalar<> decodeUnblockCycles; ++ /** Stat for total number of squashing cycles. */ + Stats::Scalar<> decodeSquashCycles; ++ /** Stat for number of times a branch is resolved at decode. */ ++ Stats::Scalar<> decodeBranchResolved; ++ /** Stat for number of times a branch mispredict is detected. */ + Stats::Scalar<> decodeBranchMispred; ++ /** Stat for number of times decode detected a non-control instruction ++ * incorrectly predicted as a branch. ++ */ + Stats::Scalar<> decodeControlMispred; ++ /** Stat for total number of decoded instructions. */ + Stats::Scalar<> decodeDecodedInsts; ++ /** Stat for total number of squashed instructions. 
*/ + Stats::Scalar<> decodeSquashedInsts; +}; + - #endif // __CPU_O3_CPU_SIMPLE_DECODE_HH__ ++#endif // __CPU_O3_DECODE_HH__ diff --cc src/cpu/o3/decode_impl.hh index 463f0ddac,000000000..2ed7ec6fc mode 100644,000000..100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@@ -1,425 -1,0 +1,741 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "cpu/o3/decode.hh" + ++using namespace std; ++ +template - SimpleDecode::SimpleDecode(Params ¶ms) - : renameToDecodeDelay(params.renameToDecodeDelay), - iewToDecodeDelay(params.iewToDecodeDelay), - commitToDecodeDelay(params.commitToDecodeDelay), - fetchToDecodeDelay(params.fetchToDecodeDelay), - decodeWidth(params.decodeWidth), - numInst(0) ++DefaultDecode::DefaultDecode(Params *params) ++ : renameToDecodeDelay(params->renameToDecodeDelay), ++ iewToDecodeDelay(params->iewToDecodeDelay), ++ commitToDecodeDelay(params->commitToDecodeDelay), ++ fetchToDecodeDelay(params->fetchToDecodeDelay), ++ decodeWidth(params->decodeWidth), ++ numThreads(params->numberOfThreads) +{ - DPRINTF(Decode, "Decode: decodeWidth=%i.\n", decodeWidth); - _status = Idle; ++ _status = Inactive; ++ ++ for (int i = 0; i < numThreads; ++i) { ++ decodeStatus[i] = Idle; ++ ++ stalls[i].rename = false; ++ stalls[i].iew = false; ++ stalls[i].commit = false; ++ } ++ ++ // @todo: Make into a parameter ++ skidBufferMax = (fetchToDecodeDelay * params->fetchWidth) + decodeWidth; ++} ++ ++template ++std::string ++DefaultDecode::name() const ++{ ++ return cpu->name() + ".decode"; +} + +template +void - SimpleDecode::regStats() ++DefaultDecode::regStats() +{ + decodeIdleCycles - .name(name() + ".decodeIdleCycles") ++ .name(name() + ".DECODE:IdleCycles") + .desc("Number of cycles decode is idle") + .prereq(decodeIdleCycles); + decodeBlockedCycles - .name(name() + ".decodeBlockedCycles") ++ .name(name() + ".DECODE:BlockedCycles") + .desc("Number of cycles decode is blocked") + .prereq(decodeBlockedCycles); ++ decodeRunCycles ++ .name(name() + ".DECODE:RunCycles") ++ .desc("Number of cycles decode is running") ++ .prereq(decodeRunCycles); + decodeUnblockCycles - .name(name() + ".decodeUnblockCycles") ++ .name(name() + ".DECODE:UnblockCycles") + .desc("Number of cycles decode is unblocking") + .prereq(decodeUnblockCycles); + decodeSquashCycles - .name(name() + ".decodeSquashCycles") ++ 
.name(name() + ".DECODE:SquashCycles") + .desc("Number of cycles decode is squashing") + .prereq(decodeSquashCycles); ++ decodeBranchResolved ++ .name(name() + ".DECODE:BranchResolved") ++ .desc("Number of times decode resolved a branch") ++ .prereq(decodeBranchResolved); + decodeBranchMispred - .name(name() + ".decodeBranchMispred") ++ .name(name() + ".DECODE:BranchMispred") + .desc("Number of times decode detected a branch misprediction") + .prereq(decodeBranchMispred); + decodeControlMispred - .name(name() + ".decodeControlMispred") ++ .name(name() + ".DECODE:ControlMispred") + .desc("Number of times decode detected an instruction incorrectly" + " predicted as a control") + .prereq(decodeControlMispred); + decodeDecodedInsts - .name(name() + ".decodeDecodedInsts") ++ .name(name() + ".DECODE:DecodedInsts") + .desc("Number of instructions handled by decode") + .prereq(decodeDecodedInsts); + decodeSquashedInsts - .name(name() + ".decodeSquashedInsts") ++ .name(name() + ".DECODE:SquashedInsts") + .desc("Number of squashed instructions handled by decode") + .prereq(decodeSquashedInsts); +} + +template +void - SimpleDecode::setCPU(FullCPU *cpu_ptr) ++DefaultDecode::setCPU(FullCPU *cpu_ptr) +{ - DPRINTF(Decode, "Decode: Setting CPU pointer.\n"); ++ DPRINTF(Decode, "Setting CPU pointer.\n"); + cpu = cpu_ptr; +} + +template +void - SimpleDecode::setTimeBuffer(TimeBuffer *tb_ptr) ++DefaultDecode::setTimeBuffer(TimeBuffer *tb_ptr) +{ - DPRINTF(Decode, "Decode: Setting time buffer pointer.\n"); ++ DPRINTF(Decode, "Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to write information back to fetch. + toFetch = timeBuffer->getWire(0); + + // Create wires to get information from proper places in time buffer. 
+ fromRename = timeBuffer->getWire(-renameToDecodeDelay); + fromIEW = timeBuffer->getWire(-iewToDecodeDelay); + fromCommit = timeBuffer->getWire(-commitToDecodeDelay); +} + +template +void - SimpleDecode::setDecodeQueue(TimeBuffer *dq_ptr) ++DefaultDecode::setDecodeQueue(TimeBuffer *dq_ptr) +{ - DPRINTF(Decode, "Decode: Setting decode queue pointer.\n"); ++ DPRINTF(Decode, "Setting decode queue pointer.\n"); + decodeQueue = dq_ptr; + + // Setup wire to write information to proper place in decode queue. + toRename = decodeQueue->getWire(0); +} + +template +void - SimpleDecode::setFetchQueue(TimeBuffer *fq_ptr) ++DefaultDecode::setFetchQueue(TimeBuffer *fq_ptr) +{ - DPRINTF(Decode, "Decode: Setting fetch queue pointer.\n"); ++ DPRINTF(Decode, "Setting fetch queue pointer.\n"); + fetchQueue = fq_ptr; + + // Setup wire to read information from fetch queue. + fromFetch = fetchQueue->getWire(-fetchToDecodeDelay); +} + ++template ++void ++DefaultDecode::setActiveThreads(list *at_ptr) ++{ ++ DPRINTF(Decode, "Setting active threads list pointer.\n"); ++ activeThreads = at_ptr; ++} ++ ++template ++void ++DefaultDecode::switchOut() ++{ ++ cpu->signalSwitched(); ++} ++ ++template ++void ++DefaultDecode::takeOverFrom() ++{ ++ _status = Inactive; ++ ++ for (int i = 0; i < numThreads; ++i) { ++ decodeStatus[i] = Idle; ++ ++ stalls[i].rename = false; ++ stalls[i].iew = false; ++ stalls[i].commit = false; ++ while (!insts[i].empty()) ++ insts[i].pop(); ++ while (!skidBuffer[i].empty()) ++ skidBuffer[i].pop(); ++ branchCount[i] = 0; ++ } ++ wroteToTimeBuffer = false; ++} ++ ++template ++bool ++DefaultDecode::checkStall(unsigned tid) const ++{ ++ bool ret_val = false; ++ ++ if (stalls[tid].rename) { ++ DPRINTF(Decode,"[tid:%i]: Stall fom Rename stage detected.\n", tid); ++ ret_val = true; ++ } else if (stalls[tid].iew) { ++ DPRINTF(Decode,"[tid:%i]: Stall fom IEW stage detected.\n", tid); ++ ret_val = true; ++ } else if (stalls[tid].commit) { ++ DPRINTF(Decode,"[tid:%i]: Stall fom 
Commit stage detected.\n", tid); ++ ret_val = true; ++ } ++ ++ return ret_val; ++} ++ +template +inline bool - SimpleDecode::fetchInstsValid() ++DefaultDecode::fetchInstsValid() +{ + return fromFetch->size > 0; +} + +template - void - SimpleDecode::block() ++bool ++DefaultDecode::block(unsigned tid) +{ - DPRINTF(Decode, "Decode: Blocking.\n"); - - // Set the status to Blocked. - _status = Blocked; ++ DPRINTF(Decode, "[tid:%u]: Blocking.\n", tid); ++ ++ // If the decode status is blocked or unblocking then decode has not yet ++ // signalled fetch to unblock. In that case, there is no need to tell ++ // fetch to block. ++ if (decodeStatus[tid] != Blocked && ++ decodeStatus[tid] != Unblocking) { ++ toFetch->decodeBlock[tid] = true; ++ wroteToTimeBuffer = true; ++ } + + // Add the current inputs to the skid buffer so they can be + // reprocessed when this stage unblocks. - skidBuffer.push(*fromFetch); ++ skidInsert(tid); ++ ++ if (decodeStatus[tid] != Blocked) { ++ // Set the status to Blocked. ++ decodeStatus[tid] = Blocked; ++ return true; ++ } + - // Note that this stage only signals previous stages to stall when - // it is the cause of the stall originates at this stage. Otherwise - // the previous stages are expected to check all possible stall signals. ++ return false; +} + +template - inline void - SimpleDecode::unblock() ++bool ++DefaultDecode::unblock(unsigned tid) +{ - DPRINTF(Decode, "Decode: Unblocking, going to remove " - "instructions from skid buffer.\n"); - // Remove the now processed instructions from the skid buffer. - skidBuffer.pop(); - - // If there's still information in the skid buffer, then - // continue to tell previous stages to stall. They will be - // able to restart once the skid buffer is empty. - if (!skidBuffer.empty()) { - toFetch->decodeInfo.stall = true; - } else { - DPRINTF(Decode, "Decode: Finished unblocking.\n"); - _status = Running; ++ // Decode is done unblocking only if the skid buffer is empty. 
++ if (skidBuffer[tid].empty()) { ++ DPRINTF(Decode, "[tid:%u]: Done unblocking.\n", tid); ++ toFetch->decodeUnblock[tid] = true; ++ wroteToTimeBuffer = true; ++ ++ decodeStatus[tid] = Running; ++ return true; + } ++ ++ DPRINTF(Decode, "[tid:%u]: Currently unblocking.\n", tid); ++ ++ return false; +} + - // This squash is specifically for when Decode detects a PC-relative branch - // was predicted incorrectly. +template +void - SimpleDecode::squash(DynInstPtr &inst) ++DefaultDecode::squash(DynInstPtr &inst, unsigned tid) +{ - DPRINTF(Decode, "Decode: Squashing due to incorrect branch prediction " - "detected at decode.\n"); - Addr new_PC = inst->readNextPC(); - - toFetch->decodeInfo.branchMispredict = true; - toFetch->decodeInfo.doneSeqNum = inst->seqNum; - toFetch->decodeInfo.predIncorrect = true; - toFetch->decodeInfo.squash = true; - toFetch->decodeInfo.nextPC = new_PC; - toFetch->decodeInfo.branchTaken = true; ++ DPRINTF(Decode, "[tid:%i]: Squashing due to incorrect branch prediction " ++ "detected at decode.\n", tid); ++ ++ toFetch->decodeInfo[tid].branchMispredict = true; ++ toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum; ++ toFetch->decodeInfo[tid].predIncorrect = true; ++ toFetch->decodeInfo[tid].squash = true; ++ toFetch->decodeInfo[tid].nextPC = inst->readNextPC(); ++ toFetch->decodeInfo[tid].branchTaken = true; ++ ++ if (decodeStatus[tid] == Blocked || ++ decodeStatus[tid] == Unblocking) { ++ toFetch->decodeUnblock[tid] = 1; ++ } + + // Set status to squashing. - _status = Squashing; ++ decodeStatus[tid] = Squashing; ++ ++ for (int i=0; isize; i++) { ++ if (fromFetch->insts[i]->threadNumber == tid && ++ fromFetch->insts[i]->seqNum > inst->seqNum) { ++ fromFetch->insts[i]->squashed = true; ++ } ++ } ++ ++ while (!insts[tid].empty()) { ++ insts[tid].pop(); ++ } + + // Clear the skid buffer in case it has any data in it. 
- while (!skidBuffer.empty()) { - skidBuffer.pop(); ++ while (!skidBuffer[tid].empty()) { ++ skidBuffer[tid].pop(); + } + + // Squash instructions up until this one - // Slightly unrealistic! - cpu->removeInstsUntil(inst->seqNum); ++ cpu->removeInstsUntil(inst->seqNum, tid); +} + +template - void - SimpleDecode::squash() ++unsigned ++DefaultDecode::squash(unsigned tid) +{ - DPRINTF(Decode, "Decode: Squashing.\n"); ++ DPRINTF(Decode, "[tid:%i]: Squashing.\n",tid); ++ ++ if (decodeStatus[tid] == Blocked || ++ decodeStatus[tid] == Unblocking) { ++#if !FULL_SYSTEM ++ // In syscall emulation, we can have both a block and a squash due ++ // to a syscall in the same cycle. This would cause both signals to ++ // be high. This shouldn't happen in full system. ++ // @todo: Determine if this still happens. ++ if (toFetch->decodeBlock[tid]) { ++ toFetch->decodeBlock[tid] = 0; ++ } else { ++ toFetch->decodeUnblock[tid] = 1; ++ } ++#else ++ toFetch->decodeUnblock[tid] = 1; ++#endif ++ } ++ + // Set status to squashing. - _status = Squashing; ++ decodeStatus[tid] = Squashing; + - // Maybe advance the time buffer? Not sure what to do in the normal - // case. ++ // Go through incoming instructions from fetch and squash them. ++ unsigned squash_count = 0; ++ ++ for (int i=0; isize; i++) { ++ if (fromFetch->insts[i]->threadNumber == tid) { ++ fromFetch->insts[i]->squashed = true; ++ squash_count++; ++ } ++ } ++ ++ while (!insts[tid].empty()) { ++ insts[tid].pop(); ++ } + + // Clear the skid buffer in case it has any data in it. - while (!skidBuffer.empty()) - { - skidBuffer.pop(); ++ while (!skidBuffer[tid].empty()) { ++ skidBuffer[tid].pop(); + } ++ ++ return squash_count; +} + +template +void - SimpleDecode::tick() ++DefaultDecode::skidInsert(unsigned tid) +{ - // Decode should try to execute as many instructions as its bandwidth - // will allow, as long as it is not currently blocked. 
- if (_status != Blocked && _status != Squashing) { - DPRINTF(Decode, "Decode: Not blocked, so attempting to run " - "stage.\n"); - // Make sure that the skid buffer has something in it if the - // status is unblocking. - assert(_status == Unblocking ? !skidBuffer.empty() : 1); ++ DynInstPtr inst = NULL; + - decode(); ++ while (!insts[tid].empty()) { ++ inst = insts[tid].front(); + - // If the status was unblocking, then instructions from the skid - // buffer were used. Remove those instructions and handle - // the rest of unblocking. - if (_status == Unblocking) { - ++decodeUnblockCycles; ++ insts[tid].pop(); + - if (fetchInstsValid()) { - // Add the current inputs to the skid buffer so they can be - // reprocessed when this stage unblocks. - skidBuffer.push(*fromFetch); - } ++ assert(tid == inst->threadNumber); + - unblock(); - } - } else if (_status == Blocked) { - ++decodeBlockedCycles; ++ DPRINTF(Decode,"Inserting [sn:%lli] PC:%#x into decode skidBuffer %i\n", ++ inst->seqNum, inst->readPC(), inst->threadNumber); + - if (fetchInstsValid()) { - block(); - } ++ skidBuffer[tid].push(inst); ++ } + - if (!fromRename->renameInfo.stall && - !fromIEW->iewInfo.stall && - !fromCommit->commitInfo.stall) { - DPRINTF(Decode, "Decode: Stall signals cleared, going to " - "unblock.\n"); - _status = Unblocking; ++ // @todo: Eventually need to enforce this by not letting a thread ++ // fetch past its skidbuffer ++ assert(skidBuffer[tid].size() <= skidBufferMax); ++} + - // Continue to tell previous stage to block until this - // stage is done unblocking. 
- toFetch->decodeInfo.stall = true; - } else { - DPRINTF(Decode, "Decode: Still blocked.\n"); - toFetch->decodeInfo.stall = true; ++template ++bool ++DefaultDecode::skidsEmpty() ++{ ++ list::iterator threads = (*activeThreads).begin(); ++ ++ while (threads != (*activeThreads).end()) { ++ if (!skidBuffer[*threads++].empty()) ++ return false; ++ } ++ ++ return true; ++} ++ ++template ++void ++DefaultDecode::updateStatus() ++{ ++ bool any_unblocking = false; ++ ++ list::iterator threads = (*activeThreads).begin(); ++ ++ threads = (*activeThreads).begin(); ++ ++ while (threads != (*activeThreads).end()) { ++ unsigned tid = *threads++; ++ ++ if (decodeStatus[tid] == Unblocking) { ++ any_unblocking = true; ++ break; + } ++ } ++ ++ // Decode will have activity if it's unblocking. ++ if (any_unblocking) { ++ if (_status == Inactive) { ++ _status = Active; ++ ++ DPRINTF(Activity, "Activating stage.\n"); + - if (fromCommit->commitInfo.squash || - fromCommit->commitInfo.robSquashing) { - squash(); ++ cpu->activateStage(FullCPU::DecodeIdx); + } - } else if (_status == Squashing) { - if (!fromCommit->commitInfo.squash && - !fromCommit->commitInfo.robSquashing) { - _status = Running; - } else if (fromCommit->commitInfo.squash) { - ++decodeSquashCycles; - - squash(); ++ } else { ++ // If it's not unblocking, then decode will not have any internal ++ // activity. Switch it to inactive. 
++ if (_status == Active) { ++ _status = Inactive; ++ DPRINTF(Activity, "Deactivating stage.\n"); ++ ++ cpu->deactivateStage(FullCPU::DecodeIdx); + } + } +} + ++template ++void ++DefaultDecode::sortInsts() ++{ ++ int insts_from_fetch = fromFetch->size; ++#ifdef DEBUG ++ for (int i=0; i < numThreads; i++) ++ assert(insts[i].empty()); ++#endif ++ for (int i = 0; i < insts_from_fetch; ++i) { ++ insts[fromFetch->insts[i]->threadNumber].push(fromFetch->insts[i]); ++ } ++} ++ +template +void - SimpleDecode::decode() ++DefaultDecode::readStallSignals(unsigned tid) +{ - // Check time buffer if being told to squash. - if (fromCommit->commitInfo.squash) { - squash(); - return; ++ if (fromRename->renameBlock[tid]) { ++ stalls[tid].rename = true; + } + - // Check time buffer if being told to stall. - if (fromRename->renameInfo.stall || - fromIEW->iewInfo.stall || - fromCommit->commitInfo.stall) { - block(); - return; ++ if (fromRename->renameUnblock[tid]) { ++ assert(stalls[tid].rename); ++ stalls[tid].rename = false; ++ } ++ ++ if (fromIEW->iewBlock[tid]) { ++ stalls[tid].iew = true; ++ } ++ ++ if (fromIEW->iewUnblock[tid]) { ++ assert(stalls[tid].iew); ++ stalls[tid].iew = false; ++ } ++ ++ if (fromCommit->commitBlock[tid]) { ++ stalls[tid].commit = true; ++ } ++ ++ if (fromCommit->commitUnblock[tid]) { ++ assert(stalls[tid].commit); ++ stalls[tid].commit = false; ++ } ++} ++ ++template ++bool ++DefaultDecode::checkSignalsAndUpdate(unsigned tid) ++{ ++ // Check if there's a squash signal, squash if there is. ++ // Check stall signals, block if necessary. ++ // If status was blocked ++ // Check if stall conditions have passed ++ // if so then go to unblocking ++ // If status was Squashing ++ // check if squashing is not high. Switch to running this cycle. ++ ++ // Update the per thread stall statuses. ++ readStallSignals(tid); ++ ++ // Check squash signals from commit. 
++ if (fromCommit->commitInfo[tid].squash) { ++ ++ DPRINTF(Decode, "[tid:%u]: Squashing instructions due to squash " ++ "from commit.\n", tid); ++ ++ squash(tid); ++ ++ return true; ++ } ++ ++ // Check ROB squash signals from commit. ++ if (fromCommit->commitInfo[tid].robSquashing) { ++ DPRINTF(Decode, "[tid:%]: ROB is still squashing.\n",tid); ++ ++ // Continue to squash. ++ decodeStatus[tid] = Squashing; ++ ++ return true; ++ } ++ ++ if (checkStall(tid)) { ++ return block(tid); + } + - // Check fetch queue to see if instructions are available. - // If no available instructions, do nothing, unless this stage is - // currently unblocking. - if (!fetchInstsValid() && _status != Unblocking) { - DPRINTF(Decode, "Decode: Nothing to do, breaking out early.\n"); ++ if (decodeStatus[tid] == Blocked) { ++ DPRINTF(Decode, "[tid:%u]: Done blocking, switching to unblocking.\n", ++ tid); ++ ++ decodeStatus[tid] = Unblocking; ++ ++ unblock(tid); ++ ++ return true; ++ } ++ ++ if (decodeStatus[tid] == Squashing) { ++ // Switch status to running if decode isn't being told to block or ++ // squash this cycle. ++ DPRINTF(Decode, "[tid:%u]: Done squashing, switching to running.\n", ++ tid); ++ ++ decodeStatus[tid] = Running; ++ ++ return false; ++ } ++ ++ // If we've reached this point, we have not gotten any signals that ++ // cause decode to change its status. Decode remains the same as before. ++ return false; ++} ++ ++template ++void ++DefaultDecode::tick() ++{ ++ wroteToTimeBuffer = false; ++ ++ bool status_change = false; ++ ++ toRenameIndex = 0; ++ ++ list::iterator threads = (*activeThreads).begin(); ++ ++ sortInsts(); ++ ++ //Check stall and squash signals. 
++ while (threads != (*activeThreads).end()) { ++ unsigned tid = *threads++; ++ ++ DPRINTF(Decode,"Processing [tid:%i]\n",tid); ++ status_change = checkSignalsAndUpdate(tid) || status_change; ++ ++ decode(status_change, tid); ++ } ++ ++ if (status_change) { ++ updateStatus(); ++ } ++ ++ if (wroteToTimeBuffer) { ++ DPRINTF(Activity, "Activity this cycle.\n"); ++ ++ cpu->activityThisCycle(); ++ } ++} ++ ++template ++void ++DefaultDecode::decode(bool &status_change, unsigned tid) ++{ ++ // If status is Running or idle, ++ // call decodeInsts() ++ // If status is Unblocking, ++ // buffer any instructions coming from fetch ++ // continue trying to empty skid buffer ++ // check if stall conditions have passed ++ ++ if (decodeStatus[tid] == Blocked) { ++ ++decodeBlockedCycles; ++ } else if (decodeStatus[tid] == Squashing) { ++ ++decodeSquashCycles; ++ } ++ ++ // Decode should try to decode as many instructions as its bandwidth ++ // will allow, as long as it is not currently blocked. ++ if (decodeStatus[tid] == Running || ++ decodeStatus[tid] == Idle) { ++ DPRINTF(Decode, "[tid:%u] Not blocked, so attempting to run " ++ "stage.\n",tid); ++ ++ decodeInsts(tid); ++ } else if (decodeStatus[tid] == Unblocking) { ++ // Make sure that the skid buffer has something in it if the ++ // status is unblocking. ++ assert(!skidsEmpty()); ++ ++ // If the status was unblocking, then instructions from the skid ++ // buffer were used. Remove those instructions and handle ++ // the rest of unblocking. ++ decodeInsts(tid); ++ ++ if (fetchInstsValid()) { ++ // Add the current inputs to the skid buffer so they can be ++ // reprocessed when this stage unblocks. ++ skidInsert(tid); ++ } ++ ++ status_change = unblock(tid) || status_change; ++ } ++} ++ ++template ++void ++DefaultDecode::decodeInsts(unsigned tid) ++{ ++ // Instructions can come either from the skid buffer or the list of ++ // instructions coming from fetch, depending on decode's status. 
++ int insts_available = decodeStatus[tid] == Unblocking ? ++ skidBuffer[tid].size() : insts[tid].size(); ++ ++ if (insts_available == 0) { ++ DPRINTF(Decode, "[tid:%u] Nothing to do, breaking out" ++ " early.\n",tid); + // Should I change the status to idle? + ++decodeIdleCycles; + return; ++ } else if (decodeStatus[tid] == Unblocking) { ++ DPRINTF(Decode, "[tid:%u] Unblocking, removing insts from skid " ++ "buffer.\n",tid); ++ ++decodeUnblockCycles; ++ } else if (decodeStatus[tid] == Running) { ++ ++decodeRunCycles; + } + - // Might be better to use a base DynInst * instead? + DynInstPtr inst; + - unsigned to_rename_index = 0; ++ std::queue ++ &insts_to_decode = decodeStatus[tid] == Unblocking ? ++ skidBuffer[tid] : insts[tid]; + - int insts_available = _status == Unblocking ? - skidBuffer.front().size - numInst : - fromFetch->size; ++ DPRINTF(Decode, "[tid:%u]: Sending instruction to rename.\n",tid); + - // Debug block... - #if 0 - if (insts_available) { - DPRINTF(Decode, "Decode: Instructions available.\n"); - } else { - if (_status == Unblocking && skidBuffer.empty()) { - DPRINTF(Decode, "Decode: No instructions available, skid buffer " - "empty.\n"); - } else if (_status != Unblocking && - !fromFetch->insts[0]) { - DPRINTF(Decode, "Decode: No instructions available, fetch queue " - "empty.\n"); - } else { - panic("Decode: No instructions available, unexpected condition!" - "\n"); - } - } - #endif ++ while (insts_available > 0 && toRenameIndex < decodeWidth) { ++ assert(!insts_to_decode.empty()); + - while (insts_available > 0) - { - DPRINTF(Decode, "Decode: Sending instruction to rename.\n"); ++ inst = insts_to_decode.front(); + - inst = _status == Unblocking ? 
skidBuffer.front().insts[numInst] : - fromFetch->insts[numInst]; ++ insts_to_decode.pop(); + - DPRINTF(Decode, "Decode: Processing instruction %i with PC %#x\n", - inst->seqNum, inst->readPC()); ++ DPRINTF(Decode, "[tid:%u]: Processing instruction [sn:%lli] with " ++ "PC %#x\n", ++ tid, inst->seqNum, inst->readPC()); + + if (inst->isSquashed()) { - DPRINTF(Decode, "Decode: Instruction %i with PC %#x is " ++ DPRINTF(Decode, "[tid:%u]: Instruction %i with PC %#x is " + "squashed, skipping.\n", - inst->seqNum, inst->readPC()); ++ tid, inst->seqNum, inst->readPC()); + + ++decodeSquashedInsts; + - ++numInst; + --insts_available; + + continue; + } + - + // Also check if instructions have no source registers. Mark + // them as ready to issue at any time. Not sure if this check + // should exist here or at a later stage; however it doesn't matter + // too much for function correctness. - // Isn't this handled by the inst queue? + if (inst->numSrcRegs() == 0) { + inst->setCanIssue(); + } + + // This current instruction is valid, so add it into the decode + // queue. The next instruction may not be valid, so check to + // see if branches were predicted correctly. - toRename->insts[to_rename_index] = inst; ++ toRename->insts[toRenameIndex] = inst; + + ++(toRename->size); ++ ++toRenameIndex; ++ ++decodeDecodedInsts; ++ --insts_available; + + // Ensure that if it was predicted as a branch, it really is a + // branch. + if (inst->predTaken() && !inst->isControl()) { + panic("Instruction predicted as a branch!"); + + ++decodeControlMispred; ++ + // Might want to set some sort of boolean and just do + // a check at the end - squash(inst); ++ squash(inst, inst->threadNumber); ++ + break; + } + + // Go ahead and compute any PC-relative branches. 
- + if (inst->isDirectCtrl() && inst->isUncondCtrl()) { - ++ ++decodeBranchResolved; + inst->setNextPC(inst->branchTarget()); + + if (inst->mispredicted()) { + ++decodeBranchMispred; ++ + // Might want to set some sort of boolean and just do + // a check at the end - squash(inst); ++ squash(inst, inst->threadNumber); ++ + break; + } + } ++ } + - // Normally can check if a direct branch has the right target - // addr (either the immediate, or the branch PC + 4) and redirect - // fetch if it's incorrect. - - // Increment which instruction we're looking at. - ++numInst; - ++to_rename_index; - ++decodeDecodedInsts; - - --insts_available; ++ // If we didn't process all instructions, then we will need to block ++ // and put all those instructions into the skid buffer. ++ if (!insts_to_decode.empty()) { ++ block(tid); + } + - numInst = 0; ++ // Record that decode has written to the time buffer for activity ++ // tracking. ++ if (toRenameIndex) { ++ wroteToTimeBuffer = true; ++ } +} diff --cc src/cpu/o3/fetch.cc index 8ad5e6565,000000000..7959416be mode 100644,000000..100644 --- a/src/cpu/o3/fetch.cc +++ b/src/cpu/o3/fetch.cc @@@ -1,33 -1,0 +1,33 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/fetch_impl.hh" + - template class SimpleFetch; ++template class DefaultFetch; diff --cc src/cpu/o3/fetch.hh index cc64800d9,000000000..3fcfdc3a1 mode 100644,000000..100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@@ -1,223 -1,0 +1,411 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - // Todo: SMT fetch, - // Add a way to get a stage's current status. - - #ifndef __CPU_O3_CPU_SIMPLE_FETCH_HH__ - #define __CPU_O3_CPU_SIMPLE_FETCH_HH__ ++#ifndef __CPU_O3_FETCH_HH__ ++#define __CPU_O3_FETCH_HH__ + +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/pc_event.hh" +#include "mem/mem_interface.hh" +#include "sim/eventq.hh" + ++class Sampler; ++ +/** - * SimpleFetch class to fetch a single instruction each cycle. 
SimpleFetch - * will stall if there's an Icache miss, but otherwise assumes a one cycle - * Icache hit. ++ * DefaultFetch class handles both single threaded and SMT fetch. Its ++ * width is specified by the parameters; each cycle it tries to fetch ++ * that many instructions. It supports using a branch predictor to ++ * predict direction and targets. ++ * It supports the idling functionalitiy of the CPU by indicating to ++ * the CPU when it is active and inactive. + */ - +template - class SimpleFetch ++class DefaultFetch +{ + public: + /** Typedefs from Impl. */ + typedef typename Impl::CPUPol CPUPol; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::Params Params; + ++ /** Typedefs from the CPU policy. */ + typedef typename CPUPol::BPredUnit BPredUnit; + typedef typename CPUPol::FetchStruct FetchStruct; + typedef typename CPUPol::TimeStruct TimeStruct; + + /** Typedefs from ISA. */ + typedef TheISA::MachInst MachInst; ++ typedef TheISA::ExtMachInst ExtMachInst; + + public: - enum Status { ++ /** Overall fetch status. Used to determine if the CPU can ++ * deschedule itsef due to a lack of activity. ++ */ ++ enum FetchStatus { ++ Active, ++ Inactive ++ }; ++ ++ /** Individual thread status. */ ++ enum ThreadStatus { + Running, + Idle, + Squashing, + Blocked, ++ Fetching, ++ TrapPending, ++ QuiescePending, ++ SwitchOut, + IcacheMissStall, + IcacheMissComplete + }; + - // May eventually need statuses on a per thread basis. - Status _status; ++ /** Fetching Policy, Add new policies here.*/ ++ enum FetchPriority { ++ SingleThread, ++ RoundRobin, ++ Branch, ++ IQ, ++ LSQ ++ }; ++ ++ private: ++ /** Fetch status. */ ++ FetchStatus _status; ++ ++ /** Per-thread status. */ ++ ThreadStatus fetchStatus[Impl::MaxThreads]; ++ ++ /** Fetch policy. */ ++ FetchPriority fetchPolicy; + - bool stalled; ++ /** List that has the threads organized by priority. 
*/ ++ std::list priorityList; + + public: + class CacheCompletionEvent : public Event + { + private: - SimpleFetch *fetch; ++ MemReqPtr req; ++ /** Pointer to fetch. */ ++ DefaultFetch *fetch; ++ /** Thread id. */ ++// unsigned threadId; + + public: - CacheCompletionEvent(SimpleFetch *_fetch); ++ /** Constructs a cache completion event, which tells fetch when the ++ * cache miss is complete. ++ */ ++ CacheCompletionEvent(MemReqPtr &_req, DefaultFetch *_fetch); + ++ /** Processes cache completion event. */ + virtual void process(); ++ /** Returns the description of the cache completion event. */ + virtual const char *description(); + }; + + public: - /** SimpleFetch constructor. */ - SimpleFetch(Params ¶ms); ++ /** DefaultFetch constructor. */ ++ DefaultFetch(Params *params); + ++ /** Returns the name of fetch. */ ++ std::string name() const; ++ ++ /** Registers statistics. */ + void regStats(); + ++ /** Sets CPU pointer. */ + void setCPU(FullCPU *cpu_ptr); + ++ /** Sets the main backwards communication time buffer pointer. */ + void setTimeBuffer(TimeBuffer *time_buffer); + ++ /** Sets pointer to list of active threads. */ ++ void setActiveThreads(std::list *at_ptr); ++ ++ /** Sets pointer to time buffer used to communicate to the next stage. */ + void setFetchQueue(TimeBuffer *fq_ptr); + - void processCacheCompletion(); ++ /** Sets pointer to page table. */ ++// void setPageTable(PageTable *pt_ptr); ++ ++ /** Initialize stage. */ ++ void initStage(); ++ ++ /** Processes cache completion event. */ ++ void processCacheCompletion(MemReqPtr &req); ++ ++ void switchOut(); ++ ++ void doSwitchOut(); ++ ++ void takeOverFrom(); ++ ++ bool isSwitchedOut() { return switchedOut; } ++ ++ void wakeFromQuiesce(); + + private: ++ /** Changes the status of this stage to active, and indicates this ++ * to the CPU. ++ */ ++ inline void switchToActive(); ++ ++ /** Changes the status of this stage to inactive, and indicates ++ * this to the CPU. 
++ */ ++ inline void switchToInactive(); ++ + /** + * Looks up in the branch predictor to see if the next PC should be + * either next PC+=MachInst or a branch target. + * @param next_PC Next PC variable passed in by reference. It is + * expected to be set to the current PC; it will be updated with what + * the next PC will be. + * @return Whether or not a branch was predicted as taken. + */ + bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC); + + /** + * Fetches the cache line that contains fetch_PC. Returns any + * fault that happened. Puts the data into the class variable + * cacheData. + * @param fetch_PC The PC address that is being fetched from. ++ * @param ret_fault The fault reference that will be set to the result of ++ * the icache access. ++ * @param tid Thread id. + * @return Any fault that occured. + */ - Fault fetchCacheLine(Addr fetch_PC); ++ bool fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid); + - inline void doSquash(const Addr &new_PC); ++ /** Squashes a specific thread and resets the PC. */ ++ inline void doSquash(const Addr &new_PC, unsigned tid); + - void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num); ++ /** Squashes a specific thread and resets the PC. Also tells the CPU to ++ * remove any instructions between fetch and decode that should be sqaushed. ++ */ ++ void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num, ++ unsigned tid); ++ ++ /** Checks if a thread is stalled. */ ++ bool checkStall(unsigned tid) const; ++ ++ /** Updates overall fetch stage status; to be called at the end of each ++ * cycle. */ ++ FetchStatus updateFetchStatus(); + + public: - // Figure out PC vs next PC and how it should be updated - void squash(const Addr &new_PC); ++ /** Squashes a specific thread and resets the PC. Also tells the CPU to ++ * remove any instructions that are not in the ROB. The source of this ++ * squash should be the commit stage. 
++ */ ++ void squash(const Addr &new_PC, unsigned tid); + ++ /** Ticks the fetch stage, processing all inputs signals and fetching ++ * as many instructions as possible. ++ */ + void tick(); + - void fetch(); ++ /** Checks all input signals and updates the status as necessary. ++ * @return: Returns if the status has changed due to input signals. ++ */ ++ bool checkSignalsAndUpdate(unsigned tid); ++ ++ /** Does the actual fetching of instructions and passing them on to the ++ * next stage. ++ * @param status_change fetch() sets this variable if there was a status ++ * change (ie switching to IcacheMissStall). ++ */ ++ void fetch(bool &status_change); + - // Align an address (typically a PC) to the start of an I-cache block. - // We fold in the PISA 64- to 32-bit conversion here as well. ++ /** Align a PC to the start of an I-cache block. */ + Addr icacheBlockAlignPC(Addr addr) + { + addr = TheISA::realPCToFetchPC(addr); + return (addr & ~(cacheBlkMask)); + } + ++ private: ++ /** Returns the appropriate thread to fetch, given the fetch policy. */ ++ int getFetchingThread(FetchPriority &fetch_priority); ++ ++ /** Returns the appropriate thread to fetch using a round robin policy. */ ++ int roundRobin(); ++ ++ /** Returns the appropriate thread to fetch using the IQ count policy. */ ++ int iqCount(); ++ ++ /** Returns the appropriate thread to fetch using the LSQ count policy. */ ++ int lsqCount(); ++ ++ /** Returns the appropriate thread to fetch using the branch count policy. */ ++ int branchCount(); ++ + private: + /** Pointer to the FullCPU. */ + FullCPU *cpu; + + /** Time buffer interface. */ + TimeBuffer *timeBuffer; + + /** Wire to get decode's information from backwards time buffer. */ + typename TimeBuffer::wire fromDecode; + + /** Wire to get rename's information from backwards time buffer. */ + typename TimeBuffer::wire fromRename; + + /** Wire to get iew's information from backwards time buffer. 
*/ + typename TimeBuffer::wire fromIEW; + + /** Wire to get commit's information from backwards time buffer. */ + typename TimeBuffer::wire fromCommit; + + /** Internal fetch instruction queue. */ + TimeBuffer *fetchQueue; + + //Might be annoying how this name is different than the queue. + /** Wire used to write any information heading to decode. */ + typename TimeBuffer::wire toDecode; + + /** Icache interface. */ + MemInterface *icacheInterface; + + /** BPredUnit. */ + BPredUnit branchPred; + ++ Addr PC[Impl::MaxThreads]; ++ ++ Addr nextPC[Impl::MaxThreads]; ++ + /** Memory request used to access cache. */ - MemReqPtr memReq; ++ MemReqPtr memReq[Impl::MaxThreads]; ++ ++ /** Variable that tracks if fetch has written to the time buffer this ++ * cycle. Used to tell CPU if there is activity this cycle. ++ */ ++ bool wroteToTimeBuffer; ++ ++ /** Tracks how many instructions has been fetched this cycle. */ ++ int numInst; ++ ++ /** Source of possible stalls. */ ++ struct Stalls { ++ bool decode; ++ bool rename; ++ bool iew; ++ bool commit; ++ }; ++ ++ /** Tracks which stages are telling fetch to stall. */ ++ Stalls stalls[Impl::MaxThreads]; + + /** Decode to fetch delay, in ticks. */ + unsigned decodeToFetchDelay; + + /** Rename to fetch delay, in ticks. */ + unsigned renameToFetchDelay; + + /** IEW to fetch delay, in ticks. */ + unsigned iewToFetchDelay; + + /** Commit to fetch delay, in ticks. */ + unsigned commitToFetchDelay; + + /** The width of fetch in instructions. */ + unsigned fetchWidth; + + /** Cache block size. */ + int cacheBlkSize; + + /** Mask to get a cache block's address. */ + Addr cacheBlkMask; + + /** The cache line being fetched. */ - uint8_t *cacheData; ++ uint8_t *cacheData[Impl::MaxThreads]; + + /** Size of instructions. */ + int instSize; + + /** Icache stall statistics. */ - Counter lastIcacheStall; ++ Counter lastIcacheStall[Impl::MaxThreads]; ++ ++ /** List of Active Threads */ ++ std::list *activeThreads; ++ ++ /** Number of threads. 
*/ ++ unsigned numThreads; ++ ++ /** Number of threads that are actively fetching. */ ++ unsigned numFetchingThreads; + ++ /** Thread ID being fetched. */ ++ int threadFetched; ++ ++ bool interruptPending; ++ ++ bool switchedOut; ++ ++#if !FULL_SYSTEM ++ /** Page table pointer. */ ++// PageTable *pTable; ++#endif ++ ++ // @todo: Consider making these vectors and tracking on a per thread basis. ++ /** Stat for total number of cycles stalled due to an icache miss. */ + Stats::Scalar<> icacheStallCycles; ++ /** Stat for total number of fetched instructions. */ + Stats::Scalar<> fetchedInsts; ++ Stats::Scalar<> fetchedBranches; ++ /** Stat for total number of predicted branches. */ + Stats::Scalar<> predictedBranches; ++ /** Stat for total number of cycles spent fetching. */ + Stats::Scalar<> fetchCycles; ++ /** Stat for total number of cycles spent squashing. */ + Stats::Scalar<> fetchSquashCycles; ++ /** Stat for total number of cycles spent blocked due to other stages in ++ * the pipeline. ++ */ ++ Stats::Scalar<> fetchIdleCycles; + Stats::Scalar<> fetchBlockedCycles; ++ ++ Stats::Scalar<> fetchMiscStallCycles; ++ /** Stat for total number of fetched cache lines. */ + Stats::Scalar<> fetchedCacheLines; + - Stats::Distribution<> fetch_nisn_dist; ++ Stats::Scalar<> fetchIcacheSquashes; ++ /** Distribution of number of instructions fetched each cycle. */ ++ Stats::Distribution<> fetchNisnDist; ++ Stats::Formula idleRate; ++ Stats::Formula branchRate; ++ Stats::Formula fetchRate; +}; + - #endif //__CPU_O3_CPU_SIMPLE_FETCH_HH__ ++#endif //__CPU_O3_FETCH_HH__ diff --cc src/cpu/o3/fetch_impl.hh index 8029fc732,000000000..1c5e508f6 mode 100644,000000..100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@@ -1,617 -1,0 +1,1219 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - // Remove this later; used only for debugging. 
- #define OPCODE(X) (X >> 26) & 0x3f - +#include "arch/isa_traits.hh" - #include "sim/byteswap.hh" +#include "cpu/exetrace.hh" ++#include "cpu/o3/fetch.hh" +#include "mem/base_mem.hh" +#include "mem/mem_interface.hh" +#include "mem/mem_req.hh" - #include "cpu/o3/fetch.hh" - ++#include "sim/byteswap.hh" +#include "sim/root.hh" + ++#if FULL_SYSTEM ++#include "arch/tlb.hh" ++#include "arch/vtophys.hh" ++#include "base/remote_gdb.hh" ++#include "mem/functional/memory_control.hh" ++#include "mem/functional/physical.hh" ++#include "sim/system.hh" ++#else // !FULL_SYSTEM ++#include "mem/functional/functional.hh" ++#endif // FULL_SYSTEM ++ ++#include ++ ++using namespace std; ++ +template - SimpleFetch::CacheCompletionEvent - ::CacheCompletionEvent(SimpleFetch *_fetch) - : Event(&mainEventQueue), ++DefaultFetch::CacheCompletionEvent::CacheCompletionEvent(MemReqPtr &_req, ++ DefaultFetch *_fetch) ++ : Event(&mainEventQueue, Delayed_Writeback_Pri), ++ req(_req), + fetch(_fetch) +{ ++ this->setFlags(Event::AutoDelete); +} + +template +void - SimpleFetch::CacheCompletionEvent::process() ++DefaultFetch::CacheCompletionEvent::process() +{ - fetch->processCacheCompletion(); ++ fetch->processCacheCompletion(req); +} + +template +const char * - SimpleFetch::CacheCompletionEvent::description() ++DefaultFetch::CacheCompletionEvent::description() +{ - return "SimpleFetch cache completion event"; ++ return "DefaultFetch cache completion event"; +} + +template - SimpleFetch::SimpleFetch(Params ¶ms) - : icacheInterface(params.icacheInterface), ++DefaultFetch::DefaultFetch(Params *params) ++ : icacheInterface(params->icacheInterface), + branchPred(params), - decodeToFetchDelay(params.decodeToFetchDelay), - renameToFetchDelay(params.renameToFetchDelay), - iewToFetchDelay(params.iewToFetchDelay), - commitToFetchDelay(params.commitToFetchDelay), - fetchWidth(params.fetchWidth) ++ decodeToFetchDelay(params->decodeToFetchDelay), ++ renameToFetchDelay(params->renameToFetchDelay), ++ 
iewToFetchDelay(params->iewToFetchDelay), ++ commitToFetchDelay(params->commitToFetchDelay), ++ fetchWidth(params->fetchWidth), ++ numThreads(params->numberOfThreads), ++ numFetchingThreads(params->smtNumFetchingThreads), ++ interruptPending(false) +{ - DPRINTF(Fetch, "Fetch: Fetch constructor called\n"); - - // Set status to idle. - _status = Idle; - - // Create a new memory request. - memReq = new MemReq(); - // Not sure of this parameter. I think it should be based on the - // thread number. - #if !FULL_SYSTEM - memReq->asid = 0; - #else - memReq->asid = 0; - #endif // FULL_SYSTEM - memReq->data = new uint8_t[64]; ++ if (numThreads > Impl::MaxThreads) ++ fatal("numThreads is not a valid value\n"); ++ ++ DPRINTF(Fetch, "Fetch constructor called\n"); ++ ++ // Set fetch stage's status to inactive. ++ _status = Inactive; ++ ++ string policy = params->smtFetchPolicy; ++ ++ // Convert string to lowercase ++ std::transform(policy.begin(), policy.end(), policy.begin(), ++ (int(*)(int)) tolower); ++ ++ // Figure out fetch policy ++ if (policy == "singlethread") { ++ fetchPolicy = SingleThread; ++ } else if (policy == "roundrobin") { ++ fetchPolicy = RoundRobin; ++ DPRINTF(Fetch, "Fetch policy set to Round Robin\n"); ++ } else if (policy == "branch") { ++ fetchPolicy = Branch; ++ DPRINTF(Fetch, "Fetch policy set to Branch Count\n"); ++ } else if (policy == "iqcount") { ++ fetchPolicy = IQ; ++ DPRINTF(Fetch, "Fetch policy set to IQ count\n"); ++ } else if (policy == "lsqcount") { ++ fetchPolicy = LSQ; ++ DPRINTF(Fetch, "Fetch policy set to LSQ count\n"); ++ } else { ++ fatal("Invalid Fetch Policy. Options Are: {SingleThread," ++ " RoundRobin,LSQcount,IQcount}\n"); ++ } + + // Size of cache block. + cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; + + // Create mask to get rid of offset bits. 
+ cacheBlkMask = (cacheBlkSize - 1); + ++ for (int tid=0; tid < numThreads; tid++) { ++ ++ fetchStatus[tid] = Running; ++ ++ priorityList.push_back(tid); ++ ++ // Create a new memory request. ++ memReq[tid] = NULL; ++ ++ // Create space to store a cache line. ++ cacheData[tid] = new uint8_t[cacheBlkSize]; ++ ++ stalls[tid].decode = 0; ++ stalls[tid].rename = 0; ++ stalls[tid].iew = 0; ++ stalls[tid].commit = 0; ++ } ++ + // Get the size of an instruction. + instSize = sizeof(MachInst); ++} + - // Create space to store a cache line. - cacheData = new uint8_t[cacheBlkSize]; ++template ++std::string ++DefaultFetch::name() const ++{ ++ return cpu->name() + ".fetch"; +} + +template +void - SimpleFetch::regStats() ++DefaultFetch::regStats() +{ + icacheStallCycles - .name(name() + ".icacheStallCycles") ++ .name(name() + ".FETCH:icacheStallCycles") + .desc("Number of cycles fetch is stalled on an Icache miss") + .prereq(icacheStallCycles); + + fetchedInsts - .name(name() + ".fetchedInsts") ++ .name(name() + ".FETCH:Insts") + .desc("Number of instructions fetch has processed") + .prereq(fetchedInsts); ++ ++ fetchedBranches ++ .name(name() + ".FETCH:Branches") ++ .desc("Number of branches that fetch encountered") ++ .prereq(fetchedBranches); ++ + predictedBranches - .name(name() + ".predictedBranches") ++ .name(name() + ".FETCH:predictedBranches") + .desc("Number of branches that fetch has predicted taken") + .prereq(predictedBranches); ++ + fetchCycles - .name(name() + ".fetchCycles") ++ .name(name() + ".FETCH:Cycles") + .desc("Number of cycles fetch has run and was not squashing or" + " blocked") + .prereq(fetchCycles); ++ + fetchSquashCycles - .name(name() + ".fetchSquashCycles") ++ .name(name() + ".FETCH:SquashCycles") + .desc("Number of cycles fetch has spent squashing") + .prereq(fetchSquashCycles); ++ ++ fetchIdleCycles ++ .name(name() + ".FETCH:IdleCycles") ++ .desc("Number of cycles fetch was idle") ++ .prereq(fetchIdleCycles); ++ + fetchBlockedCycles - .name(name() 
+ ".fetchBlockedCycles") ++ .name(name() + ".FETCH:BlockedCycles") + .desc("Number of cycles fetch has spent blocked") + .prereq(fetchBlockedCycles); ++ + fetchedCacheLines - .name(name() + ".fetchedCacheLines") ++ .name(name() + ".FETCH:CacheLines") + .desc("Number of cache lines fetched") + .prereq(fetchedCacheLines); + - fetch_nisn_dist ++ fetchMiscStallCycles ++ .name(name() + ".FETCH:MiscStallCycles") ++ .desc("Number of cycles fetch has spent waiting on interrupts, or " ++ "bad addresses, or out of MSHRs") ++ .prereq(fetchMiscStallCycles); ++ ++ fetchIcacheSquashes ++ .name(name() + ".FETCH:IcacheSquashes") ++ .desc("Number of outstanding Icache misses that were squashed") ++ .prereq(fetchIcacheSquashes); ++ ++ fetchNisnDist + .init(/* base value */ 0, + /* last value */ fetchWidth, + /* bucket size */ 1) - .name(name() + ".FETCH:rate_dist") ++ .name(name() + ".FETCH:rateDist") + .desc("Number of instructions fetched each cycle (Total)") - .flags(Stats::pdf) - ; ++ .flags(Stats::pdf); ++ ++ idleRate ++ .name(name() + ".FETCH:idleRate") ++ .desc("Percent of cycles fetch was idle") ++ .prereq(idleRate); ++ idleRate = fetchIdleCycles * 100 / cpu->numCycles; ++ ++ branchRate ++ .name(name() + ".FETCH:branchRate") ++ .desc("Number of branch fetches per cycle") ++ .flags(Stats::total); ++ branchRate = predictedBranches / cpu->numCycles; ++ ++ fetchRate ++ .name(name() + ".FETCH:rate") ++ .desc("Number of inst fetches per cycle") ++ .flags(Stats::total); ++ fetchRate = fetchedInsts / cpu->numCycles; + + branchPred.regStats(); +} + +template +void - SimpleFetch::setCPU(FullCPU *cpu_ptr) ++DefaultFetch::setCPU(FullCPU *cpu_ptr) +{ - DPRINTF(Fetch, "Fetch: Setting the CPU pointer.\n"); ++ DPRINTF(Fetch, "Setting the CPU pointer.\n"); + cpu = cpu_ptr; - // This line will be removed eventually. - memReq->xc = cpu->xcBase(); ++ ++ // Fetch needs to start fetching instructions at the very beginning, ++ // so it must start up in active state. 
++ switchToActive(); +} + +template +void - SimpleFetch::setTimeBuffer(TimeBuffer *time_buffer) ++DefaultFetch::setTimeBuffer(TimeBuffer *time_buffer) +{ - DPRINTF(Fetch, "Fetch: Setting the time buffer pointer.\n"); ++ DPRINTF(Fetch, "Setting the time buffer pointer.\n"); + timeBuffer = time_buffer; + + // Create wires to get information from proper places in time buffer. + fromDecode = timeBuffer->getWire(-decodeToFetchDelay); + fromRename = timeBuffer->getWire(-renameToFetchDelay); + fromIEW = timeBuffer->getWire(-iewToFetchDelay); + fromCommit = timeBuffer->getWire(-commitToFetchDelay); +} + +template +void - SimpleFetch::setFetchQueue(TimeBuffer *fq_ptr) ++DefaultFetch::setActiveThreads(list *at_ptr) ++{ ++ DPRINTF(Fetch, "Setting active threads list pointer.\n"); ++ activeThreads = at_ptr; ++} ++ ++template ++void ++DefaultFetch::setFetchQueue(TimeBuffer *fq_ptr) +{ - DPRINTF(Fetch, "Fetch: Setting the fetch queue pointer.\n"); ++ DPRINTF(Fetch, "Setting the fetch queue pointer.\n"); + fetchQueue = fq_ptr; + + // Create wire to write information to proper place in fetch queue. + toDecode = fetchQueue->getWire(0); +} + ++#if 0 ++template ++void ++DefaultFetch::setPageTable(PageTable *pt_ptr) ++{ ++ DPRINTF(Fetch, "Setting the page table pointer.\n"); ++#if !FULL_SYSTEM ++ pTable = pt_ptr; ++#endif ++} ++#endif ++ ++template ++void ++DefaultFetch::initStage() ++{ ++ for (int tid = 0; tid < numThreads; tid++) { ++ PC[tid] = cpu->readPC(tid); ++ nextPC[tid] = cpu->readNextPC(tid); ++ } ++} ++ +template +void - SimpleFetch::processCacheCompletion() ++DefaultFetch::processCacheCompletion(MemReqPtr &req) +{ - DPRINTF(Fetch, "Fetch: Waking up from cache miss.\n"); ++ unsigned tid = req->thread_num; ++ ++ DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n",tid); + + // Only change the status if it's still waiting on the icache access + // to return. + // Can keep track of how many cache accesses go unused due to + // misspeculation here. 
- if (_status == IcacheMissStall) - _status = IcacheMissComplete; ++ if (fetchStatus[tid] != IcacheMissStall || ++ req != memReq[tid] || ++ isSwitchedOut()) { ++ ++fetchIcacheSquashes; ++ return; ++ } ++ ++ // Wake up the CPU (if it went to sleep and was waiting on this completion ++ // event). ++ cpu->wakeCPU(); ++ ++ DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n", ++ tid); ++ ++ switchToActive(); ++ ++ // Only switch to IcacheMissComplete if we're not stalled as well. ++ if (checkStall(tid)) { ++ fetchStatus[tid] = Blocked; ++ } else { ++ fetchStatus[tid] = IcacheMissComplete; ++ } ++ ++// memcpy(cacheData[tid], memReq[tid]->data, memReq[tid]->size); ++ ++ // Reset the mem req to NULL. ++ memReq[tid] = NULL; ++} ++ ++template ++void ++DefaultFetch::switchOut() ++{ ++ switchedOut = true; ++ cpu->signalSwitched(); ++} ++ ++template ++void ++DefaultFetch::doSwitchOut() ++{ ++ branchPred.switchOut(); ++} ++ ++template ++void ++DefaultFetch::takeOverFrom() ++{ ++ // Reset all state ++ for (int i = 0; i < Impl::MaxThreads; ++i) { ++ stalls[i].decode = 0; ++ stalls[i].rename = 0; ++ stalls[i].iew = 0; ++ stalls[i].commit = 0; ++ PC[i] = cpu->readPC(i); ++ nextPC[i] = cpu->readNextPC(i); ++ fetchStatus[i] = Running; ++ } ++ numInst = 0; ++ wroteToTimeBuffer = false; ++ _status = Inactive; ++ switchedOut = false; ++ branchPred.takeOverFrom(); ++} ++ ++template ++void ++DefaultFetch::wakeFromQuiesce() ++{ ++ DPRINTF(Fetch, "Waking up from quiesce\n"); ++ // Hopefully this is safe ++ fetchStatus[0] = Running; ++} ++ ++template ++inline void ++DefaultFetch::switchToActive() ++{ ++ if (_status == Inactive) { ++ DPRINTF(Activity, "Activating stage.\n"); ++ ++ cpu->activateStage(FullCPU::FetchIdx); ++ ++ _status = Active; ++ } ++} ++ ++template ++inline void ++DefaultFetch::switchToInactive() ++{ ++ if (_status == Active) { ++ DPRINTF(Activity, "Deactivating stage.\n"); ++ ++ cpu->deactivateStage(FullCPU::FetchIdx); ++ ++ _status = Inactive; ++ } +} + 
+template +bool - SimpleFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC) ++DefaultFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC) +{ + // Do branch prediction check here. + // A bit of a misnomer...next_PC is actually the current PC until + // this function updates it. + bool predict_taken; + + if (!inst->isControl()) { + next_PC = next_PC + instSize; + inst->setPredTarg(next_PC); + return false; + } + - predict_taken = branchPred.predict(inst, next_PC); ++ predict_taken = branchPred.predict(inst, next_PC, inst->threadNumber); ++ ++ ++fetchedBranches; + + if (predict_taken) { + ++predictedBranches; + } + + return predict_taken; +} + +template - Fault - SimpleFetch::fetchCacheLine(Addr fetch_PC) ++bool ++DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid) +{ - // Check if the instruction exists within the cache. - // If it does, then proceed on to read the instruction and the rest - // of the instructions in the cache line until either the end of the - // cache line or a predicted taken branch is encountered. ++ Fault fault = NoFault; + +#if FULL_SYSTEM + // Flag to say whether or not address is physical addr. - unsigned flags = cpu->inPalMode() ? PHYSICAL : 0; ++ unsigned flags = cpu->inPalMode(fetch_PC) ? PHYSICAL : 0; +#else + unsigned flags = 0; +#endif // FULL_SYSTEM + - Fault fault = NoFault; ++ if (interruptPending && flags == 0 || switchedOut) { ++ // Hold off fetch from getting new instructions while an interrupt ++ // is pending. ++ return false; ++ } + + // Align the fetch PC so it's at the start of a cache block. + fetch_PC = icacheBlockAlignPC(fetch_PC); + - // Setup the memReq to do a read of the first isntruction's address. ++ // Setup the memReq to do a read of the first instruction's address. + // Set the appropriate read size and flags as well. - memReq->cmd = Read; - memReq->reset(fetch_PC, cacheBlkSize, flags); ++ memReq[tid] = new MemReq(); + - // Translate the instruction request. 
- // Should this function be - // in the CPU class ? Probably...ITB/DTB should exist within the - // CPU. ++ memReq[tid]->asid = tid; ++ memReq[tid]->thread_num = tid; ++ memReq[tid]->data = new uint8_t[64]; ++ memReq[tid]->xc = cpu->xcBase(tid); ++ memReq[tid]->cmd = Read; ++ memReq[tid]->reset(fetch_PC, cacheBlkSize, flags); + - fault = cpu->translateInstReq(memReq); ++ // Translate the instruction request. ++//#if FULL_SYSTEM ++ fault = cpu->translateInstReq(memReq[tid]); ++//#else ++// fault = pTable->translate(memReq[tid]); ++//#endif + + // In the case of faults, the fetch stage may need to stall and wait - // on what caused the fetch (ITB or Icache miss). ++ // for the ITB miss to be handled. + + // If translation was successful, attempt to read the first + // instruction. + if (fault == NoFault) { ++#if FULL_SYSTEM ++ if (cpu->system->memctrl->badaddr(memReq[tid]->paddr) || ++ memReq[tid]->flags & UNCACHEABLE) { ++ DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a " ++ "misspeculating path)!", ++ memReq[tid]->paddr); ++ ret_fault = TheISA::genMachineCheckFault(); ++ return false; ++ } ++#endif ++ + DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); - fault = cpu->mem->read(memReq, cacheData); ++ fault = cpu->mem->read(memReq[tid], cacheData[tid]); + // This read may change when the mem interface changes. + - fetchedCacheLines++; - } ++ // Now do the timing access to see whether or not the instruction ++ // exists within the cache. ++ if (icacheInterface && !icacheInterface->isBlocked()) { ++ DPRINTF(Fetch, "Doing cache access.\n"); + - // Now do the timing access to see whether or not the instruction - // exists within the cache. 
- if (icacheInterface && fault == NoFault) { - DPRINTF(Fetch, "Fetch: Doing timing memory access.\n"); - memReq->completionEvent = NULL; ++ memReq[tid]->completionEvent = NULL; + - memReq->time = curTick; ++ memReq[tid]->time = curTick; + - MemAccessResult result = icacheInterface->access(memReq); ++ MemAccessResult result = icacheInterface->access(memReq[tid]); + - // If the cache missed (in this model functional and timing - // memories are different), then schedule an event to wake - // up this stage once the cache miss completes. - if (result != MA_HIT && icacheInterface->doEvents()) { - memReq->completionEvent = new CacheCompletionEvent(this); ++ fetchedCacheLines++; + - // How does current model work as far as individual - // stages scheduling/unscheduling? - // Perhaps have only the main CPU scheduled/unscheduled, - // and have it choose what stages to run appropriately. ++ // If the cache missed, then schedule an event to wake ++ // up this stage once the cache miss completes. ++ // @todo: Possibly allow for longer than 1 cycle cache hits. ++ if (result != MA_HIT && icacheInterface->doEvents()) { + - DPRINTF(Fetch, "Fetch: Stalling due to icache miss.\n"); - _status = IcacheMissStall; ++ memReq[tid]->completionEvent = ++ new CacheCompletionEvent(memReq[tid], this); ++ ++ lastIcacheStall[tid] = curTick; ++ ++ DPRINTF(Activity, "[tid:%i]: Activity: Stalling due to I-cache " ++ "miss.\n", tid); ++ ++ fetchStatus[tid] = IcacheMissStall; ++ } else { ++ DPRINTF(Fetch, "[tid:%i]: I-Cache hit. 
Doing Instruction " ++ "read.\n", tid); ++ ++// memcpy(cacheData[tid], memReq[tid]->data, memReq[tid]->size); ++ } ++ } else { ++ DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid); ++ ret_fault = NoFault; ++ return false; + } + } + - return fault; ++ ret_fault = fault; ++ return true; +} + +template +inline void - SimpleFetch::doSquash(const Addr &new_PC) ++DefaultFetch::doSquash(const Addr &new_PC, unsigned tid) +{ - DPRINTF(Fetch, "Fetch: Squashing, setting PC to: %#x.\n", new_PC); ++ DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x.\n", ++ tid, new_PC); + - cpu->setNextPC(new_PC + instSize); - cpu->setPC(new_PC); ++ PC[tid] = new_PC; ++ nextPC[tid] = new_PC + instSize; + + // Clear the icache miss if it's outstanding. - if (_status == IcacheMissStall && icacheInterface) { - DPRINTF(Fetch, "Fetch: Squashing outstanding Icache miss.\n"); - // @todo: Use an actual thread number here. - icacheInterface->squash(0); ++ if (fetchStatus[tid] == IcacheMissStall && icacheInterface) { ++ DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n", ++ tid); ++ memReq[tid] = NULL; + } + - _status = Squashing; ++ fetchStatus[tid] = Squashing; + + ++fetchSquashCycles; +} + +template +void - SimpleFetch::squashFromDecode(const Addr &new_PC, - const InstSeqNum &seq_num) ++DefaultFetch::squashFromDecode(const Addr &new_PC, ++ const InstSeqNum &seq_num, ++ unsigned tid) +{ - DPRINTF(Fetch, "Fetch: Squashing from decode.\n"); ++ DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid); + - doSquash(new_PC); ++ doSquash(new_PC, tid); + + // Tell the CPU to remove any instructions that are in flight between + // fetch and decode. 
- cpu->removeInstsUntil(seq_num); ++ cpu->removeInstsUntil(seq_num, tid); ++} ++ ++template ++bool ++DefaultFetch::checkStall(unsigned tid) const ++{ ++ bool ret_val = false; ++ ++ if (cpu->contextSwitch) { ++ DPRINTF(Fetch,"[tid:%i]: Stalling for a context switch.\n",tid); ++ ret_val = true; ++ } else if (stalls[tid].decode) { ++ DPRINTF(Fetch,"[tid:%i]: Stall from Decode stage detected.\n",tid); ++ ret_val = true; ++ } else if (stalls[tid].rename) { ++ DPRINTF(Fetch,"[tid:%i]: Stall from Rename stage detected.\n",tid); ++ ret_val = true; ++ } else if (stalls[tid].iew) { ++ DPRINTF(Fetch,"[tid:%i]: Stall from IEW stage detected.\n",tid); ++ ret_val = true; ++ } else if (stalls[tid].commit) { ++ DPRINTF(Fetch,"[tid:%i]: Stall from Commit stage detected.\n",tid); ++ ret_val = true; ++ } ++ ++ return ret_val; ++} ++ ++template ++typename DefaultFetch::FetchStatus ++DefaultFetch::updateFetchStatus() ++{ ++ //Check Running ++ list::iterator threads = (*activeThreads).begin(); ++ ++ while (threads != (*activeThreads).end()) { ++ ++ unsigned tid = *threads++; ++ ++ if (fetchStatus[tid] == Running || ++ fetchStatus[tid] == Squashing || ++ fetchStatus[tid] == IcacheMissComplete) { ++ ++ if (_status == Inactive) { ++ DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid); ++ ++ if (fetchStatus[tid] == IcacheMissComplete) { ++ DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache" ++ "completion\n",tid); ++ } ++ ++ cpu->activateStage(FullCPU::FetchIdx); ++ } ++ ++ return Active; ++ } ++ } ++ ++ // Stage is switching from active to inactive, notify CPU of it. 
++ if (_status == Active) { ++ DPRINTF(Activity, "Deactivating stage.\n"); ++ ++ cpu->deactivateStage(FullCPU::FetchIdx); ++ } ++ ++ return Inactive; +} + +template +void - SimpleFetch::squash(const Addr &new_PC) ++DefaultFetch::squash(const Addr &new_PC, unsigned tid) +{ - DPRINTF(Fetch, "Fetch: Squash from commit.\n"); ++ DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid); + - doSquash(new_PC); ++ doSquash(new_PC, tid); + + // Tell the CPU to remove any instructions that are not in the ROB. - cpu->removeInstsNotInROB(); ++ cpu->removeInstsNotInROB(tid); +} + - template ++template +void - SimpleFetch::tick() ++DefaultFetch::tick() +{ ++ list::iterator threads = (*activeThreads).begin(); ++ bool status_change = false; ++ ++ wroteToTimeBuffer = false; ++ ++ while (threads != (*activeThreads).end()) { ++ unsigned tid = *threads++; ++ ++ // Check the signals for each thread to determine the proper status ++ // for each thread. ++ bool updated_status = checkSignalsAndUpdate(tid); ++ status_change = status_change || updated_status; ++ } ++ ++ DPRINTF(Fetch, "Running stage.\n"); ++ ++ // Reset the number of the instruction we're fetching. ++ numInst = 0; ++ ++ if (fromCommit->commitInfo[0].interruptPending) { ++ interruptPending = true; ++ } ++ if (fromCommit->commitInfo[0].clearInterrupt) { ++ interruptPending = false; ++ } ++ ++ for (threadFetched = 0; threadFetched < numFetchingThreads; ++ threadFetched++) { ++ // Fetch each of the actively fetching threads. ++ fetch(status_change); ++ } ++ ++ // Record number of instructions fetched this cycle for distribution. ++ fetchNisnDist.sample(numInst); ++ ++ if (status_change) { ++ // Change the fetch stage status if there was a status change. ++ _status = updateFetchStatus(); ++ } ++ ++ // If there was activity this cycle, inform the CPU of it. 
++ if (wroteToTimeBuffer || cpu->contextSwitch) { ++ DPRINTF(Activity, "Activity this cycle.\n"); ++ ++ cpu->activityThisCycle(); ++ } ++} ++ ++template ++bool ++DefaultFetch::checkSignalsAndUpdate(unsigned tid) ++{ ++ // Update the per thread stall statuses. ++ if (fromDecode->decodeBlock[tid]) { ++ stalls[tid].decode = true; ++ } ++ ++ if (fromDecode->decodeUnblock[tid]) { ++ assert(stalls[tid].decode); ++ assert(!fromDecode->decodeBlock[tid]); ++ stalls[tid].decode = false; ++ } ++ ++ if (fromRename->renameBlock[tid]) { ++ stalls[tid].rename = true; ++ } ++ ++ if (fromRename->renameUnblock[tid]) { ++ assert(stalls[tid].rename); ++ assert(!fromRename->renameBlock[tid]); ++ stalls[tid].rename = false; ++ } ++ ++ if (fromIEW->iewBlock[tid]) { ++ stalls[tid].iew = true; ++ } ++ ++ if (fromIEW->iewUnblock[tid]) { ++ assert(stalls[tid].iew); ++ assert(!fromIEW->iewBlock[tid]); ++ stalls[tid].iew = false; ++ } ++ ++ if (fromCommit->commitBlock[tid]) { ++ stalls[tid].commit = true; ++ } ++ ++ if (fromCommit->commitUnblock[tid]) { ++ assert(stalls[tid].commit); ++ assert(!fromCommit->commitBlock[tid]); ++ stalls[tid].commit = false; ++ } ++ + // Check squash signals from commit. - if (fromCommit->commitInfo.squash) { - DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " - "from commit.\n"); ++ if (fromCommit->commitInfo[tid].squash) { ++ ++ DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash " ++ "from commit.\n",tid); + + // In any case, squash. - squash(fromCommit->commitInfo.nextPC); ++ squash(fromCommit->commitInfo[tid].nextPC,tid); + + // Also check if there's a mispredict that happened. 
- if (fromCommit->commitInfo.branchMispredict) { - branchPred.squash(fromCommit->commitInfo.doneSeqNum, - fromCommit->commitInfo.nextPC, - fromCommit->commitInfo.branchTaken); ++ if (fromCommit->commitInfo[tid].branchMispredict) { ++ branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum, ++ fromCommit->commitInfo[tid].nextPC, ++ fromCommit->commitInfo[tid].branchTaken, ++ tid); + } else { - branchPred.squash(fromCommit->commitInfo.doneSeqNum); ++ branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum, ++ tid); + } + - return; - } else if (fromCommit->commitInfo.doneSeqNum) { ++ return true; ++ } else if (fromCommit->commitInfo[tid].doneSeqNum) { + // Update the branch predictor if it wasn't a squashed instruction - // that was braodcasted. - branchPred.update(fromCommit->commitInfo.doneSeqNum); ++ // that was broadcasted. ++ branchPred.update(fromCommit->commitInfo[tid].doneSeqNum, tid); + } + + // Check ROB squash signals from commit. - if (fromCommit->commitInfo.robSquashing) { - DPRINTF(Fetch, "Fetch: ROB is still squashing.\n"); ++ if (fromCommit->commitInfo[tid].robSquashing) { ++ DPRINTF(Fetch, "[tid:%u]: ROB is still squashing Thread %u.\n", tid); + + // Continue to squash. - _status = Squashing; ++ fetchStatus[tid] = Squashing; + - ++fetchSquashCycles; - return; ++ return true; + } + + // Check squash signals from decode. - if (fromDecode->decodeInfo.squash) { - DPRINTF(Fetch, "Fetch: Squashing instructions due to squash " - "from decode.\n"); ++ if (fromDecode->decodeInfo[tid].squash) { ++ DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash " ++ "from decode.\n",tid); + + // Update the branch predictor. 
- if (fromDecode->decodeInfo.branchMispredict) { - branchPred.squash(fromDecode->decodeInfo.doneSeqNum, - fromDecode->decodeInfo.nextPC, - fromDecode->decodeInfo.branchTaken); ++ if (fromDecode->decodeInfo[tid].branchMispredict) { ++ branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum, ++ fromDecode->decodeInfo[tid].nextPC, ++ fromDecode->decodeInfo[tid].branchTaken, ++ tid); + } else { - branchPred.squash(fromDecode->decodeInfo.doneSeqNum); ++ branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum, ++ tid); + } + - if (_status != Squashing) { - // Squash unless we're already squashing? - squashFromDecode(fromDecode->decodeInfo.nextPC, - fromDecode->decodeInfo.doneSeqNum); - return; ++ if (fetchStatus[tid] != Squashing) { ++ // Squash unless we're already squashing ++ squashFromDecode(fromDecode->decodeInfo[tid].nextPC, ++ fromDecode->decodeInfo[tid].doneSeqNum, ++ tid); ++ ++ return true; + } + } + - // Check if any of the stall signals are high. - if (fromDecode->decodeInfo.stall || - fromRename->renameInfo.stall || - fromIEW->iewInfo.stall || - fromCommit->commitInfo.stall) - { - // Block stage, regardless of current status. ++ if (checkStall(tid) && fetchStatus[tid] != IcacheMissStall) { ++ DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid); + - DPRINTF(Fetch, "Fetch: Stalling stage.\n"); - DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i " - "Commit: %i\n", - fromDecode->decodeInfo.stall, - fromRename->renameInfo.stall, - fromIEW->iewInfo.stall, - fromCommit->commitInfo.stall); ++ fetchStatus[tid] = Blocked; + - _status = Blocked; - - ++fetchBlockedCycles; - return; - } else if (_status == Blocked) { - // Unblock stage if status is currently blocked and none of the - // stall signals are being held high. - _status = Running; - - ++fetchBlockedCycles; - return; ++ return true; + } + - // If fetch has reached this point, then there are no squash signals - // still being held high. 
Check if fetch is in the squashing state; - // if so, fetch can switch to running. - // Similarly, there are no blocked signals still being held high. - // Check if fetch is in the blocked state; if so, fetch can switch to - // running. - if (_status == Squashing) { - DPRINTF(Fetch, "Fetch: Done squashing, switching to running.\n"); - - // Switch status to running - _status = Running; ++ if (fetchStatus[tid] == Blocked || ++ fetchStatus[tid] == Squashing) { ++ // Switch status to running if fetch isn't being told to block or ++ // squash this cycle. ++ DPRINTF(Fetch, "[tid:%i]: Done squashing, switching to running.\n", ++ tid); + - ++fetchCycles; ++ fetchStatus[tid] = Running; + - fetch(); - } else if (_status != IcacheMissStall) { - DPRINTF(Fetch, "Fetch: Running stage.\n"); - - ++fetchCycles; - - fetch(); ++ return true; + } ++ ++ // If we've reached this point, we have not gotten any signals that ++ // cause fetch to change its status. Fetch remains the same as before. ++ return false; +} + +template +void - SimpleFetch::fetch() ++DefaultFetch::fetch(bool &status_change) +{ + ////////////////////////////////////////// + // Start actual fetch + ////////////////////////////////////////// ++ int tid = getFetchingThread(fetchPolicy); ++ ++ if (tid == -1) { ++ DPRINTF(Fetch,"There are no more threads available to fetch from.\n"); ++ ++ // Breaks looping condition in tick() ++ threadFetched = numFetchingThreads; ++ return; ++ } + + // The current PC. - Addr fetch_PC = cpu->readPC(); ++ Addr &fetch_PC = PC[tid]; + + // Fault code for memory access. + Fault fault = NoFault; + + // If returning from the delay of a cache miss, then update the status + // to running, otherwise do the cache access. Possibly move this up + // to tick() function. - if (_status == IcacheMissComplete) { - DPRINTF(Fetch, "Fetch: Icache miss is complete.\n"); - - // Reset the completion event to NULL. 
- memReq->completionEvent = NULL; - - _status = Running; ++ if (fetchStatus[tid] == IcacheMissComplete) { ++ DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n", ++ tid); ++ ++ fetchStatus[tid] = Running; ++ status_change = true; ++ } else if (fetchStatus[tid] == Running) { ++ DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read " ++ "instruction, starting at PC %08p.\n", ++ tid, fetch_PC); ++ ++ bool fetch_success = fetchCacheLine(fetch_PC, fault, tid); ++ if (!fetch_success) { ++ ++fetchMiscStallCycles; ++ return; ++ } + } else { - DPRINTF(Fetch, "Fetch: Attempting to translate and read " - "instruction, starting at PC %08p.\n", - fetch_PC); ++ if (fetchStatus[tid] == Idle) { ++ ++fetchIdleCycles; ++ } else if (fetchStatus[tid] == Blocked) { ++ ++fetchBlockedCycles; ++ } else if (fetchStatus[tid] == Squashing) { ++ ++fetchSquashCycles; ++ } else if (fetchStatus[tid] == IcacheMissStall) { ++ ++icacheStallCycles; ++ } + - fault = fetchCacheLine(fetch_PC); ++ // Status is Idle, Squashing, Blocked, or IcacheMissStall, so ++ // fetch should do nothing. ++ return; + } + - // If we had a stall due to an icache miss, then return. It'd - // be nicer if this were handled through the kind of fault that - // is returned by the function. - if (_status == IcacheMissStall) { ++ ++fetchCycles; ++ ++ // If we had a stall due to an icache miss, then return. ++ if (fetchStatus[tid] == IcacheMissStall) { ++ ++icacheStallCycles; ++ status_change = true; + return; + } + - // As far as timing goes, the CPU will need to send an event through - // the MemReq in order to be woken up once the memory access completes. - // Probably have a status on a per thread basis so each thread can - // block independently and be woken up independently. - + Addr next_PC = fetch_PC; + InstSeqNum inst_seq; + MachInst inst; - unsigned offset = fetch_PC & cacheBlkMask; - unsigned fetched; ++ ExtMachInst ext_inst; ++ // @todo: Fix this hack. 
++ unsigned offset = (fetch_PC & cacheBlkMask) & ~3; + + if (fault == NoFault) { + // If the read of the first instruction was successful, then grab the + // instructions from the rest of the cache line and put them into the + // queue heading to decode. + - DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n"); - - ////////////////////////// - // Fetch first instruction - ////////////////////////// ++ DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to " ++ "decode.\n",tid); + + // Need to keep track of whether or not a predicted branch + // ended this fetch block. + bool predicted_branch = false; + - for (fetched = 0; ++ for (; + offset < cacheBlkSize && - fetched < fetchWidth && ++ numInst < fetchWidth && + !predicted_branch; - ++fetched) - { ++ ++numInst) { + + // Get a sequence number. + inst_seq = cpu->getAndIncrementInstSeq(); + + // Make sure this is a valid index. + assert(offset <= cacheBlkSize - instSize); + + // Get the instruction from the array of the cache line. + inst = gtoh(*reinterpret_cast - (&cacheData[offset])); ++ (&cacheData[tid][offset])); ++ ++ ext_inst = TheISA::makeExtMI(inst, fetch_PC); + + // Create a new DynInst from the instruction fetched. 
- DynInstPtr instruction = new DynInst(inst, fetch_PC, next_PC, ++ DynInstPtr instruction = new DynInst(ext_inst, fetch_PC, ++ next_PC, + inst_seq, cpu); ++ instruction->setThread(tid); ++ ++ instruction->setASID(tid); + - DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n", - inst_seq, instruction->readPC()); ++ instruction->setState(cpu->thread[tid]); + - DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n", - OPCODE(inst)); ++ DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created " ++ "[sn:%lli]\n", ++ tid, instruction->readPC(), inst_seq); ++ ++ DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", ++ tid, instruction->staticInst->disassemble(fetch_PC)); + + instruction->traceData = - Trace::getInstRecord(curTick, cpu->xcBase(), cpu, ++ Trace::getInstRecord(curTick, cpu->xcBase(tid), cpu, + instruction->staticInst, - instruction->readPC(), 0); ++ instruction->readPC(),tid); + + predicted_branch = lookupAndUpdateNextPC(instruction, next_PC); + + // Add instruction to the CPU's list of instructions. - cpu->addInst(instruction); ++ instruction->setInstListIt(cpu->addInst(instruction)); + + // Write the instruction to the first slot in the queue + // that heads to decode. - toDecode->insts[fetched] = instruction; ++ toDecode->insts[numInst] = instruction; + + toDecode->size++; + + // Increment stat of fetched instructions. + ++fetchedInsts; + + // Move to the next instruction, unless we have a branch. + fetch_PC = next_PC; + ++ if (instruction->isQuiesce()) { ++ warn("%lli: Quiesce instruction encountered, halting fetch!", ++ curTick); ++ fetchStatus[tid] = QuiescePending; ++ ++numInst; ++ status_change = true; ++ break; ++ } ++ + offset+= instSize; + } ++ } + - fetch_nisn_dist.sample(fetched); ++ if (numInst > 0) { ++ wroteToTimeBuffer = true; + } + + // Now that fetching is completed, update the PC to signify what the next - // cycle will be. Might want to move this to the beginning of this - // function so that the PC updates at the beginning of everything. 
- // Or might want to leave setting the PC to the main CPU, with fetch - // only changing the nextPC (will require correct determination of - // next PC). ++ // cycle will be. + if (fault == NoFault) { - DPRINTF(Fetch, "Fetch: Setting PC to %08p.\n", next_PC); - cpu->setPC(next_PC); - cpu->setNextPC(next_PC + instSize); ++ DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC); ++ ++ PC[tid] = next_PC; ++ nextPC[tid] = next_PC + instSize; + } else { - // If the issue was an icache miss, then we can just return and - // wait until it is handled. - if (_status == IcacheMissStall) { - return; ++ // We shouldn't be in an icache miss and also have a fault (an ITB ++ // miss) ++ if (fetchStatus[tid] == IcacheMissStall) { ++ panic("Fetch should have exited prior to this!"); + } + - // Handle the fault. - // This stage will not be able to continue until all the ROB - // slots are empty, at which point the fault can be handled. - // The only other way it can wake up is if a squash comes along - // and changes the PC. Not sure how to handle that case...perhaps - // have it handled by the upper level CPU class which peeks into the - // time buffer and sees if a squash comes along, in which case it - // changes the status. ++ // Send the fault to commit. This thread will not do anything ++ // until commit handles the fault. The only other way it can ++ // wake up is if a squash comes along and changes the PC. ++#if FULL_SYSTEM ++ assert(numInst != fetchWidth); ++ // Get a sequence number. ++ inst_seq = cpu->getAndIncrementInstSeq(); ++ // We will use a nop in order to carry the fault. ++ ext_inst = TheISA::NoopMachInst; + - DPRINTF(Fetch, "Fetch: Blocked, need to handle the trap.\n"); ++ // Create a new DynInst from the dummy nop. 
++ DynInstPtr instruction = new DynInst(ext_inst, fetch_PC, ++ next_PC, ++ inst_seq, cpu); ++ instruction->setPredTarg(next_PC + instSize); ++ instruction->setThread(tid); + - _status = Blocked; - #if FULL_SYSTEM - // cpu->trap(fault); - // Send a signal to the ROB indicating that there's a trap from the - // fetch stage that needs to be handled. Need to indicate that - // there's a fault, and the fault type. ++ instruction->setASID(tid); ++ ++ instruction->setState(cpu->thread[tid]); ++ ++ instruction->traceData = NULL; ++ ++ instruction->setInstListIt(cpu->addInst(instruction)); ++ ++ instruction->fault = fault; ++ ++ toDecode->insts[numInst] = instruction; ++ toDecode->size++; ++ ++ DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n",tid); ++ ++ fetchStatus[tid] = TrapPending; ++ status_change = true; ++ ++ warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]); +#else // !FULL_SYSTEM - fatal("fault (%d) detected @ PC %08p", fault, cpu->readPC()); ++ fatal("fault (%d) detected @ PC %08p", fault, PC[tid]); +#endif // FULL_SYSTEM + } +} ++ ++ ++/////////////////////////////////////// ++// // ++// SMT FETCH POLICY MAINTAINED HERE // ++// // ++/////////////////////////////////////// ++template ++int ++DefaultFetch::getFetchingThread(FetchPriority &fetch_priority) ++{ ++ if (numThreads > 1) { ++ switch (fetch_priority) { ++ ++ case SingleThread: ++ return 0; ++ ++ case RoundRobin: ++ return roundRobin(); ++ ++ case IQ: ++ return iqCount(); ++ ++ case LSQ: ++ return lsqCount(); ++ ++ case Branch: ++ return branchCount(); ++ ++ default: ++ return -1; ++ } ++ } else { ++ int tid = *((*activeThreads).begin()); ++ ++ if (fetchStatus[tid] == Running || ++ fetchStatus[tid] == IcacheMissComplete || ++ fetchStatus[tid] == Idle) { ++ return tid; ++ } else { ++ return -1; ++ } ++ } ++ ++} ++ ++ ++template ++int ++DefaultFetch::roundRobin() ++{ ++ list::iterator pri_iter = priorityList.begin(); ++ list::iterator end = priorityList.end(); ++ ++ int 
high_pri; ++ ++ while (pri_iter != end) { ++ high_pri = *pri_iter; ++ ++ assert(high_pri <= numThreads); ++ ++ if (fetchStatus[high_pri] == Running || ++ fetchStatus[high_pri] == IcacheMissComplete || ++ fetchStatus[high_pri] == Idle) { ++ ++ priorityList.erase(pri_iter); ++ priorityList.push_back(high_pri); ++ ++ return high_pri; ++ } ++ ++ pri_iter++; ++ } ++ ++ return -1; ++} ++ ++template ++int ++DefaultFetch::iqCount() ++{ ++ priority_queue PQ; ++ ++ list::iterator threads = (*activeThreads).begin(); ++ ++ while (threads != (*activeThreads).end()) { ++ unsigned tid = *threads++; ++ ++ PQ.push(fromIEW->iewInfo[tid].iqCount); ++ } ++ ++ while (!PQ.empty()) { ++ ++ unsigned high_pri = PQ.top(); ++ ++ if (fetchStatus[high_pri] == Running || ++ fetchStatus[high_pri] == IcacheMissComplete || ++ fetchStatus[high_pri] == Idle) ++ return high_pri; ++ else ++ PQ.pop(); ++ ++ } ++ ++ return -1; ++} ++ ++template ++int ++DefaultFetch::lsqCount() ++{ ++ priority_queue PQ; ++ ++ ++ list::iterator threads = (*activeThreads).begin(); ++ ++ while (threads != (*activeThreads).end()) { ++ unsigned tid = *threads++; ++ ++ PQ.push(fromIEW->iewInfo[tid].ldstqCount); ++ } ++ ++ while (!PQ.empty()) { ++ ++ unsigned high_pri = PQ.top(); ++ ++ if (fetchStatus[high_pri] == Running || ++ fetchStatus[high_pri] == IcacheMissComplete || ++ fetchStatus[high_pri] == Idle) ++ return high_pri; ++ else ++ PQ.pop(); ++ ++ } ++ ++ return -1; ++} ++ ++template ++int ++DefaultFetch::branchCount() ++{ ++ list::iterator threads = (*activeThreads).begin(); ++ ++ return *threads; ++} diff --cc src/cpu/o3/free_list.cc index 6f0b4be1e,000000000..bd0f4f034 mode 100644,000000..100644 --- a/src/cpu/o3/free_list.cc +++ b/src/cpu/o3/free_list.cc @@@ -1,82 -1,0 +1,70 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "base/trace.hh" + +#include "cpu/o3/free_list.hh" + - SimpleFreeList::SimpleFreeList(unsigned _numLogicalIntRegs, ++SimpleFreeList::SimpleFreeList(unsigned activeThreads, ++ unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs) + : numLogicalIntRegs(_numLogicalIntRegs), + numPhysicalIntRegs(_numPhysicalIntRegs), + numLogicalFloatRegs(_numLogicalFloatRegs), + numPhysicalFloatRegs(_numPhysicalFloatRegs), + numPhysicalRegs(numPhysicalIntRegs + numPhysicalFloatRegs) +{ - DPRINTF(FreeList, "FreeList: Creating new free list object.\n"); - - // DEBUG stuff. - freeIntRegsScoreboard.resize(numPhysicalIntRegs); - - freeFloatRegsScoreboard.resize(numPhysicalRegs); - - for (PhysRegIndex i = 0; i < numLogicalIntRegs; ++i) { - freeIntRegsScoreboard[i] = 0; - } ++ DPRINTF(FreeList, "Creating new free list object.\n"); + + // Put all of the extra physical registers onto the free list. This + // means excluding all of the base logical registers. - for (PhysRegIndex i = numLogicalIntRegs; ++ for (PhysRegIndex i = numLogicalIntRegs * activeThreads; + i < numPhysicalIntRegs; ++i) + { + freeIntRegs.push(i); - - freeIntRegsScoreboard[i] = 1; - } - - for (PhysRegIndex i = 0; i < numPhysicalIntRegs + numLogicalFloatRegs; - ++i) - { - freeFloatRegsScoreboard[i] = 0; + } + + // Put all of the extra physical registers onto the free list. This + // means excluding all of the base logical registers. Because the + // float registers' indices start where the physical registers end, + // some math must be done to determine where the free registers start. 
- for (PhysRegIndex i = numPhysicalIntRegs + numLogicalFloatRegs; - i < numPhysicalRegs; ++i) ++ PhysRegIndex i = numPhysicalIntRegs + (numLogicalFloatRegs * activeThreads); ++ ++ for ( ; i < numPhysicalRegs; ++i) + { + freeFloatRegs.push(i); - - freeFloatRegsScoreboard[i] = 1; + } +} + ++std::string ++SimpleFreeList::name() const ++{ ++ return "cpu.freelist"; ++} diff --cc src/cpu/o3/free_list.hh index 0b85dba1e,000000000..29e84cd44 mode 100644,000000..100644 --- a/src/cpu/o3/free_list.hh +++ b/src/cpu/o3/free_list.hh @@@ -1,195 -1,0 +1,189 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - #ifndef __CPU_O3_CPU_FREE_LIST_HH__ - #define __CPU_O3_CPU_FREE_LIST_HH__ ++#ifndef __CPU_O3_FREE_LIST_HH__ ++#define __CPU_O3_FREE_LIST_HH__ + +#include +#include + +#include "arch/isa_traits.hh" +#include "base/trace.hh" +#include "base/traceflags.hh" +#include "cpu/o3/comm.hh" + +/** + * FreeList class that simply holds the list of free integer and floating + * point registers. Can request for a free register of either type, and + * also send back free registers of either type. This is a very simple + * class, but it should be sufficient for most implementations. Like all + * other classes, it assumes that the indices for the floating point + * registers starts after the integer registers end. Hence the variable + * numPhysicalIntRegs is logically equivalent to the baseFP dependency. - * Note that - * while this most likely should be called FreeList, the name "FreeList" - * is used in a typedef within the CPU Policy, and therefore no class - * can be named simply "FreeList". ++ * Note that while this most likely should be called FreeList, the name ++ * "FreeList" is used in a typedef within the CPU Policy, and therefore no ++ * class can be named simply "FreeList". + * @todo: Give a better name to the base FP dependency. + */ +class SimpleFreeList +{ + private: + /** The list of free integer registers. */ + std::queue freeIntRegs; + + /** The list of free floating point registers. 
*/ + std::queue freeFloatRegs; + + /** Number of logical integer registers. */ + int numLogicalIntRegs; + + /** Number of physical integer registers. */ + int numPhysicalIntRegs; + + /** Number of logical floating point registers. */ + int numLogicalFloatRegs; + + /** Number of physical floating point registers. */ + int numPhysicalFloatRegs; + + /** Total number of physical registers. */ + int numPhysicalRegs; + - /** DEBUG stuff below. */ - std::vector freeIntRegsScoreboard; - - std::vector freeFloatRegsScoreboard; - + public: - SimpleFreeList(unsigned _numLogicalIntRegs, ++ /** Constructs a free list. ++ * @param activeThreads Number of active threads. ++ * @param _numLogicalIntRegs Number of logical integer registers. ++ * @param _numPhysicalIntRegs Number of physical integer registers. ++ * @param _numLogicalFloatRegs Number of logical fp registers. ++ * @param _numPhysicalFloatRegs Number of physical fp registers. ++ */ ++ SimpleFreeList(unsigned activeThreads, ++ unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs); + ++ /** Gives the name of the freelist. */ ++ std::string name() const; ++ ++ /** Gets a free integer register. */ + inline PhysRegIndex getIntReg(); + ++ /** Gets a free fp register. */ + inline PhysRegIndex getFloatReg(); + ++ /** Adds a register back to the free list. */ + inline void addReg(PhysRegIndex freed_reg); + ++ /** Adds an integer register back to the free list. */ + inline void addIntReg(PhysRegIndex freed_reg); + ++ /** Adds a fp register back to the free list. */ + inline void addFloatReg(PhysRegIndex freed_reg); + ++ /** Checks if there are any free integer registers. */ + bool hasFreeIntRegs() + { return !freeIntRegs.empty(); } + ++ /** Checks if there are any free fp registers. */ + bool hasFreeFloatRegs() + { return !freeFloatRegs.empty(); } + ++ /** Returns the number of free integer registers. 
*/ + int numFreeIntRegs() + { return freeIntRegs.size(); } + ++ /** Returns the number of free fp registers. */ + int numFreeFloatRegs() + { return freeFloatRegs.size(); } +}; + +inline PhysRegIndex +SimpleFreeList::getIntReg() +{ - DPRINTF(Rename, "FreeList: Trying to get free integer register.\n"); ++ DPRINTF(FreeList, "Trying to get free integer register.\n"); ++ + if (freeIntRegs.empty()) { + panic("No free integer registers!"); + } + + PhysRegIndex free_reg = freeIntRegs.front(); + + freeIntRegs.pop(); + - // DEBUG - assert(freeIntRegsScoreboard[free_reg]); - freeIntRegsScoreboard[free_reg] = 0; - + return(free_reg); +} + +inline PhysRegIndex +SimpleFreeList::getFloatReg() +{ - DPRINTF(Rename, "FreeList: Trying to get free float register.\n"); ++ DPRINTF(FreeList, "Trying to get free float register.\n"); ++ + if (freeFloatRegs.empty()) { + panic("No free integer registers!"); + } + + PhysRegIndex free_reg = freeFloatRegs.front(); + + freeFloatRegs.pop(); + - // DEBUG - assert(freeFloatRegsScoreboard[free_reg]); - freeFloatRegsScoreboard[free_reg] = 0; - + return(free_reg); +} + +inline void +SimpleFreeList::addReg(PhysRegIndex freed_reg) +{ - DPRINTF(Rename, "Freelist: Freeing register %i.\n", freed_reg); ++ DPRINTF(FreeList,"Freeing register %i.\n", freed_reg); + //Might want to add in a check for whether or not this register is + //already in there. A bit vector or something similar would be useful. 
+ if (freed_reg < numPhysicalIntRegs) { - freeIntRegs.push(freed_reg); - - // DEBUG - assert(freeIntRegsScoreboard[freed_reg] == false); - freeIntRegsScoreboard[freed_reg] = 1; ++ if (freed_reg != TheISA::ZeroReg) ++ freeIntRegs.push(freed_reg); + } else if (freed_reg < numPhysicalRegs) { - freeFloatRegs.push(freed_reg); - - // DEBUG - assert(freeFloatRegsScoreboard[freed_reg] == false); - freeFloatRegsScoreboard[freed_reg] = 1; ++ if (freed_reg != (TheISA::ZeroReg + numPhysicalIntRegs)) ++ freeFloatRegs.push(freed_reg); + } +} + +inline void +SimpleFreeList::addIntReg(PhysRegIndex freed_reg) +{ - DPRINTF(Rename, "Freelist: Freeing int register %i.\n", freed_reg); - - // DEBUG - assert(!freeIntRegsScoreboard[freed_reg]); - freeIntRegsScoreboard[freed_reg] = 1; ++ DPRINTF(FreeList,"Freeing int register %i.\n", freed_reg); + + freeIntRegs.push(freed_reg); +} + +inline void +SimpleFreeList::addFloatReg(PhysRegIndex freed_reg) +{ - DPRINTF(Rename, "Freelist: Freeing float register %i.\n", freed_reg); - - // DEBUG - assert(!freeFloatRegsScoreboard[freed_reg]); - freeFloatRegsScoreboard[freed_reg] = 1; ++ DPRINTF(FreeList,"Freeing float register %i.\n", freed_reg); + + freeFloatRegs.push(freed_reg); +} + - #endif // __CPU_O3_CPU_FREE_LIST_HH__ ++#endif // __CPU_O3_FREE_LIST_HH__ diff --cc src/cpu/o3/iew.cc index 45b5610e7,000000000..90d035f71 mode 100644,000000..100644 --- a/src/cpu/o3/iew.cc +++ b/src/cpu/o3/iew.cc @@@ -1,34 -1,0 +1,34 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/iew_impl.hh" +#include "cpu/o3/inst_queue.hh" + - template class SimpleIEW; ++template class DefaultIEW; diff --cc src/cpu/o3/iew.hh index 1e370d4e6,000000000..935320628 mode 100644,000000..100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@@ -1,239 -1,0 +1,501 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - //Todo: Update with statuses. - //Need to handle delaying writes to the writeback bus if it's full at the - //given time. - - #ifndef __CPU_O3_CPU_SIMPLE_IEW_HH__ - #define __CPU_O3_CPU_SIMPLE_IEW_HH__ ++#ifndef __CPU_O3_IEW_HH__ ++#define __CPU_O3_IEW_HH__ + +#include + - #include "config/full_system.hh" +#include "base/statistics.hh" +#include "base/timebuf.hh" ++#include "config/full_system.hh" +#include "cpu/o3/comm.hh" - ++#include "cpu/o3/scoreboard.hh" ++#include "cpu/o3/lsq.hh" ++ ++class FUPool; ++ ++/** ++ * DefaultIEW handles both single threaded and SMT IEW ++ * (issue/execute/writeback). It handles the dispatching of ++ * instructions to the LSQ/IQ as part of the issue stage, and has the ++ * IQ try to issue instructions each cycle. The execute latency is ++ * actually tied into the issue latency to allow the IQ to be able to ++ * do back-to-back scheduling without having to speculatively schedule ++ * instructions. This happens by having the IQ have access to the ++ * functional units, and the IQ gets the execution latencies from the ++ * FUs when it issues instructions. Instructions reach the execute ++ * stage on the last cycle of their execution, which is when the IQ ++ * knows to wake up any dependent instructions, allowing back to back ++ * scheduling. 
The execute portion of IEW separates memory ++ * instructions from non-memory instructions, either telling the LSQ ++ * to execute the instruction, or executing the instruction directly. ++ * The writeback portion of IEW completes the instructions by waking ++ * up any dependents, and marking the register ready on the ++ * scoreboard. ++ */ +template - class SimpleIEW ++class DefaultIEW +{ + private: + //Typedefs from Impl + typedef typename Impl::CPUPol CPUPol; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::Params Params; + + typedef typename CPUPol::IQ IQ; + typedef typename CPUPol::RenameMap RenameMap; - typedef typename CPUPol::LDSTQ LDSTQ; ++ typedef typename CPUPol::LSQ LSQ; + + typedef typename CPUPol::TimeStruct TimeStruct; + typedef typename CPUPol::IEWStruct IEWStruct; + typedef typename CPUPol::RenameStruct RenameStruct; + typedef typename CPUPol::IssueStruct IssueStruct; + + friend class Impl::FullCPU; ++ friend class CPUPol::IQ; ++ + public: ++ /** Overall IEW stage status. Used to determine if the CPU can ++ * deschedule itself due to a lack of activity. ++ */ + enum Status { ++ Active, ++ Inactive ++ }; ++ ++ /** Status for Issue, Execute, and Writeback stages. */ ++ enum StageStatus { + Running, + Blocked, + Idle, ++ StartSquash, + Squashing, + Unblocking + }; + + private: ++ /** Overall stage status. */ + Status _status; - Status _issueStatus; - Status _exeStatus; - Status _wbStatus; ++ /** Dispatch status. */ ++ StageStatus dispatchStatus[Impl::MaxThreads]; ++ /** Execute status. */ ++ StageStatus exeStatus; ++ /** Writeback status. */ ++ StageStatus wbStatus; + + public: - class WritebackEvent : public Event { ++ /** LdWriteback event for a load completion. */ ++ class LdWritebackEvent : public Event { + private: ++ /** Instruction that is writing back data to the register file. */ + DynInstPtr inst; - SimpleIEW *iewStage; ++ /** Pointer to IEW stage. 
*/ ++ DefaultIEW *iewStage; + + public: - WritebackEvent(DynInstPtr &_inst, SimpleIEW *_iew); ++ /** Constructs a load writeback event. */ ++ LdWritebackEvent(DynInstPtr &_inst, DefaultIEW *_iew); + ++ /** Processes writeback event. */ + virtual void process(); ++ /** Returns the description of the writeback event. */ + virtual const char *description(); + }; + + public: - SimpleIEW(Params ¶ms); ++ /** Constructs a DefaultIEW with the given parameters. */ ++ DefaultIEW(Params *params); ++ ++ /** Returns the name of the DefaultIEW stage. */ ++ std::string name() const; + ++ /** Registers statistics. */ + void regStats(); + ++ /** Initializes stage; sends back the number of free IQ and LSQ entries. */ ++ void initStage(); ++ ++ /** Sets CPU pointer for IEW, IQ, and LSQ. */ + void setCPU(FullCPU *cpu_ptr); + ++ /** Sets main time buffer used for backwards communication. */ + void setTimeBuffer(TimeBuffer *tb_ptr); + ++ /** Sets time buffer for getting instructions coming from rename. */ + void setRenameQueue(TimeBuffer *rq_ptr); + ++ /** Sets time buffer to pass on instructions to commit. */ + void setIEWQueue(TimeBuffer *iq_ptr); + - void setRenameMap(RenameMap *rm_ptr); ++ /** Sets pointer to list of active threads. */ ++ void setActiveThreads(std::list *at_ptr); ++ ++ /** Sets pointer to the scoreboard. */ ++ void setScoreboard(Scoreboard *sb_ptr); ++ ++ void switchOut(); + - void squash(); ++ void doSwitchOut(); + - void squashDueToBranch(DynInstPtr &inst); ++ void takeOverFrom(); + - void squashDueToMem(DynInstPtr &inst); ++ bool isSwitchedOut() { return switchedOut; } + - void block(); ++ /** Sets page table pointer within LSQ. */ ++// void setPageTable(PageTable *pt_ptr); + - inline void unblock(); ++ /** Squashes instructions in IEW for a specific thread. */ ++ void squash(unsigned tid); + ++ /** Wakes all dependents of a completed instruction. 
*/ + void wakeDependents(DynInstPtr &inst); + ++ /** Tells memory dependence unit that a memory instruction needs to be ++ * rescheduled. It will re-execute once replayMemInst() is called. ++ */ ++ void rescheduleMemInst(DynInstPtr &inst); ++ ++ /** Re-executes all rescheduled memory instructions. */ ++ void replayMemInst(DynInstPtr &inst); ++ ++ /** Sends an instruction to commit through the time buffer. */ + void instToCommit(DynInstPtr &inst); + ++ /** Inserts unused instructions of a thread into the skid buffer. */ ++ void skidInsert(unsigned tid); ++ ++ /** Returns the max of the number of entries in all of the skid buffers. */ ++ int skidCount(); ++ ++ /** Returns if all of the skid buffers are empty. */ ++ bool skidsEmpty(); ++ ++ /** Updates overall IEW status based on all of the stages' statuses. */ ++ void updateStatus(); ++ ++ /** Resets entries of the IQ and the LSQ. */ ++ void resetEntries(); ++ ++ /** Tells the CPU to wakeup if it has descheduled itself due to no ++ * activity. Used mainly by the LdWritebackEvent. ++ */ ++ void wakeCPU(); ++ ++ /** Reports to the CPU that there is activity this cycle. */ ++ void activityThisCycle(); ++ ++ /** Tells CPU that the IEW stage is active and running. */ ++ inline void activateStage(); ++ ++ /** Tells CPU that the IEW stage is inactive and idle. */ ++ inline void deactivateStage(); ++ ++ /** Returns if the LSQ has any stores to writeback. */ ++ bool hasStoresToWB() { return ldstQueue.hasStoresToWB(); } ++ + private: - void dispatchInsts(); ++ /** Sends commit proper information for a squash due to a branch ++ * mispredict. ++ */ ++ void squashDueToBranch(DynInstPtr &inst, unsigned thread_id); + ++ /** Sends commit proper information for a squash due to a memory order ++ * violation. ++ */ ++ void squashDueToMemOrder(DynInstPtr &inst, unsigned thread_id); ++ ++ /** Sends commit proper information for a squash due to memory becoming ++ * blocked (younger issued instructions must be retried). 
++ */ ++ void squashDueToMemBlocked(DynInstPtr &inst, unsigned thread_id); ++ ++ /** Sets Dispatch to blocked, and signals back to other stages to block. */ ++ void block(unsigned thread_id); ++ ++ /** Unblocks Dispatch if the skid buffer is empty, and signals back to ++ * other stages to unblock. ++ */ ++ void unblock(unsigned thread_id); ++ ++ /** Determines proper actions to take given Dispatch's status. */ ++ void dispatch(unsigned tid); ++ ++ /** Dispatches instructions to IQ and LSQ. */ ++ void dispatchInsts(unsigned tid); ++ ++ /** Executes instructions. In the case of memory operations, it informs the ++ * LSQ to execute the instructions. Also handles any redirects that occur ++ * due to the executed instructions. ++ */ + void executeInsts(); + ++ /** Writebacks instructions. In our model, the instruction's execute() ++ * function atomically reads registers, executes, and writes registers. ++ * Thus this writeback only wakes up dependent instructions, and informs ++ * the scoreboard of registers becoming ready. ++ */ ++ void writebackInsts(); ++ ++ /** Returns the number of valid, non-squashed instructions coming from ++ * rename to dispatch. ++ */ ++ unsigned validInstsFromRename(); ++ ++ /** Reads the stall signals. */ ++ void readStallSignals(unsigned tid); ++ ++ /** Checks if any of the stall conditions are currently true. */ ++ bool checkStall(unsigned tid); ++ ++ /** Processes inputs and changes state accordingly. */ ++ void checkSignalsAndUpdate(unsigned tid); ++ ++ /** Sorts instructions coming from rename into lists separated by thread. */ ++ void sortInsts(); ++ + public: ++ /** Ticks IEW stage, causing Dispatch, the IQ, the LSQ, Execute, and ++ * Writeback to run for one cycle. ++ */ + void tick(); + - void iew(); ++ private: ++ void updateExeInstStats(DynInstPtr &inst); + - //Interfaces to objects inside and outside of IEW. - /** Time buffer interface. */ ++ /** Pointer to main time buffer used for backwards communication. 
*/ + TimeBuffer *timeBuffer; + ++ /** Wire to write information heading to previous stages. */ ++ typename TimeBuffer::wire toFetch; ++ + /** Wire to get commit's output from backwards time buffer. */ + typename TimeBuffer::wire fromCommit; + + /** Wire to write information heading to previous stages. */ + typename TimeBuffer::wire toRename; + + /** Rename instruction queue interface. */ + TimeBuffer *renameQueue; + + /** Wire to get rename's output from rename queue. */ + typename TimeBuffer::wire fromRename; + + /** Issue stage queue. */ + TimeBuffer issueToExecQueue; + + /** Wire to read information from the issue stage time queue. */ + typename TimeBuffer::wire fromIssue; + + /** + * IEW stage time buffer. Holds ROB indices of instructions that + * can be marked as completed. + */ + TimeBuffer *iewQueue; + + /** Wire to write infromation heading to commit. */ + typename TimeBuffer::wire toCommit; + - //Will need internal queue to hold onto instructions coming from - //the rename stage in case of a stall. ++ /** Queue of all instructions coming from rename this cycle. */ ++ std::queue insts[Impl::MaxThreads]; ++ + /** Skid buffer between rename and IEW. */ - std::queue skidBuffer; ++ std::queue skidBuffer[Impl::MaxThreads]; + - protected: ++ /** Scoreboard pointer. */ ++ Scoreboard* scoreboard; ++ ++ public: + /** Instruction queue. */ + IQ instQueue; + - LDSTQ ldstQueue; ++ /** Load / store queue. */ ++ LSQ ldstQueue; + - #if !FULL_SYSTEM - public: - void lsqWriteback(); - #endif ++ /** Pointer to the functional unit pool. */ ++ FUPool *fuPool; + + private: - /** Pointer to rename map. Might not want this stage to directly - * access this though... ++ /** CPU pointer. */ ++ FullCPU *cpu; ++ ++ /** Records if IEW has written to the time buffer this cycle, so that the ++ * CPU can deschedule itself if there is no activity. + */ - RenameMap *renameMap; ++ bool wroteToTimeBuffer; + - /** CPU interface. */ - FullCPU *cpu; ++ /** Source of possible stalls. 
*/ ++ struct Stalls { ++ bool commit; ++ }; ++ ++ /** Stages that are telling IEW to stall. */ ++ Stalls stalls[Impl::MaxThreads]; ++ ++ /** Debug function to print instructions that are issued this cycle. */ ++ void printAvailableInsts(); ++ ++ public: ++ /** Records if the LSQ needs to be updated on the next cycle, so that ++ * IEW knows if there will be activity on the next cycle. ++ */ ++ bool updateLSQNextCycle; + + private: ++ /** Records if there is a fetch redirect on this cycle for each thread. */ ++ bool fetchRedirect[Impl::MaxThreads]; ++ ++ /** Used to track if all instructions have been dispatched this cycle. ++ * If they have not, then blocking must have occurred, and the instructions ++ * would already be added to the skid buffer. ++ * @todo: Fix this hack. ++ */ ++ bool dispatchedAllInsts; ++ ++ /** Records if the queues have been changed (inserted or issued insts), ++ * so that IEW knows to broadcast the updated amount of free entries. ++ */ ++ bool updatedQueues; ++ + /** Commit to IEW delay, in ticks. */ + unsigned commitToIEWDelay; + + /** Rename to IEW delay, in ticks. */ + unsigned renameToIEWDelay; + + /** + * Issue to execute delay, in ticks. What this actually represents is + * the amount of time it takes for an instruction to wake up, be + * scheduled, and sent to a FU for execution. + */ + unsigned issueToExecuteDelay; + + /** Width of issue's read path, in instructions. The read path is both + * the skid buffer and the rename instruction queue. + * Note to self: is this really different than issueWidth? + */ + unsigned issueReadWidth; + + /** Width of issue, in instructions. */ + unsigned issueWidth; + + /** Width of execute, in instructions. Might make more sense to break + * down into FP vs int. + */ + unsigned executeWidth; + - /** Number of cycles stage has been squashing. Used so that the stage - * knows when it can start unblocking, which is when the previous stage - * has received the stall signal and clears up its outputs. 
++ /** Index into queue of instructions being written back. */ ++ unsigned wbNumInst; ++ ++ /** Cycle number within the queue of instructions being written back. ++ * Used in case there are too many instructions writing back at the current ++ * cycle and writesbacks need to be scheduled for the future. See comments ++ * in instToCommit(). + */ - unsigned cyclesSquashing; ++ unsigned wbCycle; ++ ++ /** Number of active threads. */ ++ unsigned numThreads; ++ ++ /** Pointer to list of active threads. */ ++ std::list *activeThreads; ++ ++ /** Maximum size of the skid buffer. */ ++ unsigned skidBufferMax; + ++ bool switchedOut; ++ ++ /** Stat for total number of idle cycles. */ + Stats::Scalar<> iewIdleCycles; ++ /** Stat for total number of squashing cycles. */ + Stats::Scalar<> iewSquashCycles; ++ /** Stat for total number of blocking cycles. */ + Stats::Scalar<> iewBlockCycles; ++ /** Stat for total number of unblocking cycles. */ + Stats::Scalar<> iewUnblockCycles; - // Stats::Scalar<> iewWBInsts; ++ /** Stat for total number of instructions dispatched. */ + Stats::Scalar<> iewDispatchedInsts; ++ /** Stat for total number of squashed instructions dispatch skips. */ + Stats::Scalar<> iewDispSquashedInsts; ++ /** Stat for total number of dispatched load instructions. */ + Stats::Scalar<> iewDispLoadInsts; ++ /** Stat for total number of dispatched store instructions. */ + Stats::Scalar<> iewDispStoreInsts; ++ /** Stat for total number of dispatched non speculative instructions. */ + Stats::Scalar<> iewDispNonSpecInsts; ++ /** Stat for number of times the IQ becomes full. */ + Stats::Scalar<> iewIQFullEvents; ++ /** Stat for number of times the LSQ becomes full. */ ++ Stats::Scalar<> iewLSQFullEvents; ++ /** Stat for total number of executed instructions. */ + Stats::Scalar<> iewExecutedInsts; - Stats::Scalar<> iewExecLoadInsts; - Stats::Scalar<> iewExecStoreInsts; ++ /** Stat for total number of executed load instructions. 
*/ ++ Stats::Vector<> iewExecLoadInsts; ++ /** Stat for total number of executed store instructions. */ ++// Stats::Scalar<> iewExecStoreInsts; ++ /** Stat for total number of squashed instructions skipped at execute. */ + Stats::Scalar<> iewExecSquashedInsts; ++ /** Stat for total number of memory ordering violation events. */ + Stats::Scalar<> memOrderViolationEvents; ++ /** Stat for total number of incorrect predicted taken branches. */ + Stats::Scalar<> predictedTakenIncorrect; ++ /** Stat for total number of incorrect predicted not taken branches. */ ++ Stats::Scalar<> predictedNotTakenIncorrect; ++ /** Stat for total number of mispredicted branches detected at execute. */ ++ Stats::Formula branchMispredicts; ++ ++ Stats::Vector<> exeSwp; ++ Stats::Vector<> exeNop; ++ Stats::Vector<> exeRefs; ++ Stats::Vector<> exeBranches; ++ ++// Stats::Vector<> issued_ops; ++/* ++ Stats::Vector<> stat_fu_busy; ++ Stats::Vector2d<> stat_fuBusy; ++ Stats::Vector<> dist_unissued; ++ Stats::Vector2d<> stat_issued_inst_type; ++*/ ++ Stats::Formula issueRate; ++ Stats::Formula iewExecStoreInsts; ++// Stats::Formula issue_op_rate; ++// Stats::Formula fu_busy_rate; ++ ++ Stats::Vector<> iewInstsToCommit; ++ Stats::Vector<> writebackCount; ++ Stats::Vector<> producerInst; ++ Stats::Vector<> consumerInst; ++ Stats::Vector<> wbPenalized; ++ ++ Stats::Formula wbRate; ++ Stats::Formula wbFanout; ++ Stats::Formula wbPenalizedRate; +}; + - #endif // __CPU_O3_CPU_IEW_HH__ ++#endif // __CPU_O3_IEW_HH__ diff --cc src/cpu/o3/iew_impl.hh index 85217dd10,000000000..b0137d7fc mode 100644,000000..100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@@ -1,736 -1,0 +1,1578 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// @todo: Fix the instantaneous communication among all the stages within +// iew. There's a clear delay between issue and execute, yet backwards +// communication happens simultaneously. - // Update the statuses for each stage. 
+ +#include + +#include "base/timebuf.hh" ++#include "cpu/o3/fu_pool.hh" +#include "cpu/o3/iew.hh" + ++using namespace std; ++ +template - SimpleIEW::WritebackEvent::WritebackEvent(DynInstPtr &_inst, - SimpleIEW *_iew) - : Event(&mainEventQueue, CPU_Tick_Pri), inst(_inst), iewStage(_iew) ++DefaultIEW::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst, ++ DefaultIEW *_iew) ++ : Event(&mainEventQueue), inst(_inst), iewStage(_iew) +{ + this->setFlags(Event::AutoDelete); +} + +template +void - SimpleIEW::WritebackEvent::process() ++DefaultIEW::LdWritebackEvent::process() +{ - DPRINTF(IEW, "IEW: WRITEBACK EVENT!!!!\n"); ++ DPRINTF(IEW, "Load writeback event [sn:%lli]\n", inst->seqNum); ++ DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum); ++ ++ //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); ++ ++ if (iewStage->isSwitchedOut()) { ++ inst = NULL; ++ return; ++ } else if (inst->isSquashed()) { ++ iewStage->wakeCPU(); ++ inst = NULL; ++ return; ++ } ++ ++ iewStage->wakeCPU(); ++ ++ if (!inst->isExecuted()) { ++ inst->setExecuted(); ++ ++ // Complete access to copy data to proper place. ++ if (inst->isStore()) { ++ inst->completeAcc(); ++ } ++ } + + // Need to insert instruction into queue to commit + iewStage->instToCommit(inst); - // Need to execute second half of the instruction, do actual writing to - // registers and such - inst->execute(); ++ ++ iewStage->activityThisCycle(); ++ ++ inst = NULL; +} + +template +const char * - SimpleIEW::WritebackEvent::description() ++DefaultIEW::LdWritebackEvent::description() +{ - return "LSQ writeback event"; ++ return "Load writeback event"; +} + +template - SimpleIEW::SimpleIEW(Params ¶ms) - : // Just make this time buffer really big for now ++DefaultIEW::DefaultIEW(Params *params) ++ : // @todo: Make this into a parameter. 
+ issueToExecQueue(5, 5), + instQueue(params), + ldstQueue(params), - commitToIEWDelay(params.commitToIEWDelay), - renameToIEWDelay(params.renameToIEWDelay), - issueToExecuteDelay(params.issueToExecuteDelay), - issueReadWidth(params.issueWidth), - issueWidth(params.issueWidth), - executeWidth(params.executeWidth) - { - DPRINTF(IEW, "IEW: executeIntWidth: %i.\n", params.executeIntWidth); - _status = Idle; - _issueStatus = Idle; - _exeStatus = Idle; - _wbStatus = Idle; ++ fuPool(params->fuPool), ++ commitToIEWDelay(params->commitToIEWDelay), ++ renameToIEWDelay(params->renameToIEWDelay), ++ issueToExecuteDelay(params->issueToExecuteDelay), ++ issueReadWidth(params->issueWidth), ++ issueWidth(params->issueWidth), ++ executeWidth(params->executeWidth), ++ numThreads(params->numberOfThreads), ++ switchedOut(false) ++{ ++ _status = Active; ++ exeStatus = Running; ++ wbStatus = Idle; + + // Setup wire to read instructions coming from issue. + fromIssue = issueToExecQueue.getWire(-issueToExecuteDelay); + + // Instruction queue needs the queue between issue and execute. 
+ instQueue.setIssueToExecuteQueue(&issueToExecQueue); + ++ instQueue.setIEW(this); + ldstQueue.setIEW(this); ++ ++ for (int i=0; i < numThreads; i++) { ++ dispatchStatus[i] = Running; ++ stalls[i].commit = false; ++ fetchRedirect[i] = false; ++ } ++ ++ updateLSQNextCycle = false; ++ ++ skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth; ++} ++ ++template ++std::string ++DefaultIEW::name() const ++{ ++ return cpu->name() + ".iew"; +} + +template +void - SimpleIEW::regStats() ++DefaultIEW::regStats() +{ ++ using namespace Stats; ++ + instQueue.regStats(); + + iewIdleCycles + .name(name() + ".iewIdleCycles") + .desc("Number of cycles IEW is idle"); + + iewSquashCycles + .name(name() + ".iewSquashCycles") + .desc("Number of cycles IEW is squashing"); + + iewBlockCycles + .name(name() + ".iewBlockCycles") + .desc("Number of cycles IEW is blocking"); + + iewUnblockCycles + .name(name() + ".iewUnblockCycles") + .desc("Number of cycles IEW is unblocking"); + - // iewWBInsts; - + iewDispatchedInsts + .name(name() + ".iewDispatchedInsts") + .desc("Number of instructions dispatched to IQ"); + + iewDispSquashedInsts + .name(name() + ".iewDispSquashedInsts") + .desc("Number of squashed instructions skipped by dispatch"); + + iewDispLoadInsts + .name(name() + ".iewDispLoadInsts") + .desc("Number of dispatched load instructions"); + + iewDispStoreInsts + .name(name() + ".iewDispStoreInsts") + .desc("Number of dispatched store instructions"); + + iewDispNonSpecInsts + .name(name() + ".iewDispNonSpecInsts") + .desc("Number of dispatched non-speculative instructions"); + + iewIQFullEvents + .name(name() + ".iewIQFullEvents") + .desc("Number of times the IQ has become full, causing a stall"); + ++ iewLSQFullEvents ++ .name(name() + ".iewLSQFullEvents") ++ .desc("Number of times the LSQ has become full, causing a stall"); ++ + iewExecutedInsts + .name(name() + ".iewExecutedInsts") + .desc("Number of executed instructions"); + + iewExecLoadInsts ++ 
.init(cpu->number_of_threads) + .name(name() + ".iewExecLoadInsts") - .desc("Number of load instructions executed"); - - iewExecStoreInsts - .name(name() + ".iewExecStoreInsts") - .desc("Number of store instructions executed"); ++ .desc("Number of load instructions executed") ++ .flags(total); + + iewExecSquashedInsts + .name(name() + ".iewExecSquashedInsts") + .desc("Number of squashed instructions skipped in execute"); + + memOrderViolationEvents + .name(name() + ".memOrderViolationEvents") + .desc("Number of memory order violations"); + + predictedTakenIncorrect + .name(name() + ".predictedTakenIncorrect") + .desc("Number of branches that were predicted taken incorrectly"); ++ ++ predictedNotTakenIncorrect ++ .name(name() + ".predictedNotTakenIncorrect") ++ .desc("Number of branches that were predicted not taken incorrectly"); ++ ++ branchMispredicts ++ .name(name() + ".branchMispredicts") ++ .desc("Number of branch mispredicts detected at execute"); ++ ++ branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect; ++ ++ exeSwp ++ .init(cpu->number_of_threads) ++ .name(name() + ".EXEC:swp") ++ .desc("number of swp insts executed") ++ .flags(total) ++ ; ++ ++ exeNop ++ .init(cpu->number_of_threads) ++ .name(name() + ".EXEC:nop") ++ .desc("number of nop insts executed") ++ .flags(total) ++ ; ++ ++ exeRefs ++ .init(cpu->number_of_threads) ++ .name(name() + ".EXEC:refs") ++ .desc("number of memory reference insts executed") ++ .flags(total) ++ ; ++ ++ exeBranches ++ .init(cpu->number_of_threads) ++ .name(name() + ".EXEC:branches") ++ .desc("Number of branches executed") ++ .flags(total) ++ ; ++ ++ issueRate ++ .name(name() + ".EXEC:rate") ++ .desc("Inst execution rate") ++ .flags(total) ++ ; ++ issueRate = iewExecutedInsts / cpu->numCycles; ++ ++ iewExecStoreInsts ++ .name(name() + ".EXEC:stores") ++ .desc("Number of stores executed") ++ .flags(total) ++ ; ++ iewExecStoreInsts = exeRefs - iewExecLoadInsts; ++/* ++ for (int i=0; inumber_of_threads) ++ 
.name(name() + ".WB:sent") ++ .desc("cumulative count of insts sent to commit") ++ .flags(total) ++ ; ++ ++ writebackCount ++ .init(cpu->number_of_threads) ++ .name(name() + ".WB:count") ++ .desc("cumulative count of insts written-back") ++ .flags(total) ++ ; ++ ++ producerInst ++ .init(cpu->number_of_threads) ++ .name(name() + ".WB:producers") ++ .desc("num instructions producing a value") ++ .flags(total) ++ ; ++ ++ consumerInst ++ .init(cpu->number_of_threads) ++ .name(name() + ".WB:consumers") ++ .desc("num instructions consuming a value") ++ .flags(total) ++ ; ++ ++ wbPenalized ++ .init(cpu->number_of_threads) ++ .name(name() + ".WB:penalized") ++ .desc("number of instrctions required to write to 'other' IQ") ++ .flags(total) ++ ; ++ ++ wbPenalizedRate ++ .name(name() + ".WB:penalized_rate") ++ .desc ("fraction of instructions written-back that wrote to 'other' IQ") ++ .flags(total) ++ ; ++ ++ wbPenalizedRate = wbPenalized / writebackCount; ++ ++ wbFanout ++ .name(name() + ".WB:fanout") ++ .desc("average fanout of values written-back") ++ .flags(total) ++ ; ++ ++ wbFanout = producerInst / consumerInst; ++ ++ wbRate ++ .name(name() + ".WB:rate") ++ .desc("insts written-back per cycle") ++ .flags(total) ++ ; ++ wbRate = writebackCount / cpu->numCycles; +} + +template +void - SimpleIEW::setCPU(FullCPU *cpu_ptr) ++DefaultIEW::initStage() +{ - DPRINTF(IEW, "IEW: Setting CPU pointer.\n"); ++ for (int tid=0; tid < numThreads; tid++) { ++ toRename->iewInfo[tid].usedIQ = true; ++ toRename->iewInfo[tid].freeIQEntries = ++ instQueue.numFreeEntries(tid); ++ ++ toRename->iewInfo[tid].usedLSQ = true; ++ toRename->iewInfo[tid].freeLSQEntries = ++ ldstQueue.numFreeEntries(tid); ++ } ++} ++ ++template ++void ++DefaultIEW::setCPU(FullCPU *cpu_ptr) ++{ ++ DPRINTF(IEW, "Setting CPU pointer.\n"); + cpu = cpu_ptr; + + instQueue.setCPU(cpu_ptr); + ldstQueue.setCPU(cpu_ptr); ++ ++ cpu->activateStage(FullCPU::IEWIdx); +} + +template +void - SimpleIEW::setTimeBuffer(TimeBuffer *tb_ptr) 
++DefaultIEW::setTimeBuffer(TimeBuffer *tb_ptr) +{ - DPRINTF(IEW, "IEW: Setting time buffer pointer.\n"); ++ DPRINTF(IEW, "Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to read information from time buffer, from commit. + fromCommit = timeBuffer->getWire(-commitToIEWDelay); + + // Setup wire to write information back to previous stages. + toRename = timeBuffer->getWire(0); + ++ toFetch = timeBuffer->getWire(0); ++ + // Instruction queue also needs main time buffer. + instQueue.setTimeBuffer(tb_ptr); +} + +template +void - SimpleIEW::setRenameQueue(TimeBuffer *rq_ptr) ++DefaultIEW::setRenameQueue(TimeBuffer *rq_ptr) +{ - DPRINTF(IEW, "IEW: Setting rename queue pointer.\n"); ++ DPRINTF(IEW, "Setting rename queue pointer.\n"); + renameQueue = rq_ptr; + + // Setup wire to read information from rename queue. + fromRename = renameQueue->getWire(-renameToIEWDelay); +} + +template +void - SimpleIEW::setIEWQueue(TimeBuffer *iq_ptr) ++DefaultIEW::setIEWQueue(TimeBuffer *iq_ptr) +{ - DPRINTF(IEW, "IEW: Setting IEW queue pointer.\n"); ++ DPRINTF(IEW, "Setting IEW queue pointer.\n"); + iewQueue = iq_ptr; + + // Setup wire to write instructions to commit. 
+ toCommit = iewQueue->getWire(0); +} + +template +void - SimpleIEW::setRenameMap(RenameMap *rm_ptr) ++DefaultIEW::setActiveThreads(list *at_ptr) ++{ ++ DPRINTF(IEW, "Setting active threads list pointer.\n"); ++ activeThreads = at_ptr; ++ ++ ldstQueue.setActiveThreads(at_ptr); ++ instQueue.setActiveThreads(at_ptr); ++} ++ ++template ++void ++DefaultIEW::setScoreboard(Scoreboard *sb_ptr) ++{ ++ DPRINTF(IEW, "Setting scoreboard pointer.\n"); ++ scoreboard = sb_ptr; ++} ++ ++#if 0 ++template ++void ++DefaultIEW::setPageTable(PageTable *pt_ptr) ++{ ++ ldstQueue.setPageTable(pt_ptr); ++} ++#endif ++ ++template ++void ++DefaultIEW::switchOut() ++{ ++ cpu->signalSwitched(); ++} ++ ++template ++void ++DefaultIEW::doSwitchOut() +{ - DPRINTF(IEW, "IEW: Setting rename map pointer.\n"); - renameMap = rm_ptr; ++ switchedOut = true; ++ ++ instQueue.switchOut(); ++ ldstQueue.switchOut(); ++ fuPool->switchOut(); ++ ++ for (int i = 0; i < numThreads; i++) { ++ while (!insts[i].empty()) ++ insts[i].pop(); ++ while (!skidBuffer[i].empty()) ++ skidBuffer[i].pop(); ++ } ++} ++ ++template ++void ++DefaultIEW::takeOverFrom() ++{ ++ _status = Active; ++ exeStatus = Running; ++ wbStatus = Idle; ++ switchedOut = false; ++ ++ instQueue.takeOverFrom(); ++ ldstQueue.takeOverFrom(); ++ fuPool->takeOverFrom(); ++ ++ initStage(); ++ cpu->activityThisCycle(); ++ ++ for (int i=0; i < numThreads; i++) { ++ dispatchStatus[i] = Running; ++ stalls[i].commit = false; ++ fetchRedirect[i] = false; ++ } ++ ++ updateLSQNextCycle = false; ++ ++ // @todo: Fix hardcoded number ++ for (int i = 0; i < 6; ++i) { ++ issueToExecQueue.advance(); ++ } +} + +template +void - SimpleIEW::squash() ++DefaultIEW::squash(unsigned tid) +{ - DPRINTF(IEW, "IEW: Squashing all instructions.\n"); - _status = Squashing; ++ DPRINTF(IEW, "[tid:%i]: Squashing all instructions.\n", ++ tid); + + // Tell the IQ to start squashing. - instQueue.squash(); ++ instQueue.squash(tid); + + // Tell the LDSTQ to start squashing. 
- ldstQueue.squash(fromCommit->commitInfo.doneSeqNum); ++ ldstQueue.squash(fromCommit->commitInfo[tid].doneSeqNum, tid); ++ ++ updatedQueues = true; ++ ++ // Clear the skid buffer in case it has any data in it. ++ while (!skidBuffer[tid].empty()) { ++ ++ if (skidBuffer[tid].front()->isLoad() || ++ skidBuffer[tid].front()->isStore() ) { ++ toRename->iewInfo[tid].dispatchedToLSQ++; ++ } ++ ++ toRename->iewInfo[tid].dispatched++; ++ ++ skidBuffer[tid].pop(); ++ } ++ ++ while (!insts[tid].empty()) { ++ if (insts[tid].front()->isLoad() || ++ insts[tid].front()->isStore() ) { ++ toRename->iewInfo[tid].dispatchedToLSQ++; ++ } ++ ++ toRename->iewInfo[tid].dispatched++; ++ ++ insts[tid].pop(); ++ } +} + +template +void - SimpleIEW::squashDueToBranch(DynInstPtr &inst) - { - DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n", - inst->PC); - // Perhaps leave the squashing up to the ROB stage to tell it when to - // squash? - _status = Squashing; - - // Tell rename to squash through the time buffer. - toCommit->squash = true; - // Also send PC update information back to prior stages. - toCommit->squashedSeqNum = inst->seqNum; - toCommit->mispredPC = inst->readPC(); - toCommit->nextPC = inst->readNextPC(); - toCommit->branchMispredict = true; - // Prediction was incorrect, so send back inverse. 
- toCommit->branchTaken = inst->readNextPC() != ++DefaultIEW::squashDueToBranch(DynInstPtr &inst, unsigned tid) ++{ ++ DPRINTF(IEW, "[tid:%i]: Squashing from a specific instruction, PC: %#x " ++ "[sn:%i].\n", tid, inst->readPC(), inst->seqNum); ++ ++ toCommit->squash[tid] = true; ++ toCommit->squashedSeqNum[tid] = inst->seqNum; ++ toCommit->mispredPC[tid] = inst->readPC(); ++ toCommit->nextPC[tid] = inst->readNextPC(); ++ toCommit->branchMispredict[tid] = true; ++ toCommit->branchTaken[tid] = inst->readNextPC() != + (inst->readPC() + sizeof(TheISA::MachInst)); ++ ++ toCommit->includeSquashInst[tid] = false; ++ ++ wroteToTimeBuffer = true; ++} ++ ++template ++void ++DefaultIEW::squashDueToMemOrder(DynInstPtr &inst, unsigned tid) ++{ ++ DPRINTF(IEW, "[tid:%i]: Squashing from a specific instruction, " ++ "PC: %#x [sn:%i].\n", tid, inst->readPC(), inst->seqNum); ++ ++ toCommit->squash[tid] = true; ++ toCommit->squashedSeqNum[tid] = inst->seqNum; ++ toCommit->nextPC[tid] = inst->readNextPC(); ++ ++ toCommit->includeSquashInst[tid] = false; ++ ++ wroteToTimeBuffer = true; +} + +template +void - SimpleIEW::squashDueToMem(DynInstPtr &inst) ++DefaultIEW::squashDueToMemBlocked(DynInstPtr &inst, unsigned tid) +{ - DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n", - inst->PC); - // Perhaps leave the squashing up to the ROB stage to tell it when to - // squash? - _status = Squashing; ++ DPRINTF(IEW, "[tid:%i]: Memory blocked, squashing load and younger insts, " ++ "PC: %#x [sn:%i].\n", tid, inst->readPC(), inst->seqNum); ++ ++ toCommit->squash[tid] = true; ++ toCommit->squashedSeqNum[tid] = inst->seqNum; ++ toCommit->nextPC[tid] = inst->readPC(); ++ ++ toCommit->includeSquashInst[tid] = true; + - // Tell rename to squash through the time buffer. - toCommit->squash = true; - // Also send PC update information back to prior stages. 
- toCommit->squashedSeqNum = inst->seqNum; - toCommit->nextPC = inst->readNextPC(); ++ ldstQueue.setLoadBlockedHandled(tid); ++ ++ wroteToTimeBuffer = true; +} + +template +void - SimpleIEW::block() ++DefaultIEW::block(unsigned tid) +{ - DPRINTF(IEW, "IEW: Blocking.\n"); - // Set the status to Blocked. - _status = Blocked; ++ DPRINTF(IEW, "[tid:%u]: Blocking.\n", tid); ++ ++ if (dispatchStatus[tid] != Blocked && ++ dispatchStatus[tid] != Unblocking) { ++ toRename->iewBlock[tid] = true; ++ wroteToTimeBuffer = true; ++ } + + // Add the current inputs to the skid buffer so they can be + // reprocessed when this stage unblocks. - skidBuffer.push(*fromRename); ++ skidInsert(tid); + - // Note that this stage only signals previous stages to stall when - // it is the cause of the stall originates at this stage. Otherwise - // the previous stages are expected to check all possible stall signals. ++ dispatchStatus[tid] = Blocked; +} + +template - inline void - SimpleIEW::unblock() ++void ++DefaultIEW::unblock(unsigned tid) +{ - // Check if there's information in the skid buffer. If there is, then - // set status to unblocking, otherwise set it directly to running. - DPRINTF(IEW, "IEW: Reading instructions out of the skid " - "buffer.\n"); - // Remove the now processed instructions from the skid buffer. - skidBuffer.pop(); - - // If there's still information in the skid buffer, then - // continue to tell previous stages to stall. They will be - // able to restart once the skid buffer is empty. - if (!skidBuffer.empty()) { - toRename->iewInfo.stall = true; - } else { - DPRINTF(IEW, "IEW: Stage is done unblocking.\n"); - _status = Running; ++ DPRINTF(IEW, "[tid:%i]: Reading instructions out of the skid " ++ "buffer %u.\n",tid, tid); ++ ++ // If the skid bufffer is empty, signal back to previous stages to unblock. ++ // Also switch status to running. 
++ if (skidBuffer[tid].empty()) { ++ toRename->iewUnblock[tid] = true; ++ wroteToTimeBuffer = true; ++ DPRINTF(IEW, "[tid:%i]: Done unblocking.\n",tid); ++ dispatchStatus[tid] = Running; + } +} + +template +void - SimpleIEW::wakeDependents(DynInstPtr &inst) ++DefaultIEW::wakeDependents(DynInstPtr &inst) +{ + instQueue.wakeDependents(inst); +} + ++template ++void ++DefaultIEW::rescheduleMemInst(DynInstPtr &inst) ++{ ++ instQueue.rescheduleMemInst(inst); ++} ++ ++template ++void ++DefaultIEW::replayMemInst(DynInstPtr &inst) ++{ ++ instQueue.replayMemInst(inst); ++} + +template +void - SimpleIEW::instToCommit(DynInstPtr &inst) ++DefaultIEW::instToCommit(DynInstPtr &inst) +{ ++ // First check the time slot that this instruction will write ++ // to. If there are free write ports at the time, then go ahead ++ // and write the instruction to that time. If there are not, ++ // keep looking back to see where's the first time there's a ++ // free slot. ++ while ((*iewQueue)[wbCycle].insts[wbNumInst]) { ++ ++wbNumInst; ++ if (wbNumInst == issueWidth) { ++ ++wbCycle; ++ wbNumInst = 0; ++ } + ++ assert(wbCycle < 5); ++ } ++ ++ // Add finished instruction to queue to commit. ++ (*iewQueue)[wbCycle].insts[wbNumInst] = inst; ++ (*iewQueue)[wbCycle].size++; +} + +template ++unsigned ++DefaultIEW::validInstsFromRename() ++{ ++ unsigned inst_count = 0; ++ ++ for (int i=0; isize; i++) { ++ if (!fromRename->insts[i]->squashed) ++ inst_count++; ++ } ++ ++ return inst_count; ++} ++ ++template +void - SimpleIEW::dispatchInsts() - { - //////////////////////////////////////// - // DISPATCH/ISSUE stage - //////////////////////////////////////// - - //Put into its own function? - //Add instructions to IQ if there are any instructions there - - // Check if there are any instructions coming from rename, and we're. - // not squashing. - if (fromRename->size > 0) { - int insts_to_add = fromRename->size; - - // Loop through the instructions, putting them in the instruction - // queue. 
- for (int inst_num = 0; inst_num < insts_to_add; ++inst_num) - { - DynInstPtr inst = fromRename->insts[inst_num]; - - // Make sure there's a valid instruction there. - assert(inst); - - DPRINTF(IEW, "IEW: Issue: Adding PC %#x to IQ.\n", - inst->readPC()); - - // Be sure to mark these instructions as ready so that the - // commit stage can go ahead and execute them, and mark - // them as issued so the IQ doesn't reprocess them. - if (inst->isSquashed()) { - ++iewDispSquashedInsts; - continue; - } else if (instQueue.isFull()) { - DPRINTF(IEW, "IEW: Issue: IQ has become full.\n"); - // Call function to start blocking. - block(); - // Tell previous stage to stall. - toRename->iewInfo.stall = true; - - ++iewIQFullEvents; - break; - } else if (inst->isLoad()) { - DPRINTF(IEW, "IEW: Issue: Memory instruction " - "encountered, adding to LDSTQ.\n"); - - // Reserve a spot in the load store queue for this - // memory access. - ldstQueue.insertLoad(inst); - - ++iewDispLoadInsts; - } else if (inst->isStore()) { - ldstQueue.insertStore(inst); ++DefaultIEW::skidInsert(unsigned tid) ++{ ++ DynInstPtr inst = NULL; + - ++iewDispStoreInsts; - } else if (inst->isNonSpeculative()) { - DPRINTF(IEW, "IEW: Issue: Nonspeculative instruction " - "encountered, skipping.\n"); ++ while (!insts[tid].empty()) { ++ inst = insts[tid].front(); + - // Same hack as with stores. 
- inst->setCanCommit(); ++ insts[tid].pop(); ++ ++ DPRINTF(Decode,"[tid:%i]: Inserting [sn:%lli] PC:%#x into " ++ "dispatch skidBuffer %i\n",tid, inst->seqNum, ++ inst->readPC(),tid); ++ ++ skidBuffer[tid].push(inst); ++ } ++ ++ assert(skidBuffer[tid].size() <= skidBufferMax && ++ "Skidbuffer Exceeded Max Size"); ++} ++ ++template ++int ++DefaultIEW::skidCount() ++{ ++ int max=0; ++ ++ list::iterator threads = (*activeThreads).begin(); ++ ++ while (threads != (*activeThreads).end()) { ++ unsigned thread_count = skidBuffer[*threads++].size(); ++ if (max < thread_count) ++ max = thread_count; ++ } ++ ++ return max; ++} ++ ++template ++bool ++DefaultIEW::skidsEmpty() ++{ ++ list::iterator threads = (*activeThreads).begin(); ++ ++ while (threads != (*activeThreads).end()) { ++ if (!skidBuffer[*threads++].empty()) ++ return false; ++ } ++ ++ return true; ++} ++ ++template ++void ++DefaultIEW::updateStatus() ++{ ++ bool any_unblocking = false; ++ ++ list::iterator threads = (*activeThreads).begin(); ++ ++ threads = (*activeThreads).begin(); ++ ++ while (threads != (*activeThreads).end()) { ++ unsigned tid = *threads++; ++ ++ if (dispatchStatus[tid] == Unblocking) { ++ any_unblocking = true; ++ break; ++ } ++ } ++ ++ // If there are no ready instructions waiting to be scheduled by the IQ, ++ // and there's no stores waiting to write back, and dispatch is not ++ // unblocking, then there is no internal activity for the IEW stage. ++ if (_status == Active && !instQueue.hasReadyInsts() && ++ !ldstQueue.willWB() && !any_unblocking) { ++ DPRINTF(IEW, "IEW switching to idle\n"); ++ ++ deactivateStage(); ++ ++ _status = Inactive; ++ } else if (_status == Inactive && (instQueue.hasReadyInsts() || ++ ldstQueue.willWB() || ++ any_unblocking)) { ++ // Otherwise there is internal activity. Set to active. 
++ DPRINTF(IEW, "IEW switching to active\n"); ++ ++ activateStage(); ++ ++ _status = Active; ++ } ++} ++ ++template ++void ++DefaultIEW::resetEntries() ++{ ++ instQueue.resetEntries(); ++ ldstQueue.resetEntries(); ++} ++ ++template ++void ++DefaultIEW::readStallSignals(unsigned tid) ++{ ++ if (fromCommit->commitBlock[tid]) { ++ stalls[tid].commit = true; ++ } ++ ++ if (fromCommit->commitUnblock[tid]) { ++ assert(stalls[tid].commit); ++ stalls[tid].commit = false; ++ } ++} ++ ++template ++bool ++DefaultIEW::checkStall(unsigned tid) ++{ ++ bool ret_val(false); ++ ++ if (stalls[tid].commit) { ++ DPRINTF(IEW,"[tid:%i]: Stall from Commit stage detected.\n",tid); ++ ret_val = true; ++ } else if (instQueue.isFull(tid)) { ++ DPRINTF(IEW,"[tid:%i]: Stall: IQ is full.\n",tid); ++ ret_val = true; ++ } else if (ldstQueue.isFull(tid)) { ++ DPRINTF(IEW,"[tid:%i]: Stall: LSQ is full\n",tid); ++ ++ if (ldstQueue.numLoads(tid) > 0 ) { ++ ++ DPRINTF(IEW,"[tid:%i]: LSQ oldest load: [sn:%i] \n", ++ tid,ldstQueue.getLoadHeadSeqNum(tid)); ++ } ++ ++ if (ldstQueue.numStores(tid) > 0) { ++ ++ DPRINTF(IEW,"[tid:%i]: LSQ oldest store: [sn:%i] \n", ++ tid,ldstQueue.getStoreHeadSeqNum(tid)); ++ } ++ ++ ret_val = true; ++ } else if (ldstQueue.isStalled(tid)) { ++ DPRINTF(IEW,"[tid:%i]: Stall: LSQ stall detected.\n",tid); ++ ret_val = true; ++ } ++ ++ return ret_val; ++} ++ ++template ++void ++DefaultIEW::checkSignalsAndUpdate(unsigned tid) ++{ ++ // Check if there's a squash signal, squash if there is ++ // Check stall signals, block if there is. ++ // If status was Blocked ++ // if so then go to unblocking ++ // If status was Squashing ++ // check if squashing is not high. Switch to running this cycle. 
++ ++ readStallSignals(tid); ++ ++ if (fromCommit->commitInfo[tid].squash) { ++ squash(tid); ++ ++ if (dispatchStatus[tid] == Blocked || ++ dispatchStatus[tid] == Unblocking) { ++ toRename->iewUnblock[tid] = true; ++ wroteToTimeBuffer = true; ++ } ++ ++ dispatchStatus[tid] = Squashing; ++ ++ fetchRedirect[tid] = false; ++ return; ++ } ++ ++ if (fromCommit->commitInfo[tid].robSquashing) { ++ DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n"); ++ ++ dispatchStatus[tid] = Squashing; ++ ++ return; ++ } ++ ++ if (checkStall(tid)) { ++ block(tid); ++ dispatchStatus[tid] = Blocked; ++ return; ++ } ++ ++ if (dispatchStatus[tid] == Blocked) { ++ // Status from previous cycle was blocked, but there are no more stall ++ // conditions. Switch over to unblocking. ++ DPRINTF(IEW, "[tid:%i]: Done blocking, switching to unblocking.\n", ++ tid); ++ ++ dispatchStatus[tid] = Unblocking; + - // Specificall insert it as nonspeculative. ++ unblock(tid); ++ ++ return; ++ } ++ ++ if (dispatchStatus[tid] == Squashing) { ++ // Switch status to running if rename isn't being told to block or ++ // squash this cycle. 
++ DPRINTF(IEW, "[tid:%i]: Done squashing, switching to running.\n", ++ tid); ++ ++ dispatchStatus[tid] = Running; ++ ++ return; ++ } ++} ++ ++template ++void ++DefaultIEW::sortInsts() ++{ ++ int insts_from_rename = fromRename->size; ++#ifdef DEBUG ++ for (int i = 0; i < numThreads; i++) ++ assert(insts[i].empty()); ++#endif ++ for (int i = 0; i < insts_from_rename; ++i) { ++ insts[fromRename->insts[i]->threadNumber].push(fromRename->insts[i]); ++ } ++} ++ ++template ++void ++DefaultIEW::wakeCPU() ++{ ++ cpu->wakeCPU(); ++} ++ ++template ++void ++DefaultIEW::activityThisCycle() ++{ ++ DPRINTF(Activity, "Activity this cycle.\n"); ++ cpu->activityThisCycle(); ++} ++ ++template ++inline void ++DefaultIEW::activateStage() ++{ ++ DPRINTF(Activity, "Activating stage.\n"); ++ cpu->activateStage(FullCPU::IEWIdx); ++} ++ ++template ++inline void ++DefaultIEW::deactivateStage() ++{ ++ DPRINTF(Activity, "Deactivating stage.\n"); ++ cpu->deactivateStage(FullCPU::IEWIdx); ++} ++ ++template ++void ++DefaultIEW::dispatch(unsigned tid) ++{ ++ // If status is Running or idle, ++ // call dispatchInsts() ++ // If status is Unblocking, ++ // buffer any instructions coming from rename ++ // continue trying to empty skid buffer ++ // check if stall conditions have passed ++ ++ if (dispatchStatus[tid] == Blocked) { ++ ++iewBlockCycles; ++ ++ } else if (dispatchStatus[tid] == Squashing) { ++ ++iewSquashCycles; ++ } ++ ++ // Dispatch should try to dispatch as many instructions as its bandwidth ++ // will allow, as long as it is not currently blocked. ++ if (dispatchStatus[tid] == Running || ++ dispatchStatus[tid] == Idle) { ++ DPRINTF(IEW, "[tid:%i] Not blocked, so attempting to run " ++ "dispatch.\n", tid); ++ ++ dispatchInsts(tid); ++ } else if (dispatchStatus[tid] == Unblocking) { ++ // Make sure that the skid buffer has something in it if the ++ // status is unblocking. 
++ assert(!skidsEmpty()); ++ ++ // If the status was unblocking, then instructions from the skid ++ // buffer were used. Remove those instructions and handle ++ // the rest of unblocking. ++ dispatchInsts(tid); ++ ++ ++iewUnblockCycles; ++ ++ if (validInstsFromRename() && dispatchedAllInsts) { ++ // Add the current inputs to the skid buffer so they can be ++ // reprocessed when this stage unblocks. ++ skidInsert(tid); ++ } ++ ++ unblock(tid); ++ } ++} ++ ++template ++void ++DefaultIEW::dispatchInsts(unsigned tid) ++{ ++ dispatchedAllInsts = true; ++ ++ // Obtain instructions from skid buffer if unblocking, or queue from rename ++ // otherwise. ++ std::queue &insts_to_dispatch = ++ dispatchStatus[tid] == Unblocking ? ++ skidBuffer[tid] : insts[tid]; ++ ++ int insts_to_add = insts_to_dispatch.size(); ++ ++ DynInstPtr inst; ++ bool add_to_iq = false; ++ int dis_num_inst = 0; ++ ++ // Loop through the instructions, putting them in the instruction ++ // queue. ++ for ( ; dis_num_inst < insts_to_add && ++ dis_num_inst < issueReadWidth; ++ ++dis_num_inst) ++ { ++ inst = insts_to_dispatch.front(); ++ ++ if (dispatchStatus[tid] == Unblocking) { ++ DPRINTF(IEW, "[tid:%i]: Issue: Examining instruction from skid " ++ "buffer\n", tid); ++ } ++ ++ // Make sure there's a valid instruction there. ++ assert(inst); ++ ++ DPRINTF(IEW, "[tid:%i]: Issue: Adding PC %#x [sn:%lli] [tid:%i] to " ++ "IQ.\n", ++ tid, inst->readPC(), inst->seqNum, inst->threadNumber); ++ ++ // Be sure to mark these instructions as ready so that the ++ // commit stage can go ahead and execute them, and mark ++ // them as issued so the IQ doesn't reprocess them. ++ ++ // Check for squashed instructions. 
++ if (inst->isSquashed()) { ++ DPRINTF(IEW, "[tid:%i]: Issue: Squashed instruction encountered, " ++ "not adding to IQ.\n", tid); ++ ++ ++iewDispSquashedInsts; ++ ++ insts_to_dispatch.pop(); ++ ++ //Tell Rename That An Instruction has been processed ++ if (inst->isLoad() || inst->isStore()) { ++ toRename->iewInfo[tid].dispatchedToLSQ++; ++ } ++ toRename->iewInfo[tid].dispatched++; ++ ++ continue; ++ } ++ ++ // Check for full conditions. ++ if (instQueue.isFull(tid)) { ++ DPRINTF(IEW, "[tid:%i]: Issue: IQ has become full.\n", tid); ++ ++ // Call function to start blocking. ++ block(tid); ++ ++ // Set unblock to false. Special case where we are using ++ // skidbuffer (unblocking) instructions but then we still ++ // get full in the IQ. ++ toRename->iewUnblock[tid] = false; ++ ++ dispatchedAllInsts = false; ++ ++ ++iewIQFullEvents; ++ break; ++ } else if (ldstQueue.isFull(tid)) { ++ DPRINTF(IEW, "[tid:%i]: Issue: LSQ has become full.\n",tid); ++ ++ // Call function to start blocking. ++ block(tid); ++ ++ // Set unblock to false. Special case where we are using ++ // skidbuffer (unblocking) instructions but then we still ++ // get full in the IQ. ++ toRename->iewUnblock[tid] = false; ++ ++ dispatchedAllInsts = false; ++ ++ ++iewLSQFullEvents; ++ break; ++ } ++ ++ // Otherwise issue the instruction just fine. ++ if (inst->isLoad()) { ++ DPRINTF(IEW, "[tid:%i]: Issue: Memory instruction " ++ "encountered, adding to LSQ.\n", tid); ++ ++ // Reserve a spot in the load store queue for this ++ // memory access. 
++ ldstQueue.insertLoad(inst); ++ ++ ++iewDispLoadInsts; ++ ++ add_to_iq = true; ++ ++ toRename->iewInfo[tid].dispatchedToLSQ++; ++ } else if (inst->isStore()) { ++ DPRINTF(IEW, "[tid:%i]: Issue: Memory instruction " ++ "encountered, adding to LSQ.\n", tid); ++ ++ ldstQueue.insertStore(inst); ++ ++ ++iewDispStoreInsts; ++ ++ if (inst->isStoreConditional()) { ++ // Store conditionals need to be set as "canCommit()" ++ // so that commit can process them when they reach the ++ // head of commit. ++ inst->setCanCommit(); + instQueue.insertNonSpec(inst); ++ add_to_iq = false; + + ++iewDispNonSpecInsts; ++ } else { ++ add_to_iq = true; ++ } + - continue; - } else if (inst->isNop()) { - DPRINTF(IEW, "IEW: Issue: Nop instruction encountered " - ", skipping.\n"); ++ toRename->iewInfo[tid].dispatchedToLSQ++; ++#if FULL_SYSTEM ++ } else if (inst->isMemBarrier() || inst->isWriteBarrier()) { ++ // Same as non-speculative stores. ++ inst->setCanCommit(); ++ instQueue.insertBarrier(inst); ++ add_to_iq = false; ++#endif ++ } else if (inst->isNonSpeculative()) { ++ DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction " ++ "encountered, skipping.\n", tid); + - inst->setIssued(); - inst->setExecuted(); - inst->setCanCommit(); ++ // Same as non-speculative stores. ++ inst->setCanCommit(); + - instQueue.advanceTail(inst); ++ // Specifically insert it as nonspeculative. 
++ instQueue.insertNonSpec(inst); + - continue; - } else if (inst->isExecuted()) { - assert(0 && "Instruction shouldn't be executed.\n"); - DPRINTF(IEW, "IEW: Issue: Executed branch encountered, " - "skipping.\n"); ++ ++iewDispNonSpecInsts; + - inst->setIssued(); - inst->setCanCommit(); ++ add_to_iq = false; ++ } else if (inst->isNop()) { ++ DPRINTF(IEW, "[tid:%i]: Issue: Nop instruction encountered, " ++ "skipping.\n", tid); + - instQueue.advanceTail(inst); ++ inst->setIssued(); ++ inst->setExecuted(); ++ inst->setCanCommit(); + - continue; - } ++ instQueue.recordProducer(inst); + - // If the instruction queue is not full, then add the - // instruction. - instQueue.insert(fromRename->insts[inst_num]); ++ exeNop[tid]++; ++ ++ add_to_iq = false; ++ } else if (inst->isExecuted()) { ++ assert(0 && "Instruction shouldn't be executed.\n"); ++ DPRINTF(IEW, "Issue: Executed branch encountered, " ++ "skipping.\n"); ++ ++ inst->setIssued(); ++ inst->setCanCommit(); ++ ++ instQueue.recordProducer(inst); ++ ++ add_to_iq = false; ++ } else { ++ add_to_iq = true; ++ } + - ++iewDispatchedInsts; ++ // If the instruction queue is not full, then add the ++ // instruction. ++ if (add_to_iq) { ++ instQueue.insert(inst); + } ++ ++ insts_to_dispatch.pop(); ++ ++ toRename->iewInfo[tid].dispatched++; ++ ++ ++iewDispatchedInsts; ++ } ++ ++ if (!insts_to_dispatch.empty()) { ++ DPRINTF(IEW,"[tid:%i]: Issue: Bandwidth Full. Blocking.\n"); ++ block(tid); ++ toRename->iewUnblock[tid] = false; ++ } ++ ++ if (dispatchStatus[tid] == Idle && dis_num_inst) { ++ dispatchStatus[tid] = Running; ++ ++ updatedQueues = true; + } ++ ++ dis_num_inst = 0; +} + +template +void - SimpleIEW::executeInsts() ++DefaultIEW::printAvailableInsts() +{ - //////////////////////////////////////// - //EXECUTE/WRITEBACK stage - //////////////////////////////////////// ++ int inst = 0; ++ ++ cout << "Available Instructions: "; ++ ++ while (fromIssue->insts[inst]) { + - //Put into its own function? 
- //Similarly should probably have separate execution for int vs FP. - // Above comment is handled by the issue queue only issuing a valid - // mix of int/fp instructions. - //Actually okay to just have one execution, buuuuuut will need - //somewhere that defines the execution latency of all instructions. - // @todo: Move to the FU pool used in the current full cpu. ++ if (inst%3==0) cout << "\n\t"; + - int fu_usage = 0; - bool fetch_redirect = false; - int inst_slot = 0; - int time_slot = 0; ++ cout << "PC: " << fromIssue->insts[inst]->readPC() ++ << " TN: " << fromIssue->insts[inst]->threadNumber ++ << " SN: " << fromIssue->insts[inst]->seqNum << " | "; ++ ++ inst++; ++ ++ } ++ ++ cout << "\n"; ++} ++ ++template ++void ++DefaultIEW::executeInsts() ++{ ++ wbNumInst = 0; ++ wbCycle = 0; ++ ++ list::iterator threads = (*activeThreads).begin(); ++ ++ while (threads != (*activeThreads).end()) { ++ unsigned tid = *threads++; ++ fetchRedirect[tid] = false; ++ } ++ ++#if 0 ++ printAvailableInsts(); ++#endif + + // Execute/writeback any instructions that are available. - for (int inst_num = 0; - fu_usage < executeWidth && /* Haven't exceeded available FU's. */ - inst_num < issueWidth && - fromIssue->insts[inst_num]; - ++inst_num) { ++ int insts_to_execute = fromIssue->size; ++ int inst_num = 0; ++ for (; inst_num < insts_to_execute; ++ ++inst_num) { + - DPRINTF(IEW, "IEW: Execute: Executing instructions from IQ.\n"); ++ DPRINTF(IEW, "Execute: Executing instructions from IQ.\n"); + - // Get instruction from issue's queue. - DynInstPtr inst = fromIssue->insts[inst_num]; ++ DynInstPtr inst = instQueue.getInstToExecute(); + - DPRINTF(IEW, "IEW: Execute: Processing PC %#x.\n", inst->readPC()); ++ DPRINTF(IEW, "Execute: Processing PC %#x, [tid:%i] [sn:%i].\n", ++ inst->readPC(), inst->threadNumber,inst->seqNum); + + // Check if the instruction is squashed; if so then skip it - // and don't count it towards the FU usage. 
+ if (inst->isSquashed()) { - DPRINTF(IEW, "IEW: Execute: Instruction was squashed.\n"); ++ DPRINTF(IEW, "Execute: Instruction was squashed.\n"); + + // Consider this instruction executed so that commit can go + // ahead and retire the instruction. + inst->setExecuted(); + - toCommit->insts[inst_num] = inst; ++ // Not sure if I should set this here or just let commit try to ++ // commit any squashed instructions. I like the latter a bit more. ++ inst->setCanCommit(); + + ++iewExecSquashedInsts; + + continue; + } + - inst->setExecuted(); - - // If an instruction is executed, then count it towards FU usage. - ++fu_usage; ++ Fault fault = NoFault; + + // Execute instruction. + // Note that if the instruction faults, it will be handled + // at the commit stage. - if (inst->isMemRef()) { - DPRINTF(IEW, "IEW: Execute: Calculating address for memory " ++ if (inst->isMemRef() && ++ (!inst->isDataPrefetch() && !inst->isInstPrefetch())) { ++ DPRINTF(IEW, "Execute: Calculating address for memory " + "reference.\n"); + + // Tell the LDSTQ to execute this instruction (if it is a load). + if (inst->isLoad()) { - ldstQueue.executeLoad(inst); - - ++iewExecLoadInsts; ++ // Loads will mark themselves as executed, and their writeback ++ // event adds the instruction to the queue to commit ++ fault = ldstQueue.executeLoad(inst); + } else if (inst->isStore()) { + ldstQueue.executeStore(inst); + - ++iewExecStoreInsts; ++ // If the store had a fault then it may not have a mem req ++ if (inst->req && !(inst->req->flags & LOCKED)) { ++ inst->setExecuted(); ++ ++ instToCommit(inst); ++ } ++ ++ // Store conditionals will mark themselves as ++ // executed, and their writeback event will add the ++ // instruction to the queue to commit. 
+ } else { - panic("IEW: Unexpected memory type!\n"); ++ panic("Unexpected memory type!\n"); + } + + } else { + inst->execute(); + - ++iewExecutedInsts; ++ inst->setExecuted(); ++ ++ instToCommit(inst); + } + - // First check the time slot that this instruction will write - // to. If there are free write ports at the time, then go ahead - // and write the instruction to that time. If there are not, - // keep looking back to see where's the first time there's a - // free slot. What happens if you run out of free spaces? - // For now naively assume that all instructions take one cycle. - // Otherwise would have to look into the time buffer based on the - // latency of the instruction. - (*iewQueue)[time_slot].insts[inst_slot]; - while ((*iewQueue)[time_slot].insts[inst_slot]) { - if (inst_slot < issueWidth) { - ++inst_slot; - } else { - ++time_slot; - inst_slot = 0; - } ++ updateExeInstStats(inst); + - assert(time_slot < 5); - } ++ // Check if branch prediction was correct, if not then we need ++ // to tell commit to squash in flight instructions. Only ++ // handle this if there hasn't already been something that ++ // redirects fetch in this group of instructions. + - // May actually have to work this out, especially with loads and stores ++ // This probably needs to prioritize the redirects if a different ++ // scheduler is used. Currently the scheduler schedules the oldest ++ // instruction first, so the branch resolution order will be correct. ++ unsigned tid = inst->threadNumber; + - // Add finished instruction to queue to commit. - (*iewQueue)[time_slot].insts[inst_slot] = inst; - (*iewQueue)[time_slot].size++; ++ if (!fetchRedirect[tid]) { + - // Check if branch was correct. This check happens after the - // instruction is added to the queue because even if the branch - // is mispredicted, the branch instruction itself is still valid. - // Only handle this if there hasn't already been something that - // redirects fetch in this group of instructions. 
- if (!fetch_redirect) { + if (inst->mispredicted()) { - fetch_redirect = true; ++ fetchRedirect[tid] = true; + - DPRINTF(IEW, "IEW: Execute: Branch mispredict detected.\n"); - DPRINTF(IEW, "IEW: Execute: Redirecting fetch to PC: %#x.\n", ++ DPRINTF(IEW, "Execute: Branch mispredict detected.\n"); ++ DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x.\n", + inst->nextPC); + + // If incorrect, then signal the ROB that it must be squashed. - squashDueToBranch(inst); ++ squashDueToBranch(inst, tid); + + if (inst->predTaken()) { + predictedTakenIncorrect++; ++ } else { ++ predictedNotTakenIncorrect++; + } - } else if (ldstQueue.violation()) { - fetch_redirect = true; ++ } else if (ldstQueue.violation(tid)) { ++ fetchRedirect[tid] = true; + - // Get the DynInst that caused the violation. - DynInstPtr violator = ldstQueue.getMemDepViolator(); ++ // If there was an ordering violation, then get the ++ // DynInst that caused the violation. Note that this ++ // clears the violation signal. ++ DynInstPtr violator; ++ violator = ldstQueue.getMemDepViolator(tid); + - DPRINTF(IEW, "IEW: LDSTQ detected a violation. Violator PC: " ++ DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: " + "%#x, inst PC: %#x. Addr is: %#x.\n", + violator->readPC(), inst->readPC(), inst->physEffAddr); + + // Tell the instruction queue that a violation has occured. + instQueue.violation(inst, violator); + + // Squash. - squashDueToMem(inst); ++ squashDueToMemOrder(inst,tid); + + ++memOrderViolationEvents; ++ } else if (ldstQueue.loadBlocked(tid) && ++ !ldstQueue.isLoadBlockedHandled(tid)) { ++ fetchRedirect[tid] = true; ++ ++ DPRINTF(IEW, "Load operation couldn't execute because the " ++ "memory system is blocked. 
PC: %#x [sn:%lli]\n", ++ inst->readPC(), inst->seqNum); ++ ++ squashDueToMemBlocked(inst, tid); + } + } + } ++ ++ if (inst_num) { ++ if (exeStatus == Idle) { ++ exeStatus = Running; ++ } ++ ++ updatedQueues = true; ++ ++ cpu->activityThisCycle(); ++ } ++ ++ // Need to reset this in case a writeback event needs to write into the ++ // iew queue. That way the writeback event will write into the correct ++ // spot in the queue. ++ wbNumInst = 0; ++} ++ ++template ++void ++DefaultIEW::writebackInsts() ++{ ++ // Loop through the head of the time buffer and wake any ++ // dependents. These instructions are about to write back. Also ++ // mark scoreboard that this instruction is finally complete. ++ // Either have IEW have direct access to scoreboard, or have this ++ // as part of backwards communication. ++ for (int inst_num = 0; inst_num < issueWidth && ++ toCommit->insts[inst_num]; inst_num++) { ++ DynInstPtr inst = toCommit->insts[inst_num]; ++ int tid = inst->threadNumber; ++ ++ DPRINTF(IEW, "Sending instructions to commit, PC %#x.\n", ++ inst->readPC()); ++ ++ iewInstsToCommit[tid]++; ++ ++ // Some instructions will be sent to commit without having ++ // executed because they need commit to handle them. ++ // E.g. Uncached loads have not actually executed when they ++ // are first sent to commit. Instead commit must tell the LSQ ++ // when it's ready to execute the uncached load. ++ if (!inst->isSquashed() && inst->isExecuted()) { ++ int dependents = instQueue.wakeDependents(inst); ++ ++ for (int i = 0; i < inst->numDestRegs(); i++) { ++ //mark as Ready ++ DPRINTF(IEW,"Setting Destination Register %i\n", ++ inst->renamedDestRegIdx(i)); ++ scoreboard->setReg(inst->renamedDestRegIdx(i)); ++ } ++ ++ producerInst[tid]++; ++ consumerInst[tid]+= dependents; ++ writebackCount[tid]++; ++ } ++ } +} + +template +void - SimpleIEW::tick() ++DefaultIEW::tick() +{ - // Considering putting all the state-determining stuff in this section. 
++ wbNumInst = 0; ++ wbCycle = 0; + - // Try to fill up issue queue with as many instructions as bandwidth - // allows. - // Decode should try to execute as many instructions as its bandwidth - // will allow, as long as it is not currently blocked. ++ wroteToTimeBuffer = false; ++ updatedQueues = false; + - // Check if the stage is in a running status. - if (_status != Blocked && _status != Squashing) { - DPRINTF(IEW, "IEW: Status is not blocked, attempting to run " - "stage.\n"); - iew(); ++ sortInsts(); + - // If it's currently unblocking, check to see if it should switch - // to running. - if (_status == Unblocking) { - unblock(); ++ // Free function units marked as being freed this cycle. ++ fuPool->processFreeUnits(); + - ++iewUnblockCycles; - } - } else if (_status == Squashing) { ++ list::iterator threads = (*activeThreads).begin(); + - DPRINTF(IEW, "IEW: Still squashing.\n"); ++ // Check stall and squash signals, dispatch any instructions. ++ while (threads != (*activeThreads).end()) { ++ unsigned tid = *threads++; + - // Check if stage should remain squashing. Stop squashing if the - // squash signal clears. - if (!fromCommit->commitInfo.squash && - !fromCommit->commitInfo.robSquashing) { - DPRINTF(IEW, "IEW: Done squashing, changing status to " - "running.\n"); ++ DPRINTF(IEW,"Issue: Processing [tid:%i]\n",tid); + - _status = Running; - instQueue.stopSquash(); - } else { - instQueue.doSquash(); - } ++ checkSignalsAndUpdate(tid); ++ dispatch(tid); ++ } + - ++iewSquashCycles; - } else if (_status == Blocked) { - // Continue to tell previous stage to stall. - toRename->iewInfo.stall = true; - - // Check if possible stall conditions have cleared. - if (!fromCommit->commitInfo.stall && - !instQueue.isFull()) { - DPRINTF(IEW, "IEW: Stall signals cleared, going to unblock.\n"); - _status = Unblocking; - } ++ if (exeStatus != Squashing) { ++ executeInsts(); + - // If there's still instructions coming from rename, continue to - // put them on the skid buffer. 
- if (fromRename->size == 0) { - block(); - } ++ writebackInsts(); + - if (fromCommit->commitInfo.squash || - fromCommit->commitInfo.robSquashing) { - squash(); - } ++ // Have the instruction queue try to schedule any ready instructions. ++ // (In actuality, this scheduling is for instructions that will ++ // be executed next cycle.) ++ instQueue.scheduleReadyInsts(); + - ++iewBlockCycles; ++ // Also should advance its own time buffers if the stage ran. ++ // Not the best place for it, but this works (hopefully). ++ issueToExecQueue.advance(); + } + - // @todo: Maybe put these at the beginning, so if it's idle it can - // return early. - // Write back number of free IQ entries here. - toRename->iewInfo.freeIQEntries = instQueue.numFreeEntries(); ++ bool broadcast_free_entries = false; ++ ++ if (updatedQueues || exeStatus == Running || updateLSQNextCycle) { ++ exeStatus = Idle; ++ updateLSQNextCycle = false; ++ ++ broadcast_free_entries = true; ++ } + ++ // Writeback any stores using any leftover bandwidth. + ldstQueue.writebackStores(); + + // Check the committed load/store signals to see if there's a load + // or store to commit. Also check if it's being told to execute a + // nonspeculative instruction. + // This is pretty inefficient... - if (!fromCommit->commitInfo.squash && - !fromCommit->commitInfo.robSquashing) { - ldstQueue.commitStores(fromCommit->commitInfo.doneSeqNum); - ldstQueue.commitLoads(fromCommit->commitInfo.doneSeqNum); - } + - if (fromCommit->commitInfo.nonSpecSeqNum != 0) { - instQueue.scheduleNonSpec(fromCommit->commitInfo.nonSpecSeqNum); - } ++ threads = (*activeThreads).begin(); ++ while (threads != (*activeThreads).end()) { ++ unsigned tid = (*threads++); + - DPRINTF(IEW, "IEW: IQ has %i free entries.\n", - instQueue.numFreeEntries()); - } ++ DPRINTF(IEW,"Processing [tid:%i]\n",tid); + - template - void - SimpleIEW::iew() - { - // Might want to put all state checks in the tick() function. - // Check if being told to stall from commit. 
- if (fromCommit->commitInfo.stall) { - block(); - return; - } else if (fromCommit->commitInfo.squash || - fromCommit->commitInfo.robSquashing) { - // Also check if commit is telling this stage to squash. - squash(); - return; - } ++ if (fromCommit->commitInfo[tid].doneSeqNum != 0 && ++ !fromCommit->commitInfo[tid].squash && ++ !fromCommit->commitInfo[tid].robSquashing) { + - dispatchInsts(); ++ ldstQueue.commitStores(fromCommit->commitInfo[tid].doneSeqNum,tid); + - // Have the instruction queue try to schedule any ready instructions. - instQueue.scheduleReadyInsts(); ++ ldstQueue.commitLoads(fromCommit->commitInfo[tid].doneSeqNum,tid); + - executeInsts(); ++ updateLSQNextCycle = true; ++ instQueue.commit(fromCommit->commitInfo[tid].doneSeqNum,tid); ++ } + - // Loop through the head of the time buffer and wake any dependents. - // These instructions are about to write back. In the simple model - // this loop can really happen within the previous loop, but when - // instructions have actual latencies, this loop must be separate. - // Also mark scoreboard that this instruction is finally complete. - // Either have IEW have direct access to rename map, or have this as - // part of backwards communication. 
- for (int inst_num = 0; inst_num < issueWidth && - toCommit->insts[inst_num]; inst_num++) - { - DynInstPtr inst = toCommit->insts[inst_num]; ++ if (fromCommit->commitInfo[tid].nonSpecSeqNum != 0) { + - DPRINTF(IEW, "IEW: Sending instructions to commit, PC %#x.\n", - inst->readPC()); ++ //DPRINTF(IEW,"NonspecInst from thread %i",tid); ++ if (fromCommit->commitInfo[tid].uncached) { ++ instQueue.replayMemInst(fromCommit->commitInfo[tid].uncachedLoad); ++ } else { ++ instQueue.scheduleNonSpec( ++ fromCommit->commitInfo[tid].nonSpecSeqNum); ++ } ++ } + - if(!inst->isSquashed()) { - instQueue.wakeDependents(inst); ++ if (broadcast_free_entries) { ++ toFetch->iewInfo[tid].iqCount = ++ instQueue.getCount(tid); ++ toFetch->iewInfo[tid].ldstqCount = ++ ldstQueue.getCount(tid); + - for (int i = 0; i < inst->numDestRegs(); i++) - { - renameMap->markAsReady(inst->renamedDestRegIdx(i)); - } ++ toRename->iewInfo[tid].usedIQ = true; ++ toRename->iewInfo[tid].freeIQEntries = ++ instQueue.numFreeEntries(); ++ toRename->iewInfo[tid].usedLSQ = true; ++ toRename->iewInfo[tid].freeLSQEntries = ++ ldstQueue.numFreeEntries(tid); ++ ++ wroteToTimeBuffer = true; + } ++ ++ DPRINTF(IEW, "[tid:%i], Dispatch dispatched %i instructions.\n", ++ tid, toRename->iewInfo[tid].dispatched); + } + - // Also should advance its own time buffers if the stage ran. - // Not the best place for it, but this works (hopefully). - issueToExecQueue.advance(); ++ DPRINTF(IEW, "IQ has %i free entries (Can schedule: %i). 
" ++ "LSQ has %i free entries.\n", ++ instQueue.numFreeEntries(), instQueue.hasReadyInsts(), ++ ldstQueue.numFreeEntries()); ++ ++ updateStatus(); ++ ++ if (wroteToTimeBuffer) { ++ DPRINTF(Activity, "Activity this cycle.\n"); ++ cpu->activityThisCycle(); ++ } +} + - #if !FULL_SYSTEM - template ++template +void - SimpleIEW::lsqWriteback() ++DefaultIEW::updateExeInstStats(DynInstPtr &inst) +{ - ldstQueue.writebackAllInsts(); - } ++ int thread_number = inst->threadNumber; ++ ++ // ++ // Pick off the software prefetches ++ // ++#ifdef TARGET_ALPHA ++ if (inst->isDataPrefetch()) ++ exeSwp[thread_number]++; ++ else ++ iewExecutedInsts++; ++#else ++ iewExecutedInsts[thread_number]++; +#endif ++ ++ // ++ // Control operations ++ // ++ if (inst->isControl()) ++ exeBranches[thread_number]++; ++ ++ // ++ // Memory operations ++ // ++ if (inst->isMemRef()) { ++ exeRefs[thread_number]++; ++ ++ if (inst->isLoad()) { ++ iewExecLoadInsts[thread_number]++; ++ } ++ } ++} diff --cc src/cpu/o3/inst_queue.cc index 2ff2282b4,000000000..95ae2b699 mode 100644,000000..100644 --- a/src/cpu/o3/inst_queue.cc +++ b/src/cpu/o3/inst_queue.cc @@@ -1,38 -1,0 +1,34 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/inst_queue_impl.hh" + +// Force instantiation of InstructionQueue. +template class InstructionQueue; - - template<> - unsigned - InstructionQueue::DependencyEntry::mem_alloc_counter = 0; diff --cc src/cpu/o3/inst_queue.hh index 43fe96c49,000000000..518de73d9 mode 100644,000000..100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@@ -1,336 -1,0 +1,479 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - #ifndef __CPU_O3_CPU_INST_QUEUE_HH__ - #define __CPU_O3_CPU_INST_QUEUE_HH__ ++#ifndef __CPU_O3_INST_QUEUE_HH__ ++#define __CPU_O3_INST_QUEUE_HH__ + +#include +#include +#include +#include + +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/inst_seq.hh" ++#include "cpu/o3/dep_graph.hh" ++#include "encumbered/cpu/full/op_class.hh" +#include "sim/host.hh" + ++class FUPool; ++class MemInterface; ++ +/** + * A standard instruction queue class. 
It holds ready instructions, in + * order, in seperate priority queues to facilitate the scheduling of + * instructions. The IQ uses a separate linked list to track dependencies. + * Similar to the rename map and the free list, it expects that + * floating point registers have their indices start after the integer + * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer + * and 96-191 are fp). This remains true even for both logical and - * physical register indices. ++ * physical register indices. The IQ depends on the memory dependence unit to ++ * track when memory operations are ready in terms of ordering; register ++ * dependencies are tracked normally. Right now the IQ also handles the ++ * execution timing; this is mainly to allow back-to-back scheduling without ++ * requiring IEW to be able to peek into the IQ. At the end of the execution ++ * latency, the instruction is put into the queue to execute, where it will ++ * have the execute() function called on it. ++ * @todo: Make IQ able to handle multiple FU pools. + */ +template +class InstructionQueue +{ + public: + //Typedefs from the Impl. + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::Params Params; + ++ typedef typename Impl::CPUPol::IEW IEW; + typedef typename Impl::CPUPol::MemDepUnit MemDepUnit; + typedef typename Impl::CPUPol::IssueStruct IssueStruct; + typedef typename Impl::CPUPol::TimeStruct TimeStruct; + - // Typedef of iterator through the list of instructions. Might be - // better to untie this from the FullCPU or pass its information to - // the stages. ++ // Typedef of iterator through the list of instructions. + typedef typename std::list::iterator ListIt; + - /** - * Struct for comparing entries to be added to the priority queue. 
This - * gives reverse ordering to the instructions in terms of sequence - * numbers: the instructions with smaller sequence numbers (and hence - * are older) will be at the top of the priority queue. - */ - struct pqCompare - { - bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const - { - return lhs->seqNum > rhs->seqNum; - } - }; ++ friend class Impl::FullCPU; + - /** - * Struct for comparing entries to be added to the set. This gives - * standard ordering in terms of sequence numbers. - */ - struct setCompare - { - bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const - { - return lhs->seqNum < rhs->seqNum; - } ++ /** FU completion event class. */ ++ class FUCompletion : public Event { ++ private: ++ /** Executing instruction. */ ++ DynInstPtr inst; ++ ++ /** Index of the FU used for executing. */ ++ int fuIdx; ++ ++ /** Pointer back to the instruction queue. */ ++ InstructionQueue *iqPtr; ++ ++ bool freeFU; ++ ++ public: ++ /** Construct a FU completion event. */ ++ FUCompletion(DynInstPtr &_inst, int fu_idx, ++ InstructionQueue *iq_ptr); ++ ++ virtual void process(); ++ virtual const char *description(); ++ void setFreeFU() { freeFU = true; } + }; + - typedef std::priority_queue, pqCompare> - ReadyInstQueue; ++ /** Constructs an IQ. */ ++ InstructionQueue(Params *params); + - InstructionQueue(Params ¶ms); ++ /** Destructs the IQ. */ ++ ~InstructionQueue(); + ++ /** Returns the name of the IQ. */ ++ std::string name() const; ++ ++ /** Registers statistics. */ + void regStats(); + - void setCPU(FullCPU *cpu); ++ void resetState(); ++ ++ /** Sets CPU pointer. */ ++ void setCPU(FullCPU *_cpu) { cpu = _cpu; } + ++ /** Sets active threads list. */ ++ void setActiveThreads(std::list *at_ptr); ++ ++ /** Sets the IEW pointer. */ ++ void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; } ++ ++ /** Sets the timer buffer between issue and execute. */ + void setIssueToExecuteQueue(TimeBuffer *i2eQueue); + ++ /** Sets the global time buffer. 
*/ + void setTimeBuffer(TimeBuffer *tb_ptr); + ++ void switchOut(); ++ ++ void takeOverFrom(); ++ ++ bool isSwitchedOut() { return switchedOut; } ++ ++ /** Number of entries needed for given amount of threads. */ ++ int entryAmount(int num_threads); ++ ++ /** Resets max entries for all threads. */ ++ void resetEntries(); ++ ++ /** Returns total number of free entries. */ + unsigned numFreeEntries(); + ++ /** Returns number of free entries for a thread. */ ++ unsigned numFreeEntries(unsigned tid); ++ ++ /** Returns whether or not the IQ is full. */ + bool isFull(); + ++ /** Returns whether or not the IQ is full for a specific thread. */ ++ bool isFull(unsigned tid); ++ ++ /** Returns if there are any ready instructions in the IQ. */ ++ bool hasReadyInsts(); ++ ++ /** Inserts a new instruction into the IQ. */ + void insert(DynInstPtr &new_inst); + ++ /** Inserts a new, non-speculative instruction into the IQ. */ + void insertNonSpec(DynInstPtr &new_inst); + - void advanceTail(DynInstPtr &inst); ++ /** Inserts a memory or write barrier into the IQ to make sure ++ * loads and stores are ordered properly. ++ */ ++ void insertBarrier(DynInstPtr &barr_inst); + ++ DynInstPtr getInstToExecute(); ++ ++ /** ++ * Records the instruction as the producer of a register without ++ * adding it to the rest of the IQ. ++ */ ++ void recordProducer(DynInstPtr &inst) ++ { addToProducers(inst); } ++ ++ /** Process FU completion event. */ ++ void processFUCompletion(DynInstPtr &inst, int fu_idx); ++ ++ /** ++ * Schedules ready instructions, adding the ready ones (oldest first) to ++ * the queue to execute. ++ */ + void scheduleReadyInsts(); + ++ /** Schedules a single specific non-speculative instruction. */ + void scheduleNonSpec(const InstSeqNum &inst); + - void wakeDependents(DynInstPtr &completed_inst); ++ /** ++ * Commits all instructions up to and including the given sequence number, ++ * for a specific thread. 
++ */ ++ void commit(const InstSeqNum &inst, unsigned tid = 0); ++ ++ /** Wakes all dependents of a completed instruction. */ ++ int wakeDependents(DynInstPtr &completed_inst); ++ ++ /** Adds a ready memory instruction to the ready list. */ ++ void addReadyMemInst(DynInstPtr &ready_inst); ++ ++ /** ++ * Reschedules a memory instruction. It will be ready to issue once ++ * replayMemInst() is called. ++ */ ++ void rescheduleMemInst(DynInstPtr &resched_inst); ++ ++ /** Replays a memory instruction. It must be rescheduled first. */ ++ void replayMemInst(DynInstPtr &replay_inst); + ++ /** Completes a memory operation. */ ++ void completeMemInst(DynInstPtr &completed_inst); ++ ++ /** Indicates an ordering violation between a store and a load. */ + void violation(DynInstPtr &store, DynInstPtr &faulting_load); + - // Change this to take in the sequence number - void squash(); ++ /** ++ * Squashes instructions for a thread. Squashing information is obtained ++ * from the time buffer. ++ */ ++ void squash(unsigned tid); + - void doSquash(); ++ /** Returns the number of used entries for a thread. */ ++ unsigned getCount(unsigned tid) { return count[tid]; }; + - void stopSquash(); ++ /** Debug function to print all instructions. */ ++ void printInsts(); + + private: ++ /** Does the actual squashing. */ ++ void doSquash(unsigned tid); ++ ++ ///////////////////////// ++ // Various pointers ++ ///////////////////////// ++ + /** Pointer to the CPU. */ + FullCPU *cpu; + ++ /** Cache interface. */ ++ MemInterface *dcacheInterface; ++ ++ /** Pointer to IEW stage. */ ++ IEW *iewStage; ++ + /** The memory dependence unit, which tracks/predicts memory dependences + * between instructions. + */ - MemDepUnit memDepUnit; ++ MemDepUnit memDepUnit[Impl::MaxThreads]; + + /** The queue to the execute stage. Issued instructions will be written + * into it. + */ + TimeBuffer *issueToExecuteQueue; + + /** The backwards time buffer. 
*/ + TimeBuffer *timeBuffer; + + /** Wire to read information from timebuffer. */ + typename TimeBuffer::wire fromCommit; + - enum InstList { - Int, - Float, - Branch, - Memory, - Misc, - Squashed, - None - }; ++ /** Function unit pool. */ ++ FUPool *fuPool; + - /** List of ready int instructions. Used to keep track of the order in - * which instructions should issue. - */ - ReadyInstQueue readyIntInsts; ++ ////////////////////////////////////// ++ // Instruction lists, ready queues, and ordering ++ ////////////////////////////////////// + - /** List of ready floating point instructions. */ - ReadyInstQueue readyFloatInsts; ++ /** List of all the instructions in the IQ (some of which may be issued). */ ++ std::list instList[Impl::MaxThreads]; + - /** List of ready branch instructions. */ - ReadyInstQueue readyBranchInsts; ++ std::list instsToExecute; + - /** List of ready miscellaneous instructions. */ - ReadyInstQueue readyMiscInsts; ++ /** ++ * Struct for comparing entries to be added to the priority queue. This ++ * gives reverse ordering to the instructions in terms of sequence ++ * numbers: the instructions with smaller sequence numbers (and hence ++ * are older) will be at the top of the priority queue. ++ */ ++ struct pqCompare { ++ bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const ++ { ++ return lhs->seqNum > rhs->seqNum; ++ } ++ }; ++ ++ typedef std::priority_queue, pqCompare> ++ ReadyInstQueue; + - /** List of squashed instructions (which are still valid and in IQ). - * Implemented using a priority queue; the entries must contain both - * the IQ index and sequence number of each instruction so that - * ordering based on sequence numbers can be used. ++ /** List of ready instructions, per op class. They are separated by op ++ * class to allow for easy mapping to FUs. 
+ */ - ReadyInstQueue squashedInsts; ++ ReadyInstQueue readyInsts[Num_OpClasses]; + + /** List of non-speculative instructions that will be scheduled + * once the IQ gets a signal from commit. While it's redundant to + * have the key be a part of the value (the sequence number is stored + * inside of DynInst), when these instructions are woken up only + * the sequence number will be available. Thus it is most efficient to be + * able to search by the sequence number alone. + */ + std::map nonSpecInsts; + - typedef typename std::map::iterator non_spec_it_t; ++ typedef typename std::map::iterator NonSpecMapIt; + - /** Number of free IQ entries left. */ - unsigned freeEntries; ++ /** Entry for the list age ordering by op class. */ ++ struct ListOrderEntry { ++ OpClass queueType; ++ InstSeqNum oldestInst; ++ }; + - /** The number of entries in the instruction queue. */ - unsigned numEntries; ++ /** List that contains the age order of the oldest instruction of each ++ * ready queue. Used to select the oldest instruction available ++ * among op classes. ++ * @todo: Might be better to just move these entries around instead ++ * of creating new ones every time the position changes due to an ++ * instruction issuing. Not sure std::list supports this. ++ */ ++ std::list listOrder; ++ ++ typedef typename std::list::iterator ListOrderIt; ++ ++ /** Tracks if each ready queue is on the age order list. */ ++ bool queueOnList[Num_OpClasses]; + - /** The number of integer instructions that can be issued in one - * cycle. ++ /** Iterators of each ready queue. Points to their spot in the age order ++ * list. + */ - unsigned intWidth; ++ ListOrderIt readyIt[Num_OpClasses]; + - /** The number of floating point instructions that can be issued - * in one cycle. ++ /** Add an op class to the age order list. 
*/ ++ void addToOrderList(OpClass op_class); ++ ++ /** ++ * Called when the oldest instruction has been removed from a ready queue; ++ * this places that ready queue into the proper spot in the age order list. + */ - unsigned floatWidth; ++ void moveToYoungerInst(ListOrderIt age_order_it); ++ ++ DependencyGraph dependGraph; ++ ++ ////////////////////////////////////// ++ // Various parameters ++ ////////////////////////////////////// ++ ++ /** IQ Resource Sharing Policy */ ++ enum IQPolicy { ++ Dynamic, ++ Partitioned, ++ Threshold ++ }; ++ ++ /** IQ sharing policy for SMT. */ ++ IQPolicy iqPolicy; + - /** The number of branches that can be issued in one cycle. */ - unsigned branchWidth; ++ /** Number of Total Threads*/ ++ unsigned numThreads; + - /** The number of memory instructions that can be issued in one cycle. */ - unsigned memoryWidth; ++ /** Pointer to list of active threads. */ ++ std::list *activeThreads; ++ ++ /** Per Thread IQ count */ ++ unsigned count[Impl::MaxThreads]; ++ ++ /** Max IQ Entries Per Thread */ ++ unsigned maxEntries[Impl::MaxThreads]; ++ ++ /** Number of free IQ entries left. */ ++ unsigned freeEntries; ++ ++ /** The number of entries in the instruction queue. */ ++ unsigned numEntries; + + /** The total number of instructions that can be issued in one cycle. */ + unsigned totalWidth; + - //The number of physical registers in the CPU. ++ /** The number of physical registers in the CPU. */ + unsigned numPhysRegs; + + /** The number of physical integer registers in the CPU. */ + unsigned numPhysIntRegs; + + /** The number of floating point registers in the CPU. */ + unsigned numPhysFloatRegs; + + /** Delay between commit stage and the IQ. + * @todo: Make there be a distinction between the delays within IEW. + */ + unsigned commitToIEWDelay; + - ////////////////////////////////// - // Variables needed for squashing - ////////////////////////////////// ++ bool switchedOut; + + /** The sequence number of the squashed instruction. 
*/ - InstSeqNum squashedSeqNum; - - /** Iterator that points to the youngest instruction in the IQ. */ - ListIt tail; - - /** Iterator that points to the last instruction that has been squashed. - * This will not be valid unless the IQ is in the process of squashing. - */ - ListIt squashIt; - - /////////////////////////////////// - // Dependency graph stuff - /////////////////////////////////// - - class DependencyEntry - { - public: - DynInstPtr inst; - //Might want to include data about what arch. register the - //dependence is waiting on. - DependencyEntry *next; - - //This function, and perhaps this whole class, stand out a little - //bit as they don't fit a classification well. I want access - //to the underlying structure of the linked list, yet at - //the same time it feels like this should be something abstracted - //away. So for now it will sit here, within the IQ, until - //a better implementation is decided upon. - // This function probably shouldn't be within the entry... - void insert(DynInstPtr &new_inst); - - void remove(DynInstPtr &inst_to_remove); - - // Debug variable, remove when done testing. - static unsigned mem_alloc_counter; - }; - - /** Array of linked lists. Each linked list is a list of all the - * instructions that depend upon a given register. The actual - * register's index is used to index into the graph; ie all - * instructions in flight that are dependent upon r34 will be - * in the linked list of dependGraph[34]. - */ - DependencyEntry *dependGraph; ++ InstSeqNum squashedSeqNum[Impl::MaxThreads]; + + /** A cache of the recently woken registers. It is 1 if the register + * has been woken up recently, and 0 if the register has been added + * to the dependency graph and has not yet received its value. It + * is basically a secondary scoreboard, and should pretty much mirror + * the scoreboard that exists in the rename map. 
+ */ - vector regScoreboard; ++ std::vector regScoreboard; + ++ /** Adds an instruction to the dependency graph, as a consumer. */ + bool addToDependents(DynInstPtr &new_inst); - void insertDependency(DynInstPtr &new_inst); - void createDependency(DynInstPtr &new_inst); + ++ /** Adds an instruction to the dependency graph, as a producer. */ ++ void addToProducers(DynInstPtr &new_inst); ++ ++ /** Moves an instruction to the ready queue if it is ready. */ + void addIfReady(DynInstPtr &inst); + - private: + /** Debugging function to count how many entries are in the IQ. It does + * a linear walk through the instructions, so do not call this function + * during normal execution. + */ + int countInsts(); + - /** Debugging function to dump out the dependency graph. - */ - void dumpDependGraph(); - + /** Debugging function to dump all the list sizes, as well as print + * out the list of nonspeculative instructions. Should not be used + * in any other capacity, but it has no harmful sideaffects. + */ + void dumpLists(); + ++ /** Debugging function to dump out all instructions that are in the ++ * IQ. ++ */ ++ void dumpInsts(); ++ ++ /** Stat for number of instructions added. */ + Stats::Scalar<> iqInstsAdded; ++ /** Stat for number of non-speculative instructions added. */ + Stats::Scalar<> iqNonSpecInstsAdded; - // Stats::Scalar<> iqIntInstsAdded; ++ ++ Stats::Scalar<> iqInstsIssued; ++ /** Stat for number of integer instructions issued. */ + Stats::Scalar<> iqIntInstsIssued; - // Stats::Scalar<> iqFloatInstsAdded; ++ /** Stat for number of floating point instructions issued. */ + Stats::Scalar<> iqFloatInstsIssued; - // Stats::Scalar<> iqBranchInstsAdded; ++ /** Stat for number of branch instructions issued. */ + Stats::Scalar<> iqBranchInstsIssued; - // Stats::Scalar<> iqMemInstsAdded; ++ /** Stat for number of memory instructions issued. 
*/ + Stats::Scalar<> iqMemInstsIssued; - // Stats::Scalar<> iqMiscInstsAdded; ++ /** Stat for number of miscellaneous instructions issued. */ + Stats::Scalar<> iqMiscInstsIssued; ++ /** Stat for number of squashed instructions that were ready to issue. */ + Stats::Scalar<> iqSquashedInstsIssued; - Stats::Scalar<> iqLoopSquashStalls; ++ /** Stat for number of squashed instructions examined when squashing. */ + Stats::Scalar<> iqSquashedInstsExamined; ++ /** Stat for number of squashed instruction operands examined when ++ * squashing. ++ */ + Stats::Scalar<> iqSquashedOperandsExamined; ++ /** Stat for number of non-speculative instructions removed due to a squash. ++ */ + Stats::Scalar<> iqSquashedNonSpecRemoved; + ++ Stats::VectorDistribution<> queueResDist; ++ Stats::Distribution<> numIssuedDist; ++ Stats::VectorDistribution<> issueDelayDist; ++ ++ Stats::Vector<> statFuBusy; ++// Stats::Vector<> dist_unissued; ++ Stats::Vector2d<> statIssuedInstType; ++ ++ Stats::Formula issueRate; ++// Stats::Formula issue_stores; ++// Stats::Formula issue_op_rate; ++ Stats::Vector<> fuBusy; //cumulative fu busy ++ ++ Stats::Formula fuBusyRate; +}; + - #endif //__CPU_O3_CPU_INST_QUEUE_HH__ ++#endif //__CPU_O3_INST_QUEUE_HH__ diff --cc src/cpu/o3/inst_queue_impl.hh index 048dc7c00,000000000..f1dc4e01f mode 100644,000000..100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@@ -1,1136 -1,0 +1,1367 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - // Todo: - // Current ordering allows for 0 cycle added-to-scheduled. Could maybe fake - // it; either do in reverse order, or have added instructions put into a - // different ready queue that, in scheduleRreadyInsts(), gets put onto the - // normal ready queue. This would however give only a one cycle delay, - // but probably is more flexible to actually add in a delay parameter than - // just running it backwards. 
- +#include +#include + +#include "sim/root.hh" + ++#include "cpu/o3/fu_pool.hh" +#include "cpu/o3/inst_queue.hh" + - // Either compile error or max int due to sign extension. - // Hack to avoid compile warnings. - const InstSeqNum MaxInstSeqNum = std::numeric_limits::max(); ++using namespace std; + +template - InstructionQueue::InstructionQueue(Params ¶ms) - : memDepUnit(params), - numEntries(params.numIQEntries), - intWidth(params.executeIntWidth), - floatWidth(params.executeFloatWidth), - branchWidth(params.executeBranchWidth), - memoryWidth(params.executeMemoryWidth), - totalWidth(params.issueWidth), - numPhysIntRegs(params.numPhysIntRegs), - numPhysFloatRegs(params.numPhysFloatRegs), - commitToIEWDelay(params.commitToIEWDelay) ++InstructionQueue::FUCompletion::FUCompletion(DynInstPtr &_inst, ++ int fu_idx, ++ InstructionQueue *iq_ptr) ++ : Event(&mainEventQueue, Stat_Event_Pri), ++ inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), freeFU(false) +{ - // Initialize the number of free IQ entries. - freeEntries = numEntries; ++ this->setFlags(Event::AutoDelete); ++} ++ ++template ++void ++InstructionQueue::FUCompletion::process() ++{ ++ iqPtr->processFUCompletion(inst, freeFU ? 
fuIdx : -1); ++ inst = NULL; ++} ++ ++ ++template ++const char * ++InstructionQueue::FUCompletion::description() ++{ ++ return "Functional unit completion event"; ++} ++ ++template ++InstructionQueue::InstructionQueue(Params *params) ++ : dcacheInterface(params->dcacheInterface), ++ fuPool(params->fuPool), ++ numEntries(params->numIQEntries), ++ totalWidth(params->issueWidth), ++ numPhysIntRegs(params->numPhysIntRegs), ++ numPhysFloatRegs(params->numPhysFloatRegs), ++ commitToIEWDelay(params->commitToIEWDelay) ++{ ++ assert(fuPool); ++ ++ switchedOut = false; ++ ++ numThreads = params->numberOfThreads; + + // Set the number of physical registers as the number of int + float + numPhysRegs = numPhysIntRegs + numPhysFloatRegs; + - DPRINTF(IQ, "IQ: There are %i physical registers.\n", numPhysRegs); ++ DPRINTF(IQ, "There are %i physical registers.\n", numPhysRegs); + + //Create an entry for each physical register within the + //dependency graph. - dependGraph = new DependencyEntry[numPhysRegs]; ++ dependGraph.resize(numPhysRegs); + + // Resize the register scoreboard. + regScoreboard.resize(numPhysRegs); + - // Initialize all the head pointers to point to NULL, and all the - // entries as unready. - // Note that in actuality, the registers corresponding to the logical - // registers start off as ready. However this doesn't matter for the - // IQ as the instruction should have been correctly told if those - // registers are ready in rename. Thus it can all be initialized as - // unready. 
- for (int i = 0; i < numPhysRegs; ++i) - { - dependGraph[i].next = NULL; - dependGraph[i].inst = NULL; - regScoreboard[i] = false; ++ //Initialize Mem Dependence Units ++ for (int i = 0; i < numThreads; i++) { ++ memDepUnit[i].init(params,i); ++ memDepUnit[i].setIQ(this); + } + ++ resetState(); ++ ++ string policy = params->smtIQPolicy; ++ ++ //Convert string to lowercase ++ std::transform(policy.begin(), policy.end(), policy.begin(), ++ (int(*)(int)) tolower); ++ ++ //Figure out resource sharing policy ++ if (policy == "dynamic") { ++ iqPolicy = Dynamic; ++ ++ //Set Max Entries to Total ROB Capacity ++ for (int i = 0; i < numThreads; i++) { ++ maxEntries[i] = numEntries; ++ } ++ ++ } else if (policy == "partitioned") { ++ iqPolicy = Partitioned; ++ ++ //@todo:make work if part_amt doesnt divide evenly. ++ int part_amt = numEntries / numThreads; ++ ++ //Divide ROB up evenly ++ for (int i = 0; i < numThreads; i++) { ++ maxEntries[i] = part_amt; ++ } ++ ++ DPRINTF(Fetch, "IQ sharing policy set to Partitioned:" ++ "%i entries per thread.\n",part_amt); ++ ++ } else if (policy == "threshold") { ++ iqPolicy = Threshold; ++ ++ double threshold = (double)params->smtIQThreshold / 100; ++ ++ int thresholdIQ = (int)((double)threshold * numEntries); ++ ++ //Divide up by threshold amount ++ for (int i = 0; i < numThreads; i++) { ++ maxEntries[i] = thresholdIQ; ++ } ++ ++ DPRINTF(Fetch, "IQ sharing policy set to Threshold:" ++ "%i entries per thread.\n",thresholdIQ); ++ } else { ++ assert(0 && "Invalid IQ Sharing Policy.Options Are:{Dynamic," ++ "Partitioned, Threshold}"); ++ } ++} ++ ++template ++InstructionQueue::~InstructionQueue() ++{ ++ dependGraph.reset(); ++ cprintf("Nodes traversed: %i, removed: %i\n", ++ dependGraph.nodesTraversed, dependGraph.nodesRemoved); ++} ++ ++template ++std::string ++InstructionQueue::name() const ++{ ++ return cpu->name() + ".iq"; +} + +template +void +InstructionQueue::regStats() +{ ++ using namespace Stats; + iqInstsAdded + .name(name() + 
".iqInstsAdded") + .desc("Number of instructions added to the IQ (excludes non-spec)") + .prereq(iqInstsAdded); + + iqNonSpecInstsAdded + .name(name() + ".iqNonSpecInstsAdded") + .desc("Number of non-speculative instructions added to the IQ") + .prereq(iqNonSpecInstsAdded); + - // iqIntInstsAdded; ++ iqInstsIssued ++ .name(name() + ".iqInstsIssued") ++ .desc("Number of instructions issued") ++ .prereq(iqInstsIssued); + + iqIntInstsIssued + .name(name() + ".iqIntInstsIssued") + .desc("Number of integer instructions issued") + .prereq(iqIntInstsIssued); + - // iqFloatInstsAdded; - + iqFloatInstsIssued + .name(name() + ".iqFloatInstsIssued") + .desc("Number of float instructions issued") + .prereq(iqFloatInstsIssued); + - // iqBranchInstsAdded; - + iqBranchInstsIssued + .name(name() + ".iqBranchInstsIssued") + .desc("Number of branch instructions issued") + .prereq(iqBranchInstsIssued); + - // iqMemInstsAdded; - + iqMemInstsIssued + .name(name() + ".iqMemInstsIssued") + .desc("Number of memory instructions issued") + .prereq(iqMemInstsIssued); + - // iqMiscInstsAdded; - + iqMiscInstsIssued + .name(name() + ".iqMiscInstsIssued") + .desc("Number of miscellaneous instructions issued") + .prereq(iqMiscInstsIssued); + + iqSquashedInstsIssued + .name(name() + ".iqSquashedInstsIssued") + .desc("Number of squashed instructions issued") + .prereq(iqSquashedInstsIssued); + - iqLoopSquashStalls - .name(name() + ".iqLoopSquashStalls") - .desc("Number of times issue loop had to restart due to squashed " - "inst; mainly for profiling") - .prereq(iqLoopSquashStalls); - + iqSquashedInstsExamined + .name(name() + ".iqSquashedInstsExamined") + .desc("Number of squashed instructions iterated over during squash;" + " mainly for profiling") + .prereq(iqSquashedInstsExamined); + + iqSquashedOperandsExamined + .name(name() + ".iqSquashedOperandsExamined") + .desc("Number of squashed operands that are examined and possibly " + "removed from graph") + .prereq(iqSquashedOperandsExamined); + + 
iqSquashedNonSpecRemoved + .name(name() + ".iqSquashedNonSpecRemoved") + .desc("Number of squashed non-spec instructions that were removed") + .prereq(iqSquashedNonSpecRemoved); + - // Tell mem dependence unit to reg stats as well. - memDepUnit.regStats(); ++ queueResDist ++ .init(Num_OpClasses, 0, 99, 2) ++ .name(name() + ".IQ:residence:") ++ .desc("cycles from dispatch to issue") ++ .flags(total | pdf | cdf ) ++ ; ++ for (int i = 0; i < Num_OpClasses; ++i) { ++ queueResDist.subname(i, opClassStrings[i]); ++ } ++ numIssuedDist ++ .init(0,totalWidth,1) ++ .name(name() + ".ISSUE:issued_per_cycle") ++ .desc("Number of insts issued each cycle") ++ .flags(pdf) ++ ; ++/* ++ dist_unissued ++ .init(Num_OpClasses+2) ++ .name(name() + ".ISSUE:unissued_cause") ++ .desc("Reason ready instruction not issued") ++ .flags(pdf | dist) ++ ; ++ for (int i=0; i < (Num_OpClasses + 2); ++i) { ++ dist_unissued.subname(i, unissued_names[i]); ++ } ++*/ ++ statIssuedInstType ++ .init(numThreads,Num_OpClasses) ++ .name(name() + ".ISSUE:FU_type") ++ .desc("Type of FU issued") ++ .flags(total | pdf | dist) ++ ; ++ statIssuedInstType.ysubnames(opClassStrings); ++ ++ // ++ // How long did instructions for a particular FU type wait prior to issue ++ // ++ ++ issueDelayDist ++ .init(Num_OpClasses,0,99,2) ++ .name(name() + ".ISSUE:") ++ .desc("cycles from operands ready to issue") ++ .flags(pdf | cdf) ++ ; ++ ++ for (int i=0; inumCycles; ++/* ++ issue_stores ++ .name(name() + ".ISSUE:stores") ++ .desc("Number of stores issued") ++ .flags(total) ++ ; ++ issue_stores = exe_refs - exe_loads; ++*/ ++/* ++ issue_op_rate ++ .name(name() + ".ISSUE:op_rate") ++ .desc("Operation issue rate") ++ .flags(total) ++ ; ++ issue_op_rate = issued_ops / numCycles; ++*/ ++ statFuBusy ++ .init(Num_OpClasses) ++ .name(name() + ".ISSUE:fu_full") ++ .desc("attempts to use FU when none available") ++ .flags(pdf | dist) ++ ; ++ for (int i=0; i < Num_OpClasses; ++i) { ++ statFuBusy.subname(i, opClassStrings[i]); ++ } ++ ++ 
fuBusy ++ .init(numThreads) ++ .name(name() + ".ISSUE:fu_busy_cnt") ++ .desc("FU busy when requested") ++ .flags(total) ++ ; ++ ++ fuBusyRate ++ .name(name() + ".ISSUE:fu_busy_rate") ++ .desc("FU busy rate (busy events/executed inst)") ++ .flags(total) ++ ; ++ fuBusyRate = fuBusy / iqInstsIssued; ++ ++ for ( int i=0; i < numThreads; i++) { ++ // Tell mem dependence unit to reg stats as well. ++ memDepUnit[i].regStats(); ++ } +} + +template +void - InstructionQueue::setCPU(FullCPU *cpu_ptr) ++InstructionQueue::resetState() +{ - cpu = cpu_ptr; ++ //Initialize thread IQ counts ++ for (int i = 0; i instList.begin(); ++template ++void ++InstructionQueue::setActiveThreads(list *at_ptr) ++{ ++ DPRINTF(IQ, "Setting active threads list pointer.\n"); ++ activeThreads = at_ptr; +} + +template +void - InstructionQueue::setIssueToExecuteQueue( - TimeBuffer *i2e_ptr) ++InstructionQueue::setIssueToExecuteQueue(TimeBuffer *i2e_ptr) +{ - DPRINTF(IQ, "IQ: Set the issue to execute queue.\n"); ++ DPRINTF(IQ, "Set the issue to execute queue.\n"); + issueToExecuteQueue = i2e_ptr; +} + +template +void +InstructionQueue::setTimeBuffer(TimeBuffer *tb_ptr) +{ - DPRINTF(IQ, "IQ: Set the time buffer.\n"); ++ DPRINTF(IQ, "Set the time buffer.\n"); + timeBuffer = tb_ptr; + + fromCommit = timeBuffer->getWire(-commitToIEWDelay); +} + ++template ++void ++InstructionQueue::switchOut() ++{ ++ resetState(); ++ dependGraph.reset(); ++ switchedOut = true; ++ for (int i = 0; i < numThreads; ++i) { ++ memDepUnit[i].switchOut(); ++ } ++} ++ ++template ++void ++InstructionQueue::takeOverFrom() ++{ ++ switchedOut = false; ++} ++ ++template ++int ++InstructionQueue::entryAmount(int num_threads) ++{ ++ if (iqPolicy == Partitioned) { ++ return numEntries / num_threads; ++ } else { ++ return 0; ++ } ++} ++ ++ ++template ++void ++InstructionQueue::resetEntries() ++{ ++ if (iqPolicy != Dynamic || numThreads > 1) { ++ int active_threads = (*activeThreads).size(); ++ ++ list::iterator threads = 
(*activeThreads).begin(); ++ list::iterator list_end = (*activeThreads).end(); ++ ++ while (threads != list_end) { ++ if (iqPolicy == Partitioned) { ++ maxEntries[*threads++] = numEntries / active_threads; ++ } else if(iqPolicy == Threshold && active_threads == 1) { ++ maxEntries[*threads++] = numEntries; ++ } ++ } ++ } ++} ++ +template +unsigned +InstructionQueue::numFreeEntries() +{ + return freeEntries; +} + ++template ++unsigned ++InstructionQueue::numFreeEntries(unsigned tid) ++{ ++ return maxEntries[tid] - count[tid]; ++} ++ +// Might want to do something more complex if it knows how many instructions +// will be issued this cycle. +template +bool +InstructionQueue::isFull() +{ + if (freeEntries == 0) { + return(true); + } else { + return(false); + } +} + ++template ++bool ++InstructionQueue::isFull(unsigned tid) ++{ ++ if (numFreeEntries(tid) == 0) { ++ return(true); ++ } else { ++ return(false); ++ } ++} ++ ++template ++bool ++InstructionQueue::hasReadyInsts() ++{ ++ if (!listOrder.empty()) { ++ return true; ++ } ++ ++ for (int i = 0; i < Num_OpClasses; ++i) { ++ if (!readyInsts[i].empty()) { ++ return true; ++ } ++ } ++ ++ return false; ++} ++ +template +void +InstructionQueue::insert(DynInstPtr &new_inst) +{ + // Make sure the instruction is valid + assert(new_inst); + - DPRINTF(IQ, "IQ: Adding instruction PC %#x to the IQ.\n", - new_inst->readPC()); ++ DPRINTF(IQ, "Adding instruction [sn:%lli] PC %#x to the IQ.\n", ++ new_inst->seqNum, new_inst->readPC()); + - // Check if there are any free entries. Panic if there are none. - // Might want to have this return a fault in the future instead of - // panicing. + assert(freeEntries != 0); + - // If the IQ currently has nothing in it, then there's a possibility - // that the tail iterator is invalid (might have been pointing at an - // instruction that was retired). Reset the tail iterator. - if (freeEntries == numEntries) { - tail = cpu->instList.begin(); - } - - // Move the tail iterator. 
Instructions may not have been issued - // to the IQ, so we may have to increment the iterator more than once. - while ((*tail) != new_inst) { - tail++; - - // Make sure the tail iterator points at something legal. - assert(tail != cpu->instList.end()); - } - ++ instList[new_inst->threadNumber].push_back(new_inst); + - // Decrease the number of free entries. + --freeEntries; + ++ new_inst->setInIQ(); ++ + // Look through its source registers (physical regs), and mark any + // dependencies. + addToDependents(new_inst); + + // Have this instruction set itself as the producer of its destination + // register(s). - createDependency(new_inst); ++ addToProducers(new_inst); + - // If it's a memory instruction, add it to the memory dependency - // unit. + if (new_inst->isMemRef()) { - memDepUnit.insert(new_inst); - // Uh..forgot to look it up and put it on the proper dependency list - // if the instruction should not go yet. ++ memDepUnit[new_inst->threadNumber].insert(new_inst); + } else { - // If the instruction is ready then add it to the ready list. + addIfReady(new_inst); + } + + ++iqInstsAdded; + ++ count[new_inst->threadNumber]++; ++ + assert(freeEntries == (numEntries - countInsts())); +} + +template +void - InstructionQueue::insertNonSpec(DynInstPtr &inst) ++InstructionQueue::insertNonSpec(DynInstPtr &new_inst) +{ - nonSpecInsts[inst->seqNum] = inst; - + // @todo: Clean up this code; can do it by setting inst as unable + // to issue, then calling normal insert on the inst. + - // Make sure the instruction is valid - assert(inst); - - DPRINTF(IQ, "IQ: Adding instruction PC %#x to the IQ.\n", - inst->readPC()); ++ assert(new_inst); + - // Check if there are any free entries. Panic if there are none. - // Might want to have this return a fault in the future instead of - // panicing. 
- assert(freeEntries != 0); ++ nonSpecInsts[new_inst->seqNum] = new_inst; + - // If the IQ currently has nothing in it, then there's a possibility - // that the tail iterator is invalid (might have been pointing at an - // instruction that was retired). Reset the tail iterator. - if (freeEntries == numEntries) { - tail = cpu->instList.begin(); - } ++ DPRINTF(IQ, "Adding non-speculative instruction [sn:%lli] PC %#x " ++ "to the IQ.\n", ++ new_inst->seqNum, new_inst->readPC()); + - // Move the tail iterator. Instructions may not have been issued - // to the IQ, so we may have to increment the iterator more than once. - while ((*tail) != inst) { - tail++; ++ assert(freeEntries != 0); + - // Make sure the tail iterator points at something legal. - assert(tail != cpu->instList.end()); - } ++ instList[new_inst->threadNumber].push_back(new_inst); + - // Decrease the number of free entries. + --freeEntries; + ++ new_inst->setInIQ(); ++ + // Have this instruction set itself as the producer of its destination + // register(s). - createDependency(inst); ++ addToProducers(new_inst); + + // If it's a memory instruction, add it to the memory dependency + // unit. - if (inst->isMemRef()) { - memDepUnit.insertNonSpec(inst); ++ if (new_inst->isMemRef()) { ++ memDepUnit[new_inst->threadNumber].insertNonSpec(new_inst); + } + + ++iqNonSpecInstsAdded; ++ ++ count[new_inst->threadNumber]++; ++ ++ assert(freeEntries == (numEntries - countInsts())); +} + - // Slightly hack function to advance the tail iterator in the case that - // the IEW stage issues an instruction that is not added to the IQ. This - // is needed in case a long chain of such instructions occurs. - // I don't think this is used anymore. 
+template +void - InstructionQueue::advanceTail(DynInstPtr &inst) ++InstructionQueue::insertBarrier(DynInstPtr &barr_inst) +{ - // Make sure the instruction is valid - assert(inst); - - DPRINTF(IQ, "IQ: Adding instruction PC %#x to the IQ.\n", - inst->readPC()); - - // Check if there are any free entries. Panic if there are none. - // Might want to have this return a fault in the future instead of - // panicing. - assert(freeEntries != 0); - - // If the IQ currently has nothing in it, then there's a possibility - // that the tail iterator is invalid (might have been pointing at an - // instruction that was retired). Reset the tail iterator. - if (freeEntries == numEntries) { - tail = cpu->instList.begin(); - } - - // Move the tail iterator. Instructions may not have been issued - // to the IQ, so we may have to increment the iterator more than once. - while ((*tail) != inst) { - tail++; - - // Make sure the tail iterator points at something legal. - assert(tail != cpu->instList.end()); - } ++ memDepUnit[barr_inst->threadNumber].insertBarrier(barr_inst); + - assert(freeEntries <= numEntries); ++ insertNonSpec(barr_inst); ++} + - // Have this instruction set itself as the producer of its destination - // register(s). - createDependency(inst); ++template ++typename Impl::DynInstPtr ++InstructionQueue::getInstToExecute() ++{ ++ assert(!instsToExecute.empty()); ++ DynInstPtr inst = instsToExecute.front(); ++ instsToExecute.pop_front(); ++ return inst; +} + - // Need to make sure the number of float and integer instructions - // issued does not exceed the total issue bandwidth. - // @todo: Figure out a better way to remove the squashed items from the - // lists. Checking the top item of each list to see if it's squashed - // wastes time and forces jumps. 
+template +void - InstructionQueue::scheduleReadyInsts() ++InstructionQueue::addToOrderList(OpClass op_class) +{ - DPRINTF(IQ, "IQ: Attempting to schedule ready instructions from " - "the IQ.\n"); - - int int_issued = 0; - int float_issued = 0; - int branch_issued = 0; - int memory_issued = 0; - int squashed_issued = 0; - int total_issued = 0; - - IssueStruct *i2e_info = issueToExecuteQueue->access(0); ++ assert(!readyInsts[op_class].empty()); + - bool insts_available = !readyBranchInsts.empty() || - !readyIntInsts.empty() || - !readyFloatInsts.empty() || - !memDepUnit.empty() || - !readyMiscInsts.empty() || - !squashedInsts.empty(); - - // Note: Requires a globally defined constant. - InstSeqNum oldest_inst = MaxInstSeqNum; - InstList list_with_oldest = None; - - // Temporary values. - DynInstPtr int_head_inst; - DynInstPtr float_head_inst; - DynInstPtr branch_head_inst; - DynInstPtr mem_head_inst; - DynInstPtr misc_head_inst; - DynInstPtr squashed_head_inst; - - // Somewhat nasty code to look at all of the lists where issuable - // instructions are located, and choose the oldest instruction among - // those lists. Consider a rewrite in the future. - while (insts_available && total_issued < totalWidth) - { - // Set this to false. Each if-block is required to set it to true - // if there were instructions available this check. This will cause - // this loop to run once more than necessary, but avoids extra calls. 
- insts_available = false; - - oldest_inst = MaxInstSeqNum; - - list_with_oldest = None; - - if (!readyIntInsts.empty() && - int_issued < intWidth) { ++ ListOrderEntry queue_entry; + - insts_available = true; ++ queue_entry.queueType = op_class; + - int_head_inst = readyIntInsts.top(); ++ queue_entry.oldestInst = readyInsts[op_class].top()->seqNum; + - if (int_head_inst->isSquashed()) { - readyIntInsts.pop(); ++ ListOrderIt list_it = listOrder.begin(); ++ ListOrderIt list_end_it = listOrder.end(); + - ++iqLoopSquashStalls; - - continue; - } - - oldest_inst = int_head_inst->seqNum; - - list_with_oldest = Int; ++ while (list_it != list_end_it) { ++ if ((*list_it).oldestInst > queue_entry.oldestInst) { ++ break; + } + - if (!readyFloatInsts.empty() && - float_issued < floatWidth) { - - insts_available = true; - - float_head_inst = readyFloatInsts.top(); - - if (float_head_inst->isSquashed()) { - readyFloatInsts.pop(); - - ++iqLoopSquashStalls; ++ list_it++; ++ } + - continue; - } else if (float_head_inst->seqNum < oldest_inst) { - oldest_inst = float_head_inst->seqNum; ++ readyIt[op_class] = listOrder.insert(list_it, queue_entry); ++ queueOnList[op_class] = true; ++} + - list_with_oldest = Float; - } - } ++template ++void ++InstructionQueue::moveToYoungerInst(ListOrderIt list_order_it) ++{ ++ // Get iterator of next item on the list ++ // Delete the original iterator ++ // Determine if the next item is either the end of the list or younger ++ // than the new instruction. If so, then add in a new iterator right here. ++ // If not, then move along. 
++ ListOrderEntry queue_entry; ++ OpClass op_class = (*list_order_it).queueType; ++ ListOrderIt next_it = list_order_it; ++ ++ ++next_it; ++ ++ queue_entry.queueType = op_class; ++ queue_entry.oldestInst = readyInsts[op_class].top()->seqNum; ++ ++ while (next_it != listOrder.end() && ++ (*next_it).oldestInst < queue_entry.oldestInst) { ++ ++next_it; ++ } + - if (!readyBranchInsts.empty() && - branch_issued < branchWidth) { ++ readyIt[op_class] = listOrder.insert(next_it, queue_entry); ++} + - insts_available = true; ++template ++void ++InstructionQueue::processFUCompletion(DynInstPtr &inst, int fu_idx) ++{ ++ // The CPU could have been sleeping until this op completed (*extremely* ++ // long latency op). Wake it if it was. This may be overkill. ++ if (isSwitchedOut()) { ++ return; ++ } + - branch_head_inst = readyBranchInsts.top(); ++ iewStage->wakeCPU(); + - if (branch_head_inst->isSquashed()) { - readyBranchInsts.pop(); ++ if (fu_idx > -1) ++ fuPool->freeUnitNextCycle(fu_idx); + - ++iqLoopSquashStalls; ++ // @todo: Ensure that these FU Completions happen at the beginning ++ // of a cycle, otherwise they could add too many instructions to ++ // the queue. ++ // @todo: This could break if there's multiple multi-cycle ops ++ // finishing on this cycle. Maybe implement something like ++ // instToCommit in iew_impl.hh. ++ issueToExecuteQueue->access(0)->size++; ++ instsToExecute.push_back(inst); ++// int &size = issueToExecuteQueue->access(0)->size; + - continue; - } else if (branch_head_inst->seqNum < oldest_inst) { - oldest_inst = branch_head_inst->seqNum; ++// issueToExecuteQueue->access(0)->insts[size++] = inst; ++} + - list_with_oldest = Branch; - } ++// @todo: Figure out a better way to remove the squashed items from the ++// lists. Checking the top item of each list to see if it's squashed ++// wastes time and forces jumps. 
++template ++void ++InstructionQueue::scheduleReadyInsts() ++{ ++ DPRINTF(IQ, "Attempting to schedule ready instructions from " ++ "the IQ.\n"); + - } ++ IssueStruct *i2e_info = issueToExecuteQueue->access(0); + - if (!memDepUnit.empty() && - memory_issued < memoryWidth) { ++ // Have iterator to head of the list ++ // While I haven't exceeded bandwidth or reached the end of the list, ++ // Try to get a FU that can do what this op needs. ++ // If successful, change the oldestInst to the new top of the list, put ++ // the queue in the proper place in the list. ++ // Increment the iterator. ++ // This will avoid trying to schedule a certain op class if there are no ++ // FUs that handle it. ++ ListOrderIt order_it = listOrder.begin(); ++ ListOrderIt order_end_it = listOrder.end(); ++ int total_issued = 0; + - insts_available = true; ++ while (total_issued < totalWidth && ++ order_it != order_end_it) { ++ OpClass op_class = (*order_it).queueType; + - mem_head_inst = memDepUnit.top(); ++ assert(!readyInsts[op_class].empty()); + - if (mem_head_inst->isSquashed()) { - memDepUnit.pop(); ++ DynInstPtr issuing_inst = readyInsts[op_class].top(); + - ++iqLoopSquashStalls; ++ assert(issuing_inst->seqNum == (*order_it).oldestInst); + - continue; - } else if (mem_head_inst->seqNum < oldest_inst) { - oldest_inst = mem_head_inst->seqNum; ++ if (issuing_inst->isSquashed()) { ++ readyInsts[op_class].pop(); + - list_with_oldest = Memory; ++ if (!readyInsts[op_class].empty()) { ++ moveToYoungerInst(order_it); ++ } else { ++ readyIt[op_class] = listOrder.end(); ++ queueOnList[op_class] = false; + } - } - - if (!readyMiscInsts.empty()) { + - insts_available = true; ++ listOrder.erase(order_it++); + - misc_head_inst = readyMiscInsts.top(); ++ ++iqSquashedInstsIssued; + - if (misc_head_inst->isSquashed()) { - readyMiscInsts.pop(); - - ++iqLoopSquashStalls; - - continue; - } else if (misc_head_inst->seqNum < oldest_inst) { - oldest_inst = misc_head_inst->seqNum; - - list_with_oldest = Misc; 
- } ++ continue; + } + - if (!squashedInsts.empty()) { ++ int idx = -2; ++ int op_latency = 1; ++ int tid = issuing_inst->threadNumber; + - insts_available = true; ++ if (op_class != No_OpClass) { ++ idx = fuPool->getUnit(op_class); + - squashed_head_inst = squashedInsts.top(); - - if (squashed_head_inst->seqNum < oldest_inst) { - list_with_oldest = Squashed; ++ if (idx > -1) { ++ op_latency = fuPool->getOpLatency(op_class); + } - + } + - DynInstPtr issuing_inst = NULL; - - switch (list_with_oldest) { - case None: - DPRINTF(IQ, "IQ: Not able to schedule any instructions. Issuing " - "inst is %#x.\n", issuing_inst); - break; ++ if (idx == -2 || idx != -1) { ++ if (op_latency == 1) { ++// i2e_info->insts[exec_queue_slot++] = issuing_inst; ++ i2e_info->size++; ++ instsToExecute.push_back(issuing_inst); + - case Int: - issuing_inst = int_head_inst; - readyIntInsts.pop(); - ++int_issued; - DPRINTF(IQ, "IQ: Issuing integer instruction PC %#x.\n", - issuing_inst->readPC()); - break; - - case Float: - issuing_inst = float_head_inst; - readyFloatInsts.pop(); - ++float_issued; - DPRINTF(IQ, "IQ: Issuing float instruction PC %#x.\n", - issuing_inst->readPC()); - break; - - case Branch: - issuing_inst = branch_head_inst; - readyBranchInsts.pop(); - ++branch_issued; - DPRINTF(IQ, "IQ: Issuing branch instruction PC %#x.\n", - issuing_inst->readPC()); - break; - - case Memory: - issuing_inst = mem_head_inst; ++ // Add the FU onto the list of FU's to be freed next ++ // cycle if we used one. 
++ if (idx >= 0) ++ fuPool->freeUnitNextCycle(idx); ++ } else { ++ int issue_latency = fuPool->getIssueLatency(op_class); ++ // Generate completion event for the FU ++ FUCompletion *execution = new FUCompletion(issuing_inst, ++ idx, this); ++ ++ execution->schedule(curTick + cpu->cycles(issue_latency - 1)); ++ ++ // @todo: Enforce that issue_latency == 1 or op_latency ++ if (issue_latency > 1) { ++ execution->setFreeFU(); ++ } else { ++ // @todo: Not sure I'm accounting for the ++ // multi-cycle op in a pipelined FU properly, or ++ // the number of instructions issued in one cycle. ++// i2e_info->insts[exec_queue_slot++] = issuing_inst; ++// i2e_info->size++; ++ ++ // Add the FU onto the list of FU's to be freed next cycle. ++ fuPool->freeUnitNextCycle(idx); ++ } ++ } + - memDepUnit.pop(); - ++memory_issued; - DPRINTF(IQ, "IQ: Issuing memory instruction PC %#x.\n", - issuing_inst->readPC()); - break; ++ DPRINTF(IQ, "Thread %i: Issuing instruction PC %#x " ++ "[sn:%lli]\n", ++ tid, issuing_inst->readPC(), ++ issuing_inst->seqNum); + - case Misc: - issuing_inst = misc_head_inst; - readyMiscInsts.pop(); ++ readyInsts[op_class].pop(); + - ++iqMiscInstsIssued; ++ if (!readyInsts[op_class].empty()) { ++ moveToYoungerInst(order_it); ++ } else { ++ readyIt[op_class] = listOrder.end(); ++ queueOnList[op_class] = false; ++ } + - DPRINTF(IQ, "IQ: Issuing a miscellaneous instruction PC %#x.\n", - issuing_inst->readPC()); - break; ++ issuing_inst->setIssued(); ++ ++total_issued; + - case Squashed: - assert(0 && "Squashed insts should not issue any more!"); - squashedInsts.pop(); - // Set the squashed instruction as able to commit so that commit - // can just drop it from the ROB. This is a bit faked. - ++squashed_issued; - ++freeEntries; ++ if (!issuing_inst->isMemRef()) { ++ // Memory instructions can not be freed from the IQ until they ++ // complete. 
++ ++freeEntries; ++ count[tid]--; ++ issuing_inst->removeInIQ(); ++ } else { ++ memDepUnit[tid].issue(issuing_inst); ++ } + - DPRINTF(IQ, "IQ: Issuing squashed instruction PC %#x.\n", - squashed_head_inst->readPC()); - break; ++ listOrder.erase(order_it++); ++ statIssuedInstType[tid][op_class]++; ++ } else { ++ statFuBusy[op_class]++; ++ fuBusy[tid]++; ++ ++order_it; + } ++ } + - if (list_with_oldest != None && list_with_oldest != Squashed) { - i2e_info->insts[total_issued] = issuing_inst; - i2e_info->size++; - - issuing_inst->setIssued(); - - ++freeEntries; - ++total_issued; - } ++ numIssuedDist.sample(total_issued); ++ iqInstsIssued+= total_issued; + - assert(freeEntries == (numEntries - countInsts())); ++ if (total_issued) { ++ cpu->activityThisCycle(); ++ } else { ++ DPRINTF(IQ, "Not able to schedule any instructions.\n"); + } - - iqIntInstsIssued += int_issued; - iqFloatInstsIssued += float_issued; - iqBranchInstsIssued += branch_issued; - iqMemInstsIssued += memory_issued; - iqSquashedInstsIssued += squashed_issued; +} + +template +void +InstructionQueue::scheduleNonSpec(const InstSeqNum &inst) +{ - DPRINTF(IQ, "IQ: Marking nonspeculative instruction with sequence " - "number %i as ready to execute.\n", inst); ++ DPRINTF(IQ, "Marking nonspeculative instruction [sn:%lli] as ready " ++ "to execute.\n", inst); + - non_spec_it_t inst_it = nonSpecInsts.find(inst); ++ NonSpecMapIt inst_it = nonSpecInsts.find(inst); + + assert(inst_it != nonSpecInsts.end()); + - // Mark this instruction as ready to issue. ++ unsigned tid = (*inst_it).second->threadNumber; ++ + (*inst_it).second->setCanIssue(); + - // Now schedule the instruction. 
+ if (!(*inst_it).second->isMemRef()) { + addIfReady((*inst_it).second); + } else { - memDepUnit.nonSpecInstReady((*inst_it).second); ++ memDepUnit[tid].nonSpecInstReady((*inst_it).second); + } + ++ (*inst_it).second = NULL; ++ + nonSpecInsts.erase(inst_it); +} + +template +void ++InstructionQueue::commit(const InstSeqNum &inst, unsigned tid) ++{ ++ DPRINTF(IQ, "[tid:%i]: Committing instructions older than [sn:%i]\n", ++ tid,inst); ++ ++ ListIt iq_it = instList[tid].begin(); ++ ++ while (iq_it != instList[tid].end() && ++ (*iq_it)->seqNum <= inst) { ++ ++iq_it; ++ instList[tid].pop_front(); ++ } ++ ++ assert(freeEntries == (numEntries - countInsts())); ++} ++ ++template ++int +InstructionQueue::wakeDependents(DynInstPtr &completed_inst) +{ - DPRINTF(IQ, "IQ: Waking dependents of completed instruction.\n"); - //Look at the physical destination register of the DynInst - //and look it up on the dependency graph. Then mark as ready - //any instructions within the instruction queue. - DependencyEntry *curr; ++ int dependents = 0; + - // Tell the memory dependence unit to wake any dependents on this - // instruction if it is a memory instruction. ++ DPRINTF(IQ, "Waking dependents of completed instruction.\n"); + ++ assert(!completed_inst->isSquashed()); ++ ++ // Tell the memory dependence unit to wake any dependents on this ++ // instruction if it is a memory instruction. Also complete the memory ++ // instruction at this point since we know it executed without issues. ++ // @todo: Might want to rename "completeMemInst" to something that ++ // indicates that it won't need to be replayed, and call this ++ // earlier. Might not be a big deal. 
+ if (completed_inst->isMemRef()) { - memDepUnit.wakeDependents(completed_inst); ++ memDepUnit[completed_inst->threadNumber].wakeDependents(completed_inst); ++ completeMemInst(completed_inst); ++ } else if (completed_inst->isMemBarrier() || ++ completed_inst->isWriteBarrier()) { ++ memDepUnit[completed_inst->threadNumber].completeBarrier(completed_inst); + } + + for (int dest_reg_idx = 0; + dest_reg_idx < completed_inst->numDestRegs(); + dest_reg_idx++) + { + PhysRegIndex dest_reg = + completed_inst->renamedDestRegIdx(dest_reg_idx); + + // Special case of uniq or control registers. They are not + // handled by the IQ and thus have no dependency graph entry. + // @todo Figure out a cleaner way to handle this. + if (dest_reg >= numPhysRegs) { + continue; + } + - DPRINTF(IQ, "IQ: Waking any dependents on register %i.\n", ++ DPRINTF(IQ, "Waking any dependents on register %i.\n", + (int) dest_reg); + - //Maybe abstract this part into a function. - //Go through the dependency chain, marking the registers as ready - //within the waiting instructions. - while (dependGraph[dest_reg].next) { - - curr = dependGraph[dest_reg].next; ++ //Go through the dependency chain, marking the registers as ++ //ready within the waiting instructions. ++ DynInstPtr dep_inst = dependGraph.pop(dest_reg); + - DPRINTF(IQ, "IQ: Waking up a dependent instruction, PC%#x.\n", - curr->inst->readPC()); ++ while (dep_inst) { ++ DPRINTF(IQ, "Waking up a dependent instruction, PC%#x.\n", ++ dep_inst->readPC()); + + // Might want to give more information to the instruction - // so that it knows which of its source registers is ready. - // However that would mean that the dependency graph entries - // would need to hold the src_reg_idx. - curr->inst->markSrcRegReady(); ++ // so that it knows which of its source registers is ++ // ready. However that would mean that the dependency ++ // graph entries would need to hold the src_reg_idx. 
++ dep_inst->markSrcRegReady(); + - addIfReady(curr->inst); ++ addIfReady(dep_inst); + - dependGraph[dest_reg].next = curr->next; ++ dep_inst = dependGraph.pop(dest_reg); + - DependencyEntry::mem_alloc_counter--; - - curr->inst = NULL; - - delete curr; ++ ++dependents; + } + - // Reset the head node now that all of its dependents have been woken - // up. - dependGraph[dest_reg].next = NULL; - dependGraph[dest_reg].inst = NULL; ++ // Reset the head node now that all of its dependents have ++ // been woken up. ++ assert(dependGraph.empty(dest_reg)); ++ dependGraph.clearInst(dest_reg); + + // Mark the scoreboard as having that register ready. + regScoreboard[dest_reg] = true; + } ++ return dependents; ++} ++ ++template ++void ++InstructionQueue::addReadyMemInst(DynInstPtr &ready_inst) ++{ ++ OpClass op_class = ready_inst->opClass(); ++ ++ readyInsts[op_class].push(ready_inst); ++ ++ // Will need to reorder the list if either a queue is not on the list, ++ // or it has an older instruction than last time. 
++ if (!queueOnList[op_class]) { ++ addToOrderList(op_class); ++ } else if (readyInsts[op_class].top()->seqNum < ++ (*readyIt[op_class]).oldestInst) { ++ listOrder.erase(readyIt[op_class]); ++ addToOrderList(op_class); ++ } ++ ++ DPRINTF(IQ, "Instruction is ready to issue, putting it onto " ++ "the ready list, PC %#x opclass:%i [sn:%lli].\n", ++ ready_inst->readPC(), op_class, ready_inst->seqNum); ++} ++ ++template ++void ++InstructionQueue::rescheduleMemInst(DynInstPtr &resched_inst) ++{ ++ memDepUnit[resched_inst->threadNumber].reschedule(resched_inst); ++} ++ ++template ++void ++InstructionQueue::replayMemInst(DynInstPtr &replay_inst) ++{ ++ memDepUnit[replay_inst->threadNumber].replay(replay_inst); ++} ++ ++template ++void ++InstructionQueue::completeMemInst(DynInstPtr &completed_inst) ++{ ++ int tid = completed_inst->threadNumber; ++ ++ DPRINTF(IQ, "Completing mem instruction PC:%#x [sn:%lli]\n", ++ completed_inst->readPC(), completed_inst->seqNum); ++ ++ ++freeEntries; ++ ++ completed_inst->memOpDone = true; ++ ++ memDepUnit[tid].completed(completed_inst); ++ ++ count[tid]--; +} + +template +void +InstructionQueue::violation(DynInstPtr &store, + DynInstPtr &faulting_load) +{ - memDepUnit.violation(store, faulting_load); ++ memDepUnit[store->threadNumber].violation(store, faulting_load); +} + +template +void - InstructionQueue::squash() ++InstructionQueue::squash(unsigned tid) +{ - DPRINTF(IQ, "IQ: Starting to squash instructions in the IQ.\n"); ++ DPRINTF(IQ, "[tid:%i]: Starting to squash instructions in " ++ "the IQ.\n", tid); + + // Read instruction sequence number of last instruction out of the + // time buffer. - squashedSeqNum = fromCommit->commitInfo.doneSeqNum; - - // Setup the squash iterator to point to the tail. 
- squashIt = tail; ++ squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum; + + // Call doSquash if there are insts in the IQ - if (freeEntries != numEntries) { - doSquash(); ++ if (count[tid] > 0) { ++ doSquash(tid); + } + + // Also tell the memory dependence unit to squash. - memDepUnit.squash(squashedSeqNum); ++ memDepUnit[tid].squash(squashedSeqNum[tid], tid); +} + +template +void - InstructionQueue::doSquash() ++InstructionQueue::doSquash(unsigned tid) +{ - // Make sure the squash iterator isn't pointing to nothing. - assert(squashIt != cpu->instList.end()); - // Make sure the squashed sequence number is valid. - assert(squashedSeqNum != 0); ++ // Start at the tail. ++ ListIt squash_it = instList[tid].end(); ++ --squash_it; + - DPRINTF(IQ, "IQ: Squashing instructions in the IQ.\n"); ++ DPRINTF(IQ, "[tid:%i]: Squashing until sequence number %i!\n", ++ tid, squashedSeqNum[tid]); + + // Squash any instructions younger than the squashed sequence number + // given. - while ((*squashIt)->seqNum > squashedSeqNum) { - DynInstPtr squashed_inst = (*squashIt); ++ while (squash_it != instList[tid].end() && ++ (*squash_it)->seqNum > squashedSeqNum[tid]) { ++ ++ DynInstPtr squashed_inst = (*squash_it); + + // Only handle the instruction if it actually is in the IQ and + // hasn't already been squashed in the IQ. - if (!squashed_inst->isIssued() && - !squashed_inst->isSquashedInIQ()) { ++ if (squashed_inst->threadNumber != tid || ++ squashed_inst->isSquashedInIQ()) { ++ --squash_it; ++ continue; ++ } ++ ++ if (!squashed_inst->isIssued() || ++ (squashed_inst->isMemRef() && ++ !squashed_inst->memOpDone)) { + + // Remove the instruction from the dependency list. - // Hack for now: These below don't add themselves to the - // dependency list, so don't try to remove them. 
- if (!squashed_inst->isNonSpeculative()/* && - !squashed_inst->isStore()*/ - ) { ++ if (!squashed_inst->isNonSpeculative() && ++ !squashed_inst->isStoreConditional() && ++ !squashed_inst->isMemBarrier() && ++ !squashed_inst->isWriteBarrier()) { + + for (int src_reg_idx = 0; + src_reg_idx < squashed_inst->numSrcRegs(); + src_reg_idx++) + { + PhysRegIndex src_reg = + squashed_inst->renamedSrcRegIdx(src_reg_idx); + - // Only remove it from the dependency graph if it was - // placed there in the first place. - // HACK: This assumes that instructions woken up from the - // dependency chain aren't informed that a specific src - // register has become ready. This may not always be true - // in the future. ++ // Only remove it from the dependency graph if it ++ // was placed there in the first place. ++ ++ // Instead of doing a linked list traversal, we ++ // can just remove these squashed instructions ++ // either at issue time, or when the register is ++ // overwritten. The only downside to this is it ++ // leaves more room for error. ++ + if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) && + src_reg < numPhysRegs) { - dependGraph[src_reg].remove(squashed_inst); ++ dependGraph.remove(src_reg, squashed_inst); + } + ++ + ++iqSquashedOperandsExamined; + } - - // Might want to remove producers as well. + } else { - nonSpecInsts[squashed_inst->seqNum] = NULL; ++ NonSpecMapIt ns_inst_it = ++ nonSpecInsts.find(squashed_inst->seqNum); ++ assert(ns_inst_it != nonSpecInsts.end()); ++ ++ (*ns_inst_it).second = NULL; + - nonSpecInsts.erase(squashed_inst->seqNum); ++ nonSpecInsts.erase(ns_inst_it); + + ++iqSquashedNonSpecRemoved; + } + + // Might want to also clear out the head of the dependency graph. + + // Mark it as squashed within the IQ. + squashed_inst->setSquashedInIQ(); + - // squashedInsts.push(squashed_inst); ++ // @todo: Remove this hack where several statuses are set so the ++ // inst will flow through the rest of the pipeline. 
+ squashed_inst->setIssued(); + squashed_inst->setCanCommit(); ++ squashed_inst->removeInIQ(); ++ ++ //Update Thread IQ Count ++ count[squashed_inst->threadNumber]--; + + ++freeEntries; + - DPRINTF(IQ, "IQ: Instruction PC %#x squashed.\n", - squashed_inst->readPC()); ++ DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x " ++ "squashed.\n", ++ tid, squashed_inst->seqNum, squashed_inst->readPC()); + } + - --squashIt; ++ instList[tid].erase(squash_it--); + ++iqSquashedInstsExamined; + } - - assert(freeEntries <= numEntries); - - if (freeEntries == numEntries) { - tail = cpu->instList.end(); - } - - } - - template - void - InstructionQueue::stopSquash() - { - // Clear up the squash variables to ensure that squashing doesn't - // get called improperly. - squashedSeqNum = 0; - - squashIt = cpu->instList.end(); - } - - template - void - InstructionQueue::DependencyEntry::insert(DynInstPtr &new_inst) - { - //Add this new, dependent instruction at the head of the dependency - //chain. - - // First create the entry that will be added to the head of the - // dependency chain. - DependencyEntry *new_entry = new DependencyEntry; - new_entry->next = this->next; - new_entry->inst = new_inst; - - // Then actually add it to the chain. - this->next = new_entry; - - ++mem_alloc_counter; - } - - template - void - InstructionQueue::DependencyEntry::remove(DynInstPtr &inst_to_remove) - { - DependencyEntry *prev = this; - DependencyEntry *curr = this->next; - - // Make sure curr isn't NULL. Because this instruction is being - // removed from a dependency list, it must have been placed there at - // an earlier time. The dependency chain should not be empty, - // unless the instruction dependent upon it is already ready. - if (curr == NULL) { - return; - } - - // Find the instruction to remove within the dependency linked list. - while(curr->inst != inst_to_remove) - { - prev = curr; - curr = curr->next; - - assert(curr != NULL); - } - - // Now remove this instruction from the list. 
- prev->next = curr->next; - - --mem_alloc_counter; - - // Could push this off to the destructor of DependencyEntry - curr->inst = NULL; - - delete curr; +} + +template +bool +InstructionQueue::addToDependents(DynInstPtr &new_inst) +{ + // Loop through the instruction's source registers, adding + // them to the dependency list if they are not ready. + int8_t total_src_regs = new_inst->numSrcRegs(); + bool return_val = false; + + for (int src_reg_idx = 0; + src_reg_idx < total_src_regs; + src_reg_idx++) + { + // Only add it to the dependency graph if it's not ready. + if (!new_inst->isReadySrcRegIdx(src_reg_idx)) { + PhysRegIndex src_reg = new_inst->renamedSrcRegIdx(src_reg_idx); + + // Check the IQ's scoreboard to make sure the register + // hasn't become ready while the instruction was in flight + // between stages. Only if it really isn't ready should + // it be added to the dependency graph. + if (src_reg >= numPhysRegs) { + continue; + } else if (regScoreboard[src_reg] == false) { - DPRINTF(IQ, "IQ: Instruction PC %#x has src reg %i that " ++ DPRINTF(IQ, "Instruction PC %#x has src reg %i that " + "is being added to the dependency chain.\n", + new_inst->readPC(), src_reg); + - dependGraph[src_reg].insert(new_inst); ++ dependGraph.insert(src_reg, new_inst); + + // Change the return value to indicate that something + // was added to the dependency graph. + return_val = true; + } else { - DPRINTF(IQ, "IQ: Instruction PC %#x has src reg %i that " ++ DPRINTF(IQ, "Instruction PC %#x has src reg %i that " + "became ready before it reached the IQ.\n", + new_inst->readPC(), src_reg); + // Mark a register ready within the instruction. 
- new_inst->markSrcRegReady(); ++ new_inst->markSrcRegReady(src_reg_idx); + } + } + } + + return return_val; +} + +template +void - InstructionQueue::createDependency(DynInstPtr &new_inst) ++InstructionQueue::addToProducers(DynInstPtr &new_inst) +{ - //Actually nothing really needs to be marked when an - //instruction becomes the producer of a register's value, - //but for convenience a ptr to the producing instruction will - //be placed in the head node of the dependency links. ++ // Nothing really needs to be marked when an instruction becomes ++ // the producer of a register's value, but for convenience a ptr ++ // to the producing instruction will be placed in the head node of ++ // the dependency links. + int8_t total_dest_regs = new_inst->numDestRegs(); + + for (int dest_reg_idx = 0; + dest_reg_idx < total_dest_regs; + dest_reg_idx++) + { + PhysRegIndex dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx); + + // Instructions that use the misc regs will have a reg number + // higher than the normal physical registers. In this case these + // registers are not renamed, and there is no need to track + // dependencies as these instructions must be executed at commit. + if (dest_reg >= numPhysRegs) { + continue; + } + - dependGraph[dest_reg].inst = new_inst; - - if (dependGraph[dest_reg].next) { - dumpDependGraph(); - panic("IQ: Dependency graph not empty!"); ++ if (!dependGraph.empty(dest_reg)) { ++ dependGraph.dump(); ++ panic("Dependency graph %i not empty!", dest_reg); + } + ++ dependGraph.setInst(dest_reg, new_inst); ++ + // Mark the scoreboard to say it's not yet ready. + regScoreboard[dest_reg] = false; + } +} + +template +void +InstructionQueue::addIfReady(DynInstPtr &inst) +{ - //If the instruction now has all of its source registers ++ // If the instruction now has all of its source registers + // available, then add it to the list of ready instructions. + if (inst->readyToIssue()) { + + //Add the instruction to the proper ready list. 
- if (inst->isControl()) { ++ if (inst->isMemRef()) { + - DPRINTF(IQ, "IQ: Branch instruction is ready to issue, " - "putting it onto the ready list, PC %#x.\n", - inst->readPC()); - readyBranchInsts.push(inst); - - } else if (inst->isMemRef()) { - - DPRINTF(IQ, "IQ: Checking if memory instruction can issue.\n"); ++ DPRINTF(IQ, "Checking if memory instruction can issue.\n"); + + // Message to the mem dependence unit that this instruction has + // its registers ready. ++ memDepUnit[inst->threadNumber].regsReady(inst); + - memDepUnit.regsReady(inst); - - #if 0 - if (memDepUnit.readyToIssue(inst)) { - DPRINTF(IQ, "IQ: Memory instruction is ready to issue, " - "putting it onto the ready list, PC %#x.\n", - inst->readPC()); - readyMemInsts.push(inst); - } else { - // Make dependent on the store. - // Will need some way to get the store instruction it should - // be dependent upon; then when the store issues it can - // put the instruction on the ready list. - // Yet another tree? - assert(0 && "Instruction has no way to actually issue"); - } - #endif - - } else if (inst->isInteger()) { ++ return; ++ } + - DPRINTF(IQ, "IQ: Integer instruction is ready to issue, " - "putting it onto the ready list, PC %#x.\n", - inst->readPC()); - readyIntInsts.push(inst); ++ OpClass op_class = inst->opClass(); + - } else if (inst->isFloating()) { ++ DPRINTF(IQ, "Instruction is ready to issue, putting it onto " ++ "the ready list, PC %#x opclass:%i [sn:%lli].\n", ++ inst->readPC(), op_class, inst->seqNum); + - DPRINTF(IQ, "IQ: Floating instruction is ready to issue, " - "putting it onto the ready list, PC %#x.\n", - inst->readPC()); - readyFloatInsts.push(inst); ++ readyInsts[op_class].push(inst); + - } else { - DPRINTF(IQ, "IQ: Miscellaneous instruction is ready to issue, " - "putting it onto the ready list, PC %#x..\n", - inst->readPC()); - - readyMiscInsts.push(inst); ++ // Will need to reorder the list if either a queue is not on the list, ++ // or it has an older instruction than 
last time. ++ if (!queueOnList[op_class]) { ++ addToOrderList(op_class); ++ } else if (readyInsts[op_class].top()->seqNum < ++ (*readyIt[op_class]).oldestInst) { ++ listOrder.erase(readyIt[op_class]); ++ addToOrderList(op_class); + } + } +} + - /* - * Caution, this function must not be called prior to tail being updated at - * least once, otherwise it will fail the assertion. This is because - * instList.begin() actually changes upon the insertion of an element into the - * list when the list is empty. - */ +template +int +InstructionQueue::countInsts() +{ - ListIt count_it = cpu->instList.begin(); ++ //ksewell:This works but definitely could use a cleaner write ++ //with a more intuitive way of counting. Right now it's ++ //just brute force .... ++ ++#if 0 + int total_insts = 0; + - if (tail == cpu->instList.end()) - return 0; ++ for (int i = 0; i < numThreads; ++i) { ++ ListIt count_it = instList[i].begin(); ++ ++ while (count_it != instList[i].end()) { ++ if (!(*count_it)->isSquashed() && !(*count_it)->isSquashedInIQ()) { ++ if (!(*count_it)->isIssued()) { ++ ++total_insts; ++ } else if ((*count_it)->isMemRef() && ++ !(*count_it)->memOpDone) { ++ // Loads that have not been marked as executed still count ++ // towards the total instructions. ++ ++total_insts; ++ } ++ } + - while (count_it != tail) { - if (!(*count_it)->isIssued()) { - ++total_insts; ++ ++count_it; + } - - ++count_it; - - assert(count_it != cpu->instList.end()); - } - - // Need to count the tail iterator as well. 
- if (count_it != cpu->instList.end() && - (*count_it) && - !(*count_it)->isIssued()) { - ++total_insts; + } + + return total_insts; ++#else ++ return numEntries - freeEntries; ++#endif +} + +template +void - InstructionQueue::dumpDependGraph() ++InstructionQueue::dumpLists() +{ - DependencyEntry *curr; ++ for (int i = 0; i < Num_OpClasses; ++i) { ++ cprintf("Ready list %i size: %i\n", i, readyInsts[i].size()); + - for (int i = 0; i < numPhysRegs; ++i) - { - curr = &dependGraph[i]; ++ cprintf("\n"); ++ } + - if (curr->inst) { - cprintf("dependGraph[%i]: producer: %#x consumer: ", i, - curr->inst->readPC()); - } else { - cprintf("dependGraph[%i]: No producer. consumer: ", i); - } ++ cprintf("Non speculative list size: %i\n", nonSpecInsts.size()); + - while (curr->next != NULL) { - curr = curr->next; ++ NonSpecMapIt non_spec_it = nonSpecInsts.begin(); ++ NonSpecMapIt non_spec_end_it = nonSpecInsts.end(); + - cprintf("%#x ", curr->inst->readPC()); - } ++ cprintf("Non speculative list: "); + - cprintf("\n"); ++ while (non_spec_it != non_spec_end_it) { ++ cprintf("%#x [sn:%lli]", (*non_spec_it).second->readPC(), ++ (*non_spec_it).second->seqNum); ++ ++non_spec_it; + } - } + - template - void - InstructionQueue::dumpLists() - { - cprintf("Ready integer list size: %i\n", readyIntInsts.size()); ++ cprintf("\n"); + - cprintf("Ready float list size: %i\n", readyFloatInsts.size()); ++ ListOrderIt list_order_it = listOrder.begin(); ++ ListOrderIt list_order_end_it = listOrder.end(); ++ int i = 1; + - cprintf("Ready branch list size: %i\n", readyBranchInsts.size()); ++ cprintf("List order: "); + - cprintf("Ready misc list size: %i\n", readyMiscInsts.size()); ++ while (list_order_it != list_order_end_it) { ++ cprintf("%i OpClass:%i [sn:%lli] ", i, (*list_order_it).queueType, ++ (*list_order_it).oldestInst); + - cprintf("Squashed list size: %i\n", squashedInsts.size()); ++ ++list_order_it; ++ ++i; ++ } + - cprintf("Non speculative list size: %i\n", nonSpecInsts.size()); ++ 
cprintf("\n"); ++} + - non_spec_it_t non_spec_it = nonSpecInsts.begin(); + - cprintf("Non speculative list: "); ++template ++void ++InstructionQueue::dumpInsts() ++{ ++ for (int i = 0; i < numThreads; ++i) { ++ int num = 0; ++ int valid_num = 0; ++ ListIt inst_list_it = instList[i].begin(); ++ ++ while (inst_list_it != instList[i].end()) ++ { ++ cprintf("Instruction:%i\n", ++ num); ++ if (!(*inst_list_it)->isSquashed()) { ++ if (!(*inst_list_it)->isIssued()) { ++ ++valid_num; ++ cprintf("Count:%i\n", valid_num); ++ } else if ((*inst_list_it)->isMemRef() && ++ !(*inst_list_it)->memOpDone) { ++ // Loads that have not been marked as executed ++ // still count towards the total instructions. ++ ++valid_num; ++ cprintf("Count:%i\n", valid_num); ++ } ++ } + - while (non_spec_it != nonSpecInsts.end()) { - cprintf("%#x ", (*non_spec_it).second->readPC()); - ++non_spec_it; - } ++ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" ++ "Issued:%i\nSquashed:%i\n", ++ (*inst_list_it)->readPC(), ++ (*inst_list_it)->seqNum, ++ (*inst_list_it)->threadNumber, ++ (*inst_list_it)->isIssued(), ++ (*inst_list_it)->isSquashed()); + - cprintf("\n"); ++ if ((*inst_list_it)->isMemRef()) { ++ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); ++ } + ++ cprintf("\n"); ++ ++ inst_list_it++; ++ ++num; ++ } ++ } +} diff --cc src/cpu/o3/mem_dep_unit.cc index 9c1e7f9d8,000000000..ccdd1a515 mode 100644,000000..100644 --- a/src/cpu/o3/mem_dep_unit.cc +++ b/src/cpu/o3/mem_dep_unit.cc @@@ -1,36 -1,0 +1,46 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/store_set.hh" +#include "cpu/o3/mem_dep_unit_impl.hh" + +// Force instantation of memory dependency unit using store sets and +// AlphaSimpleImpl. 
+template class MemDepUnit; ++ ++template <> ++int ++MemDepUnit::MemDepEntry::memdep_count = 0; ++template <> ++int ++MemDepUnit::MemDepEntry::memdep_insert = 0; ++template <> ++int ++MemDepUnit::MemDepEntry::memdep_erase = 0; diff --cc src/cpu/o3/mem_dep_unit.hh index ca63577a1,000000000..acbe08ec2 mode 100644,000000..100644 --- a/src/cpu/o3/mem_dep_unit.hh +++ b/src/cpu/o3/mem_dep_unit.hh @@@ -1,164 -1,0 +1,251 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - #ifndef __CPU_O3_CPU_MEM_DEP_UNIT_HH__ - #define __CPU_O3_CPU_MEM_DEP_UNIT_HH__ ++#ifndef __CPU_O3_MEM_DEP_UNIT_HH__ ++#define __CPU_O3_MEM_DEP_UNIT_HH__ + - #include ++#include +#include + ++#include "base/hashmap.hh" ++#include "base/refcnt.hh" +#include "base/statistics.hh" +#include "cpu/inst_seq.hh" + ++struct SNHash { ++ size_t operator() (const InstSeqNum &seq_num) const { ++ unsigned a = (unsigned)seq_num; ++ unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF; ++ ++ return hash; ++ } ++}; ++ ++template ++class InstructionQueue; ++ +/** + * Memory dependency unit class. This holds the memory dependence predictor. + * As memory operations are issued to the IQ, they are also issued to this + * unit, which then looks up the prediction as to what they are dependent + * upon. This unit must be checked prior to a memory operation being able + * to issue. Although this is templated, it's somewhat hard to make a generic + * memory dependence unit. This one is mostly for store sets; it will be + * quite limited in what other memory dependence predictions it can also + * utilize. Thus this class should be most likely be rewritten for other + * dependence prediction schemes. + */ +template +class MemDepUnit { + public: + typedef typename Impl::Params Params; + typedef typename Impl::DynInstPtr DynInstPtr; + - public: - MemDepUnit(Params ¶ms); ++ /** Empty constructor. Must call init() prior to using in this case. 
*/ ++ MemDepUnit() {} ++ ++ /** Constructs a MemDepUnit with given parameters. */ ++ MemDepUnit(Params *params); + ++ /** Frees up any memory allocated. */ ++ ~MemDepUnit(); ++ ++ /** Returns the name of the memory dependence unit. */ ++ std::string name() const; ++ ++ /** Initializes the unit with parameters and a thread id. */ ++ void init(Params *params, int tid); ++ ++ /** Registers statistics. */ + void regStats(); + ++ void switchOut(); ++ ++ void takeOverFrom(); ++ ++ /** Sets the pointer to the IQ. */ ++ void setIQ(InstructionQueue *iq_ptr); ++ ++ /** Inserts a memory instruction. */ + void insert(DynInstPtr &inst); + ++ /** Inserts a non-speculative memory instruction. */ + void insertNonSpec(DynInstPtr &inst); + - // Will want to make this operation relatively fast. Right now it - // is somewhat slow. - DynInstPtr &top(); - - void pop(); ++ /** Inserts a barrier instruction. */ ++ void insertBarrier(DynInstPtr &barr_inst); + ++ /** Indicate that an instruction has its registers ready. */ + void regsReady(DynInstPtr &inst); + ++ /** Indicate that a non-speculative instruction is ready. */ + void nonSpecInstReady(DynInstPtr &inst); + - void issue(DynInstPtr &inst); ++ /** Reschedules an instruction to be re-executed. */ ++ void reschedule(DynInstPtr &inst); ++ ++ /** Replays all instructions that have been rescheduled by moving them to ++ * the ready list. ++ */ ++ void replay(DynInstPtr &inst); ++ ++ /** Completes a memory instruction. */ ++ void completed(DynInstPtr &inst); + ++ /** Completes a barrier instruction. */ ++ void completeBarrier(DynInstPtr &inst); ++ ++ /** Wakes any dependents of a memory instruction. */ + void wakeDependents(DynInstPtr &inst); + - void squash(const InstSeqNum &squashed_num); ++ /** Squashes all instructions up until a given sequence number for a ++ * specific thread. ++ */ ++ void squash(const InstSeqNum &squashed_num, unsigned tid); + ++ /** Indicates an ordering violation between a store and a younger load. 
*/ + void violation(DynInstPtr &store_inst, DynInstPtr &violating_load); + - inline bool empty() - { return readyInsts.empty(); } ++ /** Issues the given instruction */ ++ void issue(DynInstPtr &inst); ++ ++ /** Debugging function to dump the lists of instructions. */ ++ void dumpLists(); + + private: - typedef typename std::set::iterator sn_it_t; - typedef typename std::map::iterator dyn_it_t; - - // Forward declarations so that the following two typedefs work. - class Dependency; - class ltDependency; - - typedef typename std::set::iterator dep_it_t; - typedef typename std::map >::iterator - sd_it_t; - - struct Dependency { - Dependency(const InstSeqNum &_seqNum) - : seqNum(_seqNum), regsReady(0), memDepReady(0) - { } - - Dependency(const InstSeqNum &_seqNum, bool _regsReady, - bool _memDepReady) - : seqNum(_seqNum), regsReady(_regsReady), - memDepReady(_memDepReady) - { } - - InstSeqNum seqNum; - mutable bool regsReady; - mutable bool memDepReady; - mutable sd_it_t storeDep; - }; ++ typedef typename std::list::iterator ListIt; + - struct ltDependency { - bool operator() (const Dependency &lhs, const Dependency &rhs) ++ class MemDepEntry; ++ ++ typedef RefCountingPtr MemDepEntryPtr; ++ ++ /** Memory dependence entries that track memory operations, marking ++ * when the instruction is ready to execute and what instructions depend ++ * upon it. ++ */ ++ class MemDepEntry : public RefCounted { ++ public: ++ /** Constructs a memory dependence entry. */ ++ MemDepEntry(DynInstPtr &new_inst) ++ : inst(new_inst), regsReady(false), memDepReady(false), ++ completed(false), squashed(false) + { - return lhs.seqNum < rhs.seqNum; ++ ++memdep_count; ++ ++ DPRINTF(MemDepUnit, "Memory dependency entry created. " ++ "memdep_count=%i\n", memdep_count); + } ++ ++ /** Frees any pointers. */ ++ ~MemDepEntry() ++ { ++ for (int i = 0; i < dependInsts.size(); ++i) { ++ dependInsts[i] = NULL; ++ } ++ ++ --memdep_count; ++ ++ DPRINTF(MemDepUnit, "Memory dependency entry deleted. 
" ++ "memdep_count=%i\n", memdep_count); ++ } ++ ++ /** Returns the name of the memory dependence entry. */ ++ std::string name() const { return "memdepentry"; } ++ ++ /** The instruction being tracked. */ ++ DynInstPtr inst; ++ ++ /** The iterator to the instruction's location inside the list. */ ++ ListIt listIt; ++ ++ /** A vector of any dependent instructions. */ ++ std::vector dependInsts; ++ ++ /** If the registers are ready or not. */ ++ bool regsReady; ++ /** If all memory dependencies have been satisfied. */ ++ bool memDepReady; ++ /** If the instruction is completed. */ ++ bool completed; ++ /** If the instruction is squashed. */ ++ bool squashed; ++ ++ /** For debugging. */ ++ static int memdep_count; ++ static int memdep_insert; ++ static int memdep_erase; + }; + - inline void moveToReady(dep_it_t &woken_inst); ++ /** Finds the memory dependence entry in the hash map. */ ++ inline MemDepEntryPtr &findInHash(const DynInstPtr &inst); + - /** List of instructions that have passed through rename, yet are still - * waiting on either a memory dependence to resolve or source registers to - * become available before they can issue. - */ - std::set waitingInsts; ++ /** Moves an entry to the ready list. */ ++ inline void moveToReady(MemDepEntryPtr &ready_inst_entry); + - /** List of instructions that have all their predicted memory dependences - * resolved and their source registers ready. - */ - std::set readyInsts; ++ typedef m5::hash_map MemDepHash; + - // Change this to hold a vector of iterators, which will point to the - // entry of the waiting instructions. - /** List of stores' sequence numbers, each of which has a vector of - * iterators. The iterators point to the appropriate node within - * waitingInsts that has the depenendent instruction. - */ - std::map > storeDependents; ++ typedef typename MemDepHash::iterator MemDepHashIt; ++ ++ /** A hash map of all memory dependence entries. 
*/ ++ MemDepHash memDepHash; + - // For now will implement this as a map...hash table might not be too - // bad, or could move to something that mimics the current dependency - // graph. - std::map memInsts; ++ /** A list of all instructions in the memory dependence unit. */ ++ std::list instList[Impl::MaxThreads]; + - // Iterator pointer to the top instruction which has is ready. - // Is set by the top() call. - dyn_it_t topInst; ++ /** A list of all instructions that are going to be replayed. */ ++ std::list instsToReplay; + + /** The memory dependence predictor. It is accessed upon new + * instructions being added to the IQ, and responds by telling + * this unit what instruction the newly added instruction is dependent + * upon. + */ + MemDepPred depPred; + ++ bool loadBarrier; ++ InstSeqNum loadBarrierSN; ++ bool storeBarrier; ++ InstSeqNum storeBarrierSN; ++ ++ /** Pointer to the IQ. */ ++ InstructionQueue *iqPtr; ++ ++ /** The thread id of this memory dependence unit. */ ++ int id; ++ ++ /** Stat for number of inserted loads. */ + Stats::Scalar<> insertedLoads; ++ /** Stat for number of inserted stores. */ + Stats::Scalar<> insertedStores; ++ /** Stat for number of conflicting loads that had to wait for a store. */ + Stats::Scalar<> conflictingLoads; ++ /** Stat for number of conflicting stores that had to wait for a store. */ + Stats::Scalar<> conflictingStores; +}; + - #endif // __CPU_O3_CPU_MEM_DEP_UNIT_HH__ ++#endif // __CPU_O3_MEM_DEP_UNIT_HH__ diff --cc src/cpu/o3/mem_dep_unit_impl.hh index 296db4c4e,000000000..8b195baab mode 100644,000000..100644 --- a/src/cpu/o3/mem_dep_unit_impl.hh +++ b/src/cpu/o3/mem_dep_unit_impl.hh @@@ -1,419 -1,0 +1,551 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + ++#include "cpu/o3/inst_queue.hh" +#include "cpu/o3/mem_dep_unit.hh" + +template - MemDepUnit::MemDepUnit(Params ¶ms) - : depPred(params.SSITSize, params.LFSTSize) ++MemDepUnit::MemDepUnit(Params *params) ++ : depPred(params->SSITSize, params->LFSTSize), loadBarrier(false), ++ loadBarrierSN(0), storeBarrier(false), storeBarrierSN(0), iqPtr(NULL) +{ - DPRINTF(MemDepUnit, "MemDepUnit: Creating MemDepUnit object.\n"); ++ DPRINTF(MemDepUnit, "Creating MemDepUnit object.\n"); ++} ++ ++template ++MemDepUnit::~MemDepUnit() ++{ ++ for (int tid=0; tid < Impl::MaxThreads; tid++) { ++ ++ ListIt inst_list_it = instList[tid].begin(); ++ ++ MemDepHashIt hash_it; ++ ++ while (!instList[tid].empty()) { ++ hash_it = memDepHash.find((*inst_list_it)->seqNum); ++ ++ assert(hash_it != memDepHash.end()); ++ ++ memDepHash.erase(hash_it); ++ ++ instList[tid].erase(inst_list_it++); ++ } ++ } ++ ++ assert(MemDepEntry::memdep_count == 0); ++} ++ ++template ++std::string ++MemDepUnit::name() const ++{ ++ return "memdepunit"; ++} ++ ++template ++void ++MemDepUnit::init(Params *params, int tid) ++{ ++ DPRINTF(MemDepUnit, "Creating MemDepUnit %i object.\n",tid); ++ ++ id = tid; ++ ++ depPred.init(params->SSITSize, params->LFSTSize); +} + +template +void +MemDepUnit::regStats() +{ + insertedLoads + .name(name() + ".memDep.insertedLoads") + .desc("Number of loads inserted to the mem dependence unit."); + + insertedStores + .name(name() + ".memDep.insertedStores") + .desc("Number of stores inserted to the mem dependence unit."); + + conflictingLoads + .name(name() + ".memDep.conflictingLoads") + .desc("Number of conflicting loads."); + + conflictingStores + .name(name() + ".memDep.conflictingStores") + .desc("Number of conflicting stores."); +} + ++template ++void ++MemDepUnit::switchOut() ++{ ++ for (int i = 0; i < Impl::MaxThreads; ++i) { ++ instList[i].clear(); ++ } ++ instsToReplay.clear(); ++ memDepHash.clear(); ++} ++ ++template ++void ++MemDepUnit::takeOverFrom() ++{ ++ 
loadBarrier = storeBarrier = false; ++ loadBarrierSN = storeBarrierSN = 0; ++ depPred.clear(); ++} ++ ++template ++void ++MemDepUnit::setIQ(InstructionQueue *iq_ptr) ++{ ++ iqPtr = iq_ptr; ++} ++ +template +void +MemDepUnit::insert(DynInstPtr &inst) +{ - InstSeqNum inst_seq_num = inst->seqNum; ++ unsigned tid = inst->threadNumber; ++ ++ MemDepEntryPtr inst_entry = new MemDepEntry(inst); ++ ++ // Add the MemDepEntry to the hash. ++ memDepHash.insert( ++ std::pair(inst->seqNum, inst_entry)); ++ MemDepEntry::memdep_insert++; ++ ++ instList[tid].push_back(inst); ++ ++ inst_entry->listIt = --(instList[tid].end()); ++ ++ // Check any barriers and the dependence predictor for any ++ // producing stores. ++ InstSeqNum producing_store; ++ if (inst->isLoad() && loadBarrier) { ++ producing_store = loadBarrierSN; ++ } else if (inst->isStore() && storeBarrier) { ++ producing_store = storeBarrierSN; ++ } else { ++ producing_store = depPred.checkInst(inst->readPC()); ++ } + - Dependency unresolved_dependencies(inst_seq_num); ++ MemDepEntryPtr store_entry = NULL; + - InstSeqNum producing_store = depPred.checkInst(inst->readPC()); ++ // If there is a producing store, try to find the entry. ++ if (producing_store != 0) { ++ MemDepHashIt hash_it = memDepHash.find(producing_store); + - if (producing_store == 0 || - storeDependents.find(producing_store) == storeDependents.end()) { ++ if (hash_it != memDepHash.end()) { ++ store_entry = (*hash_it).second; ++ } ++ } + - DPRINTF(MemDepUnit, "MemDepUnit: No dependency for inst PC " - "%#x.\n", inst->readPC()); ++ // If no store entry, then instruction can issue as soon as the registers ++ // are ready. 
++ if (!store_entry) { ++ DPRINTF(MemDepUnit, "No dependency for inst PC " ++ "%#x [sn:%lli].\n", inst->readPC(), inst->seqNum); + - unresolved_dependencies.storeDep = storeDependents.end(); ++ inst_entry->memDepReady = true; + + if (inst->readyToIssue()) { - readyInsts.insert(inst_seq_num); - } else { - unresolved_dependencies.memDepReady = true; ++ inst_entry->regsReady = true; + - waitingInsts.insert(unresolved_dependencies); ++ moveToReady(inst_entry); + } + } else { - DPRINTF(MemDepUnit, "MemDepUnit: Adding to dependency list; " - "inst PC %#x is dependent on seq num %i.\n", ++ // Otherwise make the instruction dependent on the store/barrier. ++ DPRINTF(MemDepUnit, "Adding to dependency list; " ++ "inst PC %#x is dependent on [sn:%lli].\n", + inst->readPC(), producing_store); + + if (inst->readyToIssue()) { - unresolved_dependencies.regsReady = true; ++ inst_entry->regsReady = true; + } + - // Find the store that this instruction is dependent on. - sd_it_t store_loc = storeDependents.find(producing_store); - - assert(store_loc != storeDependents.end()); - - // Record the location of the store that this instruction is - // dependent on. - unresolved_dependencies.storeDep = store_loc; - - // If it's not already ready, then add it to the renamed - // list and the dependencies. - dep_it_t inst_loc = - (waitingInsts.insert(unresolved_dependencies)).first; - + // Add this instruction to the list of dependents. - (*store_loc).second.push_back(inst_loc); - - assert(!(*store_loc).second.empty()); ++ store_entry->dependInsts.push_back(inst_entry); + + if (inst->isLoad()) { + ++conflictingLoads; + } else { + ++conflictingStores; + } + } + + if (inst->isStore()) { - DPRINTF(MemDepUnit, "MemDepUnit: Inserting store PC %#x.\n", - inst->readPC()); - - depPred.insertStore(inst->readPC(), inst_seq_num); - - // Make sure this store isn't already in this list. 
- assert(storeDependents.find(inst_seq_num) == storeDependents.end()); - - // Put a dependency entry in at the store's sequence number. - // Uh, not sure how this works...I want to create an entry but - // I don't have anything to put into the value yet. - storeDependents[inst_seq_num]; ++ DPRINTF(MemDepUnit, "Inserting store PC %#x [sn:%lli].\n", ++ inst->readPC(), inst->seqNum); + - assert(storeDependents.size() != 0); ++ depPred.insertStore(inst->readPC(), inst->seqNum, inst->threadNumber); + + ++insertedStores; - + } else if (inst->isLoad()) { + ++insertedLoads; + } else { - panic("MemDepUnit: Unknown type! (most likely a barrier)."); ++ panic("Unknown type! (most likely a barrier)."); + } - - memInsts[inst_seq_num] = inst; +} + +template +void +MemDepUnit::insertNonSpec(DynInstPtr &inst) +{ - InstSeqNum inst_seq_num = inst->seqNum; ++ unsigned tid = inst->threadNumber; ++ ++ MemDepEntryPtr inst_entry = new MemDepEntry(inst); + - Dependency non_spec_inst(inst_seq_num); ++ // Insert the MemDepEntry into the hash. ++ memDepHash.insert( ++ std::pair(inst->seqNum, inst_entry)); ++ MemDepEntry::memdep_insert++; + - non_spec_inst.storeDep = storeDependents.end(); ++ // Add the instruction to the list. ++ instList[tid].push_back(inst); + - waitingInsts.insert(non_spec_inst); ++ inst_entry->listIt = --(instList[tid].end()); + + // Might want to turn this part into an inline function or something. + // It's shared between both insert functions. + if (inst->isStore()) { - DPRINTF(MemDepUnit, "MemDepUnit: Inserting store PC %#x.\n", - inst->readPC()); - - depPred.insertStore(inst->readPC(), inst_seq_num); - - // Make sure this store isn't already in this list. - assert(storeDependents.find(inst_seq_num) == storeDependents.end()); - - // Put a dependency entry in at the store's sequence number. - // Uh, not sure how this works...I want to create an entry but - // I don't have anything to put into the value yet. 
- storeDependents[inst_seq_num]; ++ DPRINTF(MemDepUnit, "Inserting store PC %#x [sn:%lli].\n", ++ inst->readPC(), inst->seqNum); + - assert(storeDependents.size() != 0); ++ depPred.insertStore(inst->readPC(), inst->seqNum, inst->threadNumber); + + ++insertedStores; - + } else if (inst->isLoad()) { + ++insertedLoads; + } else { - panic("MemDepUnit: Unknown type! (most likely a barrier)."); ++ panic("Unknown type! (most likely a barrier)."); + } - - memInsts[inst_seq_num] = inst; +} + +template - typename Impl::DynInstPtr & - MemDepUnit::top() ++void ++MemDepUnit::insertBarrier(DynInstPtr &barr_inst) +{ - topInst = memInsts.find( (*readyInsts.begin()) ); ++ InstSeqNum barr_sn = barr_inst->seqNum; ++ if (barr_inst->isMemBarrier()) { ++ loadBarrier = true; ++ loadBarrierSN = barr_sn; ++ storeBarrier = true; ++ storeBarrierSN = barr_sn; ++ DPRINTF(MemDepUnit, "Inserted a memory barrier\n"); ++ } else if (barr_inst->isWriteBarrier()) { ++ storeBarrier = true; ++ storeBarrierSN = barr_sn; ++ DPRINTF(MemDepUnit, "Inserted a write barrier\n"); ++ } ++ ++ unsigned tid = barr_inst->threadNumber; ++ ++ MemDepEntryPtr inst_entry = new MemDepEntry(barr_inst); ++ ++ // Add the MemDepEntry to the hash. ++ memDepHash.insert( ++ std::pair(barr_sn, inst_entry)); ++ MemDepEntry::memdep_insert++; + - DPRINTF(MemDepUnit, "MemDepUnit: Top instruction is PC %#x.\n", - (*topInst).second->readPC()); ++ // Add the instruction to the instruction list. 
++ instList[tid].push_back(barr_inst); + - return (*topInst).second; ++ inst_entry->listIt = --(instList[tid].end()); +} + +template +void - MemDepUnit::pop() ++MemDepUnit::regsReady(DynInstPtr &inst) +{ - DPRINTF(MemDepUnit, "MemDepUnit: Removing instruction PC %#x.\n", - (*topInst).second->readPC()); ++ DPRINTF(MemDepUnit, "Marking registers as ready for " ++ "instruction PC %#x [sn:%lli].\n", ++ inst->readPC(), inst->seqNum); + - wakeDependents((*topInst).second); ++ MemDepEntryPtr inst_entry = findInHash(inst); + - issue((*topInst).second); ++ inst_entry->regsReady = true; + - memInsts.erase(topInst); ++ if (inst_entry->memDepReady) { ++ DPRINTF(MemDepUnit, "Instruction has its memory " ++ "dependencies resolved, adding it to the ready list.\n"); + - topInst = memInsts.end(); ++ moveToReady(inst_entry); ++ } else { ++ DPRINTF(MemDepUnit, "Instruction still waiting on " ++ "memory dependency.\n"); ++ } +} + +template +void - MemDepUnit::regsReady(DynInstPtr &inst) ++MemDepUnit::nonSpecInstReady(DynInstPtr &inst) +{ - DPRINTF(MemDepUnit, "MemDepUnit: Marking registers as ready for " - "instruction PC %#x.\n", - inst->readPC()); ++ DPRINTF(MemDepUnit, "Marking non speculative " ++ "instruction PC %#x as ready [sn:%lli].\n", ++ inst->readPC(), inst->seqNum); + - InstSeqNum inst_seq_num = inst->seqNum; ++ MemDepEntryPtr inst_entry = findInHash(inst); + - Dependency inst_to_find(inst_seq_num); ++ moveToReady(inst_entry); ++} + - dep_it_t waiting_inst = waitingInsts.find(inst_to_find); ++template ++void ++MemDepUnit::reschedule(DynInstPtr &inst) ++{ ++ instsToReplay.push_back(inst); ++} + - assert(waiting_inst != waitingInsts.end()); ++template ++void ++MemDepUnit::replay(DynInstPtr &inst) ++{ ++ DynInstPtr temp_inst; ++ bool found_inst = false; + - if ((*waiting_inst).memDepReady) { - DPRINTF(MemDepUnit, "MemDepUnit: Instruction has its memory " - "dependencies resolved, adding it to the ready list.\n"); ++ while (!instsToReplay.empty()) { ++ temp_inst = 
instsToReplay.front(); + - moveToReady(waiting_inst); - } else { - DPRINTF(MemDepUnit, "MemDepUnit: Instruction still waiting on " - "memory dependency.\n"); ++ MemDepEntryPtr inst_entry = findInHash(temp_inst); + - (*waiting_inst).regsReady = true; ++ DPRINTF(MemDepUnit, "Replaying mem instruction PC %#x " ++ "[sn:%lli].\n", ++ temp_inst->readPC(), temp_inst->seqNum); ++ ++ moveToReady(inst_entry); ++ ++ if (temp_inst == inst) { ++ found_inst = true; ++ } ++ ++ instsToReplay.pop_front(); + } ++ ++ assert(found_inst); +} + +template +void - MemDepUnit::nonSpecInstReady(DynInstPtr &inst) ++MemDepUnit::completed(DynInstPtr &inst) +{ - DPRINTF(MemDepUnit, "MemDepUnit: Marking non speculative " - "instruction PC %#x as ready.\n", - inst->readPC()); ++ DPRINTF(MemDepUnit, "Completed mem instruction PC %#x " ++ "[sn:%lli].\n", ++ inst->readPC(), inst->seqNum); ++ ++ unsigned tid = inst->threadNumber; + - InstSeqNum inst_seq_num = inst->seqNum; ++ // Remove the instruction from the hash and the list. ++ MemDepHashIt hash_it = memDepHash.find(inst->seqNum); + - Dependency inst_to_find(inst_seq_num); ++ assert(hash_it != memDepHash.end()); + - dep_it_t waiting_inst = waitingInsts.find(inst_to_find); ++ instList[tid].erase((*hash_it).second->listIt); + - assert(waiting_inst != waitingInsts.end()); ++ (*hash_it).second = NULL; + - moveToReady(waiting_inst); ++ memDepHash.erase(hash_it); ++ MemDepEntry::memdep_erase++; +} + +template +void - MemDepUnit::issue(DynInstPtr &inst) ++MemDepUnit::completeBarrier(DynInstPtr &inst) +{ - assert(readyInsts.find(inst->seqNum) != readyInsts.end()); - - DPRINTF(MemDepUnit, "MemDepUnit: Issuing instruction PC %#x.\n", - inst->readPC()); - - // Remove the instruction from the ready list. 
- readyInsts.erase(inst->seqNum); - - depPred.issued(inst->readPC(), inst->seqNum, inst->isStore()); ++ wakeDependents(inst); ++ completed(inst); ++ ++ InstSeqNum barr_sn = inst->seqNum; ++ ++ if (inst->isMemBarrier()) { ++ assert(loadBarrier && storeBarrier); ++ if (loadBarrierSN == barr_sn) ++ loadBarrier = false; ++ if (storeBarrierSN == barr_sn) ++ storeBarrier = false; ++ } else if (inst->isWriteBarrier()) { ++ assert(storeBarrier); ++ if (storeBarrierSN == barr_sn) ++ storeBarrier = false; ++ } +} + +template +void +MemDepUnit::wakeDependents(DynInstPtr &inst) +{ - // Only stores have dependents. - if (!inst->isStore()) { ++ // Only stores and barriers have dependents. ++ if (!inst->isStore() && !inst->isMemBarrier() && !inst->isWriteBarrier()) { + return; + } + - // Wake any dependencies. - sd_it_t sd_it = storeDependents.find(inst->seqNum); ++ MemDepEntryPtr inst_entry = findInHash(inst); + - // If there's no entry, then return. Really there should only be - // no entry if the instruction is a load. - if (sd_it == storeDependents.end()) { - DPRINTF(MemDepUnit, "MemDepUnit: Instruction PC %#x, sequence " - "number %i has no dependents.\n", - inst->readPC(), inst->seqNum); ++ for (int i = 0; i < inst_entry->dependInsts.size(); ++i ) { ++ MemDepEntryPtr woken_inst = inst_entry->dependInsts[i]; + - return; - } - - for (int i = 0; i < (*sd_it).second.size(); ++i ) { - dep_it_t woken_inst = (*sd_it).second[i]; - - DPRINTF(MemDepUnit, "MemDepUnit: Waking up a dependent inst, " - "sequence number %i.\n", - (*woken_inst).seqNum); - #if 0 - // Should we have reached instructions that are actually squashed, - // there will be no more useful instructions in this dependency - // list. Break out early. - if (waitingInsts.find(woken_inst) == waitingInsts.end()) { - DPRINTF(MemDepUnit, "MemDepUnit: Dependents on inst PC %#x " - "are squashed, starting at SN %i. 
Breaking early.\n", - inst->readPC(), woken_inst); - break; ++ if (!woken_inst->inst) { ++ // Potentially removed mem dep entries could be on this list ++ continue; + } - #endif + - if ((*woken_inst).regsReady) { ++ DPRINTF(MemDepUnit, "Waking up a dependent inst, " ++ "[sn:%lli].\n", ++ woken_inst->inst->seqNum); ++ ++ if (woken_inst->regsReady && !woken_inst->squashed) { + moveToReady(woken_inst); + } else { - (*woken_inst).memDepReady = true; ++ woken_inst->memDepReady = true; + } + } + - storeDependents.erase(sd_it); ++ inst_entry->dependInsts.clear(); +} + +template +void - MemDepUnit::squash(const InstSeqNum &squashed_num) ++MemDepUnit::squash(const InstSeqNum &squashed_num, ++ unsigned tid) +{ - - if (!waitingInsts.empty()) { - dep_it_t waiting_it = waitingInsts.end(); - - --waiting_it; - - // Remove entries from the renamed list as long as we haven't reached - // the end and the entries continue to be younger than the squashed. - while (!waitingInsts.empty() && - (*waiting_it).seqNum > squashed_num) - { - if (!(*waiting_it).memDepReady && - (*waiting_it).storeDep != storeDependents.end()) { - sd_it_t sd_it = (*waiting_it).storeDep; - - // Make sure the iterator that the store has pointing - // back is actually to this instruction. - assert((*sd_it).second.back() == waiting_it); - - // Now remove this from the store's list of dependent - // instructions. - (*sd_it).second.pop_back(); ++ if (!instsToReplay.empty()) { ++ ListIt replay_it = instsToReplay.begin(); ++ while (replay_it != instsToReplay.end()) { ++ if ((*replay_it)->threadNumber == tid && ++ (*replay_it)->seqNum > squashed_num) { ++ instsToReplay.erase(replay_it++); ++ } else { ++ ++replay_it; + } - - waitingInsts.erase(waiting_it--); + } + } + - if (!readyInsts.empty()) { - sn_it_t ready_it = readyInsts.end(); ++ ListIt squash_it = instList[tid].end(); ++ --squash_it; + - --ready_it; ++ MemDepHashIt hash_it; + - // Same for the ready list. 
- while (!readyInsts.empty() && - (*ready_it) > squashed_num) - { - readyInsts.erase(ready_it--); - } - } ++ while (!instList[tid].empty() && ++ (*squash_it)->seqNum > squashed_num) { + - if (!storeDependents.empty()) { - sd_it_t dep_it = storeDependents.end(); ++ DPRINTF(MemDepUnit, "Squashing inst [sn:%lli]\n", ++ (*squash_it)->seqNum); + - --dep_it; ++ hash_it = memDepHash.find((*squash_it)->seqNum); + - // Same for the dependencies list. - while (!storeDependents.empty() && - (*dep_it).first > squashed_num) - { - // This store's list of dependent instructions should be empty. - assert((*dep_it).second.empty()); ++ assert(hash_it != memDepHash.end()); + - storeDependents.erase(dep_it--); - } ++ (*hash_it).second->squashed = true; ++ ++ (*hash_it).second = NULL; ++ ++ memDepHash.erase(hash_it); ++ MemDepEntry::memdep_erase++; ++ ++ instList[tid].erase(squash_it--); + } + + // Tell the dependency predictor to squash as well. - depPred.squash(squashed_num); ++ depPred.squash(squashed_num, tid); +} + +template +void +MemDepUnit::violation(DynInstPtr &store_inst, + DynInstPtr &violating_load) +{ - DPRINTF(MemDepUnit, "MemDepUnit: Passing violating PCs to store sets," ++ DPRINTF(MemDepUnit, "Passing violating PCs to store sets," + " load: %#x, store: %#x\n", violating_load->readPC(), + store_inst->readPC()); + // Tell the memory dependence unit of the violation. 
+ depPred.violation(violating_load->readPC(), store_inst->readPC()); +} + ++template ++void ++MemDepUnit::issue(DynInstPtr &inst) ++{ ++ DPRINTF(MemDepUnit, "Issuing instruction PC %#x [sn:%lli].\n", ++ inst->readPC(), inst->seqNum); ++ ++ depPred.issued(inst->readPC(), inst->seqNum, inst->isStore()); ++} ++ ++template ++inline typename MemDepUnit::MemDepEntryPtr & ++MemDepUnit::findInHash(const DynInstPtr &inst) ++{ ++ MemDepHashIt hash_it = memDepHash.find(inst->seqNum); ++ ++ assert(hash_it != memDepHash.end()); ++ ++ return (*hash_it).second; ++} ++ +template +inline void - MemDepUnit::moveToReady(dep_it_t &woken_inst) ++MemDepUnit::moveToReady(MemDepEntryPtr &woken_inst_entry) ++{ ++ DPRINTF(MemDepUnit, "Adding instruction [sn:%lli] " ++ "to the ready list.\n", woken_inst_entry->inst->seqNum); ++ ++ assert(!woken_inst_entry->squashed); ++ ++ iqPtr->addReadyMemInst(woken_inst_entry->inst); ++} ++ ++ ++template ++void ++MemDepUnit::dumpLists() +{ - DPRINTF(MemDepUnit, "MemDepUnit: Adding instruction sequence number %i " - "to the ready list.\n", (*woken_inst).seqNum); ++ for (unsigned tid=0; tid < Impl::MaxThreads; tid++) { ++ cprintf("Instruction list %i size: %i\n", ++ tid, instList[tid].size()); ++ ++ ListIt inst_list_it = instList[tid].begin(); ++ int num = 0; ++ ++ while (inst_list_it != instList[tid].end()) { ++ cprintf("Instruction:%i\nPC:%#x\n[sn:%i]\n[tid:%i]\nIssued:%i\n" ++ "Squashed:%i\n\n", ++ num, (*inst_list_it)->readPC(), ++ (*inst_list_it)->seqNum, ++ (*inst_list_it)->threadNumber, ++ (*inst_list_it)->isIssued(), ++ (*inst_list_it)->isSquashed()); ++ inst_list_it++; ++ ++num; ++ } ++ } + - // Add it to the ready list. - readyInsts.insert((*woken_inst).seqNum); ++ cprintf("Memory dependence hash size: %i\n", memDepHash.size()); + - // Remove it from the waiting instructions. 
- waitingInsts.erase(woken_inst); ++ cprintf("Memory dependence entries: %i\n", MemDepEntry::memdep_count); +} diff --cc src/cpu/o3/ras.cc index 0a7d6ca63,000000000..0b3ea4918 mode 100644,000000..100644 --- a/src/cpu/o3/ras.cc +++ b/src/cpu/o3/ras.cc @@@ -1,73 -1,0 +1,82 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "cpu/o3/ras.hh" + - ReturnAddrStack::ReturnAddrStack(unsigned _numEntries) - : numEntries(_numEntries), usedEntries(0), - tos(0) ++void ++ReturnAddrStack::init(unsigned _numEntries) +{ - addrStack = new Addr[numEntries]; ++ numEntries = _numEntries; ++ usedEntries = 0; ++ tos = 0; ++ ++ addrStack.resize(numEntries); ++ ++ for (int i = 0; i < numEntries; ++i) ++ addrStack[i] = 0; ++} + ++void ++ReturnAddrStack::reset() ++{ ++ usedEntries = 0; ++ tos = 0; + for (int i = 0; i < numEntries; ++i) + addrStack[i] = 0; +} + +void +ReturnAddrStack::push(const Addr &return_addr) +{ + incrTos(); + + addrStack[tos] = return_addr; + + if (usedEntries != numEntries) { + ++usedEntries; + } +} + +void +ReturnAddrStack::pop() +{ - // Not sure it's possible to really track usedEntries properly. - // assert(usedEntries > 0); - + if (usedEntries > 0) { + --usedEntries; + } + + decrTos(); +} + +void +ReturnAddrStack::restore(unsigned top_entry_idx, + const Addr &restored_target) +{ + tos = top_entry_idx; + + addrStack[tos] = restored_target; +} diff --cc src/cpu/o3/ras.hh index 46d98181e,000000000..27e7c2df4 mode 100644,000000..100644 --- a/src/cpu/o3/ras.hh +++ b/src/cpu/o3/ras.hh @@@ -1,68 -1,0 +1,95 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - #ifndef __CPU_O3_CPU_RAS_HH__ - #define __CPU_O3_CPU_RAS_HH__ ++#ifndef __CPU_O3_RAS_HH__ ++#define __CPU_O3_RAS_HH__ + +// For Addr type. +#include "arch/isa_traits.hh" ++#include + ++/** Return address stack class, implements a simple RAS. */ +class ReturnAddrStack +{ + public: - ReturnAddrStack(unsigned numEntries); ++ /** Creates a return address stack, but init() must be called prior to ++ * use. 
++ */ ++ ReturnAddrStack() {} + ++ /** Initializes RAS with a specified number of entries. ++ * @param numEntries Number of entries in the RAS. ++ */ ++ void init(unsigned numEntries); ++ ++ void reset(); ++ ++ /** Returns the top address on the RAS. */ + Addr top() + { return addrStack[tos]; } + ++ /** Returns the index of the top of the RAS. */ + unsigned topIdx() + { return tos; } + ++ /** Pushes an address onto the RAS. */ + void push(const Addr &return_addr); + ++ /** Pops the top address from the RAS. */ + void pop(); + ++ /** Changes index to the top of the RAS, and replaces the top address with ++ * a new target. ++ * @param top_entry_idx The index of the RAS that will now be the top. ++ * @param restored_target The new target address of the new top of the RAS. ++ */ + void restore(unsigned top_entry_idx, const Addr &restored_target); + + private: ++ /** Increments the top of stack index. */ + inline void incrTos() + { if (++tos == numEntries) tos = 0; } + ++ /** Decrements the top of stack index. */ + inline void decrTos() + { tos = (tos == 0 ? numEntries - 1 : tos - 1); } + - Addr *addrStack; ++ /** The RAS itself. */ ++ std::vector addrStack; + ++ /** The number of entries in the RAS. */ + unsigned numEntries; + ++ /** The number of used entries in the RAS. */ + unsigned usedEntries; + ++ /** The top of stack index. */ + unsigned tos; +}; + - #endif // __CPU_O3_CPU_RAS_HH__ ++#endif // __CPU_O3_RAS_HH__ diff --cc src/cpu/o3/regfile.hh index a5cfa8f3c,000000000..3350903db mode 100644,000000..100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@@ -1,299 -1,0 +1,289 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - #ifndef __CPU_O3_CPU_REGFILE_HH__ - #define __CPU_O3_CPU_REGFILE_HH__ - - // @todo: Destructor ++#ifndef __CPU_O3_REGFILE_HH__ ++#define __CPU_O3_REGFILE_HH__ + +#include "arch/isa_traits.hh" +#include "arch/faults.hh" +#include "base/trace.hh" +#include "config/full_system.hh" +#include "cpu/o3/comm.hh" + +#if FULL_SYSTEM +#include "kern/kernel_stats.hh" + +#endif + - // This really only depends on the ISA, and not the Impl. 
It might be nicer - // to see if I can make it depend on nothing... - // Things that are in the ifdef FULL_SYSTEM are pretty dependent on the ISA, - // and should go in the AlphaFullCPU. ++#include + ++/** ++ * Simple physical register file class. ++ * This really only depends on the ISA, and not the Impl. Things that are ++ * in the ifdef FULL_SYSTEM are pretty dependent on the ISA, and probably ++ * should go in the AlphaFullCPU. ++ */ +template +class PhysRegFile +{ + protected: + typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::MiscRegFile MiscRegFile; + typedef TheISA::MiscReg MiscReg; ++ // Note that most of the definitions of the IntReg, FloatReg, etc. exist ++ // within the Impl/ISA class and not within this PhysRegFile class. + - //Note that most of the definitions of the IntReg, FloatReg, etc. exist - //within the Impl/ISA class and not within this PhysRegFile class. - - //Will need some way to allow stuff like swap_palshadow to access the - //correct registers. Might require code changes to swap_palshadow and - //other execution contexts. - - //Will make these registers public for now, but they probably should - //be private eventually with some accessor functions. ++ // Will make these registers public for now, but they probably should ++ // be private eventually with some accessor functions. + public: + typedef typename Impl::FullCPU FullCPU; + ++ /** ++ * Constructs a physical register file with the specified amount of ++ * integer and floating point registers. ++ */ + PhysRegFile(unsigned _numPhysicalIntRegs, + unsigned _numPhysicalFloatRegs); + + //Everything below should be pretty well identical to the normal + //register file that exists within AlphaISA class. + //The duplication is unfortunate but it's better than having + //different ways to access certain registers. 
+ + //Add these in later when everything else is in place +// void serialize(std::ostream &os); +// void unserialize(Checkpoint *cp, const std::string §ion); + ++ /** Reads an integer register. */ + uint64_t readIntReg(PhysRegIndex reg_idx) + { + assert(reg_idx < numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Access to int register %i, has data " + "%i\n", int(reg_idx), intRegFile[reg_idx]); + return intRegFile[reg_idx]; + } + + FloatReg readFloatReg(PhysRegIndex reg_idx, int width) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + FloatReg floatReg = floatRegFile.readReg(reg_idx, width); + + DPRINTF(IEW, "RegFile: Access to %d byte float register %i, has " + "data %8.8d\n", int(reg_idx), (double)floatReg); + + return floatReg; + } + ++ /** Reads a floating point register (double precision). */ + FloatReg readFloatReg(PhysRegIndex reg_idx) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + FloatReg floatReg = floatRegFile.readReg(reg_idx); + + DPRINTF(IEW, "RegFile: Access to float register %i, has " + "data %8.8d\n", int(reg_idx), (double)floatReg); + + return floatReg; + } + ++ /** Reads a floating point register as an integer. */ + FloatRegBits readFloatRegBits(PhysRegIndex reg_idx, int width) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + FloatRegBits floatRegBits = floatRegFile.readRegBits(reg_idx, width); + + DPRINTF(IEW, "RegFile: Access to %d byte float register %i as int, " + "has data %lli\n", int(reg_idx), (uint64_t)floatRegBits); + + return floatRegBits; + } + + FloatRegBits readFloatRegBits(PhysRegIndex reg_idx) + { + // Remove the base Float reg dependency. 
+ reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + FloatRegBits floatRegBits = floatRegFile.readRegBits(reg_idx); + + DPRINTF(IEW, "RegFile: Access to float register %i as int, " + "has data %lli\n", int(reg_idx), (uint64_t)floatRegBits); + + return floatRegBits; + } + ++ /** Sets an integer register to the given value. */ + void setIntReg(PhysRegIndex reg_idx, uint64_t val) + { + assert(reg_idx < numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Setting int register %i to %lli\n", + int(reg_idx), val); + - intRegFile[reg_idx] = val; ++ if (reg_idx != TheISA::ZeroReg) ++ intRegFile[reg_idx] = val; + } + ++ /** Sets a single precision floating point register to the given value. */ + void setFloatReg(PhysRegIndex reg_idx, FloatReg val, int width) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Setting float register %i to %8.8d\n", + int(reg_idx), (double)val); + - floatRegFile.setReg(reg_idx, val, width); ++ if (reg_idx != TheISA::ZeroReg) ++ floatRegFile.setReg(reg_idx, val, width); + } + ++ /** Sets a double precision floating point register to the given value. */ + void setFloatReg(PhysRegIndex reg_idx, FloatReg val) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Setting float register %i to %8.8d\n", + int(reg_idx), (double)val); + - floatRegFile.setReg(reg_idx, val); ++ if (reg_idx != TheISA::ZeroReg) ++ floatRegFile.setReg(reg_idx, val); + } + ++ /** Sets a floating point register to the given integer value. */ + void setFloatRegBits(PhysRegIndex reg_idx, FloatRegBits val, int width) + { + // Remove the base Float reg dependency. 
+ reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n", + int(reg_idx), (uint64_t)val); + + floatRegFile.setRegBits(reg_idx, val, width); + } + + void setFloatRegBits(PhysRegIndex reg_idx, FloatRegBits val) + { + // Remove the base Float reg dependency. + reg_idx = reg_idx - numPhysicalIntRegs; + + assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + + DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n", + int(reg_idx), (uint64_t)val); - - floatRegFile.setRegBits(reg_idx, val); - } - - uint64_t readPC() - { - return pc; - } - - void setPC(uint64_t val) - { - pc = val; + } + - void setNextPC(uint64_t val) ++ MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, ++ unsigned thread_id) + { - npc = val; ++ return miscRegs[thread_id].readRegWithEffect(misc_reg, fault, ++ cpu->xcBase(thread_id)); + } + - //Consider leaving this stuff and below in some implementation specific - //file as opposed to the general register file. Or have a derived class. - MiscReg readMiscReg(int misc_reg) ++ Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned thread_id) + { - // Dummy function for now. - // @todo: Fix this once proxy XC is used. - return 0; ++ return miscRegs[thread_id].setReg(misc_reg, val); + } + - Fault setMiscReg(int misc_reg, const MiscReg &val) ++ Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, ++ unsigned thread_id) + { - // Dummy function for now. - // @todo: Fix this once proxy XC is used. - return NoFault; ++ return miscRegs[thread_id].setRegWithEffect(misc_reg, val, ++ cpu->xcBase(thread_id)); + } + +#if FULL_SYSTEM + int readIntrFlag() { return intrflag; } ++ /** Sets an interrupt flag. */ + void setIntrFlag(int val) { intrflag = val; } +#endif + - // These should be private eventually, but will be public for now - // so that I can hack around the initregs issue. 
+ public: + /** (signed) integer register file. */ - IntReg *intRegFile; ++ std::vector intRegFile; + + /** Floating point register file. */ - FloatReg *floatRegFile; ++ std::vector floatRegFile; + + /** Miscellaneous register file. */ - MiscRegFile miscRegs; - - /** Program counter. */ - Addr pc; - - /** Next-cycle program counter. */ - Addr npc; ++ MiscRegFile miscRegs[Impl::MaxThreads]; + +#if FULL_SYSTEM + private: - // This is ISA specifc stuff; remove it eventually once ISAImpl is used - // IntReg palregs[NumIntRegs]; // PAL shadow registers + int intrflag; // interrupt flag - bool pal_shadow; // using pal_shadow registers +#endif + + private: ++ /** CPU pointer. */ + FullCPU *cpu; + + public: ++ /** Sets the CPU pointer. */ + void setCPU(FullCPU *cpu_ptr) { cpu = cpu_ptr; } + ++ /** Number of physical integer registers. */ + unsigned numPhysicalIntRegs; ++ /** Number of physical floating point registers. */ + unsigned numPhysicalFloatRegs; +}; + +template +PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, + unsigned _numPhysicalFloatRegs) + : numPhysicalIntRegs(_numPhysicalIntRegs), + numPhysicalFloatRegs(_numPhysicalFloatRegs) +{ - intRegFile = new IntReg[numPhysicalIntRegs]; - floatRegFile = new FloatReg[numPhysicalFloatRegs]; ++ intRegFile.resize(numPhysicalIntRegs); ++ floatRegFile.resize(numPhysicalFloatRegs); + - memset(intRegFile, 0, sizeof(*intRegFile)); - memset(floatRegFile, 0, sizeof(*floatRegFile)); ++ //memset(intRegFile, 0, sizeof(*intRegFile)); ++ //memset(floatRegFile, 0, sizeof(*floatRegFile)); +} + - #endif // __CPU_O3_CPU_REGFILE_HH__ ++#endif diff --cc src/cpu/o3/rename.cc index 6e9ee23da,000000000..4dc3bf6b2 mode 100644,000000..100644 --- a/src/cpu/o3/rename.cc +++ b/src/cpu/o3/rename.cc @@@ -1,33 -1,0 +1,33 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/rename_impl.hh" + - template class SimpleRename; ++template class DefaultRename; diff --cc src/cpu/o3/rename.hh index 07b442964,000000000..3f1a27bb5 mode 100644,000000..100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@@ -1,233 -1,0 +1,462 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - // Todo: - // Fix up trap and barrier handling. - // May want to have different statuses to differentiate the different stall - // conditions. - - #ifndef __CPU_O3_CPU_SIMPLE_RENAME_HH__ - #define __CPU_O3_CPU_SIMPLE_RENAME_HH__ ++#ifndef __CPU_O3_RENAME_HH__ ++#define __CPU_O3_RENAME_HH__ + +#include + +#include "base/statistics.hh" +#include "base/timebuf.hh" + - // Will need rename maps for both the int reg file and fp reg file. - // Or change rename map class to handle both. (RegFile handles both.) ++/** ++ * DefaultRename handles both single threaded and SMT rename. Its ++ * width is specified by the parameters; each cycle it tries to rename ++ * that many instructions. It holds onto the rename history of all ++ * instructions with destination registers, storing the ++ * arch. register, the new physical register, and the old physical ++ * register, to allow for undoing of mappings if squashing happens, or ++ * freeing up registers upon commit. Rename handles blocking if the ++ * ROB, IQ, or LSQ is going to be full. Rename also handles barriers, ++ * and does so by stalling on the instruction until the ROB is empty ++ * and there are no instructions in flight to the ROB. ++ */ +template - class SimpleRename ++class DefaultRename +{ + public: + // Typedefs from the Impl. 
+ typedef typename Impl::CPUPol CPUPol; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::Params Params; + - typedef typename CPUPol::FetchStruct FetchStruct; ++ // Typedefs from the CPUPol + typedef typename CPUPol::DecodeStruct DecodeStruct; + typedef typename CPUPol::RenameStruct RenameStruct; + typedef typename CPUPol::TimeStruct TimeStruct; - - // Typedefs from the CPUPol + typedef typename CPUPol::FreeList FreeList; + typedef typename CPUPol::RenameMap RenameMap; ++ // These are used only for initialization. ++ typedef typename CPUPol::IEW IEW; ++ typedef typename CPUPol::Commit Commit; + + // Typedefs from the ISA. + typedef TheISA::RegIndex RegIndex; + ++ // A list is used to queue the instructions. Barrier insts must ++ // be added to the front of the list, which is the only reason for ++ // using a list instead of a queue. (Most other stages use a ++ // queue) ++ typedef std::list InstQueue; ++ + public: - // Rename will block if ROB becomes full or issue queue becomes full, - // or there are no free registers to rename to. - // Only case where rename squashes is if IEW squashes. - enum Status { ++ /** Overall rename status. Used to determine if the CPU can ++ * deschedule itself due to a lack of activity. ++ */ ++ enum RenameStatus { ++ Active, ++ Inactive ++ }; ++ ++ /** Individual thread status. */ ++ enum ThreadStatus { + Running, + Idle, ++ StartSquash, + Squashing, + Blocked, + Unblocking, - BarrierStall ++ SerializeStall + }; + + private: - Status _status; ++ /** Rename status. */ ++ RenameStatus _status; ++ ++ /** Per-thread status. */ ++ ThreadStatus renameStatus[Impl::MaxThreads]; + + public: - SimpleRename(Params ¶ms); ++ /** DefaultRename constructor. */ ++ DefaultRename(Params *params); + ++ /** Returns the name of rename. */ ++ std::string name() const; ++ ++ /** Registers statistics. */ + void regStats(); + ++ /** Sets CPU pointer. 
*/ + void setCPU(FullCPU *cpu_ptr); + ++ /** Sets the main backwards communication time buffer pointer. */ + void setTimeBuffer(TimeBuffer *tb_ptr); + ++ /** Sets pointer to time buffer used to communicate to the next stage. */ + void setRenameQueue(TimeBuffer *rq_ptr); + ++ /** Sets pointer to time buffer coming from decode. */ + void setDecodeQueue(TimeBuffer *dq_ptr); + - void setRenameMap(RenameMap *rm_ptr); ++ /** Sets pointer to IEW stage. Used only for initialization. */ ++ void setIEWStage(IEW *iew_stage) ++ { iew_ptr = iew_stage; } ++ ++ /** Sets pointer to commit stage. Used only for initialization. */ ++ void setCommitStage(Commit *commit_stage) ++ { commit_ptr = commit_stage; } ++ ++ private: ++ /** Pointer to IEW stage. Used only for initialization. */ ++ IEW *iew_ptr; ++ ++ /** Pointer to commit stage. Used only for initialization. */ ++ Commit *commit_ptr; + ++ public: ++ /** Initializes variables for the stage. */ ++ void initStage(); ++ ++ /** Sets pointer to list of active threads. */ ++ void setActiveThreads(std::list *at_ptr); ++ ++ /** Sets pointer to rename maps (per-thread structures). */ ++ void setRenameMap(RenameMap rm_ptr[Impl::MaxThreads]); ++ ++ /** Sets pointer to the free list. */ + void setFreeList(FreeList *fl_ptr); + - void dumpHistory(); ++ /** Sets pointer to the scoreboard. */ ++ void setScoreboard(Scoreboard *_scoreboard); + - void tick(); ++ void switchOut(); ++ ++ void doSwitchOut(); ++ ++ void takeOverFrom(); + - void rename(); ++ /** Squashes all instructions in a thread. */ ++ void squash(unsigned tid); ++ ++ /** Ticks rename, which processes all input signals and attempts to rename ++ * as many instructions as possible. ++ */ ++ void tick(); + - void squash(); ++ /** Debugging function used to dump history buffer of renamings. */ ++ void dumpHistory(); + + private: - void block(); ++ /** Determines what to do based on rename's current status. 
++ * @param status_change rename() sets this variable if there was a status ++ * change (ie switching from blocking to unblocking). ++ * @param tid Thread id to rename instructions from. ++ */ ++ void rename(bool &status_change, unsigned tid); ++ ++ /** Renames instructions for the given thread. Also handles serializing ++ * instructions. ++ */ ++ void renameInsts(unsigned tid); ++ ++ /** Inserts unused instructions from a given thread into the skid buffer, ++ * to be renamed once rename unblocks. ++ */ ++ void skidInsert(unsigned tid); ++ ++ /** Separates instructions from decode into individual lists of instructions ++ * sorted by thread. ++ */ ++ void sortInsts(); ++ ++ /** Returns if all of the skid buffers are empty. */ ++ bool skidsEmpty(); ++ ++ /** Updates overall rename status based on all of the threads' statuses. */ ++ void updateStatus(); ++ ++ /** Switches rename to blocking, and signals back that rename has become ++ * blocked. ++ * @return Returns true if there is a status change. ++ */ ++ bool block(unsigned tid); ++ ++ /** Switches rename to unblocking if the skid buffer is empty, and signals ++ * back that rename has unblocked. ++ * @return Returns true if there is a status change. ++ */ ++ bool unblock(unsigned tid); + - inline void unblock(); ++ /** Executes actual squash, removing squashed instructions. */ ++ void doSquash(unsigned tid); + - void doSquash(); ++ /** Removes a committed instruction's rename history. */ ++ void removeFromHistory(InstSeqNum inst_seq_num, unsigned tid); + - void removeFromHistory(InstSeqNum inst_seq_num); ++ /** Renames the source registers of an instruction. */ ++ inline void renameSrcRegs(DynInstPtr &inst, unsigned tid); + - inline void renameSrcRegs(DynInstPtr &inst); ++ /** Renames the destination registers of an instruction. */ ++ inline void renameDestRegs(DynInstPtr &inst, unsigned tid); + - inline void renameDestRegs(DynInstPtr &inst); ++ /** Calculates the number of free ROB entries for a specific thread. 
*/ ++ inline int calcFreeROBEntries(unsigned tid); + - inline int calcFreeROBEntries(); ++ /** Calculates the number of free IQ entries for a specific thread. */ ++ inline int calcFreeIQEntries(unsigned tid); + - inline int calcFreeIQEntries(); ++ /** Calculates the number of free LSQ entries for a specific thread. */ ++ inline int calcFreeLSQEntries(unsigned tid); + - /** Holds the previous information for each rename. - * Note that often times the inst may have been deleted, so only access - * the pointer for the address and do not dereference it. ++ /** Returns the number of valid instructions coming from decode. */ ++ unsigned validInsts(); ++ ++ /** Reads signals telling rename to block/unblock. */ ++ void readStallSignals(unsigned tid); ++ ++ /** Checks if any stages are telling rename to block. */ ++ bool checkStall(unsigned tid); ++ ++ void readFreeEntries(unsigned tid); ++ ++ bool checkSignalsAndUpdate(unsigned tid); ++ ++ /** Either serializes on the next instruction available in the InstQueue, ++ * or records that it must serialize on the next instruction to enter ++ * rename. ++ * @param inst_list The list of younger, unprocessed instructions for the ++ * thread that has the serializeAfter instruction. ++ * @param tid The thread id. ++ */ ++ void serializeAfter(InstQueue &inst_list, unsigned tid); ++ ++ /** Holds the information for each destination register rename. It holds ++ * the instruction's sequence number, the arch register, the old physical ++ * register for that arch. register, and the new physical register. + */ + struct RenameHistory { + RenameHistory(InstSeqNum _instSeqNum, RegIndex _archReg, + PhysRegIndex _newPhysReg, PhysRegIndex _prevPhysReg) + : instSeqNum(_instSeqNum), archReg(_archReg), - newPhysReg(_newPhysReg), prevPhysReg(_prevPhysReg), - placeHolder(false) - { - } - - /** Constructor used specifically for cases where a place holder - * rename history entry is being made. 
- */ - RenameHistory(InstSeqNum _instSeqNum) - : instSeqNum(_instSeqNum), archReg(0), newPhysReg(0), - prevPhysReg(0), placeHolder(true) ++ newPhysReg(_newPhysReg), prevPhysReg(_prevPhysReg) + { + } + ++ /** The sequence number of the instruction that renamed. */ + InstSeqNum instSeqNum; ++ /** The architectural register index that was renamed. */ + RegIndex archReg; ++ /** The new physical register that the arch. register is renamed to. */ + PhysRegIndex newPhysReg; ++ /** The old physical register that the arch. register was renamed to. */ + PhysRegIndex prevPhysReg; - bool placeHolder; + }; + - std::list historyBuffer; ++ /** A per-thread list of all destination register renames, used to either ++ * undo rename mappings or free old physical registers. ++ */ ++ std::list historyBuffer[Impl::MaxThreads]; + - /** CPU interface. */ ++ /** Pointer to CPU. */ + FullCPU *cpu; + - // Interfaces to objects outside of rename. - /** Time buffer interface. */ ++ /** Pointer to main time buffer used for backwards communication. */ + TimeBuffer *timeBuffer; + + /** Wire to get IEW's output from backwards time buffer. */ + typename TimeBuffer::wire fromIEW; + + /** Wire to get commit's output from backwards time buffer. */ + typename TimeBuffer::wire fromCommit; + + /** Wire to write infromation heading to previous stages. */ - // Might not be the best name as not only decode will read it. + typename TimeBuffer::wire toDecode; + + /** Rename instruction queue. */ + TimeBuffer *renameQueue; + + /** Wire to write any information heading to IEW. */ + typename TimeBuffer::wire toIEW; + + /** Decode instruction queue interface. */ + TimeBuffer *decodeQueue; + + /** Wire to get decode's output from decode queue. */ + typename TimeBuffer::wire fromDecode; + ++ /** Queue of all instructions coming from decode this cycle. */ ++ InstQueue insts[Impl::MaxThreads]; ++ + /** Skid buffer between rename and decode. 
*/ - std::queue skidBuffer; ++ InstQueue skidBuffer[Impl::MaxThreads]; + + /** Rename map interface. */ - SimpleRenameMap *renameMap; ++ RenameMap *renameMap[Impl::MaxThreads]; + + /** Free list interface. */ + FreeList *freeList; + ++ /** Pointer to the list of active threads. */ ++ std::list *activeThreads; ++ ++ /** Pointer to the scoreboard. */ ++ Scoreboard *scoreboard; ++ ++ /** Count of instructions in progress that have been sent off to the IQ ++ * and ROB, but are not yet included in their occupancy counts. ++ */ ++ int instsInProgress[Impl::MaxThreads]; ++ ++ /** Variable that tracks if decode has written to the time buffer this ++ * cycle. Used to tell CPU if there is activity this cycle. ++ */ ++ bool wroteToTimeBuffer; ++ ++ /** Structures whose free entries impact the amount of instructions that ++ * can be renamed. ++ */ ++ struct FreeEntries { ++ unsigned iqEntries; ++ unsigned lsqEntries; ++ unsigned robEntries; ++ }; ++ ++ /** Per-thread tracking of the number of free entries of back-end ++ * structures. ++ */ ++ FreeEntries freeEntries[Impl::MaxThreads]; ++ ++ /** Records if the ROB is empty. In SMT mode the ROB may be dynamically ++ * partitioned between threads, so the ROB must tell rename when it is ++ * empty. ++ */ ++ bool emptyROB[Impl::MaxThreads]; ++ ++ /** Source of possible stalls. */ ++ struct Stalls { ++ bool iew; ++ bool commit; ++ }; ++ ++ /** Tracks which stages are telling decode to stall. */ ++ Stalls stalls[Impl::MaxThreads]; ++ ++ /** The serialize instruction that rename has stalled on. */ ++ DynInstPtr serializeInst[Impl::MaxThreads]; ++ ++ /** Records if rename needs to serialize on the next instruction for any ++ * thread. ++ */ ++ bool serializeOnNextInst[Impl::MaxThreads]; ++ + /** Delay between iew and rename, in ticks. */ + int iewToRenameDelay; + + /** Delay between decode and rename, in ticks. */ + int decodeToRenameDelay; + + /** Delay between commit and rename, in ticks. 
*/ + unsigned commitToRenameDelay; + + /** Rename width, in instructions. */ + unsigned renameWidth; + + /** Commit width, in instructions. Used so rename knows how many + * instructions might have freed registers in the previous cycle. + */ + unsigned commitWidth; + - /** The instruction that rename is currently on. It needs to have - * persistent state so that when a stall occurs in the middle of a - * group of instructions, it can restart at the proper instruction. ++ /** The index of the instruction in the time buffer to IEW that rename is ++ * currently using. ++ */ ++ unsigned toIEWIndex; ++ ++ /** Whether or not rename needs to block this cycle. */ ++ bool blockThisCycle; ++ ++ /** The number of threads active in rename. */ ++ unsigned numThreads; ++ ++ /** The maximum skid buffer size. */ ++ unsigned skidBufferMax; ++ ++ /** Enum to record the source of a structure full stall. Can come from ++ * either ROB, IQ, LSQ, and it is priortized in that order. ++ */ ++ enum FullSource { ++ ROB, ++ IQ, ++ LSQ, ++ NONE ++ }; ++ ++ /** Function used to increment the stat that corresponds to the source of ++ * the stall. + */ - unsigned numInst; ++ inline void incrFullStat(const FullSource &source); + ++ /** Stat for total number of cycles spent squashing. */ + Stats::Scalar<> renameSquashCycles; ++ /** Stat for total number of cycles spent idle. */ + Stats::Scalar<> renameIdleCycles; ++ /** Stat for total number of cycles spent blocking. */ + Stats::Scalar<> renameBlockCycles; ++ /** Stat for total number of cycles spent stalling for a serializing inst. */ ++ Stats::Scalar<> renameSerializeStallCycles; ++ /** Stat for total number of cycles spent running normally. */ ++ Stats::Scalar<> renameRunCycles; ++ /** Stat for total number of cycles spent unblocking. */ + Stats::Scalar<> renameUnblockCycles; ++ /** Stat for total number of renamed instructions. */ + Stats::Scalar<> renameRenamedInsts; ++ /** Stat for total number of squashed instructions that rename discards. 
*/ + Stats::Scalar<> renameSquashedInsts; ++ /** Stat for total number of times that the ROB starts a stall in rename. */ + Stats::Scalar<> renameROBFullEvents; ++ /** Stat for total number of times that the IQ starts a stall in rename. */ + Stats::Scalar<> renameIQFullEvents; ++ /** Stat for total number of times that the LSQ starts a stall in rename. */ ++ Stats::Scalar<> renameLSQFullEvents; ++ /** Stat for total number of times that rename runs out of free registers ++ * to use to rename. */ + Stats::Scalar<> renameFullRegistersEvents; ++ /** Stat for total number of renamed destination registers. */ + Stats::Scalar<> renameRenamedOperands; ++ /** Stat for total number of source register rename lookups. */ + Stats::Scalar<> renameRenameLookups; - Stats::Scalar<> renameHBPlaceHolders; ++ /** Stat for total number of committed renaming mappings. */ + Stats::Scalar<> renameCommittedMaps; ++ /** Stat for total number of mappings that were undone due to a squash. */ + Stats::Scalar<> renameUndoneMaps; - Stats::Scalar<> renameValidUndoneMaps; ++ Stats::Scalar<> renamedSerializing; ++ Stats::Scalar<> renamedTempSerializing; ++ Stats::Scalar<> renameSkidInsts; +}; + - #endif // __CPU_O3_CPU_SIMPLE_RENAME_HH__ ++#endif // __CPU_O3_RENAME_HH__ diff --cc src/cpu/o3/rename_impl.hh index 2068b36ab,000000000..b4f1077d1 mode 100644,000000..100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@@ -1,754 -1,0 +1,1279 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include "config/full_system.hh" +#include "cpu/o3/rename.hh" + ++using namespace std; ++ ++template ++DefaultRename::DefaultRename(Params *params) ++ : iewToRenameDelay(params->iewToRenameDelay), ++ decodeToRenameDelay(params->decodeToRenameDelay), ++ commitToRenameDelay(params->commitToRenameDelay), ++ renameWidth(params->renameWidth), ++ commitWidth(params->commitWidth), ++ numThreads(params->numberOfThreads) ++{ ++ _status = Inactive; ++ ++ for (int i=0; i< numThreads; i++) { ++ renameStatus[i] = Idle; ++ ++ freeEntries[i].iqEntries = 0; ++ freeEntries[i].lsqEntries = 0; ++ freeEntries[i].robEntries = 0; ++ ++ stalls[i].iew = false; ++ stalls[i].commit = false; ++ serializeInst[i] = NULL; ++ ++ instsInProgress[i] = 0; ++ ++ emptyROB[i] = true; ++ ++ serializeOnNextInst[i] = false; ++ } ++ ++ // @todo: Make into a parameter. ++ skidBufferMax = (2 * (iewToRenameDelay * params->decodeWidth)) + renameWidth; ++} ++ +template - SimpleRename::SimpleRename(Params ¶ms) - : iewToRenameDelay(params.iewToRenameDelay), - decodeToRenameDelay(params.decodeToRenameDelay), - commitToRenameDelay(params.commitToRenameDelay), - renameWidth(params.renameWidth), - commitWidth(params.commitWidth), - numInst(0) ++std::string ++DefaultRename::name() const +{ - _status = Idle; ++ return cpu->name() + ".rename"; +} + +template +void - SimpleRename::regStats() ++DefaultRename::regStats() +{ + renameSquashCycles - .name(name() + ".renameSquashCycles") ++ .name(name() + ".RENAME:SquashCycles") + .desc("Number of cycles rename is squashing") + .prereq(renameSquashCycles); + renameIdleCycles - .name(name() + ".renameIdleCycles") ++ .name(name() + ".RENAME:IdleCycles") + .desc("Number of cycles rename is idle") + .prereq(renameIdleCycles); + renameBlockCycles - .name(name() + ".renameBlockCycles") ++ .name(name() + ".RENAME:BlockCycles") + .desc("Number of cycles rename is blocking") + .prereq(renameBlockCycles); ++ renameSerializeStallCycles ++ .name(name() + 
".RENAME:serializeStallCycles") ++ .desc("count of cycles rename stalled for serializing inst") ++ .flags(Stats::total); ++ renameRunCycles ++ .name(name() + ".RENAME:RunCycles") ++ .desc("Number of cycles rename is running") ++ .prereq(renameIdleCycles); + renameUnblockCycles - .name(name() + ".renameUnblockCycles") ++ .name(name() + ".RENAME:UnblockCycles") + .desc("Number of cycles rename is unblocking") + .prereq(renameUnblockCycles); + renameRenamedInsts - .name(name() + ".renameRenamedInsts") ++ .name(name() + ".RENAME:RenamedInsts") + .desc("Number of instructions processed by rename") + .prereq(renameRenamedInsts); + renameSquashedInsts - .name(name() + ".renameSquashedInsts") ++ .name(name() + ".RENAME:SquashedInsts") + .desc("Number of squashed instructions processed by rename") + .prereq(renameSquashedInsts); + renameROBFullEvents - .name(name() + ".renameROBFullEvents") - .desc("Number of times rename has considered the ROB 'full'") ++ .name(name() + ".RENAME:ROBFullEvents") ++ .desc("Number of times rename has blocked due to ROB full") + .prereq(renameROBFullEvents); + renameIQFullEvents - .name(name() + ".renameIQFullEvents") - .desc("Number of times rename has considered the IQ 'full'") ++ .name(name() + ".RENAME:IQFullEvents") ++ .desc("Number of times rename has blocked due to IQ full") + .prereq(renameIQFullEvents); ++ renameLSQFullEvents ++ .name(name() + ".RENAME:LSQFullEvents") ++ .desc("Number of times rename has blocked due to LSQ full") ++ .prereq(renameLSQFullEvents); + renameFullRegistersEvents - .name(name() + ".renameFullRegisterEvents") ++ .name(name() + ".RENAME:FullRegisterEvents") + .desc("Number of times there has been no free registers") + .prereq(renameFullRegistersEvents); + renameRenamedOperands - .name(name() + ".renameRenamedOperands") ++ .name(name() + ".RENAME:RenamedOperands") + .desc("Number of destination operands rename has renamed") + .prereq(renameRenamedOperands); + renameRenameLookups - .name(name() + 
".renameRenameLookups") ++ .name(name() + ".RENAME:RenameLookups") + .desc("Number of register rename lookups that rename has made") + .prereq(renameRenameLookups); - renameHBPlaceHolders - .name(name() + ".renameHBPlaceHolders") - .desc("Number of place holders added to the history buffer") - .prereq(renameHBPlaceHolders); + renameCommittedMaps - .name(name() + ".renameCommittedMaps") ++ .name(name() + ".RENAME:CommittedMaps") + .desc("Number of HB maps that are committed") + .prereq(renameCommittedMaps); + renameUndoneMaps - .name(name() + ".renameUndoneMaps") ++ .name(name() + ".RENAME:UndoneMaps") + .desc("Number of HB maps that are undone due to squashing") + .prereq(renameUndoneMaps); - renameValidUndoneMaps - .name(name() + ".renameValidUndoneMaps") - .desc("Number of HB maps that are undone, and are not place holders") - .prereq(renameValidUndoneMaps); ++ renamedSerializing ++ .name(name() + ".RENAME:serializingInsts") ++ .desc("count of serializing insts renamed") ++ .flags(Stats::total) ++ ; ++ renamedTempSerializing ++ .name(name() + ".RENAME:tempSerializingInsts") ++ .desc("count of temporary serializing insts renamed") ++ .flags(Stats::total) ++ ; ++ renameSkidInsts ++ .name(name() + ".RENAME:skidInsts") ++ .desc("count of insts added to the skid buffer") ++ .flags(Stats::total) ++ ; +} + +template +void - SimpleRename::setCPU(FullCPU *cpu_ptr) ++DefaultRename::setCPU(FullCPU *cpu_ptr) +{ - DPRINTF(Rename, "Rename: Setting CPU pointer.\n"); ++ DPRINTF(Rename, "Setting CPU pointer.\n"); + cpu = cpu_ptr; +} + +template +void - SimpleRename::setTimeBuffer(TimeBuffer *tb_ptr) ++DefaultRename::setTimeBuffer(TimeBuffer *tb_ptr) +{ - DPRINTF(Rename, "Rename: Setting time buffer pointer.\n"); ++ DPRINTF(Rename, "Setting time buffer pointer.\n"); + timeBuffer = tb_ptr; + + // Setup wire to read information from time buffer, from IEW stage. 
+ fromIEW = timeBuffer->getWire(-iewToRenameDelay); + + // Setup wire to read infromation from time buffer, from commit stage. + fromCommit = timeBuffer->getWire(-commitToRenameDelay); + + // Setup wire to write information to previous stages. + toDecode = timeBuffer->getWire(0); +} + +template +void - SimpleRename::setRenameQueue(TimeBuffer *rq_ptr) ++DefaultRename::setRenameQueue(TimeBuffer *rq_ptr) +{ - DPRINTF(Rename, "Rename: Setting rename queue pointer.\n"); ++ DPRINTF(Rename, "Setting rename queue pointer.\n"); + renameQueue = rq_ptr; + + // Setup wire to write information to future stages. + toIEW = renameQueue->getWire(0); +} + +template +void - SimpleRename::setDecodeQueue(TimeBuffer *dq_ptr) ++DefaultRename::setDecodeQueue(TimeBuffer *dq_ptr) +{ - DPRINTF(Rename, "Rename: Setting decode queue pointer.\n"); ++ DPRINTF(Rename, "Setting decode queue pointer.\n"); + decodeQueue = dq_ptr; + + // Setup wire to get information from decode. + fromDecode = decodeQueue->getWire(-decodeToRenameDelay); +} + +template +void - SimpleRename::setRenameMap(RenameMap *rm_ptr) ++DefaultRename::initStage() ++{ ++ // Grab the number of free entries directly from the stages. 
++ for (int tid=0; tid < numThreads; tid++) { ++ freeEntries[tid].iqEntries = iew_ptr->instQueue.numFreeEntries(tid); ++ freeEntries[tid].lsqEntries = iew_ptr->ldstQueue.numFreeEntries(tid); ++ freeEntries[tid].robEntries = commit_ptr->numROBFreeEntries(tid); ++ emptyROB[tid] = true; ++ } ++} ++ ++template ++void ++DefaultRename::setActiveThreads(list *at_ptr) +{ - DPRINTF(Rename, "Rename: Setting rename map pointer.\n"); - renameMap = rm_ptr; ++ DPRINTF(Rename, "Setting active threads list pointer.\n"); ++ activeThreads = at_ptr; +} + ++ +template +void - SimpleRename::setFreeList(FreeList *fl_ptr) ++DefaultRename::setRenameMap(RenameMap rm_ptr[]) +{ - DPRINTF(Rename, "Rename: Setting free list pointer.\n"); ++ DPRINTF(Rename, "Setting rename map pointers.\n"); ++ ++ for (int i=0; i ++void ++DefaultRename::setFreeList(FreeList *fl_ptr) ++{ ++ DPRINTF(Rename, "Setting free list pointer.\n"); + freeList = fl_ptr; +} + ++template ++void ++DefaultRename::setScoreboard(Scoreboard *_scoreboard) ++{ ++ DPRINTF(Rename, "Setting scoreboard pointer.\n"); ++ scoreboard = _scoreboard; ++} ++ +template +void - SimpleRename::dumpHistory() ++DefaultRename::switchOut() +{ - typename list::iterator buf_it = historyBuffer.begin(); ++ cpu->signalSwitched(); ++} ++ ++template ++void ++DefaultRename::doSwitchOut() ++{ ++ for (int i = 0; i < numThreads; i++) { ++ typename list::iterator hb_it = historyBuffer[i].begin(); ++ ++ while (!historyBuffer[i].empty()) { ++ assert(hb_it != historyBuffer[i].end()); + - while (buf_it != historyBuffer.end()) - { - cprintf("Seq num: %i\nArch reg: %i New phys reg: %i Old phys " - "reg: %i\n", (*buf_it).instSeqNum, (int)(*buf_it).archReg, - (int)(*buf_it).newPhysReg, (int)(*buf_it).prevPhysReg); ++ DPRINTF(Rename, "[tid:%u]: Removing history entry with sequence " ++ "number %i.\n", i, (*hb_it).instSeqNum); + - buf_it++; ++ // Tell the rename map to set the architected register to the ++ // previous physical register that it was renamed to. 
++ renameMap[i]->setEntry(hb_it->archReg, hb_it->prevPhysReg); ++ ++ // Put the renamed physical register back on the free list. ++ freeList->addReg(hb_it->newPhysReg); ++ ++ historyBuffer[i].erase(hb_it++); ++ } ++ insts[i].clear(); ++ skidBuffer[i].clear(); + } +} + +template +void - SimpleRename::block() ++DefaultRename::takeOverFrom() +{ - DPRINTF(Rename, "Rename: Blocking.\n"); - // Set status to Blocked. - _status = Blocked; ++ _status = Inactive; ++ initStage(); + - // Add the current inputs onto the skid buffer, so they can be - // reprocessed when this stage unblocks. - skidBuffer.push(*fromDecode); ++ // Reset all state prior to taking over from the other CPU. ++ for (int i=0; i< numThreads; i++) { ++ renameStatus[i] = Idle; ++ ++ stalls[i].iew = false; ++ stalls[i].commit = false; ++ serializeInst[i] = NULL; ++ ++ instsInProgress[i] = 0; ++ ++ emptyROB[i] = true; + - // Note that this stage only signals previous stages to stall when - // it is the cause of the stall originates at this stage. Otherwise - // the previous stages are expected to check all possible stall signals. ++ serializeOnNextInst[i] = false; ++ } +} + +template - inline void - SimpleRename::unblock() - { - DPRINTF(Rename, "Rename: Read instructions out of skid buffer this " - "cycle.\n"); - // Remove the now processed instructions from the skid buffer. - skidBuffer.pop(); - - // If there's still information in the skid buffer, then - // continue to tell previous stages to stall. They will be - // able to restart once the skid buffer is empty. - if (!skidBuffer.empty()) { - toDecode->renameInfo.stall = true; - } else { - DPRINTF(Rename, "Rename: Done unblocking.\n"); - _status = Running; ++void ++DefaultRename::squash(unsigned tid) ++{ ++ DPRINTF(Rename, "[tid:%u]: Squashing instructions.\n",tid); ++ ++ // Clear the stall signal if rename was blocked or unblocking before. 
++ // If it still needs to block, the blocking should happen the next ++ // cycle and there should be space to hold everything due to the squash. ++ if (renameStatus[tid] == Blocked || ++ renameStatus[tid] == Unblocking || ++ renameStatus[tid] == SerializeStall) { ++#if 0 ++ // In syscall emulation, we can have both a block and a squash due ++ // to a syscall in the same cycle. This would cause both signals to ++ // be high. This shouldn't happen in full system. ++ if (toDecode->renameBlock[tid]) { ++ toDecode->renameBlock[tid] = 0; ++ } else { ++ toDecode->renameUnblock[tid] = 1; ++ } ++#else ++ toDecode->renameUnblock[tid] = 1; ++#endif ++ serializeInst[tid] = NULL; ++ } ++ ++ // Set the status to Squashing. ++ renameStatus[tid] = Squashing; ++ ++ // Squash any instructions from decode. ++ unsigned squashCount = 0; ++ ++ for (int i=0; isize; i++) { ++ if (fromDecode->insts[i]->threadNumber == tid) { ++ fromDecode->insts[i]->squashed = true; ++ wroteToTimeBuffer = true; ++ squashCount++; ++ } + } ++ ++ insts[tid].clear(); ++ ++ // Clear the skid buffer in case it has any data in it. ++ skidBuffer[tid].clear(); ++ ++ doSquash(tid); +} + +template +void - SimpleRename::doSquash() ++DefaultRename::tick() +{ - typename list::iterator hb_it = historyBuffer.begin(); ++ wroteToTimeBuffer = false; + - InstSeqNum squashed_seq_num = fromCommit->commitInfo.doneSeqNum; ++ blockThisCycle = false; + - #if FULL_SYSTEM - assert(!historyBuffer.empty()); - #else - // After a syscall squashes everything, the history buffer may be empty - // but the ROB may still be squashing instructions. - if (historyBuffer.empty()) { ++ bool status_change = false; ++ ++ toIEWIndex = 0; ++ ++ sortInsts(); ++ ++ list::iterator threads = (*activeThreads).begin(); ++ ++ // Check stall and squash signals. 
++ while (threads != (*activeThreads).end()) { ++ unsigned tid = *threads++; ++ ++ DPRINTF(Rename, "Processing [tid:%i]\n", tid); ++ ++ status_change = checkSignalsAndUpdate(tid) || status_change; ++ ++ rename(status_change, tid); ++ } ++ ++ if (status_change) { ++ updateStatus(); ++ } ++ ++ if (wroteToTimeBuffer) { ++ DPRINTF(Activity, "Activity this cycle.\n"); ++ cpu->activityThisCycle(); ++ } ++ ++ threads = (*activeThreads).begin(); ++ ++ while (threads != (*activeThreads).end()) { ++ unsigned tid = *threads++; ++ ++ // If we committed this cycle then doneSeqNum will be > 0 ++ if (fromCommit->commitInfo[tid].doneSeqNum != 0 && ++ !fromCommit->commitInfo[tid].squash && ++ renameStatus[tid] != Squashing) { ++ ++ removeFromHistory(fromCommit->commitInfo[tid].doneSeqNum, ++ tid); ++ } ++ } ++ ++ // @todo: make into updateProgress function ++ for (int tid=0; tid < numThreads; tid++) { ++ instsInProgress[tid] -= fromIEW->iewInfo[tid].dispatched; ++ ++ assert(instsInProgress[tid] >=0); ++ } ++ ++} ++ ++template ++void ++DefaultRename::rename(bool &status_change, unsigned tid) ++{ ++ // If status is Running or idle, ++ // call renameInsts() ++ // If status is Unblocking, ++ // buffer any instructions coming from decode ++ // continue trying to empty skid buffer ++ // check if stall conditions have passed ++ ++ if (renameStatus[tid] == Blocked) { ++ ++renameBlockCycles; ++ } else if (renameStatus[tid] == Squashing) { ++ ++renameSquashCycles; ++ } else if (renameStatus[tid] == SerializeStall) { ++ ++renameSerializeStallCycles; ++ } ++ ++ if (renameStatus[tid] == Running || ++ renameStatus[tid] == Idle) { ++ DPRINTF(Rename, "[tid:%u]: Not blocked, so attempting to run " ++ "stage.\n", tid); ++ ++ renameInsts(tid); ++ } else if (renameStatus[tid] == Unblocking) { ++ renameInsts(tid); ++ ++ if (validInsts()) { ++ // Add the current inputs to the skid buffer so they can be ++ // reprocessed when this stage unblocks. 
++ skidInsert(tid); ++ } ++ ++ // If we switched over to blocking, then there's a potential for ++ // an overall status change. ++ status_change = unblock(tid) || status_change || blockThisCycle; ++ } ++} ++ ++template ++void ++DefaultRename::renameInsts(unsigned tid) ++{ ++ // Instructions can be either in the skid buffer or the queue of ++ // instructions coming from decode, depending on the status. ++ int insts_available = renameStatus[tid] == Unblocking ? ++ skidBuffer[tid].size() : insts[tid].size(); ++ ++ // Check the decode queue to see if instructions are available. ++ // If there are no available instructions to rename, then do nothing. ++ if (insts_available == 0) { ++ DPRINTF(Rename, "[tid:%u]: Nothing to do, breaking out early.\n", ++ tid); ++ // Should I change status to idle? ++ ++renameIdleCycles; + return; ++ } else if (renameStatus[tid] == Unblocking) { ++ ++renameUnblockCycles; ++ } else if (renameStatus[tid] == Running) { ++ ++renameRunCycles; + } - #endif // FULL_SYSTEM + - // Go through the most recent instructions, undoing the mappings - // they did and freeing up the registers. - while ((*hb_it).instSeqNum > squashed_seq_num) - { - assert(hb_it != historyBuffer.end()); ++ DynInstPtr inst; + - DPRINTF(Rename, "Rename: Removing history entry with sequence " - "number %i.\n", (*hb_it).instSeqNum); ++ // Will have to do a different calculation for the number of free ++ // entries. ++ int free_rob_entries = calcFreeROBEntries(tid); ++ int free_iq_entries = calcFreeIQEntries(tid); ++ int free_lsq_entries = calcFreeLSQEntries(tid); ++ int min_free_entries = free_rob_entries; + - // If it's not simply a place holder, then add the registers. - if (!(*hb_it).placeHolder) { - // Tell the rename map to set the architected register to the - // previous physical register that it was renamed to. - renameMap->setEntry(hb_it->archReg, hb_it->prevPhysReg); ++ FullSource source = ROB; + - // Put the renamed physical register back on the free list. 
- freeList->addReg(hb_it->newPhysReg); ++ if (free_iq_entries < min_free_entries) { ++ min_free_entries = free_iq_entries; ++ source = IQ; ++ } + - ++renameValidUndoneMaps; ++ if (free_lsq_entries < min_free_entries) { ++ min_free_entries = free_lsq_entries; ++ source = LSQ; ++ } ++ ++ // Check if there's any space left. ++ if (min_free_entries <= 0) { ++ DPRINTF(Rename, "[tid:%u]: Blocking due to no free ROB/IQ/LSQ " ++ "entries.\n" ++ "ROB has %i free entries.\n" ++ "IQ has %i free entries.\n" ++ "LSQ has %i free entries.\n", ++ tid, ++ free_rob_entries, ++ free_iq_entries, ++ free_lsq_entries); ++ ++ blockThisCycle = true; ++ ++ block(tid); ++ ++ incrFullStat(source); ++ ++ return; ++ } else if (min_free_entries < insts_available) { ++ DPRINTF(Rename, "[tid:%u]: Will have to block this cycle." ++ "%i insts available, but only %i insts can be " ++ "renamed due to ROB/IQ/LSQ limits.\n", ++ tid, insts_available, min_free_entries); ++ ++ insts_available = min_free_entries; ++ ++ blockThisCycle = true; ++ ++ incrFullStat(source); ++ } ++ ++ InstQueue &insts_to_rename = renameStatus[tid] == Unblocking ? ++ skidBuffer[tid] : insts[tid]; ++ ++ DPRINTF(Rename, "[tid:%u]: %i available instructions to " ++ "send iew.\n", tid, insts_available); ++ ++ DPRINTF(Rename, "[tid:%u]: %i insts pipelining from Rename | %i insts " ++ "dispatched to IQ last cycle.\n", ++ tid, instsInProgress[tid], fromIEW->iewInfo[tid].dispatched); ++ ++ // Handle serializing the next instruction if necessary. ++ if (serializeOnNextInst[tid]) { ++ if (emptyROB[tid] && instsInProgress[tid] == 0) { ++ // ROB already empty; no need to serialize. 
++ serializeOnNextInst[tid] = false; ++ } else if (!insts_to_rename.empty()) { ++ insts_to_rename.front()->setSerializeBefore(); + } ++ } + - historyBuffer.erase(hb_it++); ++ int renamed_insts = 0; + - ++renameUndoneMaps; ++ while (insts_available > 0 && toIEWIndex < renameWidth) { ++ DPRINTF(Rename, "[tid:%u]: Sending instructions to IEW.\n", tid); ++ ++ assert(!insts_to_rename.empty()); ++ ++ inst = insts_to_rename.front(); ++ ++ insts_to_rename.pop_front(); ++ ++ if (renameStatus[tid] == Unblocking) { ++ DPRINTF(Rename,"[tid:%u]: Removing [sn:%lli] PC:%#x from rename " ++ "skidBuffer\n", ++ tid, inst->seqNum, inst->readPC()); ++ } ++ ++ if (inst->isSquashed()) { ++ DPRINTF(Rename, "[tid:%u]: instruction %i with PC %#x is " ++ "squashed, skipping.\n", ++ tid, inst->seqNum, inst->threadNumber,inst->readPC()); ++ ++ ++renameSquashedInsts; ++ ++ // Decrement how many instructions are available. ++ --insts_available; ++ ++ continue; ++ } ++ ++ DPRINTF(Rename, "[tid:%u]: Processing instruction [sn:%lli] with " ++ "PC %#x.\n", ++ tid, inst->seqNum, inst->readPC()); ++ ++ // Handle serializeAfter/serializeBefore instructions. ++ // serializeAfter marks the next instruction as serializeBefore. ++ // serializeBefore makes the instruction wait in rename until the ROB ++ // is empty. ++ ++ // In this model, IPR accesses are serialize before ++ // instructions, and store conditionals are serialize after ++ // instructions. This is mainly due to lack of support for ++ // out-of-order operations of either of those classes of ++ // instructions. ++ if ((inst->isIprAccess() || inst->isSerializeBefore()) && ++ !inst->isSerializeHandled()) { ++ DPRINTF(Rename, "Serialize before instruction encountered.\n"); ++ ++ if (!inst->isTempSerializeBefore()) { ++ renamedSerializing++; ++ inst->setSerializeHandled(); ++ } else { ++ renamedTempSerializing++; ++ } ++ ++ // Change status over to SerializeStall so that other stages know ++ // what this is blocked on. 
++ renameStatus[tid] = SerializeStall; ++ ++ serializeInst[tid] = inst; ++ ++ blockThisCycle = true; ++ ++ break; ++ } else if ((inst->isStoreConditional() || inst->isSerializeAfter()) && ++ !inst->isSerializeHandled()) { ++ DPRINTF(Rename, "Serialize after instruction encountered.\n"); ++ ++ renamedSerializing++; ++ ++ inst->setSerializeHandled(); ++ ++ serializeAfter(insts_to_rename, tid); ++ } ++ ++ // Check here to make sure there are enough destination registers ++ // to rename to. Otherwise block. ++ if (renameMap[tid]->numFreeEntries() < inst->numDestRegs()) { ++ DPRINTF(Rename, "Blocking due to lack of free " ++ "physical registers to rename to.\n"); ++ blockThisCycle = true; ++ ++ ++renameFullRegistersEvents; ++ ++ break; ++ } ++ ++ renameSrcRegs(inst, inst->threadNumber); ++ ++ renameDestRegs(inst, inst->threadNumber); ++ ++ ++renamed_insts; ++ ++ // Put instruction in rename queue. ++ toIEW->insts[toIEWIndex] = inst; ++ ++(toIEW->size); ++ ++ // Increment which instruction we're on. ++ ++toIEWIndex; ++ ++ // Decrement how many instructions are available. ++ --insts_available; ++ } ++ ++ instsInProgress[tid] += renamed_insts; ++ renameRenamedInsts += renamed_insts; ++ ++ // If we wrote to the time buffer, record this. ++ if (toIEWIndex) { ++ wroteToTimeBuffer = true; ++ } ++ ++ // Check if there's any instructions left that haven't yet been renamed. ++ // If so then block. 
++ if (insts_available) { ++ blockThisCycle = true; ++ } ++ ++ if (blockThisCycle) { ++ block(tid); ++ toDecode->renameUnblock[tid] = false; + } +} + ++template ++void ++DefaultRename::skidInsert(unsigned tid) ++{ ++ DynInstPtr inst = NULL; ++ ++ while (!insts[tid].empty()) { ++ inst = insts[tid].front(); ++ ++ insts[tid].pop_front(); ++ ++ assert(tid == inst->threadNumber); ++ ++ DPRINTF(Rename, "[tid:%u]: Inserting [sn:%lli] PC:%#x into Rename " ++ "skidBuffer\n", tid, inst->seqNum, inst->readPC()); ++ ++ ++renameSkidInsts; ++ ++ skidBuffer[tid].push_back(inst); ++ } ++ ++ if (skidBuffer[tid].size() > skidBufferMax) ++ panic("Skidbuffer Exceeded Max Size"); ++} ++ +template +void - SimpleRename::squash() ++DefaultRename::sortInsts() +{ - DPRINTF(Rename, "Rename: Squashing instructions.\n"); - // Set the status to Squashing. - _status = Squashing; ++ int insts_from_decode = fromDecode->size; ++#ifdef DEBUG ++ for (int i=0; i < numThreads; i++) ++ assert(insts[i].empty()); ++#endif ++ for (int i = 0; i < insts_from_decode; ++i) { ++ DynInstPtr inst = fromDecode->insts[i]; ++ insts[inst->threadNumber].push_back(inst); ++ } ++} + - numInst = 0; ++template ++bool ++DefaultRename::skidsEmpty() ++{ ++ list::iterator threads = (*activeThreads).begin(); + - // Clear the skid buffer in case it has any data in it. 
- while (!skidBuffer.empty()) - { - skidBuffer.pop(); ++ while (threads != (*activeThreads).end()) { ++ if (!skidBuffer[*threads++].empty()) ++ return false; + } + - doSquash(); ++ return true; +} + +template +void - SimpleRename::removeFromHistory(InstSeqNum inst_seq_num) ++DefaultRename::updateStatus() +{ - DPRINTF(Rename, "Rename: Removing a committed instruction from the " - "history buffer, until sequence number %lli.\n", inst_seq_num); - typename list::iterator hb_it = historyBuffer.end(); ++ bool any_unblocking = false; + - --hb_it; ++ list::iterator threads = (*activeThreads).begin(); + - if (hb_it->instSeqNum > inst_seq_num) { - DPRINTF(Rename, "Rename: Old sequence number encountered. Ensure " - "that a syscall happened recently.\n"); - return; ++ threads = (*activeThreads).begin(); ++ ++ while (threads != (*activeThreads).end()) { ++ unsigned tid = *threads++; ++ ++ if (renameStatus[tid] == Unblocking) { ++ any_unblocking = true; ++ break; ++ } + } + - while ((*hb_it).instSeqNum != inst_seq_num) - { - // Make sure we haven't gone off the end of the list. - assert(hb_it != historyBuffer.end()); - - // In theory instructions at the end of the history buffer - // should be older than the instruction being removed, which - // means they will have a lower sequence number. Also the - // instruction being removed from the history really should - // be the last instruction in the list, as it is the instruction - // that was just committed that is being removed. - assert(hb_it->instSeqNum < inst_seq_num); - DPRINTF(Rename, "Rename: Freeing up older rename of reg %i, sequence" - " number %i.\n", - (*hb_it).prevPhysReg, (*hb_it).instSeqNum); - - if (!(*hb_it).placeHolder) { - freeList->addReg((*hb_it).prevPhysReg); - ++renameCommittedMaps; ++ // Rename will have activity if it's unblocking. 
++ if (any_unblocking) { ++ if (_status == Inactive) { ++ _status = Active; ++ ++ DPRINTF(Activity, "Activating stage.\n"); ++ ++ cpu->activateStage(FullCPU::RenameIdx); + } ++ } else { ++ // If it's not unblocking, then rename will not have any internal ++ // activity. Switch it to inactive. ++ if (_status == Active) { ++ _status = Inactive; ++ DPRINTF(Activity, "Deactivating stage.\n"); + - historyBuffer.erase(hb_it--); ++ cpu->deactivateStage(FullCPU::RenameIdx); ++ } + } ++} + - // Finally free up the previous register of the finished instruction - // itself. - if (!(*hb_it).placeHolder) { - freeList->addReg(hb_it->prevPhysReg); - ++renameCommittedMaps; ++template ++bool ++DefaultRename::block(unsigned tid) ++{ ++ DPRINTF(Rename, "[tid:%u]: Blocking.\n", tid); ++ ++ // Add the current inputs onto the skid buffer, so they can be ++ // reprocessed when this stage unblocks. ++ skidInsert(tid); ++ ++ // Only signal backwards to block if the previous stages do not think ++ // rename is already blocked. ++ if (renameStatus[tid] != Blocked) { ++ if (renameStatus[tid] != Unblocking) { ++ toDecode->renameBlock[tid] = true; ++ toDecode->renameUnblock[tid] = false; ++ wroteToTimeBuffer = true; ++ } ++ ++ // Rename can not go from SerializeStall to Blocked, otherwise ++ // it would not know to complete the serialize stall. ++ if (renameStatus[tid] != SerializeStall) { ++ // Set status to Blocked. ++ renameStatus[tid] = Blocked; ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++template ++bool ++DefaultRename::unblock(unsigned tid) ++{ ++ DPRINTF(Rename, "[tid:%u]: Trying to unblock.\n", tid); ++ ++ // Rename is done unblocking if the skid buffer is empty. 
++ if (skidBuffer[tid].empty() && renameStatus[tid] != SerializeStall) { ++ ++ DPRINTF(Rename, "[tid:%u]: Done unblocking.\n", tid); ++ ++ toDecode->renameUnblock[tid] = true; ++ wroteToTimeBuffer = true; ++ ++ renameStatus[tid] = Running; ++ return true; + } + - historyBuffer.erase(hb_it); ++ return false; ++} ++ ++template ++void ++DefaultRename::doSquash(unsigned tid) ++{ ++ typename list::iterator hb_it = historyBuffer[tid].begin(); ++ ++ InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum; ++ ++ // After a syscall squashes everything, the history buffer may be empty ++ // but the ROB may still be squashing instructions. ++ if (historyBuffer[tid].empty()) { ++ return; ++ } ++ ++ // Go through the most recent instructions, undoing the mappings ++ // they did and freeing up the registers. ++ while (!historyBuffer[tid].empty() && ++ (*hb_it).instSeqNum > squashed_seq_num) { ++ assert(hb_it != historyBuffer[tid].end()); ++ ++ DPRINTF(Rename, "[tid:%u]: Removing history entry with sequence " ++ "number %i.\n", tid, (*hb_it).instSeqNum); ++ ++ // Tell the rename map to set the architected register to the ++ // previous physical register that it was renamed to. ++ renameMap[tid]->setEntry(hb_it->archReg, hb_it->prevPhysReg); ++ ++ // Put the renamed physical register back on the free list. 
++ freeList->addReg(hb_it->newPhysReg); ++ ++ historyBuffer[tid].erase(hb_it++); ++ ++ ++renameUndoneMaps; ++ } ++} ++ ++template ++void ++DefaultRename::removeFromHistory(InstSeqNum inst_seq_num, unsigned tid) ++{ ++ DPRINTF(Rename, "[tid:%u]: Removing a committed instruction from the " ++ "history buffer %u (size=%i), until [sn:%lli].\n", ++ tid, tid, historyBuffer[tid].size(), inst_seq_num); ++ ++ typename list::iterator hb_it = historyBuffer[tid].end(); ++ ++ --hb_it; ++ ++ if (historyBuffer[tid].empty()) { ++ DPRINTF(Rename, "[tid:%u]: History buffer is empty.\n", tid); ++ return; ++ } else if (hb_it->instSeqNum > inst_seq_num) { ++ DPRINTF(Rename, "[tid:%u]: Old sequence number encountered. Ensure " ++ "that a syscall happened recently.\n", tid); ++ return; ++ } ++ ++ // Commit all the renames up until (and including) the committed sequence ++ // number. Some or even all of the committed instructions may not have ++ // rename histories if they did not have destination registers that were ++ // renamed. ++ while (!historyBuffer[tid].empty() && ++ hb_it != historyBuffer[tid].end() && ++ (*hb_it).instSeqNum <= inst_seq_num) { ++ ++ DPRINTF(Rename, "[tid:%u]: Freeing up older rename of reg %i, " ++ "[sn:%lli].\n", ++ tid, (*hb_it).prevPhysReg, (*hb_it).instSeqNum); ++ ++ freeList->addReg((*hb_it).prevPhysReg); ++ ++renameCommittedMaps; ++ ++ historyBuffer[tid].erase(hb_it--); ++ } +} + +template +inline void - SimpleRename::renameSrcRegs(DynInstPtr &inst) ++DefaultRename::renameSrcRegs(DynInstPtr &inst,unsigned tid) +{ ++ assert(renameMap[tid] != 0); ++ + unsigned num_src_regs = inst->numSrcRegs(); + + // Get the architectual register numbers from the source and + // destination operands, and redirect them to the right register. + // Will need to mark dependencies though. 
- for (int src_idx = 0; src_idx < num_src_regs; src_idx++) - { ++ for (int src_idx = 0; src_idx < num_src_regs; src_idx++) { + RegIndex src_reg = inst->srcRegIdx(src_idx); + + // Look up the source registers to get the phys. register they've + // been renamed to, and set the sources to those registers. - PhysRegIndex renamed_reg = renameMap->lookup(src_reg); ++ PhysRegIndex renamed_reg = renameMap[tid]->lookup(src_reg); + - DPRINTF(Rename, "Rename: Looking up arch reg %i, got " - "physical reg %i.\n", (int)src_reg, (int)renamed_reg); ++ DPRINTF(Rename, "[tid:%u]: Looking up arch reg %i, got " ++ "physical reg %i.\n", tid, (int)src_reg, ++ (int)renamed_reg); + + inst->renameSrcReg(src_idx, renamed_reg); + - // Either incorporate it into the info passed back, - // or make another function call to see if that register is - // ready or not. - if (renameMap->isReady(renamed_reg)) { - DPRINTF(Rename, "Rename: Register is ready.\n"); ++ // See if the register is ready or not. ++ if (scoreboard->getReg(renamed_reg) == true) { ++ DPRINTF(Rename, "[tid:%u]: Register is ready.\n", tid); + + inst->markSrcRegReady(src_idx); + } + + ++renameRenameLookups; + } +} + +template +inline void - SimpleRename::renameDestRegs(DynInstPtr &inst) ++DefaultRename::renameDestRegs(DynInstPtr &inst,unsigned tid) +{ - typename SimpleRenameMap::RenameInfo rename_result; ++ typename RenameMap::RenameInfo rename_result; + + unsigned num_dest_regs = inst->numDestRegs(); + - // If it's an instruction with no destination registers, then put - // a placeholder within the history buffer. It might be better - // to not put it in the history buffer at all (other than branches, - // which always need at least a place holder), and differentiate - // between instructions with and without destination registers - // when getting from commit the instructions that committed. - if (num_dest_regs == 0) { - RenameHistory hb_entry(inst->seqNum); ++ // Rename the destination registers. 
++ for (int dest_idx = 0; dest_idx < num_dest_regs; dest_idx++) { ++ RegIndex dest_reg = inst->destRegIdx(dest_idx); + - historyBuffer.push_front(hb_entry); ++ // Get the physical register that the destination will be ++ // renamed to. ++ rename_result = renameMap[tid]->rename(dest_reg); + - DPRINTF(Rename, "Rename: Adding placeholder instruction to " - "history buffer, sequence number %lli.\n", - inst->seqNum); ++ //Mark Scoreboard entry as not ready ++ scoreboard->unsetReg(rename_result.first); + - ++renameHBPlaceHolders; - } else { ++ DPRINTF(Rename, "[tid:%u]: Renaming arch reg %i to physical " ++ "reg %i.\n", tid, (int)dest_reg, ++ (int)rename_result.first); + - // Rename the destination registers. - for (int dest_idx = 0; dest_idx < num_dest_regs; dest_idx++) - { - RegIndex dest_reg = inst->destRegIdx(dest_idx); ++ // Record the rename information so that a history can be kept. ++ RenameHistory hb_entry(inst->seqNum, dest_reg, ++ rename_result.first, ++ rename_result.second); + - // Get the physical register that the destination will be - // renamed to. - rename_result = renameMap->rename(dest_reg); ++ historyBuffer[tid].push_front(hb_entry); + - DPRINTF(Rename, "Rename: Renaming arch reg %i to physical " - "reg %i.\n", (int)dest_reg, - (int)rename_result.first); ++ DPRINTF(Rename, "[tid:%u]: Adding instruction to history buffer, " ++ "[sn:%lli].\n",tid, ++ (*historyBuffer[tid].begin()).instSeqNum); + - // Record the rename information so that a history can be kept. - RenameHistory hb_entry(inst->seqNum, dest_reg, - rename_result.first, - rename_result.second); ++ // Tell the instruction to rename the appropriate destination ++ // register (dest_idx) to the new physical register ++ // (rename_result.first), and record the previous physical ++ // register that the same logical register was renamed to ++ // (rename_result.second). 
++ inst->renameDestReg(dest_idx, ++ rename_result.first, ++ rename_result.second); + - historyBuffer.push_front(hb_entry); ++ ++renameRenamedOperands; ++ } ++} + - DPRINTF(Rename, "Rename: Adding instruction to history buffer, " - "sequence number %lli.\n", - (*historyBuffer.begin()).instSeqNum); ++template ++inline int ++DefaultRename::calcFreeROBEntries(unsigned tid) ++{ ++ int num_free = freeEntries[tid].robEntries - ++ (instsInProgress[tid] - fromIEW->iewInfo[tid].dispatched); + - // Tell the instruction to rename the appropriate destination - // register (dest_idx) to the new physical register - // (rename_result.first), and record the previous physical - // register that the same logical register was renamed to - // (rename_result.second). - inst->renameDestReg(dest_idx, - rename_result.first, - rename_result.second); ++ //DPRINTF(Rename,"[tid:%i]: %i rob free\n",tid,num_free); + - ++renameRenamedOperands; - } - } ++ return num_free; +} + +template +inline int - SimpleRename::calcFreeROBEntries() ++DefaultRename::calcFreeIQEntries(unsigned tid) +{ - return fromCommit->commitInfo.freeROBEntries - - renameWidth * iewToRenameDelay; ++ int num_free = freeEntries[tid].iqEntries - ++ (instsInProgress[tid] - fromIEW->iewInfo[tid].dispatched); ++ ++ //DPRINTF(Rename,"[tid:%i]: %i iq free\n",tid,num_free); ++ ++ return num_free; +} + +template +inline int - SimpleRename::calcFreeIQEntries() ++DefaultRename::calcFreeLSQEntries(unsigned tid) +{ - return fromIEW->iewInfo.freeIQEntries - renameWidth * iewToRenameDelay; - } ++ int num_free = freeEntries[tid].lsqEntries - ++ (instsInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToLSQ); + - template - void - SimpleRename::tick() - { - // Rename will need to try to rename as many instructions as it - // has bandwidth, unless it is blocked. - - // Check if _status is BarrierStall. If so, then check if the number - // of free ROB entries is equal to the number of total ROB entries. - // Once equal then wake this stage up. 
Set status to unblocking maybe. - - if (_status != Blocked && _status != Squashing) { - DPRINTF(Rename, "Rename: Status is not blocked, will attempt to " - "run stage.\n"); - // Make sure that the skid buffer has something in it if the - // status is unblocking. - assert(_status == Unblocking ? !skidBuffer.empty() : 1); - - rename(); - - // If the status was unblocking, then instructions from the skid - // buffer were used. Remove those instructions and handle - // the rest of unblocking. - if (_status == Unblocking) { - ++renameUnblockCycles; - - if (fromDecode->size > 0) { - // Add the current inputs onto the skid buffer, so they can be - // reprocessed when this stage unblocks. - skidBuffer.push(*fromDecode); - } ++ //DPRINTF(Rename,"[tid:%i]: %i lsq free\n",tid,num_free); + - unblock(); - } - } else if (_status == Blocked) { - ++renameBlockCycles; ++ return num_free; ++} + - // If stage is blocked and still receiving valid instructions, - // make sure to store them in the skid buffer. - if (fromDecode->size > 0) { ++template ++unsigned ++DefaultRename::validInsts() ++{ ++ unsigned inst_count = 0; + - block(); ++ for (int i=0; isize; i++) { ++ if (!fromDecode->insts[i]->squashed) ++ inst_count++; ++ } + - // Continue to tell previous stage to stall. - toDecode->renameInfo.stall = true; - } ++ return inst_count; ++} + - if (!fromIEW->iewInfo.stall && - !fromCommit->commitInfo.stall && - calcFreeROBEntries() > 0 && - calcFreeIQEntries() > 0 && - renameMap->numFreeEntries() > 0) { - - // Need to be sure to check all blocking conditions above. - // If they have cleared, then start unblocking. - DPRINTF(Rename, "Rename: Stall signals cleared, going to " - "unblock.\n"); - _status = Unblocking; - - // Continue to tell previous stage to block until this stage - // is done unblocking. - toDecode->renameInfo.stall = true; - } else { - // Otherwise no conditions have changed. Tell previous - // stage to continue blocking. 
- toDecode->renameInfo.stall = true; - } ++template ++void ++DefaultRename::readStallSignals(unsigned tid) ++{ ++ if (fromIEW->iewBlock[tid]) { ++ stalls[tid].iew = true; ++ } + - if (fromCommit->commitInfo.squash || - fromCommit->commitInfo.robSquashing) { - squash(); - return; - } - } else if (_status == Squashing) { - ++renameSquashCycles; ++ if (fromIEW->iewUnblock[tid]) { ++ assert(stalls[tid].iew); ++ stalls[tid].iew = false; ++ } + - if (fromCommit->commitInfo.squash) { - squash(); - } else if (!fromCommit->commitInfo.squash && - !fromCommit->commitInfo.robSquashing) { ++ if (fromCommit->commitBlock[tid]) { ++ stalls[tid].commit = true; ++ } + - DPRINTF(Rename, "Rename: Done squashing, going to running.\n"); - _status = Running; - rename(); - } else { - doSquash(); - } ++ if (fromCommit->commitUnblock[tid]) { ++ assert(stalls[tid].commit); ++ stalls[tid].commit = false; + } ++} + - // Ugly code, revamp all of the tick() functions eventually. - if (fromCommit->commitInfo.doneSeqNum != 0 && _status != Squashing) { - #if !FULL_SYSTEM - if (!fromCommit->commitInfo.squash) { - removeFromHistory(fromCommit->commitInfo.doneSeqNum); - } - #else - removeFromHistory(fromCommit->commitInfo.doneSeqNum); - #endif ++template ++bool ++DefaultRename::checkStall(unsigned tid) ++{ ++ bool ret_val = false; ++ ++ if (stalls[tid].iew) { ++ DPRINTF(Rename,"[tid:%i]: Stall from IEW stage detected.\n", tid); ++ ret_val = true; ++ } else if (stalls[tid].commit) { ++ DPRINTF(Rename,"[tid:%i]: Stall from Commit stage detected.\n", tid); ++ ret_val = true; ++ } else if (calcFreeROBEntries(tid) <= 0) { ++ DPRINTF(Rename,"[tid:%i]: Stall: ROB has 0 free entries.\n", tid); ++ ret_val = true; ++ } else if (calcFreeIQEntries(tid) <= 0) { ++ DPRINTF(Rename,"[tid:%i]: Stall: IQ has 0 free entries.\n", tid); ++ ret_val = true; ++ } else if (calcFreeLSQEntries(tid) <= 0) { ++ DPRINTF(Rename,"[tid:%i]: Stall: LSQ has 0 free entries.\n", tid); ++ ret_val = true; ++ } else if 
(renameMap[tid]->numFreeEntries() <= 0) { ++ DPRINTF(Rename,"[tid:%i]: Stall: RenameMap has 0 free entries.\n", tid); ++ ret_val = true; ++ } else if (renameStatus[tid] == SerializeStall && ++ (!emptyROB[tid] || instsInProgress[tid])) { ++ DPRINTF(Rename,"[tid:%i]: Stall: Serialize stall and ROB is not " ++ "empty.\n", ++ tid); ++ ret_val = true; + } + ++ return ret_val; +} + - template ++template +void - SimpleRename::rename() - { - // Check if any of the stages ahead of rename are telling rename - // to squash. The squash() function will also take care of fixing up - // the rename map and the free list. - if (fromCommit->commitInfo.squash || - fromCommit->commitInfo.robSquashing) { - DPRINTF(Rename, "Rename: Receiving signal from Commit to squash.\n"); - squash(); - return; ++DefaultRename::readFreeEntries(unsigned tid) ++{ ++ bool updated = false; ++ if (fromIEW->iewInfo[tid].usedIQ) { ++ freeEntries[tid].iqEntries = ++ fromIEW->iewInfo[tid].freeIQEntries; ++ updated = true; + } + - // Check if time buffer is telling this stage to stall. - if (fromIEW->iewInfo.stall || - fromCommit->commitInfo.stall) { - DPRINTF(Rename, "Rename: Receiving signal from IEW/Commit to " - "stall.\n"); - block(); - return; ++ if (fromIEW->iewInfo[tid].usedLSQ) { ++ freeEntries[tid].lsqEntries = ++ fromIEW->iewInfo[tid].freeLSQEntries; ++ updated = true; + } + - // Check if the current status is squashing. If so, set its status - // to running and resume execution the next cycle. - if (_status == Squashing) { - DPRINTF(Rename, "Rename: Done squashing.\n"); - _status = Running; - return; ++ if (fromCommit->commitInfo[tid].usedROB) { ++ freeEntries[tid].robEntries = ++ fromCommit->commitInfo[tid].freeROBEntries; ++ emptyROB[tid] = fromCommit->commitInfo[tid].emptyROB; ++ updated = true; + } + - // Check the decode queue to see if instructions are available. - // If there are no available instructions to rename, then do nothing. 
- // Or, if the stage is currently unblocking, then go ahead and run it. - if (fromDecode->size == 0 && _status != Unblocking) { - DPRINTF(Rename, "Rename: Nothing to do, breaking out early.\n"); - // Should I change status to idle? - return; - } - - //////////////////////////////////// - // Actual rename part. - //////////////////////////////////// - - DynInstPtr inst; ++ DPRINTF(Rename, "[tid:%i]: Free IQ: %i, Free ROB: %i, Free LSQ: %i\n", ++ tid, ++ freeEntries[tid].iqEntries, ++ freeEntries[tid].robEntries, ++ freeEntries[tid].lsqEntries); + - // If we're unblocking, then we may be in the middle of an instruction - // group. Subtract off numInst to get the proper number of instructions - // left. - int insts_available = _status == Unblocking ? - skidBuffer.front().size - numInst : - fromDecode->size; ++ DPRINTF(Rename, "[tid:%i]: %i instructions not yet in ROB\n", ++ tid, instsInProgress[tid]); ++} + - bool block_this_cycle = false; ++template ++bool ++DefaultRename::checkSignalsAndUpdate(unsigned tid) ++{ ++ // Check if there's a squash signal, squash if there is ++ // Check stall signals, block if necessary. ++ // If status was blocked ++ // check if stall conditions have passed ++ // if so then go to unblocking ++ // If status was Squashing ++ // check if squashing is not high. Switch to running this cycle. ++ // If status was serialize stall ++ // check if ROB is empty and no insts are in flight to the ROB ++ ++ readFreeEntries(tid); ++ readStallSignals(tid); ++ ++ if (fromCommit->commitInfo[tid].squash) { ++ DPRINTF(Rename, "[tid:%u]: Squashing instructions due to squash from " ++ "commit.\n", tid); ++ ++ squash(tid); ++ ++ return true; ++ } + - // Will have to do a different calculation for the number of free - // entries. 
Number of free entries recorded on this cycle - - // renameWidth * renameToDecodeDelay - int free_rob_entries = calcFreeROBEntries(); - int free_iq_entries = calcFreeIQEntries(); - int min_iq_rob = min(free_rob_entries, free_iq_entries); ++ if (fromCommit->commitInfo[tid].robSquashing) { ++ DPRINTF(Rename, "[tid:%u]: ROB is still squashing.\n", tid); + - unsigned to_iew_index = 0; ++ renameStatus[tid] = Squashing; + - // Check if there's any space left. - if (min_iq_rob <= 0) { - DPRINTF(Rename, "Rename: Blocking due to no free ROB or IQ " - "entries.\n" - "Rename: ROB has %d free entries.\n" - "Rename: IQ has %d free entries.\n", - free_rob_entries, - free_iq_entries); - block(); - // Tell previous stage to stall. - toDecode->renameInfo.stall = true; ++ return true; ++ } + - if (free_rob_entries <= 0) { - ++renameROBFullEvents; - } else { - ++renameIQFullEvents; - } ++ if (checkStall(tid)) { ++ return block(tid); ++ } + - return; - } else if (min_iq_rob < insts_available) { - DPRINTF(Rename, "Rename: Will have to block this cycle. Only " - "%i insts can be renamed due to IQ/ROB limits.\n", - min_iq_rob); ++ if (renameStatus[tid] == Blocked) { ++ DPRINTF(Rename, "[tid:%u]: Done blocking, switching to unblocking.\n", ++ tid); + - insts_available = min_iq_rob; ++ renameStatus[tid] = Unblocking; + - block_this_cycle = true; ++ unblock(tid); + - if (free_rob_entries < free_iq_entries) { - ++renameROBFullEvents; - } else { - ++renameIQFullEvents; - } ++ return true; + } + - while (insts_available > 0) { - DPRINTF(Rename, "Rename: Sending instructions to iew.\n"); ++ if (renameStatus[tid] == Squashing) { ++ // Switch status to running if rename isn't being told to block or ++ // squash this cycle. ++ DPRINTF(Rename, "[tid:%u]: Done squashing, switching to running.\n", ++ tid); + - // Get the next instruction either from the skid buffer or the - // decode queue. - inst = _status == Unblocking ? 
skidBuffer.front().insts[numInst] : - fromDecode->insts[numInst]; ++ renameStatus[tid] = Running; + - if (inst->isSquashed()) { - DPRINTF(Rename, "Rename: instruction %i with PC %#x is " - "squashed, skipping.\n", - inst->seqNum, inst->readPC()); ++ return false; ++ } + - // Go to the next instruction. - ++numInst; ++ if (renameStatus[tid] == SerializeStall) { ++ // Stall ends once the ROB is free. ++ DPRINTF(Rename, "[tid:%u]: Done with serialize stall, switching to " ++ "unblocking.\n", tid); + - ++renameSquashedInsts; ++ DynInstPtr serial_inst = serializeInst[tid]; + - // Decrement how many instructions are available. - --insts_available; ++ renameStatus[tid] = Unblocking; + - continue; - } ++ unblock(tid); + - DPRINTF(Rename, "Rename: Processing instruction %i with PC %#x.\n", - inst->seqNum, inst->readPC()); - - // If it's a trap instruction, then it needs to wait here within - // rename until the ROB is empty. Needs a way to detect that the - // ROB is empty. Maybe an event? - // Would be nice if it could be avoided putting this into a - // specific stage and instead just put it into the AlphaFullCPU. - // Might not really be feasible though... - // (EXCB, TRAPB) - if (inst->isSerializing()) { - panic("Rename: Serializing instruction encountered.\n"); - DPRINTF(Rename, "Rename: Serializing instruction " - "encountered.\n"); - - // Change status over to BarrierStall so that other stages know - // what this is blocked on. - _status = BarrierStall; ++ DPRINTF(Rename, "[tid:%u]: Processing instruction [%lli] with " ++ "PC %#x.\n", ++ tid, serial_inst->seqNum, serial_inst->readPC()); + - block_this_cycle = true; ++ // Put instruction into queue here. ++ serial_inst->clearSerializeBefore(); + - break; ++ if (!skidBuffer[tid].empty()) { ++ skidBuffer[tid].push_front(serial_inst); ++ } else { ++ insts[tid].push_front(serial_inst); + } + - // Check here to make sure there are enough destination registers - // to rename to. Otherwise block. 
- if (renameMap->numFreeEntries() < inst->numDestRegs()) - { - DPRINTF(Rename, "Rename: Blocking due to lack of free " - "physical registers to rename to.\n"); - // Need some sort of event based on a register being freed. - - block_this_cycle = true; ++ DPRINTF(Rename, "[tid:%u]: Instruction must be processed by rename." ++ " Adding to front of list.", tid); + - ++renameFullRegistersEvents; ++ serializeInst[tid] = NULL; + - break; - } ++ return true; ++ } + - renameSrcRegs(inst); ++ // If we've reached this point, we have not gotten any signals that ++ // cause rename to change its status. Rename remains the same as before. ++ return false; ++} + - renameDestRegs(inst); ++template ++void ++DefaultRename::serializeAfter(InstQueue &inst_list, ++ unsigned tid) ++{ ++ if (inst_list.empty()) { ++ // Mark a bit to say that I must serialize on the next instruction. ++ serializeOnNextInst[tid] = true; ++ return; ++ } + - // Put instruction in rename queue. - toIEW->insts[to_iew_index] = inst; - ++(toIEW->size); ++ // Set the next instruction as serializing. ++ inst_list.front()->setSerializeBefore(); ++} + - // Decrease the number of free ROB and IQ entries. - --free_rob_entries; - --free_iq_entries; ++template ++inline void ++DefaultRename::incrFullStat(const FullSource &source) ++{ ++ switch (source) { ++ case ROB: ++ ++renameROBFullEvents; ++ break; ++ case IQ: ++ ++renameIQFullEvents; ++ break; ++ case LSQ: ++ ++renameLSQFullEvents; ++ break; ++ default: ++ panic("Rename full stall stat should be incremented for a reason!"); ++ break; ++ } ++} + - // Increment which instruction we're on. - ++to_iew_index; - ++numInst; ++template ++void ++DefaultRename::dumpHistory() ++{ ++ typename list::iterator buf_it; + - ++renameRenamedInsts; ++ for (int i = 0; i < numThreads; i++) { + - // Decrement how many instructions are available. - --insts_available; - } ++ buf_it = historyBuffer[i].begin(); + - // Check if there's any instructions left that haven't yet been renamed. 
- // If so then block. - if (block_this_cycle) { - block(); ++ while (buf_it != historyBuffer[i].end()) { ++ cprintf("Seq num: %i\nArch reg: %i New phys reg: %i Old phys " ++ "reg: %i\n", (*buf_it).instSeqNum, (int)(*buf_it).archReg, ++ (int)(*buf_it).newPhysReg, (int)(*buf_it).prevPhysReg); + - toDecode->renameInfo.stall = true; - } else { - // If we had a successful rename and didn't have to exit early, then - // reset numInst so it will refer to the correct instruction on next - // run. - numInst = 0; ++ buf_it++; ++ } + } +} diff --cc src/cpu/o3/rename_map.cc index 10963f7de,000000000..fc59058a1 mode 100644,000000..100644 --- a/src/cpu/o3/rename_map.cc +++ b/src/cpu/o3/rename_map.cc @@@ -1,346 -1,0 +1,245 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "cpu/o3/rename_map.hh" + +using namespace std; + - // Todo: Consider making functions inline. Avoid having things that are - // using the zero register or misc registers from adding on the registers - // to the free list. Possibly remove the direct communication between - // this and the freelist. Considering making inline bool functions that - // determine if the register is a logical int, logical fp, physical int, - // physical fp, etc. - - SimpleRenameMap::SimpleRenameMap(unsigned _numLogicalIntRegs, - unsigned _numPhysicalIntRegs, - unsigned _numLogicalFloatRegs, - unsigned _numPhysicalFloatRegs, - unsigned _numMiscRegs, - RegIndex _intZeroReg, - RegIndex _floatZeroReg) - : numLogicalIntRegs(_numLogicalIntRegs), - numPhysicalIntRegs(_numPhysicalIntRegs), - numLogicalFloatRegs(_numLogicalFloatRegs), - numPhysicalFloatRegs(_numPhysicalFloatRegs), - numMiscRegs(_numMiscRegs), - intZeroReg(_intZeroReg), - floatZeroReg(_floatZeroReg) ++// @todo: Consider making inline bool functions that determine if the ++// register is a logical int, logical fp, physical int, physical fp, ++// etc. 
++ ++SimpleRenameMap::~SimpleRenameMap() ++{ ++} ++ ++void ++SimpleRenameMap::init(unsigned _numLogicalIntRegs, ++ unsigned _numPhysicalIntRegs, ++ PhysRegIndex &ireg_idx, ++ ++ unsigned _numLogicalFloatRegs, ++ unsigned _numPhysicalFloatRegs, ++ PhysRegIndex &freg_idx, ++ ++ unsigned _numMiscRegs, ++ ++ RegIndex _intZeroReg, ++ RegIndex _floatZeroReg, ++ ++ int map_id, ++ bool bindRegs) +{ - DPRINTF(Rename, "Rename: Creating rename map. Phys: %i / %i, Float: " - "%i / %i.\n", numLogicalIntRegs, numPhysicalIntRegs, ++ id = map_id; ++ ++ numLogicalIntRegs = _numLogicalIntRegs; ++ ++ numLogicalFloatRegs = _numLogicalFloatRegs; ++ ++ numPhysicalIntRegs = _numPhysicalIntRegs; ++ ++ numPhysicalFloatRegs = _numPhysicalFloatRegs; ++ ++ numMiscRegs = _numMiscRegs; ++ ++ intZeroReg = _intZeroReg; ++ floatZeroReg = _floatZeroReg; ++ ++ DPRINTF(Rename, "Creating rename map %i. Phys: %i / %i, Float: " ++ "%i / %i.\n", id, numLogicalIntRegs, numPhysicalIntRegs, + numLogicalFloatRegs, numPhysicalFloatRegs); + + numLogicalRegs = numLogicalIntRegs + numLogicalFloatRegs; + + numPhysicalRegs = numPhysicalIntRegs + numPhysicalFloatRegs; + - //Create the rename maps, and their scoreboards. - intRenameMap = new RenameEntry[numLogicalIntRegs]; - floatRenameMap = new RenameEntry[numLogicalRegs]; - - // Should combine this into one scoreboard. - intScoreboard.resize(numPhysicalIntRegs); - floatScoreboard.resize(numPhysicalRegs); - miscScoreboard.resize(numPhysicalRegs + numMiscRegs); - - // Initialize the entries in the integer rename map to point to the - // physical registers of the same index, and consider each register - // ready until the first rename occurs. 
- for (RegIndex index = 0; index < numLogicalIntRegs; ++index) - { - intRenameMap[index].physical_reg = index; - intScoreboard[index] = 1; - } ++ //Create the rename maps ++ intRenameMap.resize(numLogicalIntRegs); ++ floatRenameMap.resize(numLogicalRegs); + - // Initialize the rest of the physical registers (the ones that don't - // directly map to a logical register) as unready. - for (PhysRegIndex index = numLogicalIntRegs; - index < numPhysicalIntRegs; - ++index) - { - intScoreboard[index] = 0; - } ++ if (bindRegs) { ++ DPRINTF(Rename, "Binding registers into rename map %i",id); + - int float_reg_idx = numPhysicalIntRegs; - - // Initialize the entries in the floating point rename map to point to - // the physical registers of the same index, and consider each register - // ready until the first rename occurs. - // Although the index refers purely to architected registers, because - // the floating reg indices come after the integer reg indices, they - // may exceed the size of a normal RegIndex (short). - for (PhysRegIndex index = numLogicalIntRegs; - index < numLogicalRegs; ++index) - { - floatRenameMap[index].physical_reg = float_reg_idx++; - } ++ // Initialize the entries in the integer rename map to point to the ++ // physical registers of the same index ++ for (RegIndex index = 0; index < numLogicalIntRegs; ++index) ++ { ++ intRenameMap[index].physical_reg = ireg_idx++; ++ } + - for (PhysRegIndex index = numPhysicalIntRegs; - index < numPhysicalIntRegs + numLogicalFloatRegs; ++index) - { - floatScoreboard[index] = 1; - } ++ // Initialize the entries in the floating point rename map to point to ++ // the physical registers of the same index ++ // Although the index refers purely to architected registers, because ++ // the floating reg indices come after the integer reg indices, they ++ // may exceed the size of a normal RegIndex (short). 
++ for (PhysRegIndex index = numLogicalIntRegs; ++ index < numLogicalRegs; ++index) ++ { ++ floatRenameMap[index].physical_reg = freg_idx++; ++ } ++ } else { ++ DPRINTF(Rename, "Binding registers into rename map %i",id); + - // Initialize the rest of the physical registers (the ones that don't - // directly map to a logical register) as unready. - for (PhysRegIndex index = numPhysicalIntRegs + numLogicalFloatRegs; - index < numPhysicalRegs; - ++index) - { - floatScoreboard[index] = 0; - } ++ PhysRegIndex temp_ireg = ireg_idx; + - // Initialize the entries in the misc register scoreboard to be ready. - for (PhysRegIndex index = numPhysicalRegs; - index < numPhysicalRegs + numMiscRegs; ++index) - { - miscScoreboard[index] = 1; - } - } ++ for (RegIndex index = 0; index < numLogicalIntRegs; ++index) ++ { ++ intRenameMap[index].physical_reg = temp_ireg++; ++ } + - SimpleRenameMap::~SimpleRenameMap() - { - // Delete the rename maps as they were allocated with new. - delete [] intRenameMap; - delete [] floatRenameMap; ++ PhysRegIndex temp_freg = freg_idx; ++ ++ for (PhysRegIndex index = numLogicalIntRegs; ++ index < numLogicalRegs; ++index) ++ { ++ floatRenameMap[index].physical_reg = temp_freg++; ++ } ++ } +} + +void +SimpleRenameMap::setFreeList(SimpleFreeList *fl_ptr) +{ - //Setup the interface to the freelist. + freeList = fl_ptr; +} + + - // Don't allow this stage to fault; force that check to the rename stage. - // Simply ask to rename a logical register and get back a new physical - // register index. +SimpleRenameMap::RenameInfo +SimpleRenameMap::rename(RegIndex arch_reg) +{ + PhysRegIndex renamed_reg; + PhysRegIndex prev_reg; + + if (arch_reg < numLogicalIntRegs) { + + // Record the current physical register that is renamed to the + // requested architected register. + prev_reg = intRenameMap[arch_reg].physical_reg; + - // If it's not referencing the zero register, then mark the register - // as not ready. 
++ // If it's not referencing the zero register, then rename the ++ // register. + if (arch_reg != intZeroReg) { - // Get a free physical register to rename to. + renamed_reg = freeList->getIntReg(); + - // Update the integer rename map. + intRenameMap[arch_reg].physical_reg = renamed_reg; + + assert(renamed_reg >= 0 && renamed_reg < numPhysicalIntRegs); + - // Mark register as not ready. - intScoreboard[renamed_reg] = false; + } else { + // Otherwise return the zero register so nothing bad happens. + renamed_reg = intZeroReg; + } + } else if (arch_reg < numLogicalRegs) { - // Subtract off the base offset for floating point registers. - // arch_reg = arch_reg - numLogicalIntRegs; - + // Record the current physical register that is renamed to the + // requested architected register. + prev_reg = floatRenameMap[arch_reg].physical_reg; + - // If it's not referencing the zero register, then mark the register - // as not ready. ++ // If it's not referencing the zero register, then rename the ++ // register. + if (arch_reg != floatZeroReg) { - // Get a free floating point register to rename to. + renamed_reg = freeList->getFloatReg(); + - // Update the floating point rename map. + floatRenameMap[arch_reg].physical_reg = renamed_reg; + + assert(renamed_reg < numPhysicalRegs && + renamed_reg >= numPhysicalIntRegs); - - // Mark register as not ready. - floatScoreboard[renamed_reg] = false; + } else { + // Otherwise return the zero register so nothing bad happens. + renamed_reg = floatZeroReg; + } + } else { + // Subtract off the base offset for miscellaneous registers. + arch_reg = arch_reg - numLogicalRegs; + - // No renaming happens to the misc. registers. They are simply the - // registers that come after all the physical registers; thus - // take the base architected register and add the physical registers - // to it. ++ // No renaming happens to the misc. registers. 
They are ++ // simply the registers that come after all the physical ++ // registers; thus take the base architected register and add ++ // the physical registers to it. + renamed_reg = arch_reg + numPhysicalRegs; + + // Set the previous register to the same register; mainly it must be + // known that the prev reg was outside the range of normal registers + // so the free list can avoid adding it. + prev_reg = renamed_reg; + + assert(renamed_reg < numPhysicalRegs + numMiscRegs); - - miscScoreboard[renamed_reg] = false; + } + + return RenameInfo(renamed_reg, prev_reg); +} + - //Perhaps give this a pair as a return value, of the physical register - //and whether or not it's ready. +PhysRegIndex +SimpleRenameMap::lookup(RegIndex arch_reg) +{ + if (arch_reg < numLogicalIntRegs) { + return intRenameMap[arch_reg].physical_reg; + } else if (arch_reg < numLogicalRegs) { - // Subtract off the base FP offset. - // arch_reg = arch_reg - numLogicalIntRegs; - + return floatRenameMap[arch_reg].physical_reg; + } else { + // Subtract off the misc registers offset. + arch_reg = arch_reg - numLogicalRegs; + + // Misc. regs don't rename, so simply add the base arch reg to + // the number of physical registers. + return numPhysicalRegs + arch_reg; + } +} + - bool - SimpleRenameMap::isReady(PhysRegIndex phys_reg) - { - if (phys_reg < numPhysicalIntRegs) { - return intScoreboard[phys_reg]; - } else if (phys_reg < numPhysicalRegs) { - - // Subtract off the base FP offset. - // phys_reg = phys_reg - numPhysicalIntRegs; - - return floatScoreboard[phys_reg]; - } else { - // Subtract off the misc registers offset. - // phys_reg = phys_reg - numPhysicalRegs; - - return miscScoreboard[phys_reg]; - } - } - - // In this implementation the miscellaneous registers do not actually rename, - // so this function does not allow you to try to change their mappings. 
+void +SimpleRenameMap::setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg) +{ ++ // In this implementation the miscellaneous registers do not ++ // actually rename, so this function does not allow you to try to ++ // change their mappings. + if (arch_reg < numLogicalIntRegs) { + DPRINTF(Rename, "Rename Map: Integer register %i being set to %i.\n", + (int)arch_reg, renamed_reg); + + intRenameMap[arch_reg].physical_reg = renamed_reg; - } else { - assert(arch_reg < (numLogicalIntRegs + numLogicalFloatRegs)); - ++ } else if (arch_reg < numLogicalIntRegs + numLogicalFloatRegs) { + DPRINTF(Rename, "Rename Map: Float register %i being set to %i.\n", + (int)arch_reg - numLogicalIntRegs, renamed_reg); + + floatRenameMap[arch_reg].physical_reg = renamed_reg; + } +} + - void - SimpleRenameMap::squash(vector freed_regs, - vector unmaps) - { - panic("Not sure this function should be called."); - - // Not sure the rename map should be able to access the free list - // like this. - while (!freed_regs.empty()) { - RegIndex free_register = freed_regs.back(); - - if (free_register < numPhysicalIntRegs) { - freeList->addIntReg(free_register); - } else { - // Subtract off the base FP dependence tag. - free_register = free_register - numPhysicalIntRegs; - freeList->addFloatReg(free_register); - } - - freed_regs.pop_back(); - } - - // Take unmap info and roll back the rename map. - } - - void - SimpleRenameMap::markAsReady(PhysRegIndex ready_reg) - { - DPRINTF(Rename, "Rename map: Marking register %i as ready.\n", - (int)ready_reg); - - if (ready_reg < numPhysicalIntRegs) { - assert(ready_reg >= 0); - - intScoreboard[ready_reg] = 1; - } else if (ready_reg < numPhysicalRegs) { - - // Subtract off the base FP offset. - // ready_reg = ready_reg - numPhysicalIntRegs; - - floatScoreboard[ready_reg] = 1; - } else { - //Subtract off the misc registers offset. 
- // ready_reg = ready_reg - numPhysicalRegs; - - miscScoreboard[ready_reg] = 1; - } - } - +int +SimpleRenameMap::numFreeEntries() +{ + int free_int_regs = freeList->numFreeIntRegs(); + int free_float_regs = freeList->numFreeFloatRegs(); + + if (free_int_regs < free_float_regs) { + return free_int_regs; + } else { + return free_float_regs; + } +} diff --cc src/cpu/o3/rename_map.hh index 57be4a64a,000000000..d7e49ae83 mode 100644,000000..100644 --- a/src/cpu/o3/rename_map.hh +++ b/src/cpu/o3/rename_map.hh @@@ -1,173 -1,0 +1,165 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Todo: Create destructor. +// Have it so that there's a more meaningful name given to the variable +// that marks the beginning of the FP registers. + - #ifndef __CPU_O3_CPU_RENAME_MAP_HH__ - #define __CPU_O3_CPU_RENAME_MAP_HH__ ++#ifndef __CPU_O3_RENAME_MAP_HH__ ++#define __CPU_O3_RENAME_MAP_HH__ + +#include +#include +#include + +#include "cpu/o3/free_list.hh" +//For RegIndex +#include "arch/isa_traits.hh" + +class SimpleRenameMap +{ + protected: + typedef TheISA::RegIndex RegIndex; + public: + /** + * Pair of a logical register and a physical register. Tells the + * previous mapping of a logical register to a physical register. + * Used to roll back the rename map to a previous state. + */ + typedef std::pair UnmapInfo; + + /** + * Pair of a physical register and a physical register. Used to + * return the physical register that a logical register has been + * renamed to, and the previous physical register that the same + * logical register was previously mapped to. + */ + typedef std::pair RenameInfo; + + public: + //Constructor - SimpleRenameMap(unsigned _numLogicalIntRegs, - unsigned _numPhysicalIntRegs, - unsigned _numLogicalFloatRegs, - unsigned _numPhysicalFloatRegs, - unsigned _numMiscRegs, - RegIndex _intZeroReg, - RegIndex _floatZeroReg); ++ SimpleRenameMap() {}; + + /** Destructor. 
*/ + ~SimpleRenameMap(); + ++ void init(unsigned _numLogicalIntRegs, ++ unsigned _numPhysicalIntRegs, ++ PhysRegIndex &_int_reg_start, ++ ++ unsigned _numLogicalFloatRegs, ++ unsigned _numPhysicalFloatRegs, ++ PhysRegIndex &_float_reg_start, ++ ++ unsigned _numMiscRegs, ++ ++ RegIndex _intZeroReg, ++ RegIndex _floatZeroReg, ++ ++ int id, ++ bool bindRegs); ++ + void setFreeList(SimpleFreeList *fl_ptr); + + //Tell rename map to get a free physical register for a given + //architected register. Not sure it should have a return value, + //but perhaps it should have some sort of fault in case there are + //no free registers. + RenameInfo rename(RegIndex arch_reg); + + PhysRegIndex lookup(RegIndex phys_reg); + - bool isReady(PhysRegIndex arch_reg); - + /** + * Marks the given register as ready, meaning that its value has been + * calculated and written to the register file. + * @param ready_reg The index of the physical register that is now ready. + */ - void markAsReady(PhysRegIndex ready_reg); - + void setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg); + - void squash(std::vector freed_regs, - std::vector unmaps); - + int numFreeEntries(); + + private: ++ /** Rename Map ID */ ++ int id; ++ + /** Number of logical integer registers. */ + int numLogicalIntRegs; + + /** Number of physical integer registers. */ + int numPhysicalIntRegs; + + /** Number of logical floating point registers. */ + int numLogicalFloatRegs; + + /** Number of physical floating point registers. */ + int numPhysicalFloatRegs; + + /** Number of miscellaneous registers. */ + int numMiscRegs; + + /** Number of logical integer + float registers. */ + int numLogicalRegs; + + /** Number of physical integer + float registers. */ + int numPhysicalRegs; + + /** The integer zero register. This implementation assumes it is always + * zero and never can be anything else. + */ + RegIndex intZeroReg; + + /** The floating point zero register. 
This implementation assumes it is + * always zero and never can be anything else. + */ + RegIndex floatZeroReg; + + class RenameEntry + { + public: + PhysRegIndex physical_reg; + bool valid; + + RenameEntry() + : physical_reg(0), valid(false) + { } + }; + ++ //Change this to private ++ private: + /** Integer rename map. */ - RenameEntry *intRenameMap; ++ std::vector intRenameMap; + + /** Floating point rename map. */ - RenameEntry *floatRenameMap; ++ std::vector floatRenameMap; + ++ private: + /** Free list interface. */ + SimpleFreeList *freeList; - - // Might want to make all these scoreboards into one large scoreboard. - - /** Scoreboard of physical integer registers, saying whether or not they - * are ready. - */ - std::vector intScoreboard; - - /** Scoreboard of physical floating registers, saying whether or not they - * are ready. - */ - std::vector floatScoreboard; - - /** Scoreboard of miscellaneous registers, saying whether or not they - * are ready. - */ - std::vector miscScoreboard; +}; + - #endif //__CPU_O3_CPU_RENAME_MAP_HH__ ++#endif //__CPU_O3_RENAME_MAP_HH__ diff --cc src/cpu/o3/rob.hh index 1185564ad,000000000..e05eebe5a mode 100644,000000..100644 --- a/src/cpu/o3/rob.hh +++ b/src/cpu/o3/rob.hh @@@ -1,164 -1,0 +1,314 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - // Todo: Probably add in support for scheduling events (more than one as - // well) on the case of the ROB being empty or full. Considering tracking - // free entries instead of insts in ROB. Differentiate between squashing - // all instructions after the instruction, and all instructions after *and* - // including that instruction. 
- - #ifndef __CPU_O3_CPU_ROB_HH__ - #define __CPU_O3_CPU_ROB_HH__ ++#ifndef __CPU_O3_ROB_HH__ ++#define __CPU_O3_ROB_HH__ + ++#include +#include +#include + +/** - * ROB class. Uses the instruction list that exists within the CPU to - * represent the ROB. This class doesn't contain that list, but instead - * a pointer to the CPU to get access to the list. The ROB, in this first - * implementation, is largely what drives squashing. ++ * ROB class. The ROB is largely what drives squashing. + */ +template +class ROB +{ + protected: + typedef TheISA::RegIndex RegIndex; + public: + //Typedefs from the Impl. + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + - typedef std::pair UnmapInfo_t; - typedef typename list::iterator InstIt_t; ++ typedef std::pair UnmapInfo; ++ typedef typename std::list::iterator InstIt; ++ ++ /** Possible ROB statuses. */ ++ enum Status { ++ Running, ++ Idle, ++ ROBSquashing ++ }; ++ ++ /** SMT ROB Sharing Policy */ ++ enum ROBPolicy{ ++ Dynamic, ++ Partitioned, ++ Threshold ++ }; ++ ++ private: ++ /** Per-thread ROB status. */ ++ Status robStatus[Impl::MaxThreads]; ++ ++ /** ROB resource sharing policy for SMT mode. */ ++ ROBPolicy robPolicy; + + public: + /** ROB constructor. - * @param _numEntries Number of entries in ROB. - * @param _squashWidth Number of instructions that can be squashed in a - * single cycle. ++ * @param _numEntries Number of entries in ROB. ++ * @param _squashWidth Number of instructions that can be squashed in a ++ * single cycle. ++ * @param _smtROBPolicy ROB Partitioning Scheme for SMT. ++ * @param _smtROBThreshold Max Resources(by %) a thread can have in the ROB. ++ * @param _numThreads The number of active threads. 
+ */ - ROB(unsigned _numEntries, unsigned _squashWidth); ++ ROB(unsigned _numEntries, unsigned _squashWidth, std::string smtROBPolicy, ++ unsigned _smtROBThreshold, unsigned _numThreads); ++ ++ std::string name() const; + + /** Function to set the CPU pointer, necessary due to which object the ROB + * is created within. + * @param cpu_ptr Pointer to the implementation specific full CPU object. + */ + void setCPU(FullCPU *cpu_ptr); + - /** Function to insert an instruction into the ROB. The parameter inst is - * not truly required, but is useful for checking correctness. Note - * that whatever calls this function must ensure that there is enough - * space within the ROB for the new instruction. ++ /** Sets pointer to the list of active threads. ++ * @param at_ptr Pointer to the list of active threads. ++ */ ++ void setActiveThreads(std::list* at_ptr); ++ ++ void switchOut(); ++ ++ void takeOverFrom(); ++ ++ /** Function to insert an instruction into the ROB. Note that whatever ++ * calls this function must ensure that there is enough space within the ++ * ROB for the new instruction. + * @param inst The instruction being inserted into the ROB. - * @todo Remove the parameter once correctness is ensured. + */ + void insertInst(DynInstPtr &inst); + + /** Returns pointer to the head instruction within the ROB. There is + * no guarantee as to the return value if the ROB is empty. + * @retval Pointer to the DynInst that is at the head of the ROB. + */ - DynInstPtr readHeadInst() { return cpu->instList.front(); } ++// DynInstPtr readHeadInst(); ++ ++ /** Returns a pointer to the head instruction of a specific thread within ++ * the ROB. ++ * @return Pointer to the DynInst that is at the head of the ROB. ++ */ ++ DynInstPtr readHeadInst(unsigned tid); ++ ++ /** Returns pointer to the tail instruction within the ROB. There is ++ * no guarantee as to the return value if the ROB is empty. ++ * @retval Pointer to the DynInst that is at the tail of the ROB. 
++ */ ++// DynInstPtr readTailInst(); ++ ++ /** Returns a pointer to the tail instruction of a specific thread within ++ * the ROB. ++ * @return Pointer to the DynInst that is at the tail of the ROB. ++ */ ++ DynInstPtr readTailInst(unsigned tid); ++ ++ /** Retires the head instruction, removing it from the ROB. */ ++// void retireHead(); ++ ++ /** Retires the head instruction of a specific thread, removing it from the ++ * ROB. ++ */ ++ void retireHead(unsigned tid); + - DynInstPtr readTailInst() { return (*tail); } ++ /** Is the oldest instruction across all threads ready. */ ++// bool isHeadReady(); + - void retireHead(); ++ /** Is the oldest instruction across a particular thread ready. */ ++ bool isHeadReady(unsigned tid); + - bool isHeadReady(); ++ /** Is there any commitable head instruction across all threads ready. */ ++ bool canCommit(); + ++ /** Re-adjust ROB partitioning. */ ++ void resetEntries(); ++ ++ /** Number of entries needed For 'num_threads' amount of threads. */ ++ int entryAmount(int num_threads); ++ ++ /** Returns the number of total free entries in the ROB. */ + unsigned numFreeEntries(); + ++ /** Returns the number of free entries in a specific ROB paritition. */ ++ unsigned numFreeEntries(unsigned tid); ++ ++ /** Returns the maximum number of entries for a specific thread. */ ++ unsigned getMaxEntries(unsigned tid) ++ { return maxEntries[tid]; } ++ ++ /** Returns the number of entries being used by a specific thread. */ ++ unsigned getThreadEntries(unsigned tid) ++ { return threadEntries[tid]; } ++ ++ /** Returns if the ROB is full. */ + bool isFull() + { return numInstsInROB == numEntries; } + ++ /** Returns if a specific thread's partition is full. */ ++ bool isFull(unsigned tid) ++ { return threadEntries[tid] == numEntries; } ++ ++ /** Returns if the ROB is empty. */ + bool isEmpty() + { return numInstsInROB == 0; } + - void doSquash(); ++ /** Returns if a specific thread's partition is empty. 
*/ ++ bool isEmpty(unsigned tid) ++ { return threadEntries[tid] == 0; } + - void squash(InstSeqNum squash_num); ++ /** Executes the squash, marking squashed instructions. */ ++ void doSquash(unsigned tid); + - uint64_t readHeadPC(); ++ /** Squashes all instructions younger than the given sequence number for ++ * the specific thread. ++ */ ++ void squash(InstSeqNum squash_num, unsigned tid); + - uint64_t readHeadNextPC(); ++ /** Updates the head instruction with the new oldest instruction. */ ++ void updateHead(); + - InstSeqNum readHeadSeqNum(); ++ /** Updates the tail instruction with the new youngest instruction. */ ++ void updateTail(); + - uint64_t readTailPC(); ++ /** Reads the PC of the oldest head instruction. */ ++// uint64_t readHeadPC(); + - InstSeqNum readTailSeqNum(); ++ /** Reads the PC of the head instruction of a specific thread. */ ++// uint64_t readHeadPC(unsigned tid); ++ ++ /** Reads the next PC of the oldest head instruction. */ ++// uint64_t readHeadNextPC(); ++ ++ /** Reads the next PC of the head instruction of a specific thread. */ ++// uint64_t readHeadNextPC(unsigned tid); ++ ++ /** Reads the sequence number of the oldest head instruction. */ ++// InstSeqNum readHeadSeqNum(); ++ ++ /** Reads the sequence number of the head instruction of a specific thread. ++ */ ++// InstSeqNum readHeadSeqNum(unsigned tid); ++ ++ /** Reads the PC of the youngest tail instruction. */ ++// uint64_t readTailPC(); ++ ++ /** Reads the PC of the tail instruction of a specific thread. */ ++// uint64_t readTailPC(unsigned tid); ++ ++ /** Reads the sequence number of the youngest tail instruction. */ ++// InstSeqNum readTailSeqNum(); ++ ++ /** Reads the sequence number of tail instruction of a specific thread. */ ++// InstSeqNum readTailSeqNum(unsigned tid); + + /** Checks if the ROB is still in the process of squashing instructions. + * @retval Whether or not the ROB is done squashing. 
+ */ - bool isDoneSquashing() const { return doneSquashing; } ++ bool isDoneSquashing(unsigned tid) const ++ { return doneSquashing[tid]; } ++ ++ /** Checks if the ROB is still in the process of squashing instructions for ++ * any thread. ++ */ ++ bool isDoneSquashing(); + + /** This is more of a debugging function than anything. Use + * numInstsInROB to get the instructions in the ROB unless you are + * double checking that variable. + */ + int countInsts(); + - private: ++ /** This is more of a debugging function than anything. Use ++ * threadEntries to get the instructions in the ROB unless you are ++ * double checking that variable. ++ */ ++ int countInsts(unsigned tid); + ++ private: + /** Pointer to the CPU. */ + FullCPU *cpu; + ++ /** Active Threads in CPU */ ++ std::list* activeThreads; ++ + /** Number of instructions in the ROB. */ + unsigned numEntries; + ++ /** Entries Per Thread */ ++ unsigned threadEntries[Impl::MaxThreads]; ++ ++ /** Max Insts a Thread Can Have in the ROB */ ++ unsigned maxEntries[Impl::MaxThreads]; ++ ++ /** ROB List of Instructions */ ++ std::list instList[Impl::MaxThreads]; ++ + /** Number of instructions that can be squashed in a single cycle. */ + unsigned squashWidth; + ++ public: + /** Iterator pointing to the instruction which is the last instruction + * in the ROB. This may at times be invalid (ie when the ROB is empty), + * however it should never be incorrect. + */ - InstIt_t tail; ++ InstIt tail; + ++ /** Iterator pointing to the instruction which is the first instruction in ++ * in the ROB*/ ++ InstIt head; ++ ++ private: + /** Iterator used for walking through the list of instructions when + * squashing. Used so that there is persistent state between cycles; + * when squashing, the instructions are marked as squashed but not + * immediately removed, meaning the tail iterator remains the same before + * and after a squash. + * This will always be set to cpu->instList.end() if it is invalid. 
+ */ - InstIt_t squashIt; ++ InstIt squashIt[Impl::MaxThreads]; + ++ public: + /** Number of instructions in the ROB. */ + int numInstsInROB; + ++ DynInstPtr dummyInst; ++ ++ private: + /** The sequence number of the squashed instruction. */ + InstSeqNum squashedSeqNum; + + /** Is the ROB done squashing. */ - bool doneSquashing; ++ bool doneSquashing[Impl::MaxThreads]; ++ ++ /** Number of active threads. */ ++ unsigned numThreads; +}; + - #endif //__CPU_O3_CPU_ROB_HH__ ++#endif //__CPU_O3_ROB_HH__ diff --cc src/cpu/o3/rob_impl.hh index e7a5671d9,000000000..25e0c80fd mode 100644,000000..100644 --- a/src/cpu/o3/rob_impl.hh +++ b/src/cpu/o3/rob_impl.hh @@@ -1,312 -1,0 +1,691 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - #ifndef __CPU_O3_CPU_ROB_IMPL_HH__ - #define __CPU_O3_CPU_ROB_IMPL_HH__ - +#include "config/full_system.hh" +#include "cpu/o3/rob.hh" + ++using namespace std; ++ +template - ROB::ROB(unsigned _numEntries, unsigned _squashWidth) ++ROB::ROB(unsigned _numEntries, unsigned _squashWidth, ++ string _smtROBPolicy, unsigned _smtROBThreshold, ++ unsigned _numThreads) + : numEntries(_numEntries), + squashWidth(_squashWidth), + numInstsInROB(0), - squashedSeqNum(0) ++ squashedSeqNum(0), ++ numThreads(_numThreads) ++{ ++ for (int tid=0; tid < numThreads; tid++) { ++ doneSquashing[tid] = true; ++ threadEntries[tid] = 0; ++ } ++ ++ string policy = _smtROBPolicy; ++ ++ //Convert string to lowercase ++ std::transform(policy.begin(), policy.end(), policy.begin(), ++ (int(*)(int)) tolower); ++ ++ //Figure out rob policy ++ if (policy == "dynamic") { ++ robPolicy = Dynamic; ++ ++ //Set Max Entries to Total ROB Capacity ++ for (int i = 0; i < numThreads; i++) { ++ maxEntries[i]=numEntries; ++ } ++ ++ } else if (policy == "partitioned") { ++ robPolicy = Partitioned; ++ DPRINTF(Fetch, "ROB sharing policy set to Partitioned\n"); ++ ++ //@todo:make work if part_amt doesnt divide evenly. 
++ int part_amt = numEntries / numThreads; ++ ++ //Divide ROB up evenly ++ for (int i = 0; i < numThreads; i++) { ++ maxEntries[i]=part_amt; ++ } ++ ++ } else if (policy == "threshold") { ++ robPolicy = Threshold; ++ DPRINTF(Fetch, "ROB sharing policy set to Threshold\n"); ++ ++ int threshold = _smtROBThreshold;; ++ ++ //Divide up by threshold amount ++ for (int i = 0; i < numThreads; i++) { ++ maxEntries[i]=threshold; ++ } ++ } else { ++ assert(0 && "Invalid ROB Sharing Policy.Options Are:{Dynamic," ++ "Partitioned, Threshold}"); ++ } ++} ++ ++template ++std::string ++ROB::name() const +{ - doneSquashing = true; ++ return cpu->name() + ".rob"; +} + +template +void +ROB::setCPU(FullCPU *cpu_ptr) +{ + cpu = cpu_ptr; + - // Set the tail to the beginning of the CPU instruction list so that - // upon the first instruction being inserted into the ROB, the tail - // iterator can simply be incremented. - tail = cpu->instList.begin(); ++ // Set the per-thread iterators to the end of the instruction list. ++ for (int i=0; i < numThreads;i++) { ++ squashIt[i] = instList[i].end(); ++ } + - // Set the squash iterator to the end of the instruction list. - squashIt = cpu->instList.end(); ++ // Initialize the "universal" ROB head & tail point to invalid ++ // pointers ++ head = instList[0].end(); ++ tail = instList[0].end(); +} + +template - int - ROB::countInsts() ++void ++ROB::setActiveThreads(list *at_ptr) +{ - // Start at 1; if the tail matches cpu->instList.begin(), then there is - // one inst in the ROB. - int return_val = 1; ++ DPRINTF(ROB, "Setting active threads list pointer.\n"); ++ activeThreads = at_ptr; ++} + - // There are quite a few special cases. Do not use this function other - // than for debugging purposes. - if (cpu->instList.begin() == cpu->instList.end()) { - // In this case there are no instructions in the list. The ROB - // must be empty. 
- return 0; - } else if (tail == cpu->instList.end()) { - // In this case, the tail is not yet pointing to anything valid. - // The ROB must be empty. - return 0; ++template ++void ++ROB::switchOut() ++{ ++ for (int tid = 0; tid < numThreads; tid++) { ++ instList[tid].clear(); + } ++} + - // Iterate through the ROB from the head to the tail, counting the - // entries. - for (InstIt_t i = cpu->instList.begin(); i != tail; ++i) - { - assert(i != cpu->instList.end()); - ++return_val; ++template ++void ++ROB::takeOverFrom() ++{ ++ for (int tid=0; tid < numThreads; tid++) { ++ doneSquashing[tid] = true; ++ threadEntries[tid] = 0; ++ squashIt[tid] = instList[tid].end(); ++ } ++ numInstsInROB = 0; ++ ++ // Initialize the "universal" ROB head & tail point to invalid ++ // pointers ++ head = instList[0].end(); ++ tail = instList[0].end(); ++} ++ ++template ++void ++ROB::resetEntries() ++{ ++ if (robPolicy != Dynamic || numThreads > 1) { ++ int active_threads = (*activeThreads).size(); ++ ++ list::iterator threads = (*activeThreads).begin(); ++ list::iterator list_end = (*activeThreads).end(); ++ ++ while (threads != list_end) { ++ if (robPolicy == Partitioned) { ++ maxEntries[*threads++] = numEntries / active_threads; ++ } else if (robPolicy == Threshold && active_threads == 1) { ++ maxEntries[*threads++] = numEntries; ++ } ++ } + } ++} ++ ++template ++int ++ROB::entryAmount(int num_threads) ++{ ++ if (robPolicy == Partitioned) { ++ return numEntries / num_threads; ++ } else { ++ return 0; ++ } ++} ++ ++template ++int ++ROB::countInsts() ++{ ++ int total=0; + - return return_val; ++ for (int i=0;i < numThreads;i++) ++ total += countInsts(i); + - // Because the head won't be tracked properly until the ROB gets the - // first instruction, and any time that the ROB is empty and has not - // yet gotten the instruction, this function doesn't work. 
- // return numInstsInROB; ++ return total; ++} ++ ++template ++int ++ROB::countInsts(unsigned tid) ++{ ++ return instList[tid].size(); +} + +template +void +ROB::insertInst(DynInstPtr &inst) +{ - // Make sure we have the right number of instructions. - assert(numInstsInROB == countInsts()); - // Make sure the instruction is valid. ++ //assert(numInstsInROB == countInsts()); + assert(inst); + - DPRINTF(ROB, "ROB: Adding inst PC %#x to the ROB.\n", inst->readPC()); ++ DPRINTF(ROB, "Adding inst PC %#x to the ROB.\n", inst->readPC()); + - // If the ROB is full then exit. + assert(numInstsInROB != numEntries); + - ++numInstsInROB; ++ int tid = inst->threadNumber; + - // Increment the tail iterator, moving it one instruction back. - // There is a special case if the ROB was empty prior to this insertion, - // in which case the tail will be pointing at instList.end(). If that - // happens, then reset the tail to the beginning of the list. - if (tail != cpu->instList.end()) { - ++tail; - } else { - tail = cpu->instList.begin(); ++ instList[tid].push_back(inst); ++ ++ //Set Up head iterator if this is the 1st instruction in the ROB ++ if (numInstsInROB == 0) { ++ head = instList[tid].begin(); ++ assert((*head) == inst); + } + - // Make sure the tail iterator is actually pointing at the instruction - // added. - assert((*tail) == inst); ++ //Must Decrement for iterator to actually be valid since __.end() ++ //actually points to 1 after the last inst ++ tail = instList[tid].end(); ++ tail--; + - DPRINTF(ROB, "ROB: Now has %d instructions.\n", numInstsInROB); ++ inst->setInROB(); ++ ++ ++numInstsInROB; ++ ++threadEntries[tid]; + ++ assert((*tail) == inst); ++ ++ DPRINTF(ROB, "[tid:%i] Now has %d instructions.\n", tid, threadEntries[tid]); +} + +// Whatever calls this function needs to ensure that it properly frees up +// registers prior to this function. 
++/* +template +void +ROB::retireHead() +{ - assert(numInstsInROB == countInsts()); ++ //assert(numInstsInROB == countInsts()); ++ assert(numInstsInROB > 0); ++ ++ int tid = (*head)->threadNumber; ++ ++ retireHead(tid); ++ ++ if (numInstsInROB == 0) { ++ tail = instList[tid].end(); ++ } ++} ++*/ ++ ++template ++void ++ROB::retireHead(unsigned tid) ++{ ++ //assert(numInstsInROB == countInsts()); + assert(numInstsInROB > 0); + + // Get the head ROB instruction. - DynInstPtr head_inst = cpu->instList.front(); ++ InstIt head_it = instList[tid].begin(); ++ ++ DynInstPtr head_inst = (*head_it); + - // Make certain this can retire. + assert(head_inst->readyToCommit()); + - DPRINTF(ROB, "ROB: Retiring head instruction of the ROB, " - "instruction PC %#x, seq num %i\n", head_inst->readPC(), ++ DPRINTF(ROB, "[tid:%u]: Retiring head instruction, " ++ "instruction PC %#x,[sn:%lli]\n", tid, head_inst->readPC(), + head_inst->seqNum); + - // Keep track of how many instructions are in the ROB. + --numInstsInROB; ++ --threadEntries[tid]; ++ ++ head_inst->removeInROB(); ++ head_inst->setCommitted(); ++ ++ instList[tid].erase(head_it); + - // Tell CPU to remove the instruction from the list of instructions. - // A special case is needed if the instruction being retired is the - // only instruction in the ROB; otherwise the tail iterator will become - // invalidated. ++ //Update "Global" Head of ROB ++ updateHead(); ++ ++ // @todo: A special case is needed if the instruction being ++ // retired is the only instruction in the ROB; otherwise the tail ++ // iterator will become invalidated. 
+ cpu->removeFrontInst(head_inst); ++} ++/* ++template ++bool ++ROB::isHeadReady() ++{ ++ if (numInstsInROB != 0) { ++ return (*head)->readyToCommit(); ++ } + - if (numInstsInROB == 0) { - tail = cpu->instList.end(); ++ return false; ++} ++*/ ++template ++bool ++ROB::isHeadReady(unsigned tid) ++{ ++ if (threadEntries[tid] != 0) { ++ return instList[tid].front()->readyToCommit(); + } ++ ++ return false; +} + +template +bool - ROB::isHeadReady() ++ROB::canCommit() +{ - if (numInstsInROB != 0) { - return cpu->instList.front()->readyToCommit(); ++ //@todo: set ActiveThreads through ROB or CPU ++ list::iterator threads = (*activeThreads).begin(); ++ ++ while (threads != (*activeThreads).end()) { ++ unsigned tid = *threads++; ++ ++ if (isHeadReady(tid)) { ++ return true; ++ } + } + + return false; +} + +template +unsigned +ROB::numFreeEntries() +{ - assert(numInstsInROB == countInsts()); ++ //assert(numInstsInROB == countInsts()); + + return numEntries - numInstsInROB; +} + ++template ++unsigned ++ROB::numFreeEntries(unsigned tid) ++{ ++ return maxEntries[tid] - threadEntries[tid]; ++} ++ +template +void - ROB::doSquash() ++ROB::doSquash(unsigned tid) +{ - DPRINTF(ROB, "ROB: Squashing instructions.\n"); ++ DPRINTF(ROB, "[tid:%u]: Squashing instructions until [sn:%i].\n", ++ tid, squashedSeqNum); ++ ++ assert(squashIt[tid] != instList[tid].end()); ++ ++ if ((*squashIt[tid])->seqNum < squashedSeqNum) { ++ DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n", ++ tid); + - assert(squashIt != cpu->instList.end()); ++ squashIt[tid] = instList[tid].end(); ++ ++ doneSquashing[tid] = true; ++ return; ++ } ++ ++ bool robTailUpdate = false; + + for (int numSquashed = 0; - numSquashed < squashWidth && (*squashIt)->seqNum != squashedSeqNum; ++ numSquashed < squashWidth && ++ squashIt[tid] != instList[tid].end() && ++ (*squashIt[tid])->seqNum > squashedSeqNum; + ++numSquashed) + { - // Ensure that the instruction is younger. 
- assert((*squashIt)->seqNum > squashedSeqNum); - - DPRINTF(ROB, "ROB: Squashing instruction PC %#x, seq num %i.\n", - (*squashIt)->readPC(), (*squashIt)->seqNum); ++ DPRINTF(ROB, "[tid:%u]: Squashing instruction PC %#x, seq num %i.\n", ++ (*squashIt[tid])->threadNumber, ++ (*squashIt[tid])->readPC(), ++ (*squashIt[tid])->seqNum); + + // Mark the instruction as squashed, and ready to commit so that + // it can drain out of the pipeline. - (*squashIt)->setSquashed(); - - (*squashIt)->setCanCommit(); - - // Special case for when squashing due to a syscall. It's possible - // that the squash happened after the head instruction was already - // committed, meaning that (*squashIt)->seqNum != squashedSeqNum - // will never be false. Normally the squash would never be able - // to go past the head of the ROB; in this case it might, so it - // must be handled otherwise it will segfault. - #if !FULL_SYSTEM - if (squashIt == cpu->instList.begin()) { - DPRINTF(ROB, "ROB: Reached head of instruction list while " ++ (*squashIt[tid])->setSquashed(); ++ ++ (*squashIt[tid])->setCanCommit(); ++ ++ ++ if (squashIt[tid] == instList[tid].begin()) { ++ DPRINTF(ROB, "Reached head of instruction list while " + "squashing.\n"); + - squashIt = cpu->instList.end(); ++ squashIt[tid] = instList[tid].end(); + - doneSquashing = true; ++ doneSquashing[tid] = true; + + return; + } - #endif + - // Move the tail iterator to the next instruction. - squashIt--; ++ InstIt tail_thread = instList[tid].end(); ++ tail_thread--; ++ ++ if ((*squashIt[tid]) == (*tail_thread)) ++ robTailUpdate = true; ++ ++ squashIt[tid]--; + } + + + // Check if ROB is done squashing. 
- if ((*squashIt)->seqNum == squashedSeqNum) { - DPRINTF(ROB, "ROB: Done squashing instructions.\n"); ++ if ((*squashIt[tid])->seqNum <= squashedSeqNum) { ++ DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n", ++ tid); ++ ++ squashIt[tid] = instList[tid].end(); ++ ++ doneSquashing[tid] = true; ++ } ++ ++ if (robTailUpdate) { ++ updateTail(); ++ } ++} ++ ++ ++template ++void ++ROB::updateHead() ++{ ++ DynInstPtr head_inst; ++ InstSeqNum lowest_num = 0; ++ bool first_valid = true; ++ ++ // @todo: set ActiveThreads through ROB or CPU ++ list::iterator threads = (*activeThreads).begin(); ++ ++ while (threads != (*activeThreads).end()) { ++ unsigned thread_num = *threads++; ++ ++ if (instList[thread_num].empty()) ++ continue; ++ ++ if (first_valid) { ++ head = instList[thread_num].begin(); ++ lowest_num = (*head)->seqNum; ++ first_valid = false; ++ continue; ++ } ++ ++ InstIt head_thread = instList[thread_num].begin(); ++ ++ DynInstPtr head_inst = (*head_thread); ++ ++ assert(head_inst != 0); ++ ++ if (head_inst->seqNum < lowest_num) { ++ head = head_thread; ++ lowest_num = head_inst->seqNum; ++ } ++ } ++ ++ if (first_valid) { ++ head = instList[0].end(); ++ } + - squashIt = cpu->instList.end(); ++} ++ ++template ++void ++ROB::updateTail() ++{ ++ tail = instList[0].end(); ++ bool first_valid = true; ++ ++ list::iterator threads = (*activeThreads).begin(); ++ ++ while (threads != (*activeThreads).end()) { ++ unsigned tid = *threads++; ++ ++ if (instList[tid].empty()) { ++ continue; ++ } ++ ++ // If this is the first valid then assign w/out ++ // comparison ++ if (first_valid) { ++ tail = instList[tid].end(); ++ tail--; ++ first_valid = false; ++ continue; ++ } + - doneSquashing = true; ++ // Assign new tail if this thread's tail is younger ++ // than our current "tail high" ++ InstIt tail_thread = instList[tid].end(); ++ tail_thread--; ++ ++ if ((*tail_thread)->seqNum > (*tail)->seqNum) { ++ tail = tail_thread; ++ } + } +} + ++ +template +void - 
ROB::squash(InstSeqNum squash_num) ++ROB::squash(InstSeqNum squash_num,unsigned tid) +{ - DPRINTF(ROB, "ROB: Starting to squash within the ROB.\n"); - doneSquashing = false; ++ if (isEmpty()) { ++ DPRINTF(ROB, "Does not need to squash due to being empty " ++ "[sn:%i]\n", ++ squash_num); ++ ++ return; ++ } ++ ++ DPRINTF(ROB, "Starting to squash within the ROB.\n"); ++ ++ robStatus[tid] = ROBSquashing; ++ ++ doneSquashing[tid] = false; + + squashedSeqNum = squash_num; + - assert(tail != cpu->instList.end()); ++ if (!instList[tid].empty()) { ++ InstIt tail_thread = instList[tid].end(); ++ tail_thread--; + - squashIt = tail; ++ squashIt[tid] = tail_thread; + - doSquash(); ++ doSquash(tid); ++ } ++} ++/* ++template ++typename Impl::DynInstPtr ++ROB::readHeadInst() ++{ ++ if (numInstsInROB != 0) { ++ assert((*head)->isInROB()==true); ++ return *head; ++ } else { ++ return dummyInst; ++ } +} ++*/ ++template ++typename Impl::DynInstPtr ++ROB::readHeadInst(unsigned tid) ++{ ++ if (threadEntries[tid] != 0) { ++ InstIt head_thread = instList[tid].begin(); + ++ assert((*head_thread)->isInROB()==true); ++ ++ return *head_thread; ++ } else { ++ return dummyInst; ++ } ++} ++/* +template +uint64_t +ROB::readHeadPC() +{ - assert(numInstsInROB == countInsts()); ++ //assert(numInstsInROB == countInsts()); + - DynInstPtr head_inst = cpu->instList.front(); ++ DynInstPtr head_inst = *head; + + return head_inst->readPC(); +} + ++template ++uint64_t ++ROB::readHeadPC(unsigned tid) ++{ ++ //assert(numInstsInROB == countInsts()); ++ InstIt head_thread = instList[tid].begin(); ++ ++ return (*head_thread)->readPC(); ++} ++ ++ +template +uint64_t +ROB::readHeadNextPC() +{ - assert(numInstsInROB == countInsts()); ++ //assert(numInstsInROB == countInsts()); + - DynInstPtr head_inst = cpu->instList.front(); ++ DynInstPtr head_inst = *head; + + return head_inst->readNextPC(); +} + ++template ++uint64_t ++ROB::readHeadNextPC(unsigned tid) ++{ ++ //assert(numInstsInROB == countInsts()); ++ InstIt 
head_thread = instList[tid].begin(); ++ ++ return (*head_thread)->readNextPC(); ++} ++ +template +InstSeqNum +ROB::readHeadSeqNum() +{ - // Return the last sequence number that has not been squashed. Other - // stages can use it to squash any instructions younger than the current - // tail. - DynInstPtr head_inst = cpu->instList.front(); ++ //assert(numInstsInROB == countInsts()); ++ DynInstPtr head_inst = *head; + + return head_inst->seqNum; +} + ++template ++InstSeqNum ++ROB::readHeadSeqNum(unsigned tid) ++{ ++ InstIt head_thread = instList[tid].begin(); ++ ++ return ((*head_thread)->seqNum); ++} ++ ++template ++typename Impl::DynInstPtr ++ROB::readTailInst() ++{ ++ //assert(numInstsInROB == countInsts()); ++ //assert(tail != instList[0].end()); ++ ++ return (*tail); ++} ++*/ ++template ++typename Impl::DynInstPtr ++ROB::readTailInst(unsigned tid) ++{ ++ //assert(tail_thread[tid] != instList[tid].end()); ++ ++ InstIt tail_thread = instList[tid].end(); ++ tail_thread--; ++ ++ return *tail_thread; ++} ++ ++/* +template +uint64_t +ROB::readTailPC() +{ - assert(numInstsInROB == countInsts()); ++ //assert(numInstsInROB == countInsts()); + - assert(tail != cpu->instList.end()); ++ //assert(tail != instList[0].end()); + + return (*tail)->readPC(); +} + ++template ++uint64_t ++ROB::readTailPC(unsigned tid) ++{ ++ //assert(tail_thread[tid] != instList[tid].end()); ++ ++ InstIt tail_thread = instList[tid].end(); ++ tail_thread--; ++ ++ return (*tail_thread)->readPC(); ++} ++ +template +InstSeqNum +ROB::readTailSeqNum() +{ + // Return the last sequence number that has not been squashed. Other + // stages can use it to squash any instructions younger than the current + // tail. + return (*tail)->seqNum; +} + - #endif // __CPU_O3_CPU_ROB_IMPL_HH__ ++template ++InstSeqNum ++ROB::readTailSeqNum(unsigned tid) ++{ ++ // Return the last sequence number that has not been squashed. Other ++ // stages can use it to squash any instructions younger than the current ++ // tail. 
++ // assert(tail_thread[tid] != instList[tid].end()); ++ ++ InstIt tail_thread = instList[tid].end(); ++ tail_thread--; ++ ++ return (*tail_thread)->seqNum; ++} ++*/ diff --cc src/cpu/o3/sat_counter.cc index d20fff650,000000000..b481b4ad2 mode 100644,000000..100644 --- a/src/cpu/o3/sat_counter.cc +++ b/src/cpu/o3/sat_counter.cc @@@ -1,71 -1,0 +1,55 @@@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "base/misc.hh" +#include "cpu/o3/sat_counter.hh" + +SatCounter::SatCounter() - : maxVal(0), counter(0) ++ : initialVal(0), counter(0) +{ +} + +SatCounter::SatCounter(unsigned bits) - : maxVal((1 << bits) - 1), counter(0) ++ : initialVal(0), maxVal((1 << bits) - 1), counter(0) +{ +} + - SatCounter::SatCounter(unsigned bits, unsigned initial_val) - : maxVal((1 << bits) - 1), counter(initial_val) ++SatCounter::SatCounter(unsigned bits, uint8_t initial_val) ++ : initialVal(initialVal), maxVal((1 << bits) - 1), counter(initial_val) +{ + // Check to make sure initial value doesn't exceed the max counter value. + if (initial_val > maxVal) { - panic("BP: Initial counter value exceeds max size."); ++ fatal("BP: Initial counter value exceeds max size."); + } +} + +void +SatCounter::setBits(unsigned bits) +{ + maxVal = (1 << bits) - 1; +} - - void - SatCounter::increment() - { - if(counter < maxVal) { - ++counter; - } - } - - void - SatCounter::decrement() - { - if(counter > 0) { - --counter; - } - } diff --cc src/cpu/o3/sat_counter.hh index b7cfe6423,000000000..d01fd93ce mode 100644,000000..100644 --- a/src/cpu/o3/sat_counter.hh +++ b/src/cpu/o3/sat_counter.hh @@@ -1,90 -1,0 +1,113 @@@ +/* - * Copyright (c) 2005 The Regents of The University of Michigan ++ * Copyright (c) 2005-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - #ifndef __CPU_O3_CPU_SAT_COUNTER_HH__ - #define __CPU_O3_CPU_SAT_COUNTER_HH__ ++#ifndef __CPU_O3_SAT_COUNTER_HH__ ++#define __CPU_O3_SAT_COUNTER_HH__ + +#include "sim/host.hh" + +/** + * Private counter class for the internal saturating counters. + * Implements an n bit saturating counter and provides methods to + * increment, decrement, and read it. + * @todo Consider making this something that more closely mimics a + * built in class so you can use ++ or --. 
+ */ +class SatCounter +{ + public: + /** + * Constructor for the counter. + */ - SatCounter(); ++ SatCounter() ++ : initialVal(0), counter(0) ++ { } + + /** + * Constructor for the counter. + * @param bits How many bits the counter will have. + */ - SatCounter(unsigned bits); ++ SatCounter(unsigned bits) ++ : initialVal(0), maxVal((1 << bits) - 1), counter(0) ++ { } + + /** + * Constructor for the counter. + * @param bits How many bits the counter will have. + * @param initial_val Starting value for each counter. + */ - SatCounter(unsigned bits, unsigned initial_val); ++ SatCounter(unsigned bits, uint8_t initial_val) ++ : initialVal(initialVal), maxVal((1 << bits) - 1), counter(initial_val) ++ { ++ // Check to make sure initial value doesn't exceed the max ++ // counter value. ++ if (initial_val > maxVal) { ++ fatal("BP: Initial counter value exceeds max size."); ++ } ++ } + + /** + * Sets the number of bits. + */ - void setBits(unsigned bits); ++ void setBits(unsigned bits) { maxVal = (1 << bits) - 1; } ++ ++ void reset() { counter = initialVal; } + + /** + * Increments the counter's current value. + */ - void increment(); ++ void increment() ++ { ++ if (counter < maxVal) { ++ ++counter; ++ } ++ } + + /** + * Decrements the counter's current value. + */ - void decrement(); ++ void decrement() ++ { ++ if (counter > 0) { ++ --counter; ++ } ++ } + + /** + * Read the counter's value. 
+ */ + const uint8_t read() const - { - return counter; - } ++ { return counter; } + + private: ++ uint8_t initialVal; + uint8_t maxVal; + uint8_t counter; +}; + - #endif // __CPU_O3_CPU_SAT_COUNTER_HH__ ++#endif // __CPU_O3_SAT_COUNTER_HH__ diff --cc src/cpu/o3/store_set.cc index 11023f4a8,000000000..0c957c8c7 mode 100644,000000..100644 --- a/src/cpu/o3/store_set.cc +++ b/src/cpu/o3/store_set.cc @@@ -1,282 -1,0 +1,320 @@@ +/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan ++ * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "base/trace.hh" +#include "cpu/o3/store_set.hh" + +StoreSet::StoreSet(int _SSIT_size, int _LFST_size) - : SSIT_size(_SSIT_size), LFST_size(_LFST_size) ++ : SSITSize(_SSIT_size), LFSTSize(_LFST_size) +{ + DPRINTF(StoreSet, "StoreSet: Creating store set object.\n"); + DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n", - SSIT_size, LFST_size); ++ SSITSize, LFSTSize); + - SSIT = new SSID[SSIT_size]; ++ SSIT.resize(SSITSize); + - validSSIT.resize(SSIT_size); ++ validSSIT.resize(SSITSize); + - for (int i = 0; i < SSIT_size; ++i) ++ for (int i = 0; i < SSITSize; ++i) + validSSIT[i] = false; + - LFST = new InstSeqNum[LFST_size]; ++ LFST.resize(LFSTSize); + - validLFST.resize(LFST_size); ++ validLFST.resize(LFSTSize); + - SSCounters = new int[LFST_size]; ++ for (int i = 0; i < LFSTSize; ++i) { ++ validLFST[i] = false; ++ LFST[i] = 0; ++ } ++ ++ indexMask = SSITSize - 1; ++ ++ offsetBits = 2; ++} ++ ++StoreSet::~StoreSet() ++{ ++} ++ ++void ++StoreSet::init(int _SSIT_size, int _LFST_size) ++{ ++ SSITSize = _SSIT_size; ++ LFSTSize = _LFST_size; + - for (int i = 0; i < LFST_size; ++i) - { ++ DPRINTF(StoreSet, "StoreSet: Creating store set object.\n"); ++ DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n", ++ SSITSize, LFSTSize); ++ ++ SSIT.resize(SSITSize); ++ ++ validSSIT.resize(SSITSize); ++ ++ for (int i = 0; i < SSITSize; ++i) ++ validSSIT[i] = false; ++ ++ LFST.resize(LFSTSize); ++ ++ 
validLFST.resize(LFSTSize); ++ ++ for (int i = 0; i < LFSTSize; ++i) { + validLFST[i] = false; - SSCounters[i] = 0; ++ LFST[i] = 0; + } + - index_mask = SSIT_size - 1; ++ indexMask = SSITSize - 1; + - offset_bits = 2; ++ offsetBits = 2; +} + ++ +void +StoreSet::violation(Addr store_PC, Addr load_PC) +{ + int load_index = calcIndex(load_PC); + int store_index = calcIndex(store_PC); + - assert(load_index < SSIT_size && store_index < SSIT_size); ++ assert(load_index < SSITSize && store_index < SSITSize); + + bool valid_load_SSID = validSSIT[load_index]; + bool valid_store_SSID = validSSIT[store_index]; + + if (!valid_load_SSID && !valid_store_SSID) { + // Calculate a new SSID here. + SSID new_set = calcSSID(load_PC); + + validSSIT[load_index] = true; + + SSIT[load_index] = new_set; + + validSSIT[store_index] = true; + + SSIT[store_index] = new_set; + - assert(new_set < LFST_size); - - SSCounters[new_set]++; - ++ assert(new_set < LFSTSize); + + DPRINTF(StoreSet, "StoreSet: Neither load nor store had a valid " + "storeset, creating a new one: %i for load %#x, store %#x\n", + new_set, load_PC, store_PC); + } else if (valid_load_SSID && !valid_store_SSID) { + SSID load_SSID = SSIT[load_index]; + + validSSIT[store_index] = true; + + SSIT[store_index] = load_SSID; + - assert(load_SSID < LFST_size); - - SSCounters[load_SSID]++; ++ assert(load_SSID < LFSTSize); + + DPRINTF(StoreSet, "StoreSet: Load had a valid store set. Adding " + "store to that set: %i for load %#x, store %#x\n", + load_SSID, load_PC, store_PC); + } else if (!valid_load_SSID && valid_store_SSID) { + SSID store_SSID = SSIT[store_index]; + + validSSIT[load_index] = true; + + SSIT[load_index] = store_SSID; + - // Because we are having a load point to an already existing set, - // the size of the store set is not incremented. 
- + DPRINTF(StoreSet, "StoreSet: Store had a valid store set: %i for " + "load %#x, store %#x\n", + store_SSID, load_PC, store_PC); + } else { + SSID load_SSID = SSIT[load_index]; + SSID store_SSID = SSIT[store_index]; + - assert(load_SSID < LFST_size && store_SSID < LFST_size); - - int load_SS_size = SSCounters[load_SSID]; - int store_SS_size = SSCounters[store_SSID]; ++ assert(load_SSID < LFSTSize && store_SSID < LFSTSize); + - // If the load has the bigger store set, then assign the store - // to the same store set as the load. Otherwise vice-versa. - if (load_SS_size > store_SS_size) { ++ // The store set with the lower number wins ++ if (store_SSID > load_SSID) { + SSIT[store_index] = load_SSID; + - SSCounters[load_SSID]++; - SSCounters[store_SSID]--; - - DPRINTF(StoreSet, "StoreSet: Load had bigger store set: %i; " ++ DPRINTF(StoreSet, "StoreSet: Load had smaller store set: %i; " + "for load %#x, store %#x\n", + load_SSID, load_PC, store_PC); + } else { + SSIT[load_index] = store_SSID; + - SSCounters[store_SSID]++; - SSCounters[load_SSID]--; - - DPRINTF(StoreSet, "StoreSet: Store had bigger store set: %i; " ++ DPRINTF(StoreSet, "StoreSet: Store had smaller store set: %i; " + "for load %#x, store %#x\n", + store_SSID, load_PC, store_PC); + } + } +} + +void +StoreSet::insertLoad(Addr load_PC, InstSeqNum load_seq_num) +{ + // Does nothing. + return; +} + +void - StoreSet::insertStore(Addr store_PC, InstSeqNum store_seq_num) ++StoreSet::insertStore(Addr store_PC, InstSeqNum store_seq_num, ++ unsigned tid) +{ + int index = calcIndex(store_PC); + + int store_SSID; + - assert(index < SSIT_size); ++ assert(index < SSITSize); + + if (!validSSIT[index]) { + // Do nothing if there's no valid entry. + return; + } else { + store_SSID = SSIT[index]; + - assert(store_SSID < LFST_size); ++ assert(store_SSID < LFSTSize); + + // Update the last store that was fetched with the current one. 
+ LFST[store_SSID] = store_seq_num; + + validLFST[store_SSID] = 1; + ++ storeList[store_seq_num] = store_SSID; ++ + DPRINTF(StoreSet, "Store %#x updated the LFST, SSID: %i\n", + store_PC, store_SSID); + } +} + +InstSeqNum +StoreSet::checkInst(Addr PC) +{ + int index = calcIndex(PC); + + int inst_SSID; + - assert(index < SSIT_size); ++ assert(index < SSITSize); + + if (!validSSIT[index]) { + DPRINTF(StoreSet, "Inst %#x with index %i had no SSID\n", + PC, index); + + // Return 0 if there's no valid entry. + return 0; + } else { + inst_SSID = SSIT[index]; + - assert(inst_SSID < LFST_size); ++ assert(inst_SSID < LFSTSize); + + if (!validLFST[inst_SSID]) { + + DPRINTF(StoreSet, "Inst %#x with index %i and SSID %i had no " + "dependency\n", PC, index, inst_SSID); + + return 0; + } else { + DPRINTF(StoreSet, "Inst %#x with index %i and SSID %i had LFST " + "inum of %i\n", PC, index, inst_SSID, LFST[inst_SSID]); + + return LFST[inst_SSID]; + } + } +} + +void +StoreSet::issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store) +{ + // This only is updated upon a store being issued. + if (!is_store) { + return; + } + + int index = calcIndex(issued_PC); + + int store_SSID; + - assert(index < SSIT_size); ++ assert(index < SSITSize); ++ ++ SeqNumMapIt store_list_it = storeList.find(issued_seq_num); ++ ++ if (store_list_it != storeList.end()) { ++ storeList.erase(store_list_it); ++ } + + // Make sure the SSIT still has a valid entry for the issued store. + if (!validSSIT[index]) { + return; + } + + store_SSID = SSIT[index]; + - assert(store_SSID < LFST_size); ++ assert(store_SSID < LFSTSize); + + // If the last fetched store in the store set refers to the store that + // was just issued, then invalidate the entry. 
+ if (validLFST[store_SSID] && LFST[store_SSID] == issued_seq_num) { + DPRINTF(StoreSet, "StoreSet: store invalidated itself in LFST.\n"); + validLFST[store_SSID] = false; + } +} + +void - StoreSet::squash(InstSeqNum squashed_num) ++StoreSet::squash(InstSeqNum squashed_num, unsigned tid) +{ - // Not really sure how to do this well. - // Generally this is small enough that it should be okay; short circuit - // evaluation should take care of invalid entries. - + DPRINTF(StoreSet, "StoreSet: Squashing until inum %i\n", + squashed_num); + - for (int i = 0; i < LFST_size; ++i) { - if (validLFST[i] && LFST[i] < squashed_num) { - validLFST[i] = false; ++ int idx; ++ SeqNumMapIt store_list_it = storeList.begin(); ++ ++ //@todo:Fix to only delete from correct thread ++ while (!storeList.empty()) { ++ idx = (*store_list_it).second; ++ ++ if ((*store_list_it).first <= squashed_num) { ++ break; ++ } ++ ++ bool younger = LFST[idx] > squashed_num; ++ ++ if (validLFST[idx] && younger) { ++ DPRINTF(StoreSet, "Squashed [sn:%lli]\n", LFST[idx]); ++ validLFST[idx] = false; ++ ++ storeList.erase(store_list_it++); ++ } else if (!validLFST[idx] && younger) { ++ storeList.erase(store_list_it++); + } + } +} + +void +StoreSet::clear() +{ - for (int i = 0; i < SSIT_size; ++i) { ++ for (int i = 0; i < SSITSize; ++i) { + validSSIT[i] = false; + } + - for (int i = 0; i < LFST_size; ++i) { ++ for (int i = 0; i < LFSTSize; ++i) { + validLFST[i] = false; + } - } + ++ storeList.clear(); ++} diff --cc src/cpu/o3/store_set.hh index 5a885d838,000000000..7189db3ab mode 100644,000000..100644 --- a/src/cpu/o3/store_set.hh +++ b/src/cpu/o3/store_set.hh @@@ -1,86 -1,0 +1,105 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + - #ifndef __CPU_O3_CPU_STORE_SET_HH__ - #define __CPU_O3_CPU_STORE_SET_HH__ ++#ifndef __CPU_O3_STORE_SET_HH__ ++#define __CPU_O3_STORE_SET_HH__ + ++#include ++#include ++#include +#include + +#include "arch/isa_traits.hh" +#include "cpu/inst_seq.hh" + ++struct ltseqnum { ++ bool operator()(const InstSeqNum &lhs, const InstSeqNum &rhs) const ++ { ++ return lhs > rhs; ++ } ++}; ++ +class StoreSet +{ + public: + typedef unsigned SSID; + + public: ++ StoreSet() { }; ++ + StoreSet(int SSIT_size, int LFST_size); + ++ ~StoreSet(); ++ ++ void init(int SSIT_size, int LFST_size); ++ + void violation(Addr store_PC, Addr load_PC); + + void insertLoad(Addr load_PC, InstSeqNum load_seq_num); + - void insertStore(Addr store_PC, InstSeqNum store_seq_num); ++ void insertStore(Addr store_PC, InstSeqNum store_seq_num, ++ unsigned tid); + + InstSeqNum checkInst(Addr PC); + + void issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store); + - void squash(InstSeqNum squashed_num); ++ void squash(InstSeqNum squashed_num, unsigned tid); + + void clear(); + + private: + inline int calcIndex(Addr PC) - { return (PC >> offset_bits) & index_mask; } ++ { return (PC >> offsetBits) & indexMask; } + + inline SSID calcSSID(Addr PC) - { return ((PC ^ (PC >> 10)) % LFST_size); } ++ { return ((PC ^ (PC >> 10)) % LFSTSize); } + - SSID *SSIT; ++ std::vector SSIT; + + std::vector validSSIT; + - InstSeqNum *LFST; ++ std::vector LFST; + + std::vector validLFST; + - int *SSCounters; ++ std::map storeList; ++ ++ typedef std::map::iterator SeqNumMapIt; + - int SSIT_size; ++ int SSITSize; + - int LFST_size; ++ int LFSTSize; + - int index_mask; ++ int indexMask; + + // HACK: Hardcoded for now. 
- int offset_bits; ++ int offsetBits; +}; + - #endif // __CPU_O3_CPU_STORE_SET_HH__ ++#endif // __CPU_O3_STORE_SET_HH__ diff --cc src/cpu/o3/tournament_pred.cc index 3fb580510,000000000..89da7b9f5 mode 100644,000000..100644 --- a/src/cpu/o3/tournament_pred.cc +++ b/src/cpu/o3/tournament_pred.cc @@@ -1,256 -1,0 +1,255 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "cpu/o3/tournament_pred.hh" + - TournamentBP::TournamentBP(unsigned _local_predictor_size, - unsigned _local_ctr_bits, - unsigned _local_history_table_size, - unsigned _local_history_bits, - unsigned _global_predictor_size, - unsigned _global_ctr_bits, - unsigned _global_history_bits, - unsigned _choice_predictor_size, - unsigned _choice_ctr_bits, ++TournamentBP::TournamentBP(unsigned _localPredictorSize, ++ unsigned _localCtrBits, ++ unsigned _localHistoryTableSize, ++ unsigned _localHistoryBits, ++ unsigned _globalPredictorSize, ++ unsigned _globalCtrBits, ++ unsigned _globalHistoryBits, ++ unsigned _choicePredictorSize, ++ unsigned _choiceCtrBits, + unsigned _instShiftAmt) - : localPredictorSize(_local_predictor_size), - localCtrBits(_local_ctr_bits), - localHistoryTableSize(_local_history_table_size), - localHistoryBits(_local_history_bits), - globalPredictorSize(_global_predictor_size), - globalCtrBits(_global_ctr_bits), - globalHistoryBits(_global_history_bits), - choicePredictorSize(_global_predictor_size), - choiceCtrBits(_choice_ctr_bits), ++ : localPredictorSize(_localPredictorSize), ++ localCtrBits(_localCtrBits), ++ localHistoryTableSize(_localHistoryTableSize), ++ localHistoryBits(_localHistoryBits), ++ globalPredictorSize(_globalPredictorSize), ++ globalCtrBits(_globalCtrBits), ++ globalHistoryBits(_globalHistoryBits), ++ choicePredictorSize(_globalPredictorSize), ++ choiceCtrBits(_choiceCtrBits), + instShiftAmt(_instShiftAmt) +{ + //Should do checks here to make sure sizes are correct (powers of 2) + + //Setup the array of counters for the local predictor - localCtrs = new SatCounter[localPredictorSize]; ++ localCtrs.resize(localPredictorSize); + + for (int i = 0; i < localPredictorSize; ++i) + localCtrs[i].setBits(localCtrBits); + + //Setup the history table for the local table - localHistoryTable = new unsigned[localHistoryTableSize]; ++ localHistoryTable.resize(localHistoryTableSize); + + for (int i = 0; i < localHistoryTableSize; 
++i) + localHistoryTable[i] = 0; + + // Setup the local history mask + localHistoryMask = (1 << localHistoryBits) - 1; + + //Setup the array of counters for the global predictor - globalCtrs = new SatCounter[globalPredictorSize]; ++ globalCtrs.resize(globalPredictorSize); + + for (int i = 0; i < globalPredictorSize; ++i) + globalCtrs[i].setBits(globalCtrBits); + + //Clear the global history + globalHistory = 0; + // Setup the global history mask + globalHistoryMask = (1 << globalHistoryBits) - 1; + + //Setup the array of counters for the choice predictor - choiceCtrs = new SatCounter[choicePredictorSize]; ++ choiceCtrs.resize(choicePredictorSize); + + for (int i = 0; i < choicePredictorSize; ++i) + choiceCtrs[i].setBits(choiceCtrBits); + + threshold = (1 << (localCtrBits - 1)) - 1; + threshold = threshold / 2; +} + +inline +unsigned +TournamentBP::calcLocHistIdx(Addr &branch_addr) +{ + return (branch_addr >> instShiftAmt) & (localHistoryTableSize - 1); +} + +inline +void +TournamentBP::updateHistoriesTaken(unsigned local_history_idx) +{ + globalHistory = (globalHistory << 1) | 1; + globalHistory = globalHistory & globalHistoryMask; + + localHistoryTable[local_history_idx] = + (localHistoryTable[local_history_idx] << 1) | 1; +} + +inline +void +TournamentBP::updateHistoriesNotTaken(unsigned local_history_idx) +{ + globalHistory = (globalHistory << 1); + globalHistory = globalHistory & globalHistoryMask; + + localHistoryTable[local_history_idx] = + (localHistoryTable[local_history_idx] << 1); +} + +bool +TournamentBP::lookup(Addr &branch_addr) +{ + uint8_t local_prediction; + unsigned local_history_idx; + unsigned local_predictor_idx; + + uint8_t global_prediction; + uint8_t choice_prediction; + + //Lookup in the local predictor to get its branch prediction + local_history_idx = calcLocHistIdx(branch_addr); + local_predictor_idx = localHistoryTable[local_history_idx] + & localHistoryMask; + local_prediction = localCtrs[local_predictor_idx].read(); + + //Lookup in the 
global predictor to get its branch prediction + global_prediction = globalCtrs[globalHistory].read(); + + //Lookup in the choice predictor to see which one to use + choice_prediction = choiceCtrs[globalHistory].read(); + + //@todo Put a threshold value in for the three predictors that can + // be set through the constructor (so this isn't hard coded). + //Also should put some of this code into functions. + if (choice_prediction > threshold) { + if (global_prediction > threshold) { + updateHistoriesTaken(local_history_idx); + + assert(globalHistory < globalPredictorSize && + local_history_idx < localPredictorSize); + + globalCtrs[globalHistory].increment(); + localCtrs[local_history_idx].increment(); + + return true; + } else { + updateHistoriesNotTaken(local_history_idx); + + assert(globalHistory < globalPredictorSize && + local_history_idx < localPredictorSize); + + globalCtrs[globalHistory].decrement(); + localCtrs[local_history_idx].decrement(); + + return false; + } + } else { + if (local_prediction > threshold) { + updateHistoriesTaken(local_history_idx); + + assert(globalHistory < globalPredictorSize && + local_history_idx < localPredictorSize); + + globalCtrs[globalHistory].increment(); + localCtrs[local_history_idx].increment(); + + return true; + } else { + updateHistoriesNotTaken(local_history_idx); + + assert(globalHistory < globalPredictorSize && + local_history_idx < localPredictorSize); + + globalCtrs[globalHistory].decrement(); + localCtrs[local_history_idx].decrement(); + + return false; + } + } +} + +// Update the branch predictor if it predicted a branch wrong. +void +TournamentBP::update(Addr &branch_addr, unsigned correct_gh, bool taken) +{ + + uint8_t local_prediction; + unsigned local_history_idx; + unsigned local_predictor_idx; + bool local_pred_taken; + + uint8_t global_prediction; + bool global_pred_taken; + + // Load the correct global history into the register. 
+ globalHistory = correct_gh; + + // Get the local predictor's current prediction, remove the incorrect + // update, and update the local predictor + local_history_idx = calcLocHistIdx(branch_addr); + local_predictor_idx = localHistoryTable[local_history_idx]; + local_predictor_idx = (local_predictor_idx >> 1) & localHistoryMask; + + local_prediction = localCtrs[local_predictor_idx].read(); + local_pred_taken = local_prediction > threshold; + + //Get the global predictor's current prediction, and update the + //global predictor + global_prediction = globalCtrs[globalHistory].read(); + global_pred_taken = global_prediction > threshold; + + //Update the choice predictor to tell it which one was correct + if (local_pred_taken != global_pred_taken) { + //If the local prediction matches the actual outcome, decerement + //the counter. Otherwise increment the counter. + if (local_pred_taken == taken) { + choiceCtrs[globalHistory].decrement(); + } else { + choiceCtrs[globalHistory].increment(); + } + } + + if (taken) { + assert(globalHistory < globalPredictorSize && + local_predictor_idx < localPredictorSize); + + localCtrs[local_predictor_idx].increment(); + globalCtrs[globalHistory].increment(); + + globalHistory = (globalHistory << 1) | 1; + globalHistory = globalHistory & globalHistoryMask; + + localHistoryTable[local_history_idx] |= 1; - } - else { ++ } else { + assert(globalHistory < globalPredictorSize && + local_predictor_idx < localPredictorSize); + + localCtrs[local_predictor_idx].decrement(); + globalCtrs[globalHistory].decrement(); + + globalHistory = (globalHistory << 1); + globalHistory = globalHistory & globalHistoryMask; + + localHistoryTable[local_history_idx] &= ~1; + } +} diff --cc src/cpu/o3/tournament_pred.hh index cb93c2f67,000000000..7b600aa53 mode 100644,000000..100644 --- a/src/cpu/o3/tournament_pred.hh +++ b/src/cpu/o3/tournament_pred.hh @@@ -1,143 -1,0 +1,144 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All 
rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - #ifndef __CPU_O3_CPU_TOURNAMENT_PRED_HH__ - #define __CPU_O3_CPU_TOURNAMENT_PRED_HH__ ++#ifndef __CPU_O3_TOURNAMENT_PRED_HH__ ++#define __CPU_O3_TOURNAMENT_PRED_HH__ + +// For Addr type. +#include "arch/isa_traits.hh" +#include "cpu/o3/sat_counter.hh" ++#include + +class TournamentBP +{ + public: + /** + * Default branch predictor constructor. 
+ */ - TournamentBP(unsigned local_predictor_size, - unsigned local_ctr_bits, - unsigned local_history_table_size, - unsigned local_history_bits, - unsigned global_predictor_size, - unsigned global_history_bits, - unsigned global_ctr_bits, - unsigned choice_predictor_size, - unsigned choice_ctr_bits, ++ TournamentBP(unsigned localPredictorSize, ++ unsigned localCtrBits, ++ unsigned localHistoryTableSize, ++ unsigned localHistoryBits, ++ unsigned globalPredictorSize, ++ unsigned globalHistoryBits, ++ unsigned globalCtrBits, ++ unsigned choicePredictorSize, ++ unsigned choiceCtrBits, + unsigned instShiftAmt); + + /** + * Looks up the given address in the branch predictor and returns + * a true/false value as to whether it is taken. + * @param branch_addr The address of the branch to look up. + * @return Whether or not the branch is taken. + */ + bool lookup(Addr &branch_addr); + + /** + * Updates the branch predictor with the actual result of a branch. + * @param branch_addr The address of the branch to update. + * @param taken Whether or not the branch was taken. + */ + void update(Addr &branch_addr, unsigned global_history, bool taken); + + inline unsigned readGlobalHist() { return globalHistory; } + + private: + + inline bool getPrediction(uint8_t &count); + + inline unsigned calcLocHistIdx(Addr &branch_addr); + + inline void updateHistoriesTaken(unsigned local_history_idx); + + inline void updateHistoriesNotTaken(unsigned local_history_idx); + + /** Local counters. */ - SatCounter *localCtrs; ++ std::vector localCtrs; + + /** Size of the local predictor. */ + unsigned localPredictorSize; + + /** Number of bits of the local predictor's counters. */ + unsigned localCtrBits; + + /** Array of local history table entries. */ - unsigned *localHistoryTable; ++ std::vector localHistoryTable; + + /** Size of the local history table. */ + unsigned localHistoryTableSize; + + /** Number of bits for each entry of the local history table. 
+ * @todo Doesn't this come from the size of the local predictor? + */ + unsigned localHistoryBits; + + /** Mask to get the proper local history. */ + unsigned localHistoryMask; + + + /** Array of counters that make up the global predictor. */ - SatCounter *globalCtrs; ++ std::vector globalCtrs; + + /** Size of the global predictor. */ + unsigned globalPredictorSize; + + /** Number of bits of the global predictor's counters. */ + unsigned globalCtrBits; + + /** Global history register. */ + unsigned globalHistory; + + /** Number of bits for the global history. */ + unsigned globalHistoryBits; + + /** Mask to get the proper global history. */ + unsigned globalHistoryMask; + + + /** Array of counters that make up the choice predictor. */ - SatCounter *choiceCtrs; ++ std::vector choiceCtrs; + + /** Size of the choice predictor (identical to the global predictor). */ + unsigned choicePredictorSize; + + /** Number of bits of the choice predictor's counters. */ + unsigned choiceCtrBits; + + /** Number of bits to shift the instruction over to get rid of the word + * offset. + */ + unsigned instShiftAmt; + + /** Threshold for the counter value; above the threshold is taken, + * equal to or below the threshold is not taken. + */ + unsigned threshold; +}; + - #endif // __CPU_O3_CPU_TOURNAMENT_PRED_HH__ ++#endif // __CPU_O3_TOURNAMENT_PRED_HH__ diff --cc src/cpu/ozone/cpu.cc index cbeca9d3b,000000000..d2ea0164c mode 100644,000000..100644 --- a/src/cpu/ozone/cpu.cc +++ b/src/cpu/ozone/cpu.cc @@@ -1,33 -1,0 +1,34 @@@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + - #include "cpu/ooo_cpu/ooo_cpu_impl.hh" - #include "cpu/ooo_cpu/ooo_dyn_inst.hh" - #include "cpu/ooo_cpu/ooo_impl.hh" ++#include "cpu/ozone/cpu_impl.hh" ++#include "cpu/ozone/ozone_impl.hh" ++#include "cpu/ozone/simple_impl.hh" + - template class OoOCPU; ++template class OzoneCPU; ++template class OzoneCPU; diff --cc src/cpu/ozone/cpu.hh index fa849bb09,000000000..5af2b02b2 mode 100644,000000..100644 --- a/src/cpu/ozone/cpu.hh +++ b/src/cpu/ozone/cpu.hh @@@ -1,638 -1,0 +1,629 @@@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + - #ifndef __CPU_OOO_CPU_OOO_CPU_HH__ - #define __CPU_OOO_CPU_OOO_CPU_HH__ ++#ifndef __CPU_OZONE_CPU_HH__ ++#define __CPU_OZONE_CPU_HH__ ++ ++#include + +#include "base/statistics.hh" ++#include "base/timebuf.hh" +#include "config/full_system.hh" +#include "cpu/base.hh" +#include "cpu/exec_context.hh" - #include "encumbered/cpu/full/fu_pool.hh" - #include "cpu/ooo_cpu/ea_list.hh" ++#include "cpu/inst_seq.hh" ++#include "cpu/ozone/rename_table.hh" ++#include "cpu/ozone/thread_state.hh" +#include "cpu/pc_event.hh" +#include "cpu/static_inst.hh" +#include "mem/mem_interface.hh" +#include "sim/eventq.hh" + +// forward declarations +#if FULL_SYSTEM - class Processor; ++#include "arch/alpha/tlb.hh" ++ +class AlphaITB; +class AlphaDTB; +class PhysicalMemory; ++class MemoryController; + ++class Sampler; +class RemoteGDB; +class GDBListener; + ++namespace Kernel { ++ class Statistics; ++}; ++ +#else + +class Process; + +#endif // FULL_SYSTEM + +class Checkpoint; ++class EndQuiesceEvent; +class MemInterface; + +namespace Trace { + class InstRecord; +} + ++template ++class Checker; ++ +/** + * Declaration of Out-of-Order CPU class. Basically it is a SimpleCPU with + * simple out-of-order capabilities added to it. It is still a 1 CPI machine + * (?), but is capable of handling cache misses. Basically it models having + * a ROB/IQ by only allowing a certain amount of instructions to execute while + * the cache miss is outstanding. 
+ */ + +template - class OoOCPU : public BaseCPU ++class OzoneCPU : public BaseCPU +{ + private: ++ typedef typename Impl::FrontEnd FrontEnd; ++ typedef typename Impl::BackEnd BackEnd; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; + ++ typedef TheISA::MiscReg MiscReg; ++ ++ public: ++ class OzoneXC : public ExecContext { ++ public: ++ OzoneCPU *cpu; ++ ++ OzoneThreadState *thread; ++ ++ BaseCPU *getCpuPtr(); ++ ++ void setCpuId(int id); ++ ++ int readCpuId() { return thread->cpuId; } ++ ++ FunctionalMemory *getMemPtr() { return thread->mem; } ++ ++#if FULL_SYSTEM ++ System *getSystemPtr() { return cpu->system; } ++ ++ PhysicalMemory *getPhysMemPtr() { return cpu->physmem; } ++ ++ AlphaITB *getITBPtr() { return cpu->itb; } ++ ++ AlphaDTB * getDTBPtr() { return cpu->dtb; } ++ ++ Kernel::Statistics *getKernelStats() { return thread->kernelStats; } ++#else ++ Process *getProcessPtr() { return thread->process; } ++#endif ++ ++ Status status() const { return thread->_status; } ++ ++ void setStatus(Status new_status); ++ ++ /// Set the status to Active. Optional delay indicates number of ++ /// cycles to wait before beginning execution. ++ void activate(int delay = 1); ++ ++ /// Set the status to Suspended. ++ void suspend(); ++ ++ /// Set the status to Unallocated. ++ void deallocate(); ++ ++ /// Set the status to Halted. ++ void halt(); ++ ++#if FULL_SYSTEM ++ void dumpFuncProfile(); ++#endif ++ ++ void takeOverFrom(ExecContext *old_context); ++ ++ void regStats(const std::string &name); ++ ++ void serialize(std::ostream &os); ++ void unserialize(Checkpoint *cp, const std::string §ion); ++ ++#if FULL_SYSTEM ++ EndQuiesceEvent *getQuiesceEvent(); ++ ++ Tick readLastActivate(); ++ Tick readLastSuspend(); ++ ++ void profileClear(); ++ void profileSample(); ++#endif ++ ++ int getThreadNum(); ++ ++ // Also somewhat obnoxious. Really only used for the TLB fault. 
++ TheISA::MachInst getInst(); ++ ++ void copyArchRegs(ExecContext *xc); ++ ++ void clearArchRegs(); ++ ++ uint64_t readIntReg(int reg_idx); ++ ++ float readFloatRegSingle(int reg_idx); ++ ++ double readFloatRegDouble(int reg_idx); ++ ++ uint64_t readFloatRegInt(int reg_idx); ++ ++ void setIntReg(int reg_idx, uint64_t val); ++ ++ void setFloatRegSingle(int reg_idx, float val); ++ ++ void setFloatRegDouble(int reg_idx, double val); ++ ++ void setFloatRegInt(int reg_idx, uint64_t val); ++ ++ uint64_t readPC() { return thread->PC; } ++ void setPC(Addr val); ++ ++ uint64_t readNextPC() { return thread->nextPC; } ++ void setNextPC(Addr val); ++ ++ public: ++ // ISA stuff: ++ MiscReg readMiscReg(int misc_reg); ++ ++ MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault); ++ ++ Fault setMiscReg(int misc_reg, const MiscReg &val); ++ ++ Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); ++ ++ unsigned readStCondFailures() ++ { return thread->storeCondFailures; } ++ ++ void setStCondFailures(unsigned sc_failures) ++ { thread->storeCondFailures = sc_failures; } ++ ++#if FULL_SYSTEM ++ bool inPalMode() { return cpu->inPalMode(); } ++#endif ++ ++ bool misspeculating() { return false; } ++ ++#if !FULL_SYSTEM ++ TheISA::IntReg getSyscallArg(int i) ++ { return thread->renameTable[TheISA::ArgumentReg0 + i]->readIntResult(); } ++ ++ // used to shift args for indirect syscall ++ void setSyscallArg(int i, TheISA::IntReg val) ++ { thread->renameTable[TheISA::ArgumentReg0 + i]->setIntResult(i); } ++ ++ void setSyscallReturn(SyscallReturn return_value) ++ { cpu->setSyscallReturn(return_value, thread->tid); } ++ ++ Counter readFuncExeInst() { return thread->funcExeInst; } ++ ++ void setFuncExeInst(Counter new_val) ++ { thread->funcExeInst = new_val; } ++#endif ++ }; ++ ++ // execution context proxy ++ OzoneXC ozoneXC; ++ ExecContext *xcProxy; ++ ExecContext *checkerXC; ++ ++ typedef OzoneThreadState ImplState; ++ ++ private: ++ OzoneThreadState thread; ++ + public: + // main 
simulation loop (one cycle) + void tick(); + ++ std::set snList; ++ std::set lockAddrList; + private: + struct TickEvent : public Event + { - OoOCPU *cpu; ++ OzoneCPU *cpu; + int width; + - TickEvent(OoOCPU *c, int w); ++ TickEvent(OzoneCPU *c, int w); + void process(); + const char *description(); + }; + + TickEvent tickEvent; + + /// Schedule tick event, regardless of its current state. + void scheduleTickEvent(int delay) + { + if (tickEvent.squashed()) - tickEvent.reschedule(curTick + delay); ++ tickEvent.reschedule(curTick + cycles(delay)); + else if (!tickEvent.scheduled()) - tickEvent.schedule(curTick + delay); ++ tickEvent.schedule(curTick + cycles(delay)); + } + + /// Unschedule tick event, regardless of its current state. + void unscheduleTickEvent() + { + if (tickEvent.scheduled()) + tickEvent.squash(); + } + + private: + Trace::InstRecord *traceData; + + template + void trace_data(T data); + + public: - // + enum Status { + Running, + Idle, - IcacheMiss, - IcacheMissComplete, - DcacheMissStall, + SwitchedOut + }; + - private: + Status _status; + + public: ++ bool checkInterrupts; ++ + void post_interrupt(int int_num, int index); + + void zero_fill_64(Addr addr) { + static int warned = 0; + if (!warned) { + warn ("WH64 is not implemented"); + warned = 1; + } + }; + - struct Params : public BaseCPU::Params - { - MemInterface *icache_interface; - MemInterface *dcache_interface; - int width; - #if FULL_SYSTEM - AlphaITB *itb; - AlphaDTB *dtb; - FunctionalMemory *mem; - #else - Process *process; - #endif - int issueWidth; - }; ++ typedef typename Impl::Params Params; + - OoOCPU(Params *params); ++ OzoneCPU(Params *params); + - virtual ~OoOCPU(); ++ virtual ~OzoneCPU(); + + void init(); + - private: - void copyFromXC(); - + public: - // execution context - ExecContext *xc; ++ BaseCPU *getCpuPtr() { return this; } ++ ++ void setCpuId(int id) { cpuId = id; } ++ ++ int readCpuId() { return cpuId; } ++ ++ int cpuId; + - void switchOut(); ++ void switchOut(Sampler 
*sampler); ++ void signalSwitched(); + void takeOverFrom(BaseCPU *oldCPU); + ++ Sampler *sampler; ++ ++ int switchCount; ++ +#if FULL_SYSTEM + Addr dbg_vtophys(Addr addr); + + bool interval_stats; ++ ++ AlphaITB *itb; ++ AlphaDTB *dtb; ++ System *system; ++ ++ // the following two fields are redundant, since we can always ++ // look them up through the system pointer, but we'll leave them ++ // here for now for convenience ++ MemoryController *memctrl; ++ PhysicalMemory *physmem; +#endif + + // L1 instruction cache + MemInterface *icacheInterface; + + // L1 data cache + MemInterface *dcacheInterface; + - FuncUnitPool *fuPool; ++ /** Pointer to memory. */ ++ FunctionalMemory *mem; + - // Refcounted pointer to the one memory request. - MemReqPtr cacheMemReq; - - class ICacheCompletionEvent : public Event - { - private: - OoOCPU *cpu; - - public: - ICacheCompletionEvent(OoOCPU *_cpu); - - virtual void process(); - virtual const char *description(); - }; - - // Will need to create a cache completion event upon any memory miss. 
- ICacheCompletionEvent iCacheCompletionEvent; - - class DCacheCompletionEvent; - - typedef typename - std::list::iterator DCacheCompEventIt; - - class DCacheCompletionEvent : public Event - { - private: - OoOCPU *cpu; - DynInstPtr inst; - DCacheCompEventIt dcceIt; - - public: - DCacheCompletionEvent(OoOCPU *_cpu, DynInstPtr &_inst, - DCacheCompEventIt &_dcceIt); - - virtual void process(); - virtual const char *description(); - }; - - friend class DCacheCompletionEvent; - - protected: - std::list dCacheCompList; - DCacheCompEventIt dcceIt; ++ FrontEnd *frontEnd; + ++ BackEnd *backEnd; + private: + Status status() const { return _status; } ++ void setStatus(Status new_status) { _status = new_status; } + + virtual void activateContext(int thread_num, int delay); + virtual void suspendContext(int thread_num); + virtual void deallocateContext(int thread_num); + virtual void haltContext(int thread_num); + + // statistics + virtual void regStats(); + virtual void resetStats(); + + // number of simulated instructions ++ public: + Counter numInst; + Counter startNumInst; - Stats::Scalar<> numInsts; + + virtual Counter totalInstructions() const + { + return numInst - startNumInst; + } + - // number of simulated memory references - Stats::Scalar<> numMemRefs; - ++ private: + // number of simulated loads + Counter numLoad; + Counter startNumLoad; + + // number of idle cycles + Stats::Average<> notIdleFraction; + Stats::Formula idleFraction; - - // number of cycles stalled for I-cache misses - Stats::Scalar<> icacheStallCycles; - Counter lastIcacheStall; - - // number of cycles stalled for D-cache misses - Stats::Scalar<> dcacheStallCycles; - Counter lastDcacheStall; - - void processICacheCompletion(); - + public: + + virtual void serialize(std::ostream &os); + virtual void unserialize(Checkpoint *cp, const std::string §ion); + ++ +#if FULL_SYSTEM + bool validInstAddr(Addr addr) { return true; } + bool validDataAddr(Addr addr) { return true; } - int getInstAsid() { return 
xc->regs.instAsid(); } - int getDataAsid() { return xc->regs.dataAsid(); } + + Fault translateInstReq(MemReqPtr &req) + { + return itb->translate(req); + } + + Fault translateDataReadReq(MemReqPtr &req) + { + return dtb->translate(req, false); + } + + Fault translateDataWriteReq(MemReqPtr &req) + { + return dtb->translate(req, true); + } + +#else + bool validInstAddr(Addr addr) - { return xc->validInstAddr(addr); } ++ { return true; } + + bool validDataAddr(Addr addr) - { return xc->validDataAddr(addr); } ++ { return true; } + - int getInstAsid() { return xc->asid; } - int getDataAsid() { return xc->asid; } ++ int getInstAsid() { return thread.asid; } ++ int getDataAsid() { return thread.asid; } + + Fault dummyTranslation(MemReqPtr &req) + { +#if 0 + assert((req->vaddr >> 48 & 0xffff) == 0); +#endif + + // put the asid in the upper 16 bits of the paddr + req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16); + req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16; + return NoFault; + } ++ ++ /** Translates instruction requestion in syscall emulation mode. */ + Fault translateInstReq(MemReqPtr &req) + { + return dummyTranslation(req); + } ++ ++ /** Translates data read request in syscall emulation mode. */ + Fault translateDataReadReq(MemReqPtr &req) + { + return dummyTranslation(req); + } ++ ++ /** Translates data write request in syscall emulation mode. */ + Fault translateDataWriteReq(MemReqPtr &req) + { + return dummyTranslation(req); + } - +#endif + ++ /** Old CPU read from memory function. No longer used. */ + template - Fault read(Addr addr, T &data, unsigned flags, DynInstPtr inst); - - template - Fault write(T data, Addr addr, unsigned flags, - uint64_t *res, DynInstPtr inst); - - void prefetch(Addr addr, unsigned flags) ++ Fault read(MemReqPtr &req, T &data) + { - // need to do this... - } - - void writeHint(Addr addr, int size, unsigned flags) - { - // need to do this... 
- } - - Fault copySrcTranslate(Addr src); - - Fault copy(Addr dest); - - private: - bool executeInst(DynInstPtr &inst); - - void renameInst(DynInstPtr &inst); - - void addInst(DynInstPtr &inst); - - void commitHeadInst(); - - bool getOneInst(); - - Fault fetchCacheLine(); - - InstSeqNum getAndIncrementInstSeq(); - - bool ambigMemAddr; - - private: - InstSeqNum globalSeqNum; - - DynInstPtr renameTable[TheISA::TotalNumRegs]; - DynInstPtr commitTable[TheISA::TotalNumRegs]; - - // Might need a table of the shadow registers as well. - #if FULL_SYSTEM - DynInstPtr palShadowTable[TheISA::NumIntRegs]; ++#if 0 ++#if FULL_SYSTEM && defined(TARGET_ALPHA) ++ if (req->flags & LOCKED) { ++ req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr); ++ req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true); ++ } ++#endif +#endif ++ Fault error; ++ if (req->flags & LOCKED) { ++ lockAddrList.insert(req->paddr); ++ lockFlag = true; ++ } + - public: - // The register accessor methods provide the index of the - // instruction's operand (e.g., 0 or 1), not the architectural - // register index, to simplify the implementation of register - // renaming. We find the architectural register index by indexing - // into the instruction's own operand index table. Note that a - // raw pointer to the StaticInst is provided instead of a - // ref-counted StaticInstPtr to redice overhead. This is fine as - // long as these methods don't copy the pointer into any long-term - // storage (which is pretty hard to imagine they would have reason - // to do). - - // In the OoO case these shouldn't read from the XC but rather from the - // rename table of DynInsts. Also these likely shouldn't be called very - // often, other than when adding things into the xc during say a syscall. 
- - uint64_t readIntReg(StaticInst *si, int idx) - { - return xc->readIntReg(si->srcRegIdx(idx)); ++ error = this->mem->read(req, data); ++ data = gtoh(data); ++ return error; + } + - FloatReg readFloatReg(StaticInst *si, int idx, width) - { - int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; - return xc->readFloatReg(reg_idx, width); - } + - FloatReg readFloatReg(StaticInst *si, int idx) ++ /** CPU read function, forwards read to LSQ. */ ++ template ++ Fault read(MemReqPtr &req, T &data, int load_idx) + { - int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; - return xc->readFloatReg(reg_idx); ++ return backEnd->read(req, data, load_idx); + } + - FloatRegBits readFloatRegBits(StaticInst *si, int idx, int width) ++ /** Old CPU write to memory function. No longer used. */ ++ template ++ Fault write(MemReqPtr &req, T &data) + { - int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; - return xc->readFloatRegBits(reg_idx, width); - } ++#if 0 ++#if FULL_SYSTEM && defined(TARGET_ALPHA) ++ ExecContext *xc; ++ ++ // If this is a store conditional, act appropriately ++ if (req->flags & LOCKED) { ++ xc = req->xc; ++ ++ if (req->flags & UNCACHEABLE) { ++ // Don't update result register (see stq_c in isa_desc) ++ req->result = 2; ++ xc->setStCondFailures(0);//Needed? 
[RGD] ++ } else { ++ bool lock_flag = xc->readMiscReg(TheISA::Lock_Flag_DepTag); ++ Addr lock_addr = xc->readMiscReg(TheISA::Lock_Addr_DepTag); ++ req->result = lock_flag; ++ if (!lock_flag || ++ ((lock_addr & ~0xf) != (req->paddr & ~0xf))) { ++ xc->setMiscReg(TheISA::Lock_Flag_DepTag, false); ++ xc->setStCondFailures(xc->readStCondFailures() + 1); ++ if (((xc->readStCondFailures()) % 100000) == 0) { ++ std::cerr << "Warning: " ++ << xc->readStCondFailures() ++ << " consecutive store conditional failures " ++ << "on cpu " << req->xc->readCpuId() ++ << std::endl; ++ } ++ return NoFault; ++ } ++ else xc->setStCondFailures(0); ++ } ++ } + - FloatRegBits readFloatRegBits(StaticInst *si, int idx) - { - int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; - return xc->readFloatRegBits(reg_idx); - } ++ // Need to clear any locked flags on other proccessors for ++ // this address. Only do this for succsful Store Conditionals ++ // and all other stores (WH64?). Unsuccessful Store ++ // Conditionals would have returned above, and wouldn't fall ++ // through. 
++ for (int i = 0; i < this->system->execContexts.size(); i++){ ++ xc = this->system->execContexts[i]; ++ if ((xc->readMiscReg(TheISA::Lock_Addr_DepTag) & ~0xf) == ++ (req->paddr & ~0xf)) { ++ xc->setMiscReg(TheISA::Lock_Flag_DepTag, false); ++ } ++ } + - void setIntReg(StaticInst *si, int idx, uint64_t val) - { - xc->setIntReg(si->destRegIdx(idx), val); - } ++#endif ++#endif + - void setFloatReg(StaticInst *si, int idx, FloatReg val, int width) - { - int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; - xc->setFloatReg(reg_idx, val, width); - } ++ if (req->flags & LOCKED) { ++ if (req->flags & UNCACHEABLE) { ++ req->result = 2; ++ } else { ++ if (this->lockFlag) { ++ if (lockAddrList.find(req->paddr) != ++ lockAddrList.end()) { ++ req->result = 1; ++ } else { ++ req->result = 0; ++ return NoFault; ++ } ++ } else { ++ req->result = 0; ++ return NoFault; ++ } ++ } ++ } + - void setFloatReg(StaticInst *si, int idx, FloatReg val) - { - int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; - xc->setFloatReg(reg_idx, val); ++ return this->mem->write(req, (T)htog(data)); + } + - void setFloatRegBits(StaticInst *si, int idx, FloatRegBits val, int width) ++ /** CPU write function, forwards write to LSQ. */ ++ template ++ Fault write(MemReqPtr &req, T &data, int store_idx) + { - int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; - xc->setFloatRegBits(reg_idx, val, width); ++ return backEnd->write(req, data, store_idx); + } + - void setFloatRegBits(StaticInst *si, int idx, FloatRegBits val) ++ void prefetch(Addr addr, unsigned flags) + { - int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; - xc->setFloatRegBits(reg_idx, val); ++ // need to do this... + } + - uint64_t readPC() { return PC; } - void setNextPC(Addr val) { nextPC = val; } - - private: - Addr PC; - Addr nextPC; - - unsigned issueWidth; - - bool fetchRedirExcp; - bool fetchRedirBranch; - - /** Mask to get a cache block's address. 
*/ - Addr cacheBlkMask; - - unsigned cacheBlkSize; - - Addr cacheBlkPC; - - /** The cache line being fetched. */ - uint8_t *cacheData; - - protected: - bool cacheBlkValid; - - private: - - // Align an address (typically a PC) to the start of an I-cache block. - // We fold in the PISA 64- to 32-bit conversion here as well. - Addr icacheBlockAlignPC(Addr addr) ++ void writeHint(Addr addr, int size, unsigned flags) + { - addr = TheISA::realPCToFetchPC(addr); - return (addr & ~(cacheBlkMask)); ++ // need to do this... + } + - unsigned instSize; ++ Fault copySrcTranslate(Addr src); + - // ROB tracking stuff. - DynInstPtr robHeadPtr; - DynInstPtr robTailPtr; - unsigned robSize; - unsigned robInsts; ++ Fault copy(Addr dest); + - // List of outstanding EA instructions. - protected: - EAList eaList; ++ InstSeqNum globalSeqNum; + + public: - void branchToTarget(Addr val) - { - if (!fetchRedirExcp) { - fetchRedirBranch = true; - PC = val; - } - } - - // ISA stuff: - uint64_t readUniq() { return xc->readUniq(); } - void setUniq(uint64_t val) { xc->setUniq(val); } ++ void squashFromXC(); + - uint64_t readFpcr() { return xc->readFpcr(); } - void setFpcr(uint64_t val) { xc->setFpcr(val); } ++ // @todo: This can be a useful debug function. Implement it. 
++ void dumpInsts() { frontEnd->dumpInsts(); } + +#if FULL_SYSTEM - uint64_t readIpr(int idx, Fault &fault) { return xc->readIpr(idx, fault); } - Fault setIpr(int idx, uint64_t val) { return xc->setIpr(idx, val); } - Fault hwrei() { return xc->hwrei(); } - int readIntrFlag() { return xc->readIntrFlag(); } - void setIntrFlag(int val) { xc->setIntrFlag(val); } - bool inPalMode() { return xc->inPalMode(); } - void trap(Fault fault) { fault->invoke(xc); } - bool simPalCheck(int palFunc) { return xc->simPalCheck(palFunc); } ++ Fault hwrei(); ++ int readIntrFlag() { return thread.regs.intrflag; } ++ void setIntrFlag(int val) { thread.regs.intrflag = val; } ++ bool inPalMode() { return AlphaISA::PcPAL(thread.PC); } ++ bool inPalMode(Addr pc) { return AlphaISA::PcPAL(pc); } ++ bool simPalCheck(int palFunc); ++ void processInterrupts(); +#else - void syscall() { xc->syscall(); } - #endif - - ExecContext *xcBase() { return xc; } - }; - - - // precise architected memory state accessor macros - template - template - Fault - OoOCPU::read(Addr addr, T &data, unsigned flags, DynInstPtr inst) - { - MemReqPtr readReq = new MemReq(); - readReq->xc = xc; - readReq->asid = 0; - readReq->data = new uint8_t[64]; - - readReq->reset(addr, sizeof(T), flags); - - // translate to physical address - This might be an ISA impl call - Fault fault = translateDataReadReq(readReq); - - // do functional access - if (fault == NoFault) - fault = xc->mem->read(readReq, data); - #if 0 - if (traceData) { - traceData->setAddr(addr); - if (fault == NoFault) - traceData->setData(data); - } ++ void syscall(); ++ void setSyscallReturn(SyscallReturn return_value, int tid); +#endif + - // if we have a cache, do cache access too - if (fault == NoFault && dcacheInterface) { - readReq->cmd = Read; - readReq->completionEvent = NULL; - readReq->time = curTick; - /*MemAccessResult result = */dcacheInterface->access(readReq); ++ ExecContext *xcBase() { return xcProxy; } + - if (dcacheInterface->doEvents()) { - 
readReq->completionEvent = new DCacheCompletionEvent(this, inst, - dcceIt); - } - } - - if (!dcacheInterface && (readReq->flags & UNCACHEABLE)) - recordEvent("Uncached Read"); - - return fault; - } - - template - template - Fault - OoOCPU::write(T data, Addr addr, unsigned flags, - uint64_t *res, DynInstPtr inst) - { - MemReqPtr writeReq = new MemReq(); - writeReq->xc = xc; - writeReq->asid = 0; - writeReq->data = new uint8_t[64]; ++ bool decoupledFrontEnd; ++ struct CommStruct { ++ InstSeqNum doneSeqNum; ++ InstSeqNum nonSpecSeqNum; ++ bool uncached; ++ unsigned lqIdx; + - #if 0 - if (traceData) { - traceData->setAddr(addr); - traceData->setData(data); - } - #endif - - writeReq->reset(addr, sizeof(T), flags); - - // translate to physical address - Fault fault = translateDataWriteReq(writeReq); - - // do functional access - if (fault == NoFault) - fault = xc->write(writeReq, data); - - if (fault == NoFault && dcacheInterface) { - writeReq->cmd = Write; - memcpy(writeReq->data,(uint8_t *)&data,writeReq->size); - writeReq->completionEvent = NULL; - writeReq->time = curTick; - /*MemAccessResult result = */dcacheInterface->access(writeReq); - - if (dcacheInterface->doEvents()) { - writeReq->completionEvent = new DCacheCompletionEvent(this, inst, - dcceIt); - } - } - - if (res && (fault == NoFault)) - *res = writeReq->result; ++ bool stall; ++ }; ++ TimeBuffer comm; + - if (!dcacheInterface && (writeReq->flags & UNCACHEABLE)) - recordEvent("Uncached Write"); ++ bool lockFlag; + - return fault; - } ++ Stats::Scalar<> quiesceCycles; + ++ Checker *checker; ++}; + - #endif // __CPU_OOO_CPU_OOO_CPU_HH__ ++#endif // __CPU_OZONE_CPU_HH__ diff --cc src/cpu/ozone/cpu_impl.hh index e7ed3cfe0,000000000..5675da3a8 mode 100644,000000..100644 --- a/src/cpu/ozone/cpu_impl.hh +++ b/src/cpu/ozone/cpu_impl.hh @@@ -1,48 -1,0 +1,1049 @@@ +/* - * Copyright (c) 2005 The Regents of The University of Michigan ++ * Copyright (c) 2006 The Regents of The University of Michigan + * All rights 
reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + - #ifndef __CPU_OOO_CPU_OOO_IMPL_HH__ - #define __CPU_OOO_CPU_OOO_IMPL_HH__ ++//#include ++//#include + - #include "arch/isa_traits.hh" ++#include "arch/isa_traits.hh" // For MachInst ++#include "base/trace.hh" ++#include "config/full_system.hh" ++#include "cpu/base.hh" ++#include "cpu/checker/exec_context.hh" ++#include "cpu/exec_context.hh" ++#include "cpu/exetrace.hh" ++#include "cpu/ozone/cpu.hh" ++#include "cpu/quiesce_event.hh" ++#include "cpu/static_inst.hh" ++//#include "mem/base_mem.hh" ++#include "mem/mem_interface.hh" ++#include "sim/sim_object.hh" ++#include "sim/stats.hh" ++ ++#if FULL_SYSTEM ++#include "arch/faults.hh" ++#include "arch/alpha/osfpal.hh" ++#include "arch/alpha/tlb.hh" ++#include "arch/vtophys.hh" ++#include "base/callback.hh" ++//#include "base/remote_gdb.hh" ++#include "cpu/profile.hh" ++#include "kern/kernel_stats.hh" ++#include "mem/functional/memory_control.hh" ++#include "mem/functional/physical.hh" ++#include "sim/faults.hh" ++#include "sim/sim_events.hh" ++#include "sim/sim_exit.hh" ++#include "sim/system.hh" ++#else // !FULL_SYSTEM ++#include "mem/functional/functional.hh" ++#include "sim/process.hh" ++#endif // FULL_SYSTEM ++ ++using namespace TheISA; ++ ++template ++template ++void ++OzoneCPU::trace_data(T data) { ++ if (traceData) { ++ traceData->setData(data); ++ } ++} ++ ++template ++OzoneCPU::TickEvent::TickEvent(OzoneCPU *c, int w) ++ : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c), width(w) ++{ ++} ++ ++template ++void ++OzoneCPU::TickEvent::process() ++{ ++ cpu->tick(); ++} ++ ++template ++const char * ++OzoneCPU::TickEvent::description() ++{ ++ return "OzoneCPU tick event"; ++} ++ ++template ++OzoneCPU::OzoneCPU(Params *p) ++#if FULL_SYSTEM ++ : BaseCPU(p), thread(this, 0, p->mem), tickEvent(this, p->width), ++ mem(p->mem), ++#else ++ : BaseCPU(p), thread(this, 0, p->workload[0], 0), tickEvent(this, p->width), ++ mem(p->workload[0]->getMemory()), ++#endif ++ comm(5, 5) ++{ ++ frontEnd = new FrontEnd(p); ++ 
backEnd = new BackEnd(p); ++ ++ _status = Idle; ++ ++ if (p->checker) { ++ BaseCPU *temp_checker = p->checker; ++ checker = dynamic_cast *>(temp_checker); ++ checker->setMemory(mem); ++#if FULL_SYSTEM ++ checker->setSystem(p->system); ++#endif ++ checkerXC = new CheckerExecContext(&ozoneXC, checker); ++ thread.xcProxy = checkerXC; ++ xcProxy = checkerXC; ++ } else { ++ checker = NULL; ++ thread.xcProxy = &ozoneXC; ++ xcProxy = &ozoneXC; ++ } ++ ++ ozoneXC.cpu = this; ++ ozoneXC.thread = &thread; ++ ++ thread.inSyscall = false; ++ ++ thread.setStatus(ExecContext::Suspended); ++#if FULL_SYSTEM ++ /***** All thread state stuff *****/ ++ thread.cpu = this; ++ thread.tid = 0; ++ thread.mem = p->mem; ++ ++ thread.quiesceEvent = new EndQuiesceEvent(xcProxy); ++ ++ system = p->system; ++ itb = p->itb; ++ dtb = p->dtb; ++ memctrl = p->system->memctrl; ++ physmem = p->system->physmem; ++ ++ if (p->profile) { ++ thread.profile = new FunctionProfile(p->system->kernelSymtab); ++ // @todo: This might be better as an ExecContext instead of OzoneXC ++ Callback *cb = ++ new MakeCallback(&ozoneXC); ++ registerExitCallback(cb); ++ } ++ ++ // let's fill with a dummy node for now so we don't get a segfault ++ // on the first cycle when there's no node available. 
++ static ProfileNode dummyNode; ++ thread.profileNode = &dummyNode; ++ thread.profilePC = 3; ++#else ++ thread.cpu = this; ++ thread.tid = 0; ++ thread.process = p->workload[0]; ++ thread.asid = 0; ++#endif // !FULL_SYSTEM ++ ++ numInst = 0; ++ startNumInst = 0; ++ ++ execContexts.push_back(xcProxy); ++ ++ frontEnd->setCPU(this); ++ backEnd->setCPU(this); ++ ++ frontEnd->setXC(xcProxy); ++ backEnd->setXC(xcProxy); ++ ++ frontEnd->setThreadState(&thread); ++ backEnd->setThreadState(&thread); ++ ++ frontEnd->setCommBuffer(&comm); ++ backEnd->setCommBuffer(&comm); ++ ++ frontEnd->setBackEnd(backEnd); ++ backEnd->setFrontEnd(frontEnd); ++ ++ decoupledFrontEnd = p->decoupledFrontEnd; ++ ++ globalSeqNum = 1; ++ ++ checkInterrupts = false; ++ ++ for (int i = 0; i < TheISA::TotalNumRegs; ++i) { ++ thread.renameTable[i] = new DynInst(this); ++ thread.renameTable[i]->setResultReady(); ++ } ++ ++ frontEnd->renameTable.copyFrom(thread.renameTable); ++ backEnd->renameTable.copyFrom(thread.renameTable); ++ ++#if !FULL_SYSTEM ++// pTable = p->pTable; ++#endif ++ ++ lockFlag = 0; ++ ++ DPRINTF(OzoneCPU, "OzoneCPU: Created Ozone cpu object.\n"); ++} ++ ++template ++OzoneCPU::~OzoneCPU() ++{ ++} ++ ++template ++void ++OzoneCPU::switchOut(Sampler *_sampler) ++{ ++ sampler = _sampler; ++ switchCount = 0; ++ // Front end needs state from back end, so switch out the back end first. 
++ backEnd->switchOut(); ++ frontEnd->switchOut(); ++} ++ ++template ++void ++OzoneCPU::signalSwitched() ++{ ++ if (++switchCount == 2) { ++ backEnd->doSwitchOut(); ++ frontEnd->doSwitchOut(); ++ if (checker) ++ checker->switchOut(sampler); ++ _status = SwitchedOut; ++ if (tickEvent.scheduled()) ++ tickEvent.squash(); ++ sampler->signalSwitched(); ++ } ++ assert(switchCount <= 2); ++} ++ ++template ++void ++OzoneCPU::takeOverFrom(BaseCPU *oldCPU) ++{ ++ BaseCPU::takeOverFrom(oldCPU); ++ ++ backEnd->takeOverFrom(); ++ frontEnd->takeOverFrom(); ++ assert(!tickEvent.scheduled()); ++ ++ // @todo: Fix hardcoded number ++ // Clear out any old information in time buffer. ++ for (int i = 0; i < 6; ++i) { ++ comm.advance(); ++ } ++ ++ // if any of this CPU's ExecContexts are active, mark the CPU as ++ // running and schedule its tick event. ++ for (int i = 0; i < execContexts.size(); ++i) { ++ ExecContext *xc = execContexts[i]; ++ if (xc->status() == ExecContext::Active && ++ _status != Running) { ++ _status = Running; ++ tickEvent.schedule(curTick); ++ } ++ } ++ // Nothing running, change status to reflect that we're no longer ++ // switched out. ++ if (_status == SwitchedOut) { ++ _status = Idle; ++ } ++} ++ ++template ++void ++OzoneCPU::activateContext(int thread_num, int delay) ++{ ++ // Eventually change this in SMT. ++ assert(thread_num == 0); ++ ++ assert(_status == Idle); ++ notIdleFraction++; ++ scheduleTickEvent(delay); ++ _status = Running; ++ thread._status = ExecContext::Active; ++ frontEnd->wakeFromQuiesce(); ++} ++ ++template ++void ++OzoneCPU::suspendContext(int thread_num) ++{ ++ // Eventually change this in SMT. ++ assert(thread_num == 0); ++ // @todo: Figure out how to initially set the status properly so ++ // this is running. 
++// assert(_status == Running); ++ notIdleFraction--; ++ unscheduleTickEvent(); ++ _status = Idle; ++} ++ ++template ++void ++OzoneCPU::deallocateContext(int thread_num) ++{ ++ // for now, these are equivalent ++ suspendContext(thread_num); ++} + +template - class OoOCPU; ++void ++OzoneCPU::haltContext(int thread_num) ++{ ++ // for now, these are equivalent ++ suspendContext(thread_num); ++} ++ ++template ++void ++OzoneCPU::regStats() ++{ ++ using namespace Stats; ++ ++ BaseCPU::regStats(); ++ ++ thread.numInsts ++ .name(name() + ".num_insts") ++ .desc("Number of instructions executed") ++ ; ++ ++ thread.numMemRefs ++ .name(name() + ".num_refs") ++ .desc("Number of memory references") ++ ; ++ ++ notIdleFraction ++ .name(name() + ".not_idle_fraction") ++ .desc("Percentage of non-idle cycles") ++ ; ++ ++ idleFraction ++ .name(name() + ".idle_fraction") ++ .desc("Percentage of idle cycles") ++ ; ++ ++ quiesceCycles ++ .name(name() + ".quiesce_cycles") ++ .desc("Number of cycles spent in quiesce") ++ ; ++ ++ idleFraction = constant(1.0) - notIdleFraction; ++ ++ frontEnd->regStats(); ++ backEnd->regStats(); ++} ++ ++template ++void ++OzoneCPU::resetStats() ++{ ++ startNumInst = numInst; ++ notIdleFraction = (_status != Idle); ++} ++ ++template ++void ++OzoneCPU::init() ++{ ++ BaseCPU::init(); ++ ++ // Mark this as in syscall so it won't need to squash ++ thread.inSyscall = true; ++#if FULL_SYSTEM ++ for (int i = 0; i < execContexts.size(); ++i) { ++ ExecContext *xc = execContexts[i]; ++ ++ // initialize CPU, including PC ++ TheISA::initCPU(xc, xc->readCpuId()); ++ } ++#endif ++ frontEnd->renameTable.copyFrom(thread.renameTable); ++ backEnd->renameTable.copyFrom(thread.renameTable); ++ ++ thread.inSyscall = false; ++} ++ ++template ++void ++OzoneCPU::serialize(std::ostream &os) ++{ ++ BaseCPU::serialize(os); ++ SERIALIZE_ENUM(_status); ++ nameOut(os, csprintf("%s.xc", name())); ++ ozoneXC.serialize(os); ++ nameOut(os, csprintf("%s.tickEvent", name())); ++ 
tickEvent.serialize(os); ++} ++ ++template ++void ++OzoneCPU::unserialize(Checkpoint *cp, const std::string §ion) ++{ ++ BaseCPU::unserialize(cp, section); ++ UNSERIALIZE_ENUM(_status); ++ ozoneXC.unserialize(cp, csprintf("%s.xc", section)); ++ tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); ++} ++ ++template ++Fault ++OzoneCPU::copySrcTranslate(Addr src) ++{ ++ panic("Copy not implemented!\n"); ++ return NoFault; ++#if 0 ++ static bool no_warn = true; ++ int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64; ++ // Only support block sizes of 64 atm. ++ assert(blk_size == 64); ++ int offset = src & (blk_size - 1); ++ ++ // Make sure block doesn't span page ++ if (no_warn && ++ (src & TheISA::PageMask) != ((src + blk_size) & TheISA::PageMask) && ++ (src >> 40) != 0xfffffc) { ++ warn("Copied block source spans pages %x.", src); ++ no_warn = false; ++ } ++ ++ memReq->reset(src & ~(blk_size - 1), blk_size); ++ ++ // translate to physical address ++ Fault fault = xc->translateDataReadReq(memReq); ++ ++ assert(fault != Alignment_Fault); ++ ++ if (fault == NoFault) { ++ xc->copySrcAddr = src; ++ xc->copySrcPhysAddr = memReq->paddr + offset; ++ } else { ++ xc->copySrcAddr = 0; ++ xc->copySrcPhysAddr = 0; ++ } ++ return fault; ++#endif ++} ++ ++template ++Fault ++OzoneCPU::copy(Addr dest) ++{ ++ panic("Copy not implemented!\n"); ++ return NoFault; ++#if 0 ++ static bool no_warn = true; ++ int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64; ++ // Only support block sizes of 64 atm. ++ assert(blk_size == 64); ++ uint8_t data[blk_size]; ++ //assert(xc->copySrcAddr); ++ int offset = dest & (blk_size - 1); ++ ++ // Make sure block doesn't span page ++ if (no_warn && ++ (dest & TheISA::PageMask) != ((dest + blk_size) & TheISA::PageMask) && ++ (dest >> 40) != 0xfffffc) { ++ no_warn = false; ++ warn("Copied block destination spans pages %x. 
", dest); ++ } ++ ++ memReq->reset(dest & ~(blk_size -1), blk_size); ++ // translate to physical address ++ Fault fault = xc->translateDataWriteReq(memReq); ++ ++ assert(fault != Alignment_Fault); ++ ++ if (fault == NoFault) { ++ Addr dest_addr = memReq->paddr + offset; ++ // Need to read straight from memory since we have more than 8 bytes. ++ memReq->paddr = xc->copySrcPhysAddr; ++ xc->mem->read(memReq, data); ++ memReq->paddr = dest_addr; ++ xc->mem->write(memReq, data); ++ if (dcacheInterface) { ++ memReq->cmd = Copy; ++ memReq->completionEvent = NULL; ++ memReq->paddr = xc->copySrcPhysAddr; ++ memReq->dest = dest_addr; ++ memReq->size = 64; ++ memReq->time = curTick; ++ dcacheInterface->access(memReq); ++ } ++ } ++ return fault; ++#endif ++} ++ ++#if FULL_SYSTEM ++template ++Addr ++OzoneCPU::dbg_vtophys(Addr addr) ++{ ++ return vtophys(xcProxy, addr); ++} ++#endif // FULL_SYSTEM ++ ++#if FULL_SYSTEM ++template ++void ++OzoneCPU::post_interrupt(int int_num, int index) ++{ ++ BaseCPU::post_interrupt(int_num, index); ++ ++ if (_status == Idle) { ++ DPRINTF(IPI,"Suspended Processor awoke\n"); ++// thread.activate(); ++ // Hack for now. Otherwise might have to go through the xcProxy, or ++ // I need to figure out what's the right thing to call. 
++ activateContext(thread.tid, 1); ++ } ++} ++#endif // FULL_SYSTEM ++ ++/* start simulation, program loaded, processor precise state initialized */ ++template ++void ++OzoneCPU::tick() ++{ ++ DPRINTF(OzoneCPU, "\n\nOzoneCPU: Ticking cpu.\n"); ++ ++ _status = Running; ++ thread.renameTable[ZeroReg]->setIntResult(0); ++ thread.renameTable[ZeroReg+TheISA::FP_Base_DepTag]-> ++ setDoubleResult(0.0); ++ ++ comm.advance(); ++ frontEnd->tick(); ++ backEnd->tick(); ++ ++ // check for instruction-count-based events ++ comInstEventQueue[0]->serviceEvents(numInst); ++ ++ if (!tickEvent.scheduled() && _status == Running) ++ tickEvent.schedule(curTick + cycles(1)); ++} ++ ++template ++void ++OzoneCPU::squashFromXC() ++{ ++ thread.inSyscall = true; ++ backEnd->generateXCEvent(); ++} ++ ++#if !FULL_SYSTEM ++template ++void ++OzoneCPU::syscall() ++{ ++ // Not sure this copy is needed, depending on how the XC proxy is made. ++ thread.renameTable.copyFrom(backEnd->renameTable); ++ ++ thread.inSyscall = true; ++ ++ thread.funcExeInst++; ++ ++ DPRINTF(OzoneCPU, "FuncExeInst: %i\n", thread.funcExeInst); ++ ++ thread.process->syscall(xcProxy); ++ ++ thread.funcExeInst--; ++ ++ thread.inSyscall = false; ++ ++ frontEnd->renameTable.copyFrom(thread.renameTable); ++ backEnd->renameTable.copyFrom(thread.renameTable); ++} ++ ++template ++void ++OzoneCPU::setSyscallReturn(SyscallReturn return_value, int tid) ++{ ++ // check for error condition. Alpha syscall convention is to ++ // indicate success/failure in reg a3 (r19) and put the ++ // return value itself in the standard return value reg (v0). 
++ if (return_value.successful()) { ++ // no error ++ thread.renameTable[SyscallSuccessReg]->setIntResult(0); ++ thread.renameTable[ReturnValueReg]->setIntResult( ++ return_value.value()); ++ } else { ++ // got an error, return details ++ thread.renameTable[SyscallSuccessReg]->setIntResult((IntReg) -1); ++ thread.renameTable[ReturnValueReg]->setIntResult( ++ -return_value.value()); ++ } ++} ++#else ++template ++Fault ++OzoneCPU::hwrei() ++{ ++ // Need to move this to ISA code ++ // May also need to make this per thread ++ ++ lockFlag = false; ++ lockAddrList.clear(); ++ thread.kernelStats->hwrei(); ++ ++ checkInterrupts = true; ++ ++ // FIXME: XXX check for interrupts? XXX ++ return NoFault; ++} ++ ++template ++void ++OzoneCPU::processInterrupts() ++{ ++ // Check for interrupts here. For now can copy the code that ++ // exists within isa_fullsys_traits.hh. Also assume that thread 0 ++ // is the one that handles the interrupts. ++ ++ // Check if there are any outstanding interrupts ++ //Handle the interrupts ++ int ipl = 0; ++ int summary = 0; ++ ++ checkInterrupts = false; ++ ++ if (thread.readMiscReg(IPR_ASTRR)) ++ panic("asynchronous traps not implemented\n"); ++ ++ if (thread.readMiscReg(IPR_SIRR)) { ++ for (int i = INTLEVEL_SOFTWARE_MIN; ++ i < INTLEVEL_SOFTWARE_MAX; i++) { ++ if (thread.readMiscReg(IPR_SIRR) & (ULL(1) << i)) { ++ // See table 4-19 of the 21164 hardware reference ++ ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1; ++ summary |= (ULL(1) << i); ++ } ++ } ++ } ++ ++ uint64_t interrupts = intr_status(); ++ ++ if (interrupts) { ++ for (int i = INTLEVEL_EXTERNAL_MIN; ++ i < INTLEVEL_EXTERNAL_MAX; i++) { ++ if (interrupts & (ULL(1) << i)) { ++ // See table 4-19 of the 21164 hardware reference ++ ipl = i; ++ summary |= (ULL(1) << i); ++ } ++ } ++ } ++ ++ if (ipl && ipl > thread.readMiscReg(IPR_IPLR)) { ++ thread.setMiscReg(IPR_ISR, summary); ++ thread.setMiscReg(IPR_INTID, ipl); ++ // @todo: Make this more transparent ++ if (checker) { ++ 
checker->cpuXCBase()->setMiscReg(IPR_ISR, summary); ++ checker->cpuXCBase()->setMiscReg(IPR_INTID, ipl); ++ } ++ Fault fault = new InterruptFault; ++ fault->invoke(thread.getXCProxy()); ++ DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", ++ thread.readMiscReg(IPR_IPLR), ipl, summary); ++ } ++} ++ ++template ++bool ++OzoneCPU::simPalCheck(int palFunc) ++{ ++ // Need to move this to ISA code ++ // May also need to make this per thread ++ thread.kernelStats->callpal(palFunc, xcProxy); ++ ++ switch (palFunc) { ++ case PAL::halt: ++ haltContext(thread.tid); ++ if (--System::numSystemsRunning == 0) ++ new SimExitEvent("all cpus halted"); ++ break; ++ ++ case PAL::bpt: ++ case PAL::bugchk: ++ if (system->breakpoint()) ++ return false; ++ break; ++ } ++ ++ return true; ++} ++#endif ++ ++template ++BaseCPU * ++OzoneCPU::OzoneXC::getCpuPtr() ++{ ++ return cpu; ++} ++ ++template ++void ++OzoneCPU::OzoneXC::setCpuId(int id) ++{ ++ cpu->cpuId = id; ++ thread->cpuId = id; ++} ++ ++template ++void ++OzoneCPU::OzoneXC::setStatus(Status new_status) ++{ ++ thread->_status = new_status; ++} ++ ++template ++void ++OzoneCPU::OzoneXC::activate(int delay) ++{ ++ cpu->activateContext(thread->tid, delay); ++} ++ ++/// Set the status to Suspended. ++template ++void ++OzoneCPU::OzoneXC::suspend() ++{ ++ cpu->suspendContext(thread->tid); ++} ++ ++/// Set the status to Unallocated. ++template ++void ++OzoneCPU::OzoneXC::deallocate() ++{ ++ cpu->deallocateContext(thread->tid); ++} ++ ++/// Set the status to Halted. 
++template ++void ++OzoneCPU::OzoneXC::halt() ++{ ++ cpu->haltContext(thread->tid); ++} ++ ++#if FULL_SYSTEM ++template ++void ++OzoneCPU::OzoneXC::dumpFuncProfile() ++{ } ++#endif ++ ++template ++void ++OzoneCPU::OzoneXC::takeOverFrom(ExecContext *old_context) ++{ ++ // some things should already be set up ++ assert(getMemPtr() == old_context->getMemPtr()); ++#if FULL_SYSTEM ++ assert(getSystemPtr() == old_context->getSystemPtr()); ++#else ++ assert(getProcessPtr() == old_context->getProcessPtr()); ++#endif ++ ++ // copy over functional state ++ setStatus(old_context->status()); ++ copyArchRegs(old_context); ++ setCpuId(old_context->readCpuId()); ++ ++#if !FULL_SYSTEM ++ setFuncExeInst(old_context->readFuncExeInst()); ++#else ++ EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent(); ++ if (other_quiesce) { ++ // Point the quiesce event's XC at this XC so that it wakes up ++ // the proper CPU. ++ other_quiesce->xc = this; ++ } ++ if (thread->quiesceEvent) { ++ thread->quiesceEvent->xc = this; ++ } ++ ++ thread->kernelStats = old_context->getKernelStats(); ++// storeCondFailures = 0; ++ cpu->lockFlag = false; ++#endif ++ ++ old_context->setStatus(ExecContext::Unallocated); ++} ++ ++template ++void ++OzoneCPU::OzoneXC::regStats(const std::string &name) ++{ ++#if FULL_SYSTEM ++ thread->kernelStats = new Kernel::Statistics(cpu->system); ++ thread->kernelStats->regStats(name + ".kern"); ++#endif ++} ++ ++template ++void ++OzoneCPU::OzoneXC::serialize(std::ostream &os) ++{ } ++ ++template ++void ++OzoneCPU::OzoneXC::unserialize(Checkpoint *cp, const std::string §ion) ++{ } ++ ++#if FULL_SYSTEM ++template ++EndQuiesceEvent * ++OzoneCPU::OzoneXC::getQuiesceEvent() ++{ ++ return thread->quiesceEvent; ++} ++ ++template ++Tick ++OzoneCPU::OzoneXC::readLastActivate() ++{ ++ return thread->lastActivate; ++} ++ ++template ++Tick ++OzoneCPU::OzoneXC::readLastSuspend() ++{ ++ return thread->lastSuspend; ++} ++ ++template ++void ++OzoneCPU::OzoneXC::profileClear() ++{ ++ 
if (thread->profile) ++ thread->profile->clear(); ++} ++ ++template ++void ++OzoneCPU::OzoneXC::profileSample() ++{ ++ if (thread->profile) ++ thread->profile->sample(thread->profileNode, thread->profilePC); ++} ++#endif ++ ++template ++int ++OzoneCPU::OzoneXC::getThreadNum() ++{ ++ return thread->tid; ++} ++ ++// Also somewhat obnoxious. Really only used for the TLB fault. ++template ++TheISA::MachInst ++OzoneCPU::OzoneXC::getInst() ++{ ++ return thread->inst; ++} ++ ++template ++void ++OzoneCPU::OzoneXC::copyArchRegs(ExecContext *xc) ++{ ++ thread->PC = xc->readPC(); ++ thread->nextPC = xc->readNextPC(); ++ ++ cpu->frontEnd->setPC(thread->PC); ++ cpu->frontEnd->setNextPC(thread->nextPC); ++ ++ for (int i = 0; i < TheISA::TotalNumRegs; ++i) { ++ if (i < TheISA::FP_Base_DepTag) { ++ thread->renameTable[i]->setIntResult(xc->readIntReg(i)); ++ } else if (i < (TheISA::FP_Base_DepTag + TheISA::NumFloatRegs)) { ++ int fp_idx = i - TheISA::FP_Base_DepTag; ++ thread->renameTable[i]->setDoubleResult( ++ xc->readFloatRegDouble(fp_idx)); ++ } ++ } ++ ++#if !FULL_SYSTEM ++ thread->funcExeInst = xc->readFuncExeInst(); ++#endif ++ ++ // Need to copy the XC values into the current rename table, ++ // copy the misc regs. 
++ thread->regs.miscRegs.copyMiscRegs(xc); ++} ++ ++template ++void ++OzoneCPU::OzoneXC::clearArchRegs() ++{ ++ panic("Unimplemented!"); ++} ++ ++template ++uint64_t ++OzoneCPU::OzoneXC::readIntReg(int reg_idx) ++{ ++ return thread->renameTable[reg_idx]->readIntResult(); ++} ++ ++template ++float ++OzoneCPU::OzoneXC::readFloatRegSingle(int reg_idx) ++{ ++ int idx = reg_idx + TheISA::FP_Base_DepTag; ++ return thread->renameTable[idx]->readFloatResult(); ++} ++ ++template ++double ++OzoneCPU::OzoneXC::readFloatRegDouble(int reg_idx) ++{ ++ int idx = reg_idx + TheISA::FP_Base_DepTag; ++ return thread->renameTable[idx]->readDoubleResult(); ++} ++ ++template ++uint64_t ++OzoneCPU::OzoneXC::readFloatRegInt(int reg_idx) ++{ ++ int idx = reg_idx + TheISA::FP_Base_DepTag; ++ return thread->renameTable[idx]->readIntResult(); ++} ++ ++template ++void ++OzoneCPU::OzoneXC::setIntReg(int reg_idx, uint64_t val) ++{ ++ thread->renameTable[reg_idx]->setIntResult(val); ++ ++ if (!thread->inSyscall) { ++ cpu->squashFromXC(); ++ } ++} ++ ++template ++void ++OzoneCPU::OzoneXC::setFloatRegSingle(int reg_idx, float val) ++{ ++ panic("Unimplemented!"); ++} ++ ++template ++void ++OzoneCPU::OzoneXC::setFloatRegDouble(int reg_idx, double val) ++{ ++ int idx = reg_idx + TheISA::FP_Base_DepTag; ++ ++ thread->renameTable[idx]->setDoubleResult(val); ++ ++ if (!thread->inSyscall) { ++ cpu->squashFromXC(); ++ } ++} ++ ++template ++void ++OzoneCPU::OzoneXC::setFloatRegInt(int reg_idx, uint64_t val) ++{ ++ panic("Unimplemented!"); ++} ++ ++template ++void ++OzoneCPU::OzoneXC::setPC(Addr val) ++{ ++ thread->PC = val; ++ cpu->frontEnd->setPC(val); ++ ++ if (!thread->inSyscall) { ++ cpu->squashFromXC(); ++ } ++} ++ ++template ++void ++OzoneCPU::OzoneXC::setNextPC(Addr val) ++{ ++ thread->nextPC = val; ++ cpu->frontEnd->setNextPC(val); ++ ++ if (!thread->inSyscall) { ++ cpu->squashFromXC(); ++ } ++} ++ ++template ++TheISA::MiscReg ++OzoneCPU::OzoneXC::readMiscReg(int misc_reg) ++{ ++ return 
thread->regs.miscRegs.readReg(misc_reg); ++} ++ ++template ++TheISA::MiscReg ++OzoneCPU::OzoneXC::readMiscRegWithEffect(int misc_reg, Fault &fault) ++{ ++ return thread->regs.miscRegs.readRegWithEffect(misc_reg, ++ fault, this); ++} ++ ++template ++Fault ++OzoneCPU::OzoneXC::setMiscReg(int misc_reg, const MiscReg &val) ++{ ++ // Needs to setup a squash event unless we're in syscall mode ++ Fault ret_fault = thread->regs.miscRegs.setReg(misc_reg, val); ++ ++ if (!thread->inSyscall) { ++ cpu->squashFromXC(); ++ } ++ ++ return ret_fault; ++} + +template - class OoODynInst; ++Fault ++OzoneCPU::OzoneXC::setMiscRegWithEffect(int misc_reg, const MiscReg &val) ++{ ++ // Needs to setup a squash event unless we're in syscall mode ++ Fault ret_fault = thread->regs.miscRegs.setRegWithEffect(misc_reg, val, ++ this); + - struct OoOImpl { - typedef AlphaISA ISA; - typedef OoOCPU OoOCPU; - typedef OoOCPU FullCPU; - typedef OoODynInst DynInst; - typedef RefCountingPtr DynInstPtr; - }; ++ if (!thread->inSyscall) { ++ cpu->squashFromXC(); ++ } + - #endif // __CPU_OOO_CPU_OOO_IMPL_HH__ ++ return ret_fault; ++} diff --cc src/cpu/static_inst.hh index 33c9144fb,000000000..803b7a09b mode 100644,000000..100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@@ -1,475 -1,0 +1,490 @@@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __CPU_STATIC_INST_HH__ +#define __CPU_STATIC_INST_HH__ + +#include +#include + +#include "base/hashmap.hh" +#include "base/misc.hh" +#include "base/refcnt.hh" +#include "cpu/op_class.hh" +#include "sim/host.hh" +#include "arch/isa_traits.hh" + +// forward declarations +struct AlphaSimpleImpl; ++struct OzoneImpl; ++struct SimpleImpl; +class ExecContext; +class DynInst; +class Packet; + +template +class AlphaDynInst; + ++template ++class OzoneDynInst; ++ ++class CheckerCPU; +class FastCPU; +class AtomicSimpleCPU; +class TimingSimpleCPU; +class InorderCPU; +class SymbolTable; + +namespace Trace { + class InstRecord; +} + +/** + * Base, ISA-independent static instruction class. + * + * The main component of this class is the vector of flags and the + * associated methods for reading them. Any object that can rely + * solely on these flags can process instructions without being + * recompiled for multiple ISAs. + */ +class StaticInstBase : public RefCounted +{ + protected: + + /// Set of boolean static instruction properties. + /// + /// Notes: + /// - The IsInteger and IsFloating flags are based on the class of + /// registers accessed by the instruction. Although most + /// instructions will have exactly one of these two flags set, it + /// is possible for an instruction to have neither (e.g., direct + /// unconditional branches, memory barriers) or both (e.g., an + /// FP/int conversion). + /// - If IsMemRef is set, then exactly one of IsLoad or IsStore + /// will be set. + /// - If IsControl is set, then exactly one of IsDirectControl or + /// IsIndirect Control will be set, and exactly one of + /// IsCondControl or IsUncondControl will be set. + /// - IsSerializing, IsMemBarrier, and IsWriteBarrier are + /// implemented as flags since in the current model there's no + /// other way for instructions to inject behavior into the + /// pipeline outside of fetch. 
Once we go to an exec-in-exec CPU + /// model we should be able to get rid of these flags and + /// implement this behavior via the execute() methods. + /// + enum Flags { + IsNop, ///< Is a no-op (no effect at all). + + IsInteger, ///< References integer regs. + IsFloating, ///< References FP regs. + + IsMemRef, ///< References memory (load, store, or prefetch). + IsLoad, ///< Reads from memory (load or prefetch). + IsStore, ///< Writes to memory. ++ IsStoreConditional, ///< Store conditional instruction. + IsInstPrefetch, ///< Instruction-cache prefetch. + IsDataPrefetch, ///< Data-cache prefetch. + IsCopy, ///< Fast Cache block copy + + IsControl, ///< Control transfer instruction. + IsDirectControl, ///< PC relative control transfer. + IsIndirectControl, ///< Register indirect control transfer. + IsCondControl, ///< Conditional control transfer. + IsUncondControl, ///< Unconditional control transfer. + IsCall, ///< Subroutine call. + IsReturn, ///< Subroutine return. + + IsCondDelaySlot,///< Conditional Delay-Slot Instruction + + IsThreadSync, ///< Thread synchronization operation. + + IsSerializing, ///< Serializes pipeline: won't execute until all + /// older instructions have committed. + IsSerializeBefore, + IsSerializeAfter, + IsMemBarrier, ///< Is a memory barrier + IsWriteBarrier, ///< Is a write barrier + + IsNonSpeculative, ///< Should not be executed speculatively ++ IsQuiesce, ///< Is a quiesce instruction ++ ++ IsIprAccess, ///< Accesses IPRs ++ IsUnverifiable, ///< Can't be verified by a checker + + NumFlags + }; + + /// Flag values for this instruction. + std::bitset flags; + + /// See opClass(). + OpClass _opClass; + + /// See numSrcRegs(). + int8_t _numSrcRegs; + + /// See numDestRegs(). + int8_t _numDestRegs; + + /// The following are used to track physical register usage + /// for machines with separate int & FP reg files. + //@{ + int8_t _numFPDestRegs; + int8_t _numIntDestRegs; + //@} + + /// Constructor. 
+ /// It's important to initialize everything here to a sane + /// default, since the decoder generally only overrides + /// the fields that are meaningful for the particular + /// instruction. + StaticInstBase(OpClass __opClass) + : _opClass(__opClass), _numSrcRegs(0), _numDestRegs(0), + _numFPDestRegs(0), _numIntDestRegs(0) + { + } + + public: + + /// @name Register information. + /// The sum of numFPDestRegs() and numIntDestRegs() equals + /// numDestRegs(). The former two functions are used to track + /// physical register usage for machines with separate int & FP + /// reg files. + //@{ + /// Number of source registers. + int8_t numSrcRegs() const { return _numSrcRegs; } + /// Number of destination registers. + int8_t numDestRegs() const { return _numDestRegs; } + /// Number of floating-point destination regs. + int8_t numFPDestRegs() const { return _numFPDestRegs; } + /// Number of integer destination regs. + int8_t numIntDestRegs() const { return _numIntDestRegs; } + //@} + + /// @name Flag accessors. + /// These functions are used to access the values of the various + /// instruction property flags. See StaticInstBase::Flags for descriptions + /// of the individual flags. 
+ //@{ + + bool isNop() const { return flags[IsNop]; } + + bool isMemRef() const { return flags[IsMemRef]; } + bool isLoad() const { return flags[IsLoad]; } + bool isStore() const { return flags[IsStore]; } ++ bool isStoreConditional() const { return flags[IsStoreConditional]; } + bool isInstPrefetch() const { return flags[IsInstPrefetch]; } + bool isDataPrefetch() const { return flags[IsDataPrefetch]; } + bool isCopy() const { return flags[IsCopy];} + + bool isInteger() const { return flags[IsInteger]; } + bool isFloating() const { return flags[IsFloating]; } + + bool isControl() const { return flags[IsControl]; } + bool isCall() const { return flags[IsCall]; } + bool isReturn() const { return flags[IsReturn]; } + bool isDirectCtrl() const { return flags[IsDirectControl]; } + bool isIndirectCtrl() const { return flags[IsIndirectControl]; } + bool isCondCtrl() const { return flags[IsCondControl]; } + bool isUncondCtrl() const { return flags[IsUncondControl]; } + + bool isThreadSync() const { return flags[IsThreadSync]; } + bool isSerializing() const { return flags[IsSerializing] || + flags[IsSerializeBefore] || + flags[IsSerializeAfter]; } + bool isSerializeBefore() const { return flags[IsSerializeBefore]; } + bool isSerializeAfter() const { return flags[IsSerializeAfter]; } + bool isMemBarrier() const { return flags[IsMemBarrier]; } + bool isWriteBarrier() const { return flags[IsWriteBarrier]; } + bool isNonSpeculative() const { return flags[IsNonSpeculative]; } ++ bool isQuiesce() const { return flags[IsQuiesce]; } ++ bool isIprAccess() const { return flags[IsIprAccess]; } ++ bool isUnverifiable() const { return flags[IsUnverifiable]; } + //@} + + /// Operation class. Used to select appropriate function unit in issue. + OpClass opClass() const { return _opClass; } +}; + + +// forward declaration +class StaticInstPtr; + +/** + * Generic yet ISA-dependent static instruction class. 
+ * + * This class builds on StaticInstBase, defining fields and interfaces + * that are generic across all ISAs but that differ in details + * according to the specific ISA being used. + */ +class StaticInst : public StaticInstBase +{ + public: + + /// Binary machine instruction type. + typedef TheISA::MachInst MachInst; + /// Binary extended machine instruction type. + typedef TheISA::ExtMachInst ExtMachInst; + /// Logical register index type. + typedef TheISA::RegIndex RegIndex; + + enum { + MaxInstSrcRegs = TheISA::MaxInstSrcRegs, //< Max source regs + MaxInstDestRegs = TheISA::MaxInstDestRegs, //< Max dest regs + }; + + + /// Return logical index (architectural reg num) of i'th destination reg. + /// Only the entries from 0 through numDestRegs()-1 are valid. + RegIndex destRegIdx(int i) const { return _destRegIdx[i]; } + + /// Return logical index (architectural reg num) of i'th source reg. + /// Only the entries from 0 through numSrcRegs()-1 are valid. + RegIndex srcRegIdx(int i) const { return _srcRegIdx[i]; } + + /// Pointer to a statically allocated "null" instruction object. + /// Used to give eaCompInst() and memAccInst() something to return + /// when called on non-memory instructions. + static StaticInstPtr nullStaticInstPtr; + + /** + * Memory references only: returns "fake" instruction representing + * the effective address part of the memory operation. Used to + * obtain the dependence info (numSrcRegs and srcRegIdx[]) for + * just the EA computation. + */ + virtual const + StaticInstPtr &eaCompInst() const { return nullStaticInstPtr; } + + /** + * Memory references only: returns "fake" instruction representing + * the memory access part of the memory operation. Used to + * obtain the dependence info (numSrcRegs and srcRegIdx[]) for + * just the memory access (not the EA computation). + */ + virtual const + StaticInstPtr &memAccInst() const { return nullStaticInstPtr; } + + /// The binary machine instruction. 
+ const ExtMachInst machInst; + + protected: + + /// See destRegIdx(). + RegIndex _destRegIdx[MaxInstDestRegs]; + /// See srcRegIdx(). + RegIndex _srcRegIdx[MaxInstSrcRegs]; + + /** + * Base mnemonic (e.g., "add"). Used by generateDisassembly() + * methods. Also useful to readily identify instructions from + * within the debugger when #cachedDisassembly has not been + * initialized. + */ + const char *mnemonic; + + /** + * String representation of disassembly (lazily evaluated via + * disassemble()). + */ + mutable std::string *cachedDisassembly; + + /** + * Internal function to generate disassembly string. + */ + virtual std::string + generateDisassembly(Addr pc, const SymbolTable *symtab) const = 0; + + /// Constructor. + StaticInst(const char *_mnemonic, ExtMachInst _machInst, OpClass __opClass) + : StaticInstBase(__opClass), + machInst(_machInst), mnemonic(_mnemonic), cachedDisassembly(0) + { + } + + public: + + virtual ~StaticInst() + { + if (cachedDisassembly) + delete cachedDisassembly; + } + +/** + * The execute() signatures are auto-generated by scons based on the + * set of CPU models we are compiling in today. + */ +#include "cpu/static_inst_exec_sigs.hh" + + /** + * Return the target address for a PC-relative branch. + * Invalid if not a PC-relative branch (i.e. isDirectCtrl() + * should be true). + */ + virtual Addr branchTarget(Addr branchPC) const + { + panic("StaticInst::branchTarget() called on instruction " + "that is not a PC-relative branch."); + } + + /** + * Return the target address for an indirect branch (jump). The + * register value is read from the supplied execution context, so + * the result is valid only if the execution context is about to + * execute the branch in question. Invalid if not an indirect + * branch (i.e. isIndirectCtrl() should be true). 
+ */ + virtual Addr branchTarget(ExecContext *xc) const + { + panic("StaticInst::branchTarget() called on instruction " + "that is not an indirect branch."); + } + + /** + * Return true if the instruction is a control transfer, and if so, + * return the target address as well. + */ + bool hasBranchTarget(Addr pc, ExecContext *xc, Addr &tgt) const; + + /** + * Return string representation of disassembled instruction. + * The default version of this function will call the internal + * virtual generateDisassembly() function to get the string, + * then cache it in #cachedDisassembly. If the disassembly + * should not be cached, this function should be overridden directly. + */ + virtual const std::string &disassemble(Addr pc, + const SymbolTable *symtab = 0) const + { + if (!cachedDisassembly) + cachedDisassembly = + new std::string(generateDisassembly(pc, symtab)); + + return *cachedDisassembly; + } + + /// Decoded instruction cache type. + /// For now we're using a generic hash_map; this seems to work + /// pretty well. + typedef m5::hash_map DecodeCache; + + /// A cache of decoded instruction objects. + static DecodeCache decodeCache; + + /** + * Dump some basic stats on the decode cache hash map. + * Only gets called if DECODE_CACHE_HASH_STATS is defined. + */ + static void dumpDecodeCacheStats(); + + /// Decode a machine instruction. + /// @param mach_inst The binary instruction to decode. + /// @retval A pointer to the corresponding StaticInst object. + //This is defined as inline below. 
+ static StaticInstPtr decode(ExtMachInst mach_inst); + + //MIPS Decoder Debug Functions + int getOpcode() { return (machInst & 0xFC000000) >> 26 ; }//31..26 + int getRs() { return (machInst & 0x03E00000) >> 21; } //25...21 + int getRt() { return (machInst & 0x001F0000) >> 16; } //20...16 + int getRd() { return (machInst & 0x0000F800) >> 11; } //15...11 + int getImm() { return (machInst & 0x0000FFFF); } //15...0 + int getFunction(){ return (machInst & 0x0000003F); }//5...0 + int getBranch(){ return (machInst & 0x0000FFFF); }//15...0 + int getJump(){ return (machInst & 0x03FFFFFF); }//5...0 + int getHint(){ return (machInst & 0x000007C0) >> 6; } //10...6 + std::string getName() { return mnemonic; } +}; + +typedef RefCountingPtr StaticInstBasePtr; + +/// Reference-counted pointer to a StaticInst object. +/// This type should be used instead of "StaticInst *" so that +/// StaticInst objects can be properly reference-counted. +class StaticInstPtr : public RefCountingPtr +{ + public: + /// Constructor. + StaticInstPtr() + : RefCountingPtr() + { + } + + /// Conversion from "StaticInst *". + StaticInstPtr(StaticInst *p) + : RefCountingPtr(p) + { + } + + /// Copy constructor. + StaticInstPtr(const StaticInstPtr &r) + : RefCountingPtr(r) + { + } + + /// Construct directly from machine instruction. + /// Calls StaticInst::decode(). + StaticInstPtr(TheISA::ExtMachInst mach_inst) + : RefCountingPtr(StaticInst::decode(mach_inst)) + { + } + + /// Convert to pointer to StaticInstBase class. + operator const StaticInstBasePtr() + { + return this->get(); + } +}; + +inline StaticInstPtr +StaticInst::decode(StaticInst::ExtMachInst mach_inst) +{ +#ifdef DECODE_CACHE_HASH_STATS + // Simple stats on decode hash_map. Turns out the default + // hash function is as good as anything I could come up with. 
+ const int dump_every_n = 10000000; + static int decodes_til_dump = dump_every_n; + + if (--decodes_til_dump == 0) { + dumpDecodeCacheStats(); + decodes_til_dump = dump_every_n; + } +#endif + + DecodeCache::iterator iter = decodeCache.find(mach_inst); + if (iter != decodeCache.end()) { + return iter->second; + } + + StaticInstPtr si = TheISA::decodeInst(mach_inst); + decodeCache[mach_inst] = si; + return si; +} + +#endif // __CPU_STATIC_INST_HH__ diff --cc src/kern/system_events.cc index fd5c12e44,000000000..f3b1bf91d mode 100644,000000..100644 --- a/src/kern/system_events.cc +++ b/src/kern/system_events.cc @@@ -1,91 -1,0 +1,94 @@@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/base.hh" +#include "cpu/cpu_exec_context.hh" +#include "kern/kernel_stats.hh" +#include "kern/system_events.hh" +#include "sim/system.hh" + +using namespace TheISA; + +void +SkipFuncEvent::process(ExecContext *xc) +{ + Addr newpc = xc->readIntReg(ReturnAddressReg); + + DPRINTF(PCEvent, "skipping %s: pc=%x, newpc=%x\n", description, + xc->readPC(), newpc); + + xc->setPC(newpc); + xc->setNextPC(xc->readPC() + sizeof(TheISA::MachInst)); +/* + BranchPred *bp = xc->getCpuPtr()->getBranchPred(); + if (bp != NULL) { + bp->popRAS(xc->getThreadNum()); + } +*/ +} + + +FnEvent::FnEvent(PCEventQueue *q, const std::string &desc, Addr addr, + Stats::MainBin *bin) + : PCEvent(q, desc, addr), _name(desc), mybin(bin) +{ +} + +void +FnEvent::process(ExecContext *xc) +{ + if (xc->misspeculating()) + return; + + xc->getSystemPtr()->kernelBinning->call(xc, mybin); +} + +void +IdleStartEvent::process(ExecContext *xc) +{ - xc->getCpuPtr()->kernelStats->setIdleProcess( - xc->readMiscReg(AlphaISA::IPR_PALtemp23), xc); ++ if (xc->getKernelStats()) ++ xc->getKernelStats()->setIdleProcess( ++ xc->readMiscReg(AlphaISA::IPR_PALtemp23), xc); + remove(); +} + +void +InterruptStartEvent::process(ExecContext *xc) +{ - xc->getCpuPtr()->kernelStats->mode(Kernel::interrupt, xc); ++ if (xc->getKernelStats()) ++ xc->getKernelStats()->mode(Kernel::interrupt, xc); +} + +void +InterruptEndEvent::process(ExecContext *xc) +{ + // We go back to kernel, if 
we are user, inside the rti + // pal code we will get switched to user because of the ICM write - xc->getCpuPtr()->kernelStats->mode(Kernel::kernel, xc); ++ if (xc->getKernelStats()) ++ xc->getKernelStats()->mode(Kernel::kernel, xc); +} diff --cc src/kern/tru64/tru64.hh index 91db5bb84,000000000..fff91f8ca mode 100644,000000..100644 --- a/src/kern/tru64/tru64.hh +++ b/src/kern/tru64/tru64.hh @@@ -1,1240 -1,0 +1,1240 @@@ +/* + * Copyright (c) 2001-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __TRU64_HH__ +#define __TRU64_HH__ +#include "config/full_system.hh" + +#if FULL_SYSTEM + +class Tru64 {}; + +#else //!FULL_SYSTEM + +#include +#include +#if defined(__OpenBSD__) || defined(__APPLE__) || defined(__FreeBSD__) +#include +#include +#else +#include +#endif + +#include +#include +#include +#include // for memset() +#include + +#include "cpu/base.hh" +#include "sim/root.hh" +#include "sim/syscall_emul.hh" + +typedef struct stat global_stat; +typedef struct statfs global_statfs; +typedef struct dirent global_dirent; + +class TranslatingPort; + +/// +/// This class encapsulates the types, structures, constants, +/// functions, and syscall-number mappings specific to the Alpha Tru64 +/// syscall interface. +/// +class Tru64 { + + public: + + //@{ + /// Basic Tru64 types. + typedef uint64_t size_t; + typedef uint64_t off_t; + typedef uint16_t nlink_t; + typedef int32_t dev_t; + typedef uint32_t uid_t; + typedef uint32_t gid_t; + typedef uint32_t time_t; + typedef uint32_t mode_t; + typedef uint32_t ino_t; + typedef struct { int val[2]; } quad; + typedef quad fsid_t; + //@} + + /// Stat buffer. Note that Tru64 v5.0+ use a new "F64" stat + /// structure, and a new set of syscall numbers for stat calls. + /// On some hosts (notably Linux) define st_atime, st_mtime, and + /// st_ctime as macros, so we append an X to get around this. 
+ struct F64_stat { + dev_t st_dev; //!< st_dev + int32_t st_retired1; //!< st_retired1 + mode_t st_mode; //!< st_mode + nlink_t st_nlink; //!< st_nlink + uint16_t st_nlink_reserved; //!< st_nlink_reserved + uid_t st_uid; //!< st_uid + gid_t st_gid; //!< st_gid + dev_t st_rdev; //!< st_rdev + dev_t st_ldev; //!< st_ldev + off_t st_size; //!< st_size + time_t st_retired2; //!< st_retired2 + int32_t st_uatime; //!< st_uatime + time_t st_retired3; //!< st_retired3 + int32_t st_umtime; //!< st_umtime + time_t st_retired4; //!< st_retired4 + int32_t st_uctime; //!< st_uctime + int32_t st_retired5; //!< st_retired5 + int32_t st_retired6; //!< st_retired6 + uint32_t st_flags; //!< st_flags + uint32_t st_gen; //!< st_gen + uint64_t st_spare[4]; //!< st_spare[4] + ino_t st_ino; //!< st_ino + int32_t st_ino_reserved; //!< st_ino_reserved + time_t st_atimeX; //!< st_atime + int32_t st_atime_reserved; //!< st_atime_reserved + time_t st_mtimeX; //!< st_mtime + int32_t st_mtime_reserved; //!< st_mtime_reserved + time_t st_ctimeX; //!< st_ctime + int32_t st_ctime_reserved; //!< st_ctime_reserved + uint64_t st_blksize; //!< st_blksize + uint64_t st_blocks; //!< st_blocks + }; + + + /// Old Tru64 v4.x stat struct. + /// Tru64 maintains backwards compatibility with v4.x by + /// implementing another set of stat functions using the old + /// structure definition and binding them to the old syscall + /// numbers. + + struct pre_F64_stat { + dev_t st_dev; + ino_t st_ino; + mode_t st_mode; + nlink_t st_nlink; + uid_t st_uid __attribute__ ((aligned(sizeof(uid_t)))); + gid_t st_gid; + dev_t st_rdev; + off_t st_size __attribute__ ((aligned(sizeof(off_t)))); + time_t st_atimeX; + int32_t st_uatime; + time_t st_mtimeX; + int32_t st_umtime; + time_t st_ctimeX; + int32_t st_uctime; + uint32_t st_blksize; + int32_t st_blocks; + uint32_t st_flags; + uint32_t st_gen; + }; + + /// For statfs(). 
+ struct F64_statfs { + int16_t f_type; + int16_t f_flags; + int32_t f_retired1; + int32_t f_retired2; + int32_t f_retired3; + int32_t f_retired4; + int32_t f_retired5; + int32_t f_retired6; + int32_t f_retired7; + fsid_t f_fsid; + int32_t f_spare[9]; + char f_retired8[90]; + char f_retired9[90]; + uint64_t dummy[10]; // was union mount_info mount_info; + uint64_t f_flags2; + int64_t f_spare2[14]; + int64_t f_fsize; + int64_t f_bsize; + int64_t f_blocks; + int64_t f_bfree; + int64_t f_bavail; + int64_t f_files; + int64_t f_ffree; + char f_mntonname[1024]; + char f_mntfromname[1024]; + }; + + /// For old Tru64 v4.x statfs() + struct pre_F64_statfs { + int16_t f_type; + int16_t f_flags; + int32_t f_fsize; + int32_t f_bsize; + int32_t f_blocks; + int32_t f_bfree; + int32_t f_bavail; + int32_t f_files; + int32_t f_ffree; + fsid_t f_fsid; + int32_t f_spare[9]; + char f_mntonname[90]; + char f_mntfromname[90]; + uint64_t dummy[10]; // was union mount_info mount_info; + }; + + /// For getdirentries(). + struct dirent + { + ino_t d_ino; //!< file number of entry + uint16_t d_reclen; //!< length of this record + uint16_t d_namlen; //!< length of string in d_name + char d_name[256]; //!< dummy name length + }; + + + /// Length of strings in struct utsname (plus 1 for null char). + static const int _SYS_NMLN = 32; + + /// Interface struct for uname(). + struct utsname { + char sysname[_SYS_NMLN]; //!< System name. + char nodename[_SYS_NMLN]; //!< Node name. + char release[_SYS_NMLN]; //!< OS release. + char version[_SYS_NMLN]; //!< OS version. + char machine[_SYS_NMLN]; //!< Machine type. + }; + + /// Limit struct for getrlimit/setrlimit. + struct rlimit { + uint64_t rlim_cur; //!< soft limit + uint64_t rlim_max; //!< hard limit + }; + + + /// For getsysinfo() GSI_CPU_INFO option. 
+ struct cpu_info { + uint32_t current_cpu; //!< current_cpu + uint32_t cpus_in_box; //!< cpus_in_box + uint32_t cpu_type; //!< cpu_type + uint32_t ncpus; //!< ncpus + uint64_t cpus_present; //!< cpus_present + uint64_t cpus_running; //!< cpus_running + uint64_t cpu_binding; //!< cpu_binding + uint64_t cpu_ex_binding; //!< cpu_ex_binding + uint32_t mhz; //!< mhz + uint32_t unused[3]; //!< future expansion + }; + + /// For gettimeofday. + struct timeval { + uint32_t tv_sec; //!< seconds + uint32_t tv_usec; //!< microseconds + }; + + /// For getrusage(). + struct rusage { + struct timeval ru_utime; //!< user time used + struct timeval ru_stime; //!< system time used + uint64_t ru_maxrss; //!< ru_maxrss + uint64_t ru_ixrss; //!< integral shared memory size + uint64_t ru_idrss; //!< integral unshared data " + uint64_t ru_isrss; //!< integral unshared stack " + uint64_t ru_minflt; //!< page reclaims - total vmfaults + uint64_t ru_majflt; //!< page faults + uint64_t ru_nswap; //!< swaps + uint64_t ru_inblock; //!< block input operations + uint64_t ru_oublock; //!< block output operations + uint64_t ru_msgsnd; //!< messages sent + uint64_t ru_msgrcv; //!< messages received + uint64_t ru_nsignals; //!< signals received + uint64_t ru_nvcsw; //!< voluntary context switches + uint64_t ru_nivcsw; //!< involuntary " + }; + + /// For sigreturn(). 
+ struct sigcontext { + int64_t sc_onstack; //!< sigstack state to restore + int64_t sc_mask; //!< signal mask to restore + int64_t sc_pc; //!< pc at time of signal + int64_t sc_ps; //!< psl to retore + int64_t sc_regs[32]; //!< processor regs 0 to 31 + int64_t sc_ownedfp; //!< fp has been used + int64_t sc_fpregs[32]; //!< fp regs 0 to 31 + uint64_t sc_fpcr; //!< floating point control reg + uint64_t sc_fp_control; //!< software fpcr + int64_t sc_reserved1; //!< reserved for kernel + uint32_t sc_kreserved1; //!< reserved for kernel + uint32_t sc_kreserved2; //!< reserved for kernel + size_t sc_ssize; //!< stack size + caddr_t sc_sbase; //!< stack start + uint64_t sc_traparg_a0; //!< a0 argument to trap on exc + uint64_t sc_traparg_a1; //!< a1 argument to trap on exc + uint64_t sc_traparg_a2; //!< a2 argument to trap on exc + uint64_t sc_fp_trap_pc; //!< imprecise pc + uint64_t sc_fp_trigger_sum; //!< Exception summary at trigg + uint64_t sc_fp_trigger_inst; //!< Instruction at trigger pc + }; + + + + /// For table(). + struct tbl_sysinfo { + uint64_t si_user; //!< User time + uint64_t si_nice; //!< Nice time + uint64_t si_sys; //!< System time + uint64_t si_idle; //!< Idle time + uint64_t si_hz; //!< hz + uint64_t si_phz; //!< phz + uint64_t si_boottime; //!< Boot time in seconds + uint64_t wait; //!< Wait time + uint32_t si_max_procs; //!< rpb->rpb_numprocs + uint32_t pad; //!< padding + }; + + + /// For stack_create. + struct vm_stack { + // was void * + Addr address; //!< address hint + size_t rsize; //!< red zone size + size_t ysize; //!< yellow zone size + size_t gsize; //!< green zone size + size_t swap; //!< amount of swap to reserve + size_t incr; //!< growth increment + uint64_t align; //!< address alignment + uint64_t flags; //!< MAP_FIXED etc. + // was struct memalloc_attr * + Addr attr; //!< allocation policy + uint64_t reserved; //!< reserved + }; + + /// Return values for nxm calls. 
+ enum { + KERN_NOT_RECEIVER = 7, + KERN_NOT_IN_SET = 12 + }; + + /// For nxm_task_init. + static const int NXM_TASK_INIT_VP = 2; //!< initial thread is VP + + /// Task attribute structure. + struct nxm_task_attr { + int64_t nxm_callback; //!< nxm_callback + unsigned int nxm_version; //!< nxm_version + unsigned short nxm_uniq_offset; //!< nxm_uniq_offset + unsigned short flags; //!< flags + int nxm_quantum; //!< nxm_quantum + int pad1; //!< pad1 + int64_t pad2; //!< pad2 + }; + + /// Signal set. + typedef uint64_t sigset_t; + + /// Thread state shared between user & kernel. + struct ushared_state { + sigset_t sigmask; //!< thread signal mask + sigset_t sig; //!< thread pending mask + // struct nxm_pth_state * + Addr pth_id; //!< out-of-line state + int flags; //!< shared flags +#define US_SIGSTACK 0x1 // thread called sigaltstack +#define US_ONSTACK 0x2 // thread is running on altstack +#define US_PROFILE 0x4 // thread called profil +#define US_SYSCALL 0x8 // thread in syscall +#define US_TRAP 0x10 // thread has trapped +#define US_YELLOW 0x20 // thread has mellowed yellow +#define US_YZONE 0x40 // thread has zoned out +#define US_FP_OWNED 0x80 // thread used floating point + + int cancel_state; //!< thread's cancelation state +#define US_CANCEL 0x1 // cancel pending +#define US_NOCANCEL 0X2 // synch cancel disabled +#define US_SYS_NOCANCEL 0x4 // syscall cancel disabled +#define US_ASYNC_NOCANCEL 0x8 // asynch cancel disabled +#define US_CANCEL_BITS (US_NOCANCEL|US_SYS_NOCANCEL|US_ASYNC_NOCANCEL) +#define US_CANCEL_MASK (US_CANCEL|US_NOCANCEL|US_SYS_NOCANCEL| \ + US_ASYNC_NOCANCEL) + + // These are semi-shared. They are always visible to + // the kernel but are never context-switched by the library. 
+ + int nxm_ssig; //!< scheduler's synchronous signals + int reserved1; //!< reserved1 + int64_t nxm_active; //!< scheduler active + int64_t reserved2; //!< reserved2 + }; + + struct nxm_sched_state { + struct ushared_state nxm_u; //!< state own by user thread + unsigned int nxm_bits; //!< scheduler state / slot + int nxm_quantum; //!< quantum count-down value + int nxm_set_quantum; //!< quantum reset value + int nxm_sysevent; //!< syscall state + // struct nxm_upcall * + Addr nxm_uc_ret; //!< stack ptr of null thread + // void * + Addr nxm_tid; //!< scheduler's thread id + int64_t nxm_va; //!< page fault address + // struct nxm_pth_state * + Addr nxm_pthid; //!< id of null thread + uint64_t nxm_bound_pcs_count; //!< bound PCS thread count + int64_t pad[2]; //!< pad + }; + + /// nxm_shared. + struct nxm_shared { + int64_t nxm_callback; //!< address of upcall routine + unsigned int nxm_version; //!< version number + unsigned short nxm_uniq_offset; //!< correction factor for TEB + unsigned short pad1; //!< pad1 + int64_t space[2]; //!< future growth + struct nxm_sched_state nxm_ss[1]; //!< array of shared areas + }; + + /// nxm_slot_state_t. + enum nxm_slot_state_t { + NXM_SLOT_AVAIL, + NXM_SLOT_BOUND, + NXM_SLOT_UNBOUND, + NXM_SLOT_EMPTY + }; + + /// nxm_config_info + struct nxm_config_info { + int nxm_nslots_per_rad; //!< max number of VP slots per RAD + int nxm_nrads; //!< max number of RADs + // nxm_slot_state_t * + Addr nxm_slot_state; //!< per-VP slot state + // struct nxm_shared * + Addr nxm_rad[1]; //!< per-RAD shared areas + }; + + /// For nxm_thread_create. + enum nxm_thread_type { + NXM_TYPE_SCS = 0, + NXM_TYPE_VP = 1, + NXM_TYPE_MANAGER = 2 + }; + + /// Thread attributes. 
+ struct nxm_thread_attr { + int version; //!< version + int type; //!< type + int cancel_flags; //!< cancel_flags + int priority; //!< priority + int policy; //!< policy + int signal_type; //!< signal_type + // void * + Addr pthid; //!< pthid + sigset_t sigmask; //!< sigmask + /// Initial register values. + struct { + uint64_t pc; //!< pc + uint64_t sp; //!< sp + uint64_t a0; //!< a0 + } registers; + uint64_t pad2[2]; //!< pad2 + }; + + /// Helper function to convert a host stat buffer to a target stat + /// buffer. Also copies the target buffer out to the simulated + /// memory space. Used by stat(), fstat(), and lstat(). + template + static void + copyOutStatBuf(TranslatingPort *mem, Addr addr, global_stat *host) + { + using namespace TheISA; + + TypedBufferArg tgt(addr); + + tgt->st_dev = htog(host->st_dev); + tgt->st_ino = htog(host->st_ino); + tgt->st_mode = htog(host->st_mode); + tgt->st_nlink = htog(host->st_nlink); + tgt->st_uid = htog(host->st_uid); + tgt->st_gid = htog(host->st_gid); + tgt->st_rdev = htog(host->st_rdev); + tgt->st_size = htog(host->st_size); + tgt->st_atimeX = htog(host->st_atime); + tgt->st_mtimeX = htog(host->st_mtime); + tgt->st_ctimeX = htog(host->st_ctime); + tgt->st_blksize = htog(host->st_blksize); + tgt->st_blocks = htog(host->st_blocks); + + tgt.copyOut(mem); + } + + /// Helper function to convert a host statfs buffer to a target statfs + /// buffer. Also copies the target buffer out to the simulated + /// memory space. Used by statfs() and fstatfs(). 
+ template + static void + copyOutStatfsBuf(TranslatingPort *mem, Addr addr, global_statfs *host) + { + using namespace TheISA; + + TypedBufferArg tgt(addr); + +#if defined(__OpenBSD__) || defined(__APPLE__) || defined(__FreeBSD__) + tgt->f_type = 0; +#else + tgt->f_type = htog(host->f_type); +#endif + tgt->f_bsize = htog(host->f_bsize); + tgt->f_blocks = htog(host->f_blocks); + tgt->f_bfree = htog(host->f_bfree); + tgt->f_bavail = htog(host->f_bavail); + tgt->f_files = htog(host->f_files); + tgt->f_ffree = htog(host->f_ffree); + + // Is this as string normally? + memcpy(&tgt->f_fsid, &host->f_fsid, sizeof(host->f_fsid)); + + tgt.copyOut(mem); + } + + class F64 { + public: + static void copyOutStatBuf(TranslatingPort *mem, Addr addr, + global_stat *host) + { + Tru64::copyOutStatBuf(mem, addr, host); + } + + static void copyOutStatfsBuf(TranslatingPort *mem, Addr addr, + global_statfs *host) + { + Tru64::copyOutStatfsBuf(mem, addr, host); + } + }; + + class PreF64 { + public: + static void copyOutStatBuf(TranslatingPort *mem, Addr addr, + global_stat *host) + { + Tru64::copyOutStatBuf(mem, addr, host); + } + + static void copyOutStatfsBuf(TranslatingPort *mem, Addr addr, + global_statfs *host) + { + Tru64::copyOutStatfsBuf(mem, addr, host); + } + }; + + /// Helper function to convert a host stat buffer to an old pre-F64 + /// (4.x) target stat buffer. Also copies the target buffer out to + /// the simulated memory space. Used by pre_F64_stat(), + /// pre_F64_fstat(), and pre_F64_lstat(). 
+ static void + copyOutPreF64StatBuf(TranslatingPort *mem, Addr addr, struct stat *host) + { + using namespace TheISA; + + TypedBufferArg tgt(addr); + + tgt->st_dev = htog(host->st_dev); + tgt->st_ino = htog(host->st_ino); + tgt->st_mode = htog(host->st_mode); + tgt->st_nlink = htog(host->st_nlink); + tgt->st_uid = htog(host->st_uid); + tgt->st_gid = htog(host->st_gid); + tgt->st_rdev = htog(host->st_rdev); + tgt->st_size = htog(host->st_size); + tgt->st_atimeX = htog(host->st_atime); + tgt->st_mtimeX = htog(host->st_mtime); + tgt->st_ctimeX = htog(host->st_ctime); + tgt->st_blksize = htog(host->st_blksize); + tgt->st_blocks = htog(host->st_blocks); + + tgt.copyOut(mem); + } + + + /// The target system's hostname. + static const char *hostname; + + + /// Target getdirentries() handler. + static SyscallReturn + getdirentriesFunc(SyscallDesc *desc, int callnum, Process *process, + ExecContext *xc) + { + using namespace TheISA; + +#ifdef __CYGWIN__ + panic("getdirent not implemented on cygwin!"); +#else + int fd = process->sim_fd(xc->getSyscallArg(0)); + Addr tgt_buf = xc->getSyscallArg(1); + int tgt_nbytes = xc->getSyscallArg(2); + Addr tgt_basep = xc->getSyscallArg(3); + + char * const host_buf = new char[tgt_nbytes]; + + // just pass basep through uninterpreted. + TypedBufferArg basep(tgt_basep); + basep.copyIn(xc->getMemPort()); + long host_basep = (off_t)htog((int64_t)*basep); + int host_result = getdirentries(fd, host_buf, tgt_nbytes, &host_basep); + + // check for error + if (host_result < 0) { + delete [] host_buf; + return -errno; + } + + // no error: copy results back to target space + Addr tgt_buf_ptr = tgt_buf; + char *host_buf_ptr = host_buf; + char *host_buf_end = host_buf + host_result; + while (host_buf_ptr < host_buf_end) { + global_dirent *host_dp = (global_dirent *)host_buf_ptr; + int namelen = strlen(host_dp->d_name); + + // Actual size includes padded string rounded up for alignment. 
+ // Subtract 256 for dummy char array in Tru64::dirent definition. + // Add 1 to namelen for terminating null char. + int tgt_bufsize = sizeof(Tru64::dirent) - 256 + roundUp(namelen+1, 8); + TypedBufferArg tgt_dp(tgt_buf_ptr, tgt_bufsize); + tgt_dp->d_ino = host_dp->d_ino; + tgt_dp->d_reclen = tgt_bufsize; + tgt_dp->d_namlen = namelen; + strcpy(tgt_dp->d_name, host_dp->d_name); + tgt_dp.copyOut(xc->getMemPort()); + + tgt_buf_ptr += tgt_bufsize; + host_buf_ptr += host_dp->d_reclen; + } + + delete [] host_buf; + + *basep = htog((int64_t)host_basep); + basep.copyOut(xc->getMemPort()); + + return tgt_buf_ptr - tgt_buf; +#endif + } + + /// Target sigreturn() handler. + static SyscallReturn + sigreturnFunc(SyscallDesc *desc, int callnum, Process *process, + ExecContext *xc) + { + using namespace TheISA; + + using TheISA::RegFile; + TypedBufferArg sc(xc->getSyscallArg(0)); + + sc.copyIn(xc->getMemPort()); + + // Restore state from sigcontext structure. + // Note that we'll advance PC <- NPC before the end of the cycle, + // so we need to restore the desired PC into NPC. + // The current regs->pc will get clobbered. + xc->setNextPC(htog(sc->sc_pc)); + + for (int i = 0; i < 31; ++i) { + xc->setIntReg(i, htog(sc->sc_regs[i])); + xc->setFloatRegBits(i, htog(sc->sc_fpregs[i])); + } + + xc->setMiscReg(TheISA::Fpcr_DepTag, htog(sc->sc_fpcr)); + + return 0; + } + + + // + // Mach syscalls -- identified by negated syscall numbers + // + + /// Create a stack region for a thread. + static SyscallReturn + stack_createFunc(SyscallDesc *desc, int callnum, Process *process, + ExecContext *xc) + { + using namespace TheISA; + + TypedBufferArg argp(xc->getSyscallArg(0)); + + argp.copyIn(xc->getMemPort()); + + // if the user chose an address, just let them have it. Otherwise + // pick one for them. 
+ if (htog(argp->address) == 0) { + argp->address = htog(process->next_thread_stack_base); + int stack_size = (htog(argp->rsize) + htog(argp->ysize) + + htog(argp->gsize)); + process->next_thread_stack_base -= stack_size; + argp.copyOut(xc->getMemPort()); + } + + return 0; + } + + /// NXM library version stamp. + static + const int NXM_LIB_VERSION = 301003; + + /// This call sets up the interface between the user and kernel + /// schedulers by creating a shared-memory region. The shared memory + /// region has several structs, some global, some per-RAD, some per-VP. + static SyscallReturn + nxm_task_initFunc(SyscallDesc *desc, int callnum, Process *process, + ExecContext *xc) + { + using namespace std; + using namespace TheISA; + + TypedBufferArg attrp(xc->getSyscallArg(0)); + TypedBufferArg configptr_ptr(xc->getSyscallArg(1)); + + attrp.copyIn(xc->getMemPort()); + + if (gtoh(attrp->nxm_version) != NXM_LIB_VERSION) { + cerr << "nxm_task_init: thread library version mismatch! " + << "got " << attrp->nxm_version + << ", expected " << NXM_LIB_VERSION << endl; + abort(); + } + + if (gtoh(attrp->flags) != Tru64::NXM_TASK_INIT_VP) { + cerr << "nxm_task_init: bad flag value " << attrp->flags + << " (expected " << Tru64::NXM_TASK_INIT_VP << ")" << endl; + abort(); + } + + const Addr base_addr = 0x12000; // was 0x3f0000000LL; + Addr cur_addr = base_addr; // next addresses to use + // first comes the config_info struct + Addr config_addr = cur_addr; + cur_addr += sizeof(Tru64::nxm_config_info); + // next comes the per-cpu state vector + Addr slot_state_addr = cur_addr; + int slot_state_size = + process->numCpus() * sizeof(Tru64::nxm_slot_state_t); + cur_addr += slot_state_size; + // now the per-RAD state struct (we only support one RAD) + cur_addr = 0x14000; // bump up addr for alignment + Addr rad_state_addr = cur_addr; + int rad_state_size = + (sizeof(Tru64::nxm_shared) + + (process->numCpus()-1) * sizeof(Tru64::nxm_sched_state)); + cur_addr += rad_state_size; + + // now 
initialize a config_info struct and copy it out to user space + TypedBufferArg config(config_addr); + + config->nxm_nslots_per_rad = htog(process->numCpus()); + config->nxm_nrads = htog(1); // only one RAD in our system! + config->nxm_slot_state = htog(slot_state_addr); + config->nxm_rad[0] = htog(rad_state_addr); + + config.copyOut(xc->getMemPort()); + + // initialize the slot_state array and copy it out + TypedBufferArg slot_state(slot_state_addr, + slot_state_size); + for (int i = 0; i < process->numCpus(); ++i) { + // CPU 0 is bound to the calling process; all others are available + // XXX this code should have an endian conversion, but I don't think + // it works anyway + slot_state[i] = + (i == 0) ? Tru64::NXM_SLOT_BOUND : Tru64::NXM_SLOT_AVAIL; + } + + slot_state.copyOut(xc->getMemPort()); + + // same for the per-RAD "shared" struct. Note that we need to + // allocate extra bytes for the per-VP array which is embedded at + // the end. + TypedBufferArg rad_state(rad_state_addr, + rad_state_size); + + rad_state->nxm_callback = attrp->nxm_callback; + rad_state->nxm_version = attrp->nxm_version; + rad_state->nxm_uniq_offset = attrp->nxm_uniq_offset; + for (int i = 0; i < process->numCpus(); ++i) { + Tru64::nxm_sched_state *ssp = &rad_state->nxm_ss[i]; + ssp->nxm_u.sigmask = htog(0); + ssp->nxm_u.sig = htog(0); + ssp->nxm_u.flags = htog(0); + ssp->nxm_u.cancel_state = htog(0); + ssp->nxm_u.nxm_ssig = 0; + ssp->nxm_bits = htog(0); + ssp->nxm_quantum = attrp->nxm_quantum; + ssp->nxm_set_quantum = attrp->nxm_quantum; + ssp->nxm_sysevent = htog(0); + + if (i == 0) { + uint64_t uniq = xc->readMiscReg(TheISA::Uniq_DepTag); + ssp->nxm_u.pth_id = htog(uniq + gtoh(attrp->nxm_uniq_offset)); + ssp->nxm_u.nxm_active = htog(uniq | 1); + } + else { + ssp->nxm_u.pth_id = htog(0); + ssp->nxm_u.nxm_active = htog(0); + } + } + + rad_state.copyOut(xc->getMemPort()); + + // + // copy pointer to shared config area out to user + // + *configptr_ptr = htog(config_addr); + 
configptr_ptr.copyOut(xc->getMemPort()); + + // Register this as a valid address range with the process + process->nxm_start = base_addr; + process->nxm_end = cur_addr; + + return 0; + } + + /// Initialize execution context. + static void + init_exec_context(ExecContext *ec, + Tru64::nxm_thread_attr *attrp, uint64_t uniq_val) + { + using namespace TheISA; + + ec->clearArchRegs(); + + ec->setIntReg(TheISA::ArgumentReg0, gtoh(attrp->registers.a0)); + ec->setIntReg(27/*t12*/, gtoh(attrp->registers.pc)); + ec->setIntReg(TheISA::StackPointerReg, gtoh(attrp->registers.sp)); + ec->setMiscReg(TheISA::Uniq_DepTag, uniq_val); + + ec->setPC(gtoh(attrp->registers.pc)); + ec->setNextPC(gtoh(attrp->registers.pc) + sizeof(TheISA::MachInst)); + + ec->activate(); + } + + /// Create thread. + static SyscallReturn + nxm_thread_createFunc(SyscallDesc *desc, int callnum, Process *process, + ExecContext *xc) + { + using namespace std; + using namespace TheISA; + + TypedBufferArg attrp(xc->getSyscallArg(0)); + TypedBufferArg kidp(xc->getSyscallArg(1)); + int thread_index = xc->getSyscallArg(2); + + // get attribute args + attrp.copyIn(xc->getMemPort()); + + if (gtoh(attrp->version) != NXM_LIB_VERSION) { + cerr << "nxm_thread_create: thread library version mismatch! " + << "got " << attrp->version + << ", expected " << NXM_LIB_VERSION << endl; + abort(); + } + + if (thread_index < 0 | thread_index > process->numCpus()) { + cerr << "nxm_thread_create: bad thread index " << thread_index + << endl; + abort(); + } + + // On a real machine, the per-RAD shared structure is in + // shared memory, so both the user and kernel can get at it. + // We don't have that luxury, so we just copy it in and then + // back out again. 
+ int rad_state_size = + (sizeof(Tru64::nxm_shared) + + (process->numCpus()-1) * sizeof(Tru64::nxm_sched_state)); + + TypedBufferArg rad_state(0x14000, + rad_state_size); + rad_state.copyIn(xc->getMemPort()); + + uint64_t uniq_val = gtoh(attrp->pthid) - gtoh(rad_state->nxm_uniq_offset); + + if (gtoh(attrp->type) == Tru64::NXM_TYPE_MANAGER) { + // DEC pthreads seems to always create one of these (in + // addition to N application threads), but we don't use it, + // so don't bother creating it. + + // This is supposed to be a port number. Make something up. + *kidp = htog(99); + kidp.copyOut(xc->getMemPort()); + + return 0; + } else if (gtoh(attrp->type) == Tru64::NXM_TYPE_VP) { + // A real "virtual processor" kernel thread. Need to fork + // this thread on another CPU. + Tru64::nxm_sched_state *ssp = &rad_state->nxm_ss[thread_index]; + + if (gtoh(ssp->nxm_u.nxm_active) != 0) + return (int) Tru64::KERN_NOT_RECEIVER; + + ssp->nxm_u.pth_id = attrp->pthid; + ssp->nxm_u.nxm_active = htog(uniq_val | 1); + + rad_state.copyOut(xc->getMemPort()); + + Addr slot_state_addr = 0x12000 + sizeof(Tru64::nxm_config_info); + int slot_state_size = + process->numCpus() * sizeof(Tru64::nxm_slot_state_t); + + TypedBufferArg + slot_state(slot_state_addr, + slot_state_size); + + slot_state.copyIn(xc->getMemPort()); + + if (slot_state[thread_index] != Tru64::NXM_SLOT_AVAIL) { + cerr << "nxm_thread_createFunc: requested VP slot " + << thread_index << " not available!" << endl; + fatal(""); + } + + // XXX This should have an endian conversion but I think this code + // doesn't work anyway + slot_state[thread_index] = Tru64::NXM_SLOT_BOUND; + + slot_state.copyOut(xc->getMemPort()); + + // Find a free simulator execution context. + for (int i = 0; i < process->numCpus(); ++i) { + ExecContext *xc = process->execContexts[i]; + - if (xc->status() == ExecContext::Unallocated) { ++ if (xc->status() == ExecContext::Suspended) { + // inactive context... 
grab it + init_exec_context(xc, attrp, uniq_val); + + // This is supposed to be a port number, but we'll try + // and get away with just sticking the thread index + // here. + *kidp = htog(thread_index); + kidp.copyOut(xc->getMemPort()); + + return 0; + } + } + + // fell out of loop... no available inactive context + cerr << "nxm_thread_create: no idle contexts available." << endl; + abort(); + } else { + cerr << "nxm_thread_create: can't handle thread type " + << attrp->type << endl; + abort(); + } + + return 0; + } + + /// Thread idle call (like yield()). + static SyscallReturn + nxm_idleFunc(SyscallDesc *desc, int callnum, Process *process, + ExecContext *xc) + { + return 0; + } + + /// Block thread. + static SyscallReturn + nxm_thread_blockFunc(SyscallDesc *desc, int callnum, Process *process, + ExecContext *xc) + { + using namespace std; + + uint64_t tid = xc->getSyscallArg(0); + uint64_t secs = xc->getSyscallArg(1); + uint64_t flags = xc->getSyscallArg(2); + uint64_t action = xc->getSyscallArg(3); + uint64_t usecs = xc->getSyscallArg(4); + + cout << xc->getCpuPtr()->name() << ": nxm_thread_block " << tid << " " + << secs << " " << flags << " " << action << " " << usecs << endl; + + return 0; + } + + /// block. + static SyscallReturn + nxm_blockFunc(SyscallDesc *desc, int callnum, Process *process, + ExecContext *xc) + { + using namespace std; + + Addr uaddr = xc->getSyscallArg(0); + uint64_t val = xc->getSyscallArg(1); + uint64_t secs = xc->getSyscallArg(2); + uint64_t usecs = xc->getSyscallArg(3); + uint64_t flags = xc->getSyscallArg(4); + + BaseCPU *cpu = xc->getCpuPtr(); + + cout << cpu->name() << ": nxm_block " + << hex << uaddr << dec << " " << val + << " " << secs << " " << usecs + << " " << flags << endl; + + return 0; + } + + /// Unblock thread. 
+ static SyscallReturn + nxm_unblockFunc(SyscallDesc *desc, int callnum, Process *process, + ExecContext *xc) + { + using namespace std; + + Addr uaddr = xc->getSyscallArg(0); + + cout << xc->getCpuPtr()->name() << ": nxm_unblock " + << hex << uaddr << dec << endl; + + return 0; + } + + /// Switch thread priority. + static SyscallReturn + swtch_priFunc(SyscallDesc *desc, int callnum, Process *process, + ExecContext *xc) + { + // Attempts to switch to another runnable thread (if there is + // one). Returns false if there are no other threads to run + // (i.e., the thread can reasonably spin-wait) or true if there + // are other threads. + // + // Since we assume at most one "kernel" thread per CPU, it's + // always safe to return false here. + return 0; //false; + } + + + /// Activate exec context waiting on a channel. Just activate one + /// by default. + static int + activate_waiting_context(Addr uaddr, Process *process, + bool activate_all = false) + { + using namespace std; + + int num_activated = 0; + + list::iterator i = process->waitList.begin(); + list::iterator end = process->waitList.end(); + + while (i != end && (num_activated == 0 || activate_all)) { + if (i->waitChan == uaddr) { + // found waiting process: make it active + ExecContext *newCtx = i->waitingContext; + assert(newCtx->status() == ExecContext::Suspended); + newCtx->activate(); + + // get rid of this record + i = process->waitList.erase(i); + + ++num_activated; + } else { + ++i; + } + } + + return num_activated; + } + + /// M5 hacked-up lock acquire. + static void + m5_lock_mutex(Addr uaddr, Process *process, ExecContext *xc) + { + using namespace TheISA; + + TypedBufferArg lockp(uaddr); + + lockp.copyIn(xc->getMemPort()); + + if (gtoh(*lockp) == 0) { + // lock is free: grab it + *lockp = htog(1); + lockp.copyOut(xc->getMemPort()); + } else { + // lock is busy: disable until free + process->waitList.push_back(Process::WaitRec(uaddr, xc)); + xc->suspend(); + } + } + + /// M5 unlock call. 
+ static void + m5_unlock_mutex(Addr uaddr, Process *process, ExecContext *xc) + { + TypedBufferArg lockp(uaddr); + + lockp.copyIn(xc->getMemPort()); + assert(*lockp != 0); + + // Check for a process waiting on the lock. + int num_waiting = activate_waiting_context(uaddr, process); + + // clear lock field if no waiting context is taking over the lock + if (num_waiting == 0) { + *lockp = 0; + lockp.copyOut(xc->getMemPort()); + } + } + + /// Lock acquire syscall handler. + static SyscallReturn + m5_mutex_lockFunc(SyscallDesc *desc, int callnum, Process *process, + ExecContext *xc) + { + Addr uaddr = xc->getSyscallArg(0); + + m5_lock_mutex(uaddr, process, xc); + + // Return 0 since we will always return to the user with the lock + // acquired. We will just keep the context inactive until that is + // true. + return 0; + } + + /// Try lock (non-blocking). + static SyscallReturn + m5_mutex_trylockFunc(SyscallDesc *desc, int callnum, Process *process, + ExecContext *xc) + { + using namespace TheISA; + + Addr uaddr = xc->getSyscallArg(0); + TypedBufferArg lockp(uaddr); + + lockp.copyIn(xc->getMemPort()); + + if (gtoh(*lockp) == 0) { + // lock is free: grab it + *lockp = htog(1); + lockp.copyOut(xc->getMemPort()); + return 0; + } else { + return 1; + } + } + + /// Unlock syscall handler. + static SyscallReturn + m5_mutex_unlockFunc(SyscallDesc *desc, int callnum, Process *process, + ExecContext *xc) + { + Addr uaddr = xc->getSyscallArg(0); + + m5_unlock_mutex(uaddr, process, xc); + + return 0; + } + + /// Signal ocndition. + static SyscallReturn + m5_cond_signalFunc(SyscallDesc *desc, int callnum, Process *process, + ExecContext *xc) + { + Addr cond_addr = xc->getSyscallArg(0); + + // Wake up one process waiting on the condition variable. + activate_waiting_context(cond_addr, process); + + return 0; + } + + /// Wake up all processes waiting on the condition variable. 
+ static SyscallReturn + m5_cond_broadcastFunc(SyscallDesc *desc, int callnum, Process *process, + ExecContext *xc) + { + Addr cond_addr = xc->getSyscallArg(0); + + activate_waiting_context(cond_addr, process, true); + + return 0; + } + + /// Wait on a condition. + static SyscallReturn + m5_cond_waitFunc(SyscallDesc *desc, int callnum, Process *process, + ExecContext *xc) + { + using namespace TheISA; + + Addr cond_addr = xc->getSyscallArg(0); + Addr lock_addr = xc->getSyscallArg(1); + TypedBufferArg condp(cond_addr); + TypedBufferArg lockp(lock_addr); + + // user is supposed to acquire lock before entering + lockp.copyIn(xc->getMemPort()); + assert(gtoh(*lockp) != 0); + + m5_unlock_mutex(lock_addr, process, xc); + + process->waitList.push_back(Process::WaitRec(cond_addr, xc)); + xc->suspend(); + + return 0; + } + + /// Thread exit. + static SyscallReturn + m5_thread_exitFunc(SyscallDesc *desc, int callnum, Process *process, + ExecContext *xc) + { + assert(xc->status() == ExecContext::Active); + xc->deallocate(); + + return 0; + } + + /// Indirect syscall invocation (call #0). 
+ static SyscallReturn + indirectSyscallFunc(SyscallDesc *desc, int callnum, Process *process, + ExecContext *xc) + { + int new_callnum = xc->getSyscallArg(0); + LiveProcess *lp = dynamic_cast(process); + assert(lp); + + for (int i = 0; i < 5; ++i) + xc->setSyscallArg(i, xc->getSyscallArg(i+1)); + + + SyscallDesc *new_desc = lp->getDesc(new_callnum); + if (desc == NULL) + fatal("Syscall %d out of range", callnum); + + new_desc->doSyscall(new_callnum, process, xc); + + return 0; + } + +}; // class Tru64 + + +#endif // FULL_SYSTEM + +#endif // __TRU64_HH__ diff --cc src/python/m5/objects/AlphaFullCPU.py index 48989d057,000000000..d719bf783 mode 100644,000000..100644 --- a/src/python/m5/objects/AlphaFullCPU.py +++ b/src/python/m5/objects/AlphaFullCPU.py @@@ -1,80 -1,0 +1,99 @@@ +from m5 import * +from BaseCPU import BaseCPU + +class DerivAlphaFullCPU(BaseCPU): + type = 'DerivAlphaFullCPU' - ++ activity = Param.Unsigned("Initial count") + numThreads = Param.Unsigned("number of HW thread contexts") + + if not build_env['FULL_SYSTEM']: + mem = Param.FunctionalMemory(NULL, "memory") + ++ checker = Param.BaseCPU(NULL, "checker") ++ ++ cachePorts = Param.Unsigned("Cache Ports") ++ + decodeToFetchDelay = Param.Unsigned("Decode to fetch delay") + renameToFetchDelay = Param.Unsigned("Rename to fetch delay") + iewToFetchDelay = Param.Unsigned("Issue/Execute/Writeback to fetch " + "delay") + commitToFetchDelay = Param.Unsigned("Commit to fetch delay") + fetchWidth = Param.Unsigned("Fetch width") + + renameToDecodeDelay = Param.Unsigned("Rename to decode delay") + iewToDecodeDelay = Param.Unsigned("Issue/Execute/Writeback to decode " + "delay") + commitToDecodeDelay = Param.Unsigned("Commit to decode delay") + fetchToDecodeDelay = Param.Unsigned("Fetch to decode delay") + decodeWidth = Param.Unsigned("Decode width") + + iewToRenameDelay = Param.Unsigned("Issue/Execute/Writeback to rename " + "delay") + commitToRenameDelay = Param.Unsigned("Commit to rename delay") + 
decodeToRenameDelay = Param.Unsigned("Decode to rename delay") + renameWidth = Param.Unsigned("Rename width") + + commitToIEWDelay = Param.Unsigned("Commit to " + "Issue/Execute/Writeback delay") + renameToIEWDelay = Param.Unsigned("Rename to " + "Issue/Execute/Writeback delay") + issueToExecuteDelay = Param.Unsigned("Issue to execute delay (internal " + "to the IEW stage)") + issueWidth = Param.Unsigned("Issue width") + executeWidth = Param.Unsigned("Execute width") + executeIntWidth = Param.Unsigned("Integer execute width") + executeFloatWidth = Param.Unsigned("Floating point execute width") + executeBranchWidth = Param.Unsigned("Branch execute width") + executeMemoryWidth = Param.Unsigned("Memory execute width") ++ fuPool = Param.FUPool(NULL, "Functional Unit pool") + + iewToCommitDelay = Param.Unsigned("Issue/Execute/Writeback to commit " + "delay") + renameToROBDelay = Param.Unsigned("Rename to reorder buffer delay") + commitWidth = Param.Unsigned("Commit width") + squashWidth = Param.Unsigned("Squash width") ++ trapLatency = Param.Tick("Trap latency") ++ fetchTrapLatency = Param.Tick("Fetch trap latency") + - local_predictor_size = Param.Unsigned("Size of local predictor") - local_ctr_bits = Param.Unsigned("Bits per counter") - local_history_table_size = Param.Unsigned("Size of local history table") - local_history_bits = Param.Unsigned("Bits for the local history") - global_predictor_size = Param.Unsigned("Size of global predictor") - global_ctr_bits = Param.Unsigned("Bits per counter") - global_history_bits = Param.Unsigned("Bits of history") - choice_predictor_size = Param.Unsigned("Size of choice predictor") - choice_ctr_bits = Param.Unsigned("Bits of choice counters") ++ localPredictorSize = Param.Unsigned("Size of local predictor") ++ localCtrBits = Param.Unsigned("Bits per counter") ++ localHistoryTableSize = Param.Unsigned("Size of local history table") ++ localHistoryBits = Param.Unsigned("Bits for the local history") ++ globalPredictorSize = 
Param.Unsigned("Size of global predictor") ++ globalCtrBits = Param.Unsigned("Bits per counter") ++ globalHistoryBits = Param.Unsigned("Bits of history") ++ choicePredictorSize = Param.Unsigned("Size of choice predictor") ++ choiceCtrBits = Param.Unsigned("Bits of choice counters") + + BTBEntries = Param.Unsigned("Number of BTB entries") + BTBTagSize = Param.Unsigned("Size of the BTB tags, in bits") + + RASSize = Param.Unsigned("RAS size") + + LQEntries = Param.Unsigned("Number of load queue entries") + SQEntries = Param.Unsigned("Number of store queue entries") + LFSTSize = Param.Unsigned("Last fetched store table size") + SSITSize = Param.Unsigned("Store set ID table size") + ++ numRobs = Param.Unsigned("Number of Reorder Buffers"); ++ + numPhysIntRegs = Param.Unsigned("Number of physical integer registers") + numPhysFloatRegs = Param.Unsigned("Number of physical floating point " + "registers") + numIQEntries = Param.Unsigned("Number of instruction queue entries") + numROBEntries = Param.Unsigned("Number of reorder buffer entries") + + instShiftAmt = Param.Unsigned("Number of bits to shift instructions by") + + function_trace = Param.Bool(False, "Enable function trace") + function_trace_start = Param.Tick(0, "Cycle to start function trace") ++ ++ smtNumFetchingThreads = Param.Unsigned("SMT Number of Fetching Threads") ++ smtFetchPolicy = Param.String("SMT Fetch policy") ++ smtLSQPolicy = Param.String("SMT LSQ Sharing Policy") ++ smtLSQThreshold = Param.String("SMT LSQ Threshold Sharing Parameter") ++ smtIQPolicy = Param.String("SMT IQ Sharing Policy") ++ smtIQThreshold = Param.String("SMT IQ Threshold Sharing Parameter") ++ smtROBPolicy = Param.String("SMT ROB Sharing Policy") ++ smtROBThreshold = Param.String("SMT ROB Threshold Sharing Parameter") ++ smtCommitPolicy = Param.String("SMT Commit Policy") diff --cc src/sim/pseudo_inst.cc index 7897b5c8b,000000000..12c076c08 mode 100644,000000..100644 --- a/src/sim/pseudo_inst.cc +++ b/src/sim/pseudo_inst.cc @@@ 
-1,284 -1,0 +1,290 @@@ +/* + * Copyright (c) 2003-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include + +#include + +#include "sim/pseudo_inst.hh" +#include "arch/vtophys.hh" +#include "cpu/base.hh" +#include "cpu/sampler/sampler.hh" +#include "cpu/exec_context.hh" ++#include "cpu/quiesce_event.hh" +#include "kern/kernel_stats.hh" +#include "sim/param.hh" +#include "sim/serialize.hh" +#include "sim/sim_exit.hh" +#include "sim/stat_control.hh" +#include "sim/stats.hh" +#include "sim/system.hh" +#include "sim/debug.hh" +#include "sim/vptr.hh" + +using namespace std; + +extern Sampler *SampCPU; + +using namespace Stats; +using namespace TheISA; + +namespace AlphaPseudo +{ + bool doStatisticsInsts; + bool doCheckpointInsts; + bool doQuiesce; + + void + arm(ExecContext *xc) + { - xc->getCpuPtr()->kernelStats->arm(); ++ if (xc->getKernelStats()) ++ xc->getKernelStats()->arm(); + } + + void + quiesce(ExecContext *xc) + { + if (!doQuiesce) + return; + + xc->suspend(); - xc->getCpuPtr()->kernelStats->quiesce(); ++ if (xc->getKernelStats()) ++ xc->getKernelStats()->quiesce(); + } + + void + quiesceNs(ExecContext *xc, uint64_t ns) + { + if (!doQuiesce || ns == 0) + return; + - Event *quiesceEvent = xc->getQuiesceEvent(); ++ EndQuiesceEvent *quiesceEvent = xc->getQuiesceEvent(); + + if (quiesceEvent->scheduled()) + quiesceEvent->reschedule(curTick + Clock::Int::ns * ns); + else + quiesceEvent->schedule(curTick + Clock::Int::ns * ns); + + xc->suspend(); - xc->getCpuPtr()->kernelStats->quiesce(); ++ if (xc->getKernelStats()) ++ xc->getKernelStats()->quiesce(); + } + + void + quiesceCycles(ExecContext *xc, uint64_t cycles) + { + if (!doQuiesce || cycles == 0) + return; + - Event *quiesceEvent = xc->getQuiesceEvent(); ++ EndQuiesceEvent *quiesceEvent = xc->getQuiesceEvent(); + + if (quiesceEvent->scheduled()) + quiesceEvent->reschedule(curTick + + xc->getCpuPtr()->cycles(cycles)); + else + quiesceEvent->schedule(curTick + + xc->getCpuPtr()->cycles(cycles)); + + xc->suspend(); - xc->getCpuPtr()->kernelStats->quiesce(); ++ if 
(xc->getKernelStats()) ++ xc->getKernelStats()->quiesce(); + } + + uint64_t + quiesceTime(ExecContext *xc) + { + return (xc->readLastActivate() - xc->readLastSuspend()) / Clock::Int::ns; + } + + void + ivlb(ExecContext *xc) + { - xc->getCpuPtr()->kernelStats->ivlb(); ++ if (xc->getKernelStats()) ++ xc->getKernelStats()->ivlb(); + } + + void + ivle(ExecContext *xc) + { + } + + void + m5exit_old(ExecContext *xc) + { + SimExit(curTick, "m5_exit_old instruction encountered"); + } + + void + m5exit(ExecContext *xc, Tick delay) + { + Tick when = curTick + delay * Clock::Int::ns; + SimExit(when, "m5_exit instruction encountered"); + } + + void + resetstats(ExecContext *xc, Tick delay, Tick period) + { + if (!doStatisticsInsts) + return; + + + Tick when = curTick + delay * Clock::Int::ns; + Tick repeat = period * Clock::Int::ns; + + using namespace Stats; + SetupEvent(Reset, when, repeat); + } + + void + dumpstats(ExecContext *xc, Tick delay, Tick period) + { + if (!doStatisticsInsts) + return; + + + Tick when = curTick + delay * Clock::Int::ns; + Tick repeat = period * Clock::Int::ns; + + using namespace Stats; + SetupEvent(Dump, when, repeat); + } + + void + addsymbol(ExecContext *xc, Addr addr, Addr symbolAddr) + { + char symb[100]; + CopyStringOut(xc, symb, symbolAddr, 100); + std::string symbol(symb); + + DPRINTF(Loader, "Loaded symbol: %s @ %#llx\n", symbol, addr); + + xc->getSystemPtr()->kernelSymtab->insert(addr,symbol); + } + + void + dumpresetstats(ExecContext *xc, Tick delay, Tick period) + { + if (!doStatisticsInsts) + return; + + + Tick when = curTick + delay * Clock::Int::ns; + Tick repeat = period * Clock::Int::ns; + + using namespace Stats; + SetupEvent(Dump|Reset, when, repeat); + } + + void + m5checkpoint(ExecContext *xc, Tick delay, Tick period) + { + if (!doCheckpointInsts) + return; + + + Tick when = curTick + delay * Clock::Int::ns; + Tick repeat = period * Clock::Int::ns; + + Checkpoint::setup(when, repeat); + } + + uint64_t + readfile(ExecContext 
*xc, Addr vaddr, uint64_t len, uint64_t offset) + { + const string &file = xc->getCpuPtr()->system->params()->readfile; + if (file.empty()) { + return ULL(0); + } + + uint64_t result = 0; + + int fd = ::open(file.c_str(), O_RDONLY, 0); + if (fd < 0) + panic("could not open file %s\n", file); + + if (::lseek(fd, offset, SEEK_SET) < 0) + panic("could not seek: %s", strerror(errno)); + + char *buf = new char[len]; + char *p = buf; + while (len > 0) { + int bytes = ::read(fd, p, len); + if (bytes <= 0) + break; + + p += bytes; + result += bytes; + len -= bytes; + } + + close(fd); + CopyIn(xc, vaddr, buf, result); + delete [] buf; + return result; + } + + class Context : public ParamContext + { + public: + Context(const string §ion) : ParamContext(section) {} + void checkParams(); + }; + + Context context("pseudo_inst"); + + Param __quiesce(&context, "quiesce", + "enable quiesce instructions", + true); + Param __statistics(&context, "statistics", + "enable statistics pseudo instructions", + true); + Param __checkpoint(&context, "checkpoint", + "enable checkpoint pseudo instructions", + true); + + void + Context::checkParams() + { + doQuiesce = __quiesce; + doStatisticsInsts = __statistics; + doCheckpointInsts = __checkpoint; + } + + void debugbreak(ExecContext *xc) + { + debug_break(); + } + + void switchcpu(ExecContext *xc) + { + if (SampCPU) + SampCPU->switchCPUs(); + } +}