--- /dev/null
+# -*- mode:python -*-
+
+# Copyright (c) 2004-2005 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+###################################################
+#
+# SCons top-level build description (SConstruct) file.
+#
+# While in this directory ('m5'), just type 'scons' to build the default
+# configuration (see below), or type 'scons build/<CONFIG>/<binary>'
+# to build some other configuration (e.g., 'build/ALPHA_FS/m5.opt' for
+# the optimized full-system version).
+#
+# You can build M5 in a different directory as long as there is a
+# 'build/<CONFIG>' somewhere along the target path. The build system
+# expects that all configs under the same build directory are being
+# built for the same host system.
+#
+# Examples:
+# These two commands are equivalent. The '-u' option tells scons to
+# search up the directory tree for this SConstruct file.
+# % cd <path-to-src>/m5 ; scons build/ALPHA_FS/m5.debug
+# % cd <path-to-src>/m5/build/ALPHA_FS; scons -u m5.debug
+# These two commands are equivalent and demonstrate building in a
+# directory outside of the source tree. The '-C' option tells scons
+# to chdir to the specified directory to find this SConstruct file.
+# % cd <path-to-src>/m5 ; scons /local/foo/build/ALPHA_FS/m5.debug
+# % cd /local/foo/build/ALPHA_FS; scons -C <path-to-src>/m5 m5.debug
+#
+# You can use 'scons -H' to print scons options. If you're in this
+# 'm5' directory (or use -u or -C to tell scons where to find this
+# file), you can use 'scons -h' to print all the M5-specific build
+# options as well.
+#
+###################################################
+
+# Python library imports
+import sys
+import os
+
+# Check for recent-enough Python and SCons versions
+EnsurePythonVersion(2,3)
+
+# Ironically, SCons 0.96 dies if you give EnsureSConsVersion a
+# 3-element version number.
+min_scons_version = (0,96,91)
+try:
+ EnsureSConsVersion(*min_scons_version)
+except:
+ print "Error checking current SCons version."
+ print "SCons", ".".join(map(str,min_scons_version)), "or greater required."
+ Exit(2)
+
+
+# The absolute path to the current directory (where this file lives).
+ROOT = Dir('.').abspath
+
+# Paths to the M5 and external source trees.
+SRCDIR = os.path.join(ROOT, 'src')
+
+# tell python where to find m5 python code
+sys.path.append(os.path.join(ROOT, 'src/python'))
+
+###################################################
+#
+# Figure out which configurations to set up based on the path(s) of
+# the target(s).
+#
+###################################################
+
+# Find default configuration & binary.
+Default(os.environ.get('M5_DEFAULT_BINARY', 'build/ALPHA_SE/m5.debug'))
+
+# Ask SCons which directory it was invoked from.
+launch_dir = GetLaunchDir()
+
+# Make targets relative to invocation directory
+abs_targets = map(lambda x: os.path.normpath(os.path.join(launch_dir, str(x))),
+ BUILD_TARGETS)
+
+# helper function: find last occurrence of element in list
+def rfind(l, elt, offs = -1):
+ for i in range(len(l)+offs, 0, -1):
+ if l[i] == elt:
+ return i
+ raise ValueError, "element not found"
+
+# Each target must have 'build' in the interior of the path; the
+# directory below this will determine the build parameters. For
+# example, for target 'foo/bar/build/ALPHA_SE/arch/alpha/blah.do' we
+# recognize that ALPHA_SE specifies the configuration because it
+# follows 'build' in the build path.
+
+# Generate a list of the unique build roots and configs that the
+# collected targets reference.
+build_paths = []
+build_root = None
+for t in abs_targets:
+ path_dirs = t.split('/')
+ try:
+ build_top = rfind(path_dirs, 'build', -2)
+ except:
+ print "Error: no non-leaf 'build' dir found on target path", t
+ Exit(1)
+ this_build_root = os.path.join('/',*path_dirs[:build_top+1])
+ if not build_root:
+ build_root = this_build_root
+ else:
+ if this_build_root != build_root:
+ print "Error: build targets not under same build root\n"\
+ " %s\n %s" % (build_root, this_build_root)
+ Exit(1)
+ build_path = os.path.join('/',*path_dirs[:build_top+2])
+ if build_path not in build_paths:
+ build_paths.append(build_path)
+
+###################################################
+#
+# Set up the default build environment. This environment is copied
+# and modified according to each selected configuration.
+#
+###################################################
+
+env = Environment(ENV = os.environ, # inherit user's environment vars
+ ROOT = ROOT,
+ SRCDIR = SRCDIR)
+
+env.SConsignFile("sconsign")
+
+# I waffle on this setting... it does avoid a few painful but
+# unnecessary builds, but it also seems to make trivial builds take
+# noticeably longer.
+if False:
+ env.TargetSignatures('content')
+
+# M5_PLY is used by isa_parser.py to find the PLY package.
+env.Append(ENV = { 'M5_PLY' : Dir('ext/ply') })
+
+# Set up default C++ compiler flags
+env.Append(CCFLAGS='-pipe')
+env.Append(CCFLAGS='-fno-strict-aliasing')
+env.Append(CCFLAGS=Split('-Wall -Wno-sign-compare -Werror -Wundef'))
+if sys.platform == 'cygwin':
+ # cygwin has some header file issues...
+ env.Append(CCFLAGS=Split("-Wno-uninitialized"))
+env.Append(CPPPATH=[Dir('ext/dnet')])
+
+# Default libraries
+env.Append(LIBS=['z'])
+
+# Platform-specific configuration. Note again that we assume that all
+# builds under a given build root run on the same host platform.
+conf = Configure(env,
+ conf_dir = os.path.join(build_root, '.scons_config'),
+ log_file = os.path.join(build_root, 'scons_config.log'))
+
+# Check for <fenv.h> (C99 FP environment control)
+have_fenv = conf.CheckHeader('fenv.h', '<>')
+if not have_fenv:
+ print "Warning: Header file <fenv.h> not found."
+ print " This host has no IEEE FP rounding mode control."
+
+# Check for mysql.
+mysql_config = WhereIs('mysql_config')
+have_mysql = mysql_config != None
+
+# Check MySQL version.
+if have_mysql:
+ mysql_version = os.popen(mysql_config + ' --version').read()
+ mysql_version = mysql_version.split('.')
+ mysql_major = int(mysql_version[0])
+ mysql_minor = int(mysql_version[1])
+ # This version check is probably overly conservative, but it deals
+ # with the versions we have installed.
+ if mysql_major < 4 or (mysql_major == 4 and mysql_minor < 1):
+ print "Warning: MySQL v4.1 or newer required."
+ have_mysql = False
+
+# Set up mysql_config commands.
+if have_mysql:
+ mysql_config_include = mysql_config + ' --include'
+ if os.system(mysql_config_include + ' > /dev/null') != 0:
+ # older mysql_config versions don't support --include, use
+ # --cflags instead
+ mysql_config_include = mysql_config + ' --cflags | sed s/\\\'//g'
+ # This seems to work in all versions
+ mysql_config_libs = mysql_config + ' --libs'
+
+env = conf.Finish()
+
+# Define the universe of supported ISAs
+env['ALL_ISA_LIST'] = ['alpha', 'sparc', 'mips']
+
+# Define the universe of supported CPU models
+env['ALL_CPU_LIST'] = ['AtomicSimpleCPU', 'TimingSimpleCPU',
+ 'FullCPU', 'AlphaFullCPU',
+ 'OzoneSimpleCPU', 'OzoneCPU', 'CheckerCPU']
+
+# Sticky options get saved in the options file so they persist from
+# one invocation to the next (unless overridden, in which case the new
+# value becomes sticky).
+sticky_opts = Options(args=ARGUMENTS)
+sticky_opts.AddOptions(
+ EnumOption('TARGET_ISA', 'Target ISA', 'alpha', env['ALL_ISA_LIST']),
+ BoolOption('FULL_SYSTEM', 'Full-system support', False),
+ # There's a bug in scons 0.96.1 that causes ListOptions with list
+ # values (more than one value) not to be able to be restored from
+ # a saved option file. If this causes trouble then upgrade to
+ # scons 0.96.90 or later.
+ ListOption('CPU_MODELS', 'CPU models', 'AtomicSimpleCPU,TimingSimpleCPU',
+ env['ALL_CPU_LIST']),
+ BoolOption('ALPHA_TLASER',
+ 'Model Alpha TurboLaser platform (vs. Tsunami)', False),
+ BoolOption('NO_FAST_ALLOC', 'Disable fast object allocator', False),
+ BoolOption('EFENCE', 'Link with Electric Fence malloc debugger',
+ False),
+ BoolOption('SS_COMPATIBLE_FP',
+ 'Make floating-point results compatible with SimpleScalar',
+ False),
+ BoolOption('USE_SSE2',
+ 'Compile for SSE2 (-msse2) to get IEEE FP on x86 hosts',
+ False),
+ BoolOption('STATS_BINNING', 'Bin statistics by CPU mode', have_mysql),
+ BoolOption('USE_MYSQL', 'Use MySQL for stats output', have_mysql),
+ BoolOption('USE_FENV', 'Use <fenv.h> IEEE mode control', have_fenv),
+ ('CC', 'C compiler', os.environ.get('CC', env['CC'])),
+ ('CXX', 'C++ compiler', os.environ.get('CXX', env['CXX'])),
+ BoolOption('BATCH', 'Use batch pool for build and tests', False),
+ ('BATCH_CMD', 'Batch pool submission command name', 'qdo')
+ )
+
+# Non-sticky options only apply to the current build.
+nonsticky_opts = Options(args=ARGUMENTS)
+nonsticky_opts.AddOptions(
+ BoolOption('update_ref', 'Update test reference outputs', False)
+ )
+
+# These options get exported to #defines in config/*.hh (see m5/SConscript).
+env.ExportOptions = ['FULL_SYSTEM', 'ALPHA_TLASER', 'USE_FENV', \
+ 'USE_MYSQL', 'NO_FAST_ALLOC', 'SS_COMPATIBLE_FP', \
+ 'STATS_BINNING']
+
+# Define a handy 'no-op' action
+def no_action(target, source, env):
+ return 0
+
+env.NoAction = Action(no_action, None)
+
+###################################################
+#
+# Define a SCons builder for configuration flag headers.
+#
+###################################################
+
+# This function generates a config header file that #defines the
+# option symbol to the current option setting (0 or 1). The source
+# operands are the name of the option and a Value node containing the
+# value of the option.
+def build_config_file(target, source, env):
+ (option, value) = [s.get_contents() for s in source]
+ f = file(str(target[0]), 'w')
+ print >> f, '#define', option, value
+ f.close()
+ return None
+
+# Generate the message to be printed when building the config file.
+def build_config_file_string(target, source, env):
+ (option, value) = [s.get_contents() for s in source]
+ return "Defining %s as %s in %s." % (option, value, target[0])
+
+# Combine the two functions into a scons Action object.
+config_action = Action(build_config_file, build_config_file_string)
+
+# The emitter munges the source & target node lists to reflect what
+# we're really doing.
+def config_emitter(target, source, env):
+ # extract option name from Builder arg
+ option = str(target[0])
+ # True target is config header file
+ target = os.path.join('config', option.lower() + '.hh')
+ # Force value to 0/1 even if it's a Python bool
+ val = int(eval(str(env[option])))
+ # Sources are option name & value (packaged in SCons Value nodes)
+ return ([target], [Value(option), Value(val)])
+
+config_builder = Builder(emitter = config_emitter, action = config_action)
+
+env.Append(BUILDERS = { 'ConfigFile' : config_builder })
+
+# base help text
+help_text = '''
+Usage: scons [scons options] [build options] [target(s)]
+
+'''
+
+# libelf build is shared across all configs in the build root.
+env.SConscript('ext/libelf/SConscript',
+ build_dir = os.path.join(build_root, 'libelf'),
+ exports = 'env')
+
+###################################################
+#
+# Define build environments for selected configurations.
+#
+###################################################
+
+# rename base env
+base_env = env
+
+for build_path in build_paths:
+ print "Building in", build_path
+ # build_dir is the tail component of build path, and is used to
+ # determine the build parameters (e.g., 'ALPHA_SE')
+ (build_root, build_dir) = os.path.split(build_path)
+ # Make a copy of the build-root environment to use for this config.
+ env = base_env.Copy()
+
+ # Set env options according to the build directory config.
+ sticky_opts.files = []
+ # Options for $BUILD_ROOT/$BUILD_DIR are stored in
+ # $BUILD_ROOT/options/$BUILD_DIR so you can nuke
+ # $BUILD_ROOT/$BUILD_DIR without losing your options settings.
+ current_opts_file = os.path.join(build_root, 'options', build_dir)
+ if os.path.isfile(current_opts_file):
+ sticky_opts.files.append(current_opts_file)
+ print "Using saved options file %s" % current_opts_file
+ else:
+ # Build dir-specific options file doesn't exist.
+
+ # Make sure the directory is there so we can create it later
+ opt_dir = os.path.dirname(current_opts_file)
+ if not os.path.isdir(opt_dir):
+ os.mkdir(opt_dir)
+
+ # Get default build options from source tree. Options are
+ # normally determined by name of $BUILD_DIR, but can be
+ # overridden by 'default=' arg on command line.
+ default_opts_file = os.path.join('build_opts',
+ ARGUMENTS.get('default', build_dir))
+ if os.path.isfile(default_opts_file):
+ sticky_opts.files.append(default_opts_file)
+ print "Options file %s not found,\n using defaults in %s" \
+ % (current_opts_file, default_opts_file)
+ else:
+ print "Error: cannot find options file %s or %s" \
+ % (current_opts_file, default_opts_file)
+ Exit(1)
+
+ # Apply current option settings to env
+ sticky_opts.Update(env)
+ nonsticky_opts.Update(env)
+
+ help_text += "Sticky options for %s:\n" % build_dir \
+ + sticky_opts.GenerateHelpText(env) \
+ + "\nNon-sticky options for %s:\n" % build_dir \
+ + nonsticky_opts.GenerateHelpText(env)
+
+ # Process option settings.
+
+ if not have_fenv and env['USE_FENV']:
+ print "Warning: <fenv.h> not available; " \
+ "forcing USE_FENV to False in", build_dir + "."
+ env['USE_FENV'] = False
+
+ if not env['USE_FENV']:
+ print "Warning: No IEEE FP rounding mode control in", build_dir + "."
+ print " FP results may deviate slightly from other platforms."
+
+ if env['EFENCE']:
+ env.Append(LIBS=['efence'])
+
+ if env['USE_MYSQL']:
+ if not have_mysql:
+ print "Warning: MySQL not available; " \
+ "forcing USE_MYSQL to False in", build_dir + "."
+ env['USE_MYSQL'] = False
+ else:
+ print "Compiling in", build_dir, "with MySQL support."
+ env.ParseConfig(mysql_config_libs)
+ env.ParseConfig(mysql_config_include)
+
+ # Save sticky option settings back to current options file
+ sticky_opts.Save(current_opts_file, env)
+
+ # Do this after we save setting back, or else we'll tack on an
+ # extra 'qdo' every time we run scons.
+ if env['BATCH']:
+ env['CC'] = env['BATCH_CMD'] + ' ' + env['CC']
+ env['CXX'] = env['BATCH_CMD'] + ' ' + env['CXX']
+
+ if env['USE_SSE2']:
+ env.Append(CCFLAGS='-msse2')
+
+ # The m5/SConscript file sets up the build rules in 'env' according
+ # to the configured options. It returns a list of environments,
+ # one for each variant build (debug, opt, etc.)
+ envList = SConscript('src/SConscript', build_dir = build_path,
+ exports = 'env', duplicate = False)
+
+ # Set up the regression tests for each build.
+# for e in envList:
+# SConscript('m5-test/SConscript',
+# build_dir = os.path.join(build_dir, 'test', e.Label),
+# exports = { 'env' : e }, duplicate = False)
+
+Help(help_text)
+
+###################################################
+#
+# Let SCons do its thing. At this point SCons will use the defined
+# build environments to build the requested targets.
+#
+###################################################
+
--- /dev/null
+# -*- mode:python -*-
+
+# Copyright (c) 2004-2005 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+import sys
+from os.path import isdir
+
+# This file defines how to build a particular configuration of M5
+# based on variable settings in the 'env' build environment.
+
+# Import build environment variable from SConstruct.
+Import('env')
+
+###################################################
+#
+# Define needed sources.
+#
+###################################################
+
+# Base sources used by all configurations.
+
+base_sources = Split('''
+ base/circlebuf.cc
+ base/copyright.cc
+ base/cprintf.cc
+ base/embedfile.cc
+ base/fast_alloc.cc
+ base/fifo_buffer.cc
+ base/hostinfo.cc
+ base/hybrid_pred.cc
+ base/inifile.cc
+ base/intmath.cc
+ base/match.cc
+ base/misc.cc
+ base/output.cc
+ base/pollevent.cc
+ base/range.cc
+ base/random.cc
+ base/sat_counter.cc
+ base/serializer.cc
+ base/socket.cc
+ base/statistics.cc
+ base/str.cc
+ base/time.cc
+ base/trace.cc
+ base/traceflags.cc
+ base/userinfo.cc
+ base/compression/lzss_compression.cc
+ base/loader/aout_object.cc
+ base/loader/ecoff_object.cc
+ base/loader/elf_object.cc
+ base/loader/object_file.cc
+ base/loader/symtab.cc
+ base/stats/events.cc
+ base/stats/statdb.cc
+ base/stats/visit.cc
+ base/stats/text.cc
+
+ cpu/activity.cc
+ cpu/base.cc
+ cpu/cpu_exec_context.cc
+ cpu/cpuevent.cc
+ cpu/exetrace.cc
+ cpu/op_class.cc
+ cpu/pc_event.cc
+ cpu/quiesce_event.cc
+ cpu/static_inst.cc
+ cpu/sampler/sampler.cc
+
+ mem/bridge.cc
+ mem/bus.cc
+ mem/connector.cc
+ mem/mem_object.cc
+ mem/packet.cc
+ mem/physical.cc
+ mem/port.cc
+ mem/request.cc
+
+ python/pyconfig.cc
+ python/embedded_py.cc
+
+ sim/builder.cc
+ sim/configfile.cc
+ sim/debug.cc
+ sim/eventq.cc
+ sim/faults.cc
+ sim/main.cc
+ sim/param.cc
+ sim/profile.cc
+ sim/root.cc
+ sim/serialize.cc
+ sim/sim_events.cc
+ sim/sim_exit.cc
+ sim/sim_object.cc
+ sim/startup.cc
+ sim/stat_context.cc
+ sim/stat_control.cc
+ sim/system.cc
+ sim/trace_context.cc
+ ''')
+
+# Old FullCPU sources
+full_cpu_sources = Split('''
+ encumbered/cpu/full/bpred.cc
+ encumbered/cpu/full/commit.cc
+ encumbered/cpu/full/cpu.cc
+ encumbered/cpu/full/create_vector.cc
+ encumbered/cpu/full/cv_spec_state.cc
+ encumbered/cpu/full/dd_queue.cc
+ encumbered/cpu/full/dep_link.cc
+ encumbered/cpu/full/dispatch.cc
+ encumbered/cpu/full/dyn_inst.cc
+ encumbered/cpu/full/execute.cc
+ encumbered/cpu/full/fetch.cc
+ encumbered/cpu/full/floss_reasons.cc
+ encumbered/cpu/full/fu_pool.cc
+ encumbered/cpu/full/inst_fifo.cc
+ encumbered/cpu/full/instpipe.cc
+ encumbered/cpu/full/issue.cc
+ encumbered/cpu/full/ls_queue.cc
+ encumbered/cpu/full/machine_queue.cc
+ encumbered/cpu/full/pipetrace.cc
+ encumbered/cpu/full/readyq.cc
+ encumbered/cpu/full/reg_info.cc
+ encumbered/cpu/full/rob_station.cc
+ encumbered/cpu/full/spec_memory.cc
+ encumbered/cpu/full/spec_state.cc
+ encumbered/cpu/full/storebuffer.cc
+ encumbered/cpu/full/writeback.cc
+ encumbered/cpu/full/iq/iq_station.cc
+ encumbered/cpu/full/iq/iqueue.cc
+ encumbered/cpu/full/iq/segmented/chain_info.cc
+ encumbered/cpu/full/iq/segmented/chain_wire.cc
+ encumbered/cpu/full/iq/segmented/iq_seg.cc
+ encumbered/cpu/full/iq/segmented/iq_segmented.cc
+ encumbered/cpu/full/iq/segmented/seg_chain.cc
+ encumbered/cpu/full/iq/seznec/iq_seznec.cc
+ encumbered/cpu/full/iq/standard/iq_standard.cc
+ ''')
+
+trace_reader_sources = Split('''
+ cpu/trace/reader/mem_trace_reader.cc
+ cpu/trace/reader/ibm_reader.cc
+ cpu/trace/reader/itx_reader.cc
+ cpu/trace/reader/m5_reader.cc
+ cpu/trace/opt_cpu.cc
+ cpu/trace/trace_cpu.cc
+ ''')
+
+
+
+# MySql sources
+mysql_sources = Split('''
+ base/mysql.cc
+ base/stats/mysql.cc
+ ''')
+
+# Full-system sources
+full_system_sources = Split('''
+ base/crc.cc
+ base/inet.cc
+ base/remote_gdb.cc
+
+ cpu/intr_control.cc
+ cpu/profile.cc
+
+ dev/alpha_console.cc
+ dev/baddev.cc
+ dev/disk_image.cc
+ dev/etherbus.cc
+ dev/etherdump.cc
+ dev/etherint.cc
+ dev/etherlink.cc
+ dev/etherpkt.cc
+ dev/ethertap.cc
+ dev/ide_ctrl.cc
+ dev/ide_disk.cc
+ dev/io_device.cc
+ dev/isa_fake.cc
+ dev/ns_gige.cc
+ dev/pciconfigall.cc
+ dev/pcidev.cc
+ dev/pcifake.cc
+ dev/pktfifo.cc
+ dev/platform.cc
+ dev/simconsole.cc
+ dev/simple_disk.cc
+ dev/sinic.cc
+ dev/tsunami.cc
+ dev/tsunami_cchip.cc
+ dev/tsunami_io.cc
+ dev/tsunami_fake.cc
+ dev/tsunami_pchip.cc
+
+ dev/uart.cc
+ dev/uart8250.cc
+
+ kern/kernel_binning.cc
+ kern/kernel_stats.cc
+ kern/system_events.cc
+ kern/linux/events.cc
+ kern/linux/linux_syscalls.cc
+ kern/linux/printk.cc
+
+ mem/vport.cc
+
+ sim/pseudo_inst.cc
+ ''')
+
+
+if env['TARGET_ISA'] == 'alpha':
+ full_system_sources += Split('''
+ kern/tru64/dump_mbuf.cc
+ kern/tru64/printf.cc
+ kern/tru64/tru64_events.cc
+ kern/tru64/tru64_syscalls.cc
+ ''')
+
+# turbolaser encumbered sources
+turbolaser_sources = Split('''
+ encumbered/dev/dma.cc
+ encumbered/dev/etherdev.cc
+ encumbered/dev/scsi.cc
+ encumbered/dev/scsi_ctrl.cc
+ encumbered/dev/scsi_disk.cc
+ encumbered/dev/scsi_none.cc
+ encumbered/dev/tlaser_clock.cc
+ encumbered/dev/tlaser_ipi.cc
+ encumbered/dev/tlaser_mbox.cc
+ encumbered/dev/tlaser_mc146818.cc
+ encumbered/dev/tlaser_node.cc
+ encumbered/dev/tlaser_pcia.cc
+ encumbered/dev/tlaser_pcidev.cc
+ encumbered/dev/tlaser_serial.cc
+ encumbered/dev/turbolaser.cc
+ encumbered/dev/uart8530.cc
+ ''')
+
+# Syscall emulation (non-full-system) sources
+syscall_emulation_sources = Split('''
+ mem/translating_port.cc
+ mem/page_table.cc
+ sim/process.cc
+ sim/syscall_emul.cc
+ ''')
+
+#if env['TARGET_ISA'] == 'alpha':
+# syscall_emulation_sources += Split('''
+# kern/tru64/tru64.cc
+# ''')
+
+alpha_eio_sources = Split('''
+ encumbered/eio/exolex.cc
+ encumbered/eio/libexo.cc
+ encumbered/eio/eio.cc
+ ''')
+
+if env['TARGET_ISA'] == 'ALPHA_ISA':
+ syscall_emulation_sources += alpha_eio_sources
+
+memtest_sources = Split('''
+ cpu/memtest/memtest.cc
+ ''')
+
+# Add a flag defining what THE_ISA should be for all compilation
+env.Append(CPPDEFINES=[('THE_ISA','%s_ISA' % env['TARGET_ISA'].upper())])
+
+arch_sources = SConscript('arch/SConscript',
+ exports = 'env', duplicate = False)
+
+cpu_sources = SConscript('cpu/SConscript',
+ exports = 'env', duplicate = False)
+
+# This is outside of cpu/SConscript since the source directory isn't
+# underneath 'cpu'.
+if 'FullCPU' in env['CPU_MODELS']:
+ cpu_sources += full_cpu_sources
+
+# Set up complete list of sources based on configuration.
+sources = base_sources + arch_sources + cpu_sources
+
+if env['FULL_SYSTEM']:
+ sources += full_system_sources
+ if env['ALPHA_TLASER']:
+ sources += turbolaser_sources
+else:
+ sources += syscall_emulation_sources
+
+if env['USE_MYSQL']:
+ sources += mysql_sources
+
+for opt in env.ExportOptions:
+ env.ConfigFile(opt)
+
+###################################################
+#
+# Special build rules.
+#
+###################################################
+
+# base/traceflags.{cc,hh} are generated from base/traceflags.py.
+# $TARGET.base will expand to "<build-dir>/base/traceflags".
+env.Command(Split('base/traceflags.hh base/traceflags.cc'),
+ 'base/traceflags.py',
+ 'python $SOURCE $TARGET.base')
+
+SConscript('python/SConscript', exports = ['env'], duplicate=0)
+
+# This function adds the specified sources to the given build
+# environment, and returns a list of all the corresponding SCons
+# Object nodes (including an extra one for date.cc). We explicitly
+# add the Object nodes so we can set up special dependencies for
+# date.cc.
+def make_objs(sources, env):
+ objs = [env.Object(s) for s in sources]
+ # make date.cc depend on all other objects so it always gets
+ # recompiled whenever anything else does
+ date_obj = env.Object('base/date.cc')
+ env.Depends(date_obj, objs)
+ objs.append(date_obj)
+ return objs
+
+###################################################
+#
+# Define binaries. Each different build type (debug, opt, etc.) gets
+# a slightly different build environment.
+#
+###################################################
+
+# Include file paths are rooted in this directory. SCons will
+# automatically expand '.' to refer to both the source directory and
+# the corresponding build directory to pick up generated include
+# files.
+env.Append(CPPPATH='.')
+
+# Debug binary
+debugEnv = env.Copy(OBJSUFFIX='.do')
+debugEnv.Label = 'debug'
+debugEnv.Append(CCFLAGS=Split('-g3 -gdwarf-2 -O0'))
+debugEnv.Append(CPPDEFINES='DEBUG')
+tlist = debugEnv.Program(target = 'm5.debug',
+ source = make_objs(sources, debugEnv))
+debugEnv.M5Binary = tlist[0]
+
+# Optimized binary
+optEnv = env.Copy()
+optEnv.Label = 'opt'
+optEnv.Append(CCFLAGS=Split('-g -O3'))
+tlist = optEnv.Program(target = 'm5.opt',
+ source = make_objs(sources, optEnv))
+optEnv.M5Binary = tlist[0]
+
+# "Fast" binary
+fastEnv = env.Copy(OBJSUFFIX='.fo')
+fastEnv.Label = 'fast'
+fastEnv.Append(CCFLAGS=Split('-O3'))
+fastEnv.Append(CPPDEFINES='NDEBUG')
+fastEnv.Program(target = 'm5.fast.unstripped',
+ source = make_objs(sources, fastEnv))
+tlist = fastEnv.Command(target = 'm5.fast',
+ source = 'm5.fast.unstripped',
+ action = 'strip $SOURCE -o $TARGET')
+fastEnv.M5Binary = tlist[0]
+
+# Profiled binary
+profEnv = env.Copy(OBJSUFFIX='.po')
+profEnv.Label = 'prof'
+profEnv.Append(CCFLAGS=Split('-O3 -g -pg'), LINKFLAGS='-pg')
+tlist = profEnv.Program(target = 'm5.prof',
+ source = make_objs(sources, profEnv))
+profEnv.M5Binary = tlist[0]
+
+envList = [debugEnv, optEnv, fastEnv, profEnv]
+
+Return('envList')
--- /dev/null
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arch/alpha/tlb.hh"
+#include "arch/alpha/isa_traits.hh"
+#include "arch/alpha/osfpal.hh"
+#include "base/kgdb.h"
+#include "base/remote_gdb.hh"
+#include "base/stats/events.hh"
+#include "config/full_system.hh"
+#include "cpu/base.hh"
+#include "cpu/cpu_exec_context.hh"
+#include "cpu/exec_context.hh"
+#include "kern/kernel_stats.hh"
+#include "sim/debug.hh"
+#include "sim/sim_events.hh"
+
+#if FULL_SYSTEM
+
+using namespace EV5;
+
+////////////////////////////////////////////////////////////////////////
+//
+// Machine dependent functions
+//
+void
+AlphaISA::initCPU(ExecContext *xc, int cpuId)
+{
+ initIPRs(xc, cpuId);
+
+ xc->setIntReg(16, cpuId);
+ xc->setIntReg(0, cpuId);
+
+ xc->setPC(xc->readMiscReg(IPR_PAL_BASE) + (new ResetFault)->vect());
+ xc->setNextPC(xc->readPC() + sizeof(MachInst));
+}
+
+////////////////////////////////////////////////////////////////////////
+//
+//
+//
+void
+AlphaISA::initIPRs(ExecContext *xc, int cpuId)
+{
+ for (int i = 0; i < NumInternalProcRegs; ++i) {
+ xc->setMiscReg(i, 0);
+ }
+
+ xc->setMiscReg(IPR_PAL_BASE, PalBase);
+ xc->setMiscReg(IPR_MCSR, 0x6);
+ xc->setMiscReg(IPR_PALtemp16, cpuId);
+}
+
+
+template <class CPU>
+void
+AlphaISA::processInterrupts(CPU *cpu)
+{
+ //Check if there are any outstanding interrupts
+ //Handle the interrupts
+ int ipl = 0;
+ int summary = 0;
+
+ cpu->checkInterrupts = false;
+
+ if (cpu->readMiscReg(IPR_ASTRR))
+ panic("asynchronous traps not implemented\n");
+
+ if (cpu->readMiscReg(IPR_SIRR)) {
+ for (int i = INTLEVEL_SOFTWARE_MIN;
+ i < INTLEVEL_SOFTWARE_MAX; i++) {
+ if (cpu->readMiscReg(IPR_SIRR) & (ULL(1) << i)) {
+ // See table 4-19 of the 21164 hardware reference
+ ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1;
+ summary |= (ULL(1) << i);
+ }
+ }
+ }
+
+ uint64_t interrupts = cpu->intr_status();
+
+ if (interrupts) {
+ for (int i = INTLEVEL_EXTERNAL_MIN;
+ i < INTLEVEL_EXTERNAL_MAX; i++) {
+ if (interrupts & (ULL(1) << i)) {
+ // See table 4-19 of the 21164 hardware reference
+ ipl = i;
+ summary |= (ULL(1) << i);
+ }
+ }
+ }
+
+ if (ipl && ipl > cpu->readMiscReg(IPR_IPLR)) {
+ cpu->setMiscReg(IPR_ISR, summary);
+ cpu->setMiscReg(IPR_INTID, ipl);
+ cpu->trap(new InterruptFault);
+ DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
+ cpu->readMiscReg(IPR_IPLR), ipl, summary);
+ }
+
+}
+
+template <class CPU>
+void
+AlphaISA::zeroRegisters(CPU *cpu)
+{
+ // Ensure ISA semantics
+ // (no longer very clean due to the change in setIntReg() in the
+ // cpu model. Consider changing later.)
+ cpu->cpuXC->setIntReg(ZeroReg, 0);
+ cpu->cpuXC->setFloatReg(ZeroReg, 0.0);
+}
+
+Fault
+CPUExecContext::hwrei()
+{
+ if (!inPalMode())
+ return new UnimplementedOpcodeFault;
+
+ setNextPC(readMiscReg(AlphaISA::IPR_EXC_ADDR));
+
+ if (!misspeculating()) {
+ if (kernelStats)
+ kernelStats->hwrei();
+
+ cpu->checkInterrupts = true;
+ }
+
+ // FIXME: XXX check for interrupts? XXX
+ return NoFault;
+}
+
+int
+AlphaISA::MiscRegFile::getInstAsid()
+{
+ return EV5::ITB_ASN_ASN(ipr[IPR_ITB_ASN]);
+}
+
+int
+AlphaISA::MiscRegFile::getDataAsid()
+{
+ return EV5::DTB_ASN_ASN(ipr[IPR_DTB_ASN]);
+}
+
+AlphaISA::MiscReg
+AlphaISA::MiscRegFile::readIpr(int idx, Fault &fault, ExecContext *xc)
+{
+ uint64_t retval = 0; // return value, default 0
+
+ switch (idx) {
+ case AlphaISA::IPR_PALtemp0:
+ case AlphaISA::IPR_PALtemp1:
+ case AlphaISA::IPR_PALtemp2:
+ case AlphaISA::IPR_PALtemp3:
+ case AlphaISA::IPR_PALtemp4:
+ case AlphaISA::IPR_PALtemp5:
+ case AlphaISA::IPR_PALtemp6:
+ case AlphaISA::IPR_PALtemp7:
+ case AlphaISA::IPR_PALtemp8:
+ case AlphaISA::IPR_PALtemp9:
+ case AlphaISA::IPR_PALtemp10:
+ case AlphaISA::IPR_PALtemp11:
+ case AlphaISA::IPR_PALtemp12:
+ case AlphaISA::IPR_PALtemp13:
+ case AlphaISA::IPR_PALtemp14:
+ case AlphaISA::IPR_PALtemp15:
+ case AlphaISA::IPR_PALtemp16:
+ case AlphaISA::IPR_PALtemp17:
+ case AlphaISA::IPR_PALtemp18:
+ case AlphaISA::IPR_PALtemp19:
+ case AlphaISA::IPR_PALtemp20:
+ case AlphaISA::IPR_PALtemp21:
+ case AlphaISA::IPR_PALtemp22:
+ case AlphaISA::IPR_PALtemp23:
+ case AlphaISA::IPR_PAL_BASE:
+
+ case AlphaISA::IPR_IVPTBR:
+ case AlphaISA::IPR_DC_MODE:
+ case AlphaISA::IPR_MAF_MODE:
+ case AlphaISA::IPR_ISR:
+ case AlphaISA::IPR_EXC_ADDR:
+ case AlphaISA::IPR_IC_PERR_STAT:
+ case AlphaISA::IPR_DC_PERR_STAT:
+ case AlphaISA::IPR_MCSR:
+ case AlphaISA::IPR_ASTRR:
+ case AlphaISA::IPR_ASTER:
+ case AlphaISA::IPR_SIRR:
+ case AlphaISA::IPR_ICSR:
+ case AlphaISA::IPR_ICM:
+ case AlphaISA::IPR_DTB_CM:
+ case AlphaISA::IPR_IPLR:
+ case AlphaISA::IPR_INTID:
+ case AlphaISA::IPR_PMCTR:
+ // no side-effect
+ retval = ipr[idx];
+ break;
+
+ case AlphaISA::IPR_CC:
+ retval |= ipr[idx] & ULL(0xffffffff00000000);
+ retval |= xc->getCpuPtr()->curCycle() & ULL(0x00000000ffffffff);
+ break;
+
+ case AlphaISA::IPR_VA:
+ retval = ipr[idx];
+ break;
+
+ case AlphaISA::IPR_VA_FORM:
+ case AlphaISA::IPR_MM_STAT:
+ case AlphaISA::IPR_IFAULT_VA_FORM:
+ case AlphaISA::IPR_EXC_MASK:
+ case AlphaISA::IPR_EXC_SUM:
+ retval = ipr[idx];
+ break;
+
+ case AlphaISA::IPR_DTB_PTE:
+ {
+ AlphaISA::PTE &pte = xc->getDTBPtr()->index(!xc->misspeculating());
+
+ retval |= ((u_int64_t)pte.ppn & ULL(0x7ffffff)) << 32;
+ retval |= ((u_int64_t)pte.xre & ULL(0xf)) << 8;
+ retval |= ((u_int64_t)pte.xwe & ULL(0xf)) << 12;
+ retval |= ((u_int64_t)pte.fonr & ULL(0x1)) << 1;
+ retval |= ((u_int64_t)pte.fonw & ULL(0x1))<< 2;
+ retval |= ((u_int64_t)pte.asma & ULL(0x1)) << 4;
+ retval |= ((u_int64_t)pte.asn & ULL(0x7f)) << 57;
+ }
+ break;
+
+ // write only registers
+ case AlphaISA::IPR_HWINT_CLR:
+ case AlphaISA::IPR_SL_XMIT:
+ case AlphaISA::IPR_DC_FLUSH:
+ case AlphaISA::IPR_IC_FLUSH:
+ case AlphaISA::IPR_ALT_MODE:
+ case AlphaISA::IPR_DTB_IA:
+ case AlphaISA::IPR_DTB_IAP:
+ case AlphaISA::IPR_ITB_IA:
+ case AlphaISA::IPR_ITB_IAP:
+ fault = new UnimplementedOpcodeFault;
+ break;
+
+ default:
+ // invalid IPR
+ fault = new UnimplementedOpcodeFault;
+ break;
+ }
+
+ return retval;
+}
+
+#ifdef DEBUG
+// Cause the simulator to break when changing to the following IPL
+int break_ipl = -1;
+#endif
+
+Fault
+AlphaISA::MiscRegFile::setIpr(int idx, uint64_t val, ExecContext *xc)
+{
+ uint64_t old;
+
+ if (xc->misspeculating())
+ return NoFault;
+
+ switch (idx) {
+ case AlphaISA::IPR_PALtemp0:
+ case AlphaISA::IPR_PALtemp1:
+ case AlphaISA::IPR_PALtemp2:
+ case AlphaISA::IPR_PALtemp3:
+ case AlphaISA::IPR_PALtemp4:
+ case AlphaISA::IPR_PALtemp5:
+ case AlphaISA::IPR_PALtemp6:
+ case AlphaISA::IPR_PALtemp7:
+ case AlphaISA::IPR_PALtemp8:
+ case AlphaISA::IPR_PALtemp9:
+ case AlphaISA::IPR_PALtemp10:
+ case AlphaISA::IPR_PALtemp11:
+ case AlphaISA::IPR_PALtemp12:
+ case AlphaISA::IPR_PALtemp13:
+ case AlphaISA::IPR_PALtemp14:
+ case AlphaISA::IPR_PALtemp15:
+ case AlphaISA::IPR_PALtemp16:
+ case AlphaISA::IPR_PALtemp17:
+ case AlphaISA::IPR_PALtemp18:
+ case AlphaISA::IPR_PALtemp19:
+ case AlphaISA::IPR_PALtemp20:
+ case AlphaISA::IPR_PALtemp21:
+ case AlphaISA::IPR_PALtemp22:
+ case AlphaISA::IPR_PAL_BASE:
+ case AlphaISA::IPR_IC_PERR_STAT:
+ case AlphaISA::IPR_DC_PERR_STAT:
+ case AlphaISA::IPR_PMCTR:
+ // write entire quad w/ no side-effect
+ ipr[idx] = val;
+ break;
+
+ case AlphaISA::IPR_CC_CTL:
+ // This IPR resets the cycle counter. We assume this only
+ // happens once... let's verify that.
+ assert(ipr[idx] == 0);
+ ipr[idx] = 1;
+ break;
+
+ case AlphaISA::IPR_CC:
+ // This IPR only writes the upper 64 bits. It's ok to write
+ // all 64 here since we mask out the lower 32 in rpcc (see
+ // isa_desc).
+ ipr[idx] = val;
+ break;
+
+ case AlphaISA::IPR_PALtemp23:
+ // write entire quad w/ no side-effect
+ old = ipr[idx];
+ ipr[idx] = val;
- xc->getCpuPtr()->kernelStats->swpipl(ipr[idx]);
++ if (xc->getKernelStats())
++ xc->getKernelStats()->context(old, val, xc);
+ break;
+
+ case AlphaISA::IPR_DTB_PTE:
+ // write entire quad w/ no side-effect, tag is forthcoming
+ ipr[idx] = val;
+ break;
+
+ case AlphaISA::IPR_EXC_ADDR:
+ // second least significant bit in PC is always zero
+ ipr[idx] = val & ~2;
+ break;
+
+ case AlphaISA::IPR_ASTRR:
+ case AlphaISA::IPR_ASTER:
+ // only write least significant four bits - privilege mask
+ ipr[idx] = val & 0xf;
+ break;
+
+ case AlphaISA::IPR_IPLR:
+#ifdef DEBUG
+ if (break_ipl != -1 && break_ipl == (val & 0x1f))
+ debug_break();
+#endif
+
+ // only write least significant five bits - interrupt level
+ ipr[idx] = val & 0x1f;
- if (val & 0x18)
- xc->getCpuPtr()->kernelStats->mode(Kernel::user, xc);
- else
- xc->getCpuPtr()->kernelStats->mode(Kernel::kernel, xc);
++ if (xc->getKernelStats())
++ xc->getKernelStats()->swpipl(ipr[idx]);
+ break;
+
+ case AlphaISA::IPR_DTB_CM:
- cpu->kernelStats->callpal(palFunc, proxy);
++ if (val & 0x18) {
++ if (xc->getKernelStats())
++ xc->getKernelStats()->mode(Kernel::user, xc);
++ } else {
++ if (xc->getKernelStats())
++ xc->getKernelStats()->mode(Kernel::kernel, xc);
++ }
+
+ case AlphaISA::IPR_ICM:
+ // only write two mode bits - processor mode
+ ipr[idx] = val & 0x18;
+ break;
+
+ case AlphaISA::IPR_ALT_MODE:
+ // only write two mode bits - processor mode
+ ipr[idx] = val & 0x18;
+ break;
+
+ case AlphaISA::IPR_MCSR:
+ // more here after optimization...
+ ipr[idx] = val;
+ break;
+
+ case AlphaISA::IPR_SIRR:
+ // only write software interrupt mask
+ ipr[idx] = val & 0x7fff0;
+ break;
+
+ case AlphaISA::IPR_ICSR:
+ ipr[idx] = val & ULL(0xffffff0300);
+ break;
+
+ case AlphaISA::IPR_IVPTBR:
+ case AlphaISA::IPR_MVPTBR:
+ ipr[idx] = val & ULL(0xffffffffc0000000);
+ break;
+
+ case AlphaISA::IPR_DC_TEST_CTL:
+ ipr[idx] = val & 0x1ffb;
+ break;
+
+ case AlphaISA::IPR_DC_MODE:
+ case AlphaISA::IPR_MAF_MODE:
+ ipr[idx] = val & 0x3f;
+ break;
+
+ case AlphaISA::IPR_ITB_ASN:
+ ipr[idx] = val & 0x7f0;
+ break;
+
+ case AlphaISA::IPR_DTB_ASN:
+ ipr[idx] = val & ULL(0xfe00000000000000);
+ break;
+
+ case AlphaISA::IPR_EXC_SUM:
+ case AlphaISA::IPR_EXC_MASK:
+ // any write to this register clears it
+ ipr[idx] = 0;
+ break;
+
+ case AlphaISA::IPR_INTID:
+ case AlphaISA::IPR_SL_RCV:
+ case AlphaISA::IPR_MM_STAT:
+ case AlphaISA::IPR_ITB_PTE_TEMP:
+ case AlphaISA::IPR_DTB_PTE_TEMP:
+ // read-only registers
+ return new UnimplementedOpcodeFault;
+
+ case AlphaISA::IPR_HWINT_CLR:
+ case AlphaISA::IPR_SL_XMIT:
+ case AlphaISA::IPR_DC_FLUSH:
+ case AlphaISA::IPR_IC_FLUSH:
+ // the following are write only
+ ipr[idx] = val;
+ break;
+
+ case AlphaISA::IPR_DTB_IA:
+ // really a control write
+ ipr[idx] = 0;
+
+ xc->getDTBPtr()->flushAll();
+ break;
+
+ case AlphaISA::IPR_DTB_IAP:
+ // really a control write
+ ipr[idx] = 0;
+
+ xc->getDTBPtr()->flushProcesses();
+ break;
+
+ case AlphaISA::IPR_DTB_IS:
+ // really a control write
+ ipr[idx] = val;
+
+ xc->getDTBPtr()->flushAddr(val,
+ DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]));
+ break;
+
+ case AlphaISA::IPR_DTB_TAG: {
+ struct AlphaISA::PTE pte;
+
+ // FIXME: granularity hints NYI...
+ if (DTB_PTE_GH(ipr[AlphaISA::IPR_DTB_PTE]) != 0)
+ panic("PTE GH field != 0");
+
+ // write entire quad
+ ipr[idx] = val;
+
+ // construct PTE for new entry
+ pte.ppn = DTB_PTE_PPN(ipr[AlphaISA::IPR_DTB_PTE]);
+ pte.xre = DTB_PTE_XRE(ipr[AlphaISA::IPR_DTB_PTE]);
+ pte.xwe = DTB_PTE_XWE(ipr[AlphaISA::IPR_DTB_PTE]);
+ pte.fonr = DTB_PTE_FONR(ipr[AlphaISA::IPR_DTB_PTE]);
+ pte.fonw = DTB_PTE_FONW(ipr[AlphaISA::IPR_DTB_PTE]);
+ pte.asma = DTB_PTE_ASMA(ipr[AlphaISA::IPR_DTB_PTE]);
+ pte.asn = DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]);
+
+ // insert new TAG/PTE value into data TLB
+ xc->getDTBPtr()->insert(val, pte);
+ }
+ break;
+
+ case AlphaISA::IPR_ITB_PTE: {
+ struct AlphaISA::PTE pte;
+
+ // FIXME: granularity hints NYI...
+ if (ITB_PTE_GH(val) != 0)
+ panic("PTE GH field != 0");
+
+ // write entire quad
+ ipr[idx] = val;
+
+ // construct PTE for new entry
+ pte.ppn = ITB_PTE_PPN(val);
+ pte.xre = ITB_PTE_XRE(val);
+ pte.xwe = 0;
+ pte.fonr = ITB_PTE_FONR(val);
+ pte.fonw = ITB_PTE_FONW(val);
+ pte.asma = ITB_PTE_ASMA(val);
+ pte.asn = ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]);
+
+ // insert new TAG/PTE value into instruction TLB
+ xc->getITBPtr()->insert(ipr[AlphaISA::IPR_ITB_TAG], pte);
+ }
+ break;
+
+ case AlphaISA::IPR_ITB_IA:
+ // really a control write
+ ipr[idx] = 0;
+
+ xc->getITBPtr()->flushAll();
+ break;
+
+ case AlphaISA::IPR_ITB_IAP:
+ // really a control write
+ ipr[idx] = 0;
+
+ xc->getITBPtr()->flushProcesses();
+ break;
+
+ case AlphaISA::IPR_ITB_IS:
+ // really a control write
+ ipr[idx] = val;
+
+ xc->getITBPtr()->flushAddr(val,
+ ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]));
+ break;
+
+ default:
+ // invalid IPR
+ return new UnimplementedOpcodeFault;
+ }
+
+ // no error...
+ return NoFault;
+}
+
+void
+AlphaISA::copyIprs(ExecContext *src, ExecContext *dest)
+{
+ for (int i = IPR_Base_DepTag; i < NumInternalProcRegs; ++i) {
+ dest->setMiscReg(i, src->readMiscReg(i));
+ }
+}
+
+/**
+ * Check for special simulator handling of specific PAL calls.
+ * If return value is false, actual PAL call will be suppressed.
+ */
+bool
+CPUExecContext::simPalCheck(int palFunc)
+{
++ if (kernelStats)
++ kernelStats->callpal(palFunc, proxy);
+
+ switch (palFunc) {
+ case PAL::halt:
+ halt();
+ if (--System::numSystemsRunning == 0)
+ new SimExitEvent("all cpus halted");
+ break;
+
+ case PAL::bpt:
+ case PAL::bugchk:
+ if (system->breakpoint())
+ return false;
+ break;
+ }
+
+ return true;
+}
+
+#endif // FULL_SYSTEM
--- /dev/null
- }}, mem_flags = LOCKED);
+// -*- mode:c++ -*-
+
+// Copyright (c) 2003-2006 The Regents of The University of Michigan
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+////////////////////////////////////////////////////////////////////
+//
+// The actual decoder specification
+//
+
+decode OPCODE default Unknown::unknown() {
+
+ format LoadAddress {
+ 0x08: lda({{ Ra = Rb + disp; }});
+ 0x09: ldah({{ Ra = Rb + (disp << 16); }});
+ }
+
+ format LoadOrNop {
+ 0x0a: ldbu({{ Ra.uq = Mem.ub; }});
+ 0x0c: ldwu({{ Ra.uq = Mem.uw; }});
+ 0x0b: ldq_u({{ Ra = Mem.uq; }}, ea_code = {{ EA = (Rb + disp) & ~7; }});
+ 0x23: ldt({{ Fa = Mem.df; }});
+ 0x2a: ldl_l({{ Ra.sl = Mem.sl; }}, mem_flags = LOCKED);
+ 0x2b: ldq_l({{ Ra.uq = Mem.uq; }}, mem_flags = LOCKED);
+ 0x20: MiscPrefetch::copy_load({{ EA = Ra; }},
+ {{ fault = xc->copySrcTranslate(EA); }},
+ inst_flags = [IsMemRef, IsLoad, IsCopy]);
+ }
+
+ format LoadOrPrefetch {
+ 0x28: ldl({{ Ra.sl = Mem.sl; }});
+ 0x29: ldq({{ Ra.uq = Mem.uq; }}, pf_flags = EVICT_NEXT);
+ // IsFloating flag on lds gets the prefetch to disassemble
+ // using f31 instead of r31... functionally it's unnecessary
+ 0x22: lds({{ Fa.uq = s_to_t(Mem.ul); }},
+ pf_flags = PF_EXCLUSIVE, inst_flags = IsFloating);
+ }
+
+ format Store {
+ 0x0e: stb({{ Mem.ub = Ra<7:0>; }});
+ 0x0d: stw({{ Mem.uw = Ra<15:0>; }});
+ 0x2c: stl({{ Mem.ul = Ra<31:0>; }});
+ 0x2d: stq({{ Mem.uq = Ra.uq; }});
+ 0x0f: stq_u({{ Mem.uq = Ra.uq; }}, {{ EA = (Rb + disp) & ~7; }});
+ 0x26: sts({{ Mem.ul = t_to_s(Fa.uq); }});
+ 0x27: stt({{ Mem.df = Fa; }});
+ 0x24: MiscPrefetch::copy_store({{ EA = Rb; }},
+ {{ fault = xc->copy(EA); }},
+ inst_flags = [IsMemRef, IsStore, IsCopy]);
+ }
+
+ format StoreCond {
+ 0x2e: stl_c({{ Mem.ul = Ra<31:0>; }},
+ {{
+ uint64_t tmp = write_result;
+ // see stq_c
+ Ra = (tmp == 0 || tmp == 1) ? tmp : Ra;
- }}, mem_flags = LOCKED);
++ }}, mem_flags = LOCKED, inst_flags = IsStoreConditional);
+ 0x2f: stq_c({{ Mem.uq = Ra; }},
+ {{
+ uint64_t tmp = write_result;
+ // If the write operation returns 0 or 1, then
+ // this was a conventional store conditional,
+ // and the value indicates the success/failure
+ // of the operation. If another value is
+ // returned, then this was a Turbolaser
+ // mailbox access, and we don't update the
+ // result register at all.
+ Ra = (tmp == 0 || tmp == 1) ? tmp : Ra;
- 0x024: mt_fpcr({{ FPCR = Fa.uq; }});
- 0x025: mf_fpcr({{ Fa.uq = FPCR; }});
++ }}, mem_flags = LOCKED, inst_flags = IsStoreConditional);
+ }
+
+ format IntegerOperate {
+
+ 0x10: decode INTFUNC { // integer arithmetic operations
+
+ 0x00: addl({{ Rc.sl = Ra.sl + Rb_or_imm.sl; }});
+ 0x40: addlv({{
+ uint32_t tmp = Ra.sl + Rb_or_imm.sl;
+ // signed overflow occurs when operands have same sign
+ // and sign of result does not match.
+ if (Ra.sl<31:> == Rb_or_imm.sl<31:> && tmp<31:> != Ra.sl<31:>)
+ fault = new IntegerOverflowFault;
+ Rc.sl = tmp;
+ }});
+ 0x02: s4addl({{ Rc.sl = (Ra.sl << 2) + Rb_or_imm.sl; }});
+ 0x12: s8addl({{ Rc.sl = (Ra.sl << 3) + Rb_or_imm.sl; }});
+
+ 0x20: addq({{ Rc = Ra + Rb_or_imm; }});
+ 0x60: addqv({{
+ uint64_t tmp = Ra + Rb_or_imm;
+ // signed overflow occurs when operands have same sign
+ // and sign of result does not match.
+ if (Ra<63:> == Rb_or_imm<63:> && tmp<63:> != Ra<63:>)
+ fault = new IntegerOverflowFault;
+ Rc = tmp;
+ }});
+ 0x22: s4addq({{ Rc = (Ra << 2) + Rb_or_imm; }});
+ 0x32: s8addq({{ Rc = (Ra << 3) + Rb_or_imm; }});
+
+ 0x09: subl({{ Rc.sl = Ra.sl - Rb_or_imm.sl; }});
+ 0x49: sublv({{
+ uint32_t tmp = Ra.sl - Rb_or_imm.sl;
+ // signed overflow detection is same as for add,
+ // except we need to look at the *complemented*
+ // sign bit of the subtrahend (Rb), i.e., if the initial
+ // signs are the *same* then no overflow can occur
+ if (Ra.sl<31:> != Rb_or_imm.sl<31:> && tmp<31:> != Ra.sl<31:>)
+ fault = new IntegerOverflowFault;
+ Rc.sl = tmp;
+ }});
+ 0x0b: s4subl({{ Rc.sl = (Ra.sl << 2) - Rb_or_imm.sl; }});
+ 0x1b: s8subl({{ Rc.sl = (Ra.sl << 3) - Rb_or_imm.sl; }});
+
+ 0x29: subq({{ Rc = Ra - Rb_or_imm; }});
+ 0x69: subqv({{
+ uint64_t tmp = Ra - Rb_or_imm;
+ // signed overflow detection is same as for add,
+ // except we need to look at the *complemented*
+ // sign bit of the subtrahend (Rb), i.e., if the initial
+ // signs are the *same* then no overflow can occur
+ if (Ra<63:> != Rb_or_imm<63:> && tmp<63:> != Ra<63:>)
+ fault = new IntegerOverflowFault;
+ Rc = tmp;
+ }});
+ 0x2b: s4subq({{ Rc = (Ra << 2) - Rb_or_imm; }});
+ 0x3b: s8subq({{ Rc = (Ra << 3) - Rb_or_imm; }});
+
+ 0x2d: cmpeq({{ Rc = (Ra == Rb_or_imm); }});
+ 0x6d: cmple({{ Rc = (Ra.sq <= Rb_or_imm.sq); }});
+ 0x4d: cmplt({{ Rc = (Ra.sq < Rb_or_imm.sq); }});
+ 0x3d: cmpule({{ Rc = (Ra.uq <= Rb_or_imm.uq); }});
+ 0x1d: cmpult({{ Rc = (Ra.uq < Rb_or_imm.uq); }});
+
+ 0x0f: cmpbge({{
+ int hi = 7;
+ int lo = 0;
+ uint64_t tmp = 0;
+ for (int i = 0; i < 8; ++i) {
+ tmp |= (Ra.uq<hi:lo> >= Rb_or_imm.uq<hi:lo>) << i;
+ hi += 8;
+ lo += 8;
+ }
+ Rc = tmp;
+ }});
+ }
+
+ 0x11: decode INTFUNC { // integer logical operations
+
+ 0x00: and({{ Rc = Ra & Rb_or_imm; }});
+ 0x08: bic({{ Rc = Ra & ~Rb_or_imm; }});
+ 0x20: bis({{ Rc = Ra | Rb_or_imm; }});
+ 0x28: ornot({{ Rc = Ra | ~Rb_or_imm; }});
+ 0x40: xor({{ Rc = Ra ^ Rb_or_imm; }});
+ 0x48: eqv({{ Rc = Ra ^ ~Rb_or_imm; }});
+
+ // conditional moves
+ 0x14: cmovlbs({{ Rc = ((Ra & 1) == 1) ? Rb_or_imm : Rc; }});
+ 0x16: cmovlbc({{ Rc = ((Ra & 1) == 0) ? Rb_or_imm : Rc; }});
+ 0x24: cmoveq({{ Rc = (Ra == 0) ? Rb_or_imm : Rc; }});
+ 0x26: cmovne({{ Rc = (Ra != 0) ? Rb_or_imm : Rc; }});
+ 0x44: cmovlt({{ Rc = (Ra.sq < 0) ? Rb_or_imm : Rc; }});
+ 0x46: cmovge({{ Rc = (Ra.sq >= 0) ? Rb_or_imm : Rc; }});
+ 0x64: cmovle({{ Rc = (Ra.sq <= 0) ? Rb_or_imm : Rc; }});
+ 0x66: cmovgt({{ Rc = (Ra.sq > 0) ? Rb_or_imm : Rc; }});
+
+ // For AMASK, RA must be R31.
+ 0x61: decode RA {
+ 31: amask({{ Rc = Rb_or_imm & ~ULL(0x17); }});
+ }
+
+ // For IMPLVER, RA must be R31 and the B operand
+ // must be the immediate value 1.
+ 0x6c: decode RA {
+ 31: decode IMM {
+ 1: decode INTIMM {
+ // return EV5 for FULL_SYSTEM and EV6 otherwise
+ 1: implver({{
+#if FULL_SYSTEM
+ Rc = 1;
+#else
+ Rc = 2;
+#endif
+ }});
+ }
+ }
+ }
+
+#if FULL_SYSTEM
+ // The mysterious 11.25...
+ 0x25: WarnUnimpl::eleven25();
+#endif
+ }
+
+ 0x12: decode INTFUNC {
+ 0x39: sll({{ Rc = Ra << Rb_or_imm<5:0>; }});
+ 0x34: srl({{ Rc = Ra.uq >> Rb_or_imm<5:0>; }});
+ 0x3c: sra({{ Rc = Ra.sq >> Rb_or_imm<5:0>; }});
+
+ 0x02: mskbl({{ Rc = Ra & ~(mask( 8) << (Rb_or_imm<2:0> * 8)); }});
+ 0x12: mskwl({{ Rc = Ra & ~(mask(16) << (Rb_or_imm<2:0> * 8)); }});
+ 0x22: mskll({{ Rc = Ra & ~(mask(32) << (Rb_or_imm<2:0> * 8)); }});
+ 0x32: mskql({{ Rc = Ra & ~(mask(64) << (Rb_or_imm<2:0> * 8)); }});
+
+ 0x52: mskwh({{
+ int bv = Rb_or_imm<2:0>;
+ Rc = bv ? (Ra & ~(mask(16) >> (64 - 8 * bv))) : Ra;
+ }});
+ 0x62: msklh({{
+ int bv = Rb_or_imm<2:0>;
+ Rc = bv ? (Ra & ~(mask(32) >> (64 - 8 * bv))) : Ra;
+ }});
+ 0x72: mskqh({{
+ int bv = Rb_or_imm<2:0>;
+ Rc = bv ? (Ra & ~(mask(64) >> (64 - 8 * bv))) : Ra;
+ }});
+
+ 0x06: extbl({{ Rc = (Ra.uq >> (Rb_or_imm<2:0> * 8))< 7:0>; }});
+ 0x16: extwl({{ Rc = (Ra.uq >> (Rb_or_imm<2:0> * 8))<15:0>; }});
+ 0x26: extll({{ Rc = (Ra.uq >> (Rb_or_imm<2:0> * 8))<31:0>; }});
+ 0x36: extql({{ Rc = (Ra.uq >> (Rb_or_imm<2:0> * 8)); }});
+
+ 0x5a: extwh({{
+ Rc = (Ra << (64 - (Rb_or_imm<2:0> * 8))<5:0>)<15:0>; }});
+ 0x6a: extlh({{
+ Rc = (Ra << (64 - (Rb_or_imm<2:0> * 8))<5:0>)<31:0>; }});
+ 0x7a: extqh({{
+ Rc = (Ra << (64 - (Rb_or_imm<2:0> * 8))<5:0>); }});
+
+ 0x0b: insbl({{ Rc = Ra< 7:0> << (Rb_or_imm<2:0> * 8); }});
+ 0x1b: inswl({{ Rc = Ra<15:0> << (Rb_or_imm<2:0> * 8); }});
+ 0x2b: insll({{ Rc = Ra<31:0> << (Rb_or_imm<2:0> * 8); }});
+ 0x3b: insql({{ Rc = Ra << (Rb_or_imm<2:0> * 8); }});
+
+ 0x57: inswh({{
+ int bv = Rb_or_imm<2:0>;
+ Rc = bv ? (Ra.uq<15:0> >> (64 - 8 * bv)) : 0;
+ }});
+ 0x67: inslh({{
+ int bv = Rb_or_imm<2:0>;
+ Rc = bv ? (Ra.uq<31:0> >> (64 - 8 * bv)) : 0;
+ }});
+ 0x77: insqh({{
+ int bv = Rb_or_imm<2:0>;
+ Rc = bv ? (Ra.uq >> (64 - 8 * bv)) : 0;
+ }});
+
+ 0x30: zap({{
+ uint64_t zapmask = 0;
+ for (int i = 0; i < 8; ++i) {
+ if (Rb_or_imm<i:>)
+ zapmask |= (mask(8) << (i * 8));
+ }
+ Rc = Ra & ~zapmask;
+ }});
+ 0x31: zapnot({{
+ uint64_t zapmask = 0;
+ for (int i = 0; i < 8; ++i) {
+ if (!Rb_or_imm<i:>)
+ zapmask |= (mask(8) << (i * 8));
+ }
+ Rc = Ra & ~zapmask;
+ }});
+ }
+
+ 0x13: decode INTFUNC { // integer multiplies
+ 0x00: mull({{ Rc.sl = Ra.sl * Rb_or_imm.sl; }}, IntMultOp);
+ 0x20: mulq({{ Rc = Ra * Rb_or_imm; }}, IntMultOp);
+ 0x30: umulh({{
+ uint64_t hi, lo;
+ mul128(Ra, Rb_or_imm, hi, lo);
+ Rc = hi;
+ }}, IntMultOp);
+ 0x40: mullv({{
+ // 32-bit multiply with trap on overflow
+ int64_t Rax = Ra.sl; // sign extended version of Ra.sl
+ int64_t Rbx = Rb_or_imm.sl;
+ int64_t tmp = Rax * Rbx;
+ // To avoid overflow, all the upper 32 bits must match
+ // the sign bit of the lower 32. We code this as
+ // checking the upper 33 bits for all 0s or all 1s.
+ uint64_t sign_bits = tmp<63:31>;
+ if (sign_bits != 0 && sign_bits != mask(33))
+ fault = new IntegerOverflowFault;
+ Rc.sl = tmp<31:0>;
+ }}, IntMultOp);
+ 0x60: mulqv({{
+ // 64-bit multiply with trap on overflow
+ uint64_t hi, lo;
+ mul128(Ra, Rb_or_imm, hi, lo);
+ // all the upper 64 bits must match the sign bit of
+ // the lower 64
+ if (!((hi == 0 && lo<63:> == 0) ||
+ (hi == mask(64) && lo<63:> == 1)))
+ fault = new IntegerOverflowFault;
+ Rc = lo;
+ }}, IntMultOp);
+ }
+
+ 0x1c: decode INTFUNC {
+ 0x00: decode RA { 31: sextb({{ Rc.sb = Rb_or_imm< 7:0>; }}); }
+ 0x01: decode RA { 31: sextw({{ Rc.sw = Rb_or_imm<15:0>; }}); }
+ 0x32: ctlz({{
+ uint64_t count = 0;
+ uint64_t temp = Rb;
+ if (temp<63:32>) temp >>= 32; else count += 32;
+ if (temp<31:16>) temp >>= 16; else count += 16;
+ if (temp<15:8>) temp >>= 8; else count += 8;
+ if (temp<7:4>) temp >>= 4; else count += 4;
+ if (temp<3:2>) temp >>= 2; else count += 2;
+ if (temp<1:1>) temp >>= 1; else count += 1;
+ if ((temp<0:0>) != 0x1) count += 1;
+ Rc = count;
+ }}, IntAluOp);
+
+ 0x33: cttz({{
+ uint64_t count = 0;
+ uint64_t temp = Rb;
+ if (!(temp<31:0>)) { temp >>= 32; count += 32; }
+ if (!(temp<15:0>)) { temp >>= 16; count += 16; }
+ if (!(temp<7:0>)) { temp >>= 8; count += 8; }
+ if (!(temp<3:0>)) { temp >>= 4; count += 4; }
+ if (!(temp<1:0>)) { temp >>= 2; count += 2; }
+ if (!(temp<0:0> & ULL(0x1))) count += 1;
+ Rc = count;
+ }}, IntAluOp);
+
+ format FailUnimpl {
+ 0x30: ctpop();
+ 0x31: perr();
+ 0x34: unpkbw();
+ 0x35: unpkbl();
+ 0x36: pkwb();
+ 0x37: pklb();
+ 0x38: minsb8();
+ 0x39: minsw4();
+ 0x3a: minub8();
+ 0x3b: minuw4();
+ 0x3c: maxub8();
+ 0x3d: maxuw4();
+ 0x3e: maxsb8();
+ 0x3f: maxsw4();
+ }
+
+ format BasicOperateWithNopCheck {
+ 0x70: decode RB {
+ 31: ftoit({{ Rc = Fa.uq; }}, FloatCvtOp);
+ }
+ 0x78: decode RB {
+ 31: ftois({{ Rc.sl = t_to_s(Fa.uq); }},
+ FloatCvtOp);
+ }
+ }
+ }
+ }
+
+ // Conditional branches.
+ format CondBranch {
+ 0x39: beq({{ cond = (Ra == 0); }});
+ 0x3d: bne({{ cond = (Ra != 0); }});
+ 0x3e: bge({{ cond = (Ra.sq >= 0); }});
+ 0x3f: bgt({{ cond = (Ra.sq > 0); }});
+ 0x3b: ble({{ cond = (Ra.sq <= 0); }});
+ 0x3a: blt({{ cond = (Ra.sq < 0); }});
+ 0x38: blbc({{ cond = ((Ra & 1) == 0); }});
+ 0x3c: blbs({{ cond = ((Ra & 1) == 1); }});
+
+ 0x31: fbeq({{ cond = (Fa == 0); }});
+ 0x35: fbne({{ cond = (Fa != 0); }});
+ 0x36: fbge({{ cond = (Fa >= 0); }});
+ 0x37: fbgt({{ cond = (Fa > 0); }});
+ 0x33: fble({{ cond = (Fa <= 0); }});
+ 0x32: fblt({{ cond = (Fa < 0); }});
+ }
+
+ // unconditional branches
+ format UncondBranch {
+ 0x30: br();
+ 0x34: bsr(IsCall);
+ }
+
+ // indirect branches
+ 0x1a: decode JMPFUNC {
+ format Jump {
+ 0: jmp();
+ 1: jsr(IsCall);
+ 2: ret(IsReturn);
+ 3: jsr_coroutine(IsCall, IsReturn);
+ }
+ }
+
+ // Square root and integer-to-FP moves
+ 0x14: decode FP_SHORTFUNC {
+ // Integer to FP register moves must have RB == 31
+ 0x4: decode RB {
+ 31: decode FP_FULLFUNC {
+ format BasicOperateWithNopCheck {
+ 0x004: itofs({{ Fc.uq = s_to_t(Ra.ul); }}, FloatCvtOp);
+ 0x024: itoft({{ Fc.uq = Ra.uq; }}, FloatCvtOp);
+ 0x014: FailUnimpl::itoff(); // VAX-format conversion
+ }
+ }
+ }
+
+ // Square root instructions must have FA == 31
+ 0xb: decode FA {
+ 31: decode FP_TYPEFUNC {
+ format FloatingPointOperate {
+#if SS_COMPATIBLE_FP
+ 0x0b: sqrts({{
+ if (Fb < 0.0)
+ fault = new ArithmeticFault;
+ Fc = sqrt(Fb);
+ }}, FloatSqrtOp);
+#else
+ 0x0b: sqrts({{
+ if (Fb.sf < 0.0)
+ fault = new ArithmeticFault;
+ Fc.sf = sqrt(Fb.sf);
+ }}, FloatSqrtOp);
+#endif
+ 0x2b: sqrtt({{
+ if (Fb < 0.0)
+ fault = new ArithmeticFault;
+ Fc = sqrt(Fb);
+ }}, FloatSqrtOp);
+ }
+ }
+ }
+
+ // VAX-format sqrtf and sqrtg are not implemented
+ 0xa: FailUnimpl::sqrtfg();
+ }
+
+ // IEEE floating point
+ 0x16: decode FP_SHORTFUNC_TOP2 {
+ // The top two bits of the short function code break this
+ // space into four groups: binary ops, compares, reserved, and
+ // conversions. See Table 4-12 of AHB. There are different
+ // special cases in these different groups, so we decode on
+ // these top two bits first just to select a decode strategy.
+ // Most of these instructions may have various trapping and
+ // rounding mode flags set; these are decoded in the
+ // FloatingPointDecode template used by the
+ // FloatingPointOperate format.
+
+ // add/sub/mul/div: just decode on the short function code
+ // and source type. All valid trapping and rounding modes apply.
+ 0: decode FP_TRAPMODE {
+ // check for valid trapping modes here
+ 0,1,5,7: decode FP_TYPEFUNC {
+ format FloatingPointOperate {
+#if SS_COMPATIBLE_FP
+ 0x00: adds({{ Fc = Fa + Fb; }});
+ 0x01: subs({{ Fc = Fa - Fb; }});
+ 0x02: muls({{ Fc = Fa * Fb; }}, FloatMultOp);
+ 0x03: divs({{ Fc = Fa / Fb; }}, FloatDivOp);
+#else
+ 0x00: adds({{ Fc.sf = Fa.sf + Fb.sf; }});
+ 0x01: subs({{ Fc.sf = Fa.sf - Fb.sf; }});
+ 0x02: muls({{ Fc.sf = Fa.sf * Fb.sf; }}, FloatMultOp);
+ 0x03: divs({{ Fc.sf = Fa.sf / Fb.sf; }}, FloatDivOp);
+#endif
+
+ 0x20: addt({{ Fc = Fa + Fb; }});
+ 0x21: subt({{ Fc = Fa - Fb; }});
+ 0x22: mult({{ Fc = Fa * Fb; }}, FloatMultOp);
+ 0x23: divt({{ Fc = Fa / Fb; }}, FloatDivOp);
+ }
+ }
+ }
+
+ // Floating-point compare instructions must have the default
+ // rounding mode, and may use the default trapping mode or
+ // /SU. Both trapping modes are treated the same by M5; the
+ // only difference on the real hardware (as far as I can tell)
+ // is that without /SU you'd get an imprecise trap if you
+ // tried to compare a NaN with something else (instead of an
+ // "unordered" result).
+ 1: decode FP_FULLFUNC {
+ format BasicOperateWithNopCheck {
+ 0x0a5, 0x5a5: cmpteq({{ Fc = (Fa == Fb) ? 2.0 : 0.0; }},
+ FloatCmpOp);
+ 0x0a7, 0x5a7: cmptle({{ Fc = (Fa <= Fb) ? 2.0 : 0.0; }},
+ FloatCmpOp);
+ 0x0a6, 0x5a6: cmptlt({{ Fc = (Fa < Fb) ? 2.0 : 0.0; }},
+ FloatCmpOp);
+ 0x0a4, 0x5a4: cmptun({{ // unordered
+ Fc = (!(Fa < Fb) && !(Fa == Fb) && !(Fa > Fb)) ? 2.0 : 0.0;
+ }}, FloatCmpOp);
+ }
+ }
+
+ // The FP-to-integer and integer-to-FP conversion insts
+ // require that FA be 31.
+ 3: decode FA {
+ 31: decode FP_TYPEFUNC {
+ format FloatingPointOperate {
+ 0x2f: decode FP_ROUNDMODE {
+ format FPFixedRounding {
+ // "chopped" i.e. round toward zero
+ 0: cvttq({{ Fc.sq = (int64_t)trunc(Fb); }},
+ Chopped);
+ // round to minus infinity
+ 1: cvttq({{ Fc.sq = (int64_t)floor(Fb); }},
+ MinusInfinity);
+ }
+ default: cvttq({{ Fc.sq = (int64_t)nearbyint(Fb); }});
+ }
+
+ // The cvtts opcode is overloaded to be cvtst if the trap
+ // mode is 2 or 6 (which are not valid otherwise)
+ 0x2c: decode FP_FULLFUNC {
+ format BasicOperateWithNopCheck {
+ // trap on denorm version "cvtst/s" is
+ // simulated same as cvtst
+ 0x2ac, 0x6ac: cvtst({{ Fc = Fb.sf; }});
+ }
+ default: cvtts({{ Fc.sf = Fb; }});
+ }
+
+ // The trapping mode for integer-to-FP conversions
+ // must be /SUI or nothing; /U and /SU are not
+ // allowed. The full set of rounding modes are
+ // supported though.
+ 0x3c: decode FP_TRAPMODE {
+ 0,7: cvtqs({{ Fc.sf = Fb.sq; }});
+ }
+ 0x3e: decode FP_TRAPMODE {
+ 0,7: cvtqt({{ Fc = Fb.sq; }});
+ }
+ }
+ }
+ }
+ }
+
+ // misc FP operate
+ 0x17: decode FP_FULLFUNC {
+ format BasicOperateWithNopCheck {
+ 0x010: cvtlq({{
+ Fc.sl = (Fb.uq<63:62> << 30) | Fb.uq<58:29>;
+ }});
+ 0x030: cvtql({{
+ Fc.uq = (Fb.uq<31:30> << 62) | (Fb.uq<29:0> << 29);
+ }});
+
+ // We treat the precise & imprecise trapping versions of
+ // cvtql identically.
+ 0x130, 0x530: cvtqlv({{
+ // To avoid overflow, all the upper 32 bits must match
+ // the sign bit of the lower 32. We code this as
+ // checking the upper 33 bits for all 0s or all 1s.
+ uint64_t sign_bits = Fb.uq<63:31>;
+ if (sign_bits != 0 && sign_bits != mask(33))
+ fault = new IntegerOverflowFault;
+ Fc.uq = (Fb.uq<31:30> << 62) | (Fb.uq<29:0> << 29);
+ }});
+
+ 0x020: cpys({{ // copy sign
+ Fc.uq = (Fa.uq<63:> << 63) | Fb.uq<62:0>;
+ }});
+ 0x021: cpysn({{ // copy sign negated
+ Fc.uq = (~Fa.uq<63:> << 63) | Fb.uq<62:0>;
+ }});
+ 0x022: cpyse({{ // copy sign and exponent
+ Fc.uq = (Fa.uq<63:52> << 52) | Fb.uq<51:0>;
+ }});
+
+ 0x02a: fcmoveq({{ Fc = (Fa == 0) ? Fb : Fc; }});
+ 0x02b: fcmovne({{ Fc = (Fa != 0) ? Fb : Fc; }});
+ 0x02c: fcmovlt({{ Fc = (Fa < 0) ? Fb : Fc; }});
+ 0x02d: fcmovge({{ Fc = (Fa >= 0) ? Fb : Fc; }});
+ 0x02e: fcmovle({{ Fc = (Fa <= 0) ? Fb : Fc; }});
+ 0x02f: fcmovgt({{ Fc = (Fa > 0) ? Fb : Fc; }});
+
- }});
++ 0x024: mt_fpcr({{ FPCR = Fa.uq; }}, IsIprAccess);
++ 0x025: mf_fpcr({{ Fa.uq = FPCR; }}, IsIprAccess);
+ }
+ }
+
+ // miscellaneous mem-format ops
+ 0x18: decode MEMFUNC {
+ format WarnUnimpl {
+ 0x8000: fetch();
+ 0xa000: fetch_m();
+ 0xe800: ecb();
+ }
+
+ format MiscPrefetch {
+ 0xf800: wh64({{ EA = Rb & ~ULL(63); }},
+ {{ xc->writeHint(EA, 64, memAccessFlags); }},
+ mem_flags = NO_FAULT,
+ inst_flags = [IsMemRef, IsDataPrefetch,
+ IsStore, MemWriteOp]);
+ }
+
+ format BasicOperate {
+ 0xc000: rpcc({{
+#if FULL_SYSTEM
+ /* Rb is a fake dependency so here is a fun way to get
+ * the parser to understand that.
+ */
+ Ra = xc->readMiscRegWithEffect(AlphaISA::IPR_CC, fault) + (Rb & 0);
+
+#else
+ Ra = curTick;
+#endif
- 0x0000: trapb({{ }}, IsSerializing, No_OpClass);
- 0x0400: excb({{ }}, IsSerializing, No_OpClass);
++ }}, IsUnverifiable);
+
+ // All of the barrier instructions below do nothing in
+ // their execute() methods (hence the empty code blocks).
+ // All of their functionality is hard-coded in the
+ // pipeline based on the flags IsSerializing,
+ // IsMemBarrier, and IsWriteBarrier. In the current
+ // detailed CPU model, the execute() function only gets
+ // called at fetch, so there's no way to generate pipeline
+ // behavior at any other stage. Once we go to an
+ // exec-in-exec CPU model we should be able to get rid of
+ // these flags and implement this behavior via the
+ // execute() methods.
+
+ // trapb is just a barrier on integer traps, where excb is
+ // a barrier on integer and FP traps. "EXCB is thus a
+ // superset of TRAPB." (Alpha ARM, Sec 4.11.4) We treat
+ // them the same though.
- 0x9e: rduniq({{ R0 = Runiq; }});
++ 0x0000: trapb({{ }}, IsSerializing, IsSerializeBefore, No_OpClass);
++ 0x0400: excb({{ }}, IsSerializing, IsSerializeBefore, No_OpClass);
+ 0x4000: mb({{ }}, IsMemBarrier, MemReadOp);
+ 0x4400: wmb({{ }}, IsWriteBarrier, MemWriteOp);
+ }
+
+#if FULL_SYSTEM
+ format BasicOperate {
+ 0xe000: rc({{
+ Ra = xc->readIntrFlag();
+ xc->setIntrFlag(0);
+ }}, IsNonSpeculative);
+ 0xf000: rs({{
+ Ra = xc->readIntrFlag();
+ xc->setIntrFlag(1);
+ }}, IsNonSpeculative);
+ }
+#else
+ format FailUnimpl {
+ 0xe000: rc();
+ 0xf000: rs();
+ }
+#endif
+ }
+
+#if FULL_SYSTEM
+ 0x00: CallPal::call_pal({{
+ if (!palValid ||
+ (palPriv
+ && xc->readMiscRegWithEffect(AlphaISA::IPR_ICM, fault) != AlphaISA::mode_kernel)) {
+ // invalid pal function code, or attempt to do privileged
+ // PAL call in non-kernel mode
+ fault = new UnimplementedOpcodeFault;
+ }
+ else {
+ // check to see if simulator wants to do something special
+ // on this PAL call (including maybe suppress it)
+ bool dopal = xc->simPalCheck(palFunc);
+
+ if (dopal) {
+ xc->setMiscRegWithEffect(AlphaISA::IPR_EXC_ADDR, NPC);
+ NPC = xc->readMiscRegWithEffect(AlphaISA::IPR_PAL_BASE, fault) + palOffset;
+ }
+ }
+ }}, IsNonSpeculative);
+#else
+ 0x00: decode PALFUNC {
+ format EmulatedCallPal {
+ 0x00: halt ({{
+ SimExit(curTick, "halt instruction encountered");
+ }}, IsNonSpeculative);
+ 0x83: callsys({{
+ xc->syscall(R0);
+ }}, IsNonSpeculative);
+ // Read uniq reg into ABI return value register (r0)
- 0x9f: wruniq({{ Runiq = R16; }});
++ 0x9e: rduniq({{ R0 = Runiq; }}, IsIprAccess);
+ // Write uniq reg with value from ABI arg register (r16)
- }});
++ 0x9f: wruniq({{ Runiq = R16; }}, IsIprAccess);
+ }
+ }
+#endif
+
+#if FULL_SYSTEM
+ 0x1b: decode PALMODE {
+ 0: OpcdecFault::hw_st_quad();
+ 1: decode HW_LDST_QUAD {
+ format HwLoad {
+ 0: hw_ld({{ EA = (Rb + disp) & ~3; }}, {{ Ra = Mem.ul; }}, L);
+ 1: hw_ld({{ EA = (Rb + disp) & ~7; }}, {{ Ra = Mem.uq; }}, Q);
+ }
+ }
+ }
+
+ 0x1f: decode PALMODE {
+ 0: OpcdecFault::hw_st_cond();
+ format HwStore {
+ 1: decode HW_LDST_COND {
+ 0: decode HW_LDST_QUAD {
+ 0: hw_st({{ EA = (Rb + disp) & ~3; }},
+ {{ Mem.ul = Ra<31:0>; }}, L);
+ 1: hw_st({{ EA = (Rb + disp) & ~7; }},
+ {{ Mem.uq = Ra.uq; }}, Q);
+ }
+
+ 1: FailUnimpl::hw_st_cond();
+ }
+ }
+ }
+
+ 0x19: decode PALMODE {
+ 0: OpcdecFault::hw_mfpr();
+ format HwMoveIPR {
+ 1: hw_mfpr({{
+ Ra = xc->readMiscRegWithEffect(ipr_index, fault);
- }});
++ }}, IsIprAccess);
+ }
+ }
+
+ 0x1d: decode PALMODE {
+ 0: OpcdecFault::hw_mtpr();
+ format HwMoveIPR {
+ 1: hw_mtpr({{
+ xc->setMiscRegWithEffect(ipr_index, Ra);
+ if (traceData) { traceData->setData(Ra); }
- 1:hw_rei({{ xc->hwrei(); }}, IsSerializing);
++ }}, IsIprAccess);
+ }
+ }
+
+ format BasicOperate {
+ 0x1e: decode PALMODE {
+ 0: OpcdecFault::hw_rei();
- }}, IsNonSpeculative);
++ 1:hw_rei({{ xc->hwrei(); }}, IsSerializing, IsSerializeBefore);
+ }
+
+ // M5 special opcodes use the reserved 0x01 opcode space
+ 0x01: decode M5FUNC {
+ 0x00: arm({{
+ AlphaPseudo::arm(xc->xcBase());
+ }}, IsNonSpeculative);
+ 0x01: quiesce({{
+ AlphaPseudo::quiesce(xc->xcBase());
- }}, IsNonSpeculative);
++ }}, IsNonSpeculative, IsQuiesce);
+ 0x02: quiesceNs({{
+ AlphaPseudo::quiesceNs(xc->xcBase(), R16);
- }}, IsNonSpeculative);
++ }}, IsNonSpeculative, IsQuiesce);
+ 0x03: quiesceCycles({{
+ AlphaPseudo::quiesceCycles(xc->xcBase(), R16);
++ }}, IsNonSpeculative, IsQuiesce);
+ 0x04: quiesceTime({{
+ R0 = AlphaPseudo::quiesceTime(xc->xcBase());
+ }}, IsNonSpeculative);
+ 0x10: ivlb({{
+ AlphaPseudo::ivlb(xc->xcBase());
+ }}, No_OpClass, IsNonSpeculative);
+ 0x11: ivle({{
+ AlphaPseudo::ivle(xc->xcBase());
+ }}, No_OpClass, IsNonSpeculative);
+ 0x20: m5exit_old({{
+ AlphaPseudo::m5exit_old(xc->xcBase());
+ }}, No_OpClass, IsNonSpeculative);
+ 0x21: m5exit({{
+ AlphaPseudo::m5exit(xc->xcBase(), R16);
+ }}, No_OpClass, IsNonSpeculative);
+ 0x30: initparam({{ Ra = xc->xcBase()->getCpuPtr()->system->init_param; }});
+ 0x40: resetstats({{
+ AlphaPseudo::resetstats(xc->xcBase(), R16, R17);
+ }}, IsNonSpeculative);
+ 0x41: dumpstats({{
+ AlphaPseudo::dumpstats(xc->xcBase(), R16, R17);
+ }}, IsNonSpeculative);
+ 0x42: dumpresetstats({{
+ AlphaPseudo::dumpresetstats(xc->xcBase(), R16, R17);
+ }}, IsNonSpeculative);
+ 0x43: m5checkpoint({{
+ AlphaPseudo::m5checkpoint(xc->xcBase(), R16, R17);
+ }}, IsNonSpeculative);
+ 0x50: m5readfile({{
+ R0 = AlphaPseudo::readfile(xc->xcBase(), R16, R17, R18);
+ }}, IsNonSpeculative);
+ 0x51: m5break({{
+ AlphaPseudo::debugbreak(xc->xcBase());
+ }}, IsNonSpeculative);
+ 0x52: m5switchcpu({{
+ AlphaPseudo::switchcpu(xc->xcBase());
+ }}, IsNonSpeculative);
+ 0x53: m5addsymbol({{
+ AlphaPseudo::addsymbol(xc->xcBase(), R16, R17);
+ }}, IsNonSpeculative);
+ 0x54: m5panic({{
+ panic("M5 panic instruction called at pc=%#x.", xc->readPC());
+ }}, IsNonSpeculative);
+
+ }
+ }
+#endif
+}
--- /dev/null
- def format HwMoveIPR(code) {{
+// -*- mode:c++ -*-
+
+// Copyright (c) 2003-2005 The Regents of The University of Michigan
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+////////////////////////////////////////////////////////////////////
+//
+// PAL calls & PAL-specific instructions
+//
+
+output header {{
+ /**
+ * Base class for emulated call_pal calls (used only in
+ * non-full-system mode).
+ */
+ class EmulatedCallPal : public AlphaStaticInst
+ {
+ protected:
+
+ /// Constructor.
+ EmulatedCallPal(const char *mnem, ExtMachInst _machInst,
+ OpClass __opClass)
+ : AlphaStaticInst(mnem, _machInst, __opClass)
+ {
+ }
+
+ std::string
+ generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+ };
+}};
+
+output decoder {{
+ std::string
+ EmulatedCallPal::generateDisassembly(Addr pc,
+ const SymbolTable *symtab) const
+ {
+#ifdef SS_COMPATIBLE_DISASSEMBLY
+ return csprintf("%s %s", "call_pal", mnemonic);
+#else
+ return csprintf("%-10s %s", "call_pal", mnemonic);
+#endif
+ }
+}};
+
+def format EmulatedCallPal(code, *flags) {{
+ iop = InstObjParams(name, Name, 'EmulatedCallPal', CodeBlock(code), flags)
+ header_output = BasicDeclare.subst(iop)
+ decoder_output = BasicConstructor.subst(iop)
+ decode_block = BasicDecode.subst(iop)
+ exec_output = BasicExecute.subst(iop)
+}};
+
+output header {{
+ /**
+ * Base class for full-system-mode call_pal instructions.
+ * Probably could turn this into a leaf class and get rid of the
+ * parser template.
+ */
+ class CallPalBase : public AlphaStaticInst
+ {
+ protected:
+ int palFunc; ///< Function code part of instruction
+ int palOffset; ///< Target PC, offset from IPR_PAL_BASE
+ bool palValid; ///< is the function code valid?
+ bool palPriv; ///< is this call privileged?
+
+ /// Constructor.
+ CallPalBase(const char *mnem, ExtMachInst _machInst,
+ OpClass __opClass);
+
+ std::string
+ generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+ };
+}};
+
+output decoder {{
+ inline
+ CallPalBase::CallPalBase(const char *mnem, ExtMachInst _machInst,
+ OpClass __opClass)
+ : AlphaStaticInst(mnem, _machInst, __opClass),
+ palFunc(PALFUNC)
+ {
+ // From the 21164 HRM (paraphrased):
+ // Bit 7 of the function code (mask 0x80) indicates
+ // whether the call is privileged (bit 7 == 0) or
+ // unprivileged (bit 7 == 1). The privileged call table
+ // starts at 0x2000, the unprivielged call table starts at
+ // 0x3000. Bits 5-0 (mask 0x3f) are used to calculate the
+ // offset.
+ const int palPrivMask = 0x80;
+ const int palOffsetMask = 0x3f;
+
+ // Pal call is invalid unless all other bits are 0
+ palValid = ((machInst & ~(palPrivMask | palOffsetMask)) == 0);
+ palPriv = ((machInst & palPrivMask) == 0);
+ int shortPalFunc = (machInst & palOffsetMask);
+ // Add 1 to base to set pal-mode bit
+ palOffset = (palPriv ? 0x2001 : 0x3001) + (shortPalFunc << 6);
+ }
+
+ std::string
+ CallPalBase::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+ {
+ return csprintf("%-10s %#x", "call_pal", palFunc);
+ }
+}};
+
+def format CallPal(code, *flags) {{
+ iop = InstObjParams(name, Name, 'CallPalBase', CodeBlock(code), flags)
+ header_output = BasicDeclare.subst(iop)
+ decoder_output = BasicConstructor.subst(iop)
+ decode_block = BasicDecode.subst(iop)
+ exec_output = BasicExecute.subst(iop)
+}};
+
+////////////////////////////////////////////////////////////////////
+//
+// hw_ld, hw_st
+//
+
+output header {{
+ /**
+ * Base class for hw_ld and hw_st.
+ */
+ class HwLoadStore : public Memory
+ {
+ protected:
+
+ /// Displacement for EA calculation (signed).
+ int16_t disp;
+
+ /// Constructor
+ HwLoadStore(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ StaticInstPtr _eaCompPtr = nullStaticInstPtr,
+ StaticInstPtr _memAccPtr = nullStaticInstPtr);
+
+ std::string
+ generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+ };
+}};
+
+
+output decoder {{
+ inline
+ HwLoadStore::HwLoadStore(const char *mnem, ExtMachInst _machInst,
+ OpClass __opClass,
+ StaticInstPtr _eaCompPtr,
+ StaticInstPtr _memAccPtr)
+ : Memory(mnem, _machInst, __opClass, _eaCompPtr, _memAccPtr),
+ disp(HW_LDST_DISP)
+ {
+ memAccessFlags = 0;
+ if (HW_LDST_PHYS) memAccessFlags |= PHYSICAL;
+ if (HW_LDST_ALT) memAccessFlags |= ALTMODE;
+ if (HW_LDST_VPTE) memAccessFlags |= VPTE;
+ if (HW_LDST_LOCK) memAccessFlags |= LOCKED;
+ }
+
+ std::string
+ HwLoadStore::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+ {
+#ifdef SS_COMPATIBLE_DISASSEMBLY
+ return csprintf("%-10s r%d,%d(r%d)", mnemonic, RA, disp, RB);
+#else
+ // HW_LDST_LOCK and HW_LDST_COND are the same bit.
+ const char *lock_str =
+ (HW_LDST_LOCK) ? (flags[IsLoad] ? ",LOCK" : ",COND") : "";
+
+ return csprintf("%-10s r%d,%d(r%d)%s%s%s%s%s",
+ mnemonic, RA, disp, RB,
+ HW_LDST_PHYS ? ",PHYS" : "",
+ HW_LDST_ALT ? ",ALT" : "",
+ HW_LDST_QUAD ? ",QUAD" : "",
+ HW_LDST_VPTE ? ",VPTE" : "",
+ lock_str);
+#endif
+ }
+}};
+
+def format HwLoad(ea_code, memacc_code, class_ext, *flags) {{
+ (header_output, decoder_output, decode_block, exec_output) = \
+ LoadStoreBase(name, Name + class_ext, ea_code, memacc_code,
+ mem_flags = [], inst_flags = flags,
+ base_class = 'HwLoadStore', exec_template_base = 'Load')
+}};
+
+
+def format HwStore(ea_code, memacc_code, class_ext, *flags) {{
+ (header_output, decoder_output, decode_block, exec_output) = \
+ LoadStoreBase(name, Name + class_ext, ea_code, memacc_code,
+ mem_flags = [], inst_flags = flags,
+ base_class = 'HwLoadStore', exec_template_base = 'Store')
+}};
+
+
+def format HwStoreCond(ea_code, memacc_code, postacc_code, class_ext,
+ *flags) {{
+ (header_output, decoder_output, decode_block, exec_output) = \
+ LoadStoreBase(name, Name + class_ext, ea_code, memacc_code,
+ postacc_code, mem_flags = [], inst_flags = flags,
+ base_class = 'HwLoadStore')
+}};
+
+
+output header {{
+ /**
+ * Base class for hw_mfpr and hw_mtpr.
+ */
+ class HwMoveIPR : public AlphaStaticInst
+ {
+ protected:
+ /// Index of internal processor register.
+ int ipr_index;
+
+ /// Constructor
+ HwMoveIPR(const char *mnem, ExtMachInst _machInst, OpClass __opClass)
+ : AlphaStaticInst(mnem, _machInst, __opClass),
+ ipr_index(HW_IPR_IDX)
+ {
+ }
+
+ std::string
+ generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+ };
+}};
+
+output decoder {{
+ std::string
+ HwMoveIPR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+ {
+ if (_numSrcRegs > 0) {
+ // must be mtpr
+ return csprintf("%-10s r%d,IPR(%#x)",
+ mnemonic, RA, ipr_index);
+ }
+ else {
+ // must be mfpr
+ return csprintf("%-10s IPR(%#x),r%d",
+ mnemonic, ipr_index, RA);
+ }
+ }
+}};
+
- ['IprAccessOp'])
++def format HwMoveIPR(code, *flags) {{
++ all_flags = ['IprAccessOp']
++ all_flags += flags
+ iop = InstObjParams(name, Name, 'HwMoveIPR', CodeBlock(code),
++ all_flags)
+ header_output = BasicDeclare.subst(iop)
+ decoder_output = BasicConstructor.subst(iop)
+ decode_block = BasicDecode.subst(iop)
+ exec_output = BasicExecute.subst(iop)
+}};
+
+
--- /dev/null
- 'LDSTQ',
+#!/usr/bin/env python
+
+# Copyright (c) 2004-2005 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#
+# This file generates the header and source files for the flags
+# that control the tracing facility.
+#
+
+import sys
+
+if len(sys.argv) != 2:
+ print "%s: Need argument (basename of cc/hh files)" % sys.argv[0]
+ sys.exit(1)
+
+hhfilename = sys.argv[1] + '.hh'
+ccfilename = sys.argv[1] + '.cc'
+
+#
+# The list of trace flags that can be used to condition DPRINTFs etc.
+# To define a new flag, simply add it to this list.
+#
+baseFlags = [
+ 'AlphaConsole',
+ 'BADADDR',
+ 'BPredRAS',
+ 'Bus',
+ 'BusAddrRanges',
+ 'BusBridge',
+ 'Cache',
+ 'Chains',
+ 'Clock',
+ 'Commit',
+ 'CommitRate',
+ 'Config',
+ 'Console',
+ 'ConsolePoll',
+ 'ConsoleVerbose',
+ 'Context',
+ 'Cycle',
+ 'DMA',
+ 'DMAReadVerbose',
+ 'DMAWriteVerbose',
+ 'DebugPrintf',
+ 'Decode',
+ 'DiskImage',
+ 'DiskImageRead',
+ 'DiskImageWrite',
+ 'DynInst',
+ 'Ethernet',
+ 'EthernetCksum',
+ 'EthernetDMA',
+ 'EthernetData',
+ 'EthernetDesc',
+ 'EthernetIntr',
+ 'EthernetPIO',
+ 'EthernetSM',
+ 'Event',
+ 'Fault',
+ 'Fetch',
+ 'Flow',
+ 'FreeList',
+ 'FullCPU',
+ 'GDBAcc',
+ 'GDBExtra',
+ 'GDBMisc',
+ 'GDBRead',
+ 'GDBRecv',
+ 'GDBSend',
+ 'GDBWrite',
+ 'HWPrefetch',
+ 'IEW',
+ 'IIC',
+ 'IICMore',
+ 'IPI',
+ 'IQ',
+ 'ISP',
+ 'IdeCtrl',
+ 'IdeDisk',
+ 'InstExec',
+ 'Interrupt',
- 'OoOCPU',
- 'PCEvent',
- 'PCIA',
- 'PCIDEV',
- 'PciConfigAll',
- 'Pipeline',
- 'Printf',
- 'ROB',
- 'Regs',
- 'Rename',
- 'RenameMap',
- 'SQL',
- 'Sampler',
- 'ScsiCtrl',
- 'ScsiDisk',
- 'ScsiNone',
- 'Serialize',
- 'SimpleCPU',
- 'SimpleDisk',
- 'SimpleDiskData',
- 'Sparc',
- 'Split',
- 'Stack',
- 'StatEvents',
- 'Stats',
- 'StoreSet',
- 'Syscall',
- 'SyscallVerbose',
- 'TCPIP',
- 'TLB',
- 'Thread',
- 'Timer',
- 'Tsunami',
- 'Uart',
- 'VtoPhys',
- 'WriteBarrier',
++ 'LSQ',
++ 'LSQUnit',
+ 'Loader',
+ 'MC146818',
+ 'MMU',
+ 'MSHR',
+ 'Mbox',
+ 'MemDepUnit',
- 'FullCPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'LDSTQ', 'StoreSet', 'MemDepUnit', 'DynInst', 'FullCPU']
++ 'OzoneCPU',
++ 'FE',
++ 'IBE',
++ 'BE',
++ 'OzoneLSQ',
+ ]
+
+#
+# "Compound" flags correspond to a set of base flags. These exist
+# solely for convenience in setting them via the command line: if a
+# compound flag is specified, all of the corresponding base flags are
+# set. Compound flags cannot be used directly in DPRINTFs etc.
+# To define a new compound flag, add a new entry to this hash
+# following the existing examples.
+#
+compoundFlagMap = {
+ 'GDBAll' : [ 'GDBMisc', 'GDBAcc', 'GDBRead', 'GDBWrite', 'GDBSend', 'GDBRecv', 'GDBExtra' ],
+ 'ScsiAll' : [ 'ScsiDisk', 'ScsiCtrl', 'ScsiNone' ],
+ 'DiskImageAll' : [ 'DiskImage', 'DiskImageRead', 'DiskImageWrite' ],
+ 'EthernetAll' : [ 'Ethernet', 'EthernetPIO', 'EthernetDMA', 'EthernetData' , 'EthernetDesc', 'EthernetIntr', 'EthernetSM', 'EthernetCksum' ],
+ 'EthernetNoData' : [ 'Ethernet', 'EthernetPIO', 'EthernetDesc', 'EthernetIntr', 'EthernetSM', 'EthernetCksum' ],
+ 'IdeAll' : [ 'IdeCtrl', 'IdeDisk' ],
++ 'FullCPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'LSQ', 'LSQUnit', 'StoreSet', 'MemDepUnit', 'DynInst', 'FullCPU', 'Activity','Scoreboard','Writeback'],
++ 'OzoneCPUAll' : [ 'BE', 'FE', 'IBE', 'OzoneLSQ', 'OzoneCPU']
+}
+
+#############################################################
+#
+# Everything below this point generates the appropriate C++
+# declarations and definitions for the trace flags. If you are simply
+# adding or modifying flag definitions, you should not have to change
+# anything below.
+#
+
+import sys
+
+# extract just the compound flag names into a list
+compoundFlags = []
+compoundFlags.extend(compoundFlagMap.keys())
+compoundFlags.sort()
+
+#
+# First generate the header file. This defines the Flag enum
+# and some extern declarations for the .cc file.
+#
+try:
+ hhfile = file(hhfilename, 'w')
+except IOError, e:
+ sys.exit("can't open %s: %s" % (hhfilename, e))
+
+# file header boilerplate
+print >>hhfile, '''
+/*
+ * DO NOT EDIT THIS FILE!
+ *
+ * Automatically generated from traceflags.py
+ */
+
+#ifndef __BASE_TRACE_FLAGS_HH__
+#define __BASE_TRACE_FLAGS_HH__
+
+namespace Trace {
+
+enum Flags {
+''',
+
+# Generate the enum. Base flags come first, then compound flags.
+idx = 0
+for flag in baseFlags:
+ print >>hhfile, ' %s = %d,' % (flag, idx)
+ idx += 1
+
+numBaseFlags = idx
+print >>hhfile, ' NumFlags = %d,' % idx
+
+# put a comment in here to separate base from compound flags
+print >>hhfile, '''
+ // The remaining enum values are *not* valid indices for Trace::flags.
+ // They are "compound" flags, which correspond to sets of base
+ // flags, and are used only by TraceParamContext::setFlags().
+''',
+
+for flag in compoundFlags:
+ print >>hhfile, ' %s = %d,' % (flag, idx)
+ idx += 1
+
+numCompoundFlags = idx - numBaseFlags
+print >>hhfile, ' NumCompoundFlags = %d' % numCompoundFlags
+
+# trailer boilerplate
+print >>hhfile, '''\
+}; // enum Flags
+
+// Array of strings for SimpleEnumParam
+extern const char *flagStrings[];
+extern const int numFlagStrings;
+
+// Array of arraay pointers: for each compound flag, gives the list of
+// base flags to set. Inidividual flag arrays are terminated by -1.
+extern const Flags *compoundFlags[];
+
+/* namespace Trace */ }
+
+#endif // __BASE_TRACE_FLAGS_HH__
+''',
+
+hhfile.close()
+
+#
+#
+# Print out .cc file with array definitions.
+#
+#
+try:
+ ccfile = file(ccfilename, 'w')
+except OSError, e:
+ sys.exit("can't open %s: %s" % (ccfilename, e))
+
+# file header
+print >>ccfile, '''
+/*
+ * DO NOT EDIT THIS FILE!
+ *
+ * Automatically generated from traceflags.pl.
+ */
+
+#include "base/traceflags.hh"
+
+using namespace Trace;
+
+const char *Trace::flagStrings[] =
+{
+''',
+
+# The string array is used by SimpleEnumParam to map the strings
+# provided by the user to enum values.
+for flag in baseFlags:
+ print >>ccfile, ' "%s",' % flag
+
+for flag in compoundFlags:
+ print >>ccfile, ' "%s",' % flag
+
+print >>ccfile, '};\n'
+
+numFlagStrings = len(baseFlags) + len(compoundFlags);
+
+print >>ccfile, 'const int Trace::numFlagStrings = %d;' % numFlagStrings
+print >>ccfile
+
+#
+# Now define the individual compound flag arrays. There is an array
+# for each compound flag listing the component base flags.
+#
+
+for flag in compoundFlags:
+ flags = compoundFlagMap[flag]
+ flags.append('(Flags)-1')
+ print >>ccfile, 'static const Flags %sMap[] =' % flag
+ print >>ccfile, '{ %s };' % (', '.join(flags))
+ print >>ccfile
+
+#
+# Finally the compoundFlags[] array maps the compound flags
+# to their individual arrays/
+#
+print >>ccfile, 'const Flags *Trace::compoundFlags[] ='
+print >>ccfile, '{'
+
+for flag in compoundFlags:
+ print >>ccfile, ' %sMap,' % flag
+
+# file trailer
+print >>ccfile, '};'
+
+ccfile.close()
+
--- /dev/null
- o3/ldstq.cc
+# -*- mode:python -*-
+
+# Copyright (c) 2006 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+import os.path
+
+# Import build environment variable from SConstruct.
+Import('env')
+
+#################################################################
+#
+# Generate StaticInst execute() method signatures.
+#
+# There must be one signature for each CPU model compiled in.
+# Since the set of compiled-in models is flexible, we generate a
+# header containing the appropriate set of signatures on the fly.
+#
+#################################################################
+
+# CPU model-specific data is contained in cpu_models.py
+# Convert to SCons File node to get path handling
+models_db = File('cpu_models.py')
+# slurp in contents of file
+execfile(models_db.srcnode().abspath)
+
+# Template for execute() signature.
+exec_sig_template = '''
+virtual Fault execute(%s *xc, Trace::InstRecord *traceData) const = 0;
+virtual Fault initiateAcc(%s *xc, Trace::InstRecord *traceData) const
+{ panic("initiateAcc not defined!"); };
+virtual Fault completeAcc(Packet *pkt, %s *xc,
+ Trace::InstRecord *traceData) const
+{ panic("completeAcc not defined!"); };
+'''
+
++mem_ini_sig_template = '''
++virtual Fault initiateAcc(%s *xc, Trace::InstRecord *traceData) const { panic("Not defined!"); };
++'''
++
++mem_comp_sig_template = '''
++virtual Fault completeAcc(uint8_t *data, %s *xc, Trace::InstRecord *traceData) const { panic("Not defined!"); return NoFault; };
++'''
++
+# Generate header.
+def gen_cpu_exec_signatures(target, source, env):
+ f = open(str(target[0]), 'w')
+ print >> f, '''
+#ifndef __CPU_STATIC_INST_EXEC_SIGS_HH__
+#define __CPU_STATIC_INST_EXEC_SIGS_HH__
+'''
+ for cpu in env['CPU_MODELS']:
+ xc_type = CpuModel.dict[cpu].strings['CPU_exec_context']
+ print >> f, exec_sig_template % (xc_type, xc_type, xc_type)
+ print >> f, '''
+#endif // __CPU_STATIC_INST_EXEC_SIGS_HH__
+'''
+
+# Generate string that gets printed when header is rebuilt
+def gen_sigs_string(target, source, env):
+ return "Generating static_inst_exec_sigs.hh: " \
+ + ', '.join(env['CPU_MODELS'])
+
+# Add command to generate header to environment.
+env.Command('static_inst_exec_sigs.hh', models_db,
+ Action(gen_cpu_exec_signatures, gen_sigs_string,
+ varlist = ['CPU_MODELS']))
+
+#################################################################
+#
+# Include CPU-model-specific files based on set of models
+# specified in CPU_MODELS build option.
+#
+#################################################################
+
+sources = []
+
+need_simple_base = False
+if 'AtomicSimpleCPU' in env['CPU_MODELS']:
+ need_simple_base = True
+ sources += Split('simple/atomic.cc')
+
+if 'TimingSimpleCPU' in env['CPU_MODELS']:
+ need_simple_base = True
+ sources += Split('simple/timing.cc')
+
+if need_simple_base:
+ sources += Split('simple/base.cc')
+
+if 'FastCPU' in env['CPU_MODELS']:
+ sources += Split('fast/cpu.cc')
+
+if 'AlphaFullCPU' in env['CPU_MODELS']:
+ sources += Split('''
+ o3/2bit_local_pred.cc
+ o3/alpha_dyn_inst.cc
+ o3/alpha_cpu.cc
+ o3/alpha_cpu_builder.cc
+ o3/bpred_unit.cc
+ o3/btb.cc
+ o3/commit.cc
+ o3/decode.cc
+ o3/fetch.cc
+ o3/free_list.cc
++ o3/fu_pool.cc
+ o3/cpu.cc
+ o3/iew.cc
+ o3/inst_queue.cc
- o3/sat_counter.cc
++ o3/lsq_unit.cc
++ o3/lsq.cc
+ o3/mem_dep_unit.cc
+ o3/ras.cc
+ o3/rename.cc
+ o3/rename_map.cc
+ o3/rob.cc
++ o3/scoreboard.cc
+ o3/store_set.cc
+ o3/tournament_pred.cc
+ ''')
+
++if 'OzoneSimpleCPU' in env['CPU_MODELS']:
++ sources += Split('''
++ ozone/cpu.cc
++ ozone/cpu_builder.cc
++ ozone/dyn_inst.cc
++ ozone/front_end.cc
++ ozone/inorder_back_end.cc
++ ozone/inst_queue.cc
++ ozone/rename_table.cc
++ ''')
++
++if 'OzoneCPU' in env['CPU_MODELS']:
++ sources += Split('''
++ ozone/back_end.cc
++ ozone/lsq_unit.cc
++ ozone/lw_back_end.cc
++ ozone/lw_lsq.cc
++ ''')
++
++if 'CheckerCPU' in env['CPU_MODELS']:
++ sources += Split('''
++ checker/cpu.cc
++ checker/cpu_builder.cc
++ checker/o3_cpu_builder.cc
++ ''')
++
+# FullCPU sources are included from m5/SConscript since they're not
+# below this point in the file hierarchy.
+
+# Convert file names to SCons File objects. This takes care of the
+# path relative to the top of the directory tree.
+sources = [File(s) for s in sources]
+
+Return('sources')
+
--- /dev/null
- #if FULL_SYSTEM
- #include "kern/kernel_stats.hh"
- #endif
-
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <iostream>
+#include <string>
+#include <sstream>
+
+#include "base/cprintf.hh"
+#include "base/loader/symtab.hh"
+#include "base/misc.hh"
+#include "base/output.hh"
+#include "cpu/base.hh"
+#include "cpu/cpuevent.hh"
+#include "cpu/exec_context.hh"
+#include "cpu/profile.hh"
+#include "cpu/sampler/sampler.hh"
+#include "sim/param.hh"
+#include "sim/process.hh"
+#include "sim/sim_events.hh"
+#include "sim/system.hh"
+
+#include "base/trace.hh"
+
-
- kernelStats = new Kernel::Statistics(system);
+using namespace std;
+
+vector<BaseCPU *> BaseCPU::cpuList;
+
+// This variable reflects the max number of threads in any CPU. Be
+// careful to only use it once all the CPUs that you care about have
+// been initialized
+int maxThreadsPerCPU = 1;
+
+#if FULL_SYSTEM
+BaseCPU::BaseCPU(Params *p)
+ : SimObject(p->name), clock(p->clock), checkInterrupts(true),
+ params(p), number_of_threads(p->numberOfThreads), system(p->system)
+#else
+BaseCPU::BaseCPU(Params *p)
+ : SimObject(p->name), clock(p->clock), params(p),
+ number_of_threads(p->numberOfThreads), system(p->system)
+#endif
+{
+ DPRINTF(FullCPU, "BaseCPU: Creating object, mem address %#x.\n", this);
+
+ // add self to global list of CPUs
+ cpuList.push_back(this);
+
+ DPRINTF(FullCPU, "BaseCPU: CPU added to cpuList, mem address %#x.\n",
+ this);
+
+ if (number_of_threads > maxThreadsPerCPU)
+ maxThreadsPerCPU = number_of_threads;
+
+ // allocate per-thread instruction-based event queues
+ comInstEventQueue = new EventQueue *[number_of_threads];
+ for (int i = 0; i < number_of_threads; ++i)
+ comInstEventQueue[i] = new EventQueue("instruction-based event queue");
+
+ //
+ // set up instruction-count-based termination events, if any
+ //
+ if (p->max_insts_any_thread != 0)
+ for (int i = 0; i < number_of_threads; ++i)
+ new SimExitEvent(comInstEventQueue[i], p->max_insts_any_thread,
+ "a thread reached the max instruction count");
+
+ if (p->max_insts_all_threads != 0) {
+ // allocate & initialize shared downcounter: each event will
+ // decrement this when triggered; simulation will terminate
+ // when counter reaches 0
+ int *counter = new int;
+ *counter = number_of_threads;
+ for (int i = 0; i < number_of_threads; ++i)
+ new CountedExitEvent(comInstEventQueue[i],
+ "all threads reached the max instruction count",
+ p->max_insts_all_threads, *counter);
+ }
+
+ // allocate per-thread load-based event queues
+ comLoadEventQueue = new EventQueue *[number_of_threads];
+ for (int i = 0; i < number_of_threads; ++i)
+ comLoadEventQueue[i] = new EventQueue("load-based event queue");
+
+ //
+ // set up instruction-count-based termination events, if any
+ //
+ if (p->max_loads_any_thread != 0)
+ for (int i = 0; i < number_of_threads; ++i)
+ new SimExitEvent(comLoadEventQueue[i], p->max_loads_any_thread,
+ "a thread reached the max load count");
+
+ if (p->max_loads_all_threads != 0) {
+ // allocate & initialize shared downcounter: each event will
+ // decrement this when triggered; simulation will terminate
+ // when counter reaches 0
+ int *counter = new int;
+ *counter = number_of_threads;
+ for (int i = 0; i < number_of_threads; ++i)
+ new CountedExitEvent(comLoadEventQueue[i],
+ "all threads reached the max load count",
+ p->max_loads_all_threads, *counter);
+ }
+
+#if FULL_SYSTEM
+ memset(interrupts, 0, sizeof(interrupts));
+ intstatus = 0;
+#endif
+
+ functionTracingEnabled = false;
+ if (p->functionTrace) {
+ functionTraceStream = simout.find(csprintf("ftrace.%s", name()));
+ currentFunctionStart = currentFunctionEnd = 0;
+ functionEntryTick = p->functionTraceStart;
+
+ if (p->functionTraceStart == 0) {
+ functionTracingEnabled = true;
+ } else {
+ Event *e =
+ new EventWrapper<BaseCPU, &BaseCPU::enableFunctionTrace>(this,
+ true);
+ e->schedule(p->functionTraceStart);
+ }
+ }
+#if FULL_SYSTEM
+ profileEvent = NULL;
+ if (params->profile)
+ profileEvent = new ProfileEvent(this, params->profile);
- #if FULL_SYSTEM
- if (kernelStats)
- delete kernelStats;
- #endif
+#endif
+
+}
+
+BaseCPU::Params::Params()
+{
+#if FULL_SYSTEM
+ profile = false;
+#endif
++ checker = NULL;
+}
+
+void
+BaseCPU::enableFunctionTrace()
+{
+ functionTracingEnabled = true;
+}
+
+BaseCPU::~BaseCPU()
+{
- if (kernelStats)
- kernelStats->regStats(name() + ".kern");
+}
+
+void
+BaseCPU::init()
+{
+ if (!params->deferRegistration)
+ registerExecContexts();
+}
+
+void
+BaseCPU::startup()
+{
+#if FULL_SYSTEM
+ if (!params->deferRegistration && profileEvent)
+ profileEvent->schedule(curTick);
+#endif
+}
+
+
+void
+BaseCPU::regStats()
+{
+ using namespace Stats;
+
+ numCycles
+ .name(name() + ".numCycles")
+ .desc("number of cpu cycles simulated")
+ ;
+
+ int size = execContexts.size();
+ if (size > 1) {
+ for (int i = 0; i < size; ++i) {
+ stringstream namestr;
+ ccprintf(namestr, "%s.ctx%d", name(), i);
+ execContexts[i]->regStats(namestr.str());
+ }
+ } else if (size == 1)
+ execContexts[0]->regStats(name());
+
+#if FULL_SYSTEM
-
- #if FULL_SYSTEM
- if (kernelStats)
- kernelStats->serialize(os);
- #endif
-
+#endif
+}
+
+
+void
+BaseCPU::registerExecContexts()
+{
+ for (int i = 0; i < execContexts.size(); ++i) {
+ ExecContext *xc = execContexts[i];
+
+#if FULL_SYSTEM
+ int id = params->cpu_id;
+ if (id != -1)
+ id += i;
+
+ xc->setCpuId(system->registerExecContext(xc, id));
+#else
+ xc->setCpuId(xc->getProcessPtr()->registerExecContext(xc));
+#endif
++ }
+ }
+}
+
+
+void
+BaseCPU::switchOut(Sampler *sampler)
+{
+ panic("This CPU doesn't support sampling!");
+}
+
+void
+BaseCPU::takeOverFrom(BaseCPU *oldCPU)
+{
+ assert(execContexts.size() == oldCPU->execContexts.size());
+
+ for (int i = 0; i < execContexts.size(); ++i) {
+ ExecContext *newXC = execContexts[i];
+ ExecContext *oldXC = oldCPU->execContexts[i];
+
+ newXC->takeOverFrom(oldXC);
+
+ CpuEvent::replaceExecContext(oldXC, newXC);
+
+ assert(newXC->readCpuId() == oldXC->readCpuId());
+#if FULL_SYSTEM
+ system->replaceExecContext(newXC, newXC->readCpuId());
+#else
+ assert(newXC->getProcessPtr() == oldXC->getProcessPtr());
+ newXC->getProcessPtr()->replaceExecContext(newXC, newXC->readCpuId());
+#endif
+ }
+
+#if FULL_SYSTEM
+ for (int i = 0; i < TheISA::NumInterruptLevels; ++i)
+ interrupts[i] = oldCPU->interrupts[i];
+ intstatus = oldCPU->intstatus;
+
+ for (int i = 0; i < execContexts.size(); ++i)
+ execContexts[i]->profileClear();
+
+ if (profileEvent)
+ profileEvent->schedule(curTick);
+#endif
+}
+
+
+#if FULL_SYSTEM
+BaseCPU::ProfileEvent::ProfileEvent(BaseCPU *_cpu, int _interval)
+ : Event(&mainEventQueue), cpu(_cpu), interval(_interval)
+{ }
+
+void
+BaseCPU::ProfileEvent::process()
+{
+ for (int i = 0, size = cpu->execContexts.size(); i < size; ++i) {
+ ExecContext *xc = cpu->execContexts[i];
+ xc->profileSample();
+ }
+
+ schedule(curTick + interval);
+}
+
+void
+BaseCPU::post_interrupt(int int_num, int index)
+{
+ DPRINTF(Interrupt, "Interrupt %d:%d posted\n", int_num, index);
+
+ if (int_num < 0 || int_num >= TheISA::NumInterruptLevels)
+ panic("int_num out of bounds\n");
+
+ if (index < 0 || index >= sizeof(uint64_t) * 8)
+ panic("int_num out of bounds\n");
+
+ checkInterrupts = true;
+ interrupts[int_num] |= 1 << index;
+ intstatus |= (ULL(1) << int_num);
+}
+
+void
+BaseCPU::clear_interrupt(int int_num, int index)
+{
+ DPRINTF(Interrupt, "Interrupt %d:%d cleared\n", int_num, index);
+
+ if (int_num < 0 || int_num >= TheISA::NumInterruptLevels)
+ panic("int_num out of bounds\n");
+
+ if (index < 0 || index >= sizeof(uint64_t) * 8)
+ panic("int_num out of bounds\n");
+
+ interrupts[int_num] &= ~(1 << index);
+ if (interrupts[int_num] == 0)
+ intstatus &= ~(ULL(1) << int_num);
+}
+
+void
+BaseCPU::clear_interrupts()
+{
+ DPRINTF(Interrupt, "Interrupts all cleared\n");
+
+ memset(interrupts, 0, sizeof(interrupts));
+ intstatus = 0;
+}
+
+
+void
+BaseCPU::serialize(std::ostream &os)
+{
+ SERIALIZE_ARRAY(interrupts, TheISA::NumInterruptLevels);
+ SERIALIZE_SCALAR(intstatus);
-
- #if FULL_SYSTEM
- if (kernelStats)
- kernelStats->unserialize(cp, section);
- #endif
+}
+
+void
+BaseCPU::unserialize(Checkpoint *cp, const std::string §ion)
+{
+ UNSERIALIZE_ARRAY(interrupts, TheISA::NumInterruptLevels);
+ UNSERIALIZE_SCALAR(intstatus);
+}
+
+#endif // FULL_SYSTEM
+
+void
+BaseCPU::traceFunctionsInternal(Addr pc)
+{
+ if (!debugSymbolTable)
+ return;
+
+ // if pc enters different function, print new function symbol and
+ // update saved range. Otherwise do nothing.
+ if (pc < currentFunctionStart || pc >= currentFunctionEnd) {
+ string sym_str;
+ bool found = debugSymbolTable->findNearestSymbol(pc, sym_str,
+ currentFunctionStart,
+ currentFunctionEnd);
+
+ if (!found) {
+ // no symbol found: use addr as label
+ sym_str = csprintf("0x%x", pc);
+ currentFunctionStart = pc;
+ currentFunctionEnd = pc + 1;
+ }
+
+ ccprintf(*functionTraceStream, " (%d)\n%d: %s",
+ curTick - functionEntryTick, curTick, sym_str);
+ functionEntryTick = curTick;
+ }
+}
+
+
+DEFINE_SIM_OBJECT_CLASS_NAME("BaseCPU", BaseCPU)
--- /dev/null
- class System;
- namespace Kernel { class Statistics; }
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_BASE_HH__
+#define __CPU_BASE_HH__
+
+#include <vector>
+
+#include "base/statistics.hh"
+#include "config/full_system.hh"
+#include "cpu/sampler/sampler.hh"
+#include "sim/eventq.hh"
+#include "sim/sim_object.hh"
+#include "arch/isa_traits.hh"
+
-
- #if FULL_SYSTEM
- Kernel::Statistics *kernelStats;
- #endif
+class BranchPred;
++class CheckerCPU;
+class ExecContext;
++class System;
+
+class BaseCPU : public SimObject
+{
+ protected:
+ // CPU's clock period in terms of the number of ticks of curTime.
+ Tick clock;
+
+ public:
+ inline Tick frequency() const { return Clock::Frequency / clock; }
+ inline Tick cycles(int numCycles) const { return clock * numCycles; }
+ inline Tick curCycle() const { return curTick / clock; }
+
+#if FULL_SYSTEM
+ protected:
+ uint64_t interrupts[TheISA::NumInterruptLevels];
+ uint64_t intstatus;
+
+ public:
+ virtual void post_interrupt(int int_num, int index);
+ virtual void clear_interrupt(int int_num, int index);
+ virtual void clear_interrupts();
+ bool checkInterrupts;
+
+ bool check_interrupt(int int_num) const {
+ if (int_num > TheISA::NumInterruptLevels)
+ panic("int_num out of bounds\n");
+
+ return interrupts[int_num] != 0;
+ }
+
+ bool check_interrupts() const { return intstatus != 0; }
+ uint64_t intr_status() const { return intstatus; }
+
+ class ProfileEvent : public Event
+ {
+ private:
+ BaseCPU *cpu;
+ int interval;
+
+ public:
+ ProfileEvent(BaseCPU *cpu, int interval);
+ void process();
+ };
+ ProfileEvent *profileEvent;
+#endif
+
+ protected:
+ std::vector<ExecContext *> execContexts;
+
+ public:
+
+ /// Notify the CPU that the indicated context is now active. The
+ /// delay parameter indicates the number of ticks to wait before
+ /// executing (typically 0 or 1).
+ virtual void activateContext(int thread_num, int delay) {}
+
+ /// Notify the CPU that the indicated context is now suspended.
+ virtual void suspendContext(int thread_num) {}
+
+ /// Notify the CPU that the indicated context is now deallocated.
+ virtual void deallocateContext(int thread_num) {}
+
+ /// Notify the CPU that the indicated context is now halted.
+ virtual void haltContext(int thread_num) {}
+
+ public:
+ struct Params
+ {
+ std::string name;
+ int numberOfThreads;
+ bool deferRegistration;
+ Counter max_insts_any_thread;
+ Counter max_insts_all_threads;
+ Counter max_loads_any_thread;
+ Counter max_loads_all_threads;
+ Tick clock;
+ bool functionTrace;
+ Tick functionTraceStart;
+ System *system;
+#if FULL_SYSTEM
+ int cpu_id;
+ Tick profile;
+#endif
++ BaseCPU *checker;
+
+ Params();
+ };
+
+ const Params *params;
+
+ BaseCPU(Params *params);
+ virtual ~BaseCPU();
+
+ virtual void init();
+ virtual void startup();
+ virtual void regStats();
+
+ virtual void activateWhenReady(int tid) {};
+
+ void registerExecContexts();
+
+ /// Prepare for another CPU to take over execution. When it is
+ /// is ready (drained pipe) it signals the sampler.
+ virtual void switchOut(Sampler *);
+
+ /// Take over execution from the given CPU. Used for warm-up and
+ /// sampling.
+ virtual void takeOverFrom(BaseCPU *);
+
+ /**
+ * Number of threads we're actually simulating (<= SMT_MAX_THREADS).
+ * This is a constant for the duration of the simulation.
+ */
+ int number_of_threads;
+
+ /**
+ * Vector of per-thread instruction-based event queues. Used for
+ * scheduling events based on number of instructions committed by
+ * a particular thread.
+ */
+ EventQueue **comInstEventQueue;
+
+ /**
+ * Vector of per-thread load-based event queues. Used for
+ * scheduling events based on number of loads committed by
+ *a particular thread.
+ */
+ EventQueue **comLoadEventQueue;
+
+ System *system;
+
+#if FULL_SYSTEM
+ /**
+ * Serialize this object to the given output stream.
+ * @param os The stream to serialize to.
+ */
+ virtual void serialize(std::ostream &os);
+
+ /**
+ * Reconstruct the state of this object from a checkpoint.
+ * @param cp The checkpoint use.
+ * @param section The section name of this object
+ */
+ virtual void unserialize(Checkpoint *cp, const std::string §ion);
+
+#endif
+
+ /**
+ * Return pointer to CPU's branch predictor (NULL if none).
+ * @return Branch predictor pointer.
+ */
+ virtual BranchPred *getBranchPred() { return NULL; };
+
+ virtual Counter totalInstructions() const { return 0; }
+
+ // Function tracing
+ private:
+ bool functionTracingEnabled;
+ std::ostream *functionTraceStream;
+ Addr currentFunctionStart;
+ Addr currentFunctionEnd;
+ Tick functionEntryTick;
+ void enableFunctionTrace();
+ void traceFunctionsInternal(Addr pc);
+
+ protected:
+ void traceFunctions(Addr pc)
+ {
+ if (functionTracingEnabled)
+ traceFunctionsInternal(pc);
+ }
+
+ private:
+ static std::vector<BaseCPU *> cpuList; //!< Static global cpu list
+
+ public:
+ static int numSimulatedCPUs() { return cpuList.size(); }
+ static Counter numSimulatedInstructions()
+ {
+ Counter total = 0;
+
+ int size = cpuList.size();
+ for (int i = 0; i < size; ++i)
+ total += cpuList[i]->totalInstructions();
+
+ return total;
+ }
+
+ public:
+ // Number of CPU cycles simulated
+ Stats::Scalar<> numCycles;
+};
+
+#endif // __CPU_BASE_HH__
--- /dev/null
- #ifndef __CPU_BASE_DYN_INST_CC__
- #define __CPU_BASE_DYN_INST_CC__
-
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- unsigned a = (unsigned)addr;
- unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF;
+#include <iostream>
++#include <set>
+#include <string>
+#include <sstream>
+
+#include "base/cprintf.hh"
+#include "base/trace.hh"
+
+#include "arch/faults.hh"
+#include "cpu/exetrace.hh"
+#include "mem/mem_req.hh"
+
+#include "cpu/base_dyn_inst.hh"
+#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/alpha_cpu.hh"
++#include "cpu/ozone/simple_impl.hh"
++#include "cpu/ozone/ozone_impl.hh"
+
+using namespace std;
+using namespace TheISA;
+
+#define NOHASH
+#ifndef NOHASH
+
+#include "base/hashmap.hh"
+
+unsigned int MyHashFunc(const BaseDynInst *addr)
+{
- return hash;
++ unsigned a = (unsigned)addr;
++ unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF;
+
- typedef m5::hash_map<const BaseDynInst *, const BaseDynInst *, MyHashFunc> my_hash_t;
++ return hash;
+}
+
- BaseDynInst<Impl>::BaseDynInst(MachInst machInst, Addr inst_PC,
++typedef m5::hash_map<const BaseDynInst *, const BaseDynInst *, MyHashFunc>
++my_hash_t;
++
+my_hash_t thishash;
+#endif
+
+template <class Impl>
- : staticInst(machInst), traceData(NULL), cpu(cpu), cpuXC(cpu->cpuXCBase())
++BaseDynInst<Impl>::BaseDynInst(ExtMachInst machInst, Addr inst_PC,
+ Addr pred_PC, InstSeqNum seq_num,
+ FullCPU *cpu)
- fault = new UnimplementedOpcodeFault;
++ : staticInst(machInst), traceData(NULL), cpu(cpu)/*, xc(cpu->xcBase())*/
+{
+ seqNum = seq_num;
+
+ PC = inst_PC;
+ nextPC = PC + sizeof(MachInst);
+ predPC = pred_PC;
+
+ initVars();
+}
+
+template <class Impl>
+BaseDynInst<Impl>::BaseDynInst(StaticInstPtr &_staticInst)
+ : staticInst(_staticInst), traceData(NULL)
+{
++ seqNum = 0;
+ initVars();
+}
+
+template <class Impl>
+void
+BaseDynInst<Impl>::initVars()
+{
++ req = NULL;
+ effAddr = MemReq::inval_addr;
+ physEffAddr = MemReq::inval_addr;
++ storeSize = 0;
+
+ readyRegs = 0;
+
+ completed = false;
++ resultReady = false;
+ canIssue = false;
+ issued = false;
+ executed = false;
+ canCommit = false;
++ committed = false;
+ squashed = false;
+ squashedInIQ = false;
++ squashedInLSQ = false;
++ squashedInROB = false;
+ eaCalcDone = false;
++ memOpDone = false;
++ lqIdx = -1;
++ sqIdx = -1;
++ reachedCommit = false;
+
+ blockingInst = false;
+ recoverInst = false;
+
++ iqEntry = false;
++ robEntry = false;
++
++ serializeBefore = false;
++ serializeAfter = false;
++ serializeHandled = false;
++
+ // Eventually make this a parameter.
+ threadNumber = 0;
+
+ // Also make this a parameter, or perhaps get it from xc or cpu.
+ asid = 0;
+
+ // Initialize the fault to be unimplemented opcode.
- DPRINTF(FullCPU, "DynInst: Instruction created. Instcount=%i\n",
- instcount);
++// fault = new UnimplementedOpcodeFault;
++ fault = NoFault;
+
+ ++instcount;
+
- DPRINTF(FullCPU, "DynInst: Instruction destroyed. Instcount=%i\n",
- instcount);
++ if (instcount > 1500) {
++ cpu->dumpInsts();
++#ifdef DEBUG
++ dumpSNList();
++#endif
++ assert(instcount <= 1500);
++ }
++
++ DPRINTF(DynInst, "DynInst: [sn:%lli] Instruction created. Instcount=%i\n",
++ seqNum, instcount);
++
++#ifdef DEBUG
++ cpu->snList.insert(seqNum);
++#endif
+}
+
+template <class Impl>
+BaseDynInst<Impl>::~BaseDynInst()
+{
++ if (req) {
++ req = NULL;
++ }
++
++ if (traceData) {
++ delete traceData;
++ }
++
+ --instcount;
- MemReqPtr req = new MemReq(addr, cpuXC->getProxy(), 1, flags);
++
++ DPRINTF(DynInst, "DynInst: [sn:%lli] Instruction destroyed. Instcount=%i\n",
++ seqNum, instcount);
++#ifdef DEBUG
++ cpu->snList.erase(seqNum);
++#endif
+}
+
++#ifdef DEBUG
++template <class Impl>
++void
++BaseDynInst<Impl>::dumpSNList()
++{
++ std::set<InstSeqNum>::iterator sn_it = cpu->snList.begin();
++
++ int count = 0;
++ while (sn_it != cpu->snList.end()) {
++ cprintf("%i: [sn:%lli] not destroyed\n", count, (*sn_it));
++ count++;
++ sn_it++;
++ }
++}
++#endif
++
+template <class Impl>
+void
+BaseDynInst<Impl>::prefetch(Addr addr, unsigned flags)
+{
+ // This is the "functional" implementation of prefetch. Not much
+ // happens here since prefetches don't affect the architectural
+ // state.
+
+ // Generate a MemReq so we can translate the effective address.
- Fault trans_fault = cpuXC->translateDataReadReq(req);
++ MemReqPtr req = new MemReq(addr, thread->getXCProxy(), 1, flags);
+ req->asid = asid;
+
+ // Prefetches never cause faults.
+ fault = NoFault;
+
+ // note this is a local, not BaseDynInst::fault
- /**
- * @todo
- * Replace the disjoint functional memory with a unified one and remove
- * this hack.
- */
- #if !FULL_SYSTEM
- req->paddr = req->vaddr;
- #endif
-
++ Fault trans_fault = cpu->translateDataReadReq(req);
+
+ if (trans_fault == NoFault && !(req->flags & UNCACHEABLE)) {
+ // It's a valid address to cacheable space. Record key MemReq
+ // parameters so we can generate another one just like it for
+ // the timing access without calling translate() again (which
+ // might mess up the TLB).
+ effAddr = req->vaddr;
+ physEffAddr = req->paddr;
+ memReqFlags = req->flags;
+ } else {
+ // Bogus address (invalid or uncacheable space). Mark it by
+ // setting the eff_addr to InvalidAddr.
+ effAddr = physEffAddr = MemReq::inval_addr;
+ }
+
- MemReqPtr req = new MemReq(addr, cpuXC->getProxy(), size, flags);
+ if (traceData) {
+ traceData->setAddr(addr);
+ }
+}
+
+template <class Impl>
+void
+BaseDynInst<Impl>::writeHint(Addr addr, int size, unsigned flags)
+{
+ // Need to create a MemReq here so we can do a translation. This
+ // will casue a TLB miss trap if necessary... not sure whether
+ // that's the best thing to do or not. We don't really need the
+ // MemReq otherwise, since wh64 has no functional effect.
- fault = cpuXC->translateDataWriteReq(req);
++ MemReqPtr req = new MemReq(addr, thread->getXCProxy(), size, flags);
+ req->asid = asid;
+
- MemReqPtr req = new MemReq(src, cpuXC->getProxy(), 64);
++ fault = cpu->translateDataWriteReq(req);
+
+ if (fault == NoFault && !(req->flags & UNCACHEABLE)) {
+ // Record key MemReq parameters so we can generate another one
+ // just like it for the timing access without calling translate()
+ // again (which might mess up the TLB).
+ effAddr = req->vaddr;
+ physEffAddr = req->paddr;
+ memReqFlags = req->flags;
+ } else {
+ // ignore faults & accesses to uncacheable space... treat as no-op
+ effAddr = physEffAddr = MemReq::inval_addr;
+ }
+
+ storeSize = size;
+ storeData = 0;
+}
+
+/**
+ * @todo Need to find a way to get the cache block size here.
+ */
+template <class Impl>
+Fault
+BaseDynInst<Impl>::copySrcTranslate(Addr src)
+{
- Fault fault = cpuXC->translateDataReadReq(req);
++ MemReqPtr req = new MemReq(src, thread->getXCProxy(), 64);
+ req->asid = asid;
+
+ // translate to physical address
- cpuXC->copySrcAddr = src;
- cpuXC->copySrcPhysAddr = req->paddr;
++ Fault fault = cpu->translateDataReadReq(req);
+
+ if (fault == NoFault) {
- cpuXC->copySrcAddr = 0;
- cpuXC->copySrcPhysAddr = 0;
++ thread->copySrcAddr = src;
++ thread->copySrcPhysAddr = req->paddr;
+ } else {
- FunctionalMemory *mem = cpuXC->mem;
- assert(cpuXC->copySrcPhysAddr || cpuXC->misspeculating());
- MemReqPtr req = new MemReq(dest, cpuXC->getProxy(), 64);
++ thread->copySrcAddr = 0;
++ thread->copySrcPhysAddr = 0;
+ }
+ return fault;
+}
+
+/**
+ * @todo Need to find a way to get the cache block size here.
+ */
+template <class Impl>
+Fault
+BaseDynInst<Impl>::copy(Addr dest)
+{
+ uint8_t data[64];
- Fault fault = cpuXC->translateDataWriteReq(req);
++ FunctionalMemory *mem = thread->mem;
++ assert(thread->copySrcPhysAddr || thread->misspeculating());
++ MemReqPtr req = new MemReq(dest, thread->getXCProxy(), 64);
+ req->asid = asid;
+
+ // translate to physical address
- req->paddr = cpuXC->copySrcPhysAddr;
++ Fault fault = cpu->translateDataWriteReq(req);
+
+ if (fault == NoFault) {
+ Addr dest_addr = req->paddr;
+ // Need to read straight from memory since we have more than 8 bytes.
-
++ req->paddr = thread->copySrcPhysAddr;
+ mem->read(req, data);
+ req->paddr = dest_addr;
+ mem->write(req, data);
+ }
+ return fault;
+}
+
+template <class Impl>
+void
+BaseDynInst<Impl>::dump()
+{
+ cprintf("T%d : %#08d `", threadNumber, PC);
+ cout << staticInst->disassemble(PC);
+ cprintf("'\n");
+}
+
+template <class Impl>
+void
+BaseDynInst<Impl>::dump(std::string &outstring)
+{
+ std::ostringstream s;
+ s << "T" << threadNumber << " : 0x" << PC << " "
+ << staticInst->disassemble(PC);
+
+ outstring = s.str();
+}
+
- for (int i = 1; i < numSrcRegs(); ++i)
- {
+#if 0
+template <class Impl>
+Fault
+BaseDynInst<Impl>::mem_access(mem_cmd cmd, Addr addr, void *p, int nbytes)
+{
+ Fault fault;
+
+ // check alignments, even speculative this test should always pass
+ if ((nbytes & nbytes - 1) != 0 || (addr & nbytes - 1) != 0) {
+ for (int i = 0; i < nbytes; i++)
+ ((char *) p)[i] = 0;
+
+ // I added the following because according to the comment above,
+ // we should never get here. The comment lies
+#if 0
+ panic("unaligned access. Cycle = %n", curTick);
+#endif
+ return NoFault;
+ }
+
+ MemReqPtr req = new MemReq(addr, thread, nbytes);
+ switch(cmd) {
+ case Read:
+ fault = spec_mem->read(req, (uint8_t *)p);
+ break;
+
+ case Write:
+ fault = spec_mem->write(req, (uint8_t *)p);
+ if (fault != NoFault)
+ break;
+
+ specMemWrite = true;
+ storeSize = nbytes;
+ switch(nbytes) {
+ case sizeof(uint8_t):
+ *(uint8_t)&storeData = (uint8_t *)p;
+ break;
+ case sizeof(uint16_t):
+ *(uint16_t)&storeData = (uint16_t *)p;
+ break;
+ case sizeof(uint32_t):
+ *(uint32_t)&storeData = (uint32_t *)p;
+ break;
+ case sizeof(uint64_t):
+ *(uint64_t)&storeData = (uint64_t *)p;
+ break;
+ }
+ break;
+
+ default:
+ fault = genMachineCheckFault();
+ break;
+ }
+
+ trace_mem(fault, cmd, addr, p, nbytes);
+
+ return fault;
+}
+
+#endif
+
++template <class Impl>
++void
++BaseDynInst<Impl>::markSrcRegReady()
++{
++ if (++readyRegs == numSrcRegs()) {
++ canIssue = true;
++ }
++}
++
++template <class Impl>
++void
++BaseDynInst<Impl>::markSrcRegReady(RegIndex src_idx)
++{
++ ++readyRegs;
++
++ _readySrcRegIdx[src_idx] = true;
++
++ if (readyRegs == numSrcRegs()) {
++ canIssue = true;
++ }
++}
++
+template <class Impl>
+bool
+BaseDynInst<Impl>::eaSrcsReady()
+{
+ // For now I am assuming that src registers 1..n-1 are the ones that the
+ // EA calc depends on. (i.e. src reg 0 is the source of the data to be
+ // stored)
+
- #endif // __CPU_BASE_DYN_INST_CC__
++ for (int i = 1; i < numSrcRegs(); ++i) {
+ if (!_readySrcRegIdx[i])
+ return false;
+ }
+
+ return true;
+}
+
+// Forward declaration
+template class BaseDynInst<AlphaSimpleImpl>;
+
+template <>
+int
+BaseDynInst<AlphaSimpleImpl>::instcount = 0;
+
++// Forward declaration
++template class BaseDynInst<SimpleImpl>;
++
++template <>
++int
++BaseDynInst<SimpleImpl>::instcount = 0;
++
++// Forward declaration
++template class BaseDynInst<OzoneImpl>;
++
++template <>
++int
++BaseDynInst<OzoneImpl>::instcount = 0;
--- /dev/null
- #include <vector>
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_BASE_DYN_INST_HH__
+#define __CPU_BASE_DYN_INST_HH__
+
++#include <list>
+#include <string>
- #include "cpu/o3/comm.hh"
+
+#include "base/fast_alloc.hh"
+#include "base/trace.hh"
+#include "config/full_system.hh"
+#include "cpu/exetrace.hh"
+#include "cpu/inst_seq.hh"
- #include "encumbered/cpu/full/bpred_update.hh"
+#include "cpu/static_inst.hh"
- /// Binary machine instruction type.
+#include "encumbered/cpu/full/op_class.hh"
++#include "mem/functional/memory_control.hh"
++#include "sim/system.hh"
++/*
++#include "encumbered/cpu/full/bpred_update.hh"
+#include "encumbered/cpu/full/spec_memory.hh"
+#include "encumbered/cpu/full/spec_state.hh"
+#include "encumbered/mem/functional/main.hh"
++*/
+
+/**
+ * @file
+ * Defines a dynamic instruction context.
+ */
+
+// Forward declaration.
+class StaticInstPtr;
+
+template <class Impl>
+class BaseDynInst : public FastAlloc, public RefCounted
+{
+ public:
+ // Typedef for the CPU.
+ typedef typename Impl::FullCPU FullCPU;
++ typedef typename FullCPU::ImplState ImplState;
+
- /// Logical register index type.
++ // Binary machine instruction type.
+ typedef TheISA::MachInst MachInst;
- /// Integer register index type.
++ // Extended machine instruction type
++ typedef TheISA::ExtMachInst ExtMachInst;
++ // Logical register index type.
+ typedef TheISA::RegIndex RegIndex;
- MaxInstSrcRegs = TheISA::MaxInstSrcRegs, //< Max source regs
- MaxInstDestRegs = TheISA::MaxInstDestRegs, //< Max dest regs
++ // Integer register index type.
+ typedef TheISA::IntReg IntReg;
+
++ // The DynInstPtr type.
++ typedef typename Impl::DynInstPtr DynInstPtr;
++
++ // The list of instructions iterator type.
++ typedef typename std::list<DynInstPtr>::iterator ListIt;
++
+ enum {
- /** The static inst used by this dyn inst. */
++ MaxInstSrcRegs = TheISA::MaxInstSrcRegs, /// Max source regs
++ MaxInstDestRegs = TheISA::MaxInstDestRegs, /// Max dest regs
+ };
+
- /** Is this instruction valid. */
- bool valid;
-
++ /** The StaticInst used by this BaseDynInst. */
+ StaticInstPtr staticInst;
+
+ ////////////////////////////////////////////
+ //
+ // INSTRUCTION EXECUTION
+ //
+ ////////////////////////////////////////////
++ /** InstRecord that tracks this instructions. */
+ Trace::InstRecord *traceData;
+
++ /**
++ * Does a read to a given address.
++ * @param addr The address to read.
++ * @param data The read's data is written into this parameter.
++ * @param flags The request's flags.
++ * @return Returns any fault due to the read.
++ */
+ template <class T>
+ Fault read(Addr addr, T &data, unsigned flags);
+
++ /**
++ * Does a write to a given address.
++ * @param data The data to be written.
++ * @param addr The address to write to.
++ * @param flags The request's flags.
++ * @param res The result of the write (for load locked/store conditionals).
++ * @return Returns any fault due to the write.
++ */
+ template <class T>
+ Fault write(T data, Addr addr, unsigned flags,
+ uint64_t *res);
+
+ void prefetch(Addr addr, unsigned flags);
+ void writeHint(Addr addr, int size, unsigned flags);
+ Fault copySrcTranslate(Addr src);
+ Fault copy(Addr dest);
+
+ /** @todo: Consider making this private. */
+ public:
- /** How many source registers are ready. */
- unsigned readyRegs;
+ /** The sequence number of the instruction. */
+ InstSeqNum seqNum;
+
- /** Pointer to the exec context. Will not exist in the final version. */
- CPUExecContext *cpuXC;
++ /** Is the instruction in the IQ */
++ bool iqEntry;
++
++ /** Is the instruction in the ROB */
++ bool robEntry;
++
++ /** Is the instruction in the LSQ */
++ bool lsqEntry;
+
+ /** Is the instruction completed. */
+ bool completed;
+
++ /** Is the instruction's result ready. */
++ bool resultReady;
++
+ /** Can this instruction issue. */
+ bool canIssue;
+
+ /** Has this instruction issued. */
+ bool issued;
+
+ /** Has this instruction executed (or made it through execute) yet. */
+ bool executed;
+
+ /** Can this instruction commit. */
+ bool canCommit;
+
++ /** Is this instruction committed. */
++ bool committed;
++
+ /** Is this instruction squashed. */
+ bool squashed;
+
+ /** Is this instruction squashed in the instruction queue. */
+ bool squashedInIQ;
+
++ /** Is this instruction squashed in the instruction queue. */
++ bool squashedInLSQ;
++
++ /** Is this instruction squashed in the instruction queue. */
++ bool squashedInROB;
++
+ /** Is this a recover instruction. */
+ bool recoverInst;
+
+ /** Is this a thread blocking instruction. */
+ bool blockingInst; /* this inst has called thread_block() */
+
+ /** Is this a thread syncrhonization instruction. */
+ bool threadsyncWait;
+
+ /** The thread this instruction is from. */
+ short threadNumber;
+
+ /** data address space ID, for loads & stores. */
+ short asid;
+
++ /** How many source registers are ready. */
++ unsigned readyRegs;
++
+ /** Pointer to the FullCPU object. */
+ FullCPU *cpu;
+
- /** Whether or not the source register is ready. Not sure this should be
- * here vs. the derived class.
++ /** Pointer to the exec context. */
++ ImplState *thread;
+
+ /** The kind of fault this instruction has generated. */
+ Fault fault;
+
++ /** The memory request. */
++ MemReqPtr req;
++
+ /** The effective virtual address (lds & stores only). */
+ Addr effAddr;
+
+ /** The effective physical address. */
+ Addr physEffAddr;
+
+ /** Effective virtual address for a copy source. */
+ Addr copySrcEffAddr;
+
+ /** Effective physical address for a copy source. */
+ Addr copySrcPhysEffAddr;
+
+ /** The memory request flags (from translation). */
+ unsigned memReqFlags;
+
+ /** The size of the data to be stored. */
+ int storeSize;
+
+ /** The data to be stored. */
+ IntReg storeData;
+
+ union Result {
+ uint64_t integer;
+ float fp;
+ double dbl;
+ };
+
+ /** The result of the instruction; assumes for now that there's only one
+ * destination register.
+ */
+ Result instResult;
+
+ /** PC of this instruction. */
+ Addr PC;
+
+ /** Next non-speculative PC. It is not filled in at fetch, but rather
+ * once the target of the branch is truly known (either decode or
+ * execute).
+ */
+ Addr nextPC;
+
+ /** Predicted next PC. */
+ Addr predPC;
+
+ /** Count of total number of dynamic instructions. */
+ static int instcount;
+
- /** BaseDynInst constructor given a binary instruction. */
- BaseDynInst(MachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num,
++#ifdef DEBUG
++ void dumpSNList();
++#endif
++
++ /** Whether or not the source register is ready.
++ * @todo: Not sure this should be here vs the derived class.
+ */
+ bool _readySrcRegIdx[MaxInstSrcRegs];
+
+ public:
- /** BaseDynInst constructor given a static inst pointer. */
++ /** BaseDynInst constructor given a binary instruction.
++ * @param inst The binary instruction.
++ * @param PC The PC of the instruction.
++ * @param pred_PC The predicted next PC.
++ * @param seq_num The sequence number of the instruction.
++ * @param cpu Pointer to the instruction's CPU.
++ */
++ BaseDynInst(ExtMachInst inst, Addr PC, Addr pred_PC, InstSeqNum seq_num,
+ FullCPU *cpu);
+
- trace_mem(Fault fault, // last fault
- MemCmd cmd, // last command
- Addr addr, // virtual address of access
- void *p, // memory accessed
- int nbytes); // access size
++ /** BaseDynInst constructor given a StaticInst pointer.
++ * @param _staticInst The StaticInst for this BaseDynInst.
++ */
+ BaseDynInst(StaticInstPtr &_staticInst);
+
+ /** BaseDynInst destructor. */
+ ~BaseDynInst();
+
+ private:
+ /** Function to initialize variables in the constructors. */
+ void initVars();
+
+ public:
++ /**
++ * @todo: Make this function work; currently it is a dummy function.
++ * @param fault Last fault.
++ * @param cmd Last command.
++ * @param addr Virtual address of access.
++ * @param p Memory accessed.
++ * @param nbytes Access size.
++ */
+ void
- bool predTaken() {
- return( predPC != (PC + sizeof(MachInst) ) );
- }
++ trace_mem(Fault fault,
++ MemCmd cmd,
++ Addr addr,
++ void *p,
++ int nbytes);
+
+ /** Dumps out contents of this BaseDynInst. */
+ void dump();
+
+ /** Dumps out contents of this BaseDynInst into given string. */
+ void dump(std::string &outstring);
+
+ /** Returns the fault type. */
+ Fault getFault() { return fault; }
+
+ /** Checks whether or not this instruction has had its branch target
+ * calculated yet. For now it is not utilized and is hacked to be
+ * always false.
++ * @todo: Actually use this instruction.
+ */
+ bool doneTargCalc() { return false; }
+
+ /** Returns the next PC. This could be the speculative next PC if it is
+ * called prior to the actual branch target being calculated.
+ */
+ Addr readNextPC() { return nextPC; }
+
+ /** Set the predicted target of this current instruction. */
+ void setPredTarg(Addr predicted_PC) { predPC = predicted_PC; }
+
+ /** Returns the predicted target of the branch. */
+ Addr readPredTarg() { return predPC; }
+
+ /** Returns whether the instruction was predicted taken or not. */
- bool mispredicted() { return (predPC != nextPC); }
++ bool predTaken() { return predPC != (PC + sizeof(MachInst)); }
+
+ /** Returns whether the instruction mispredicted. */
- /** Number of source registers. */
- int8_t numSrcRegs() const { return staticInst->numSrcRegs(); }
++ bool mispredicted() { return predPC != nextPC; }
+
+ //
+ // Instruction types. Forward checks to StaticInst object.
+ //
+ bool isNop() const { return staticInst->isNop(); }
+ bool isMemRef() const { return staticInst->isMemRef(); }
+ bool isLoad() const { return staticInst->isLoad(); }
+ bool isStore() const { return staticInst->isStore(); }
++ bool isStoreConditional() const
++ { return staticInst->isStoreConditional(); }
+ bool isInstPrefetch() const { return staticInst->isInstPrefetch(); }
+ bool isDataPrefetch() const { return staticInst->isDataPrefetch(); }
+ bool isCopy() const { return staticInst->isCopy(); }
+ bool isInteger() const { return staticInst->isInteger(); }
+ bool isFloating() const { return staticInst->isFloating(); }
+ bool isControl() const { return staticInst->isControl(); }
+ bool isCall() const { return staticInst->isCall(); }
+ bool isReturn() const { return staticInst->isReturn(); }
+ bool isDirectCtrl() const { return staticInst->isDirectCtrl(); }
+ bool isIndirectCtrl() const { return staticInst->isIndirectCtrl(); }
+ bool isCondCtrl() const { return staticInst->isCondCtrl(); }
+ bool isUncondCtrl() const { return staticInst->isUncondCtrl(); }
+ bool isThreadSync() const { return staticInst->isThreadSync(); }
+ bool isSerializing() const { return staticInst->isSerializing(); }
++ bool isSerializeBefore() const
++ { return staticInst->isSerializeBefore() || serializeBefore; }
++ bool isSerializeAfter() const
++ { return staticInst->isSerializeAfter() || serializeAfter; }
+ bool isMemBarrier() const { return staticInst->isMemBarrier(); }
+ bool isWriteBarrier() const { return staticInst->isWriteBarrier(); }
+ bool isNonSpeculative() const { return staticInst->isNonSpeculative(); }
++ bool isQuiesce() const { return staticInst->isQuiesce(); }
++ bool isIprAccess() const { return staticInst->isIprAccess(); }
++ bool isUnverifiable() const { return staticInst->isUnverifiable(); }
++
++ /** Temporarily sets this instruction as a serialize before instruction. */
++ void setSerializeBefore() { serializeBefore = true; }
++
++ /** Clears the serializeBefore part of this instruction. */
++ void clearSerializeBefore() { serializeBefore = false; }
++
++ /** Checks if this serializeBefore is only temporarily set. */
++ bool isTempSerializeBefore() { return serializeBefore; }
++
++ /** Tracks if instruction has been externally set as serializeBefore. */
++ bool serializeBefore;
++
++ /** Temporarily sets this instruction as a serialize after instruction. */
++ void setSerializeAfter() { serializeAfter = true; }
++
++ /** Clears the serializeAfter part of this instruction.*/
++ void clearSerializeAfter() { serializeAfter = false; }
++
++ /** Checks if this serializeAfter is only temporarily set. */
++ bool isTempSerializeAfter() { return serializeAfter; }
++
++ /** Tracks if instruction has been externally set as serializeAfter. */
++ bool serializeAfter;
++
++ /** Checks if the serialization part of this instruction has been
++ * handled. This does not apply to the temporary serializing
++ * state; it only applies to this instruction's own permanent
++ * serializing state.
++ */
++ bool isSerializeHandled() { return serializeHandled; }
++
++ /** Sets the serialization part of this instruction as handled. */
++ void setSerializeHandled() { serializeHandled = true; }
++
++ /** Whether or not the serialization of this instruction has been handled. */
++ bool serializeHandled;
+
+ /** Returns the opclass of this instruction. */
+ OpClass opClass() const { return staticInst->opClass(); }
+
+ /** Returns the branch target address. */
+ Addr branchTarget() const { return staticInst->branchTarget(PC); }
+
- /** Number of destination registers. */
++ /** Returns the number of source registers. */
++ int8_t numSrcRegs() const { return staticInst->numSrcRegs(); }
+
- RegIndex destRegIdx(int i) const
- {
- return staticInst->destRegIdx(i);
- }
++ /** Returns the number of destination registers. */
+ int8_t numDestRegs() const { return staticInst->numDestRegs(); }
+
+ // the following are used to track physical register usage
+ // for machines with separate int & FP reg files
+ int8_t numFPDestRegs() const { return staticInst->numFPDestRegs(); }
+ int8_t numIntDestRegs() const { return staticInst->numIntDestRegs(); }
+
+ /** Returns the logical register index of the i'th destination register. */
- RegIndex srcRegIdx(int i) const
- {
- return staticInst->srcRegIdx(i);
- }
++ RegIndex destRegIdx(int i) const { return staticInst->destRegIdx(i); }
+
+ /** Returns the logical register index of the i'th source register. */
- //Push to .cc file.
- /** Records that one of the source registers is ready. */
- void markSrcRegReady()
++ RegIndex srcRegIdx(int i) const { return staticInst->srcRegIdx(i); }
+
+ /** Returns the result of an integer instruction. */
+ uint64_t readIntResult() { return instResult.integer; }
+
+ /** Returns the result of a floating point instruction. */
+ float readFloatResult() { return instResult.fp; }
+
+ /** Returns the result of a floating point (double) instruction. */
+ double readDoubleResult() { return instResult.dbl; }
+
- ++readyRegs;
- if(readyRegs == numSrcRegs()) {
- canIssue = true;
- }
++ void setIntReg(const StaticInst *si, int idx, uint64_t val)
+ {
- /** Marks a specific register as ready.
- * @todo: Move this to .cc file.
- */
- void markSrcRegReady(RegIndex src_idx)
++ instResult.integer = val;
+ }
+
- ++readyRegs;
++ void setFloatRegSingle(const StaticInst *si, int idx, float val)
+ {
- _readySrcRegIdx[src_idx] = 1;
++ instResult.fp = val;
++ }
+
- if(readyRegs == numSrcRegs()) {
- canIssue = true;
- }
++ void setFloatRegDouble(const StaticInst *si, int idx, double val)
++ {
++ instResult.dbl = val;
++ }
+
- /** Returns whethe or not this instruction is completed. */
++ void setFloatRegInt(const StaticInst *si, int idx, uint64_t val)
++ {
++ instResult.integer = val;
+ }
+
++ /** Records that one of the source registers is ready. */
++ void markSrcRegReady();
++
++ /** Marks a specific register as ready. */
++ void markSrcRegReady(RegIndex src_idx);
++
+ /** Returns if a source register is ready. */
+ bool isReadySrcRegIdx(int idx) const
+ {
+ return this->_readySrcRegIdx[idx];
+ }
+
+ /** Sets this instruction as completed. */
+ void setCompleted() { completed = true; }
+
- void setSquashedInIQ() { squashedInIQ = true; }
++ /** Returns whether or not this instruction is completed. */
+ bool isCompleted() const { return completed; }
+
++ void setResultReady() { resultReady = true; }
++
++ bool isResultReady() const { return resultReady; }
++
+ /** Sets this instruction as ready to issue. */
+ void setCanIssue() { canIssue = true; }
+
+ /** Returns whether or not this instruction is ready to issue. */
+ bool readyToIssue() const { return canIssue; }
+
+ /** Sets this instruction as issued from the IQ. */
+ void setIssued() { issued = true; }
+
+ /** Returns whether or not this instruction has issued. */
+ bool isIssued() const { return issued; }
+
+ /** Sets this instruction as executed. */
+ void setExecuted() { executed = true; }
+
+ /** Returns whether or not this instruction has executed. */
+ bool isExecuted() const { return executed; }
+
+ /** Sets this instruction as ready to commit. */
+ void setCanCommit() { canCommit = true; }
+
+ /** Clears this instruction as being ready to commit. */
+ void clearCanCommit() { canCommit = false; }
+
+ /** Returns whether or not this instruction is ready to commit. */
+ bool readyToCommit() const { return canCommit; }
+
++ /** Sets this instruction as committed. */
++ void setCommitted() { committed = true; }
++
++ /** Returns whether or not this instruction is committed. */
++ bool isCommitted() const { return committed; }
++
+ /** Sets this instruction as squashed. */
+ void setSquashed() { squashed = true; }
+
+ /** Returns whether or not this instruction is squashed. */
+ bool isSquashed() const { return squashed; }
+
++ //Instruction Queue Entry
++ //-----------------------
++ /** Sets this instruction as an entry in the IQ. */
++ void setInIQ() { iqEntry = true; }
++
++ /** Removes this instruction as an entry in the IQ. */
++ void removeInIQ() { iqEntry = false; }
++
+ /** Sets this instruction as squashed in the IQ. */
- void setNextPC(uint64_t val) { nextPC = val; }
++ void setSquashedInIQ() { squashedInIQ = true; squashed = true;}
+
+ /** Returns whether or not this instruction is squashed in the IQ. */
+ bool isSquashedInIQ() const { return squashedInIQ; }
+
++ /** Returns whether or not this instruction is in the IQ. */
++ bool isInIQ() const { return iqEntry; }
++
++
++ //Load / Store Queue Functions
++ //-----------------------
++ /** Sets this instruction as an entry in the LSQ. */
++ void setInLSQ() { lsqEntry = true; }
++
++ /** Removes this instruction as an entry in the LSQ. */
++ void removeInLSQ() { lsqEntry = false; }
++
++ /** Sets this instruction as squashed in the LSQ. */
++ void setSquashedInLSQ() { squashedInLSQ = true;}
++
++ /** Returns whether or not this instruction is squashed in the LSQ. */
++ bool isSquashedInLSQ() const { return squashedInLSQ; }
++
++ /** Returns whether or not this instruction is in the LSQ. */
++ bool isInLSQ() const { return lsqEntry; }
++
++
++ //Reorder Buffer Functions
++ //-----------------------
++ /** Sets this instruction as an entry in the ROB. */
++ void setInROB() { robEntry = true; }
++
++ /** Removes this instruction as an entry in the ROB. */
++ void removeInROB() { robEntry = false; }
++
++ /** Sets this instruction as squashed in the ROB. */
++ void setSquashedInROB() { squashedInROB = true; }
++
++ /** Returns whether or not this instruction is squashed in the ROB. */
++ bool isSquashedInROB() const { return squashedInROB; }
++
++ /** Returns whether or not this instruction is in the ROB. */
++ bool isInROB() const { return robEntry; }
++
+ /** Read the PC of this instruction. */
+ const Addr readPC() const { return PC; }
+
+ /** Set the next PC of this instruction (its actual target). */
- ExecContext *xcBase() { return cpuXC->getProxy(); }
++ void setNextPC(uint64_t val)
++ {
++ nextPC = val;
++// instResult.integer = val;
++ }
++
++ void setASID(short addr_space_id) { asid = addr_space_id; }
++
++ void setThread(unsigned tid) { threadNumber = tid; }
++
++ void setState(ImplState *state) { thread = state; }
+
+ /** Returns the exec context.
+ * @todo: Remove this once the ExecContext is no longer used.
+ */
- const Addr &getEA() const { return instEffAddr; }
++ ExecContext *xcBase() { return thread->getXCProxy(); }
+
+ private:
+ /** Instruction effective address.
+ * @todo: Consider if this is necessary or not.
+ */
+ Addr instEffAddr;
++
+ /** Whether or not the effective address calculation is completed.
+ * @todo: Consider if this is necessary or not.
+ */
+ bool eaCalcDone;
+
+ public:
+ /** Sets the effective address. */
+ void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; }
+
+ /** Returns the effective address. */
- MemReqPtr req = new MemReq(addr, cpuXC->getProxy(), sizeof(T), flags);
++ const Addr &getEA() const { return req->vaddr; }
+
+ /** Returns whether or not the eff. addr. calculation has been completed. */
+ bool doneEACalc() { return eaCalcDone; }
+
+ /** Returns whether or not the eff. addr. source registers are ready. */
+ bool eaSrcsReady();
+
++ /** Whether or not the memory operation is done. */
++ bool memOpDone;
++
+ public:
+ /** Load queue index. */
+ int16_t lqIdx;
+
+ /** Store queue index. */
+ int16_t sqIdx;
++
++ bool reachedCommit;
++
++ /** Iterator pointing to this BaseDynInst in the list of all insts. */
++ ListIt instListIt;
++
++ /** Returns iterator to this instruction in the list of all insts. */
++ ListIt &getInstListIt() { return instListIt; }
++
++ /** Sets iterator for this instruction in the list of all insts. */
++ void setInstListIt(ListIt _instListIt) { instListIt = _instListIt; }
+};
+
+template<class Impl>
+template<class T>
+inline Fault
+BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
+{
- // Record key MemReq parameters so we can generate another one
- // just like it for the timing access without calling translate()
- // again (which might mess up the TLB).
- // Do I ever really need this? -KTL 3/05
++ if (executed) {
++ fault = cpu->read(req, data, lqIdx);
++ return fault;
++ }
++
++ req = new MemReq(addr, thread->getXCProxy(), sizeof(T), flags);
+ req->asid = asid;
++ req->thread_num = threadNumber;
++ req->pc = this->PC;
++
++ if ((req->vaddr & (TheISA::VMPageSize - 1)) + req->size >
++ TheISA::VMPageSize) {
++ return TheISA::genAlignmentFault();
++ }
+
+ fault = cpu->translateDataReadReq(req);
+
- /**
- * @todo
- * Replace the disjoint functional memory with a unified one and remove
- * this hack.
- */
- #if !FULL_SYSTEM
- req->paddr = req->vaddr;
- #endif
-
+ effAddr = req->vaddr;
+ physEffAddr = req->paddr;
+ memReqFlags = req->flags;
+
- MemReqPtr req = new MemReq(addr, cpuXC->getProxy(), sizeof(T), flags);
+ if (fault == NoFault) {
++#if FULL_SYSTEM
++ if (cpu->system->memctrl->badaddr(physEffAddr)) {
++ fault = TheISA::genMachineCheckFault();
++ data = (T)-1;
++ this->setExecuted();
++ } else {
++ fault = cpu->read(req, data, lqIdx);
++ }
++#else
+ fault = cpu->read(req, data, lqIdx);
++#endif
+ } else {
+ // Return a fixed value to keep simulation deterministic even
+ // along misspeculated paths.
+ data = (T)-1;
++
++ // Commit will have to clean up whatever happened. Set this
++ // instruction as executed.
++ this->setExecuted();
+ }
+
+ if (traceData) {
+ traceData->setAddr(addr);
+ traceData->setData(data);
+ }
+
+ return fault;
+}
+
+template<class Impl>
+template<class T>
+inline Fault
+BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
+{
+ if (traceData) {
+ traceData->setAddr(addr);
+ traceData->setData(data);
+ }
+
- // Record key MemReq parameters so we can generate another one
- // just like it for the timing access without calling translate()
- // again (which might mess up the TLB).
++ req = new MemReq(addr, thread->getXCProxy(), sizeof(T), flags);
+
+ req->asid = asid;
++ req->thread_num = threadNumber;
++ req->pc = this->PC;
++
++ if ((req->vaddr & (TheISA::VMPageSize - 1)) + req->size >
++ TheISA::VMPageSize) {
++ return TheISA::genAlignmentFault();
++ }
+
+ fault = cpu->translateDataWriteReq(req);
+
- /**
- * @todo
- * Replace the disjoint functional memory with a unified one and remove
- * this hack.
- */
- #if !FULL_SYSTEM
- req->paddr = req->vaddr;
- #endif
-
+ effAddr = req->vaddr;
+ physEffAddr = req->paddr;
+ memReqFlags = req->flags;
+
+ if (fault == NoFault) {
++#if FULL_SYSTEM
++ if (cpu->system->memctrl->badaddr(physEffAddr)) {
++ fault = TheISA::genMachineCheckFault();
++ } else {
++ fault = cpu->write(req, data, sqIdx);
++ }
++#else
+ fault = cpu->write(req, data, sqIdx);
++#endif
+ }
+
+ if (res) {
+ // always return some result to keep misspeculated paths
+ // (which will ignore faults) deterministic
+ *res = (fault == NoFault) ? req->result : 0;
+ }
+
+ return fault;
+}
+
+#endif // __CPU_BASE_DYN_INST_HH__
--- /dev/null
- AlphaITB *_itb, AlphaDTB *_dtb)
+/*
+ * Copyright (c) 2001-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string>
+
+#include "arch/isa_traits.hh"
+#include "cpu/base.hh"
+#include "cpu/cpu_exec_context.hh"
+#include "cpu/exec_context.hh"
+
+#if FULL_SYSTEM
+#include "base/callback.hh"
+#include "base/cprintf.hh"
+#include "base/output.hh"
+#include "base/trace.hh"
+#include "cpu/profile.hh"
++#include "cpu/quiesce_event.hh"
+#include "kern/kernel_stats.hh"
+#include "sim/serialize.hh"
+#include "sim/sim_exit.hh"
+#include "arch/stacktrace.hh"
+#else
+#include "sim/process.hh"
+#include "sim/system.hh"
+#include "mem/translating_port.hh"
+#endif
+
+using namespace std;
+
+// constructor
+#if FULL_SYSTEM
+CPUExecContext::CPUExecContext(BaseCPU *_cpu, int _thread_num, System *_sys,
- dtb(_dtb), profile(NULL), quiesceEvent(this), func_exe_inst(0),
- storeCondFailures(0)
++ AlphaITB *_itb, AlphaDTB *_dtb,
++ bool use_kernel_stats)
+ : _status(ExecContext::Unallocated), cpu(_cpu), thread_num(_thread_num),
+ cpu_id(-1), lastActivate(0), lastSuspend(0), system(_sys), itb(_itb),
- CPUExecContext::EndQuiesceEvent::EndQuiesceEvent(CPUExecContext *_cpuXC)
- : Event(&mainEventQueue), cpuXC(_cpuXC)
- {
- }
-
- void
- CPUExecContext::EndQuiesceEvent::process()
- {
- cpuXC->activate();
- }
-
- const char*
- CPUExecContext::EndQuiesceEvent::description()
- {
- return "End Quiesce Event.";
- }
-
++ dtb(_dtb), profile(NULL), func_exe_inst(0), storeCondFailures(0)
+
+{
+ proxy = new ProxyExecContext<CPUExecContext>(this);
+
++ quiesceEvent = new EndQuiesceEvent(proxy);
++
+ regs.clear();
+
+ if (cpu->params->profile) {
+ profile = new FunctionProfile(system->kernelSymtab);
+ Callback *cb =
+ new MakeCallback<CPUExecContext,
+ &CPUExecContext::dumpFuncProfile>(this);
+ registerExitCallback(cb);
+ }
+
+ // let's fill with a dummy node for now so we don't get a segfault
+ // on the first cycle when there's no node available.
+ static ProfileNode dummyNode;
+ profileNode = &dummyNode;
+ profilePC = 3;
+
++
++ if (use_kernel_stats) {
++ kernelStats = new Kernel::Statistics(system);
++ } else {
++ kernelStats = NULL;
++ }
+ Port *mem_port;
+ physPort = new FunctionalPort(csprintf("%s-%d-funcport",
+ cpu->name(), thread_num));
+ mem_port = system->physmem->getPort("functional");
+ mem_port->setPeer(physPort);
+ physPort->setPeer(mem_port);
+
+ virtPort = new VirtualPort(csprintf("%s-%d-vport",
+ cpu->name(), thread_num));
+ mem_port = system->physmem->getPort("functional");
+ mem_port->setPeer(virtPort);
+ virtPort->setPeer(mem_port);
+}
+#else
+CPUExecContext::CPUExecContext(BaseCPU *_cpu, int _thread_num,
+ Process *_process, int _asid, MemObject* memobj)
+ : _status(ExecContext::Unallocated),
+ cpu(_cpu), thread_num(_thread_num), cpu_id(-1), lastActivate(0),
+ lastSuspend(0), process(_process), asid(_asid),
+ func_exe_inst(0), storeCondFailures(0)
+{
+ /* Use this port to for syscall emulation writes to memory. */
+ Port *mem_port;
+ port = new TranslatingPort(csprintf("%s-%d-funcport",
+ cpu->name(), thread_num),
+ process->pTable, false);
+ mem_port = memobj->getPort("functional");
+ mem_port->setPeer(port);
+ port->setPeer(mem_port);
+
+ regs.clear();
+ proxy = new ProxyExecContext<CPUExecContext>(this);
+}
+
+CPUExecContext::CPUExecContext(RegFile *regFile)
+ : cpu(NULL), thread_num(-1), process(NULL), asid(-1),
+ func_exe_inst(0), storeCondFailures(0)
+{
+ regs = *regFile;
+ proxy = new ProxyExecContext<CPUExecContext>(this);
+}
+
+#endif
+
+CPUExecContext::~CPUExecContext()
+{
+ delete proxy;
+}
+
+#if FULL_SYSTEM
+void
+CPUExecContext::dumpFuncProfile()
+{
+ std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name()));
+ profile->dump(proxy, *os);
+}
+
- if (quiesceEvent.scheduled())
- quiesceEndTick = quiesceEvent.when();
+void
+CPUExecContext::profileClear()
+{
+ if (profile)
+ profile->clear();
+}
+
+void
+CPUExecContext::profileSample()
+{
+ if (profile)
+ profile->sample(profileNode, profilePC);
+}
+
+#endif
+
+void
+CPUExecContext::takeOverFrom(ExecContext *oldContext)
+{
+ // some things should already be set up
+#if FULL_SYSTEM
+ assert(system == oldContext->getSystemPtr());
+#else
+ assert(process == oldContext->getProcessPtr());
+#endif
+
+ // copy over functional state
+ _status = oldContext->status();
+ copyArchRegs(oldContext);
+ cpu_id = oldContext->readCpuId();
+#if !FULL_SYSTEM
+ func_exe_inst = oldContext->readFuncExeInst();
++#else
++ EndQuiesceEvent *quiesce = oldContext->getQuiesceEvent();
++ if (quiesce) {
++ // Point the quiesce event's XC at this XC so that it wakes up
++ // the proper CPU.
++ quiesce->xc = proxy;
++ }
++ if (quiesceEvent) {
++ quiesceEvent->xc = proxy;
++ }
+#endif
+
+ storeCondFailures = 0;
+
+ oldContext->setStatus(ExecContext::Unallocated);
+}
+
+void
+CPUExecContext::serialize(ostream &os)
+{
+ SERIALIZE_ENUM(_status);
+ regs.serialize(os);
+ // thread_num and cpu_id are deterministic from the config
+ SERIALIZE_SCALAR(func_exe_inst);
+ SERIALIZE_SCALAR(inst);
+
+#if FULL_SYSTEM
+ Tick quiesceEndTick = 0;
-
++ if (quiesceEvent->scheduled())
++ quiesceEndTick = quiesceEvent->when();
+ SERIALIZE_SCALAR(quiesceEndTick);
- quiesceEvent.schedule(quiesceEndTick);
++ if (kernelStats)
++ kernelStats->serialize(os);
+#endif
+}
+
+
+void
+CPUExecContext::unserialize(Checkpoint *cp, const std::string §ion)
+{
+ UNSERIALIZE_ENUM(_status);
+ regs.unserialize(cp, section);
+ // thread_num and cpu_id are deterministic from the config
+ UNSERIALIZE_SCALAR(func_exe_inst);
+ UNSERIALIZE_SCALAR(inst);
+
+#if FULL_SYSTEM
+ Tick quiesceEndTick;
+ UNSERIALIZE_SCALAR(quiesceEndTick);
+ if (quiesceEndTick)
++ quiesceEvent->schedule(quiesceEndTick);
++ if (kernelStats)
++ kernelStats->unserialize(cp, section);
+#endif
+}
+
+
+void
+CPUExecContext::activate(int delay)
+{
+ if (status() == ExecContext::Active)
+ return;
+
+ lastActivate = curTick;
+
++ if (status() == ExecContext::Unallocated) {
++ cpu->activateWhenReady(thread_num);
++ return;
++ }
++
+ _status = ExecContext::Active;
++
++ // status() == Suspended
+ cpu->activateContext(thread_num, delay);
+}
+
+void
+CPUExecContext::suspend()
+{
+ if (status() == ExecContext::Suspended)
+ return;
+
+ lastActivate = curTick;
+ lastSuspend = curTick;
+/*
+#if FULL_SYSTEM
+ // Don't change the status from active if there are pending interrupts
+ if (cpu->check_interrupts()) {
+ assert(status() == ExecContext::Active);
+ return;
+ }
+#endif
+*/
+ _status = ExecContext::Suspended;
+ cpu->suspendContext(thread_num);
+}
+
+void
+CPUExecContext::deallocate()
+{
+ if (status() == ExecContext::Unallocated)
+ return;
+
+ _status = ExecContext::Unallocated;
+ cpu->deallocateContext(thread_num);
+}
+
+void
+CPUExecContext::halt()
+{
+ if (status() == ExecContext::Halted)
+ return;
+
+ _status = ExecContext::Halted;
+ cpu->haltContext(thread_num);
+}
+
+
+void
+CPUExecContext::regStats(const string &name)
+{
++#if FULL_SYSTEM
++ if (kernelStats)
++ kernelStats->regStats(name + ".kern");
++#endif
+}
+
+void
+CPUExecContext::copyArchRegs(ExecContext *xc)
+{
+ TheISA::copyRegs(xc, proxy);
+}
+
+#if FULL_SYSTEM
+VirtualPort*
+CPUExecContext::getVirtPort(ExecContext *xc)
+{
+ if (!xc)
+ return virtPort;
+
+ VirtualPort *vp;
+ Port *mem_port;
+
+ vp = new VirtualPort("xc-vport", xc);
+ mem_port = system->physmem->getPort("functional");
+ mem_port->setPeer(vp);
+ vp->setPeer(mem_port);
+ return vp;
+}
+
+void
+CPUExecContext::delVirtPort(VirtualPort *vp)
+{
+// assert(!vp->nullExecContext());
+ delete vp->getPeer();
+ delete vp;
+}
+
+
+#endif
+
--- /dev/null
- /** Event for timing out quiesce instruction */
- struct EndQuiesceEvent : public Event
- {
- /** A pointer to the execution context that is quiesced */
- CPUExecContext *cpuXC;
-
- EndQuiesceEvent(CPUExecContext *_cpuXC);
-
- /** Event process to occur at interrupt*/
- virtual void process();
+/*
+ * Copyright (c) 2001-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_CPU_EXEC_CONTEXT_HH__
+#define __CPU_CPU_EXEC_CONTEXT_HH__
+
+#include "arch/isa_traits.hh"
+#include "config/full_system.hh"
+#include "cpu/exec_context.hh"
+#include "mem/physical.hh"
+#include "mem/request.hh"
+#include "sim/byteswap.hh"
+#include "sim/eventq.hh"
+#include "sim/host.hh"
+#include "sim/serialize.hh"
+
+class BaseCPU;
+
+#if FULL_SYSTEM
+
+#include "sim/system.hh"
+#include "arch/tlb.hh"
+
+class FunctionProfile;
+class ProfileNode;
+class FunctionalPort;
+class PhysicalPort;
+
+
++namespace Kernel {
++ class Statistics;
++};
++
+#else // !FULL_SYSTEM
+
+#include "sim/process.hh"
+#include "mem/page_table.hh"
+class TranslatingPort;
+
+
+#endif // FULL_SYSTEM
+
+//
+// The CPUExecContext object represents a functional context for
+// instruction execution. It incorporates everything required for
+// architecture-level functional simulation of a single thread.
+//
+
+class CPUExecContext
+{
+ protected:
+ typedef TheISA::RegFile RegFile;
+ typedef TheISA::MachInst MachInst;
+ typedef TheISA::MiscRegFile MiscRegFile;
+ typedef TheISA::MiscReg MiscReg;
+ typedef TheISA::FloatReg FloatReg;
+ typedef TheISA::FloatRegBits FloatRegBits;
+ public:
+ typedef ExecContext::Status Status;
+
+ private:
+ Status _status;
+
+ public:
+ Status status() const { return _status; }
+
+ void setStatus(Status newStatus) { _status = newStatus; }
+
+ /// Set the status to Active. Optional delay indicates number of
+ /// cycles to wait before beginning execution.
+ void activate(int delay = 1);
+
+ /// Set the status to Suspended.
+ void suspend();
+
+ /// Set the status to Unallocated.
+ void deallocate();
+
+ /// Set the status to Halted.
+ void halt();
+
+ protected:
+ RegFile regs; // correct-path register context
+
+ public:
+ // pointer to CPU associated with this context
+ BaseCPU *cpu;
+
+ ProxyExecContext<CPUExecContext> *proxy;
+
+ // Current instruction
+ MachInst inst;
+
+ // Index of hardware thread context on the CPU that this represents.
+ int thread_num;
+
+ // ID of this context w.r.t. the System or Process object to which
+ // it belongs. For full-system mode, this is the system CPU ID.
+ int cpu_id;
+
+ Tick lastActivate;
+ Tick lastSuspend;
+
+ System *system;
+
+
+#if FULL_SYSTEM
+ AlphaITB *itb;
+ AlphaDTB *dtb;
+
+ /** A functional port outgoing only for functional accesses to physical
+ * addresses.*/
+ FunctionalPort *physPort;
+
+ /** A functional port, outgoing only, for functional accesse to virtual
+ * addresses. That doen't require execution context information */
+ VirtualPort *virtPort;
+
+ FunctionProfile *profile;
+ ProfileNode *profileNode;
+ Addr profilePC;
+ void dumpFuncProfile();
+
- /** Event description */
- virtual const char *description();
- };
- EndQuiesceEvent quiesceEvent;
-
- Event *getQuiesceEvent() { return &quiesceEvent; }
++ EndQuiesceEvent *quiesceEvent;
+
- AlphaITB *_itb, AlphaDTB *_dtb);
++ EndQuiesceEvent *getQuiesceEvent() { return quiesceEvent; }
+
+ Tick readLastActivate() { return lastActivate; }
+
+ Tick readLastSuspend() { return lastSuspend; }
+
+ void profileClear();
+
+ void profileSample();
+
++ Kernel::Statistics *getKernelStats() { return kernelStats; }
++
++ Kernel::Statistics *kernelStats;
+#else
+ /// Port that syscalls can use to access memory (provides translation step).
+ TranslatingPort *port;
+
+ Process *process;
+
+ // Address space ID. Note that this is used for TIMING cache
+ // simulation only; all functional memory accesses should use
+ // one of the FunctionalMemory pointers above.
+ short asid;
+
+#endif
+
+ /**
+ * Temporary storage to pass the source address from copy_load to
+ * copy_store.
+ * @todo Remove this temporary when we have a better way to do it.
+ */
+ Addr copySrcAddr;
+ /**
+ * Temp storage for the physical source address of a copy.
+ * @todo Remove this temporary when we have a better way to do it.
+ */
+ Addr copySrcPhysAddr;
+
+
+ /*
+ * number of executed instructions, for matching with syscall trace
+ * points in EIO files.
+ */
+ Counter func_exe_inst;
+
+ //
+ // Count failed store conditionals so we can warn of apparent
+ // application deadlock situations.
+ unsigned storeCondFailures;
+
+ // constructor: initialize context from given process structure
+#if FULL_SYSTEM
+ CPUExecContext(BaseCPU *_cpu, int _thread_num, System *_system,
++ AlphaITB *_itb, AlphaDTB *_dtb,
++ bool use_kernel_stats = true);
+#else
+ CPUExecContext(BaseCPU *_cpu, int _thread_num, Process *_process, int _asid,
+ MemObject *memobj);
+ // Constructor to use XC to pass reg file around. Not used for anything
+ // else.
+ CPUExecContext(RegFile *regFile);
+#endif
+ virtual ~CPUExecContext();
+
+ virtual void takeOverFrom(ExecContext *oldContext);
+
+ void regStats(const std::string &name);
+
+ void serialize(std::ostream &os);
+ void unserialize(Checkpoint *cp, const std::string §ion);
+
+ BaseCPU *getCpuPtr() { return cpu; }
+
+ ExecContext *getProxy() { return proxy; }
+
+ int getThreadNum() { return thread_num; }
+
+#if FULL_SYSTEM
+ System *getSystemPtr() { return system; }
+
+ AlphaITB *getITBPtr() { return itb; }
+
+ AlphaDTB *getDTBPtr() { return dtb; }
+
+ int getInstAsid() { return regs.instAsid(); }
+ int getDataAsid() { return regs.dataAsid(); }
+
+ Fault translateInstReq(RequestPtr &req)
+ {
+ return itb->translate(req, proxy);
+ }
+
+ Fault translateDataReadReq(RequestPtr &req)
+ {
+ return dtb->translate(req, proxy, false);
+ }
+
+ Fault translateDataWriteReq(RequestPtr &req)
+ {
+ return dtb->translate(req, proxy, true);
+ }
+
+ FunctionalPort *getPhysPort() { return physPort; }
+
+ /** Return a virtual port. If no exec context is specified then a static
+ * port is returned. Otherwise a port is created and returned. It must be
+ * deleted by deleteVirtPort(). */
+ VirtualPort *getVirtPort(ExecContext *xc);
+
+ void delVirtPort(VirtualPort *vp);
+
+#else
+ TranslatingPort *getMemPort() { return port; }
+
+ Process *getProcessPtr() { return process; }
+
+ int getInstAsid() { return asid; }
+ int getDataAsid() { return asid; }
+
+ Fault translateInstReq(RequestPtr &req)
+ {
+ return process->pTable->translate(req);
+ }
+
+ Fault translateDataReadReq(RequestPtr &req)
+ {
+ return process->pTable->translate(req);
+ }
+
+ Fault translateDataWriteReq(RequestPtr &req)
+ {
+ return process->pTable->translate(req);
+ }
+
+#endif
+
+/*
+ template <class T>
+ Fault read(RequestPtr &req, T &data)
+ {
+#if FULL_SYSTEM && THE_ISA == ALPHA_ISA
+ if (req->flags & LOCKED) {
+ req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr);
+ req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true);
+ }
+#endif
+
+ Fault error;
+ error = mem->prot_read(req->paddr, data, req->size);
+ data = LittleEndianGuest::gtoh(data);
+ return error;
+ }
+
+ template <class T>
+ Fault write(RequestPtr &req, T &data)
+ {
+#if FULL_SYSTEM && THE_ISA == ALPHA_ISA
+ ExecContext *xc;
+
+ // If this is a store conditional, act appropriately
+ if (req->flags & LOCKED) {
+ xc = req->xc;
+
+ if (req->flags & UNCACHEABLE) {
+ // Don't update result register (see stq_c in isa_desc)
+ req->result = 2;
+ xc->setStCondFailures(0);//Needed? [RGD]
+ } else {
+ bool lock_flag = xc->readMiscReg(TheISA::Lock_Flag_DepTag);
+ Addr lock_addr = xc->readMiscReg(TheISA::Lock_Addr_DepTag);
+ req->result = lock_flag;
+ if (!lock_flag ||
+ ((lock_addr & ~0xf) != (req->paddr & ~0xf))) {
+ xc->setMiscReg(TheISA::Lock_Flag_DepTag, false);
+ xc->setStCondFailures(xc->readStCondFailures() + 1);
+ if (((xc->readStCondFailures()) % 100000) == 0) {
+ std::cerr << "Warning: "
+ << xc->readStCondFailures()
+ << " consecutive store conditional failures "
+ << "on cpu " << req->xc->readCpuId()
+ << std::endl;
+ }
+ return NoFault;
+ }
+ else xc->setStCondFailures(0);
+ }
+ }
+
+        // Need to clear any locked flags on other processors for
+        // this address. Only do this for successful Store Conditionals
+ // and all other stores (WH64?). Unsuccessful Store
+ // Conditionals would have returned above, and wouldn't fall
+ // through.
+ for (int i = 0; i < system->execContexts.size(); i++){
+ xc = system->execContexts[i];
+ if ((xc->readMiscReg(TheISA::Lock_Addr_DepTag) & ~0xf) ==
+ (req->paddr & ~0xf)) {
+ xc->setMiscReg(TheISA::Lock_Flag_DepTag, false);
+ }
+ }
+
+#endif
+ return mem->prot_write(req->paddr, (T)htog(data), req->size);
+ }
+*/
+ virtual bool misspeculating();
+
+
+ MachInst getInst() { return inst; }
+
+ void setInst(MachInst new_inst)
+ {
+ inst = new_inst;
+ }
+
+ Fault instRead(RequestPtr &req)
+ {
+ panic("instRead not implemented");
+ // return funcPhysMem->read(req, inst);
+ return NoFault;
+ }
+
+ void setCpuId(int id) { cpu_id = id; }
+
+ int readCpuId() { return cpu_id; }
+
+ void copyArchRegs(ExecContext *xc);
+
+ //
+ // New accessors for new decoder.
+ //
+ uint64_t readIntReg(int reg_idx)
+ {
+ return regs.readIntReg(reg_idx);
+ }
+
+ FloatReg readFloatReg(int reg_idx, int width)
+ {
+ return regs.readFloatReg(reg_idx, width);
+ }
+
+ FloatReg readFloatReg(int reg_idx)
+ {
+ return regs.readFloatReg(reg_idx);
+ }
+
+ FloatRegBits readFloatRegBits(int reg_idx, int width)
+ {
+ return regs.readFloatRegBits(reg_idx, width);
+ }
+
+ FloatRegBits readFloatRegBits(int reg_idx)
+ {
+ return regs.readFloatRegBits(reg_idx);
+ }
+
+ void setIntReg(int reg_idx, uint64_t val)
+ {
+ regs.setIntReg(reg_idx, val);
+ }
+
+ void setFloatReg(int reg_idx, FloatReg val, int width)
+ {
+ regs.setFloatReg(reg_idx, val, width);
+ }
+
+ void setFloatReg(int reg_idx, FloatReg val)
+ {
+ regs.setFloatReg(reg_idx, val);
+ }
+
+ void setFloatRegBits(int reg_idx, FloatRegBits val, int width)
+ {
+ regs.setFloatRegBits(reg_idx, val, width);
+ }
+
+ void setFloatRegBits(int reg_idx, FloatRegBits val)
+ {
+ regs.setFloatRegBits(reg_idx, val);
+ }
+
+ uint64_t readPC()
+ {
+ return regs.readPC();
+ }
+
+ void setPC(uint64_t val)
+ {
+ regs.setPC(val);
+ }
+
+ uint64_t readNextPC()
+ {
+ return regs.readNextPC();
+ }
+
+ void setNextPC(uint64_t val)
+ {
+ regs.setNextPC(val);
+ }
+
+ uint64_t readNextNPC()
+ {
+ return regs.readNextNPC();
+ }
+
+ void setNextNPC(uint64_t val)
+ {
+ regs.setNextNPC(val);
+ }
+
+
+ MiscReg readMiscReg(int misc_reg)
+ {
+ return regs.readMiscReg(misc_reg);
+ }
+
+ MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
+ {
+ return regs.readMiscRegWithEffect(misc_reg, fault, proxy);
+ }
+
+ Fault setMiscReg(int misc_reg, const MiscReg &val)
+ {
+ return regs.setMiscReg(misc_reg, val);
+ }
+
+ Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val)
+ {
+ return regs.setMiscRegWithEffect(misc_reg, val, proxy);
+ }
+
+ unsigned readStCondFailures() { return storeCondFailures; }
+
+ void setStCondFailures(unsigned sc_failures)
+ { storeCondFailures = sc_failures; }
+
+ void clearArchRegs() { regs.clear(); }
+
+#if FULL_SYSTEM
+ int readIntrFlag() { return regs.intrflag; }
+ void setIntrFlag(int val) { regs.intrflag = val; }
+ Fault hwrei();
+ bool inPalMode() { return AlphaISA::PcPAL(regs.readPC()); }
+ bool simPalCheck(int palFunc);
+#endif
+
+#if !FULL_SYSTEM
+ TheISA::IntReg getSyscallArg(int i)
+ {
+ return regs.readIntReg(TheISA::ArgumentReg0 + i);
+ }
+
+ // used to shift args for indirect syscall
+ void setSyscallArg(int i, TheISA::IntReg val)
+ {
+ regs.setIntReg(TheISA::ArgumentReg0 + i, val);
+ }
+
+ void setSyscallReturn(SyscallReturn return_value)
+ {
+ TheISA::setSyscallReturn(return_value, ®s);
+ }
+
+ void syscall(int64_t callnum)
+ {
+ process->syscall(callnum, proxy);
+ }
+
+ Counter readFuncExeInst() { return func_exe_inst; }
+
+ void setFuncExeInst(Counter new_val) { func_exe_inst = new_val; }
+#endif
+
+ void changeRegFileContext(RegFile::ContextParam param,
+ RegFile::ContextVal val)
+ {
+ regs.changeContext(param, val);
+ }
+};
+
+
+// for non-speculative execution context, spec_mode is always false
+inline bool
+CPUExecContext::misspeculating()
+{
+ return false;
+}
+
+#endif // __CPU_CPU_EXEC_CONTEXT_HH__
--- /dev/null
+# Copyright (c) 2003-2006 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+################
+# CpuModel class
+#
+# The CpuModel class encapsulates everything the ISA parser needs to
+# know about a particular CPU model.
+
+class CpuModel:
+ # Dict of available CPU model objects. Accessible as CpuModel.dict.
+ dict = {}
+
+ # Constructor. Automatically adds models to CpuModel.dict.
+ def __init__(self, name, filename, includes, strings):
+ self.name = name
+ self.filename = filename # filename for output exec code
+ self.includes = includes # include files needed in exec file
+ # The 'strings' dict holds all the per-CPU symbols we can
+ # substitute into templates etc.
+ self.strings = strings
+ # Add self to dict
+ CpuModel.dict[name] = self
+
+
+#
+# Define CPU models.
+#
+# Parameters are:
+# - name of model
+# - filename for generated ISA execution file
+# - includes needed for generated ISA execution file
+# - substitution strings for ISA description templates
+#
+
+CpuModel('AtomicSimpleCPU', 'atomic_simple_cpu_exec.cc',
+ '#include "cpu/simple/atomic.hh"',
+ { 'CPU_exec_context': 'AtomicSimpleCPU' })
+CpuModel('TimingSimpleCPU', 'timing_simple_cpu_exec.cc',
+ '#include "cpu/simple/timing.hh"',
+ { 'CPU_exec_context': 'TimingSimpleCPU' })
+CpuModel('FullCPU', 'full_cpu_exec.cc',
+ '#include "encumbered/cpu/full/dyn_inst.hh"',
+ { 'CPU_exec_context': 'DynInst' })
+CpuModel('AlphaFullCPU', 'alpha_o3_exec.cc',
+ '#include "cpu/o3/alpha_dyn_inst.hh"',
+ { 'CPU_exec_context': 'AlphaDynInst<AlphaSimpleImpl>' })
++CpuModel('OzoneSimpleCPU', 'ozone_simple_exec.cc',
++ '#include "cpu/ozone/dyn_inst.hh"',
++ { 'CPU_exec_context': 'OzoneDynInst<SimpleImpl>' })
++CpuModel('OzoneCPU', 'ozone_exec.cc',
++ '#include "cpu/ozone/dyn_inst.hh"',
++ { 'CPU_exec_context': 'OzoneDynInst<OzoneImpl>' })
++CpuModel('CheckerCPU', 'checker_cpu_exec.cc',
++ '#include "cpu/checker/cpu.hh"',
++ { 'CPU_exec_context': 'CheckerCPU' })
+
--- /dev/null
- virtual Event *getQuiesceEvent() = 0;
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_EXEC_CONTEXT_HH__
+#define __CPU_EXEC_CONTEXT_HH__
+
+#include "config/full_system.hh"
+#include "mem/request.hh"
+#include "sim/faults.hh"
+#include "sim/host.hh"
+#include "sim/serialize.hh"
+#include "sim/byteswap.hh"
+
+// @todo: Figure out a more architecture independent way to obtain the ITB and
+// DTB pointers.
+class AlphaDTB;
+class AlphaITB;
+class BaseCPU;
++class EndQuiesceEvent;
+class Event;
+class TranslatingPort;
+class FunctionalPort;
+class VirtualPort;
+class Process;
+class System;
++namespace Kernel {
++ class Statistics;
++};
+
+class ExecContext
+{
+ protected:
+ typedef TheISA::RegFile RegFile;
+ typedef TheISA::MachInst MachInst;
+ typedef TheISA::IntReg IntReg;
+ typedef TheISA::FloatReg FloatReg;
+ typedef TheISA::FloatRegBits FloatRegBits;
+ typedef TheISA::MiscRegFile MiscRegFile;
+ typedef TheISA::MiscReg MiscReg;
+ public:
+ enum Status
+ {
+ /// Initialized but not running yet. All CPUs start in
+ /// this state, but most transition to Active on cycle 1.
+ /// In MP or SMT systems, non-primary contexts will stay
+ /// in this state until a thread is assigned to them.
+ Unallocated,
+
+ /// Running. Instructions should be executed only when
+ /// the context is in this state.
+ Active,
+
+ /// Temporarily inactive. Entered while waiting for
+ /// synchronization, etc.
+ Suspended,
+
+ /// Permanently shut down. Entered when target executes
+ /// m5exit pseudo-instruction. When all contexts enter
+ /// this state, the simulation will terminate.
+ Halted
+ };
+
+ virtual ~ExecContext() { };
+
+ virtual BaseCPU *getCpuPtr() = 0;
+
+ virtual void setCpuId(int id) = 0;
+
+ virtual int readCpuId() = 0;
+
+#if FULL_SYSTEM
+ virtual System *getSystemPtr() = 0;
+
+ virtual AlphaITB *getITBPtr() = 0;
+
+ virtual AlphaDTB * getDTBPtr() = 0;
+
++ virtual Kernel::Statistics *getKernelStats() = 0;
++
+ virtual FunctionalPort *getPhysPort() = 0;
+
+ virtual VirtualPort *getVirtPort(ExecContext *xc = NULL) = 0;
+
+ virtual void delVirtPort(VirtualPort *vp) = 0;
+#else
+ virtual TranslatingPort *getMemPort() = 0;
+
+ virtual Process *getProcessPtr() = 0;
+#endif
+
+ virtual Status status() const = 0;
+
+ virtual void setStatus(Status new_status) = 0;
+
+ /// Set the status to Active. Optional delay indicates number of
+ /// cycles to wait before beginning execution.
+ virtual void activate(int delay = 1) = 0;
+
+ /// Set the status to Suspended.
+ virtual void suspend() = 0;
+
+ /// Set the status to Unallocated.
+ virtual void deallocate() = 0;
+
+ /// Set the status to Halted.
+ virtual void halt() = 0;
+
+#if FULL_SYSTEM
+ virtual void dumpFuncProfile() = 0;
+#endif
+
+ virtual void takeOverFrom(ExecContext *old_context) = 0;
+
+ virtual void regStats(const std::string &name) = 0;
+
+ virtual void serialize(std::ostream &os) = 0;
+ virtual void unserialize(Checkpoint *cp, const std::string §ion) = 0;
+
+#if FULL_SYSTEM
- virtual int getInstAsid() = 0;
- virtual int getDataAsid() = 0;
-
- virtual Fault translateInstReq(RequestPtr &req) = 0;
-
- virtual Fault translateDataReadReq(RequestPtr &req) = 0;
-
- virtual Fault translateDataWriteReq(RequestPtr &req) = 0;
-
++ virtual EndQuiesceEvent *getQuiesceEvent() = 0;
+
+ // Not necessarily the best location for these...
+ // Having an extra function just to read these is obnoxious
+ virtual Tick readLastActivate() = 0;
+ virtual Tick readLastSuspend() = 0;
+
+ virtual void profileClear() = 0;
+ virtual void profileSample() = 0;
+#endif
+
+ virtual int getThreadNum() = 0;
+
- virtual int readIntrFlag() = 0;
- virtual void setIntrFlag(int val) = 0;
- virtual Fault hwrei() = 0;
+ // Also somewhat obnoxious. Really only used for the TLB fault.
+ // However, may be quite useful in SPARC.
+ virtual TheISA::MachInst getInst() = 0;
+
+ virtual void copyArchRegs(ExecContext *xc) = 0;
+
+ virtual void clearArchRegs() = 0;
+
+ //
+ // New accessors for new decoder.
+ //
+ virtual uint64_t readIntReg(int reg_idx) = 0;
+
+ virtual FloatReg readFloatReg(int reg_idx, int width) = 0;
+
+ virtual FloatReg readFloatReg(int reg_idx) = 0;
+
+ virtual FloatRegBits readFloatRegBits(int reg_idx, int width) = 0;
+
+ virtual FloatRegBits readFloatRegBits(int reg_idx) = 0;
+
+ virtual void setIntReg(int reg_idx, uint64_t val) = 0;
+
+ virtual void setFloatReg(int reg_idx, FloatReg val, int width) = 0;
+
+ virtual void setFloatReg(int reg_idx, FloatReg val) = 0;
+
+ virtual void setFloatRegBits(int reg_idx, FloatRegBits val) = 0;
+
+ virtual void setFloatRegBits(int reg_idx, FloatRegBits val, int width) = 0;
+
+ virtual uint64_t readPC() = 0;
+
+ virtual void setPC(uint64_t val) = 0;
+
+ virtual uint64_t readNextPC() = 0;
+
+ virtual void setNextPC(uint64_t val) = 0;
+
+ virtual uint64_t readNextNPC() = 0;
+
+ virtual void setNextNPC(uint64_t val) = 0;
+
+ virtual MiscReg readMiscReg(int misc_reg) = 0;
+
+ virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) = 0;
+
+ virtual Fault setMiscReg(int misc_reg, const MiscReg &val) = 0;
+
+ virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) = 0;
+
+ // Also not necessarily the best location for these two. Hopefully will go
+ // away once we decide upon where st cond failures goes.
+ virtual unsigned readStCondFailures() = 0;
+
+ virtual void setStCondFailures(unsigned sc_failures) = 0;
+
+#if FULL_SYSTEM
- virtual bool simPalCheck(int palFunc) = 0;
+ virtual bool inPalMode() = 0;
- virtual void syscall(int64_t callnum) = 0;
+#endif
+
+ // Only really makes sense for old CPU model. Still could be useful though.
+ virtual bool misspeculating() = 0;
+
+#if !FULL_SYSTEM
+ virtual IntReg getSyscallArg(int i) = 0;
+
+ // used to shift args for indirect syscall
+ virtual void setSyscallArg(int i, IntReg val) = 0;
+
+ virtual void setSyscallReturn(SyscallReturn return_value) = 0;
+
-
- virtual void setFuncExeInst(Counter new_val) = 0;
+
+ // Same with st cond failures.
+ virtual Counter readFuncExeInst() = 0;
- Event *getQuiesceEvent() { return actualXC->getQuiesceEvent(); }
+#endif
+
+ virtual void changeRegFileContext(RegFile::ContextParam param,
+ RegFile::ContextVal val) = 0;
+};
+
+template <class XC>
+class ProxyExecContext : public ExecContext
+{
+ public:
+ ProxyExecContext(XC *actual_xc)
+ { actualXC = actual_xc; }
+
+ private:
+ XC *actualXC;
+
+ public:
+
+ BaseCPU *getCpuPtr() { return actualXC->getCpuPtr(); }
+
+ void setCpuId(int id) { actualXC->setCpuId(id); }
+
+ int readCpuId() { return actualXC->readCpuId(); }
+
+#if FULL_SYSTEM
+ System *getSystemPtr() { return actualXC->getSystemPtr(); }
+
+ AlphaITB *getITBPtr() { return actualXC->getITBPtr(); }
+
+ AlphaDTB *getDTBPtr() { return actualXC->getDTBPtr(); }
+
++ Kernel::Statistics *getKernelStats() { return actualXC->getKernelStats(); }
++
+ FunctionalPort *getPhysPort() { return actualXC->getPhysPort(); }
+
+ VirtualPort *getVirtPort(ExecContext *xc = NULL) { return actualXC->getVirtPort(xc); }
+
+ void delVirtPort(VirtualPort *vp) { return actualXC->delVirtPort(vp); }
+#else
+ TranslatingPort *getMemPort() { return actualXC->getMemPort(); }
+
+ Process *getProcessPtr() { return actualXC->getProcessPtr(); }
+#endif
+
+ Status status() const { return actualXC->status(); }
+
+ void setStatus(Status new_status) { actualXC->setStatus(new_status); }
+
+ /// Set the status to Active. Optional delay indicates number of
+ /// cycles to wait before beginning execution.
+ void activate(int delay = 1) { actualXC->activate(delay); }
+
+ /// Set the status to Suspended.
+ void suspend() { actualXC->suspend(); }
+
+ /// Set the status to Unallocated.
+ void deallocate() { actualXC->deallocate(); }
+
+ /// Set the status to Halted.
+ void halt() { actualXC->halt(); }
+
+#if FULL_SYSTEM
+ void dumpFuncProfile() { actualXC->dumpFuncProfile(); }
+#endif
+
+ void takeOverFrom(ExecContext *oldContext)
+ { actualXC->takeOverFrom(oldContext); }
+
+ void regStats(const std::string &name) { actualXC->regStats(name); }
+
+ void serialize(std::ostream &os) { actualXC->serialize(os); }
+ void unserialize(Checkpoint *cp, const std::string §ion)
+ { actualXC->unserialize(cp, section); }
+
+#if FULL_SYSTEM
- int getInstAsid() { return actualXC->getInstAsid(); }
- int getDataAsid() { return actualXC->getDataAsid(); }
-
- Fault translateInstReq(RequestPtr &req)
- { return actualXC->translateInstReq(req); }
-
- Fault translateDataReadReq(RequestPtr &req)
- { return actualXC->translateDataReadReq(req); }
-
- Fault translateDataWriteReq(RequestPtr &req)
- { return actualXC->translateDataWriteReq(req); }
-
++ EndQuiesceEvent *getQuiesceEvent() { return actualXC->getQuiesceEvent(); }
+
+ Tick readLastActivate() { return actualXC->readLastActivate(); }
+ Tick readLastSuspend() { return actualXC->readLastSuspend(); }
+
+ void profileClear() { return actualXC->profileClear(); }
+ void profileSample() { return actualXC->profileSample(); }
+#endif
+
+ int getThreadNum() { return actualXC->getThreadNum(); }
+
-
+ // @todo: Do I need this?
+ MachInst getInst() { return actualXC->getInst(); }
+
+ // @todo: Do I need this?
+ void copyArchRegs(ExecContext *xc) { actualXC->copyArchRegs(xc); }
+
+ void clearArchRegs() { actualXC->clearArchRegs(); }
+
+ //
+ // New accessors for new decoder.
+ //
+ uint64_t readIntReg(int reg_idx)
+ { return actualXC->readIntReg(reg_idx); }
+
+ FloatReg readFloatReg(int reg_idx, int width)
+ { return actualXC->readFloatReg(reg_idx, width); }
+
+ FloatReg readFloatReg(int reg_idx)
+ { return actualXC->readFloatReg(reg_idx); }
+
+ FloatRegBits readFloatRegBits(int reg_idx, int width)
+ { return actualXC->readFloatRegBits(reg_idx, width); }
+
+ FloatRegBits readFloatRegBits(int reg_idx)
+ { return actualXC->readFloatRegBits(reg_idx); }
+
+ void setIntReg(int reg_idx, uint64_t val)
+ { actualXC->setIntReg(reg_idx, val); }
+
+ void setFloatReg(int reg_idx, FloatReg val, int width)
+ { actualXC->setFloatReg(reg_idx, val, width); }
+
+ void setFloatReg(int reg_idx, FloatReg val)
+ { actualXC->setFloatReg(reg_idx, val); }
+
+ void setFloatRegBits(int reg_idx, FloatRegBits val, int width)
+ { actualXC->setFloatRegBits(reg_idx, val, width); }
+
+ void setFloatRegBits(int reg_idx, FloatRegBits val)
+ { actualXC->setFloatRegBits(reg_idx, val); }
+
+ uint64_t readPC() { return actualXC->readPC(); }
+
+ void setPC(uint64_t val) { actualXC->setPC(val); }
+
+ uint64_t readNextPC() { return actualXC->readNextPC(); }
+
+ void setNextPC(uint64_t val) { actualXC->setNextPC(val); }
+
+ uint64_t readNextNPC() { return actualXC->readNextNPC(); }
+
+ void setNextNPC(uint64_t val) { actualXC->setNextNPC(val); }
+
+ MiscReg readMiscReg(int misc_reg)
+ { return actualXC->readMiscReg(misc_reg); }
+
+ MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
+ { return actualXC->readMiscRegWithEffect(misc_reg, fault); }
+
+ Fault setMiscReg(int misc_reg, const MiscReg &val)
+ { return actualXC->setMiscReg(misc_reg, val); }
+
+ Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val)
+ { return actualXC->setMiscRegWithEffect(misc_reg, val); }
+
+ unsigned readStCondFailures()
+ { return actualXC->readStCondFailures(); }
+
+ void setStCondFailures(unsigned sc_failures)
+ { actualXC->setStCondFailures(sc_failures); }
- int readIntrFlag() { return actualXC->readIntrFlag(); }
-
- void setIntrFlag(int val) { actualXC->setIntrFlag(val); }
-
- Fault hwrei() { return actualXC->hwrei(); }
-
+#if FULL_SYSTEM
-
- bool simPalCheck(int palFunc) { return actualXC->simPalCheck(palFunc); }
+ bool inPalMode() { return actualXC->inPalMode(); }
- void syscall(int64_t callnum) { actualXC->syscall(callnum); }
+#endif
+
+ // @todo: Fix this!
+ bool misspeculating() { return actualXC->misspeculating(); }
+
+#if !FULL_SYSTEM
+ IntReg getSyscallArg(int i) { return actualXC->getSyscallArg(i); }
+
+ // used to shift args for indirect syscall
+ void setSyscallArg(int i, IntReg val)
+ { actualXC->setSyscallArg(i, val); }
+
+ void setSyscallReturn(SyscallReturn return_value)
+ { actualXC->setSyscallReturn(return_value); }
+
-
- void setFuncExeInst(Counter new_val)
- { return actualXC->setFuncExeInst(new_val); }
+
+ Counter readFuncExeInst() { return actualXC->readFuncExeInst(); }
+#endif
+
+ void changeRegFileContext(RegFile::ContextParam param,
+ RegFile::ContextVal val)
+ {
+ actualXC->changeRegFileContext(param, val);
+ }
+};
+
+#endif
--- /dev/null
- && debugSymbolTable->findNearestSymbol(PC, sym_str, sym_addr)) {
+/*
+ * Copyright (c) 2001-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <fstream>
+#include <iomanip>
+
+#include "base/loader/symtab.hh"
+#include "cpu/base.hh"
+#include "cpu/exetrace.hh"
+#include "cpu/static_inst.hh"
+#include "sim/param.hh"
+#include "sim/system.hh"
+
+using namespace std;
+
+
+////////////////////////////////////////////////////////////////////////
+//
+// Methods for the InstRecord object
+//
+
+
+void
+Trace::InstRecord::dump(ostream &outs)
+{
+    if (flags[INTEL_FORMAT]) {
+#if FULL_SYSTEM
+        bool is_trace_system = (cpu->system->name() == trace_system);
+#else
+        bool is_trace_system = true;
+#endif
+        if (is_trace_system) {
+            ccprintf(outs, "%7d ) ", cycle);
+            outs << "0x" << hex << PC << ":\t";
+            if (staticInst->isLoad()) {
+                outs << "<RD 0x" << hex << addr;
+                outs << ">";
+            } else if (staticInst->isStore()) {
+                outs << "<WR 0x" << hex << addr;
+                outs << ">";
+            }
+            outs << endl;
+        }
+    } else {
+        if (flags[PRINT_CYCLE])
+            ccprintf(outs, "%7d: ", cycle);
+
+        outs << cpu->name() << " ";
+
+        if (flags[TRACE_MISSPEC])
+            outs << (misspeculating ? "-" : "+") << " ";
+
+        if (flags[PRINT_THREAD_NUM])
+            outs << "T" << thread << " : ";
+
+
+        std::string sym_str;
+        Addr sym_addr;
+        // Check the cheap PC_SYMBOL flag before doing the (relatively
+        // expensive) symbol-table search; the search also clobbers
+        // sym_str/sym_addr, so skip it entirely when symbols are disabled.
+        if (debugSymbolTable
++            && flags[PC_SYMBOL]
++            && debugSymbolTable->findNearestSymbol(PC, sym_str, sym_addr)) {
+            if (PC != sym_addr)
+                sym_str += csprintf("+%d", PC - sym_addr);
+            outs << "@" << sym_str << " : ";
+        }
+        else {
+            outs << "0x" << hex << PC << " : ";
+        }
+
+        //
+        // Print decoded instruction
+        //
+
+#if defined(__GNUC__) && (__GNUC__ < 3)
+        // There's a bug in gcc 2.x library that prevents setw()
+        // from working properly on strings
+        string mc(staticInst->disassemble(PC, debugSymbolTable));
+        while (mc.length() < 26)
+            mc += " ";
+        outs << mc;
+#else
+        outs << setw(26) << left << staticInst->disassemble(PC, debugSymbolTable);
+#endif
+
+        outs << " : ";
+
+        if (flags[PRINT_OP_CLASS]) {
+            outs << opClassStrings[staticInst->opClass()] << " : ";
+        }
+
+        if (flags[PRINT_RESULT_DATA] && data_status != DataInvalid) {
+            outs << " D=";
+#if 0
+            if (data_status == DataDouble)
+                ccprintf(outs, "%f", data.as_double);
+            else
+                ccprintf(outs, "%#018x", data.as_int);
+#else
+            ccprintf(outs, "%#018x", data.as_int);
+#endif
+        }
+
+        if (flags[PRINT_EFF_ADDR] && addr_valid)
+            outs << " A=0x" << hex << addr;
+
+        if (flags[PRINT_INT_REGS] && regs_valid) {
+            for (int i = 0; i < TheISA::NumIntRegs;)
+                for (int j = i + 1; i <= j; i++)
+                    ccprintf(outs, "r%02d = %#018x%s", i,
+                             iregs->regs.readReg(i),
+                             ((i == j) ? "\n" : " "));
+            outs << "\n";
+        }
+
+        if (flags[PRINT_FETCH_SEQ] && fetch_seq_valid)
+            outs << " FetchSeq=" << dec << fetch_seq;
+
+        if (flags[PRINT_CP_SEQ] && cp_seq_valid)
+            outs << " CPSeq=" << dec << cp_seq;
+
+        //
+        // End of line...
+        //
+        outs << endl;
+    }
+}
+
+
+vector<bool> Trace::InstRecord::flags(NUM_BITS);
+string Trace::InstRecord::trace_system;
+
+////////////////////////////////////////////////////////////////////////
+//
+// Parameter space for per-cycle execution address tracing options.
+// Derive from ParamContext so we can override checkParams() function.
+//
+class ExecutionTraceParamContext : public ParamContext
+{
+ public:
+ ExecutionTraceParamContext(const string &_iniSection)
+ : ParamContext(_iniSection)
+ {
+ }
+
+ void checkParams(); // defined at bottom of file
+};
+
+ExecutionTraceParamContext exeTraceParams("exetrace");
+
+Param<bool> exe_trace_spec(&exeTraceParams, "speculative",
+ "capture speculative instructions", true);
+
+Param<bool> exe_trace_print_cycle(&exeTraceParams, "print_cycle",
+ "print cycle number", true);
+Param<bool> exe_trace_print_opclass(&exeTraceParams, "print_opclass",
+ "print op class", true);
+Param<bool> exe_trace_print_thread(&exeTraceParams, "print_thread",
+ "print thread number", true);
+Param<bool> exe_trace_print_effaddr(&exeTraceParams, "print_effaddr",
+ "print effective address", true);
+Param<bool> exe_trace_print_data(&exeTraceParams, "print_data",
+ "print result data", true);
+Param<bool> exe_trace_print_iregs(&exeTraceParams, "print_iregs",
+ "print all integer regs", false);
+Param<bool> exe_trace_print_fetchseq(&exeTraceParams, "print_fetchseq",
+ "print fetch sequence number", false);
+Param<bool> exe_trace_print_cp_seq(&exeTraceParams, "print_cpseq",
+ "print correct-path sequence number", false);
++Param<bool> exe_trace_pc_symbol(&exeTraceParams, "pc_symbol",
++ "Use symbols for the PC if available", true);
+Param<bool> exe_trace_intel_format(&exeTraceParams, "intel_format",
+ "print trace in intel compatible format", false);
+Param<string> exe_trace_system(&exeTraceParams, "trace_system",
+ "print trace of which system (client or server)",
+ "client");
+
+
+//
+// Helper function for ExecutionTraceParamContext::checkParams() just
+// to get us into the InstRecord namespace
+//
+void
+Trace::InstRecord::setParams()
+{
+ flags[TRACE_MISSPEC] = exe_trace_spec;
+
+ flags[PRINT_CYCLE] = exe_trace_print_cycle;
+ flags[PRINT_OP_CLASS] = exe_trace_print_opclass;
+ flags[PRINT_THREAD_NUM] = exe_trace_print_thread;
+ flags[PRINT_RESULT_DATA] = exe_trace_print_effaddr;
+ flags[PRINT_EFF_ADDR] = exe_trace_print_data;
+ flags[PRINT_INT_REGS] = exe_trace_print_iregs;
+ flags[PRINT_FETCH_SEQ] = exe_trace_print_fetchseq;
+ flags[PRINT_CP_SEQ] = exe_trace_print_cp_seq;
++ flags[PC_SYMBOL] = exe_trace_pc_symbol;
+ flags[INTEL_FORMAT] = exe_trace_intel_format;
+ trace_system = exe_trace_system;
+}
+
+void
+ExecutionTraceParamContext::checkParams()
+{
+ Trace::InstRecord::setParams();
+}
+
--- /dev/null
+/*
+ * Copyright (c) 2001-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EXETRACE_HH__
+#define __EXETRACE_HH__
+
+#include <fstream>
+#include <vector>
+
+#include "sim/host.hh"
+#include "cpu/inst_seq.hh" // for InstSeqNum
+#include "base/trace.hh"
+#include "cpu/exec_context.hh"
+#include "cpu/static_inst.hh"
+
+class BaseCPU;
+
+
+namespace Trace {
+
+class InstRecord : public Record
+{
+ protected:
+ typedef TheISA::IntRegFile IntRegFile;
+
+ // The following fields are initialized by the constructor and
+ // thus guaranteed to be valid.
+ BaseCPU *cpu;
+ // need to make this ref-counted so it doesn't go away before we
+ // dump the record
+ StaticInstPtr staticInst;
+ Addr PC;
+ bool misspeculating;
+ unsigned thread;
+
+ // The remaining fields are only valid for particular instruction
+ // types (e.g, addresses for memory ops) or when particular
+ // options are enabled (e.g., tracing full register contents).
+ // Each data field has an associated valid flag to indicate
+ // whether the data field is valid.
+ Addr addr;
+ bool addr_valid;
+
+ union {
+ uint64_t as_int;
+ double as_double;
+ } data;
+ enum {
+ DataInvalid = 0,
+ DataInt8 = 1, // set to equal number of bytes
+ DataInt16 = 2,
+ DataInt32 = 4,
+ DataInt64 = 8,
+ DataDouble = 3
+ } data_status;
+
+ InstSeqNum fetch_seq;
+ bool fetch_seq_valid;
+
+ InstSeqNum cp_seq;
+ bool cp_seq_valid;
+
+ struct iRegFile {
+ IntRegFile regs;
+ };
+ iRegFile *iregs;
+ bool regs_valid;
+
+ public:
+ InstRecord(Tick _cycle, BaseCPU *_cpu,
+ const StaticInstPtr &_staticInst,
+ Addr _pc, bool spec, int _thread)
+ : Record(_cycle), cpu(_cpu), staticInst(_staticInst), PC(_pc),
+ misspeculating(spec), thread(_thread)
+ {
+ data_status = DataInvalid;
+ addr_valid = false;
+ regs_valid = false;
+
+ fetch_seq_valid = false;
+ cp_seq_valid = false;
+ }
+
+ virtual ~InstRecord() { }
+
+ virtual void dump(std::ostream &outs);
+
+ void setAddr(Addr a) { addr = a; addr_valid = true; }
+
+ void setData(uint64_t d) { data.as_int = d; data_status = DataInt64; }
+ void setData(uint32_t d) { data.as_int = d; data_status = DataInt32; }
+ void setData(uint16_t d) { data.as_int = d; data_status = DataInt16; }
+ void setData(uint8_t d) { data.as_int = d; data_status = DataInt8; }
+
+ void setData(int64_t d) { setData((uint64_t)d); }
+ void setData(int32_t d) { setData((uint32_t)d); }
+ void setData(int16_t d) { setData((uint16_t)d); }
+ void setData(int8_t d) { setData((uint8_t)d); }
+
+ void setData(double d) { data.as_double = d; data_status = DataDouble; }
+
+ void setFetchSeq(InstSeqNum seq)
+ { fetch_seq = seq; fetch_seq_valid = true; }
+
+ void setCPSeq(InstSeqNum seq)
+ { cp_seq = seq; cp_seq_valid = true; }
+
+ void setRegs(const IntRegFile ®s);
+
+ void finalize() { theLog.append(this); }
+
+ enum InstExecFlagBits {
+ TRACE_MISSPEC = 0,
+ PRINT_CYCLE,
+ PRINT_OP_CLASS,
+ PRINT_THREAD_NUM,
+ PRINT_RESULT_DATA,
+ PRINT_EFF_ADDR,
+ PRINT_INT_REGS,
+ PRINT_FETCH_SEQ,
+ PRINT_CP_SEQ,
++ PC_SYMBOL,
+ INTEL_FORMAT,
+ NUM_BITS
+ };
+
+ static std::vector<bool> flags;
+ static std::string trace_system;
+
+ static void setParams();
+
+ static bool traceMisspec() { return flags[TRACE_MISSPEC]; }
+};
+
+
+inline void
+InstRecord::setRegs(const IntRegFile ®s)
+{
+ if (!iregs)
+ iregs = new iRegFile;
+
+ memcpy(&iregs->regs, ®s, sizeof(IntRegFile));
+ regs_valid = true;
+}
+
+inline
+InstRecord *
+getInstRecord(Tick cycle, ExecContext *xc, BaseCPU *cpu,
+ const StaticInstPtr staticInst,
+ Addr pc, int thread = 0)
+{
+ if (DTRACE(InstExec) &&
+ (InstRecord::traceMisspec() || !xc->misspeculating())) {
+ return new InstRecord(cycle, cpu, staticInst, pc,
+ xc->misspeculating(), thread);
+ }
+
+ return NULL;
+}
+
+
+}
+
+#endif // __EXETRACE_HH__
--- /dev/null
+/*
+ * Copyright (c) 2001, 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __STD_TYPES_HH__
+#define __STD_TYPES_HH__
+
++#include <stdint.h>
++
+// inst sequence type, used to order instructions in the ready list,
+// if this rolls over the ready list order temporarily will get messed
+// up, but execution will continue and complete correctly
+typedef uint64_t InstSeqNum;
+
+// inst tag type, used to tag an operation instance in the IQ
+typedef unsigned int InstTag;
+
+#endif // __STD_TYPES_HH__
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- // Should do checks here to make sure sizes are correct (powers of 2).
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
++#include "base/intmath.hh"
+#include "base/trace.hh"
+#include "cpu/o3/2bit_local_pred.hh"
+
+DefaultBP::DefaultBP(unsigned _localPredictorSize,
+ unsigned _localCtrBits,
+ unsigned _instShiftAmt)
+ : localPredictorSize(_localPredictorSize),
+ localCtrBits(_localCtrBits),
+ instShiftAmt(_instShiftAmt)
+{
- indexMask = localPredictorSize - 1;
++ if (!isPowerOf2(localPredictorSize)) {
++ fatal("Invalid local predictor size!\n");
++ }
++
++ localPredictorSets = localPredictorSize / localCtrBits;
++
++ if (!isPowerOf2(localPredictorSets)) {
++ fatal("Invalid number of local predictor sets! Check localCtrBits.\n");
++ }
+
+ // Setup the index mask.
- localCtrs = new SatCounter[localPredictorSize];
++ indexMask = localPredictorSets - 1;
+
+ DPRINTF(Fetch, "Branch predictor: index mask: %#x\n", indexMask);
+
+ // Setup the array of counters for the local predictor.
- for (int i = 0; i < localPredictorSize; ++i)
++ localCtrs.resize(localPredictorSets);
+
- assert(local_predictor_idx < localPredictorSize);
-
++ for (int i = 0; i < localPredictorSets; ++i)
+ localCtrs[i].setBits(_localCtrBits);
+
+ DPRINTF(Fetch, "Branch predictor: local predictor size: %i\n",
+ localPredictorSize);
+
+ DPRINTF(Fetch, "Branch predictor: local counter bits: %i\n", localCtrBits);
+
+ DPRINTF(Fetch, "Branch predictor: instruction shift amount: %i\n",
+ instShiftAmt);
+}
+
++void
++DefaultBP::reset()
++{
++ for (int i = 0; i < localPredictorSets; ++i) {
++ localCtrs[i].reset();
++ }
++}
++
+bool
+DefaultBP::lookup(Addr &branch_addr)
+{
+ bool taken;
+ uint8_t local_prediction;
+ unsigned local_predictor_idx = getLocalIndex(branch_addr);
+
+ DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n",
+ local_predictor_idx);
+
- assert(local_predictor_idx < localPredictorSize);
-
+ local_prediction = localCtrs[local_predictor_idx].read();
+
+ DPRINTF(Fetch, "Branch predictor: prediction is %i.\n",
+ (int)local_prediction);
+
+ taken = getPrediction(local_prediction);
+
+#if 0
+ // Speculative update.
+ if (taken) {
+ DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n");
+ localCtrs[local_predictor_idx].increment();
+ } else {
+ DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n");
+ localCtrs[local_predictor_idx].decrement();
+ }
+#endif
+
+ return taken;
+}
+
+void
+DefaultBP::update(Addr &branch_addr, bool taken)
+{
+ unsigned local_predictor_idx;
+
+ // Update the local predictor.
+ local_predictor_idx = getLocalIndex(branch_addr);
+
+ DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n",
+ local_predictor_idx);
+
+ if (taken) {
+ DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n");
+ localCtrs[local_predictor_idx].increment();
+ } else {
+ DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n");
+ localCtrs[local_predictor_idx].decrement();
+ }
+}
+
+inline
+bool
+DefaultBP::getPrediction(uint8_t &count)
+{
+ // Get the MSB of the count
+ return (count >> (localCtrBits - 1));
+}
+
+inline
+unsigned
+DefaultBP::getLocalIndex(Addr &branch_addr)
+{
+ return (branch_addr >> instShiftAmt) & indexMask;
+}
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- #ifndef __CPU_O3_CPU_2BIT_LOCAL_PRED_HH__
- #define __CPU_O3_CPU_2BIT_LOCAL_PRED_HH__
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- /** Returns the taken/not taken prediction given the value of the
++#ifndef __CPU_O3_2BIT_LOCAL_PRED_HH__
++#define __CPU_O3_2BIT_LOCAL_PRED_HH__
+
+// For Addr type.
+#include "arch/isa_traits.hh"
+#include "cpu/o3/sat_counter.hh"
+
++#include <vector>
++
+class DefaultBP
+{
+ public:
+ /**
+ * Default branch predictor constructor.
++ * @param localPredictorSize Size of the local predictor.
++ * @param localCtrBits Number of bits per counter.
++ * @param instShiftAmt Offset amount for instructions to ignore alignment.
+ */
+ DefaultBP(unsigned localPredictorSize, unsigned localCtrBits,
+ unsigned instShiftAmt);
+
+ /**
+ * Looks up the given address in the branch predictor and returns
+ * a true/false value as to whether it is taken.
+ * @param branch_addr The address of the branch to look up.
+ * @return Whether or not the branch is taken.
+ */
+ bool lookup(Addr &branch_addr);
+
+ /**
+ * Updates the branch predictor with the actual result of a branch.
+ * @param branch_addr The address of the branch to update.
+ * @param taken Whether or not the branch was taken.
+ */
+ void update(Addr &branch_addr, bool taken);
+
++ void reset();
++
+ private:
+
- SatCounter *localCtrs;
++ /**
++ * Returns the taken/not taken prediction given the value of the
+ * counter.
++ * @param count The value of the counter.
++ * @return The prediction based on the counter value.
+ */
+ inline bool getPrediction(uint8_t &count);
+
+ /** Calculates the local index based on the PC. */
+ inline unsigned getLocalIndex(Addr &PC);
+
+ /** Array of counters that make up the local predictor. */
- #endif // __CPU_O3_CPU_2BIT_LOCAL_PRED_HH__
++ std::vector<SatCounter> localCtrs;
+
+ /** Size of the local predictor. */
+ unsigned localPredictorSize;
+
++ /** Number of sets. */
++ unsigned localPredictorSets;
++
+ /** Number of bits of the local predictor's counters. */
+ unsigned localCtrBits;
+
+ /** Number of bits to shift the PC when calculating index. */
+ unsigned instShiftAmt;
+
+ /** Mask to get index bits. */
+ unsigned indexMask;
+};
+
++#endif // __CPU_O3_2BIT_LOCAL_PRED_HH__
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- // Todo: Find all the stuff in ExecContext and ev5 that needs to be
- // specifically designed for this CPU.
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- #ifndef __CPU_O3_CPU_ALPHA_FULL_CPU_HH__
- #define __CPU_O3_CPU_ALPHA_FULL_CPU_HH__
-
- #include "cpu/o3/cpu.hh"
++#ifndef __CPU_O3_ALPHA_FULL_CPU_HH__
++#define __CPU_O3_ALPHA_FULL_CPU_HH__
+
- public:
- AlphaFullCPU(Params ¶ms);
+#include "arch/isa_traits.hh"
++#include "cpu/exec_context.hh"
++#include "cpu/o3/cpu.hh"
+#include "sim/byteswap.hh"
+
++class EndQuiesceEvent;
++namespace Kernel {
++ class Statistics;
++};
++
+template <class Impl>
+class AlphaFullCPU : public FullO3CPU<Impl>
+{
+ protected:
+ typedef TheISA::IntReg IntReg;
+ typedef TheISA::MiscReg MiscReg;
+ typedef TheISA::RegFile RegFile;
+ typedef TheISA::MiscRegFile MiscRegFile;
+
+ public:
++ typedef O3ThreadState<Impl> ImplState;
++ typedef O3ThreadState<Impl> Thread;
+ typedef typename Impl::Params Params;
+
- public:
++ /** Constructs an AlphaFullCPU with the given parameters. */
++ AlphaFullCPU(Params *params);
++
++ class AlphaXC : public ExecContext
++ {
++ public:
++ AlphaFullCPU<Impl> *cpu;
++
++ O3ThreadState<Impl> *thread;
++
++ virtual BaseCPU *getCpuPtr() { return cpu; }
++
++ virtual void setCpuId(int id) { cpu->cpu_id = id; }
++
++ virtual int readCpuId() { return cpu->cpu_id; }
++
++ virtual FunctionalMemory *getMemPtr() { return thread->mem; }
++
++#if FULL_SYSTEM
++ virtual System *getSystemPtr() { return cpu->system; }
++
++ virtual PhysicalMemory *getPhysMemPtr() { return cpu->physmem; }
++
++ virtual AlphaITB *getITBPtr() { return cpu->itb; }
++
++ virtual AlphaDTB * getDTBPtr() { return cpu->dtb; }
++
++ virtual Kernel::Statistics *getKernelStats()
++ { return thread->kernelStats; }
++#else
++ virtual Process *getProcessPtr() { return thread->process; }
++#endif
++
++ virtual Status status() const { return thread->status(); }
++
++ virtual void setStatus(Status new_status)
++ { thread->setStatus(new_status); }
++
++ /// Set the status to Active. Optional delay indicates number of
++ /// cycles to wait before beginning execution.
++ virtual void activate(int delay = 1);
++
++ /// Set the status to Suspended.
++ virtual void suspend();
++
++ /// Set the status to Unallocated.
++ virtual void deallocate();
++
++ /// Set the status to Halted.
++ virtual void halt();
++
++#if FULL_SYSTEM
++ virtual void dumpFuncProfile();
++#endif
++
++ virtual void takeOverFrom(ExecContext *old_context);
++
++ virtual void regStats(const std::string &name);
++
++ virtual void serialize(std::ostream &os);
++ virtual void unserialize(Checkpoint *cp, const std::string §ion);
++
++#if FULL_SYSTEM
++ virtual EndQuiesceEvent *getQuiesceEvent();
++
++ virtual Tick readLastActivate();
++ virtual Tick readLastSuspend();
++
++ virtual void profileClear();
++ virtual void profileSample();
++#endif
++
++ virtual int getThreadNum() { return thread->tid; }
++
++ virtual TheISA::MachInst getInst();
++
++ virtual void copyArchRegs(ExecContext *xc);
++
++ virtual void clearArchRegs();
++
++ virtual uint64_t readIntReg(int reg_idx);
++
++ virtual float readFloatRegSingle(int reg_idx);
++
++ virtual double readFloatRegDouble(int reg_idx);
++
++ virtual uint64_t readFloatRegInt(int reg_idx);
++
++ virtual void setIntReg(int reg_idx, uint64_t val);
++
++ virtual void setFloatRegSingle(int reg_idx, float val);
++
++ virtual void setFloatRegDouble(int reg_idx, double val);
++
++ virtual void setFloatRegInt(int reg_idx, uint64_t val);
++
++ virtual uint64_t readPC()
++ { return cpu->readPC(thread->tid); }
++
++ virtual void setPC(uint64_t val);
++
++ virtual uint64_t readNextPC()
++ { return cpu->readNextPC(thread->tid); }
++
++ virtual void setNextPC(uint64_t val);
++
++ virtual MiscReg readMiscReg(int misc_reg)
++ { return cpu->readMiscReg(misc_reg, thread->tid); }
++
++ virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
++ { return cpu->readMiscRegWithEffect(misc_reg, fault, thread->tid); }
++
++ virtual Fault setMiscReg(int misc_reg, const MiscReg &val);
++
++ virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val);
++
++ // @todo: Figure out where these store cond failures should go.
++ virtual unsigned readStCondFailures()
++ { return thread->storeCondFailures; }
++
++ virtual void setStCondFailures(unsigned sc_failures)
++ { thread->storeCondFailures = sc_failures; }
++
++#if FULL_SYSTEM
++ virtual bool inPalMode()
++ { return TheISA::PcPAL(cpu->readPC(thread->tid)); }
++#endif
++
++ // Only really makes sense for old CPU model. Lots of code
++ // outside the CPU still checks this function, so it will
++ // always return false to keep everything working.
++ virtual bool misspeculating() { return false; }
++
++#if !FULL_SYSTEM
++ virtual IntReg getSyscallArg(int i);
++
++ virtual void setSyscallArg(int i, IntReg val);
++
++ virtual void setSyscallReturn(SyscallReturn return_value);
++
++ virtual void syscall() { return cpu->syscall(thread->tid); }
++
++ virtual Counter readFuncExeInst() { return thread->funcExeInst; }
++#endif
++ };
+
+#if FULL_SYSTEM
++ /** ITB pointer. */
+ AlphaITB *itb;
++ /** DTB pointer. */
+ AlphaDTB *dtb;
+#endif
+
- //Note that the interrupt stuff from the base CPU might be somewhat
- //ISA specific (ie NumInterruptLevels). These functions might not
- //be needed in FullCPU though.
- // void post_interrupt(int int_num, int index);
- // void clear_interrupt(int int_num, int index);
- // void clear_interrupts();
-
++ /** Registers statistics. */
+ void regStats();
+
+#if FULL_SYSTEM
- // Later on may want to remove this misc stuff from the regfile and
- // have it handled at this level. Might prove to be an issue when
- // trying to rename source/destination registers...
- MiscReg readMiscReg(int misc_reg)
- {
- // Dummy function for now.
- // @todo: Fix this once reg file gets fixed.
- return 0;
- }
++    /** Translates instruction request. */
+ Fault translateInstReq(MemReqPtr &req)
+ {
+ return itb->translate(req);
+ }
+
++ /** Translates data read request. */
+ Fault translateDataReadReq(MemReqPtr &req)
+ {
+ return dtb->translate(req, false);
+ }
+
++ /** Translates data write request. */
+ Fault translateDataWriteReq(MemReqPtr &req)
+ {
+ return dtb->translate(req, true);
+ }
+
+#else
+ Fault dummyTranslation(MemReqPtr &req)
+ {
+#if 0
+ assert((req->vaddr >> 48 & 0xffff) == 0);
+#endif
+
+ // put the asid in the upper 16 bits of the paddr
+ req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16);
+ req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16;
+ return NoFault;
+ }
+
++    /** Translates instruction request in syscall emulation mode. */
+ Fault translateInstReq(MemReqPtr &req)
+ {
+ return dummyTranslation(req);
+ }
+
++ /** Translates data read request in syscall emulation mode. */
+ Fault translateDataReadReq(MemReqPtr &req)
+ {
+ return dummyTranslation(req);
+ }
+
++ /** Translates data write request in syscall emulation mode. */
+ Fault translateDataWriteReq(MemReqPtr &req)
+ {
+ return dummyTranslation(req);
+ }
+
+#endif
++ MiscReg readMiscReg(int misc_reg, unsigned tid);
+
- Fault setMiscReg(int misc_reg, const MiscReg &val)
- {
- // Dummy function for now.
- // @todo: Fix this once reg file gets fixed.
- return NoFault;
- }
++ MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid);
+
- // Most of the full system code and syscall emulation is not yet
- // implemented. These functions do show what the final interface will
- // look like.
++ Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid);
++
++ Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid);
++
++ void squashFromXC(unsigned tid);
+
- Fault hwrei();
- bool inPalMode() { return AlphaISA::PcPAL(this->regFile.readPC()); }
+#if FULL_SYSTEM
++ void post_interrupt(int int_num, int index);
++
+ int readIntrFlag();
++ /** Sets the interrupt flags. */
+ void setIntrFlag(int val);
- void trap(Fault fault);
- bool simPalCheck(int palFunc);
++ /** HW return from error interrupt. */
++ Fault hwrei(unsigned tid);
++ /** Returns if a specific PC is a PAL mode PC. */
+ bool inPalMode(uint64_t PC)
+ { return AlphaISA::PcPAL(PC); }
+
- #endif
-
-
- #if !FULL_SYSTEM
- // Need to change these into regfile calls that directly set a certain
- // register. Actually, these functions should handle most of this
- // functionality by themselves; should look up the rename and then
- // set the register.
- IntReg getSyscallArg(int i)
- {
- return this->cpuXC->readIntReg(AlphaISA::ArgumentReg0 + i);
- }
-
- // used to shift args for indirect syscall
- void setSyscallArg(int i, IntReg val)
- {
- this->cpuXC->setIntReg(AlphaISA::ArgumentReg0 + i, val);
- }
-
- void setSyscallReturn(int64_t return_value)
- {
- // check for error condition. Alpha syscall convention is to
- // indicate success/failure in reg a3 (r19) and put the
- // return value itself in the standard return value reg (v0).
- const int RegA3 = 19; // only place this is used
- if (return_value >= 0) {
- // no error
- this->cpuXC->setIntReg(RegA3, 0);
- this->cpuXC->setIntReg(AlphaISA::ReturnValueReg, return_value);
- } else {
- // got an error, return details
- this->cpuXC->setIntReg(RegA3, (IntReg) -1);
- this->cpuXC->setIntReg(AlphaISA::ReturnValueReg, -return_value);
- }
- }
-
- void syscall(short thread_num);
- void squashStages();
++ /** Traps to handle given fault. */
++ void trap(Fault fault, unsigned tid);
++ bool simPalCheck(int palFunc, unsigned tid);
+
++ /** Processes any interrupts. */
+ void processInterrupts();
- void copyToXC();
- void copyFromXC();
-
- public:
- #if FULL_SYSTEM
- bool palShadowEnabled;
-
- // Not sure this is used anywhere.
- void intr_post(RegFile *regs, Fault fault, Addr pc);
- // Actually used within exec files. Implement properly.
- void swapPALShadow(bool use_shadow);
- // Called by CPU constructor. Can implement as I please.
- void initCPU(RegFile *regs);
- // Called by initCPU. Implement as I please.
- void initIPRs(RegFile *regs);
+
++ /** Halts the CPU. */
++ void halt() { panic("Halt not implemented!\n"); }
+#endif
+
- void halt() { panic("Halt not implemented!\n"); }
+
-
++#if !FULL_SYSTEM
++ /** Executes a syscall.
++ * @todo: Determine if this needs to be virtual.
++ */
++ void syscall(int thread_num);
++ /** Gets a syscall argument. */
++ IntReg getSyscallArg(int i, int tid);
++
++ /** Used to shift args for indirect syscall. */
++ void setSyscallArg(int i, IntReg val, int tid);
++
++ /** Sets the return value of a syscall. */
++ void setSyscallReturn(SyscallReturn return_value, int tid);
+#endif
+
-
++ /** Read from memory function. */
+ template <class T>
+ Fault read(MemReqPtr &req, T &data)
+ {
++#if 0
+#if FULL_SYSTEM && THE_ISA == ALPHA_ISA
+ if (req->flags & LOCKED) {
+ req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr);
+ req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true);
+ }
+#endif
- #endif // __CPU_O3_CPU_ALPHA_FULL_CPU_HH__
++#endif
+ Fault error;
++
++#if FULL_SYSTEM
++ // @todo: Fix this LL/SC hack.
++ if (req->flags & LOCKED) {
++ lockAddr = req->paddr;
++ lockFlag = true;
++ }
++#endif
++
+ error = this->mem->read(req, data);
+ data = gtoh(data);
+ return error;
+ }
+
++ /** CPU read function, forwards read to LSQ. */
+ template <class T>
+ Fault read(MemReqPtr &req, T &data, int load_idx)
+ {
+ return this->iew.ldstQueue.read(req, data, load_idx);
+ }
+
++ /** Write to memory function. */
+ template <class T>
+ Fault write(MemReqPtr &req, T &data)
+ {
++#if 0
+#if FULL_SYSTEM && THE_ISA == ALPHA_ISA
+ ExecContext *xc;
+
+ // If this is a store conditional, act appropriately
+ if (req->flags & LOCKED) {
+ xc = req->xc;
+
+ if (req->flags & UNCACHEABLE) {
+ // Don't update result register (see stq_c in isa_desc)
+ req->result = 2;
+ xc->setStCondFailures(0);//Needed? [RGD]
+ } else {
+ bool lock_flag = xc->readMiscReg(TheISA::Lock_Flag_DepTag);
+ Addr lock_addr = xc->readMiscReg(TheISA::Lock_Addr_DepTag);
+ req->result = lock_flag;
+ if (!lock_flag ||
+ ((lock_addr & ~0xf) != (req->paddr & ~0xf))) {
+ xc->setMiscReg(TheISA::Lock_Flag_DepTag, false);
+ xc->setStCondFailures(xc->readStCondFailures() + 1);
+ if (((xc->readStCondFailures()) % 100000) == 0) {
+ std::cerr << "Warning: "
+ << xc->readStCondFailures()
+ << " consecutive store conditional failures "
+ << "on cpu " << req->xc->readCpuId()
+ << std::endl;
+ }
+ return NoFault;
+ }
+ else xc->setStCondFailures(0);
+ }
+ }
+
+ // Need to clear any locked flags on other proccessors for
+ // this address. Only do this for succsful Store Conditionals
+ // and all other stores (WH64?). Unsuccessful Store
+ // Conditionals would have returned above, and wouldn't fall
+ // through.
+ for (int i = 0; i < this->system->execContexts.size(); i++){
+ xc = this->system->execContexts[i];
+ if ((xc->readMiscReg(TheISA::Lock_Addr_DepTag) & ~0xf) ==
+ (req->paddr & ~0xf)) {
+ xc->setMiscReg(TheISA::Lock_Flag_DepTag, false);
+ }
+ }
+
++#endif
++#endif
++
++#if FULL_SYSTEM
++ // @todo: Fix this LL/SC hack.
++ if (req->flags & LOCKED) {
++ if (req->flags & UNCACHEABLE) {
++ req->result = 2;
++ } else {
++ if (this->lockFlag) {
++ req->result = 1;
++ } else {
++ req->result = 0;
++ return NoFault;
++ }
++ }
++ }
+#endif
+
+ return this->mem->write(req, (T)htog(data));
+ }
+
++ /** CPU write function, forwards write to LSQ. */
+ template <class T>
+ Fault write(MemReqPtr &req, T &data, int store_idx)
+ {
+ return this->iew.ldstQueue.write(req, data, store_idx);
+ }
+
++ Addr lockAddr;
++
++ bool lockFlag;
+};
+
++#endif // __CPU_O3_ALPHA_FULL_CPU_HH__
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- #include "base/inifile.hh"
- #include "base/loader/symtab.hh"
- #include "base/misc.hh"
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- #include "cpu/exetrace.hh"
++#include <string>
++
+#include "cpu/base.hh"
- #include "mem/base_mem.hh"
+#include "cpu/o3/alpha_cpu.hh"
+#include "cpu/o3/alpha_impl.hh"
- #include "mem/mem_interface.hh"
++#include "cpu/o3/alpha_params.hh"
++#include "cpu/o3/fu_pool.hh"
+#include "mem/cache/base_cache.hh"
- #include "sim/debug.hh"
- #include "sim/host.hh"
- #include "sim/process.hh"
- #include "sim/sim_events.hh"
- #include "sim/sim_object.hh"
- #include "sim/stats.hh"
-
- #if FULL_SYSTEM
- #include "base/remote_gdb.hh"
- #include "mem/functional/memory_control.hh"
- #include "mem/functional/physical.hh"
- #include "sim/system.hh"
- #include "arch/tlb.hh"
- #include "arch/vtophys.hh"
- #else // !FULL_SYSTEM
- #include "mem/functional/functional.hh"
- #endif // FULL_SYSTEM
+#include "sim/builder.hh"
- DerivAlphaFullCPU(AlphaSimpleParams p)
+
+class DerivAlphaFullCPU : public AlphaFullCPU<AlphaSimpleImpl>
+{
+ public:
- #if 0
++ DerivAlphaFullCPU(AlphaSimpleParams *p)
+ : AlphaFullCPU<AlphaSimpleImpl>(p)
+ { }
+};
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
+
+ Param<int> clock;
+ Param<int> numThreads;
++Param<int> activity;
+
+#if FULL_SYSTEM
+SimObjectParam<System *> system;
+Param<int> cpu_id;
+SimObjectParam<AlphaITB *> itb;
+SimObjectParam<AlphaDTB *> dtb;
+#else
+SimObjectVectorParam<Process *> workload;
++//SimObjectParam<PageTable *> page_table;
+#endif // FULL_SYSTEM
++
+SimObjectParam<FunctionalMemory *> mem;
+
++SimObjectParam<BaseCPU *> checker;
++
+Param<Counter> max_insts_any_thread;
+Param<Counter> max_insts_all_threads;
+Param<Counter> max_loads_any_thread;
+Param<Counter> max_loads_all_threads;
+
+SimObjectParam<BaseCache *> icache;
+SimObjectParam<BaseCache *> dcache;
+
++Param<unsigned> cachePorts;
++
+Param<unsigned> decodeToFetchDelay;
+Param<unsigned> renameToFetchDelay;
+Param<unsigned> iewToFetchDelay;
+Param<unsigned> commitToFetchDelay;
+Param<unsigned> fetchWidth;
+
+Param<unsigned> renameToDecodeDelay;
+Param<unsigned> iewToDecodeDelay;
+Param<unsigned> commitToDecodeDelay;
+Param<unsigned> fetchToDecodeDelay;
+Param<unsigned> decodeWidth;
+
+Param<unsigned> iewToRenameDelay;
+Param<unsigned> commitToRenameDelay;
+Param<unsigned> decodeToRenameDelay;
+Param<unsigned> renameWidth;
+
+Param<unsigned> commitToIEWDelay;
+Param<unsigned> renameToIEWDelay;
+Param<unsigned> issueToExecuteDelay;
+Param<unsigned> issueWidth;
+Param<unsigned> executeWidth;
+Param<unsigned> executeIntWidth;
+Param<unsigned> executeFloatWidth;
+Param<unsigned> executeBranchWidth;
+Param<unsigned> executeMemoryWidth;
++SimObjectParam<FUPool *> fuPool;
+
+Param<unsigned> iewToCommitDelay;
+Param<unsigned> renameToROBDelay;
+Param<unsigned> commitWidth;
+Param<unsigned> squashWidth;
++Param<Tick> trapLatency;
++Param<Tick> fetchTrapLatency;
+
- Param<unsigned> localPredictorCtrBits;
- #endif
- Param<unsigned> local_predictor_size;
- Param<unsigned> local_ctr_bits;
- Param<unsigned> local_history_table_size;
- Param<unsigned> local_history_bits;
- Param<unsigned> global_predictor_size;
- Param<unsigned> global_ctr_bits;
- Param<unsigned> global_history_bits;
- Param<unsigned> choice_predictor_size;
- Param<unsigned> choice_ctr_bits;
+Param<unsigned> localPredictorSize;
-
++Param<unsigned> localCtrBits;
++Param<unsigned> localHistoryTableSize;
++Param<unsigned> localHistoryBits;
++Param<unsigned> globalPredictorSize;
++Param<unsigned> globalCtrBits;
++Param<unsigned> globalHistoryBits;
++Param<unsigned> choicePredictorSize;
++Param<unsigned> choiceCtrBits;
+
+Param<unsigned> BTBEntries;
+Param<unsigned> BTBTagSize;
+
+Param<unsigned> RASSize;
+
+Param<unsigned> LQEntries;
+Param<unsigned> SQEntries;
+Param<unsigned> LFSTSize;
+Param<unsigned> SSITSize;
+
+Param<unsigned> numPhysIntRegs;
+Param<unsigned> numPhysFloatRegs;
+Param<unsigned> numIQEntries;
+Param<unsigned> numROBEntries;
+
++Param<unsigned> smtNumFetchingThreads;
++Param<std::string> smtFetchPolicy;
++Param<std::string> smtLSQPolicy;
++Param<unsigned> smtLSQThreshold;
++Param<std::string> smtIQPolicy;
++Param<unsigned> smtIQThreshold;
++Param<std::string> smtROBPolicy;
++Param<unsigned> smtROBThreshold;
++Param<std::string> smtCommitPolicy;
++
+Param<unsigned> instShiftAmt;
+
+Param<bool> defer_registration;
+
+Param<bool> function_trace;
+Param<Tick> function_trace_start;
+
+END_DECLARE_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
+
+ INIT_PARAM(clock, "clock speed"),
+ INIT_PARAM(numThreads, "number of HW thread contexts"),
++ INIT_PARAM_DFLT(activity, "Initial activity count", 0),
+
+#if FULL_SYSTEM
+ INIT_PARAM(system, "System object"),
+ INIT_PARAM(cpu_id, "processor ID"),
+ INIT_PARAM(itb, "Instruction translation buffer"),
+ INIT_PARAM(dtb, "Data translation buffer"),
+#else
+ INIT_PARAM(workload, "Processes to run"),
++// INIT_PARAM(page_table, "Page table"),
+#endif // FULL_SYSTEM
+
+ INIT_PARAM_DFLT(mem, "Memory", NULL),
+
++ INIT_PARAM_DFLT(checker, "Checker CPU", NULL),
++
+ INIT_PARAM_DFLT(max_insts_any_thread,
+ "Terminate when any thread reaches this inst count",
+ 0),
+ INIT_PARAM_DFLT(max_insts_all_threads,
+ "Terminate when all threads have reached"
+ "this inst count",
+ 0),
+ INIT_PARAM_DFLT(max_loads_any_thread,
+ "Terminate when any thread reaches this load count",
+ 0),
+ INIT_PARAM_DFLT(max_loads_all_threads,
+ "Terminate when all threads have reached this load"
+ "count",
+ 0),
+
+ INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL),
+ INIT_PARAM_DFLT(dcache, "L1 data cache", NULL),
+
++ INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
++
+ INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"),
+ INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"),
+ INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch"
+ "delay"),
+ INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"),
+ INIT_PARAM(fetchWidth, "Fetch width"),
-
- #if 0
- INIT_PARAM(localPredictorSize, "Size of the local predictor in entries. "
- "Must be a power of 2."),
- INIT_PARAM(localPredictorCtrBits, "Number of bits per counter for bpred"),
- #endif
- INIT_PARAM(local_predictor_size, "Size of local predictor"),
- INIT_PARAM(local_ctr_bits, "Bits per counter"),
- INIT_PARAM(local_history_table_size, "Size of local history table"),
- INIT_PARAM(local_history_bits, "Bits for the local history"),
- INIT_PARAM(global_predictor_size, "Size of global predictor"),
- INIT_PARAM(global_ctr_bits, "Bits per counter"),
- INIT_PARAM(global_history_bits, "Bits of history"),
- INIT_PARAM(choice_predictor_size, "Size of choice predictor"),
- INIT_PARAM(choice_ctr_bits, "Bits of choice counters"),
+ INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"),
+ INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode"
+ "delay"),
+ INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"),
+ INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"),
+ INIT_PARAM(decodeWidth, "Decode width"),
+
+ INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename"
+ "delay"),
+ INIT_PARAM(commitToRenameDelay, "Commit to rename delay"),
+ INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"),
+ INIT_PARAM(renameWidth, "Rename width"),
+
+ INIT_PARAM(commitToIEWDelay, "Commit to "
+ "Issue/Execute/Writeback delay"),
+ INIT_PARAM(renameToIEWDelay, "Rename to "
+ "Issue/Execute/Writeback delay"),
+ INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal"
+ "to the IEW stage)"),
+ INIT_PARAM(issueWidth, "Issue width"),
+ INIT_PARAM(executeWidth, "Execute width"),
+ INIT_PARAM(executeIntWidth, "Integer execute width"),
+ INIT_PARAM(executeFloatWidth, "Floating point execute width"),
+ INIT_PARAM(executeBranchWidth, "Branch execute width"),
+ INIT_PARAM(executeMemoryWidth, "Memory execute width"),
++ INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL),
+
+ INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit "
+ "delay"),
+ INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"),
+ INIT_PARAM(commitWidth, "Commit width"),
+ INIT_PARAM(squashWidth, "Squash width"),
- AlphaSimpleParams params;
++ INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6),
++ INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12),
++
++ INIT_PARAM(localPredictorSize, "Size of local predictor"),
++ INIT_PARAM(localCtrBits, "Bits per counter"),
++ INIT_PARAM(localHistoryTableSize, "Size of local history table"),
++ INIT_PARAM(localHistoryBits, "Bits for the local history"),
++ INIT_PARAM(globalPredictorSize, "Size of global predictor"),
++ INIT_PARAM(globalCtrBits, "Bits per counter"),
++ INIT_PARAM(globalHistoryBits, "Bits of history"),
++ INIT_PARAM(choicePredictorSize, "Size of choice predictor"),
++ INIT_PARAM(choiceCtrBits, "Bits of choice counters"),
+
+ INIT_PARAM(BTBEntries, "Number of BTB entries"),
+ INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"),
+
+ INIT_PARAM(RASSize, "RAS size"),
+
+ INIT_PARAM(LQEntries, "Number of load queue entries"),
+ INIT_PARAM(SQEntries, "Number of store queue entries"),
+ INIT_PARAM(LFSTSize, "Last fetched store table size"),
+ INIT_PARAM(SSITSize, "Store set ID table size"),
+
+ INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"),
+ INIT_PARAM(numPhysFloatRegs, "Number of physical floating point "
+ "registers"),
+ INIT_PARAM(numIQEntries, "Number of instruction queue entries"),
+ INIT_PARAM(numROBEntries, "Number of reorder buffer entries"),
+
++ INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1),
++ INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"),
++ INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"),
++ INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100),
++ INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"),
++ INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100),
++ INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"),
++ INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100),
++ INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"),
++
+ INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"),
+ INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
+
+ INIT_PARAM(function_trace, "Enable function trace"),
+ INIT_PARAM(function_trace_start, "Cycle to start function trace")
+
+END_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
+
+CREATE_SIM_OBJECT(DerivAlphaFullCPU)
+{
+ DerivAlphaFullCPU *cpu;
+
+#if FULL_SYSTEM
+ // Full-system only supports a single thread for the moment.
+ int actual_num_threads = 1;
+#else
+ // In non-full-system mode, we infer the number of threads from
+ // the workload if it's not explicitly specified.
+ int actual_num_threads =
+ numThreads.isValid() ? numThreads : workload.size();
+
+ if (workload.size() == 0) {
+ fatal("Must specify at least one workload!");
+ }
+
+#endif
+
- params.clock = clock;
++ AlphaSimpleParams *params = new AlphaSimpleParams;
+
- params.name = getInstanceName();
- params.numberOfThreads = actual_num_threads;
++ params->clock = clock;
+
- params.system = system;
- params.cpu_id = cpu_id;
- params.itb = itb;
- params.dtb = dtb;
++ params->name = getInstanceName();
++ params->numberOfThreads = actual_num_threads;
++ params->activity = activity;
+
+#if FULL_SYSTEM
- params.workload = workload;
++ params->system = system;
++ params->cpu_id = cpu_id;
++ params->itb = itb;
++ params->dtb = dtb;
+#else
- params.mem = mem;
++ params->workload = workload;
++// params->pTable = page_table;
+#endif // FULL_SYSTEM
+
- params.max_insts_any_thread = max_insts_any_thread;
- params.max_insts_all_threads = max_insts_all_threads;
- params.max_loads_any_thread = max_loads_any_thread;
- params.max_loads_all_threads = max_loads_all_threads;
++ params->mem = mem;
+
- params.icacheInterface = icache ? icache->getInterface() : NULL;
- params.dcacheInterface = dcache ? dcache->getInterface() : NULL;
-
- params.decodeToFetchDelay = decodeToFetchDelay;
- params.renameToFetchDelay = renameToFetchDelay;
- params.iewToFetchDelay = iewToFetchDelay;
- params.commitToFetchDelay = commitToFetchDelay;
- params.fetchWidth = fetchWidth;
-
- params.renameToDecodeDelay = renameToDecodeDelay;
- params.iewToDecodeDelay = iewToDecodeDelay;
- params.commitToDecodeDelay = commitToDecodeDelay;
- params.fetchToDecodeDelay = fetchToDecodeDelay;
- params.decodeWidth = decodeWidth;
-
- params.iewToRenameDelay = iewToRenameDelay;
- params.commitToRenameDelay = commitToRenameDelay;
- params.decodeToRenameDelay = decodeToRenameDelay;
- params.renameWidth = renameWidth;
-
- params.commitToIEWDelay = commitToIEWDelay;
- params.renameToIEWDelay = renameToIEWDelay;
- params.issueToExecuteDelay = issueToExecuteDelay;
- params.issueWidth = issueWidth;
- params.executeWidth = executeWidth;
- params.executeIntWidth = executeIntWidth;
- params.executeFloatWidth = executeFloatWidth;
- params.executeBranchWidth = executeBranchWidth;
- params.executeMemoryWidth = executeMemoryWidth;
-
- params.iewToCommitDelay = iewToCommitDelay;
- params.renameToROBDelay = renameToROBDelay;
- params.commitWidth = commitWidth;
- params.squashWidth = squashWidth;
- #if 0
- params.localPredictorSize = localPredictorSize;
- params.localPredictorCtrBits = localPredictorCtrBits;
- #endif
- params.local_predictor_size = local_predictor_size;
- params.local_ctr_bits = local_ctr_bits;
- params.local_history_table_size = local_history_table_size;
- params.local_history_bits = local_history_bits;
- params.global_predictor_size = global_predictor_size;
- params.global_ctr_bits = global_ctr_bits;
- params.global_history_bits = global_history_bits;
- params.choice_predictor_size = choice_predictor_size;
- params.choice_ctr_bits = choice_ctr_bits;
-
- params.BTBEntries = BTBEntries;
- params.BTBTagSize = BTBTagSize;
-
- params.RASSize = RASSize;
-
- params.LQEntries = LQEntries;
- params.SQEntries = SQEntries;
- params.SSITSize = SSITSize;
- params.LFSTSize = LFSTSize;
-
- params.numPhysIntRegs = numPhysIntRegs;
- params.numPhysFloatRegs = numPhysFloatRegs;
- params.numIQEntries = numIQEntries;
- params.numROBEntries = numROBEntries;
-
- params.instShiftAmt = 2;
-
- params.defReg = defer_registration;
-
- params.functionTrace = function_trace;
- params.functionTraceStart = function_trace_start;
++ params->checker = checker;
++
++ params->max_insts_any_thread = max_insts_any_thread;
++ params->max_insts_all_threads = max_insts_all_threads;
++ params->max_loads_any_thread = max_loads_any_thread;
++ params->max_loads_all_threads = max_loads_all_threads;
+
+ //
+ // Caches
+ //
++ params->icacheInterface = icache ? icache->getInterface() : NULL;
++ params->dcacheInterface = dcache ? dcache->getInterface() : NULL;
++ params->cachePorts = cachePorts;
++
++ params->decodeToFetchDelay = decodeToFetchDelay;
++ params->renameToFetchDelay = renameToFetchDelay;
++ params->iewToFetchDelay = iewToFetchDelay;
++ params->commitToFetchDelay = commitToFetchDelay;
++ params->fetchWidth = fetchWidth;
++
++ params->renameToDecodeDelay = renameToDecodeDelay;
++ params->iewToDecodeDelay = iewToDecodeDelay;
++ params->commitToDecodeDelay = commitToDecodeDelay;
++ params->fetchToDecodeDelay = fetchToDecodeDelay;
++ params->decodeWidth = decodeWidth;
++
++ params->iewToRenameDelay = iewToRenameDelay;
++ params->commitToRenameDelay = commitToRenameDelay;
++ params->decodeToRenameDelay = decodeToRenameDelay;
++ params->renameWidth = renameWidth;
++
++ params->commitToIEWDelay = commitToIEWDelay;
++ params->renameToIEWDelay = renameToIEWDelay;
++ params->issueToExecuteDelay = issueToExecuteDelay;
++ params->issueWidth = issueWidth;
++ params->executeWidth = executeWidth;
++ params->executeIntWidth = executeIntWidth;
++ params->executeFloatWidth = executeFloatWidth;
++ params->executeBranchWidth = executeBranchWidth;
++ params->executeMemoryWidth = executeMemoryWidth;
++ params->fuPool = fuPool;
++
++ params->iewToCommitDelay = iewToCommitDelay;
++ params->renameToROBDelay = renameToROBDelay;
++ params->commitWidth = commitWidth;
++ params->squashWidth = squashWidth;
++ params->trapLatency = trapLatency;
++ params->fetchTrapLatency = fetchTrapLatency;
++
++ params->localPredictorSize = localPredictorSize;
++ params->localCtrBits = localCtrBits;
++ params->localHistoryTableSize = localHistoryTableSize;
++ params->localHistoryBits = localHistoryBits;
++ params->globalPredictorSize = globalPredictorSize;
++ params->globalCtrBits = globalCtrBits;
++ params->globalHistoryBits = globalHistoryBits;
++ params->choicePredictorSize = choicePredictorSize;
++ params->choiceCtrBits = choiceCtrBits;
++
++ params->BTBEntries = BTBEntries;
++ params->BTBTagSize = BTBTagSize;
++
++ params->RASSize = RASSize;
++
++ params->LQEntries = LQEntries;
++ params->SQEntries = SQEntries;
++
++ params->SSITSize = SSITSize;
++ params->LFSTSize = LFSTSize;
++
++ params->numPhysIntRegs = numPhysIntRegs;
++ params->numPhysFloatRegs = numPhysFloatRegs;
++ params->numIQEntries = numIQEntries;
++ params->numROBEntries = numROBEntries;
++
++ params->smtNumFetchingThreads = smtNumFetchingThreads;
++ params->smtFetchPolicy = smtFetchPolicy;
++ params->smtIQPolicy = smtIQPolicy;
++ params->smtLSQPolicy = smtLSQPolicy;
++ params->smtLSQThreshold = smtLSQThreshold;
++ params->smtROBPolicy = smtROBPolicy;
++ params->smtROBThreshold = smtROBThreshold;
++ params->smtCommitPolicy = smtCommitPolicy;
++
++ params->instShiftAmt = 2;
++
++ params->deferRegistration = defer_registration;
++
++ params->functionTrace = function_trace;
++ params->functionTraceStart = function_trace_start;
+
+ cpu = new DerivAlphaFullCPU(params);
+
+ return cpu;
+}
+
+REGISTER_SIM_OBJECT("DerivAlphaFullCPU", DerivAlphaFullCPU)
+
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- #include "mem/cache/cache.hh" // for dynamic cast
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arch/alpha/faults.hh"
+#include "base/cprintf.hh"
+#include "base/statistics.hh"
+#include "base/timebuf.hh"
- #include "sim/builder.hh"
++#include "cpu/checker/exec_context.hh"
+#include "mem/mem_interface.hh"
- #include "arch/alpha/isa_traits.hh"
+#include "sim/sim_events.hh"
+#include "sim/stats.hh"
+
+#include "cpu/o3/alpha_cpu.hh"
+#include "cpu/o3/alpha_params.hh"
+#include "cpu/o3/comm.hh"
++#include "cpu/o3/thread_state.hh"
+
+#if FULL_SYSTEM
+#include "arch/alpha/osfpal.hh"
- AlphaFullCPU<Impl>::AlphaFullCPU(Params ¶ms)
++#include "arch/isa_traits.hh"
++#include "cpu/quiesce_event.hh"
++#include "kern/kernel_stats.hh"
+#endif
+
++using namespace TheISA;
++
+template <class Impl>
- #if !FULL_SYSTEM
++AlphaFullCPU<Impl>::AlphaFullCPU(Params *params)
++#if FULL_SYSTEM
++ : FullO3CPU<Impl>(params), itb(params->itb), dtb(params->dtb)
++#else
+ : FullO3CPU<Impl>(params)
++#endif
+{
+ DPRINTF(FullCPU, "AlphaFullCPU: Creating AlphaFullCPU object.\n");
+
++ this->thread.resize(this->numThreads);
++
++ for (int i = 0; i < this->numThreads; ++i) {
++#if FULL_SYSTEM
++ assert(this->numThreads == 1);
++ this->thread[i] = new Thread(this, 0, params->mem);
++ this->thread[i]->setStatus(ExecContext::Suspended);
++#else
++ if (i < params->workload.size()) {
++ DPRINTF(FullCPU, "FullCPU: Workload[%i]'s starting PC is %#x, "
++ "process is %#x",
++ i, params->workload[i]->prog_entry, this->thread[i]);
++ this->thread[i] = new Thread(this, i, params->workload[i], i);
++ assert(params->workload[i]->getMemory() != NULL);
++
++ this->thread[i]->setStatus(ExecContext::Suspended);
++ //usedTids[i] = true;
++ //threadMap[i] = i;
++ } else {
++ //Allocate Empty execution context so M5 can use later
++ //when scheduling threads to CPU
++ Process* dummy_proc = NULL;
++
++ this->thread[i] = new Thread(this, i, dummy_proc, i);
++ //usedTids[i] = false;
++ }
++#endif // !FULL_SYSTEM
++
++ this->thread[i]->numInst = 0;
++
++ ExecContext *xc_proxy;
++
++ AlphaXC *alpha_xc_proxy = new AlphaXC;
++
++ if (params->checker) {
++ xc_proxy = new CheckerExecContext<AlphaXC>(alpha_xc_proxy, this->checker);
++ } else {
++ xc_proxy = alpha_xc_proxy;
++ }
++
++ alpha_xc_proxy->cpu = this;
++ alpha_xc_proxy->thread = this->thread[i];
++
++#if FULL_SYSTEM
++ this->thread[i]->quiesceEvent =
++ new EndQuiesceEvent(xc_proxy);
++ this->thread[i]->lastActivate = 0;
++ this->thread[i]->lastSuspend = 0;
++#endif
++ this->thread[i]->xcProxy = xc_proxy;
++
++ this->execContexts.push_back(xc_proxy);
++ }
++
++
++ for (int i=0; i < this->numThreads; i++) {
++ this->thread[i]->funcExeInst = 0;
++ }
++
++ // Sets CPU pointers. These must be set at this level because the CPU
++ // pointers are defined to be the highest level of CPU class.
+ this->fetch.setCPU(this);
+ this->decode.setCPU(this);
+ this->rename.setCPU(this);
+ this->iew.setCPU(this);
+ this->commit.setCPU(this);
+
+ this->rob.setCPU(this);
++ this->regFile.setCPU(this);
++
++ lockAddr = 0;
++ lockFlag = false;
+}
+
+template <class Impl>
+void
+AlphaFullCPU<Impl>::regStats()
+{
+ // Register stats for everything that has stats.
+ this->fullCPURegStats();
+ this->fetch.regStats();
+ this->decode.regStats();
+ this->rename.regStats();
+ this->iew.regStats();
+ this->commit.regStats();
+}
+
- // Will probably need to know which thread is calling syscall
- // Will need to pass that information in to the DynInst when it is constructed,
- // so that this call can be made with the proper thread number.
++#if FULL_SYSTEM
++template <class Impl>
++void
++AlphaFullCPU<Impl>::AlphaXC::dumpFuncProfile()
++{
++ // Currently not supported
++}
++#endif
+
- AlphaFullCPU<Impl>::syscall(short thread_num)
+template <class Impl>
+void
- DPRINTF(FullCPU, "AlphaFullCPU: Syscall() called.\n\n");
++AlphaFullCPU<Impl>::AlphaXC::takeOverFrom(ExecContext *old_context)
+{
- // Commit stage needs to run as well.
- this->commit.tick();
++ // some things should already be set up
++ assert(getMemPtr() == old_context->getMemPtr());
++#if FULL_SYSTEM
++ assert(getSystemPtr() == old_context->getSystemPtr());
++#else
++ assert(getProcessPtr() == old_context->getProcessPtr());
++#endif
+
- squashStages();
++ // copy over functional state
++ setStatus(old_context->status());
++ copyArchRegs(old_context);
++ setCpuId(old_context->readCpuId());
++#if !FULL_SYSTEM
++ thread->funcExeInst = old_context->readFuncExeInst();
++#else
++ EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent();
++ if (other_quiesce) {
++ // Point the quiesce event's XC at this XC so that it wakes up
++ // the proper CPU.
++ other_quiesce->xc = this;
++ }
++ if (thread->quiesceEvent) {
++ thread->quiesceEvent->xc = this;
++ }
+
- // Temporarily increase this by one to account for the syscall
- // instruction.
- ++(this->funcExeInst);
++ // Transfer kernel stats from one CPU to the other.
++ thread->kernelStats = old_context->getKernelStats();
++// storeCondFailures = 0;
++ cpu->lockFlag = false;
++#endif
+
- // Copy over all important state to xc once all the unrolling is done.
- copyToXC();
++ old_context->setStatus(ExecContext::Unallocated);
++
++ thread->inSyscall = false;
++ thread->trapPending = false;
++}
++
++template <class Impl>
++void
++AlphaFullCPU<Impl>::AlphaXC::activate(int delay)
++{
++ DPRINTF(FullCPU, "Calling activate on AlphaXC\n");
+
- // This is hardcoded to thread 0 while the CPU is only single threaded.
- this->thread[0]->syscall();
++ if (thread->status() == ExecContext::Active)
++ return;
+
- // Copy over all important state back to CPU.
- copyFromXC();
++#if FULL_SYSTEM
++ thread->lastActivate = curTick;
++#endif
+
- // Decrease funcExeInst by one as the normal commit will handle
- // incrememnting it.
- --(this->funcExeInst);
++ if (thread->status() == ExecContext::Unallocated) {
++ cpu->activateWhenReady(thread->tid);
++ return;
++ }
+
- // This is not a pretty function, and should only be used if it is necessary
- // to fake having everything squash all at once (ie for non-full system
- // syscalls). Maybe put this at the FullCPU level?
++ thread->setStatus(ExecContext::Active);
++
++ // status() == Suspended
++ cpu->activateContext(thread->tid, delay);
+}
+
- AlphaFullCPU<Impl>::squashStages()
+template <class Impl>
+void
- InstSeqNum rob_head = this->rob.readHeadSeqNum();
++AlphaFullCPU<Impl>::AlphaXC::suspend()
+{
- // Now hack the time buffer to put this sequence number in the places
- // where the stages might read it.
- for (int i = 0; i < 5; ++i)
- {
- this->timeBuffer.access(-i)->commitInfo.doneSeqNum = rob_head;
++ DPRINTF(FullCPU, "Calling suspend on AlphaXC\n");
+
- this->fetch.squash(this->rob.readHeadNextPC());
- this->fetchQueue.advance();
++ if (thread->status() == ExecContext::Suspended)
++ return;
++
++#if FULL_SYSTEM
++ thread->lastActivate = curTick;
++ thread->lastSuspend = curTick;
++#endif
++/*
++#if FULL_SYSTEM
++ // Don't change the status from active if there are pending interrupts
++ if (cpu->check_interrupts()) {
++ assert(status() == ExecContext::Active);
++ return;
+ }
++#endif
++*/
++ thread->setStatus(ExecContext::Suspended);
++ cpu->suspendContext(thread->tid);
++}
+
- this->decode.squash();
- this->decodeQueue.advance();
++template <class Impl>
++void
++AlphaFullCPU<Impl>::AlphaXC::deallocate()
++{
++ DPRINTF(FullCPU, "Calling deallocate on AlphaXC\n");
+
- this->rename.squash();
- this->renameQueue.advance();
- this->renameQueue.advance();
++ if (thread->status() == ExecContext::Unallocated)
++ return;
+
- // Be sure to advance the IEW queues so that the commit stage doesn't
- // try to set an instruction as completed at the same time that it
- // might be deleting it.
- this->iew.squash();
- this->iewQueue.advance();
- this->iewQueue.advance();
- // Needs to tell the LSQ to write back all of its data
- this->iew.lsqWriteback();
++ thread->setStatus(ExecContext::Unallocated);
++ cpu->deallocateContext(thread->tid);
++}
+
- this->rob.squash(rob_head);
- this->commit.setSquashing();
++template <class Impl>
++void
++AlphaFullCPU<Impl>::AlphaXC::halt()
++{
++ DPRINTF(FullCPU, "Calling halt on AlphaXC\n");
+
- // Now hack the time buffer to clear the sequence numbers in the places
- // where the stages might read it.?
- for (int i = 0; i < 5; ++i)
- {
- this->timeBuffer.access(-i)->commitInfo.doneSeqNum = 0;
- }
++ if (thread->status() == ExecContext::Halted)
++ return;
+
- #endif // FULL_SYSTEM
++ thread->setStatus(ExecContext::Halted);
++ cpu->haltContext(thread->tid);
++}
+
++template <class Impl>
++void
++AlphaFullCPU<Impl>::AlphaXC::regStats(const std::string &name)
++{
++#if FULL_SYSTEM
++ thread->kernelStats = new Kernel::Statistics(cpu->system);
++ thread->kernelStats->regStats(name + ".kern");
++#endif
+}
+
- AlphaFullCPU<Impl>::copyToXC()
++template <class Impl>
++void
++AlphaFullCPU<Impl>::AlphaXC::serialize(std::ostream &os)
++{
++#if FULL_SYSTEM
++ if (thread->kernelStats)
++ thread->kernelStats->serialize(os);
++#endif
++
++}
+
+template <class Impl>
+void
- PhysRegIndex renamed_reg;
++AlphaFullCPU<Impl>::AlphaXC::unserialize(Checkpoint *cp, const std::string §ion)
+{
- // First loop through the integer registers.
- for (int i = 0; i < AlphaISA::NumIntRegs; ++i)
- {
- renamed_reg = this->renameMap.lookup(i);
- this->cpuXC->setIntReg(i, this->regFile.readIntReg(renamed_reg));
- DPRINTF(FullCPU, "FullCPU: Copying register %i, has data %lli.\n",
- renamed_reg, this->regFile.intRegFile[renamed_reg]);
- }
++#if FULL_SYSTEM
++ if (thread->kernelStats)
++ thread->kernelStats->unserialize(cp, section);
++#endif
+
- // Then loop through the floating point registers.
- for (int i = 0; i < AlphaISA::NumFloatRegs; ++i)
- {
- renamed_reg = this->renameMap.lookup(i + AlphaISA::FP_Base_DepTag);
- this->cpuXC->setFloatRegBits(i,
- this->regFile.readFloatRegBits(renamed_reg));
- }
++}
+
- this->cpuXC->setMiscReg(AlphaISA::Fpcr_DepTag,
- this->regFile.readMiscReg(AlphaISA::Fpcr_DepTag));
- this->cpuXC->setMiscReg(AlphaISA::Uniq_DepTag,
- this->regFile.readMiscReg(AlphaISA::Uniq_DepTag));
- this->cpuXC->setMiscReg(AlphaISA::Lock_Flag_DepTag,
- this->regFile.readMiscReg(AlphaISA::Lock_Flag_DepTag));
- this->cpuXC->setMiscReg(AlphaISA::Lock_Addr_DepTag,
- this->regFile.readMiscReg(AlphaISA::Lock_Addr_DepTag));
++#if FULL_SYSTEM
++template <class Impl>
++EndQuiesceEvent *
++AlphaFullCPU<Impl>::AlphaXC::getQuiesceEvent()
++{
++ return thread->quiesceEvent;
++}
+
- this->cpuXC->setPC(this->rob.readHeadPC());
- this->cpuXC->setNextPC(this->cpuXC->readPC()+4);
++template <class Impl>
++Tick
++AlphaFullCPU<Impl>::AlphaXC::readLastActivate()
++{
++ return thread->lastActivate;
++}
+
- #if !FULL_SYSTEM
- this->cpuXC->setFuncExeInst(this->funcExeInst);
++template <class Impl>
++Tick
++AlphaFullCPU<Impl>::AlphaXC::readLastSuspend()
++{
++ return thread->lastSuspend;
++}
+
- // This function will probably mess things up unless the ROB is empty and
- // there are no instructions in the pipeline.
++template <class Impl>
++void
++AlphaFullCPU<Impl>::AlphaXC::profileClear()
++{}
++
++template <class Impl>
++void
++AlphaFullCPU<Impl>::AlphaXC::profileSample()
++{}
+#endif
++
++template <class Impl>
++TheISA::MachInst
++AlphaFullCPU<Impl>::AlphaXC:: getInst()
++{
++ return thread->inst;
+}
+
- AlphaFullCPU<Impl>::copyFromXC()
+template <class Impl>
+void
- for (int i = 0; i < AlphaISA::NumIntRegs; ++i)
- {
- renamed_reg = this->renameMap.lookup(i);
++AlphaFullCPU<Impl>::AlphaXC::copyArchRegs(ExecContext *xc)
+{
++ // This function will mess things up unless the ROB is empty and
++ // there are no instructions in the pipeline.
++ unsigned tid = thread->tid;
+ PhysRegIndex renamed_reg;
+
+ // First loop through the integer registers.
- renamed_reg, this->regFile.intRegFile[renamed_reg],
- this->cpuXC->readIntReg(i));
++ for (int i = 0; i < AlphaISA::NumIntRegs; ++i) {
++ renamed_reg = cpu->renameMap[tid].lookup(i);
+
+ DPRINTF(FullCPU, "FullCPU: Copying over register %i, had data %lli, "
+ "now has data %lli.\n",
- this->regFile.setIntReg(renamed_reg, this->cpuXC->readIntReg(i));
++ renamed_reg, cpu->readIntReg(renamed_reg),
++ xc->readIntReg(i));
+
- this->regFile.setFloatRegBits(renamed_reg,
- this->cpuXC->readFloatRegBits(i));
++ cpu->setIntReg(renamed_reg, xc->readIntReg(i));
+ }
+
+ // Then loop through the floating point registers.
+ for (int i = 0; i < AlphaISA::NumFloatRegs; ++i)
+ {
+ renamed_reg = this->renameMap.lookup(i + AlphaISA::FP_Base_DepTag);
- // Then loop through the misc registers.
- this->regFile.setMiscReg(AlphaISA::Fpcr_DepTag,
- this->cpuXC->readMiscReg(AlphaISA::Fpcr_DepTag));
- this->regFile.setMiscReg(AlphaISA::Uniq_DepTag,
- this->cpuXC->readMiscReg(AlphaISA::Uniq_DepTag));
- this->regFile.setMiscReg(AlphaISA::Lock_Flag_DepTag,
- this->cpuXC->readMiscReg(AlphaISA::Lock_Flag_DepTag));
- this->regFile.setMiscReg(AlphaISA::Lock_Addr_DepTag,
- this->cpuXC->readMiscReg(AlphaISA::Lock_Addr_DepTag));
++ this->cpuXC->setFloatRegBits(i,
++ this->regFile.readFloatRegBits(renamed_reg));
+ }
+
- // regFile.pc = cpuXC->regs.pc;
- // regFile.npc = cpuXC->regs.npc;
++ // Copy the misc regs.
++ cpu->regFile.miscRegs[tid].copyMiscRegs(xc);
+
+ // Then finally set the PC and the next PC.
- this->funcExeInst = this->cpuXC->readFuncExeInst();
++ cpu->setPC(xc->readPC(), tid);
++ cpu->setNextPC(xc->readNextPC(), tid);
+#if !FULL_SYSTEM
- // Can force commit stage to squash and stuff.
++ this->thread->funcExeInst = xc->readFuncExeInst();
+#endif
+}
+
++template <class Impl>
++void
++AlphaFullCPU<Impl>::AlphaXC::clearArchRegs()
++{}
++
++template <class Impl>
++uint64_t
++AlphaFullCPU<Impl>::AlphaXC::readIntReg(int reg_idx)
++{
++ DPRINTF(Fault, "Reading int register through the XC!\n");
++ return cpu->readArchIntReg(reg_idx, thread->tid);
++}
++
++template <class Impl>
++float
++AlphaFullCPU<Impl>::AlphaXC::readFloatRegSingle(int reg_idx)
++{
++ DPRINTF(Fault, "Reading float register through the XC!\n");
++ return cpu->readArchFloatRegSingle(reg_idx, thread->tid);
++}
++
++template <class Impl>
++double
++AlphaFullCPU<Impl>::AlphaXC::readFloatRegDouble(int reg_idx)
++{
++ DPRINTF(Fault, "Reading float register through the XC!\n");
++ return cpu->readArchFloatRegDouble(reg_idx, thread->tid);
++}
++
++template <class Impl>
++uint64_t
++AlphaFullCPU<Impl>::AlphaXC::readFloatRegInt(int reg_idx)
++{
++ DPRINTF(Fault, "Reading floatint register through the XC!\n");
++ return cpu->readArchFloatRegInt(reg_idx, thread->tid);
++}
++
++template <class Impl>
++void
++AlphaFullCPU<Impl>::AlphaXC::setIntReg(int reg_idx, uint64_t val)
++{
++ DPRINTF(Fault, "Setting int register through the XC!\n");
++ cpu->setArchIntReg(reg_idx, val, thread->tid);
++
++ if (!thread->trapPending && !thread->inSyscall) {
++ cpu->squashFromXC(thread->tid);
++ }
++}
++
++template <class Impl>
++void
++AlphaFullCPU<Impl>::AlphaXC::setFloatRegSingle(int reg_idx, float val)
++{
++ DPRINTF(Fault, "Setting float register through the XC!\n");
++ cpu->setArchFloatRegSingle(reg_idx, val, thread->tid);
++
++ if (!thread->trapPending && !thread->inSyscall) {
++ cpu->squashFromXC(thread->tid);
++ }
++}
++
++template <class Impl>
++void
++AlphaFullCPU<Impl>::AlphaXC::setFloatRegDouble(int reg_idx, double val)
++{
++ DPRINTF(Fault, "Setting float register through the XC!\n");
++ cpu->setArchFloatRegDouble(reg_idx, val, thread->tid);
++
++ if (!thread->trapPending && !thread->inSyscall) {
++ cpu->squashFromXC(thread->tid);
++ }
++}
++
++template <class Impl>
++void
++AlphaFullCPU<Impl>::AlphaXC::setFloatRegInt(int reg_idx, uint64_t val)
++{
++ DPRINTF(Fault, "Setting floatint register through the XC!\n");
++ cpu->setArchFloatRegInt(reg_idx, val, thread->tid);
++
++ if (!thread->trapPending && !thread->inSyscall) {
++ cpu->squashFromXC(thread->tid);
++ }
++}
++
++template <class Impl>
++void
++AlphaFullCPU<Impl>::AlphaXC::setPC(uint64_t val)
++{
++ cpu->setPC(val, thread->tid);
++
++ if (!thread->trapPending && !thread->inSyscall) {
++ cpu->squashFromXC(thread->tid);
++ }
++}
++
++template <class Impl>
++void
++AlphaFullCPU<Impl>::AlphaXC::setNextPC(uint64_t val)
++{
++ cpu->setNextPC(val, thread->tid);
++
++ if (!thread->trapPending && !thread->inSyscall) {
++ cpu->squashFromXC(thread->tid);
++ }
++}
++
++template <class Impl>
++Fault
++AlphaFullCPU<Impl>::AlphaXC::setMiscReg(int misc_reg, const MiscReg &val)
++{
++ DPRINTF(Fault, "Setting misc register through the XC!\n");
++
++ Fault ret_fault = cpu->setMiscReg(misc_reg, val, thread->tid);
++
++ if (!thread->trapPending && !thread->inSyscall) {
++ cpu->squashFromXC(thread->tid);
++ }
++
++ return ret_fault;
++}
++
++template <class Impl>
++Fault
++AlphaFullCPU<Impl>::AlphaXC::setMiscRegWithEffect(int misc_reg, const MiscReg &val)
++{
++ DPRINTF(Fault, "Setting misc register through the XC!\n");
++
++ Fault ret_fault = cpu->setMiscRegWithEffect(misc_reg, val, thread->tid);
++
++ if (!thread->trapPending && !thread->inSyscall) {
++ cpu->squashFromXC(thread->tid);
++ }
++
++ return ret_fault;
++}
++
++#if !FULL_SYSTEM
++
++template <class Impl>
++TheISA::IntReg
++AlphaFullCPU<Impl>::AlphaXC::getSyscallArg(int i)
++{
++ return cpu->getSyscallArg(i, thread->tid);
++}
++
++template <class Impl>
++void
++AlphaFullCPU<Impl>::AlphaXC::setSyscallArg(int i, IntReg val)
++{
++ cpu->setSyscallArg(i, val, thread->tid);
++}
++
++template <class Impl>
++void
++AlphaFullCPU<Impl>::AlphaXC::setSyscallReturn(SyscallReturn return_value)
++{
++ cpu->setSyscallReturn(return_value, thread->tid);
++}
++
++#endif // FULL_SYSTEM
++
++template <class Impl>
++MiscReg
++AlphaFullCPU<Impl>::readMiscReg(int misc_reg, unsigned tid)
++{
++ return this->regFile.readMiscReg(misc_reg, tid);
++}
++
++template <class Impl>
++MiscReg
++AlphaFullCPU<Impl>::readMiscRegWithEffect(int misc_reg, Fault &fault,
++ unsigned tid)
++{
++ return this->regFile.readMiscRegWithEffect(misc_reg, fault, tid);
++}
++
++template <class Impl>
++Fault
++AlphaFullCPU<Impl>::setMiscReg(int misc_reg, const MiscReg &val, unsigned tid)
++{
++ return this->regFile.setMiscReg(misc_reg, val, tid);
++}
++
++template <class Impl>
++Fault
++AlphaFullCPU<Impl>::setMiscRegWithEffect(int misc_reg, const MiscReg &val,
++ unsigned tid)
++{
++ return this->regFile.setMiscRegWithEffect(misc_reg, val, tid);
++}
++
++template <class Impl>
++void
++AlphaFullCPU<Impl>::squashFromXC(unsigned tid)
++{
++ this->thread[tid]->inSyscall = true;
++ this->commit.generateXCEvent(tid);
++}
++
+#if FULL_SYSTEM
+
++template <class Impl>
++void
++AlphaFullCPU<Impl>::post_interrupt(int int_num, int index)
++{
++ BaseCPU::post_interrupt(int_num, index);
++
++ if (this->thread[0]->status() == ExecContext::Suspended) {
++ DPRINTF(IPI,"Suspended Processor awoke\n");
++// xcProxies[0]->activate();
++ this->execContexts[0]->activate();
++ }
++}
++
+template <class Impl>
+int
+AlphaFullCPU<Impl>::readIntrFlag()
+{
+ return this->regFile.readIntrFlag();
+}
+
+template <class Impl>
+void
+AlphaFullCPU<Impl>::setIntrFlag(int val)
+{
+ this->regFile.setIntrFlag(val);
+}
+
- AlphaFullCPU<Impl>::hwrei()
+template <class Impl>
+Fault
- if (!inPalMode())
- return new AlphaISA::UnimplementedOpcodeFault;
-
- this->setNextPC(this->regFile.miscRegs.readReg(AlphaISA::IPR_EXC_ADDR));
++AlphaFullCPU<Impl>::hwrei(unsigned tid)
+{
- // kernelStats.hwrei();
-
- if ((this->regFile.miscRegs.readReg(AlphaISA::IPR_EXC_ADDR) & 1) == 0)
- // AlphaISA::swap_palshadow(&regs, false);
++ // Need to clear the lock flag upon returning from an interrupt.
++ this->lockFlag = false;
+
- AlphaFullCPU<Impl>::simPalCheck(int palFunc)
++ this->thread[tid]->kernelStats->hwrei();
+
+ this->checkInterrupts = true;
+
+ // FIXME: XXX check for interrupts? XXX
+ return NoFault;
+}
+
+template <class Impl>
+bool
- // kernelStats.callpal(palFunc);
++AlphaFullCPU<Impl>::simPalCheck(int palFunc, unsigned tid)
+{
- // Probably shouldn't be able to switch to the trap handler as quickly as
- // this. Also needs to get the exception restart address from the commit
- // stage.
++ if (this->thread[tid]->kernelStats)
++ this->thread[tid]->kernelStats->callpal(palFunc,
++ this->execContexts[tid]);
+
+ switch (palFunc) {
+ case PAL::halt:
+ halt();
+ if (--System::numSystemsRunning == 0)
+ new SimExitEvent("all cpus halted");
+ break;
+
+ case PAL::bpt:
+ case PAL::bugchk:
+ if (this->system->breakpoint())
+ return false;
+ break;
+ }
+
+ return true;
+}
+
- AlphaFullCPU<Impl>::trap(Fault fault)
+template <class Impl>
+void
- /* // Keep in mind that a trap may be initiated by fetch if there's a TLB
- // miss
- uint64_t PC = this->commit.readCommitPC();
++AlphaFullCPU<Impl>::trap(Fault fault, unsigned tid)
+{
- DPRINTF(Fault, "Fault %s\n", fault->name());
- this->recordEvent(csprintf("Fault %s", fault->name()));
++ fault->invoke(this->execContexts[tid]);
++}
+
- //kernelStats.fault(fault);
++template <class Impl>
++void
++AlphaFullCPU<Impl>::processInterrupts()
++{
++ // Check for interrupts here. For now can copy the code that
++ // exists within isa_fullsys_traits.hh. Also assume that thread 0
++ // is the one that handles the interrupts.
++ // @todo: Possibly consolidate the interrupt checking code.
++ // @todo: Allow other threads to handle interrupts.
++
++ // Check if there are any outstanding interrupts
++ //Handle the interrupts
++ int ipl = 0;
++ int summary = 0;
++
++ this->checkInterrupts = false;
++
++ if (this->readMiscReg(IPR_ASTRR, 0))
++ panic("asynchronous traps not implemented\n");
++
++ if (this->readMiscReg(IPR_SIRR, 0)) {
++ for (int i = INTLEVEL_SOFTWARE_MIN;
++ i < INTLEVEL_SOFTWARE_MAX; i++) {
++ if (this->readMiscReg(IPR_SIRR, 0) & (ULL(1) << i)) {
++ // See table 4-19 of the 21164 hardware reference
++ ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1;
++ summary |= (ULL(1) << i);
++ }
++ }
++ }
+
- if (fault->isA<ArithmeticFault>())
- panic("Arithmetic traps are unimplemented!");
++ uint64_t interrupts = this->intr_status();
++
++ if (interrupts) {
++ for (int i = INTLEVEL_EXTERNAL_MIN;
++ i < INTLEVEL_EXTERNAL_MAX; i++) {
++ if (interrupts & (ULL(1) << i)) {
++ // See table 4-19 of the 21164 hardware reference
++ ipl = i;
++ summary |= (ULL(1) << i);
++ }
++ }
++ }
+
- // exception restart address - Get the commit PC
- if (!fault->isA<InterruptFault>() || !inPalMode(PC))
- this->regFile.miscRegs.setReg(AlphaISA::IPR_EXC_ADDR, PC);
++ if (ipl && ipl > this->readMiscReg(IPR_IPLR, 0)) {
++ this->setMiscReg(IPR_ISR, summary, 0);
++ this->setMiscReg(IPR_INTID, ipl, 0);
++ if (this->checker) {
++ this->checker->cpuXCBase()->setMiscReg(IPR_ISR, summary);
++ this->checker->cpuXCBase()->setMiscReg(IPR_INTID, ipl);
++ }
++ this->trap(Fault(new InterruptFault), 0);
++ DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
++ this->readMiscReg(IPR_IPLR, 0), ipl, summary);
++ }
++}
+
- if (fault->isA<PalFault>() || fault->isA<ArithmeticFault>())
- // || fault == InterruptFault && !PC_PAL(regs.pc)
- {
- // traps... skip faulting instruction
- AlphaISA::MiscReg ipr_exc_addr =
- this->regFile.miscRegs.readReg(AlphaISA::IPR_EXC_ADDR);
- this->regFile.miscRegs.setReg(AlphaISA::IPR_EXC_ADDR,
- ipr_exc_addr + 4);
- }
++#endif // FULL_SYSTEM
+
- if (!inPalMode(PC))
- swapPALShadow(true);
++#if !FULL_SYSTEM
++
++template <class Impl>
++void
++AlphaFullCPU<Impl>::syscall(int tid)
++{
++ DPRINTF(FullCPU, "AlphaFullCPU: [tid:%i] Executing syscall().\n\n", tid);
+
- this->regFile.setPC(this->regFile.miscRegs.readReg(AlphaISA::IPR_PAL_BASE) +
- (dynamic_cast<AlphaFault *>(fault.get()))->vect());
- this->regFile.setNextPC(PC + sizeof(MachInst));*/
++ DPRINTF(Activity,"Activity: syscall() called.\n");
+
- void
- AlphaFullCPU<Impl>::processInterrupts()
++ // Temporarily increase this by one to account for the syscall
++ // instruction.
++ ++(this->thread[tid]->funcExeInst);
++
++ // Execute the actual syscall.
++ this->thread[tid]->syscall();
++
++ // Decrease funcExeInst by one as the normal commit will handle
++ // incrementing it.
++ --(this->thread[tid]->funcExeInst);
+}
+
+template <class Impl>
- // Check for interrupts here. For now can copy the code that exists
- // within isa_fullsys_traits.hh.
++TheISA::IntReg
++AlphaFullCPU<Impl>::getSyscallArg(int i, int tid)
+{
- // swap_palshadow swaps in the values of the shadow registers and
- // swaps them with the values of the physical registers that map to the
- // same logical index.
++ return this->readArchIntReg(AlphaISA::ArgumentReg0 + i, tid);
+}
+
- AlphaFullCPU<Impl>::swapPALShadow(bool use_shadow)
+template <class Impl>
+void
- if (palShadowEnabled == use_shadow)
- panic("swap_palshadow: wrong PAL shadow state");
-
- palShadowEnabled = use_shadow;
-
- // Will have to lookup in rename map to get physical registers, then
- // swap.
++AlphaFullCPU<Impl>::setSyscallArg(int i, IntReg val, int tid)
+{
- #endif // FULL_SYSTEM
++ this->setArchIntReg(AlphaISA::ArgumentReg0 + i, val, tid);
+}
+
++template <class Impl>
++void
++AlphaFullCPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid)
++{
++ // check for error condition. Alpha syscall convention is to
++ // indicate success/failure in reg a3 (r19) and put the
++ // return value itself in the standard return value reg (v0).
++ if (return_value.successful()) {
++ // no error
++ this->setArchIntReg(SyscallSuccessReg, 0, tid);
++ this->setArchIntReg(ReturnValueReg, return_value.value(), tid);
++ } else {
++ // got an error, return details
++ this->setArchIntReg(SyscallSuccessReg, (IntReg) -1, tid);
++ this->setArchIntReg(ReturnValueReg, -return_value.value(), tid);
++ }
++}
++#endif
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- #ifndef __CPU_O3_CPU_ALPHA_DYN_INST_HH__
- #define __CPU_O3_CPU_ALPHA_DYN_INST_HH__
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- #include "cpu/inst_seq.hh"
++#ifndef __CPU_O3_ALPHA_DYN_INST_HH__
++#define __CPU_O3_ALPHA_DYN_INST_HH__
+
+#include "cpu/base_dyn_inst.hh"
++#include "cpu/inst_seq.hh"
+#include "cpu/o3/alpha_cpu.hh"
+#include "cpu/o3/alpha_impl.hh"
- * Mostly implementation specific AlphaDynInst. It is templated in case there
- * are other implementations that are similar enough to be able to use this
- * class without changes. This is mainly useful if there are multiple similar
- * CPU implementations of the same ISA.
+
+/**
-
++ * Mostly implementation & ISA specific AlphaDynInst. As with most
++ * other classes in the new CPU model, it is templated on the Impl to
++ * allow for passing in of all types, such as the CPU type and the ISA
++ * type. The AlphaDynInst serves as the primary interface to the CPU
++ * for instructions that are executing.
+ */
- AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num,
+template <class Impl>
+class AlphaDynInst : public BaseDynInst<Impl>
+{
+ public:
+ /** Typedef for the CPU. */
+ typedef typename Impl::FullCPU FullCPU;
+
+ /** Binary machine instruction type. */
+ typedef TheISA::MachInst MachInst;
++ /** Extended machine instruction type. */
++ typedef TheISA::ExtMachInst ExtMachInst;
+ /** Logical register index type. */
+ typedef TheISA::RegIndex RegIndex;
+ /** Integer register index type. */
+ typedef TheISA::IntReg IntReg;
+ /** Misc register index type. */
+ typedef TheISA::MiscReg MiscReg;
+
+ enum {
+ MaxInstSrcRegs = TheISA::MaxInstSrcRegs, //< Max source regs
+ MaxInstDestRegs = TheISA::MaxInstDestRegs, //< Max dest regs
+ };
+
+ public:
+ /** BaseDynInst constructor given a binary instruction. */
- Fault execute()
- {
- return this->fault = this->staticInst->execute(this, this->traceData);
- }
++ AlphaDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, InstSeqNum seq_num,
+ FullCPU *cpu);
+
+ /** BaseDynInst constructor given a static inst pointer. */
+ AlphaDynInst(StaticInstPtr &_staticInst);
+
+ /** Executes the instruction.*/
- // Dummy function for now.
- // @todo: Fix this once reg file gets fixed.
- return 0;
++ Fault execute();
++
++ /** Initiates the access. Only valid for memory operations. */
++ Fault initiateAcc();
++
++ /** Completes the access. Only valid for memory operations. */
++ Fault completeAcc();
++
++ private:
++ /** Initializes variables. */
++ void initVars();
+
+ public:
+ MiscReg readMiscReg(int misc_reg)
+ {
- // Dummy function for now.
- // @todo: Fix this once reg file gets fixed.
- return 0;
++ return this->cpu->readMiscReg(misc_reg, this->threadNumber);
+ }
+
+ MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
+ {
- // Dummy function for now.
- // @todo: Fix this once reg file gets fixed.
- return NoFault;
++ return this->cpu->readMiscRegWithEffect(misc_reg, fault,
++ this->threadNumber);
+ }
+
+ Fault setMiscReg(int misc_reg, const MiscReg &val)
+ {
- // Dummy function for now.
- // @todo: Fix this once reg file gets fixed.
- return NoFault;
++ this->instResult.integer = val;
++ return this->cpu->setMiscReg(misc_reg, val, this->threadNumber);
+ }
+
+ Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val)
+ {
-
-
++ return this->cpu->setMiscRegWithEffect(misc_reg, val,
++ this->threadNumber);
+ }
+
+#if FULL_SYSTEM
++ /** Calls hardware return from error interrupt. */
+ Fault hwrei();
++ /** Reads interrupt flag. */
+ int readIntrFlag();
++ /** Sets interrupt flag. */
+ void setIntrFlag(int val);
++ /** Checks if system is in PAL mode. */
+ bool inPalMode();
++ /** Traps to handle specified fault. */
+ void trap(Fault fault);
+ bool simPalCheck(int palFunc);
+#else
++ /** Calls a syscall. */
+ void syscall();
+#endif
+
- this->instResult.integer = val;
+ private:
+ /** Physical register index of the destination registers of this
+ * instruction.
+ */
+ PhysRegIndex _destRegIdx[MaxInstDestRegs];
+
+ /** Physical register index of the source registers of this
+ * instruction.
+ */
+ PhysRegIndex _srcRegIdx[MaxInstSrcRegs];
+
+ /** Physical register index of the previous producers of the
+ * architected destinations.
+ */
+ PhysRegIndex _prevDestRegIdx[MaxInstDestRegs];
+
+ public:
+
+ // The register accessor methods provide the index of the
+ // instruction's operand (e.g., 0 or 1), not the architectural
+ // register index, to simplify the implementation of register
+ // renaming. We find the architectural register index by indexing
+ // into the instruction's own operand index table. Note that a
+ // raw pointer to the StaticInst is provided instead of a
+ // ref-counted StaticInstPtr to redice overhead. This is fine as
+ // long as these methods don't copy the pointer into any long-term
+ // storage (which is pretty hard to imagine they would have reason
+ // to do).
+
+ uint64_t readIntReg(const StaticInst *si, int idx)
+ {
+ return this->cpu->readIntReg(_srcRegIdx[idx]);
+ }
+
+ FloatReg readFloatReg(const StaticInst *si, int idx, int width)
+ {
+ return this->cpu->readFloatReg(_srcRegIdx[idx], width);
+ }
+
+ FloatReg readFloatReg(const StaticInst *si, int idx)
+ {
+ return this->cpu->readFloatReg(_srcRegIdx[idx]);
+ }
+
+ FloatRegBits readFloatRegBits(const StaticInst *si, int idx, int width)
+ {
+ return this->cpu->readFloatRegBits(_srcRegIdx[idx], width);
+ }
+
+ FloatRegBits readFloatRegBits(const StaticInst *si, int idx)
+ {
+ return this->cpu->readFloatRegBits(_srcRegIdx[idx]);
+ }
+
+ /** @todo: Make results into arrays so they can handle multiple dest
+ * registers.
+ */
+ void setIntReg(const StaticInst *si, int idx, uint64_t val)
+ {
+ this->cpu->setIntReg(_destRegIdx[idx], val);
- this->instResult.fp = val;
++ BaseDynInst<Impl>::setIntReg(si, idx, val);
+ }
+
+ void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width)
+ {
+ this->cpu->setFloatReg(_destRegIdx[idx], val, width);
- this->instResult.dbl = val;
++ BaseDynInst<Impl>::setFloatRegSingle(si, idx, val);
+ }
+
+ void setFloatReg(const StaticInst *si, int idx, FloatReg val)
+ {
+ this->cpu->setFloatReg(_destRegIdx[idx], val);
- this->instResult.integer = val;
++ BaseDynInst<Impl>::setFloatRegDouble(si, idx, val);
+ }
+
+ void setFloatRegBits(const StaticInst *si, int idx,
+ FloatRegBits val, int width)
+ {
+ this->cpu->setFloatRegBits(_destRegIdx[idx], val, width);
+ this->instResult.integer = val;
+ }
+
+ void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val)
+ {
+ this->cpu->setFloatRegBits(_destRegIdx[idx], val);
- #endif // __CPU_O3_CPU_ALPHA_DYN_INST_HH__
++ BaseDynInst<Impl>::setFloatRegInt(si, idx, val);
+ }
+
+ /** Returns the physical register index of the i'th destination
+ * register.
+ */
+ PhysRegIndex renamedDestRegIdx(int idx) const
+ {
+ return _destRegIdx[idx];
+ }
+
+ /** Returns the physical register index of the i'th source register. */
+ PhysRegIndex renamedSrcRegIdx(int idx) const
+ {
+ return _srcRegIdx[idx];
+ }
+
+ /** Returns the physical register index of the previous physical register
+ * that remapped to the same logical register index.
+ */
+ PhysRegIndex prevDestRegIdx(int idx) const
+ {
+ return _prevDestRegIdx[idx];
+ }
+
+ /** Renames a destination register to a physical register. Also records
+ * the previous physical register that the logical register mapped to.
+ */
+ void renameDestReg(int idx,
+ PhysRegIndex renamed_dest,
+ PhysRegIndex previous_rename)
+ {
+ _destRegIdx[idx] = renamed_dest;
+ _prevDestRegIdx[idx] = previous_rename;
+ }
+
+ /** Renames a source logical register to the physical register which
+ * has/will produce that logical register's result.
+ * @todo: add in whether or not the source register is ready.
+ */
+ void renameSrcReg(int idx, PhysRegIndex renamed_src)
+ {
+ _srcRegIdx[idx] = renamed_src;
+ }
+
+ public:
++ /** Calculates EA part of a memory instruction. Currently unused,
++ * though it may be useful in the future if we want to split
++ * memory operations into EA calculation and memory access parts.
++ */
+ Fault calcEA()
+ {
+ return this->staticInst->eaCompInst()->execute(this, this->traceData);
+ }
+
++ /** Does the memory access part of a memory instruction. Currently unused,
++ * though it may be useful in the future if we want to split
++ * memory operations into EA calculation and memory access parts.
++ */
+ Fault memAccess()
+ {
+ return this->staticInst->memAccInst()->execute(this, this->traceData);
+ }
+};
+
++#endif // __CPU_O3_ALPHA_DYN_INST_HH__
+
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- AlphaDynInst<Impl>::AlphaDynInst(MachInst inst, Addr PC, Addr Pred_PC,
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/o3/alpha_dyn_inst.hh"
+
+template <class Impl>
- for (int i = 0; i < this->staticInst->numDestRegs(); i++)
- {
++AlphaDynInst<Impl>::AlphaDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC,
+ InstSeqNum seq_num, FullCPU *cpu)
+ : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu)
++{
++ initVars();
++}
++
++template <class Impl>
++AlphaDynInst<Impl>::AlphaDynInst(StaticInstPtr &_staticInst)
++ : BaseDynInst<Impl>(_staticInst)
++{
++ initVars();
++}
++
++template <class Impl>
++void
++AlphaDynInst<Impl>::initVars()
+{
+ // Make sure to have the renamed register entries set to the same
+ // as the normal register entries. It will allow the IQ to work
+ // without any modifications.
- for (int i = 0; i < this->staticInst->numSrcRegs(); i++)
- {
++ for (int i = 0; i < this->staticInst->numDestRegs(); i++) {
+ _destRegIdx[i] = this->staticInst->destRegIdx(i);
+ }
+
- AlphaDynInst<Impl>::AlphaDynInst(StaticInstPtr &_staticInst)
- : BaseDynInst<Impl>(_staticInst)
++ for (int i = 0; i < this->staticInst->numSrcRegs(); i++) {
+ _srcRegIdx[i] = this->staticInst->srcRegIdx(i);
+ this->_readySrcRegIdx[i] = 0;
+ }
++}
++
++template <class Impl>
++Fault
++AlphaDynInst<Impl>::execute()
++{
++ // @todo: Pretty convoluted way to avoid squashing from happening when using
++ // the XC during an instruction's execution (specifically for instructions
++ // that have sideeffects that use the XC). Fix this.
++ bool in_syscall = this->thread->inSyscall;
++ this->thread->inSyscall = true;
++
++ this->fault = this->staticInst->execute(this, this->traceData);
++
++ this->thread->inSyscall = in_syscall;
+
++ return this->fault;
+}
+
+template <class Impl>
- // Make sure to have the renamed register entries set to the same
- // as the normal register entries. It will allow the IQ to work
- // without any modifications.
- for (int i = 0; i < _staticInst->numDestRegs(); i++)
- {
- _destRegIdx[i] = _staticInst->destRegIdx(i);
- }
++Fault
++AlphaDynInst<Impl>::initiateAcc()
+{
- for (int i = 0; i < _staticInst->numSrcRegs(); i++)
- {
- _srcRegIdx[i] = _staticInst->srcRegIdx(i);
++ // @todo: Pretty convoluted way to avoid squashing from happening when using
++ // the XC during an instruction's execution (specifically for instructions
++ // that have sideeffects that use the XC). Fix this.
++ bool in_syscall = this->thread->inSyscall;
++ this->thread->inSyscall = true;
++
++ this->fault = this->staticInst->initiateAcc(this, this->traceData);
++
++ this->thread->inSyscall = in_syscall;
+
- return this->cpu->hwrei();
++ return this->fault;
++}
++
++template <class Impl>
++Fault
++AlphaDynInst<Impl>::completeAcc()
++{
++ if (this->isLoad()) {
++ this->fault = this->staticInst->completeAcc(this->req->data,
++ this,
++ this->traceData);
++ } else if (this->isStore()) {
++ this->fault = this->staticInst->completeAcc((uint8_t*)&this->req->result,
++ this,
++ this->traceData);
++ } else {
++ panic("Unknown type!");
+ }
++
++ return this->fault;
+}
+
+#if FULL_SYSTEM
+template <class Impl>
+Fault
+AlphaDynInst<Impl>::hwrei()
+{
- return this->cpu->readIntrFlag();
++ if (!this->cpu->inPalMode(this->readPC()))
++ return new AlphaISA::UnimplementedOpcodeFault;
++
++ this->setNextPC(this->cpu->readMiscReg(AlphaISA::IPR_EXC_ADDR,
++ this->threadNumber));
++
++ // Tell CPU to clear any state it needs to if a hwrei is taken.
++ this->cpu->hwrei(this->threadNumber);
++
++ // FIXME: XXX check for interrupts? XXX
++ return NoFault;
+}
+
+template <class Impl>
+int
+AlphaDynInst<Impl>::readIntrFlag()
+{
- return this->cpu->inPalMode();
++ return this->cpu->readIntrFlag();
+}
+
+template <class Impl>
+void
+AlphaDynInst<Impl>::setIntrFlag(int val)
+{
+ this->cpu->setIntrFlag(val);
+}
+
+template <class Impl>
+bool
+AlphaDynInst<Impl>::inPalMode()
+{
- this->cpu->trap(fault);
++ return this->cpu->inPalMode(this->PC);
+}
+
+template <class Impl>
+void
+AlphaDynInst<Impl>::trap(Fault fault)
+{
- return this->cpu->simPalCheck(palFunc);
++ this->cpu->trap(fault, this->threadNumber);
+}
+
+template <class Impl>
+bool
+AlphaDynInst<Impl>::simPalCheck(int palFunc)
+{
++ return this->cpu->simPalCheck(palFunc, this->threadNumber);
+}
+#else
+template <class Impl>
+void
+AlphaDynInst<Impl>::syscall()
+{
+ this->cpu->syscall(this->threadNumber);
+}
+#endif
+
--- /dev/null
- #ifndef __CPU_O3_CPU_ALPHA_IMPL_HH__
- #define __CPU_O3_CPU_ALPHA_IMPL_HH__
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- /** Implementation specific struct that defines several key things to the
++#ifndef __CPU_O3_ALPHA_IMPL_HH__
++#define __CPU_O3_ALPHA_IMPL_HH__
+
+#include "arch/alpha/isa_traits.hh"
+
+#include "cpu/o3/alpha_params.hh"
+#include "cpu/o3/cpu_policy.hh"
+
+// Forward declarations.
+template <class Impl>
+class AlphaDynInst;
+
+template <class Impl>
+class AlphaFullCPU;
+
- /** The CPU policy to be used (ie fetch, decode, etc.). */
++/** Implementation specific struct that defines several key types to the
+ * CPU, the stages within the CPU, the time buffers, and the DynInst.
+ * The struct defines the ISA, the CPU policy, the specific DynInst, the
+ * specific FullCPU, and all of the structs from the time buffers to do
+ * communication.
+ * This is one of the key things that must be defined for each hardware
+ * specific CPU implementation.
+ */
+struct AlphaSimpleImpl
+{
+ /** The type of MachInst. */
+ typedef TheISA::MachInst MachInst;
+
- /** The DynInst to be used. */
++ /** The CPU policy to be used, which defines all of the CPU stages. */
+ typedef SimpleCPUPolicy<AlphaSimpleImpl> CPUPol;
+
- /** The FullCPU to be used. */
++ /** The DynInst type to be used. */
+ typedef AlphaDynInst<AlphaSimpleImpl> DynInst;
+
+ /** The refcounted DynInst pointer to be used. In most cases this is
+ * what should be used, and not DynInst *.
+ */
+ typedef RefCountingPtr<DynInst> DynInstPtr;
+
- MaxWidth = 8
++ /** The FullCPU type to be used. */
+ typedef AlphaFullCPU<AlphaSimpleImpl> FullCPU;
+
+ /** The Params to be passed to each stage. */
+ typedef AlphaSimpleParams Params;
+
+ enum {
- #endif // __CPU_O3_CPU_ALPHA_IMPL_HH__
++ MaxWidth = 8,
++ MaxThreads = 4
+ };
+};
+
++#endif // __CPU_O3_ALPHA_IMPL_HH__
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- #ifndef __CPU_O3_CPU_ALPHA_SIMPLE_PARAMS_HH__
- #define __CPU_O3_CPU_ALPHA_SIMPLE_PARAMS_HH__
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- class System;
- class AlphaITB;
++#ifndef __CPU_O3_ALPHA_PARAMS_HH__
++#define __CPU_O3_ALPHA_PARAMS_HH__
+
+#include "cpu/o3/cpu.hh"
+
+//Forward declarations
- class Process;
+class AlphaDTB;
++class AlphaITB;
++class FUPool;
+class FunctionalMemory;
- /*
+class MemInterface;
++class Process;
++class System;
+
+/**
+ * This file defines the parameters that will be used for the AlphaFullCPU.
+ * This must be defined externally so that the Impl can have a params class
+ * defined that it can pass to all of the individual stages.
+ */
+
+class AlphaSimpleParams : public BaseFullCPU::Params
+{
+ public:
+
+#if FULL_SYSTEM
+ AlphaITB *itb; AlphaDTB *dtb;
+#else
+ std::vector<Process *> workload;
+ Process *process;
+#endif // FULL_SYSTEM
+
++ //Page Table
++// PageTable *pTable;
++
+ FunctionalMemory *mem;
+
++ BaseCPU *checker;
++
++ unsigned activity;
++
+ //
+ // Caches
+ //
+ MemInterface *icacheInterface;
+ MemInterface *dcacheInterface;
+
++ unsigned cachePorts;
++
+ //
+ // Fetch
+ //
+ unsigned decodeToFetchDelay;
+ unsigned renameToFetchDelay;
+ unsigned iewToFetchDelay;
+ unsigned commitToFetchDelay;
+ unsigned fetchWidth;
+
+ //
+ // Decode
+ //
+ unsigned renameToDecodeDelay;
+ unsigned iewToDecodeDelay;
+ unsigned commitToDecodeDelay;
+ unsigned fetchToDecodeDelay;
+ unsigned decodeWidth;
+
+ //
+ // Rename
+ //
+ unsigned iewToRenameDelay;
+ unsigned commitToRenameDelay;
+ unsigned decodeToRenameDelay;
+ unsigned renameWidth;
+
+ //
+ // IEW
+ //
+ unsigned commitToIEWDelay;
+ unsigned renameToIEWDelay;
+ unsigned issueToExecuteDelay;
+ unsigned issueWidth;
+ unsigned executeWidth;
+ unsigned executeIntWidth;
+ unsigned executeFloatWidth;
+ unsigned executeBranchWidth;
+ unsigned executeMemoryWidth;
++ FUPool *fuPool;
+
+ //
+ // Commit
+ //
+ unsigned iewToCommitDelay;
+ unsigned renameToROBDelay;
+ unsigned commitWidth;
+ unsigned squashWidth;
++ Tick trapLatency;
++ Tick fetchTrapLatency;
+
+ //
+ // Branch predictor (BP & BTB)
+ //
- unsigned localPredictorCtrBits;
- */
-
- unsigned local_predictor_size;
- unsigned local_ctr_bits;
- unsigned local_history_table_size;
- unsigned local_history_bits;
- unsigned global_predictor_size;
- unsigned global_ctr_bits;
- unsigned global_history_bits;
- unsigned choice_predictor_size;
- unsigned choice_ctr_bits;
+ unsigned localPredictorSize;
-
- bool defReg;
++ unsigned localCtrBits;
++ unsigned localHistoryTableSize;
++ unsigned localHistoryBits;
++ unsigned globalPredictorSize;
++ unsigned globalCtrBits;
++ unsigned globalHistoryBits;
++ unsigned choicePredictorSize;
++ unsigned choiceCtrBits;
+
+ unsigned BTBEntries;
+ unsigned BTBTagSize;
+
+ unsigned RASSize;
+
+ //
+ // Load store queue
+ //
+ unsigned LQEntries;
+ unsigned SQEntries;
+
+ //
+ // Memory dependence
+ //
+ unsigned SSITSize;
+ unsigned LFSTSize;
+
+ //
+ // Miscellaneous
+ //
+ unsigned numPhysIntRegs;
+ unsigned numPhysFloatRegs;
+ unsigned numIQEntries;
+ unsigned numROBEntries;
+
++ //SMT Parameters
++ unsigned smtNumFetchingThreads;
++
++ std::string smtFetchPolicy;
++
++ std::string smtIQPolicy;
++ unsigned smtIQThreshold;
++
++ std::string smtLSQPolicy;
++ unsigned smtLSQThreshold;
++
++ std::string smtCommitPolicy;
++
++ std::string smtROBPolicy;
++ unsigned smtROBThreshold;
++
+ // Probably can get this from somewhere.
+ unsigned instShiftAmt;
- #endif // __CPU_O3_CPU_ALPHA_PARAMS_HH__
+};
+
++#endif // __CPU_O3_ALPHA_PARAMS_HH__
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/o3/bpred_unit_impl.hh"
+#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/alpha_dyn_inst.hh"
++#include "cpu/ozone/ozone_impl.hh"
++#include "cpu/ozone/simple_impl.hh"
+
+template class TwobitBPredUnit<AlphaSimpleImpl>;
++template class TwobitBPredUnit<OzoneImpl>;
++template class TwobitBPredUnit<SimpleImpl>;
--- /dev/null
- #ifndef __BPRED_UNIT_HH__
- #define __BPRED_UNIT_HH__
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- #include "cpu/o3/tournament_pred.hh"
++#ifndef __CPU_O3_BPRED_UNIT_HH__
++#define __CPU_O3_BPRED_UNIT_HH__
+
+// For Addr type.
+#include "arch/isa_traits.hh"
+#include "base/statistics.hh"
+#include "cpu/inst_seq.hh"
+
+#include "cpu/o3/2bit_local_pred.hh"
- * and the BTB. Right now I'm unsure of the implementation; it would
- * be nicer to have something closer to the CPUPolicy or the Impl where
- * this is just typedefs, but it forces the upper level stages to be
- * aware of the constructors of the BP and the BTB. The nicer thing
- * to do is have this templated on the Impl, accept the usual Params
- * object, and be able to call the constructors on the BP and BTB.
+#include "cpu/o3/btb.hh"
+#include "cpu/o3/ras.hh"
++#include "cpu/o3/tournament_pred.hh"
+
+#include <list>
+
+/**
+ * Basically a wrapper class to hold both the branch predictor
- TwobitBPredUnit(Params ¶ms);
++ * and the BTB.
+ */
+template<class Impl>
+class TwobitBPredUnit
+{
+ public:
+ typedef typename Impl::Params Params;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
- bool predict(DynInstPtr &inst, Addr &PC);
-
- void update(const InstSeqNum &done_sn);
-
- void squash(const InstSeqNum &squashed_sn);
-
++ /**
++ * @param params The params object, that has the size of the BP and BTB.
++ */
++ TwobitBPredUnit(Params *params);
+
++ /**
++ * Registers statistics.
++ */
+ void regStats();
+
- bool actually_taken);
++ void switchOut();
++
++ void takeOverFrom();
++
++ /**
++ * Predicts whether or not the instruction is a taken branch, and the
++ * target of the branch if it is taken.
++ * @param inst The branch instruction.
++ * @param PC The predicted PC is passed back through this parameter.
++ * @param tid The thread id.
++ * @return Returns if the branch is taken or not.
++ */
++ bool predict(DynInstPtr &inst, Addr &PC, unsigned tid);
++
++ /**
++ * Tells the branch predictor to commit any updates until the given
++ * sequence number.
++ * @param done_sn The sequence number to commit any older updates up until.
++ * @param tid The thread id.
++ */
++ void update(const InstSeqNum &done_sn, unsigned tid);
++
++ /**
++ * Squashes all outstanding updates until a given sequence number.
++ * @param squashed_sn The sequence number to squash any younger updates up
++ * until.
++ * @param tid The thread id.
++ */
++ void squash(const InstSeqNum &squashed_sn, unsigned tid);
++
++ /**
++ * Squashes all outstanding updates until a given sequence number, and
++ * corrects that sn's update with the proper address and taken/not taken.
++ * @param squashed_sn The sequence number to squash any younger updates up
++ * until.
++ * @param corr_target The correct branch target.
++ * @param actually_taken The correct branch direction.
++ * @param tid The thread id.
++ */
+ void squash(const InstSeqNum &squashed_sn, const Addr &corr_target,
- { return BTB.valid(inst_PC); }
++ bool actually_taken, unsigned tid);
+
++ /**
++ * Looks up a given PC in the BP to see if it is taken or not taken.
++ * @param inst_PC The PC to look up.
++ * @return Whether the branch is taken or not taken.
++ */
+ bool BPLookup(Addr &inst_PC)
+ { return BP.lookup(inst_PC); }
+
++ /**
++ * Looks up a given PC in the BTB to see if a matching entry exists.
++ * @param inst_PC The PC to look up.
++ * @return Whether the BTB contains the given PC.
++ */
+ bool BTBValid(Addr &inst_PC)
- { return BTB.lookup(inst_PC); }
-
- // Will want to include global history.
++ { return BTB.valid(inst_PC, 0); }
+
++ /**
++ * Looks up a given PC in the BTB to get the predicted target.
++ * @param inst_PC The PC to look up.
++ * @return The address of the target of the branch.
++ */
+ Addr BTBLookup(Addr &inst_PC)
- { BTB.update(inst_PC, target_PC); }
++ { return BTB.lookup(inst_PC, 0); }
++
++ /**
++ * Updates the BP with taken/not taken information.
++ * @param inst_PC The branch's PC that will be updated.
++ * @param taken Whether the branch was taken or not taken.
++ * @todo Make this update flexible enough to handle a global predictor.
++ */
+ void BPUpdate(Addr &inst_PC, bool taken)
+ { BP.update(inst_PC, taken); }
+
++ /**
++ * Updates the BTB with the target of a branch.
++ * @param inst_PC The branch's PC that will be updated.
++ * @param target_PC The branch's target that will be added to the BTB.
++ */
+ void BTBUpdate(Addr &inst_PC, Addr &target_PC)
- const bool pred_taken)
- : seqNum(seq_num), PC(inst_PC), predTaken(pred_taken),
- globalHistory(0), usedRAS(0), wasCall(0), RASIndex(0),
- RASTarget(0)
++ { BTB.update(inst_PC, target_PC,0); }
+
+ private:
+ struct PredictorHistory {
++ /**
++ * Makes a predictor history struct that contains a sequence number,
++ * the PC of its instruction, and whether or not it was predicted
++ * taken.
++ */
+ PredictorHistory(const InstSeqNum &seq_num, const Addr &inst_PC,
- bool predTaken;
++ const bool pred_taken, const unsigned _tid)
++ : seqNum(seq_num), PC(inst_PC), RASTarget(0), globalHistory(0),
++ RASIndex(0), tid(_tid), predTaken(pred_taken), usedRAS(0),
++ wasCall(0)
+ { }
+
++ /** The sequence number for the predictor history entry. */
+ InstSeqNum seqNum;
+
++ /** The PC associated with the sequence number. */
+ Addr PC;
+
- bool usedRAS;
++ /** The RAS target (only valid if a return). */
++ Addr RASTarget;
+
++ /** The global history at the time this entry was created. */
+ unsigned globalHistory;
+
- bool wasCall;
++ /** The RAS index of the instruction (only valid if a call). */
++ unsigned RASIndex;
+
- unsigned RASIndex;
++ /** The thread id. */
++ unsigned tid;
+
- Addr RASTarget;
++ /** Whether or not it was predicted taken. */
++ bool predTaken;
+
- std::list<PredictorHistory> predHist;
++ /** Whether or not the RAS was used. */
++ bool usedRAS;
++
++ /** Whether or not the instruction was a call. */
++ bool wasCall;
+ };
+
- ReturnAddrStack RAS;
++ typedef std::list<PredictorHistory> History;
++
++ /**
++ * The per-thread predictor history. This is used to update the predictor
++ * as instructions are committed, or restore it to the proper state after
++ * a squash.
++ */
++ History predHist[Impl::MaxThreads];
+
++ /** The branch predictor. */
+ DefaultBP BP;
+
++ /** The BTB. */
+ DefaultBTB BTB;
+
- #endif // __BPRED_UNIT_HH__
++ /** The per-thread return address stack. */
++ ReturnAddrStack RAS[Impl::MaxThreads];
+
++ /** Stat for number of BP lookups. */
+ Stats::Scalar<> lookups;
++ /** Stat for number of conditional branches predicted. */
+ Stats::Scalar<> condPredicted;
++ /** Stat for number of conditional branches predicted incorrectly. */
+ Stats::Scalar<> condIncorrect;
++ /** Stat for number of BTB lookups. */
+ Stats::Scalar<> BTBLookups;
++ /** Stat for number of BTB hits. */
+ Stats::Scalar<> BTBHits;
++ /** Stat for number of times the BTB is correct. */
+ Stats::Scalar<> BTBCorrect;
++ /** Stat for number of times the RAS is used to get a target. */
+ Stats::Scalar<> usedRAS;
++ /** Stat for number of times the RAS is incorrect. */
+ Stats::Scalar<> RASIncorrect;
+};
+
++#endif // __CPU_O3_BPRED_UNIT_HH__
--- /dev/null
- TwobitBPredUnit<Impl>::TwobitBPredUnit(Params ¶ms)
- : BP(params.local_predictor_size,
- params.local_ctr_bits,
- params.instShiftAmt),
- BTB(params.BTBEntries,
- params.BTBTagSize,
- params.instShiftAmt),
- RAS(params.RASSize)
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
++#include <list>
++#include <vector>
++
+#include "base/trace.hh"
+#include "base/traceflags.hh"
+#include "cpu/o3/bpred_unit.hh"
+
++using namespace std;
++
+template<class Impl>
- .desc("Number of times the RAS was used.")
++TwobitBPredUnit<Impl>::TwobitBPredUnit(Params *params)
++ : BP(params->localPredictorSize,
++ params->localCtrBits,
++ params->instShiftAmt),
++ BTB(params->BTBEntries,
++ params->BTBTagSize,
++ params->instShiftAmt)
+{
++ for (int i=0; i < Impl::MaxThreads; i++)
++ RAS[i].init(params->RASSize);
+}
+
+template <class Impl>
+void
+TwobitBPredUnit<Impl>::regStats()
+{
+ lookups
+ .name(name() + ".BPredUnit.lookups")
+ .desc("Number of BP lookups")
+ ;
+
+ condPredicted
+ .name(name() + ".BPredUnit.condPredicted")
+ .desc("Number of conditional branches predicted")
+ ;
+
+ condIncorrect
+ .name(name() + ".BPredUnit.condIncorrect")
+ .desc("Number of conditional branches incorrect")
+ ;
+
+ BTBLookups
+ .name(name() + ".BPredUnit.BTBLookups")
+ .desc("Number of BTB lookups")
+ ;
+
+ BTBHits
+ .name(name() + ".BPredUnit.BTBHits")
+ .desc("Number of BTB hits")
+ ;
+
+ BTBCorrect
+ .name(name() + ".BPredUnit.BTBCorrect")
+ .desc("Number of correct BTB predictions (this stat may not "
+ "work properly.")
+ ;
+
+ usedRAS
+ .name(name() + ".BPredUnit.usedRAS")
- TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC)
++ .desc("Number of times the RAS was used to get a target.")
+ ;
+
+ RASIncorrect
+ .name(name() + ".BPredUnit.RASInCorrect")
+ .desc("Number of incorrect RAS predictions.")
+ ;
+}
+
++template <class Impl>
++void
++TwobitBPredUnit<Impl>::switchOut()
++{
++ for (int i = 0; i < Impl::MaxThreads; ++i) {
++ predHist[i].clear();
++ }
++}
++
++template <class Impl>
++void
++TwobitBPredUnit<Impl>::takeOverFrom()
++{
++/*
++ for (int i = 0; i < Impl::MaxThreads; ++i)
++ RAS[i].reset();
++
++ BP.reset();
++ BTB.reset();
++*/
++}
++
+template <class Impl>
+bool
- DPRINTF(Fetch, "BranchPred: Unconditional control.\n");
++TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
+{
+ // See if branch predictor predicts taken.
+ // If so, get its target addr either from the BTB or the RAS.
+ // Once that's done, speculatively update the predictor?
+ // Save off record of branch stuff so the RAS can be fixed
+ // up once it's done.
+
+ using TheISA::MachInst;
+
+ bool pred_taken = false;
+ Addr target;
+
+ ++lookups;
+
+ if (inst->isUncondCtrl()) {
- DPRINTF(Fetch, "BranchPred: Branch predictor predicted %i for PC %#x"
- "\n", pred_taken, inst->readPC());
++ DPRINTF(Fetch, "BranchPred: [tid:%i] Unconditional control.\n", tid);
+ pred_taken = true;
+ } else {
+ ++condPredicted;
+
+ pred_taken = BPLookup(PC);
+
- PredictorHistory predict_record(inst->seqNum, PC, pred_taken);
++ DPRINTF(Fetch, "BranchPred: [tid:%i]: Branch predictor predicted %i "
++ "for PC %#x\n",
++ tid, pred_taken, inst->readPC());
+ }
+
- target = RAS.top();
++ PredictorHistory predict_record(inst->seqNum, PC, pred_taken, tid);
+
+ // Now lookup in the BTB or RAS.
+ if (pred_taken) {
+ if (inst->isReturn()) {
+ ++usedRAS;
+
+ // If it's a function return call, then look up the address
+ // in the RAS.
- predict_record.RASIndex = RAS.topIdx();
++ target = RAS[tid].top();
+
+ // Record the top entry of the RAS, and its index.
+ predict_record.usedRAS = true;
- RAS.pop();
++ predict_record.RASIndex = RAS[tid].topIdx();
+ predict_record.RASTarget = target;
+
- DPRINTF(Fetch, "BranchPred: Instruction %#x is a return, RAS "
- "predicted target: %#x, RAS index: %i.\n",
- inst->readPC(), target, predict_record.RASIndex);
++ assert(predict_record.RASIndex < 16);
+
- RAS.push(PC+sizeof(MachInst));
++ RAS[tid].pop();
++
++ DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %#x is a return, "
++ "RAS predicted target: %#x, RAS index: %i.\n",
++ tid, inst->readPC(), target, predict_record.RASIndex);
+ } else {
+ ++BTBLookups;
+
+ if (inst->isCall()) {
- DPRINTF(Fetch, "BranchPred: Instruction %#x was a call, "
- "adding %#x to the RAS.\n",
- inst->readPC(), PC+sizeof(MachInst));
++ RAS[tid].push(PC + sizeof(MachInst));
+
+ // Record that it was a call so that the top RAS entry can
+ // be popped off if the speculation is incorrect.
+ predict_record.wasCall = true;
+
- if (BTB.valid(PC)) {
++ DPRINTF(Fetch, "BranchPred: [tid:%i] Instruction %#x was a call"
++ ", adding %#x to the RAS.\n",
++ tid, inst->readPC(), PC + sizeof(MachInst));
+ }
+
- target = BTB.lookup(PC);
++ if (BTB.valid(PC, tid)) {
+ ++BTBHits;
+
+ //If it's anything else, use the BTB to get the target addr.
- DPRINTF(Fetch, "BranchPred: Instruction %#x predicted target "
- "is %#x.\n", inst->readPC(), target);
++ target = BTB.lookup(PC, tid);
+
- DPRINTF(Fetch, "BranchPred: BTB doesn't have a valid entry."
- "\n");
++ DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %#x predicted"
++ " target is %#x.\n",
++ tid, inst->readPC(), target);
+
+ } else {
- predHist.push_front(predict_record);
++ DPRINTF(Fetch, "BranchPred: [tid:%i]: BTB doesn't have a "
++ "valid entry.\n",tid);
+ pred_taken = false;
+ }
+
+ }
+ }
+
+ if (pred_taken) {
+ // Set the PC and the instruction's predicted target.
+ PC = target;
+ inst->setPredTarg(target);
+ } else {
+ PC = PC + sizeof(MachInst);
+ inst->setPredTarg(PC);
+ }
+
- assert(!predHist.empty());
++ predHist[tid].push_front(predict_record);
+
- TwobitBPredUnit<Impl>::update(const InstSeqNum &done_sn)
++ DPRINTF(Fetch, "[tid:%i] predHist.size(): %i\n", tid, predHist[tid].size());
+
+ return pred_taken;
+}
+
+template <class Impl>
+void
- DPRINTF(Fetch, "BranchPred: Commiting branches until sequence number "
- "%i.\n", done_sn);
-
- while (!predHist.empty() && predHist.back().seqNum <= done_sn) {
- assert(!predHist.empty());
++TwobitBPredUnit<Impl>::update(const InstSeqNum &done_sn, unsigned tid)
+{
- // Update the branch predictor with the correct results of branches.
- BP.update(predHist.back().PC, predHist.back().predTaken);
++ DPRINTF(Fetch, "BranchPred: [tid:%i]: Commiting branches until sequence"
++ "number %lli.\n", tid, done_sn);
+
- predHist.pop_back();
++ while (!predHist[tid].empty() &&
++ predHist[tid].back().seqNum <= done_sn) {
++ // Update the branch predictor with the correct results.
++ BP.update(predHist[tid].back().PC,
++ predHist[tid].back().predTaken);
+
- TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn)
++ predHist[tid].pop_back();
+ }
+}
+
+template <class Impl>
+void
- while (!predHist.empty() && predHist.front().seqNum > squashed_sn) {
- if (predHist.front().usedRAS) {
- DPRINTF(Fetch, "BranchPred: Restoring top of RAS to: %i, "
- "target: %#x.\n",
- predHist.front().RASIndex,
- predHist.front().RASTarget);
++TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn, unsigned tid)
+{
- RAS.restore(predHist.front().RASIndex,
- predHist.front().RASTarget);
- } else if (predHist.front().wasCall) {
- DPRINTF(Fetch, "BranchPred: Removing speculative entry added "
- "to the RAS.\n");
++ History &pred_hist = predHist[tid];
++
++ while (!pred_hist.empty() &&
++ pred_hist.front().seqNum > squashed_sn) {
++ if (pred_hist.front().usedRAS) {
++ DPRINTF(Fetch, "BranchPred: [tid:%i]: Restoring top of RAS to: %i,"
++ " target: %#x.\n",
++ tid,
++ pred_hist.front().RASIndex,
++ pred_hist.front().RASTarget);
++
++ RAS[tid].restore(pred_hist.front().RASIndex,
++ pred_hist.front().RASTarget);
+
- RAS.pop();
++ } else if (pred_hist.front().wasCall) {
++ DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing speculative entry added "
++ "to the RAS.\n",tid);
+
- predHist.pop_front();
++ RAS[tid].pop();
+ }
+
- const bool actually_taken)
++ pred_hist.pop_front();
+ }
++
+}
+
+template <class Impl>
+void
+TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn,
+ const Addr &corr_target,
- DPRINTF(Fetch, "BranchPred: Squashing from sequence number %i, "
++ const bool actually_taken,
++ unsigned tid)
+{
+ // Now that we know that a branch was mispredicted, we need to undo
+ // all the branches that have been seen up until this branch and
+ // fix up everything.
+
++ History &pred_hist = predHist[tid];
++
+ ++condIncorrect;
+
- squashed_sn, corr_target);
-
- while (!predHist.empty() && predHist.front().seqNum > squashed_sn) {
++ DPRINTF(Fetch, "BranchPred: [tid:%i]: Squashing from sequence number %i, "
+ "setting target to %#x.\n",
- if (predHist.front().usedRAS) {
- DPRINTF(Fetch, "BranchPred: Restoring top of RAS to: %i, "
++ tid, squashed_sn, corr_target);
+
- predHist.front().RASIndex,
- predHist.front().RASTarget);
++ while (!pred_hist.empty() &&
++ pred_hist.front().seqNum > squashed_sn) {
++ if (pred_hist.front().usedRAS) {
++ DPRINTF(Fetch, "BranchPred: [tid:%i]: Restoring top of RAS to: %i, "
+ "target: %#x.\n",
- RAS.restore(predHist.front().RASIndex,
- predHist.front().RASTarget);
- } else if (predHist.front().wasCall) {
- DPRINTF(Fetch, "BranchPred: Removing speculative entry added "
- "to the RAS.\n");
++ tid,
++ pred_hist.front().RASIndex,
++ pred_hist.front().RASTarget);
+
- RAS.pop();
++ RAS[tid].restore(pred_hist.front().RASIndex,
++ pred_hist.front().RASTarget);
++ } else if (pred_hist.front().wasCall) {
++ DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing speculative entry"
++ " added to the RAS.\n", tid);
+
- predHist.pop_front();
++ RAS[tid].pop();
+ }
+
- predHist.front().predTaken = actually_taken;
++ pred_hist.pop_front();
+ }
+
- if (predHist.front().usedRAS) {
- ++RASIncorrect;
- }
++ // If there's a squash due to a syscall, there may not be an entry
++ // corresponding to the squash. In that case, don't bother trying to
++ // fix up the entry.
++ if (!pred_hist.empty()) {
++ pred_hist.front().predTaken = actually_taken;
+
- BP.update(predHist.front().PC, actually_taken);
++ if (pred_hist.front().usedRAS) {
++ ++RASIncorrect;
++ }
+
- BTB.update(predHist.front().PC, corr_target);
++ BP.update(pred_hist.front().PC, actually_taken);
+
++ BTB.update(pred_hist.front().PC, corr_target, tid);
++ pred_hist.pop_front();
++ }
+}
--- /dev/null
- // @todo Check to make sure num_entries is valid (a power of 2)
-
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "base/intmath.hh"
+#include "base/trace.hh"
+#include "cpu/o3/btb.hh"
+
+using namespace TheISA;
+
+DefaultBTB::DefaultBTB(unsigned _numEntries,
+ unsigned _tagBits,
+ unsigned _instShiftAmt)
+ : numEntries(_numEntries),
+ tagBits(_tagBits),
+ instShiftAmt(_instShiftAmt)
+{
- btb = new BTBEntry[numEntries];
+ DPRINTF(Fetch, "BTB: Creating BTB object.\n");
+
- for (int i = 0; i < numEntries; ++i)
- {
++ if (!isPowerOf2(numEntries)) {
++ fatal("BTB entries is not a power of 2!");
++ }
++
++ btb.resize(numEntries);
+
- DefaultBTB::valid(const Addr &inst_PC)
++ for (int i = 0; i < numEntries; ++i) {
+ btb[i].valid = false;
+ }
+
+ idxMask = numEntries - 1;
+
+ tagMask = (1 << tagBits) - 1;
+
+ tagShiftAmt = instShiftAmt + floorLog2(numEntries);
+}
+
++void
++DefaultBTB::reset()
++{
++ for (int i = 0; i < numEntries; ++i) {
++ btb[i].valid = false;
++ }
++}
++
+inline
+unsigned
+DefaultBTB::getIndex(const Addr &inst_PC)
+{
+ // Need to shift PC over by the word offset.
+ return (inst_PC >> instShiftAmt) & idxMask;
+}
+
+inline
+Addr
+DefaultBTB::getTag(const Addr &inst_PC)
+{
+ return (inst_PC >> tagShiftAmt) & tagMask;
+}
+
+bool
- if (btb[btb_idx].valid && inst_tag == btb[btb_idx].tag) {
++DefaultBTB::valid(const Addr &inst_PC, unsigned tid)
+{
+ unsigned btb_idx = getIndex(inst_PC);
+
+ Addr inst_tag = getTag(inst_PC);
+
+ assert(btb_idx < numEntries);
+
- DefaultBTB::lookup(const Addr &inst_PC)
++ if (btb[btb_idx].valid
++ && inst_tag == btb[btb_idx].tag
++ && btb[btb_idx].tid == tid) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
+// @todo Create some sort of return struct that has both whether or not the
+// address is valid, and also the address. For now will just use addr = 0 to
+// represent invalid entry.
+Addr
- if (btb[btb_idx].valid && inst_tag == btb[btb_idx].tag) {
++DefaultBTB::lookup(const Addr &inst_PC, unsigned tid)
+{
+ unsigned btb_idx = getIndex(inst_PC);
+
+ Addr inst_tag = getTag(inst_PC);
+
+ assert(btb_idx < numEntries);
+
- DefaultBTB::update(const Addr &inst_PC, const Addr &target)
++ if (btb[btb_idx].valid
++ && inst_tag == btb[btb_idx].tag
++ && btb[btb_idx].tid == tid) {
+ return btb[btb_idx].target;
+ } else {
+ return 0;
+ }
+}
+
+void
++DefaultBTB::update(const Addr &inst_PC, const Addr &target, unsigned tid)
+{
+ unsigned btb_idx = getIndex(inst_PC);
+
+ assert(btb_idx < numEntries);
+
++ btb[btb_idx].tid = tid;
+ btb[btb_idx].valid = true;
+ btb[btb_idx].target = target;
+ btb[btb_idx].tag = getTag(inst_PC);
+}
--- /dev/null
- #ifndef __CPU_O3_CPU_BTB_HH__
- #define __CPU_O3_CPU_BTB_HH__
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- Addr lookup(const Addr &inst_PC);
-
- bool valid(const Addr &inst_PC);
-
- void update(const Addr &inst_PC, const Addr &target_PC);
++#ifndef __CPU_O3_BTB_HH__
++#define __CPU_O3_BTB_HH__
+
+// For Addr type.
+#include "arch/isa_traits.hh"
+
+class DefaultBTB
+{
+ private:
+ struct BTBEntry
+ {
+ BTBEntry()
+ : tag(0), target(0), valid(false)
+ {
+ }
+
++ /** The entry's tag. */
+ Addr tag;
++
++ /** The entry's target. */
+ Addr target;
++
++ /** The entry's thread id. */
++ unsigned tid;
++
++ /** Whether or not the entry is valid. */
+ bool valid;
+ };
+
+ public:
++ /** Creates a BTB with the given number of entries, number of bits per
++ * tag, and instruction offset amount.
++ * @param numEntries Number of entries for the BTB.
++ * @param tagBits Number of bits for each tag in the BTB.
++ * @param instShiftAmt Offset amount for instructions to ignore alignment.
++ */
+ DefaultBTB(unsigned numEntries, unsigned tagBits,
+ unsigned instShiftAmt);
+
- BTBEntry *btb;
++ void reset();
++
++ /** Looks up an address in the BTB. Must call valid() first on the address.
++ * @param inst_PC The address of the branch to look up.
++ * @param tid The thread id.
++ * @return Returns the target of the branch.
++ */
++ Addr lookup(const Addr &inst_PC, unsigned tid);
++
++ /** Checks if a branch is in the BTB.
++ * @param inst_PC The address of the branch to look up.
++ * @param tid The thread id.
++ * @return Whether or not the branch exists in the BTB.
++ */
++ bool valid(const Addr &inst_PC, unsigned tid);
++
++ /** Updates the BTB with the target of a branch.
++ * @param inst_PC The address of the branch being updated.
++ * @param target_PC The target address of the branch.
++ * @param tid The thread id.
++ */
++ void update(const Addr &inst_PC, const Addr &target_PC,
++ unsigned tid);
+
+ private:
++ /** Returns the index into the BTB, based on the branch's PC.
++ * @param inst_PC The branch to look up.
++ * @return Returns the index into the BTB.
++ */
+ inline unsigned getIndex(const Addr &inst_PC);
+
++ /** Returns the tag bits of a given address.
++ * @param inst_PC The branch's address.
++ * @return Returns the tag bits.
++ */
+ inline Addr getTag(const Addr &inst_PC);
+
- #endif // __CPU_O3_CPU_BTB_HH__
++ /** The actual BTB. */
++ std::vector<BTBEntry> btb;
+
++ /** The number of entries in the BTB. */
+ unsigned numEntries;
+
++ /** The index mask. */
+ unsigned idxMask;
+
++ /** The number of tag bits per entry. */
+ unsigned tagBits;
+
++ /** The tag mask. */
+ unsigned tagMask;
+
++ /** Number of bits to shift PC when calculating index. */
+ unsigned instShiftAmt;
+
++ /** Number of bits to shift PC when calculating tag. */
+ unsigned tagShiftAmt;
+};
+
++#endif // __CPU_O3_BTB_HH__
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- #ifndef __CPU_O3_CPU_COMM_HH__
- #define __CPU_O3_CPU_COMM_HH__
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- // Find better place to put this typedef.
- // The impl might be the best place for this.
++#ifndef __CPU_O3_COMM_HH__
++#define __CPU_O3_COMM_HH__
+
+#include <vector>
+
++#include "arch/faults.hh"
+#include "arch/isa_traits.hh"
+#include "cpu/inst_seq.hh"
+#include "sim/host.hh"
+
- struct SimpleFetchSimpleDecode {
++// Typedef for physical register index type. Although the Impl would be the
++// most likely location for this, there are a few classes that need this
++// typedef yet are not templated on the Impl. For now it will be defined here.
+typedef short int PhysRegIndex;
+
+template<class Impl>
- struct SimpleDecodeSimpleRename {
++struct DefaultFetchDefaultDecode {
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
+ int size;
+
+ DynInstPtr insts[Impl::MaxWidth];
++ Fault fetchFault;
++ InstSeqNum fetchFaultSN;
++ bool clearFetchFault;
+};
+
+template<class Impl>
- struct SimpleRenameSimpleIEW {
++struct DefaultDecodeDefaultRename {
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
+ int size;
+
+ DynInstPtr insts[Impl::MaxWidth];
+};
+
+template<class Impl>
- struct SimpleIEWSimpleCommit {
++struct DefaultRenameDefaultIEW {
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
+ int size;
+
+ DynInstPtr insts[Impl::MaxWidth];
+};
+
+template<class Impl>
- bool squash;
- bool branchMispredict;
- bool branchTaken;
- uint64_t mispredPC;
- uint64_t nextPC;
- InstSeqNum squashedSeqNum;
++struct DefaultIEWDefaultCommit {
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
+ int size;
+
+ DynInstPtr insts[Impl::MaxWidth];
+
- bool stall;
++ bool squash[Impl::MaxThreads];
++ bool branchMispredict[Impl::MaxThreads];
++ bool branchTaken[Impl::MaxThreads];
++ uint64_t mispredPC[Impl::MaxThreads];
++ uint64_t nextPC[Impl::MaxThreads];
++ InstSeqNum squashedSeqNum[Impl::MaxThreads];
++
++ bool includeSquashInst[Impl::MaxThreads];
+};
+
+template<class Impl>
+struct IssueStruct {
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
+ int size;
+
+ DynInstPtr insts[Impl::MaxWidth];
+};
+
++template<class Impl>
+struct TimeBufStruct {
+ struct decodeComm {
+ bool squash;
- // Might want to package this kind of branch stuff into a single
+ bool predIncorrect;
+ uint64_t branchAddr;
+
+ InstSeqNum doneSeqNum;
+
- decodeComm decodeInfo;
++ // @todo: Might want to package this kind of branch stuff into a single
+ // struct as it is used pretty frequently.
+ bool branchMispredict;
+ bool branchTaken;
+ uint64_t mispredPC;
+ uint64_t nextPC;
++
++ unsigned branchCount;
+ };
+
- bool stall;
++ decodeComm decodeInfo[Impl::MaxThreads];
+
+ // Rename can't actually tell anything to squash or send a new PC back
+ // because it doesn't do anything along those lines. But maybe leave
+    // these fields in here to keep the stages mostly orthogonal.
+ struct renameComm {
+ bool squash;
- renameComm renameInfo;
+
+ uint64_t nextPC;
+ };
+
- bool stall;
-
++ renameComm renameInfo[Impl::MaxThreads];
+
+ struct iewComm {
- iewComm iewInfo;
+ // Also eventually include skid buffer space.
++ bool usedIQ;
+ unsigned freeIQEntries;
++ bool usedLSQ;
++ unsigned freeLSQEntries;
++
++ unsigned iqCount;
++ unsigned ldstqCount;
++
++ unsigned dispatched;
++ unsigned dispatchedToLSQ;
+ };
+
- bool squash;
- bool stall;
++ iewComm iewInfo[Impl::MaxThreads];
+
+ struct commitComm {
- bool robSquashing;
-
++ bool usedROB;
+ unsigned freeROBEntries;
++ bool emptyROB;
++
++ bool squash;
++ bool robSquashing;
+
+ bool branchMispredict;
+ bool branchTaken;
+ uint64_t mispredPC;
+ uint64_t nextPC;
+
- // Extra bit of information so that the LDSTQ only updates when it
- // needs to.
- bool commitIsLoad;
+ // Represents the instruction that has either been retired or
+ // squashed. Similar to having a single bus that broadcasts the
+ // retired or squashed sequence number.
+ InstSeqNum doneSeqNum;
+
- commitComm commitInfo;
++ //Just in case we want to do a commit/squash on a cycle
++ //(necessary for multiple ROBs?)
++ bool commitInsts;
++ InstSeqNum squashSeqNum;
+
+ // Communication specifically to the IQ to tell the IQ that it can
+ // schedule a non-speculative instruction.
+ InstSeqNum nonSpecSeqNum;
++
++ // Hack for now to send back an uncached access to the IEW stage.
++ typedef typename Impl::DynInstPtr DynInstPtr;
++ bool uncached;
++ DynInstPtr uncachedLoad;
++
++ bool interruptPending;
++ bool clearInterrupt;
+ };
+
- #endif //__CPU_O3_CPU_COMM_HH__
++ commitComm commitInfo[Impl::MaxThreads];
++
++ bool decodeBlock[Impl::MaxThreads];
++ bool decodeUnblock[Impl::MaxThreads];
++ bool renameBlock[Impl::MaxThreads];
++ bool renameUnblock[Impl::MaxThreads];
++ bool iewBlock[Impl::MaxThreads];
++ bool iewUnblock[Impl::MaxThreads];
++ bool commitBlock[Impl::MaxThreads];
++ bool commitUnblock[Impl::MaxThreads];
+};
+
++#endif //__CPU_O3_COMM_HH__
--- /dev/null
- template class SimpleCommit<AlphaSimpleImpl>;
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/o3/alpha_dyn_inst.hh"
+#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/commit_impl.hh"
+
++template class DefaultCommit<AlphaSimpleImpl>;
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- // Todo: Maybe have a special method for handling interrupts/traps.
- //
- // Traps: Have IEW send a signal to commit saying that there's a trap to
- // be handled. Have commit send the PC back to the fetch stage, along
- // with the current commit PC. Fetch will directly access the IPR and save
- // off all the proper stuff. Commit can send out a squash, or something
- // close to it.
- // Do the same for hwrei(). However, requires that commit be specifically
- // built to support that kind of stuff. Probably not horrible to have
- // commit support having the CPU tell it to squash the other stages and
- // restart at a given address. The IPR register does become an issue.
- // Probably not a big deal if the IPR stuff isn't cycle accurate. Can just
- // have the original function handle writing to the IPR register.
-
- #ifndef __CPU_O3_CPU_SIMPLE_COMMIT_HH__
- #define __CPU_O3_CPU_SIMPLE_COMMIT_HH__
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- class SimpleCommit
++#ifndef __CPU_O3_COMMIT_HH__
++#define __CPU_O3_COMMIT_HH__
+
++#include "arch/faults.hh"
+#include "base/statistics.hh"
+#include "base/timebuf.hh"
++#include "cpu/exetrace.hh"
++#include "cpu/inst_seq.hh"
+#include "mem/memory_interface.hh"
+
++template <class>
++class O3ThreadState;
++
++/**
++ * DefaultCommit handles single threaded and SMT commit. Its width is
++ * specified by the parameters; each cycle it tries to commit that
++ * many instructions. The SMT policy decides which thread it tries to
++ * commit instructions from. Non-speculative instructions must reach
++ * the head of the ROB before they are ready to execute; once they
++ * reach the head, commit will broadcast the instruction's sequence
++ * number to the previous stages so that they can issue/execute the
++ * instruction. Only one non-speculative instruction is handled per
++ * cycle. Commit is responsible for handling all back-end initiated
++ * redirects. It receives the redirect, and then broadcasts it to all
++ * stages, indicating the sequence number they should squash until,
++ * and any necessary branch misprediction information as well. It
++ * prioritizes redirects by instruction's age, only broadcasting a
++ * redirect if it corresponds to an instruction that should currently
++ * be in the ROB. This is done by tracking the sequence number of the
++ * youngest instruction in the ROB, which gets updated to any
++ * squashing instruction's sequence number, and only broadcasting a
++ * redirect if it corresponds to an older instruction. Commit also
++ * supports multiple cycle squashing, to model a ROB that can only
++ * remove a certain number of instructions per cycle.
++ */
+template<class Impl>
- public:
- // I don't believe commit can block, so it will only have two
- // statuses for now.
- // Actually if there's a cache access that needs to block (ie
- // uncachable load or just a mem access in commit) then the stage
- // may have to wait.
- enum Status {
++class DefaultCommit
+{
+ public:
+ // Typedefs from the Impl.
+ typedef typename Impl::FullCPU FullCPU;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::Params Params;
+ typedef typename Impl::CPUPol CPUPol;
+
++ typedef typename CPUPol::RenameMap RenameMap;
+ typedef typename CPUPol::ROB ROB;
+
+ typedef typename CPUPol::TimeStruct TimeStruct;
++ typedef typename CPUPol::FetchStruct FetchStruct;
+ typedef typename CPUPol::IEWStruct IEWStruct;
+ typedef typename CPUPol::RenameStruct RenameStruct;
+
- DcacheMissStall,
- DcacheMissComplete
++ typedef typename CPUPol::Fetch Fetch;
++ typedef typename CPUPol::IEW IEW;
++
++ typedef O3ThreadState<Impl> Thread;
++
++ class TrapEvent : public Event {
++ private:
++ DefaultCommit<Impl> *commit;
++ unsigned tid;
++
++ public:
++ TrapEvent(DefaultCommit<Impl> *_commit, unsigned _tid);
++
++ void process();
++ const char *description();
++ };
++
++ /** Overall commit status. Used to determine if the CPU can deschedule
++ * itself due to a lack of activity.
++ */
++ enum CommitStatus{
++ Active,
++ Inactive
++ };
++
++ /** Individual thread status. */
++ enum ThreadStatus {
+ Running,
+ Idle,
+ ROBSquashing,
- Status _status;
++ TrapPending,
++ FetchTrapPending
++ };
++
++ /** Commit policy for SMT mode. */
++ enum CommitPolicy {
++ Aggressive,
++ RoundRobin,
++ OldestReady
+ };
+
+ private:
- SimpleCommit(Params ¶ms);
++ /** Overall commit status. */
++ CommitStatus _status;
++ /** Next commit status, to be set at the end of the cycle. */
++ CommitStatus _nextStatus;
++ /** Per-thread status. */
++ ThreadStatus commitStatus[Impl::MaxThreads];
++ /** Commit policy used in SMT mode. */
++ CommitPolicy commitPolicy;
+
+ public:
- uint64_t readCommitPC();
++ /** Construct a DefaultCommit with the given parameters. */
++ DefaultCommit(Params *params);
++
++ /** Returns the name of the DefaultCommit. */
++ std::string name() const;
+
++ /** Registers statistics. */
+ void regStats();
+
++ /** Sets the CPU pointer. */
+ void setCPU(FullCPU *cpu_ptr);
+
++ /** Sets the list of threads. */
++ void setThreads(std::vector<Thread *> &threads);
++
++ /** Sets the main time buffer pointer, used for backwards communication. */
+ void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
+
++ void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr);
++
++ /** Sets the pointer to the queue coming from rename. */
+ void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr);
+
++ /** Sets the pointer to the queue coming from IEW. */
+ void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr);
+
++ void setFetchStage(Fetch *fetch_stage);
++
++ Fetch *fetchStage;
++
++    /** Sets the pointer to the IEW stage. */
++ void setIEWStage(IEW *iew_stage);
++
++ /** The pointer to the IEW stage. Used solely to ensure that
++ * various events (traps, interrupts, syscalls) do not occur until
++ * all stores have written back.
++ */
++ IEW *iewStage;
++
++ /** Sets pointer to list of active threads. */
++ void setActiveThreads(std::list<unsigned> *at_ptr);
++
++    /** Sets pointer to the committed state rename map. */
++ void setRenameMap(RenameMap rm_ptr[Impl::MaxThreads]);
++
++ /** Sets pointer to the ROB. */
+ void setROB(ROB *rob_ptr);
+
++ /** Initializes stage by sending back the number of free entries. */
++ void initStage();
++
++ void switchOut();
++
++ void doSwitchOut();
++
++ void takeOverFrom();
++
++ /** Ticks the commit stage, which tries to commit instructions. */
+ void tick();
+
++ /** Handles any squashes that are sent from IEW, and adds instructions
++ * to the ROB and tries to commit instructions.
++ */
+ void commit();
+
++ /** Returns the number of free ROB entries for a specific thread. */
++ unsigned numROBFreeEntries(unsigned tid);
++
++ void generateXCEvent(unsigned tid);
++
+ private:
++ /** Updates the overall status of commit with the nextStatus, and
++     * tells the CPU if commit is active/inactive. */
++ void updateStatus();
++
++ /** Sets the next status based on threads' statuses, which becomes the
++ * current status at the end of the cycle.
++ */
++ void setNextStatus();
++
++ /** Checks if the ROB is completed with squashing. This is for the case
++ * where the ROB can take multiple cycles to complete squashing.
++ */
++ bool robDoneSquashing();
++
++ /** Returns if any of the threads have the number of ROB entries changed
++ * on this cycle. Used to determine if the number of free ROB entries needs
++ * to be sent back to previous stages.
++ */
++ bool changedROBEntries();
+
++ void squashAll(unsigned tid);
++
++ void squashFromTrap(unsigned tid);
++
++ void squashFromXC(unsigned tid);
++
++ /** Commits as many instructions as possible. */
+ void commitInsts();
+
++ /** Tries to commit the head ROB instruction passed in.
++ * @param head_inst The instruction to be committed.
++ */
+ bool commitHead(DynInstPtr &head_inst, unsigned inst_num);
+
++ void generateTrapEvent(unsigned tid);
++
++ /** Gets instructions from rename and inserts them into the ROB. */
+ void getInsts();
+
++ /** Marks completed instructions using information sent from IEW. */
+ void markCompletedInsts();
+
++ /** Gets the thread to commit, based on the SMT policy. */
++ int getCommittingThread();
++
++ /** Returns the thread ID to use based on a round robin policy. */
++ int roundRobin();
++
++ /** Returns the thread ID to use based on an oldest instruction policy. */
++ int oldestReady();
++
+ public:
- void setSquashing() { _status = ROBSquashing; }
++ /** Returns the PC of the head instruction of the ROB.
++ * @todo: Probably remove this function as it returns only thread 0.
++ */
++ uint64_t readPC() { return PC[0]; }
++
++ uint64_t readPC(unsigned tid) { return PC[tid]; }
++
++ void setPC(uint64_t val, unsigned tid) { PC[tid] = val; }
+
- private:
++ uint64_t readNextPC(unsigned tid) { return nextPC[tid]; }
++
++ void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; }
+
+ private:
+ /** Time buffer interface. */
+ TimeBuffer<TimeStruct> *timeBuffer;
+
+ /** Wire to write information heading to previous stages. */
+ typename TimeBuffer<TimeStruct>::wire toIEW;
+
+ /** Wire to read information from IEW (for ROB). */
+ typename TimeBuffer<TimeStruct>::wire robInfoFromIEW;
+
++ TimeBuffer<FetchStruct> *fetchQueue;
++
++ typename TimeBuffer<FetchStruct>::wire fromFetch;
++
+ /** IEW instruction queue interface. */
+ TimeBuffer<IEWStruct> *iewQueue;
+
+ /** Wire to read information from IEW queue. */
+ typename TimeBuffer<IEWStruct>::wire fromIEW;
+
+ /** Rename instruction queue interface, for ROB. */
+ TimeBuffer<RenameStruct> *renameQueue;
+
+ /** Wire to read information from rename queue. */
+ typename TimeBuffer<RenameStruct>::wire fromRename;
+
++ public:
+ /** ROB interface. */
+ ROB *rob;
+
++ private:
+ /** Pointer to FullCPU. */
+ FullCPU *cpu;
+
+ /** Memory interface. Used for d-cache accesses. */
+ MemInterface *dcacheInterface;
+
- Stats::Scalar<> commitCommittedBranches;
- Stats::Scalar<> commitCommittedLoads;
- Stats::Scalar<> commitCommittedMemRefs;
++ std::vector<Thread *> thread;
++
++ Fault fetchFault;
++
++ int fetchTrapWait;
++
++ /** Records that commit has written to the time buffer this cycle. Used for
++ * the CPU to determine if it can deschedule itself if there is no activity.
++ */
++ bool wroteToTimeBuffer;
++
++ /** Records if the number of ROB entries has changed this cycle. If it has,
++ * then the number of free entries must be re-broadcast.
++ */
++ bool changedROBNumEntries[Impl::MaxThreads];
++
++ /** A counter of how many threads are currently squashing. */
++ int squashCounter;
++
++ /** Records if a thread has to squash this cycle due to a trap. */
++ bool trapSquash[Impl::MaxThreads];
++
++ /** Records if a thread has to squash this cycle due to an XC write. */
++ bool xcSquash[Impl::MaxThreads];
++
++ /** Priority List used for Commit Policy */
++ std::list<unsigned> priority_list;
++
+ /** IEW to Commit delay, in ticks. */
+ unsigned iewToCommitDelay;
+
++ /** Commit to IEW delay, in ticks. */
++ unsigned commitToIEWDelay;
++
+ /** Rename to ROB delay, in ticks. */
+ unsigned renameToROBDelay;
+
++ unsigned fetchToCommitDelay;
++
+ /** Rename width, in instructions. Used so ROB knows how many
+ * instructions to get from the rename instruction queue.
+ */
+ unsigned renameWidth;
+
+ /** IEW width, in instructions. Used so ROB knows how many
+ * instructions to get from the IEW instruction queue.
+ */
+ unsigned iewWidth;
+
+ /** Commit width, in instructions. */
+ unsigned commitWidth;
+
++ /** Number of Reorder Buffers */
++ unsigned numRobs;
++
++ /** Number of Active Threads */
++ unsigned numThreads;
++
++ bool switchPending;
++ bool switchedOut;
++
++ Tick trapLatency;
++
++ Tick fetchTrapLatency;
++
++ Tick fetchFaultTick;
++
++ Addr PC[Impl::MaxThreads];
++
++ Addr nextPC[Impl::MaxThreads];
++
++ /** The sequence number of the youngest valid instruction in the ROB. */
++ InstSeqNum youngestSeqNum[Impl::MaxThreads];
++
++ /** Pointer to the list of active threads. */
++ std::list<unsigned> *activeThreads;
++
++ /** Rename map interface. */
++ RenameMap *renameMap[Impl::MaxThreads];
++
++ void updateComInstStats(DynInstPtr &inst);
++
++ /** Stat for the total number of committed instructions. */
+ Stats::Scalar<> commitCommittedInsts;
++ /** Stat for the total number of squashed instructions discarded by commit.
++ */
+ Stats::Scalar<> commitSquashedInsts;
++ /** Stat for the total number of times commit is told to squash.
++ * @todo: Actually increment this stat.
++ */
+ Stats::Scalar<> commitSquashEvents;
++ /** Stat for the total number of times commit has had to stall due to a non-
++ * speculative instruction reaching the head of the ROB.
++ */
+ Stats::Scalar<> commitNonSpecStalls;
-
- Stats::Distribution<> n_committed_dist;
++ /** Stat for the total number of branch mispredicts that caused a squash. */
+ Stats::Scalar<> branchMispredicts;
- #endif // __CPU_O3_CPU_SIMPLE_COMMIT_HH__
++ /** Distribution of the number of committed instructions each cycle. */
++ Stats::Distribution<> numCommittedDist;
++
++ /** Total number of instructions committed. */
++ Stats::Vector<> statComInst;
++ /** Total number of software prefetches committed. */
++ Stats::Vector<> statComSwp;
++ /** Stat for the total number of committed memory references. */
++ Stats::Vector<> statComRefs;
++ /** Stat for the total number of committed loads. */
++ Stats::Vector<> statComLoads;
++ /** Total number of committed memory barriers. */
++ Stats::Vector<> statComMembars;
++ /** Total number of committed branches. */
++ Stats::Vector<> statComBranches;
++
++ Stats::Scalar<> commitEligibleSamples;
++ Stats::Vector<> commitEligible;
+};
+
++#endif // __CPU_O3_COMMIT_HH__
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- #include "cpu/o3/commit.hh"
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
++#include <algorithm>
++#include <string>
++
++#include "base/loader/symtab.hh"
+#include "base/timebuf.hh"
- SimpleCommit<Impl>::SimpleCommit(Params ¶ms)
- : dcacheInterface(params.dcacheInterface),
- iewToCommitDelay(params.iewToCommitDelay),
- renameToROBDelay(params.renameToROBDelay),
- renameWidth(params.renameWidth),
- iewWidth(params.executeWidth),
- commitWidth(params.commitWidth)
++#include "cpu/checker/cpu.hh"
+#include "cpu/exetrace.hh"
++#include "cpu/o3/commit.hh"
++#include "cpu/o3/thread_state.hh"
++
++using namespace std;
+
+template <class Impl>
- _status = Idle;
++DefaultCommit<Impl>::TrapEvent::TrapEvent(DefaultCommit<Impl> *_commit,
++ unsigned _tid)
++ : Event(&mainEventQueue, CPU_Tick_Pri), commit(_commit), tid(_tid)
+{
- SimpleCommit<Impl>::regStats()
++ this->setFlags(Event::AutoDelete);
+}
+
+template <class Impl>
+void
- commitCommittedBranches
- .name(name() + ".commitCommittedBranches")
- .desc("The number of committed branches")
- .prereq(commitCommittedBranches);
- commitCommittedLoads
- .name(name() + ".commitCommittedLoads")
- .desc("The number of committed loads")
- .prereq(commitCommittedLoads);
- commitCommittedMemRefs
- .name(name() + ".commitCommittedMemRefs")
- .desc("The number of committed memory references")
- .prereq(commitCommittedMemRefs);
++DefaultCommit<Impl>::TrapEvent::process()
++{
++ // This will get reset by commit if it was switched out at the
++ // time of this event processing.
++ commit->trapSquash[tid] = true;
++}
++
++template <class Impl>
++const char *
++DefaultCommit<Impl>::TrapEvent::description()
++{
++ return "Trap event";
++}
++
++template <class Impl>
++DefaultCommit<Impl>::DefaultCommit(Params *params)
++ : dcacheInterface(params->dcacheInterface),
++ squashCounter(0),
++ iewToCommitDelay(params->iewToCommitDelay),
++ commitToIEWDelay(params->commitToIEWDelay),
++ renameToROBDelay(params->renameToROBDelay),
++ fetchToCommitDelay(params->commitToFetchDelay),
++ renameWidth(params->renameWidth),
++ iewWidth(params->executeWidth),
++ commitWidth(params->commitWidth),
++ numThreads(params->numberOfThreads),
++ switchedOut(false),
++ trapLatency(params->trapLatency),
++ fetchTrapLatency(params->fetchTrapLatency)
+{
++ _status = Active;
++ _nextStatus = Inactive;
++ string policy = params->smtCommitPolicy;
++
++ //Convert string to lowercase
++ std::transform(policy.begin(), policy.end(), policy.begin(),
++ (int(*)(int)) tolower);
++
++ //Assign commit policy
++ if (policy == "aggressive"){
++ commitPolicy = Aggressive;
++
++ DPRINTF(Commit,"Commit Policy set to Aggressive.");
++ } else if (policy == "roundrobin"){
++ commitPolicy = RoundRobin;
++
++ //Set-Up Priority List
++ for (int tid=0; tid < numThreads; tid++) {
++ priority_list.push_back(tid);
++ }
++
++ DPRINTF(Commit,"Commit Policy set to Round Robin.");
++ } else if (policy == "oldestready"){
++ commitPolicy = OldestReady;
++
++ DPRINTF(Commit,"Commit Policy set to Oldest Ready.");
++ } else {
++ assert(0 && "Invalid SMT Commit Policy. Options Are: {Aggressive,"
++ "RoundRobin,OldestReady}");
++ }
++
++ for (int i=0; i < numThreads; i++) {
++ commitStatus[i] = Idle;
++ changedROBNumEntries[i] = false;
++ trapSquash[i] = false;
++ xcSquash[i] = false;
++ }
++
++ fetchFaultTick = 0;
++ fetchTrapWait = 0;
++}
++
++template <class Impl>
++std::string
++DefaultCommit<Impl>::name() const
++{
++ return cpu->name() + ".commit";
++}
++
++template <class Impl>
++void
++DefaultCommit<Impl>::regStats()
++{
++ using namespace Stats;
+ commitCommittedInsts
+ .name(name() + ".commitCommittedInsts")
+ .desc("The number of committed instructions")
+ .prereq(commitCommittedInsts);
+ commitSquashedInsts
+ .name(name() + ".commitSquashedInsts")
+ .desc("The number of squashed insts skipped by commit")
+ .prereq(commitSquashedInsts);
+ commitSquashEvents
+ .name(name() + ".commitSquashEvents")
+ .desc("The number of times commit is told to squash")
+ .prereq(commitSquashEvents);
+ commitNonSpecStalls
+ .name(name() + ".commitNonSpecStalls")
+ .desc("The number of times commit has been forced to stall to "
+ "communicate backwards")
+ .prereq(commitNonSpecStalls);
- n_committed_dist
+ branchMispredicts
+ .name(name() + ".branchMispredicts")
+ .desc("The number of times a branch was mispredicted")
+ .prereq(branchMispredicts);
- SimpleCommit<Impl>::setCPU(FullCPU *cpu_ptr)
++ numCommittedDist
+ .init(0,commitWidth,1)
+ .name(name() + ".COM:committed_per_cycle")
+ .desc("Number of insts commited each cycle")
+ .flags(Stats::pdf)
+ ;
++
++ statComInst
++ .init(cpu->number_of_threads)
++ .name(name() + ".COM:count")
++ .desc("Number of instructions committed")
++ .flags(total)
++ ;
++
++ statComSwp
++ .init(cpu->number_of_threads)
++ .name(name() + ".COM:swp_count")
++ .desc("Number of s/w prefetches committed")
++ .flags(total)
++ ;
++
++ statComRefs
++ .init(cpu->number_of_threads)
++ .name(name() + ".COM:refs")
++ .desc("Number of memory references committed")
++ .flags(total)
++ ;
++
++ statComLoads
++ .init(cpu->number_of_threads)
++ .name(name() + ".COM:loads")
++ .desc("Number of loads committed")
++ .flags(total)
++ ;
++
++ statComMembars
++ .init(cpu->number_of_threads)
++ .name(name() + ".COM:membars")
++ .desc("Number of memory barriers committed")
++ .flags(total)
++ ;
++
++ statComBranches
++ .init(cpu->number_of_threads)
++ .name(name() + ".COM:branches")
++ .desc("Number of branches committed")
++ .flags(total)
++ ;
++
++ //
++ // Commit-Eligible instructions...
++ //
++ // -> The number of instructions eligible to commit in those
++ // cycles where we reached our commit BW limit (less the number
++ // actually committed)
++ //
++ // -> The average value is computed over ALL CYCLES... not just
++ // the BW limited cycles
++ //
++ // -> The standard deviation is computed only over cycles where
++ // we reached the BW limit
++ //
++ commitEligible
++ .init(cpu->number_of_threads)
++ .name(name() + ".COM:bw_limited")
++ .desc("number of insts not committed due to BW limits")
++ .flags(total)
++ ;
++
++ commitEligibleSamples
++ .name(name() + ".COM:bw_lim_events")
++ .desc("number cycles where commit BW limit reached")
++ ;
+}
+
+template <class Impl>
+void
- SimpleCommit<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
++DefaultCommit<Impl>::setCPU(FullCPU *cpu_ptr)
+{
+ DPRINTF(Commit, "Commit: Setting CPU pointer.\n");
+ cpu = cpu_ptr;
++
++ // Commit must broadcast the number of free entries it has at the start of
++ // the simulation, so it starts as active.
++ cpu->activateStage(FullCPU::CommitIdx);
++
++ trapLatency = cpu->cycles(trapLatency);
++ fetchTrapLatency = cpu->cycles(fetchTrapLatency);
++}
++
++template <class Impl>
++void
++DefaultCommit<Impl>::setThreads(vector<Thread *> &threads)
++{
++ thread = threads;
+}
+
+template <class Impl>
+void
- SimpleCommit<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
++DefaultCommit<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
+{
+ DPRINTF(Commit, "Commit: Setting time buffer pointer.\n");
+ timeBuffer = tb_ptr;
+
+ // Setup wire to send information back to IEW.
+ toIEW = timeBuffer->getWire(0);
+
+ // Setup wire to read data from IEW (for the ROB).
+ robInfoFromIEW = timeBuffer->getWire(-iewToCommitDelay);
+}
+
+template <class Impl>
+void
- SimpleCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
++DefaultCommit<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
++{
++ DPRINTF(Commit, "Commit: Setting fetch queue pointer.\n");
++ fetchQueue = fq_ptr;
++
++ // Setup wire to get instructions from rename (for the ROB).
++ fromFetch = fetchQueue->getWire(-fetchToCommitDelay);
++}
++
++template <class Impl>
++void
++DefaultCommit<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
+{
+ DPRINTF(Commit, "Commit: Setting rename queue pointer.\n");
+ renameQueue = rq_ptr;
+
+ // Setup wire to get instructions from rename (for the ROB).
+ fromRename = renameQueue->getWire(-renameToROBDelay);
+}
+
+template <class Impl>
+void
- SimpleCommit<Impl>::setROB(ROB *rob_ptr)
++DefaultCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
+{
+ DPRINTF(Commit, "Commit: Setting IEW queue pointer.\n");
+ iewQueue = iq_ptr;
+
+ // Setup wire to get instructions from IEW.
+ fromIEW = iewQueue->getWire(-iewToCommitDelay);
+}
+
+template <class Impl>
+void
- SimpleCommit<Impl>::tick()
- {
- // If the ROB is currently in its squash sequence, then continue
- // to squash. In this case, commit does not do anything. Otherwise
- // run commit.
- if (_status == ROBSquashing) {
- if (rob->isDoneSquashing()) {
- _status = Running;
- } else {
- rob->doSquash();
-
- // Send back sequence number of tail of ROB, so other stages
- // can squash younger instructions. Note that really the only
- // stage that this is important for is the IEW stage; other
- // stages can just clear all their state as long as selective
- // replay isn't used.
- toIEW->commitInfo.doneSeqNum = rob->readTailSeqNum();
- toIEW->commitInfo.robSquashing = true;
++DefaultCommit<Impl>::setFetchStage(Fetch *fetch_stage)
++{
++ fetchStage = fetch_stage;
++}
++
++template <class Impl>
++void
++DefaultCommit<Impl>::setIEWStage(IEW *iew_stage)
++{
++ iewStage = iew_stage;
++}
++
++template<class Impl>
++void
++DefaultCommit<Impl>::setActiveThreads(list<unsigned> *at_ptr)
++{
++ DPRINTF(Commit, "Commit: Setting active threads list pointer.\n");
++ activeThreads = at_ptr;
++}
++
++template <class Impl>
++void
++DefaultCommit<Impl>::setRenameMap(RenameMap rm_ptr[])
++{
++ DPRINTF(Commit, "Setting rename map pointers.\n");
++
++ for (int i=0; i < numThreads; i++) {
++ renameMap[i] = &rm_ptr[i];
++ }
++}
++
++template <class Impl>
++void
++DefaultCommit<Impl>::setROB(ROB *rob_ptr)
+{
+ DPRINTF(Commit, "Commit: Setting ROB pointer.\n");
+ rob = rob_ptr;
+}
+
+template <class Impl>
+void
- } else {
- commit();
++DefaultCommit<Impl>::initStage()
++{
++ rob->setActiveThreads(activeThreads);
++ rob->resetEntries();
++
++ // Broadcast the number of free entries.
++ for (int i=0; i < numThreads; i++) {
++ toIEW->commitInfo[i].usedROB = true;
++ toIEW->commitInfo[i].freeROBEntries = rob->numFreeEntries(i);
++ }
++
++ cpu->activityThisCycle();
++}
++
++template <class Impl>
++void
++DefaultCommit<Impl>::switchOut()
++{
++ switchPending = true;
++}
++
++template <class Impl>
++void
++DefaultCommit<Impl>::doSwitchOut()
++{
++ switchedOut = true;
++ switchPending = false;
++ rob->switchOut();
++}
++
++template <class Impl>
++void
++DefaultCommit<Impl>::takeOverFrom()
++{
++ switchedOut = false;
++ _status = Active;
++ _nextStatus = Inactive;
++ for (int i=0; i < numThreads; i++) {
++ commitStatus[i] = Idle;
++ changedROBNumEntries[i] = false;
++ trapSquash[i] = false;
++ xcSquash[i] = false;
++ }
++ squashCounter = 0;
++ rob->takeOverFrom();
++}
++
++template <class Impl>
++void
++DefaultCommit<Impl>::updateStatus()
++{
++ // reset ROB changed variable
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = *threads++;
++ changedROBNumEntries[tid] = false;
++
++ // Also check if any of the threads has a trap pending
++ if (commitStatus[tid] == TrapPending ||
++ commitStatus[tid] == FetchTrapPending) {
++ _nextStatus = Active;
+ }
- // Writeback number of free ROB entries here.
- DPRINTF(Commit, "Commit: ROB has %d free entries.\n",
- rob->numFreeEntries());
- toIEW->commitInfo.freeROBEntries = rob->numFreeEntries();
+ }
+
++ if (_nextStatus == Inactive && _status == Active) {
++ DPRINTF(Activity, "Deactivating stage.\n");
++ cpu->deactivateStage(FullCPU::CommitIdx);
++ } else if (_nextStatus == Active && _status == Inactive) {
++ DPRINTF(Activity, "Activating stage.\n");
++ cpu->activateStage(FullCPU::CommitIdx);
++ }
++
++ _status = _nextStatus;
++}
++
++template <class Impl>
++void
++DefaultCommit<Impl>::setNextStatus()
++{
++ int squashes = 0;
++
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = *threads++;
++
++ if (commitStatus[tid] == ROBSquashing) {
++ squashes++;
++ }
++ }
++
++ assert(squashes == squashCounter);
++
++ // If commit is currently squashing, then it will have activity for the
++ // next cycle. Set its next status as active.
++ if (squashCounter) {
++ _nextStatus = Active;
++ }
++}
++
++template <class Impl>
++bool
++DefaultCommit<Impl>::changedROBEntries()
++{
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = *threads++;
++
++ if (changedROBNumEntries[tid]) {
++ return true;
++ }
++ }
++
++ return false;
++}
++
++template <class Impl>
++unsigned
++DefaultCommit<Impl>::numROBFreeEntries(unsigned tid)
++{
++ return rob->numFreeEntries(tid);
++}
++
++template <class Impl>
++void
++DefaultCommit<Impl>::generateTrapEvent(unsigned tid)
++{
++ DPRINTF(Commit, "Generating trap event for [tid:%i]\n", tid);
++
++ TrapEvent *trap = new TrapEvent(this, tid);
++
++ trap->schedule(curTick + trapLatency);
++
++ thread[tid]->trapPending = true;
++}
++
++template <class Impl>
++void
++DefaultCommit<Impl>::generateXCEvent(unsigned tid)
++{
++ DPRINTF(Commit, "Generating XC squash event for [tid:%i]\n", tid);
++
++ xcSquash[tid] = true;
++}
++
++template <class Impl>
++void
++DefaultCommit<Impl>::squashAll(unsigned tid)
++{
++ // If we want to include the squashing instruction in the squash,
++ // then use one older sequence number.
++ // Hopefully this doesn't mess things up. Basically I want to squash
++ // all instructions of this thread.
++ InstSeqNum squashed_inst = rob->isEmpty() ?
++ 0 : rob->readHeadInst(tid)->seqNum - 1;;
++
++ // All younger instructions will be squashed. Set the sequence
++ // number as the youngest instruction in the ROB (0 in this case.
++ // Hopefully nothing breaks.)
++ youngestSeqNum[tid] = 0;
++
++ rob->squash(squashed_inst, tid);
++ changedROBNumEntries[tid] = true;
++
++ // Send back the sequence number of the squashed instruction.
++ toIEW->commitInfo[tid].doneSeqNum = squashed_inst;
++
++ // Send back the squash signal to tell stages that they should
++ // squash.
++ toIEW->commitInfo[tid].squash = true;
++
++ // Send back the rob squashing signal so other stages know that
++ // the ROB is in the process of squashing.
++ toIEW->commitInfo[tid].robSquashing = true;
++
++ toIEW->commitInfo[tid].branchMispredict = false;
++
++ toIEW->commitInfo[tid].nextPC = PC[tid];
++}
++
++template <class Impl>
++void
++DefaultCommit<Impl>::squashFromTrap(unsigned tid)
++{
++ squashAll(tid);
++
++ DPRINTF(Commit, "Squashing from trap, restarting at PC %#x\n", PC[tid]);
++
++ thread[tid]->trapPending = false;
++ thread[tid]->inSyscall = false;
++
++ trapSquash[tid] = false;
++
++ commitStatus[tid] = ROBSquashing;
++ cpu->activityThisCycle();
++
++ ++squashCounter;
++}
++
++template <class Impl>
++void
++DefaultCommit<Impl>::squashFromXC(unsigned tid)
++{
++ squashAll(tid);
++
++ DPRINTF(Commit, "Squashing from XC, restarting at PC %#x\n", PC[tid]);
++
++ thread[tid]->inSyscall = false;
++ assert(!thread[tid]->trapPending);
++
++ commitStatus[tid] = ROBSquashing;
++ cpu->activityThisCycle();
++
++ xcSquash[tid] = false;
++
++ ++squashCounter;
++}
++
++template <class Impl>
++void
++DefaultCommit<Impl>::tick()
++{
++ wroteToTimeBuffer = false;
++ _nextStatus = Inactive;
++
++ if (switchPending && rob->isEmpty() && !iewStage->hasStoresToWB()) {
++ cpu->signalSwitched();
++ return;
++ }
++
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++
++ // Check if any of the threads are done squashing. Change the
++ // status if they are done.
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = *threads++;
++
++ if (commitStatus[tid] == ROBSquashing) {
++
++ if (rob->isDoneSquashing(tid)) {
++ commitStatus[tid] = Running;
++ --squashCounter;
++ } else {
++ DPRINTF(Commit,"[tid:%u]: Still Squashing, cannot commit any"
++ "insts this cycle.\n", tid);
++ }
++ }
++ }
++
++ commit();
++
+ markCompletedInsts();
+
- SimpleCommit<Impl>::commit()
++ threads = (*activeThreads).begin();
++
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = *threads++;
++
++ if (!rob->isEmpty(tid) && rob->readHeadInst(tid)->readyToCommit()) {
++ // The ROB has more instructions it can commit. Its next status
++ // will be active.
++ _nextStatus = Active;
++
++ DynInstPtr inst = rob->readHeadInst(tid);
++
++ DPRINTF(Commit,"[tid:%i]: Instruction [sn:%lli] PC %#x is head of"
++ " ROB and ready to commit\n",
++ tid, inst->seqNum, inst->readPC());
++
++ } else if (!rob->isEmpty(tid)) {
++ DynInstPtr inst = rob->readHeadInst(tid);
++
++ DPRINTF(Commit,"[tid:%i]: Can't commit, Instruction [sn:%lli] PC "
++ "%#x is head of ROB and not ready\n",
++ tid, inst->seqNum, inst->readPC());
++ }
++
++ DPRINTF(Commit, "[tid:%i]: ROB has %d insts & %d free entries.\n",
++ tid, rob->countInsts(tid), rob->numFreeEntries(tid));
++ }
++
++
++ if (wroteToTimeBuffer) {
++ DPRINTF(Activity, "Activity This Cycle.\n");
++ cpu->activityThisCycle();
++ }
++
++ updateStatus();
+}
+
+template <class Impl>
+void
- // Process interrupts if interrupts are enabled and not in PAL mode.
- // Take the PC from commit and write it to the IPR, then squash. The
- // interrupt completing will take care of restoring the PC from that value
- // in the IPR. Look at IPR[EXC_ADDR];
- // hwrei() is what resets the PC to the place where instruction execution
- // beings again.
++DefaultCommit<Impl>::commit()
+{
++
+ //////////////////////////////////////
+ // Check for interrupts
+ //////////////////////////////////////
+
- if (//checkInterrupts &&
+#if FULL_SYSTEM
- !cpu->inPalMode(readCommitPC())) {
- // Will need to squash all instructions currently in flight and have
- // the interrupt handler restart at the last non-committed inst.
- // Most of that can be handled through the trap() function. The
- // processInterrupts() function really just checks for interrupts
- // and then calls trap() if there is an interrupt present.
++ // Process interrupts if interrupts are enabled, not in PAL mode,
++ // and no other traps or external squashes are currently pending.
++ // @todo: Allow other threads to handle interrupts.
++ if (cpu->checkInterrupts &&
+ cpu->check_interrupts() &&
- // CPU will handle implementation of the interrupt.
- cpu->processInterrupts();
++ !cpu->inPalMode(readPC()) &&
++ !trapSquash[0] &&
++ !xcSquash[0]) {
++ // Tell fetch that there is an interrupt pending. This will
++ // make fetch wait until it sees a non PAL-mode PC, at which
++ // point it stops fetching instructions.
++ toIEW->commitInfo[0].interruptPending = true;
++
++ // Wait until the ROB is empty and all stores have drained in
++ // order to enter the interrupt.
++ if (rob->isEmpty() && !iewStage->hasStoresToWB()) {
++ // Not sure which thread should be the one to interrupt. For now
++ // always do thread 0.
++ assert(!thread[0]->inSyscall);
++ thread[0]->inSyscall = true;
++
++ // CPU will handle implementation of the interrupt.
++ cpu->processInterrupts();
++
++ // Now squash or record that I need to squash this cycle.
++ commitStatus[0] = TrapPending;
++
++ // Exit state update mode to avoid accidental updating.
++ thread[0]->inSyscall = false;
++
++ // Generate trap squash event.
++ generateTrapEvent(0);
+
- // Check for squash signal, handle that first
++ toIEW->commitInfo[0].clearInterrupt = true;
++
++ DPRINTF(Commit, "Interrupt detected.\n");
++ } else {
++ DPRINTF(Commit, "Interrupt pending, waiting for ROB to empty.\n");
++ }
+ }
+#endif // FULL_SYSTEM
+
+ ////////////////////////////////////
- // Want to mainly check if the IEW stage is telling the ROB to squash.
- // Should I also check if the commit stage is telling the ROB to squah?
- // This might be necessary to keep the same timing between the IQ and
- // the ROB...
- if (fromIEW->squash) {
- DPRINTF(Commit, "Commit: Squashing instructions in the ROB.\n");
++ // Check for any possible squashes, handle them first
+ ////////////////////////////////////
+
- _status = ROBSquashing;
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = *threads++;
++
++ if (fromFetch->fetchFault && commitStatus[0] != TrapPending) {
++ // Record the fault. Wait until it's empty in the ROB.
++ // Then handle the trap. Ignore it if there's already a
++ // trap pending as fetch will be redirected.
++ fetchFault = fromFetch->fetchFault;
++ fetchFaultTick = curTick + fetchTrapLatency;
++ commitStatus[0] = FetchTrapPending;
++ DPRINTF(Commit, "Fault from fetch recorded. Will trap if the "
++ "ROB empties without squashing the fault.\n");
++ fetchTrapWait = 0;
++ }
++
++ // Fetch may tell commit to clear the trap if it's been squashed.
++ if (fromFetch->clearFetchFault) {
++ DPRINTF(Commit, "Received clear fetch fault signal\n");
++ fetchTrapWait = 0;
++ if (commitStatus[0] == FetchTrapPending) {
++ DPRINTF(Commit, "Clearing fault from fetch\n");
++ commitStatus[0] = Running;
++ }
++ }
++
++ // Not sure which one takes priority. I think if we have
++ // both, that's a bad sign.
++ if (trapSquash[tid] == true) {
++ assert(!xcSquash[tid]);
++ squashFromTrap(tid);
++ } else if (xcSquash[tid] == true) {
++ squashFromXC(tid);
++ }
++
++ // Squashed sequence number must be older than youngest valid
++ // instruction in the ROB. This prevents squashes from younger
++ // instructions overriding squashes from older instructions.
++ if (fromIEW->squash[tid] &&
++ commitStatus[tid] != TrapPending &&
++ fromIEW->squashedSeqNum[tid] <= youngestSeqNum[tid]) {
++
++ DPRINTF(Commit, "[tid:%i]: Squashing due to PC %#x [sn:%i]\n",
++ tid,
++ fromIEW->mispredPC[tid],
++ fromIEW->squashedSeqNum[tid]);
+
- InstSeqNum squashed_inst = fromIEW->squashedSeqNum;
++ DPRINTF(Commit, "[tid:%i]: Redirecting to PC %#x\n",
++ tid,
++ fromIEW->nextPC[tid]);
+
- rob->squash(squashed_inst);
++ commitStatus[tid] = ROBSquashing;
+
- // Send back the sequence number of the squashed instruction.
- toIEW->commitInfo.doneSeqNum = squashed_inst;
++ ++squashCounter;
+
- // Send back the squash signal to tell stages that they should squash.
- toIEW->commitInfo.squash = true;
++ // If we want to include the squashing instruction in the squash,
++ // then use one older sequence number.
++ InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid];
+
- // Send back the rob squashing signal so other stages know that the
- // ROB is in the process of squashing.
- toIEW->commitInfo.robSquashing = true;
++ if (fromIEW->includeSquashInst[tid] == true)
++ squashed_inst--;
+
- toIEW->commitInfo.branchMispredict = fromIEW->branchMispredict;
++ // All younger instructions will be squashed. Set the sequence
++ // number as the youngest instruction in the ROB.
++ youngestSeqNum[tid] = squashed_inst;
+
- toIEW->commitInfo.branchTaken = fromIEW->branchTaken;
++ rob->squash(squashed_inst, tid);
++ changedROBNumEntries[tid] = true;
+
- toIEW->commitInfo.nextPC = fromIEW->nextPC;
++ toIEW->commitInfo[tid].doneSeqNum = squashed_inst;
+
- toIEW->commitInfo.mispredPC = fromIEW->mispredPC;
++ toIEW->commitInfo[tid].squash = true;
+
- if (toIEW->commitInfo.branchMispredict) {
- ++branchMispredicts;
++ // Send back the rob squashing signal so other stages know that
++ // the ROB is in the process of squashing.
++ toIEW->commitInfo[tid].robSquashing = true;
+
- if (_status != ROBSquashing) {
++ toIEW->commitInfo[tid].branchMispredict =
++ fromIEW->branchMispredict[tid];
++
++ toIEW->commitInfo[tid].branchTaken =
++ fromIEW->branchTaken[tid];
++
++ toIEW->commitInfo[tid].nextPC = fromIEW->nextPC[tid];
++
++ toIEW->commitInfo[tid].mispredPC = fromIEW->mispredPC[tid];
++
++ if (toIEW->commitInfo[tid].branchMispredict) {
++ ++branchMispredicts;
++ }
+ }
++
+ }
+
- // If the ROB is empty, we can set this stage to idle. Use this
- // in the future when the Idle status will actually be utilized.
- #if 0
- if (rob->isEmpty()) {
- DPRINTF(Commit, "Commit: ROB is empty. Status changed to idle.\n");
- _status = Idle;
- // Schedule an event so that commit will actually wake up
- // once something gets put in the ROB.
++ setNextStatus();
++
++ if (squashCounter != numThreads) {
+ // If we're not currently squashing, then get instructions.
+ getInsts();
+
+ // Try to commit any instructions.
+ commitInsts();
+ }
+
- #endif
++ //Check for any activity
++ threads = (*activeThreads).begin();
++
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = *threads++;
++
++ if (changedROBNumEntries[tid]) {
++ toIEW->commitInfo[tid].usedROB = true;
++ toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid);
++
++ if (rob->isEmpty(tid)) {
++ toIEW->commitInfo[tid].emptyROB = true;
++ }
++
++ wroteToTimeBuffer = true;
++ changedROBNumEntries[tid] = false;
++ }
+ }
- // Loop that goes through as many instructions in the ROB as possible and
- // tries to commit them. The actual work for committing is done by the
- // commitHead() function.
+}
+
- SimpleCommit<Impl>::commitInsts()
+template <class Impl>
+void
- // Note that commit will be handled prior to the ROB so that the ROB
- // only tries to commit instructions it has in this current cycle, and
- // not instructions it is writing in during this cycle.
- // Can't commit and squash things at the same time...
++DefaultCommit<Impl>::commitInsts()
+{
+ ////////////////////////////////////
+ // Handle commit
- if (rob->isEmpty())
- return;
-
- DynInstPtr head_inst = rob->readHeadInst();
++ // Note that commit will be handled prior to putting new
++ // instructions in the ROB so that the ROB only tries to commit
++ // instructions it has in this current cycle, and not instructions
++ // it is writing in during this cycle. Can't commit and squash
++ // things at the same time...
+ ////////////////////////////////////
+
- while (!rob->isEmpty() &&
- head_inst->readyToCommit() &&
- num_committed < commitWidth)
- {
- DPRINTF(Commit, "Commit: Trying to commit head instruction.\n");
++ DPRINTF(Commit, "Trying to commit instructions in the ROB.\n");
+
+ unsigned num_committed = 0;
+
++ DynInstPtr head_inst;
++
+ // Commit as many instructions as possible until the commit bandwidth
+ // limit is reached, or it becomes impossible to commit any more.
- // If the head instruction is squashed, it is ready to retire at any
- // time. However, we need to avoid updating any other state
- // incorrectly if it's already been squashed.
++ while (num_committed < commitWidth) {
++ int commit_thread = getCommittingThread();
++
++ if (commit_thread == -1 || !rob->isHeadReady(commit_thread))
++ break;
++
++ head_inst = rob->readHeadInst(commit_thread);
++
++ int tid = head_inst->threadNumber;
++
++ assert(tid == commit_thread);
++
++ DPRINTF(Commit, "Trying to commit head instruction, [sn:%i] [tid:%i]\n",
++ head_inst->seqNum, tid);
+
- DPRINTF(Commit, "Commit: Retiring squashed instruction from "
++ // If the head instruction is squashed, it is ready to retire
++ // (be removed from the ROB) at any time.
+ if (head_inst->isSquashed()) {
+
- // Tell ROB to retire head instruction. This retires the head
- // inst in the ROB without affecting any other stages.
- rob->retireHead();
++ DPRINTF(Commit, "Retiring squashed instruction from "
+ "ROB.\n");
+
- cpu->funcExeInst++;
++ rob->retireHead(commit_thread);
+
+ ++commitSquashedInsts;
+
++ // Record that the number of ROB entries has changed.
++ changedROBNumEntries[tid] = true;
+ } else {
++ PC[tid] = head_inst->readPC();
++ nextPC[tid] = head_inst->readNextPC();
++
+ // Increment the total number of non-speculative instructions
+ // executed.
+ // Hack for now: it really shouldn't happen until after the
+ // commit is deemed to be successful, but this count is needed
+ // for syscalls.
- // Update what instruction we are looking at if the commit worked.
++ thread[tid]->funcExeInst++;
+
+ // Try to commit the head instruction.
+ bool commit_success = commitHead(head_inst, num_committed);
+
- // Send back which instruction has been committed.
- // @todo: Update this later when a wider pipeline is used.
- // Hmm, can't really give a pointer here...perhaps the
- // sequence number instead (copy).
- toIEW->commitInfo.doneSeqNum = head_inst->seqNum;
+ if (commit_success) {
+ ++num_committed;
+
- if (!head_inst->isNop()) {
- cpu->instDone();
++ changedROBNumEntries[tid] = true;
++
++ // Set the doneSeqNum to the youngest committed instruction.
++ toIEW->commitInfo[tid].doneSeqNum = head_inst->seqNum;
+
+ ++commitCommittedInsts;
+
-
- // Update the pointer to read the next instruction in the ROB.
- head_inst = rob->readHeadInst();
++ // To match the old model, don't count nops and instruction
++ // prefetches towards the total commit count.
++ if (!head_inst->isNop() && !head_inst->isInstPrefetch()) {
++ cpu->instDone(tid);
++ }
++
++ PC[tid] = nextPC[tid];
++ nextPC[tid] = nextPC[tid] + sizeof(TheISA::MachInst);
++#if FULL_SYSTEM
++ int count = 0;
++ Addr oldpc;
++ do {
++ // Debug statement. Checks to make sure we're not
++ // currently updating state while handling PC events.
++ if (count == 0)
++ assert(!thread[tid]->inSyscall &&
++ !thread[tid]->trapPending);
++ oldpc = PC[tid];
++ cpu->system->pcEventQueue.service(
++ thread[tid]->getXCProxy());
++ count++;
++ } while (oldpc != PC[tid]);
++ if (count > 1) {
++ DPRINTF(Commit, "PC skip function event, stopping commit\n");
++ break;
+ }
++#endif
+ } else {
++ DPRINTF(Commit, "Unable to commit head instruction PC:%#x "
++ "[tid:%i] [sn:%i].\n",
++ head_inst->readPC(), tid ,head_inst->seqNum);
+ break;
+ }
+ }
- n_committed_dist.sample(num_committed);
+ }
+
+ DPRINTF(CommitRate, "%i\n", num_committed);
- SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
++ numCommittedDist.sample(num_committed);
++
++ if (num_committed == commitWidth) {
++ commitEligible[0]++;
++ }
+}
+
+template <class Impl>
+bool
- // Make sure instruction is valid
++DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
+{
- // If the instruction is not executed yet, then it is a non-speculative
- // or store inst. Signal backwards that it should be executed.
+ assert(head_inst);
+
- cpu->funcExeInst--;
-
- if (head_inst->isNonSpeculative()) {
- DPRINTF(Commit, "Commit: Encountered a store or non-speculative "
- "instruction at the head of the ROB, PC %#x.\n",
- head_inst->readPC());
++ int tid = head_inst->threadNumber;
++
++ // If the instruction is not executed yet, then it will need extra
++ // handling. Signal backwards that it should be executed.
+ if (!head_inst->isExecuted()) {
+ // Keep this number correct. We have not yet actually executed
+ // and committed this instruction.
- toIEW->commitInfo.nonSpecSeqNum = head_inst->seqNum;
++ thread[tid]->funcExeInst--;
++
++ head_inst->reachedCommit = true;
++
++ if (head_inst->isNonSpeculative() ||
++ head_inst->isMemBarrier() ||
++ head_inst->isWriteBarrier()) {
++
++ DPRINTF(Commit, "Encountered a barrier or non-speculative "
++ "instruction [sn:%lli] at the head of the ROB, PC %#x.\n",
++ head_inst->seqNum, head_inst->readPC());
++
++#if !FULL_SYSTEM
++ // Hack to make sure syscalls/memory barriers/quiesces
++ // aren't executed until all stores write back their data.
++ // This direct communication shouldn't be used for
++ // anything other than this.
++ if (inst_num > 0 || iewStage->hasStoresToWB())
++#else
++ if ((head_inst->isMemBarrier() || head_inst->isWriteBarrier() ||
++ head_inst->isQuiesce()) &&
++ iewStage->hasStoresToWB())
++#endif
++ {
++ DPRINTF(Commit, "Waiting for all stores to writeback.\n");
++ return false;
++ }
+
- panic("Commit: Trying to commit un-executed instruction "
++ toIEW->commitInfo[tid].nonSpecSeqNum = head_inst->seqNum;
+
+ // Change the instruction so it won't try to commit again until
+ // it is executed.
+ head_inst->clearCanCommit();
+
+ ++commitNonSpecStalls;
+
++ return false;
++ } else if (head_inst->isLoad()) {
++ DPRINTF(Commit, "[sn:%lli]: Uncached load, PC %#x.\n",
++ head_inst->seqNum, head_inst->readPC());
++
++ // Send back the non-speculative instruction's sequence
++ // number. Tell the lsq to re-execute the load.
++ toIEW->commitInfo[tid].nonSpecSeqNum = head_inst->seqNum;
++ toIEW->commitInfo[tid].uncached = true;
++ toIEW->commitInfo[tid].uncachedLoad = head_inst;
++
++ head_inst->clearCanCommit();
++
+ return false;
+ } else {
- // Now check if it's one of the special trap or barrier or
- // serializing instructions.
- if (head_inst->isThreadSync() ||
- head_inst->isSerializing() ||
- head_inst->isMemBarrier() ||
- head_inst->isWriteBarrier() )
- {
- // Not handled for now. Mem barriers and write barriers are safe
- // to simply let commit as memory accesses only happen once they
- // reach the head of commit. Not sure about the other two.
- panic("Serializing or barrier instructions"
- " are not handled yet.\n");
++ panic("Trying to commit un-executed instruction "
+ "of unknown type!\n");
+ }
+ }
+
- if (!head_inst->isNop()) {
++ if (head_inst->isThreadSync()) {
++ // Not handled for now.
++ panic("Thread sync instructions are not handled yet.\n");
++ }
++
++ // Stores mark themselves as completed.
++ if (!head_inst->isStore()) {
++ head_inst->setCompleted();
++ }
++
++ // Use checker prior to updating anything due to traps or PC
++ // based events.
++ if (cpu->checker) {
++ cpu->checker->tick(head_inst);
+ }
+
+ // Check if the instruction caused a fault. If so, trap.
+ Fault inst_fault = head_inst->getFault();
+
+ if (inst_fault != NoFault) {
- cpu->trap(inst_fault);
- #else // !FULL_SYSTEM
- panic("fault (%d) detected @ PC %08p", inst_fault,
- head_inst->PC);
- #endif // FULL_SYSTEM
++ head_inst->setCompleted();
+#if FULL_SYSTEM
- }
++ DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n",
++ head_inst->seqNum, head_inst->readPC());
++
++ if (iewStage->hasStoresToWB() || inst_num > 0) {
++ DPRINTF(Commit, "Stores outstanding, fault must wait.\n");
++ return false;
+ }
- // Check if we're really ready to commit. If not then return false.
- // I'm pretty sure all instructions should be able to commit if they've
- // reached this far. For now leave this in as a check.
- if (!rob->isHeadReady()) {
- panic("Commit: Unable to commit head instruction!\n");
- return false;
- }
+
- // If it's a branch, then send back branch prediction update info
- // to the fetch stage.
- // This should be handled in the iew stage if a mispredict happens...
++ if (cpu->checker && head_inst->isStore()) {
++ cpu->checker->tick(head_inst);
++ }
+
- if (head_inst->isControl()) {
++ assert(!thread[tid]->inSyscall);
+
- #if 0
- toIEW->nextPC = head_inst->readPC();
- //Maybe switch over to BTB incorrect.
- toIEW->btbMissed = head_inst->btbMiss();
- toIEW->target = head_inst->nextPC;
- //Maybe also include global history information.
- //This simple version will have no branch prediction however.
- #endif
++ // Mark that we're in state update mode so that the trap's
++ // execution doesn't generate extra squashes.
++ thread[tid]->inSyscall = true;
+
- ++commitCommittedBranches;
++ // DTB will sometimes need the machine instruction for when
++ // faults happen. So we will set it here, prior to the DTB
++ // possibly needing it for its fault.
++ thread[tid]->setInst(
++ static_cast<TheISA::MachInst>(head_inst->staticInst->machInst));
++
++ // Execute the trap. Although it's slightly unrealistic in
++ // terms of timing (as it doesn't wait for the full timing of
++ // the trap event to complete before updating state), it's
++ // needed to update the state as soon as possible. This
++ // prevents external agents from changing any specific state
++ // that the trap need.
++ cpu->trap(inst_fault, tid);
++
++ // Exit state update mode to avoid accidental updating.
++ thread[tid]->inSyscall = false;
++
++ commitStatus[tid] = TrapPending;
+
- // Now that the instruction is going to be committed, finalize its
- // trace data.
++ // Generate trap squash event.
++ generateTrapEvent(tid);
++
++ return false;
++#else // !FULL_SYSTEM
++ panic("fault (%d) detected @ PC %08p", inst_fault,
++ head_inst->PC);
++#endif // FULL_SYSTEM
+ }
+
- //Finally clear the head ROB entry.
- rob->retireHead();
++ updateComInstStats(head_inst);
++
+ if (head_inst->traceData) {
++ head_inst->traceData->setFetchSeq(head_inst->seqNum);
++ head_inst->traceData->setCPSeq(thread[tid]->numInst);
+ head_inst->traceData->finalize();
++ head_inst->traceData = NULL;
++ }
++
++ // Update the commit rename map
++ for (int i = 0; i < head_inst->numDestRegs(); i++) {
++ renameMap[tid]->setEntry(head_inst->destRegIdx(i),
++ head_inst->renamedDestRegIdx(i));
+ }
+
- SimpleCommit<Impl>::getInsts()
++ // Finally clear the head ROB entry.
++ rob->retireHead(tid);
+
+ // Return true to indicate that we have committed an instruction.
+ return true;
+}
+
+template <class Impl>
+void
- //////////////////////////////////////
- // Handle ROB functions
- //////////////////////////////////////
-
- // Read any issued instructions and place them into the ROB. Do this
- // prior to squashing to avoid having instructions in the ROB that
- // don't get squashed properly.
++DefaultCommit<Impl>::getInsts()
+{
- for (int inst_num = 0;
- inst_num < insts_to_process;
- ++inst_num)
++ // Read any renamed instructions and place them into the ROB.
+ int insts_to_process = min((int)renameWidth, fromRename->size);
+
- if (!fromRename->insts[inst_num]->isSquashed()) {
- DPRINTF(Commit, "Commit: Inserting PC %#x into ROB.\n",
- fromRename->insts[inst_num]->readPC());
- rob->insertInst(fromRename->insts[inst_num]);
++ for (int inst_num = 0; inst_num < insts_to_process; ++inst_num)
+ {
- DPRINTF(Commit, "Commit: Instruction %i PC %#x was "
++ DynInstPtr inst = fromRename->insts[inst_num];
++ int tid = inst->threadNumber;
++
++ if (!inst->isSquashed() &&
++ commitStatus[tid] != ROBSquashing) {
++ changedROBNumEntries[tid] = true;
++
++ DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ROB.\n",
++ inst->readPC(), inst->seqNum, tid);
++
++ rob->insertInst(inst);
++
++ assert(rob->getThreadEntries(tid) <= rob->getMaxEntries(tid));
++
++ youngestSeqNum[tid] = inst->seqNum;
+ } else {
- fromRename->insts[inst_num]->seqNum,
- fromRename->insts[inst_num]->readPC());
++ DPRINTF(Commit, "Instruction PC %#x [sn:%i] [tid:%i] was "
+ "squashed, skipping.\n",
- SimpleCommit<Impl>::markCompletedInsts()
++ inst->readPC(), inst->seqNum, tid);
+ }
+ }
+}
+
+template <class Impl>
+void
- DPRINTF(Commit, "Commit: Marking PC %#x, SN %i ready within ROB.\n",
- fromIEW->insts[inst_num]->readPC(),
- fromIEW->insts[inst_num]->seqNum);
++DefaultCommit<Impl>::markCompletedInsts()
+{
+ // Grab completed insts out of the IEW instruction queue, and mark
+ // instructions completed within the ROB.
+ for (int inst_num = 0;
+ inst_num < fromIEW->size && fromIEW->insts[inst_num];
+ ++inst_num)
+ {
- // Mark the instruction as ready to commit.
- fromIEW->insts[inst_num]->setCanCommit();
++ if (!fromIEW->insts[inst_num]->isSquashed()) {
++ DPRINTF(Commit, "[tid:%i]: Marking PC %#x, [sn:%lli] ready "
++ "within ROB.\n",
++ fromIEW->insts[inst_num]->threadNumber,
++ fromIEW->insts[inst_num]->readPC(),
++ fromIEW->insts[inst_num]->seqNum);
++
++ // Mark the instruction as ready to commit.
++ fromIEW->insts[inst_num]->setCanCommit();
++ }
++ }
++}
++
++template <class Impl>
++bool
++DefaultCommit<Impl>::robDoneSquashing()
++{
++ list<unsigned>::iterator threads = (*activeThreads).begin();
+
- uint64_t
- SimpleCommit<Impl>::readCommitPC()
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = *threads++;
++
++ if (!rob->isDoneSquashing(tid))
++ return false;
+ }
++
++ return true;
+}
+
+template <class Impl>
- return rob->readHeadPC();
++void
++DefaultCommit<Impl>::updateComInstStats(DynInstPtr &inst)
+{
++ unsigned thread = inst->threadNumber;
++
++ //
++ // Pick off the software prefetches
++ //
++#ifdef TARGET_ALPHA
++ if (inst->isDataPrefetch()) {
++ statComSwp[thread]++;
++ } else {
++ statComInst[thread]++;
++ }
++#else
++ statComInst[thread]++;
++#endif
++
++ //
++ // Control Instructions
++ //
++ if (inst->isControl())
++ statComBranches[thread]++;
++
++ //
++ // Memory references
++ //
++ if (inst->isMemRef()) {
++ statComRefs[thread]++;
++
++ if (inst->isLoad()) {
++ statComLoads[thread]++;
++ }
++ }
++
++ if (inst->isMemBarrier()) {
++ statComMembars[thread]++;
++ }
++}
++
++////////////////////////////////////////
++// //
++// SMT COMMIT POLICY MAINTAINED HERE //
++// //
++////////////////////////////////////////
++template <class Impl>
++int
++DefaultCommit<Impl>::getCommittingThread()
++{
++ if (numThreads > 1) {
++ switch (commitPolicy) {
++
++ case Aggressive:
++ //If Policy is Aggressive, commit will call
++ //this function multiple times per
++ //cycle
++ return oldestReady();
++
++ case RoundRobin:
++ return roundRobin();
++
++ case OldestReady:
++ return oldestReady();
++
++ default:
++ return -1;
++ }
++ } else {
++ int tid = (*activeThreads).front();
++
++ if (commitStatus[tid] == Running ||
++ commitStatus[tid] == Idle ||
++ commitStatus[tid] == FetchTrapPending) {
++ return tid;
++ } else {
++ return -1;
++ }
++ }
++}
++
++template<class Impl>
++int
++DefaultCommit<Impl>::roundRobin()
++{
++ list<unsigned>::iterator pri_iter = priority_list.begin();
++ list<unsigned>::iterator end = priority_list.end();
++
++ while (pri_iter != end) {
++ unsigned tid = *pri_iter;
++
++ if (commitStatus[tid] == Running ||
++ commitStatus[tid] == Idle) {
++
++ if (rob->isHeadReady(tid)) {
++ priority_list.erase(pri_iter);
++ priority_list.push_back(tid);
++
++ return tid;
++ }
++ }
++
++ pri_iter++;
++ }
++
++ return -1;
++}
++
++template<class Impl>
++int
++DefaultCommit<Impl>::oldestReady()
++{
++ unsigned oldest = 0;
++ bool first = true;
++
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = *threads++;
++
++ if (!rob->isEmpty(tid) &&
++ (commitStatus[tid] == Running ||
++ commitStatus[tid] == Idle ||
++ commitStatus[tid] == FetchTrapPending)) {
++
++ if (rob->isHeadReady(tid)) {
++
++ DynInstPtr head_inst = rob->readHeadInst(tid);
++
++ if (first) {
++ oldest = tid;
++ first = false;
++ } else if (head_inst->seqNum < oldest) {
++ oldest = tid;
++ }
++ }
++ }
++ }
++
++ if (!first) {
++ return oldest;
++ } else {
++ return -1;
++ }
+}
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- #include "sim/root.hh"
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config/full_system.hh"
+
+#if FULL_SYSTEM
+#include "sim/system.hh"
+#else
+#include "sim/process.hh"
+#endif
- BaseFullCPU::BaseFullCPU(Params ¶ms)
- : BaseCPU(¶ms), cpu_id(0)
+
++#include "cpu/activity.hh"
++#include "cpu/checker/cpu.hh"
+#include "cpu/cpu_exec_context.hh"
+#include "cpu/exec_context.hh"
+#include "cpu/o3/alpha_dyn_inst.hh"
+#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/cpu.hh"
+
++#include "sim/root.hh"
++#include "sim/stat_control.hh"
++
+using namespace std;
+
- //Call constructor to all the pipeline stages here
++BaseFullCPU::BaseFullCPU(Params *params)
++ : BaseCPU(params), cpu_id(0)
++{
++}
++
++void
++BaseFullCPU::regStats()
+{
++ BaseCPU::regStats();
+}
+
+template <class Impl>
+FullO3CPU<Impl>::TickEvent::TickEvent(FullO3CPU<Impl> *c)
+ : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c)
+{
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::TickEvent::process()
+{
+ cpu->tick();
+}
+
+template <class Impl>
+const char *
+FullO3CPU<Impl>::TickEvent::description()
+{
+ return "FullO3CPU tick event";
+}
+
- FullO3CPU<Impl>::FullO3CPU(Params ¶ms)
- #if FULL_SYSTEM
- : BaseFullCPU(params),
- #else
+template <class Impl>
- #endif // FULL_SYSTEM
++FullO3CPU<Impl>::FullO3CPU(Params *params)
+ : BaseFullCPU(params),
- regFile(params.numPhysIntRegs, params.numPhysFloatRegs),
+ tickEvent(this),
++ removeInstsThisCycle(false),
+ fetch(params),
+ decode(params),
+ rename(params),
+ iew(params),
+ commit(params),
+
- freeList(TheISA::NumIntRegs, params.numPhysIntRegs,
- TheISA::NumFloatRegs, params.numPhysFloatRegs),
++ regFile(params->numPhysIntRegs, params->numPhysFloatRegs),
+
- renameMap(TheISA::NumIntRegs, params.numPhysIntRegs,
- TheISA::NumFloatRegs, params.numPhysFloatRegs,
- TheISA::NumMiscRegs,
- TheISA::ZeroReg,
- TheISA::ZeroReg + TheISA::NumIntRegs),
++ freeList(params->numberOfThreads,//number of activeThreads
++ TheISA::NumIntRegs, params->numPhysIntRegs,
++ TheISA::NumFloatRegs, params->numPhysFloatRegs),
+
- rob(params.numROBEntries, params.squashWidth),
++ rob(params->numROBEntries, params->squashWidth,
++ params->smtROBPolicy, params->smtROBThreshold,
++ params->numberOfThreads),
+
- // What to pass to these time buffers?
++ scoreboard(params->numberOfThreads,//number of activeThreads
++ TheISA::NumIntRegs, params->numPhysIntRegs,
++ TheISA::NumFloatRegs, params->numPhysFloatRegs,
++ TheISA::NumMiscRegs * number_of_threads,
++ TheISA::ZeroReg),
+
-
- cpuXC(NULL),
+ // For now just have these time buffers be pretty big.
++ // @todo: Make these time buffer sizes parameters or derived
++ // from latencies
+ timeBuffer(5, 5),
+ fetchQueue(5, 5),
+ decodeQueue(5, 5),
+ renameQueue(5, 5),
+ iewQueue(5, 5),
- system(params.system),
++ activityRec(NumStages, 10, params->activity),
+
+ globalSeqNum(1),
+
+#if FULL_SYSTEM
- itb(params.itb),
- dtb(params.dtb),
- mem(params.mem),
++ system(params->system),
+ memCtrl(system->memctrl),
+ physmem(system->physmem),
- // Hardcoded for a single thread!!
- mem(params.workload[0]->getMemory()),
++ mem(params->mem),
+#else
-
- icacheInterface(params.icacheInterface),
- dcacheInterface(params.dcacheInterface),
- deferRegistration(params.defReg),
- numInsts(0),
- funcExeInst(0)
++// pTable(params->pTable),
++ mem(params->workload[0]->getMemory()),
+#endif // FULL_SYSTEM
- #if !FULL_SYSTEM
- thread.resize(this->number_of_threads);
- #endif
-
- for (int i = 0; i < this->number_of_threads; ++i) {
++ switchCount(0),
++ icacheInterface(params->icacheInterface),
++ dcacheInterface(params->dcacheInterface),
++ deferRegistration(params->deferRegistration),
++ numThreads(number_of_threads)
+{
+ _status = Idle;
+
- assert(i == 0);
- thread[i] = new CPUExecContext(this, 0, system, itb, dtb, mem);
- system->execContexts[i] = thread[i]->getProxy();
-
- execContexts.push_back(system->execContexts[i]);
- #else
- if (i < params.workload.size()) {
- DPRINTF(FullCPU, "FullCPU: Workload[%i]'s starting PC is %#x, "
- "process is %#x",
- i, params.workload[i]->prog_entry, thread[i]);
- thread[i] = new CPUExecContext(this, i, params.workload[i], i);
- }
- assert(params.workload[i]->getMemory() != NULL);
- assert(mem != NULL);
- execContexts.push_back(thread[i]->getProxy());
- #endif // !FULL_SYSTEM
++ if (params->checker) {
++ BaseCPU *temp_checker = params->checker;
++ checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker);
++ checker->setMemory(mem);
+#if FULL_SYSTEM
- // Note that this is a hack so that my code which still uses xc-> will
- // still work. I should remove this eventually
- cpuXC = thread[0];
++ checker->setSystem(params->system);
++#endif
++ } else {
++ checker = NULL;
+ }
+
- // The stages also need their CPU pointer setup. However this must be
- // done at the upper level CPU because they have pointers to the upper
- // level CPU, and not this FullO3CPU.
++#if !FULL_SYSTEM
++ thread.resize(number_of_threads);
++ tids.resize(number_of_threads);
++#endif
+
- rename.setRenameMap(&renameMap);
- iew.setRenameMap(&renameMap);
++ // The stages also need their CPU pointer setup. However this
++ // must be done at the upper level CPU because they have pointers
++ // to the upper level CPU, and not this FullO3CPU.
++
++ // Set up Pointers to the activeThreads list for each stage
++ fetch.setActiveThreads(&activeThreads);
++ decode.setActiveThreads(&activeThreads);
++ rename.setActiveThreads(&activeThreads);
++ iew.setActiveThreads(&activeThreads);
++ commit.setActiveThreads(&activeThreads);
+
+ // Give each of the stages the time buffer they will use.
+ fetch.setTimeBuffer(&timeBuffer);
+ decode.setTimeBuffer(&timeBuffer);
+ rename.setTimeBuffer(&timeBuffer);
+ iew.setTimeBuffer(&timeBuffer);
+ commit.setTimeBuffer(&timeBuffer);
+
+ // Also setup each of the stages' queues.
+ fetch.setFetchQueue(&fetchQueue);
+ decode.setFetchQueue(&fetchQueue);
++ commit.setFetchQueue(&fetchQueue);
+ decode.setDecodeQueue(&decodeQueue);
+ rename.setDecodeQueue(&decodeQueue);
+ rename.setRenameQueue(&renameQueue);
+ iew.setRenameQueue(&renameQueue);
+ iew.setIEWQueue(&iewQueue);
+ commit.setIEWQueue(&iewQueue);
+ commit.setRenameQueue(&renameQueue);
+
++ commit.setFetchStage(&fetch);
++ commit.setIEWStage(&iew);
++ rename.setIEWStage(&iew);
++ rename.setCommitStage(&commit);
++
++#if !FULL_SYSTEM
++ int active_threads = params->workload.size();
++#else
++ int active_threads = 1;
++#endif
++
++ //Make Sure That this a Valid Architeture
++ assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs);
++ assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs);
++
++ rename.setScoreboard(&scoreboard);
++ iew.setScoreboard(&scoreboard);
++
+ // Setup the rename map for whichever stages need it.
- // Setup the free list for whichever stages need it.
++ PhysRegIndex lreg_idx = 0;
++ PhysRegIndex freg_idx = params->numPhysIntRegs; //Index to 1 after int regs
++
++ for (int tid=0; tid < numThreads; tid++) {
++ bool bindRegs = (tid <= active_threads - 1);
++
++ commitRenameMap[tid].init(TheISA::NumIntRegs,
++ params->numPhysIntRegs,
++ lreg_idx, //Index for Logical. Regs
++
++ TheISA::NumFloatRegs,
++ params->numPhysFloatRegs,
++ freg_idx, //Index for Float Regs
++
++ TheISA::NumMiscRegs,
++
++ TheISA::ZeroReg,
++ TheISA::ZeroReg,
++
++ tid,
++ false);
++
++ renameMap[tid].init(TheISA::NumIntRegs,
++ params->numPhysIntRegs,
++ lreg_idx, //Index for Logical. Regs
++
++ TheISA::NumFloatRegs,
++ params->numPhysFloatRegs,
++ freg_idx, //Index for Float Regs
+
- renameMap.setFreeList(&freeList);
++ TheISA::NumMiscRegs,
++
++ TheISA::ZeroReg,
++ TheISA::ZeroReg,
++
++ tid,
++ bindRegs);
++ }
++
++ rename.setRenameMap(renameMap);
++ commit.setRenameMap(commitRenameMap);
++
++ // Give renameMap & rename stage access to the freeList;
++ for (int i=0; i < numThreads; i++) {
++ renameMap[i].setFreeList(&freeList);
++ }
+ rename.setFreeList(&freeList);
- //Tick each of the stages if they're actually running.
- //Will want to figure out a way to unschedule itself if they're all
- //going to be idle for a long time.
++
++ // Setup the page table for whichever stages need it.
++#if !FULL_SYSTEM
++// fetch.setPageTable(pTable);
++// iew.setPageTable(pTable);
++#endif
+
+ // Setup the ROB for whichever stages need it.
+ commit.setROB(&rob);
++
++ lastRunningCycle = curTick;
++
++ contextSwitch = false;
+}
+
+template <class Impl>
+FullO3CPU<Impl>::~FullO3CPU()
+{
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::fullCPURegStats()
+{
++ BaseFullCPU::regStats();
++
+ // Register any of the FullCPU's stats here.
++ timesIdled
++ .name(name() + ".timesIdled")
++ .desc("Number of times that the entire CPU went into an idle state and"
++ " unscheduled itself")
++ .prereq(timesIdled);
++
++ idleCycles
++ .name(name() + ".idleCycles")
++ .desc("Total number of cycles that the CPU has spent unscheduled due "
++ "to idling")
++ .prereq(idleCycles);
++
++ // Number of Instructions simulated
++ // --------------------------------
++ // Should probably be in Base CPU but need templated
++ // MaxThreads so put in here instead
++ committedInsts
++ .init(numThreads)
++ .name(name() + ".committedInsts")
++ .desc("Number of Instructions Simulated");
++
++ totalCommittedInsts
++ .name(name() + ".committedInsts_total")
++ .desc("Number of Instructions Simulated");
++
++ cpi
++ .name(name() + ".cpi")
++ .desc("CPI: Cycles Per Instruction")
++ .precision(6);
++ cpi = simTicks / committedInsts;
++
++ totalCpi
++ .name(name() + ".cpi_total")
++ .desc("CPI: Total CPI of All Threads")
++ .precision(6);
++ totalCpi = simTicks / totalCommittedInsts;
++
++ ipc
++ .name(name() + ".ipc")
++ .desc("IPC: Instructions Per Cycle")
++ .precision(6);
++ ipc = committedInsts / simTicks;
++
++ totalIpc
++ .name(name() + ".ipc_total")
++ .desc("IPC: Total IPC of All Threads")
++ .precision(6);
++ totalIpc = totalCommittedInsts / simTicks;
++
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::tick()
+{
+ DPRINTF(FullCPU, "\n\nFullCPU: Ticking main, FullO3CPU.\n");
+
- // Now advance the time buffers, unless the stage is stalled.
++ ++numCycles;
++
++// activity = false;
++
++ //Tick each of the stages
+ fetch.tick();
+
+ decode.tick();
+
+ rename.tick();
+
+ iew.tick();
+
+ commit.tick();
+
- if (_status == Running && !tickEvent.scheduled())
- tickEvent.schedule(curTick + 1);
++#if !FULL_SYSTEM
++ doContextSwitch();
++#endif
++
++ // Now advance the time buffers
+ timeBuffer.advance();
+
+ fetchQueue.advance();
+ decodeQueue.advance();
+ renameQueue.advance();
+ iewQueue.advance();
+
- if(!deferRegistration)
- {
- this->registerExecContexts();
++ activityRec.advance();
++
++ if (removeInstsThisCycle) {
++ cleanUpRemovedInsts();
++ }
++
++ if (!tickEvent.scheduled()) {
++ if (_status == SwitchedOut) {
++ // increment stat
++ lastRunningCycle = curTick;
++ } else if (!activityRec.active()) {
++ lastRunningCycle = curTick;
++ timesIdled++;
++ } else {
++ tickEvent.schedule(curTick + cycles(1));
++ }
++ }
++
++#if !FULL_SYSTEM
++ updateThreadPriority();
++#endif
++
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::init()
+{
- // Need to do a copy of the xc->regs into the CPU's regfile so
- // that it can start properly.
++ if (!deferRegistration) {
++ registerExecContexts();
++ }
+
- ExecContext *src_xc = system->execContexts[0];
- TheISA::initCPU(src_xc, src_xc->readCpuId());
++ // Set inSyscall so that the CPU doesn't squash when initially
++ // setting up registers.
++ for (int i = 0; i < number_of_threads; ++i)
++ thread[i]->inSyscall = true;
++
++ for (int tid=0; tid < number_of_threads; tid++) {
+#if FULL_SYSTEM
- ExecContext *src_xc = thread[0]->getProxy();
++ ExecContext *src_xc = execContexts[tid];
+#else
- // First loop through the integer registers.
- for (int i = 0; i < TheISA::NumIntRegs; ++i)
- {
- regFile.intRegFile[i] = src_xc->readIntReg(i);
++ ExecContext *src_xc = thread[tid]->getXCProxy();
+#endif
- // Then loop through the floating point registers.
- for (int i = 0; i < TheISA::NumFloatRegs; ++i)
- {
- regFile.floatRegFile.setRegBits(i, src_xc->readRegBits(i))
- }
- /*
- // Then loop through the misc registers.
- regFile.miscRegs.fpcr = src_xc->regs.miscRegs.fpcr;
- regFile.miscRegs.uniq = src_xc->regs.miscRegs.uniq;
- regFile.miscRegs.lock_flag = src_xc->regs.miscRegs.lock_flag;
- regFile.miscRegs.lock_addr = src_xc->regs.miscRegs.lock_addr;
- */
- // Then finally set the PC and the next PC.
- regFile.pc = src_xc->readPC();
- regFile.npc = src_xc->readNextPC();
++ // Threads start in the Suspended State
++ if (src_xc->status() != ExecContext::Suspended) {
++ continue;
+ }
+
- FullO3CPU<Impl>::activateContext(int thread_num, int delay)
++#if FULL_SYSTEM
++ TheISA::initCPU(src_xc, src_xc->readCpuId());
++#endif
++ }
++
++ // Clear inSyscall.
++ for (int i = 0; i < number_of_threads; ++i)
++ thread[i]->inSyscall = false;
++
++ // Initialize stages.
++ fetch.initStage();
++ iew.initStage();
++ rename.initStage();
++ commit.initStage();
++
++ commit.setThreads(thread);
++}
++
++template <class Impl>
++void
++FullO3CPU<Impl>::insertThread(unsigned tid)
++{
++ DPRINTF(FullCPU,"[tid:%i] Initializing thread data");
++ // Will change now that the PC and thread state is internal to the CPU
++ // and not in the CPUExecContext.
++#if 0
++#if FULL_SYSTEM
++ ExecContext *src_xc = system->execContexts[tid];
++#else
++ CPUExecContext *src_xc = thread[tid];
++#endif
++
++ //Bind Int Regs to Rename Map
++ for (int ireg = 0; ireg < TheISA::NumIntRegs; ireg++) {
++ PhysRegIndex phys_reg = freeList.getIntReg();
++
++ renameMap[tid].setEntry(ireg,phys_reg);
++ scoreboard.setReg(phys_reg);
++ }
++
++ //Bind Float Regs to Rename Map
++ for (int freg = 0; freg < TheISA::NumFloatRegs; freg++) {
++ PhysRegIndex phys_reg = freeList.getFloatReg();
++
++ renameMap[tid].setEntry(freg,phys_reg);
++ scoreboard.setReg(phys_reg);
++ }
++
++ //Copy Thread Data Into RegFile
++ this->copyFromXC(tid);
++
++ //Set PC/NPC
++ regFile.pc[tid] = src_xc->readPC();
++ regFile.npc[tid] = src_xc->readNextPC();
++
++ src_xc->setStatus(ExecContext::Active);
++
++ activateContext(tid,1);
++
++ //Reset ROB/IQ/LSQ Entries
++ commit.rob->resetEntries();
++ iew.resetEntries();
++#endif
++}
++
++template <class Impl>
++void
++FullO3CPU<Impl>::removeThread(unsigned tid)
++{
++ DPRINTF(FullCPU,"[tid:%i] Removing thread data");
++#if 0
++ //Unbind Int Regs from Rename Map
++ for (int ireg = 0; ireg < TheISA::NumIntRegs; ireg++) {
++ PhysRegIndex phys_reg = renameMap[tid].lookup(ireg);
++
++ scoreboard.unsetReg(phys_reg);
++ freeList.addReg(phys_reg);
++ }
++
++ //Unbind Float Regs from Rename Map
++ for (int freg = 0; freg < TheISA::NumFloatRegs; freg++) {
++ PhysRegIndex phys_reg = renameMap[tid].lookup(freg);
++
++ scoreboard.unsetReg(phys_reg);
++ freeList.addReg(phys_reg);
++ }
++
++ //Copy Thread Data From RegFile
++ /* Fix Me:
++ * Do we really need to do this if we are removing a thread
++ * in the sense that it's finished (exiting)? If the thread is just
++ * being suspended we might...
++ */
++// this->copyToXC(tid);
++
++ //Squash Throughout Pipeline
++ fetch.squash(0,tid);
++ decode.squash(tid);
++ rename.squash(tid);
++
++ assert(iew.ldstQueue.getCount(tid) == 0);
++
++ //Reset ROB/IQ/LSQ Entries
++ if (activeThreads.size() >= 1) {
++ commit.rob->resetEntries();
++ iew.resetEntries();
++ }
++#endif
++}
++
++
++template <class Impl>
++void
++FullO3CPU<Impl>::activateWhenReady(int tid)
++{
++ DPRINTF(FullCPU,"[tid:%i]: Checking if resources are available for incoming"
++ "(e.g. PhysRegs/ROB/IQ/LSQ) \n",
++ tid);
++
++ bool ready = true;
++
++ if (freeList.numFreeIntRegs() >= TheISA::NumIntRegs) {
++ DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough "
++ "Phys. Int. Regs.\n",
++ tid);
++ ready = false;
++ } else if (freeList.numFreeFloatRegs() >= TheISA::NumFloatRegs) {
++ DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough "
++ "Phys. Float. Regs.\n",
++ tid);
++ ready = false;
++ } else if (commit.rob->numFreeEntries() >=
++ commit.rob->entryAmount(activeThreads.size() + 1)) {
++ DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough "
++ "ROB entries.\n",
++ tid);
++ ready = false;
++ } else if (iew.instQueue.numFreeEntries() >=
++ iew.instQueue.entryAmount(activeThreads.size() + 1)) {
++ DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough "
++ "IQ entries.\n",
++ tid);
++ ready = false;
++ } else if (iew.ldstQueue.numFreeEntries() >=
++ iew.ldstQueue.entryAmount(activeThreads.size() + 1)) {
++ DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough "
++ "LSQ entries.\n",
++ tid);
++ ready = false;
++ }
++
++ if (ready) {
++ insertThread(tid);
++
++ contextSwitch = false;
++
++ cpuWaitList.remove(tid);
++ } else {
++ suspendContext(tid);
++
++ //blocks fetch
++ contextSwitch = true;
++
++ //do waitlist
++ cpuWaitList.push_back(tid);
+ }
+}
+
+template <class Impl>
+void
- FullO3CPU<Impl>::suspendContext(int thread_num)
++FullO3CPU<Impl>::activateContext(int tid, int delay)
+{
+ // Needs to set each stage to running as well.
++ list<unsigned>::iterator isActive = find(
++ activeThreads.begin(), activeThreads.end(), tid);
++
++ if (isActive == activeThreads.end()) {
++ //May Need to Re-code this if the delay variable is the
++ //delay needed for thread to activate
++ DPRINTF(FullCPU, "Adding Thread %i to active threads list\n",
++ tid);
++
++ activeThreads.push_back(tid);
++ }
++
++ assert(_status == Idle || _status == SwitchedOut);
+
+ scheduleTickEvent(delay);
+
++ // Be sure to signal that there's some activity so the CPU doesn't
++ // deschedule itself.
++ activityRec.activity();
++ fetch.wakeFromQuiesce();
++
+ _status = Running;
+}
+
+template <class Impl>
+void
- panic("suspendContext unimplemented!");
++FullO3CPU<Impl>::suspendContext(int tid)
++{
++ DPRINTF(FullCPU,"[tid: %i]: Suspended ...\n", tid);
++ unscheduleTickEvent();
++ _status = Idle;
++/*
++ //Remove From Active List, if Active
++ list<unsigned>::iterator isActive = find(
++ activeThreads.begin(), activeThreads.end(), tid);
++
++ if (isActive != activeThreads.end()) {
++ DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n",
++ tid);
++ activeThreads.erase(isActive);
++ }
++*/
++}
++
++template <class Impl>
++void
++FullO3CPU<Impl>::deallocateContext(int tid)
+{
- FullO3CPU<Impl>::deallocateContext(int thread_num)
++ DPRINTF(FullCPU,"[tid:%i]: Deallocating ...", tid);
++/*
++ //Remove From Active List, if Active
++ list<unsigned>::iterator isActive = find(
++ activeThreads.begin(), activeThreads.end(), tid);
++
++ if (isActive != activeThreads.end()) {
++ DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n",
++ tid);
++ activeThreads.erase(isActive);
++
++ removeThread(tid);
++ }
++*/
+}
+
+template <class Impl>
+void
- panic("deallocateContext unimplemented!");
++FullO3CPU<Impl>::haltContext(int tid)
+{
- FullO3CPU<Impl>::haltContext(int thread_num)
++ DPRINTF(FullCPU,"[tid:%i]: Halted ...", tid);
++/*
++ //Remove From Active List, if Active
++ list<unsigned>::iterator isActive = find(
++ activeThreads.begin(), activeThreads.end(), tid);
++
++ if (isActive != activeThreads.end()) {
++ DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n",
++ tid);
++ activeThreads.erase(isActive);
++
++ removeThread(tid);
++ }
++*/
+}
+
+template <class Impl>
+void
- panic("haltContext unimplemented!");
++FullO3CPU<Impl>::switchOut(Sampler *_sampler)
+{
- FullO3CPU<Impl>::switchOut()
++ sampler = _sampler;
++ switchCount = 0;
++ fetch.switchOut();
++ decode.switchOut();
++ rename.switchOut();
++ iew.switchOut();
++ commit.switchOut();
++
++ // Wake the CPU and record activity so everything can drain out if
++ // the CPU is currently idle.
++ wakeCPU();
++ activityRec.activity();
+}
+
+template <class Impl>
+void
- panic("FullO3CPU does not have a switch out function.\n");
++FullO3CPU<Impl>::signalSwitched()
+{
- // Set all status's to active, schedule the
- // CPU's tick event.
++ if (++switchCount == NumStages) {
++ fetch.doSwitchOut();
++ rename.doSwitchOut();
++ commit.doSwitchOut();
++ instList.clear();
++ while (!removeList.empty()) {
++ removeList.pop();
++ }
++
++ if (checker)
++ checker->switchOut(sampler);
++
++ if (tickEvent.scheduled())
++ tickEvent.squash();
++ sampler->signalSwitched();
++ _status = SwitchedOut;
++ }
++ assert(switchCount <= 5);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
+{
++ // Flush out any old data from the time buffers.
++ for (int i = 0; i < 10; ++i) {
++ timeBuffer.advance();
++ fetchQueue.advance();
++ decodeQueue.advance();
++ renameQueue.advance();
++ iewQueue.advance();
++ }
++
++ activityRec.reset();
++
+ BaseCPU::takeOverFrom(oldCPU);
+
++ fetch.takeOverFrom();
++ decode.takeOverFrom();
++ rename.takeOverFrom();
++ iew.takeOverFrom();
++ commit.takeOverFrom();
++
+ assert(!tickEvent.scheduled());
+
- }
-
- template <class Impl>
- InstSeqNum
- FullO3CPU<Impl>::getAndIncrementInstSeq()
- {
- // Hopefully this works right.
- return globalSeqNum++;
++ // @todo: Figure out how to properly select the tid to put onto
++ // the active threads list.
++ int tid = 0;
++
++ list<unsigned>::iterator isActive = find(
++ activeThreads.begin(), activeThreads.end(), tid);
++
++ if (isActive == activeThreads.end()) {
++ //May Need to Re-code this if the delay variable is the delay
++ //needed for thread to activate
++ DPRINTF(FullCPU, "Adding Thread %i to active threads list\n",
++ tid);
++
++ activeThreads.push_back(tid);
++ }
++
++ // Set all statuses to active, schedule the CPU's tick event.
++ // @todo: Fix up statuses so this is handled properly
+ for (int i = 0; i < execContexts.size(); ++i) {
+ ExecContext *xc = execContexts[i];
+ if (xc->status() == ExecContext::Active && _status != Running) {
+ _status = Running;
+ tickEvent.schedule(curTick);
+ }
+ }
- {
++ if (!tickEvent.scheduled())
++ tickEvent.schedule(curTick);
+}
+
+template <class Impl>
+uint64_t
+FullO3CPU<Impl>::readIntReg(int reg_idx)
+{
+ return regFile.readIntReg(reg_idx);
+}
+
+template <class Impl>
+FloatReg
+FullO3CPU<Impl>::readFloatReg(int reg_idx, int width)
+{
+ return regFile.readFloatReg(reg_idx, width);
+}
+
+template <class Impl>
+FloatReg
+FullO3CPU<Impl>::readFloatReg(int reg_idx)
+{
+ return regFile.readFloatReg(reg_idx);
+}
+
+template <class Impl>
+FloatRegBits
+FullO3CPU<Impl>::readFloatRegBits(int reg_idx, int width)
- FullO3CPU<Impl>::readPC()
+ return regFile.readFloatRegBits(reg_idx, width);
+}
+
+template <class Impl>
+FloatRegBits
+FullO3CPU<Impl>::readFloatRegBits(int reg_idx)
+{
+ return regFile.readFloatRegBits(reg_idx);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setIntReg(int reg_idx, uint64_t val)
+{
+ regFile.setIntReg(reg_idx, val);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setFloatReg(int reg_idx, FloatReg val, int width)
+{
+ regFile.setFloatReg(reg_idx, val, width);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setFloatReg(int reg_idx, FloatReg val)
+{
+ regFile.setFloatReg(reg_idx, val);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val, int width)
+{
+ regFile.setFloatRegBits(reg_idx, val, width);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val)
+{
+ regFile.setFloatRegBits(reg_idx, val);
+}
+
+template <class Impl>
+uint64_t
- return regFile.readPC();
++FullO3CPU<Impl>::readArchIntReg(int reg_idx, unsigned tid)
++{
++ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
++
++ return regFile.readIntReg(phys_reg);
++}
++
++template <class Impl>
++float
++FullO3CPU<Impl>::readArchFloatRegSingle(int reg_idx, unsigned tid)
++{
++ int idx = reg_idx + TheISA::FP_Base_DepTag;
++ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
++
++ return regFile.readFloatRegSingle(phys_reg);
++}
++
++template <class Impl>
++double
++FullO3CPU<Impl>::readArchFloatRegDouble(int reg_idx, unsigned tid)
++{
++ int idx = reg_idx + TheISA::FP_Base_DepTag;
++ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
++
++ return regFile.readFloatRegDouble(phys_reg);
++}
++
++template <class Impl>
++uint64_t
++FullO3CPU<Impl>::readArchFloatRegInt(int reg_idx, unsigned tid)
+{
- FullO3CPU<Impl>::setNextPC(uint64_t val)
++ int idx = reg_idx + TheISA::FP_Base_DepTag;
++ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
++
++ return regFile.readFloatRegInt(phys_reg);
+}
+
+template <class Impl>
+void
- regFile.setNextPC(val);
++FullO3CPU<Impl>::setArchIntReg(int reg_idx, uint64_t val, unsigned tid)
+{
- FullO3CPU<Impl>::setPC(Addr new_PC)
++ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
++
++ regFile.setIntReg(phys_reg, val);
+}
+
+template <class Impl>
+void
- regFile.setPC(new_PC);
++FullO3CPU<Impl>::setArchFloatRegSingle(int reg_idx, float val, unsigned tid)
+{
- FullO3CPU<Impl>::addInst(DynInstPtr &inst)
++ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
++
++ regFile.setFloatRegSingle(phys_reg, val);
+}
+
+template <class Impl>
+void
- instList.push_back(inst);
++FullO3CPU<Impl>::setArchFloatRegDouble(int reg_idx, double val, unsigned tid)
+{
- FullO3CPU<Impl>::instDone()
++ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
++
++ regFile.setFloatRegDouble(phys_reg, val);
+}
+
+template <class Impl>
+void
- // Keep an instruction count.
- numInsts++;
++FullO3CPU<Impl>::setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid)
+{
- // Check for instruction-count-based events.
- comInstEventQueue[0]->serviceEvents(numInsts);
++ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
+
- FullO3CPU<Impl>::removeBackInst(DynInstPtr &inst)
++ regFile.setFloatRegInt(phys_reg, val);
++}
++
++template <class Impl>
++uint64_t
++FullO3CPU<Impl>::readPC(unsigned tid)
++{
++ return commit.readPC(tid);
+}
+
+template <class Impl>
+void
- DynInstPtr inst_to_delete;
++FullO3CPU<Impl>::setPC(Addr new_PC,unsigned tid)
+{
- // Walk through the instruction list, removing any instructions
- // that were inserted after the given instruction, inst.
- while (instList.back() != inst)
- {
- assert(!instList.empty());
++ commit.setPC(new_PC, tid);
++}
+
- // Obtain the pointer to the instruction.
- inst_to_delete = instList.back();
++template <class Impl>
++uint64_t
++FullO3CPU<Impl>::readNextPC(unsigned tid)
++{
++ return commit.readNextPC(tid);
++}
++
++template <class Impl>
++void
++FullO3CPU<Impl>::setNextPC(uint64_t val,unsigned tid)
++{
++ commit.setNextPC(val, tid);
++}
++
++template <class Impl>
++typename FullO3CPU<Impl>::ListIt
++FullO3CPU<Impl>::addInst(DynInstPtr &inst)
++{
++ instList.push_back(inst);
+
- DPRINTF(FullCPU, "FullCPU: Removing instruction %i, PC %#x\n",
- inst_to_delete->seqNum, inst_to_delete->readPC());
++ return --(instList.end());
++}
+
- // Remove the instruction from the list.
- instList.pop_back();
++template <class Impl>
++void
++FullO3CPU<Impl>::instDone(unsigned tid)
++{
++ // Keep an instruction count.
++ thread[tid]->numInst++;
++ thread[tid]->numInsts++;
++ committedInsts[tid]++;
++ totalCommittedInsts++;
+
- // Mark it as squashed.
- inst_to_delete->setSquashed();
- }
++ // Check for instruction-count-based events.
++ comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst);
++}
+
- DynInstPtr inst_to_remove;
++template <class Impl>
++void
++FullO3CPU<Impl>::addToRemoveList(DynInstPtr &inst)
++{
++ removeInstsThisCycle = true;
++
++ removeList.push(inst->getInstListIt());
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::removeFrontInst(DynInstPtr &inst)
+{
- // The front instruction should be the same one being asked to be removed.
- assert(instList.front() == inst);
++ DPRINTF(FullCPU, "FullCPU: Removing committed instruction [tid:%i] PC %#x "
++ "[sn:%lli]\n",
++ inst->threadNumber, inst->readPC(), inst->seqNum);
+
- inst_to_remove = inst;
- instList.pop_front();
-
- DPRINTF(FullCPU, "FullCPU: Removing committed instruction %#x, PC %#x\n",
- inst_to_remove, inst_to_remove->readPC());
++ removeInstsThisCycle = true;
+
+ // Remove the front instruction.
- FullO3CPU<Impl>::removeInstsNotInROB()
++ removeList.push(inst->getInstListIt());
+}
+
+template <class Impl>
+void
- DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction "
- "list.\n");
++FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid)
+{
- DynInstPtr rob_tail = rob.readTailInst();
++ DPRINTF(FullCPU, "FullCPU: Thread %i: Deleting instructions from instruction"
++ " list.\n", tid);
++
++ ListIt end_it;
++
++ bool rob_empty = false;
++
++ if (instList.empty()) {
++ return;
++ } else if (rob.isEmpty(/*tid*/)) {
++ DPRINTF(FullCPU, "FullCPU: ROB is empty, squashing all insts.\n");
++ end_it = instList.begin();
++ rob_empty = true;
++ } else {
++ end_it = (rob.readTailInst(tid))->getInstListIt();
++ DPRINTF(FullCPU, "FullCPU: ROB is not empty, squashing insts not in ROB.\n");
++ }
++
++ removeInstsThisCycle = true;
++
++ ListIt inst_it = instList.end();
++
++ inst_it--;
++
++ // Walk through the instruction list, removing any instructions
++ // that were inserted after the given instruction iterator, end_it.
++ while (inst_it != end_it) {
++ assert(!instList.empty());
+
- removeBackInst(rob_tail);
++ squashInstIt(inst_it, tid);
+
- FullO3CPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num)
++ inst_it--;
++ }
++
++ // If the ROB was empty, then we actually need to remove the first
++ // instruction as well.
++ if (rob_empty) {
++ squashInstIt(inst_it, tid);
++ }
+}
+
+template <class Impl>
+void
- "list.\n");
++FullO3CPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num,
++ unsigned tid)
+{
++ assert(!instList.empty());
++
++ removeInstsThisCycle = true;
++
++ ListIt inst_iter = instList.end();
++
++ inst_iter--;
++
+ DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction "
- DynInstPtr inst_to_delete;
++ "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n",
++ tid, seq_num, (*inst_iter)->seqNum);
+
- while (instList.back()->seqNum > seq_num) {
- assert(!instList.empty());
++ while ((*inst_iter)->seqNum > seq_num) {
+
- // Obtain the pointer to the instruction.
- inst_to_delete = instList.back();
++ bool break_loop = (inst_iter == instList.begin());
+
- DPRINTF(FullCPU, "FullCPU: Removing instruction %i, PC %#x\n",
- inst_to_delete->seqNum, inst_to_delete->readPC());
++ squashInstIt(inst_iter, tid);
+
- // Remove the instruction from the list.
- instList.back() = NULL;
- instList.pop_back();
++ inst_iter--;
+
- inst_to_delete->setSquashed();
- }
++ if (break_loop)
++ break;
++ }
++}
++
++template <class Impl>
++inline void
++FullO3CPU<Impl>::squashInstIt(const ListIt &instIt, const unsigned &tid)
++{
++ if ((*instIt)->threadNumber == tid) {
++ DPRINTF(FullCPU, "FullCPU: Squashing instruction, "
++ "[tid:%i] [sn:%lli] PC %#x\n",
++ (*instIt)->threadNumber,
++ (*instIt)->seqNum,
++ (*instIt)->readPC());
+
+ // Mark it as squashed.
-
++ (*instIt)->setSquashed();
+
++ // @todo: Formulate a consistent method for deleting
++ // instructions from the instruction list
++ // Remove the instruction from the list.
++ removeList.push(instIt);
++ }
+}
+
++template <class Impl>
++void
++FullO3CPU<Impl>::cleanUpRemovedInsts()
++{
++ while (!removeList.empty()) {
++ DPRINTF(FullCPU, "FullCPU: Removing instruction, "
++ "[tid:%i] [sn:%lli] PC %#x\n",
++ (*removeList.front())->threadNumber,
++ (*removeList.front())->seqNum,
++ (*removeList.front())->readPC());
++
++ instList.erase(removeList.front());
++
++ removeList.pop();
++ }
++
++ removeInstsThisCycle = false;
++}
++/*
+template <class Impl>
+void
+FullO3CPU<Impl>::removeAllInsts()
+{
+ instList.clear();
+}
- typename list<DynInstPtr>::iterator inst_list_it = instList.begin();
++*/
+template <class Impl>
+void
+FullO3CPU<Impl>::dumpInsts()
+{
+ int num = 0;
- while (inst_list_it != instList.end())
- {
- cprintf("Instruction:%i\nPC:%#x\nSN:%lli\nIssued:%i\nSquashed:%i\n\n",
- num, (*inst_list_it)->readPC(), (*inst_list_it)->seqNum,
- (*inst_list_it)->isIssued(), (*inst_list_it)->isSquashed());
+
-
++ ListIt inst_list_it = instList.begin();
++
++ cprintf("Dumping Instruction List\n");
++
++ while (inst_list_it != instList.end()) {
++ cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n"
++ "Squashed:%i\n\n",
++ num, (*inst_list_it)->readPC(), (*inst_list_it)->threadNumber,
++ (*inst_list_it)->seqNum, (*inst_list_it)->isIssued(),
++ (*inst_list_it)->isSquashed());
+ inst_list_it++;
+ ++num;
+ }
+}
++/*
+template <class Impl>
+void
+FullO3CPU<Impl>::wakeDependents(DynInstPtr &inst)
+{
+ iew.wakeDependents(inst);
+}
++*/
++template <class Impl>
++void
++FullO3CPU<Impl>::wakeCPU()
++{
++ if (activityRec.active() || tickEvent.scheduled()) {
++ DPRINTF(Activity, "CPU already running.\n");
++ return;
++ }
++
++ DPRINTF(Activity, "Waking up CPU\n");
++
++ idleCycles += (curTick - 1) - lastRunningCycle;
++
++ tickEvent.schedule(curTick);
++}
++
++template <class Impl>
++int
++FullO3CPU<Impl>::getFreeTid()
++{
++ for (int i=0; i < numThreads; i++) {
++ if (!tids[i]) {
++ tids[i] = true;
++ return i;
++ }
++ }
++
++ return -1;
++}
++
++template <class Impl>
++void
++FullO3CPU<Impl>::doContextSwitch()
++{
++ if (contextSwitch) {
++
++ //ADD CODE TO DEACTIVE THREAD HERE (???)
++
++ for (int tid=0; tid < cpuWaitList.size(); tid++) {
++ activateWhenReady(tid);
++ }
++
++ if (cpuWaitList.size() == 0)
++ contextSwitch = true;
++ }
++}
++
++template <class Impl>
++void
++FullO3CPU<Impl>::updateThreadPriority()
++{
++ if (activeThreads.size() > 1)
++ {
++ //DEFAULT TO ROUND ROBIN SCHEME
++ //e.g. Move highest priority to end of thread list
++ list<unsigned>::iterator list_begin = activeThreads.begin();
++ list<unsigned>::iterator list_end = activeThreads.end();
++
++ unsigned high_thread = *list_begin;
++
++ activeThreads.erase(list_begin);
++
++ activeThreads.push_back(high_thread);
++ }
++}
+
+// Forward declaration of FullO3CPU.
+template class FullO3CPU<AlphaSimpleImpl>;
--- /dev/null
- //Todo: Add in a lot of the functions that are ISA specific. Also define
- //the functions that currently exist within the base cpu class. Define
- //everything for the simobject stuff so it can be serialized and
- //instantiated, add in debugging statements everywhere. Have CPU schedule
- //itself properly. Threads!
- // Avoid running stages and advancing queues if idle/stalled.
-
- #ifndef __CPU_O3_CPU_FULL_CPU_HH__
- #define __CPU_O3_CPU_FULL_CPU_HH__
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- class FunctionalMemory;
++#ifndef __CPU_O3_CPU_HH__
++#define __CPU_O3_CPU_HH__
+
+#include <iostream>
+#include <list>
++#include <queue>
++#include <set>
+#include <vector>
+
+#include "base/statistics.hh"
+#include "base/timebuf.hh"
+#include "config/full_system.hh"
++#include "cpu/activity.hh"
+#include "cpu/base.hh"
+#include "cpu/cpu_exec_context.hh"
+#include "cpu/o3/comm.hh"
+#include "cpu/o3/cpu_policy.hh"
++#include "cpu/o3/scoreboard.hh"
++#include "cpu/o3/thread_state.hh"
+#include "sim/process.hh"
+
++template <class>
++class Checker;
+class ExecContext;
- #if FULL_SYSTEM
- BaseFullCPU(Params ¶ms);
- #else
- BaseFullCPU(Params ¶ms);
- #endif // FULL_SYSTEM
++class MemInterface;
+class Process;
+
+class BaseFullCPU : public BaseCPU
+{
+ //Stuff that's pretty ISA independent will go here.
+ public:
+ typedef BaseCPU::Params Params;
+
- //Put typedefs from the Impl here.
++ BaseFullCPU(Params *params);
++
++ void regStats();
+
+ protected:
+ int cpu_id;
+};
+
+template <class Impl>
+class FullO3CPU : public BaseFullCPU
+{
+ public:
- Blocked // ?
++ // Typedefs from the Impl here.
+ typedef typename Impl::CPUPol CPUPolicy;
+ typedef typename Impl::Params Params;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
++ typedef O3ThreadState<Impl> Thread;
++
++ typedef typename std::list<DynInstPtr>::iterator ListIt;
++
+ public:
+ enum Status {
+ Running,
+ Idle,
+ Halted,
- /// Schedule tick event, regardless of its current state.
++ Blocked,
++ SwitchedOut
+ };
+
++ /** Overall CPU status. */
+ Status _status;
+
+ private:
+ class TickEvent : public Event
+ {
+ private:
++ /** Pointer to the CPU. */
+ FullO3CPU<Impl> *cpu;
+
+ public:
++ /** Constructs a tick event. */
+ TickEvent(FullO3CPU<Impl> *c);
++
++ /** Processes a tick event, calling tick() on the CPU. */
+ void process();
++ /** Returns the description of the tick event. */
+ const char *description();
+ };
+
++ /** The tick event used for scheduling CPU ticks. */
+ TickEvent tickEvent;
+
- tickEvent.reschedule(curTick + delay);
++ /** Schedule tick event, regardless of its current state. */
+ void scheduleTickEvent(int delay)
+ {
+ if (tickEvent.squashed())
- tickEvent.schedule(curTick + delay);
++ tickEvent.reschedule(curTick + cycles(delay));
+ else if (!tickEvent.scheduled())
- /// Unschedule tick event, regardless of its current state.
++ tickEvent.schedule(curTick + cycles(delay));
+ }
+
- FullO3CPU(Params ¶ms);
++ /** Unschedule tick event, regardless of its current state. */
+ void unscheduleTickEvent()
+ {
+ if (tickEvent.scheduled())
+ tickEvent.squash();
+ }
+
+ public:
- void activateContext(int thread_num, int delay);
- void suspendContext(int thread_num);
- void deallocateContext(int thread_num);
- void haltContext(int thread_num);
++ /** Constructs a CPU with the given parameters. */
++ FullO3CPU(Params *params);
++ /** Destructor. */
+ ~FullO3CPU();
+
++ /** Registers statistics. */
+ void fullCPURegStats();
+
++ /** Ticks CPU, calling tick() on each stage, and checking the overall
++ * activity to see if the CPU should deschedule itself.
++ */
+ void tick();
+
++ /** Initialize the CPU */
+ void init();
+
- void switchOut();
++ /** Setup CPU to insert a thread's context */
++ void insertThread(unsigned tid);
++
++ /** Remove all of a thread's context from CPU */
++ void removeThread(unsigned tid);
++
++ /** Count the Total Instructions Committed in the CPU. */
++ virtual Counter totalInstructions() const
++ {
++ Counter total(0);
++
++ for (int i=0; i < thread.size(); i++)
++ total += thread[i]->numInst;
++
++ return total;
++ }
++
++ /** Add Thread to Active Threads List. */
++ void activateContext(int tid, int delay);
++
++ /** Remove Thread from Active Threads List */
++ void suspendContext(int tid);
++
++ /** Remove Thread from Active Threads List &&
++ * Remove Thread Context from CPU.
++ */
++ void deallocateContext(int tid);
++
++ /** Remove Thread from Active Threads List &&
++ * Remove Thread Context from CPU.
++ */
++ void haltContext(int tid);
++
++ /** Activate a Thread When CPU Resources are Available. */
++ void activateWhenReady(int tid);
+
- InstSeqNum getAndIncrementInstSeq();
++ /** Add or Remove a Thread Context in the CPU. */
++ void doContextSwitch();
++
++ /** Update The Order In Which We Process Threads. */
++ void updateThreadPriority();
++
++ /** Executes a syscall on this cycle.
++ * ---------------------------------------
++ * Note: this is a virtual function. CPU-Specific
++ * functionality defined in derived classes
++ */
++ virtual void syscall(int tid) { panic("Unimplemented!"); }
++
++ /** Check if there are any system calls pending. */
++ void checkSyscalls();
++
++ /** Switches out this CPU.
++ */
++ void switchOut(Sampler *sampler);
++
++ void signalSwitched();
++
++ /** Takes over from another CPU.
++ */
+ void takeOverFrom(BaseCPU *oldCPU);
+
+ /** Get the current instruction sequence number, and increment it. */
- int getInstAsid()
- { return regFile.miscRegs.getInstAsid(); }
++ InstSeqNum getAndIncrementInstSeq()
++ { return globalSeqNum++; }
+
+#if FULL_SYSTEM
+ /** Check if this address is a valid instruction address. */
+ bool validInstAddr(Addr addr) { return true; }
+
+ /** Check if this address is a valid data address. */
+ bool validDataAddr(Addr addr) { return true; }
+
+ /** Get instruction asid. */
- int getDataAsid()
- { return regFile.miscRegs.getDataAsid(); }
++ int getInstAsid(unsigned tid)
++ { return regFile.miscRegs[tid].getInstAsid(); }
+
+ /** Get data asid. */
- bool validInstAddr(Addr addr)
- { return thread[0]->validInstAddr(addr); }
++ int getDataAsid(unsigned tid)
++ { return regFile.miscRegs[tid].getDataAsid(); }
+#else
- bool validDataAddr(Addr addr)
- { return thread[0]->validDataAddr(addr); }
++ /** Check if this address is a valid instruction address. */
++ bool validInstAddr(Addr addr,unsigned tid)
++ { return thread[tid]->validInstAddr(addr); }
+
- int getInstAsid() { return thread[0]->getInstAsid(); }
- int getDataAsid() { return thread[0]->getDataAsid(); }
++ /** Check if this address is a valid data address. */
++ bool validDataAddr(Addr addr,unsigned tid)
++ { return thread[tid]->validDataAddr(addr); }
+
- uint64_t readPC();
++ /** Get instruction asid. */
++ int getInstAsid(unsigned tid)
++ { return thread[tid]->asid; }
++
++ /** Get data asid. */
++ int getDataAsid(unsigned tid)
++ { return thread[tid]->asid; }
+
+#endif
+
+ //
+ // New accessors for new decoder.
+ //
+ uint64_t readIntReg(int reg_idx);
+
+ FloatReg readFloatReg(int reg_idx);
+
+ FloatReg readFloatReg(int reg_idx, int width);
+
+ FloatRegBits readFloatRegBits(int reg_idx);
+
+ FloatRegBits readFloatRegBits(int reg_idx, int width);
+
+ void setIntReg(int reg_idx, uint64_t val);
+
+ void setFloatReg(int reg_idx, FloatReg val, int width);
+
+ void setFloatReg(int reg_idx, FloatReg val, int width);
+
+ void setFloatRegBits(int reg_idx, FloatRegBits val);
+
+ void setFloatRegBits(int reg_idx, FloatRegBits val);
+
- void setNextPC(uint64_t val);
++ uint64_t readArchIntReg(int reg_idx, unsigned tid);
++
++ float readArchFloatRegSingle(int reg_idx, unsigned tid);
++
++ double readArchFloatRegDouble(int reg_idx, unsigned tid);
++
++ uint64_t readArchFloatRegInt(int reg_idx, unsigned tid);
++
++ void setArchIntReg(int reg_idx, uint64_t val, unsigned tid);
++
++ void setArchFloatRegSingle(int reg_idx, float val, unsigned tid);
++
++ void setArchFloatRegDouble(int reg_idx, double val, unsigned tid);
++
++ void setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid);
+
- void setPC(Addr new_PC);
++ uint64_t readPC(unsigned tid);
+
- void addInst(DynInstPtr &inst);
++ void setPC(Addr new_PC,unsigned tid);
++
++ uint64_t readNextPC(unsigned tid);
++
++ void setNextPC(uint64_t val,unsigned tid);
+
+ /** Function to add instruction onto the head of the list of the
+ * instructions. Used when new instructions are fetched.
+ */
- void instDone();
-
- /** Remove all instructions in back of the given instruction, but leave
- * that instruction in the list. This is useful in a squash, when there
- * are instructions in this list that don't exist in structures such as
- * the ROB. The instruction doesn't have to be the last instruction in
- * the list, but will be once this function completes.
- * @todo: Remove only up until that inst? Squashed inst is most likely
- * valid.
- */
- void removeBackInst(DynInstPtr &inst);
-
- /** Remove an instruction from the front of the list. It is expected
- * that there are no instructions in front of it (that is, none are older
- * than the instruction being removed). Used when retiring instructions.
- * @todo: Remove the argument to this function, and just have it remove
- * last instruction once it's verified that commit has the same ordering
- * as the instruction list.
++ ListIt addInst(DynInstPtr &inst);
+
+ /** Function to tell the CPU that an instruction has completed. */
- void removeInstsNotInROB();
++ void instDone(unsigned tid);
++
++ /** Add Instructions to the CPU Remove List*/
++ void addToRemoveList(DynInstPtr &inst);
++
++ /** Remove an instruction from the front end of the list. There's
++ * no restriction on location of the instruction.
+ */
+ void removeFrontInst(DynInstPtr &inst);
+
+ /** Remove all instructions that are not currently in the ROB. */
- void removeInstsUntil(const InstSeqNum &seq_num);
++ void removeInstsNotInROB(unsigned tid);
+
+ /** Remove all instructions younger than the given sequence number. */
- void removeAllInsts();
++ void removeInstsUntil(const InstSeqNum &seq_num,unsigned tid);
++
++ inline void squashInstIt(const ListIt &instIt, const unsigned &tid);
++
++ void cleanUpRemovedInsts();
+
+ /** Remove all instructions from the list. */
- * commit can tell the instruction queue that they have completed.
- * Eventually this hack should be removed.
++// void removeAllInsts();
+
+ void dumpInsts();
+
+ /** Basically a wrapper function so that instructions executed at
- void wakeDependents(DynInstPtr &inst);
++ * commit can tell the instruction queue that they have
++ * completed. Eventually this hack should be removed.
+ */
- list<DynInstPtr> instList;
++// void wakeDependents(DynInstPtr &inst);
+
+ public:
+ /** List of all the instructions in flight. */
- //not sure these should be private.
++ std::list<DynInstPtr> instList;
++
++ /** List of all the instructions that will be removed at the end of this
++ * cycle.
++ */
++ std::queue<ListIt> removeList;
++
++#ifdef DEBUG
++ std::set<InstSeqNum> snList;
++#endif
++
++ /** Records if instructions need to be removed this cycle due to
++ * being retired or squashed.
++ */
++ bool removeInstsThisCycle;
+
- /** The fetch stage's status. */
- typename CPUPolicy::Fetch::Status fetchStatus;
-
+ protected:
+ /** The fetch stage. */
+ typename CPUPolicy::Fetch fetch;
+
- /** The decode stage's status. */
- typename CPUPolicy::Decode::Status decodeStatus;
-
+ /** The decode stage. */
+ typename CPUPolicy::Decode decode;
+
- /** The dispatch stage's status. */
- typename CPUPolicy::Rename::Status renameStatus;
-
+ /** The dispatch stage. */
+ typename CPUPolicy::Rename rename;
+
- /** The issue/execute/writeback stage's status. */
- typename CPUPolicy::IEW::Status iewStatus;
-
+ /** The issue/execute/writeback stages. */
+ typename CPUPolicy::IEW iew;
+
- /** The fetch stage's status. */
- typename CPUPolicy::Commit::Status commitStatus;
-
- //Might want to just pass these objects in to the constructors of the
- //appropriate stage. regFile is in iew, freeList in dispatch, renameMap
- //in dispatch, and the rob in commit.
+ /** The commit stage. */
+ typename CPUPolicy::Commit commit;
+
- typename CPUPolicy::RenameMap renameMap;
+ /** The register file. */
+ typename CPUPolicy::RegFile regFile;
+
+ /** The free list. */
+ typename CPUPolicy::FreeList freeList;
+
+ /** The rename map. */
- /** The temporary exec context to support older accessors. */
- CPUExecContext *cpuXC;
++ typename CPUPolicy::RenameMap renameMap[Impl::MaxThreads];
++
++ /** The commit rename map. */
++ typename CPUPolicy::RenameMap commitRenameMap[Impl::MaxThreads];
+
+ /** The re-order buffer. */
+ typename CPUPolicy::ROB rob;
+
++ /** Active Threads List */
++ std::list<unsigned> activeThreads;
++
++ /** Integer Register Scoreboard */
++ Scoreboard scoreboard;
++
+ public:
++ /** Enum to give each stage a specific index, so when calling
++ * activateStage() or deactivateStage(), they can specify which stage
++ * is being activated/deactivated.
++ */
++ enum StageIdx {
++ FetchIdx,
++ DecodeIdx,
++ RenameIdx,
++ IEWIdx,
++ CommitIdx,
++ NumStages };
++
+ /** Typedefs from the Impl to get the structs that each of the
+ * time buffers should use.
+ */
+ typedef typename CPUPolicy::TimeStruct TimeStruct;
+
+ typedef typename CPUPolicy::FetchStruct FetchStruct;
+
+ typedef typename CPUPolicy::DecodeStruct DecodeStruct;
+
+ typedef typename CPUPolicy::RenameStruct RenameStruct;
+
+ typedef typename CPUPolicy::IEWStruct IEWStruct;
+
+ /** The main time buffer to do backwards communication. */
+ TimeBuffer<TimeStruct> timeBuffer;
+
+ /** The fetch stage's instruction queue. */
+ TimeBuffer<FetchStruct> fetchQueue;
+
+ /** The decode stage's instruction queue. */
+ TimeBuffer<DecodeStruct> decodeQueue;
+
+ /** The rename stage's instruction queue. */
+ TimeBuffer<RenameStruct> renameQueue;
+
+ /** The IEW stage's instruction queue. */
+ TimeBuffer<IEWStruct> iewQueue;
+
+ public:
- /** Temporary function to get pointer to exec context. */
- ExecContext *xcBase()
- {
- return thread[0]->getProxy();
- }
++ ActivityRecorder activityRec;
+
- CPUExecContext *cpuXCBase()
++ void activityThisCycle() { activityRec.activity(); }
++
++ void activateStage(const StageIdx idx)
++ { activityRec.activateStage(idx); }
+
- return thread[0];
++ void deactivateStage(const StageIdx idx)
++ { activityRec.deactivateStage(idx); }
++
++ /** Wakes the CPU, rescheduling the CPU if it's not already active. */
++ void wakeCPU();
++
++ /** Gets a free thread id. Use if thread ids change across system. */
++ int getFreeTid();
++
++ public:
++ /** Temporary function to get pointer to exec context. */
++ ExecContext *xcBase(unsigned tid)
+ {
-
- AlphaITB *itb;
- AlphaDTB *dtb;
-
- // SWContext *swCtx;
++ return thread[tid]->getXCProxy();
+ }
+
++ /** The global sequence number counter. */
+ InstSeqNum globalSeqNum;
+
++ Checker<DynInstPtr> *checker;
++
+#if FULL_SYSTEM
++ /** Pointer to the system. */
+ System *system;
+
++ /** Pointer to the memory controller. */
+ MemoryController *memCtrl;
++ /** Pointer to physical memory. */
+ PhysicalMemory *physmem;
- std::vector<CPUExecContext *> thread;
+#endif
- Counter numInsts;
-
- Counter funcExeInst;
+
++ /** Pointer to memory. */
+ FunctionalMemory *mem;
+
++ Sampler *sampler;
++
++ int switchCount;
++
++ // List of all ExecContexts.
++ std::vector<Thread *> thread;
++
++#if 0
++ /** Page table pointer. */
++ PageTable *pTable;
++#endif
++
++ /** Pointer to the icache interface. */
+ MemInterface *icacheInterface;
++ /** Pointer to the dcache interface. */
+ MemInterface *dcacheInterface;
+
++ /** Whether or not the CPU should defer its registration. */
+ bool deferRegistration;
+
- #endif
++ /** Is there a context switch pending? */
++ bool contextSwitch;
++
++ /** Threads Scheduled to Enter CPU */
++ std::list<int> cpuWaitList;
++
++ /** The cycle that the CPU was last running, used for statistics. */
++ Tick lastRunningCycle;
++
++ /** Number of Threads CPU can process */
++ unsigned numThreads;
++
++ /** Mapping for system thread id to cpu id */
++ std::map<unsigned,unsigned> threadMap;
++
++ /** Available thread ids in the cpu*/
++ std::vector<unsigned> tids;
++
++ /** Stat for total number of times the CPU is descheduled. */
++ Stats::Scalar<> timesIdled;
++ /** Stat for total number of cycles the CPU spends descheduled. */
++ Stats::Scalar<> idleCycles;
++ /** Stat for the number of committed instructions per thread. */
++ Stats::Vector<> committedInsts;
++ /** Stat for the total number of committed instructions. */
++ Stats::Scalar<> totalCommittedInsts;
++ /** Stat for the CPI per thread. */
++ Stats::Formula cpi;
++ /** Stat for the total CPI. */
++ Stats::Formula totalCpi;
++ /** Stat for the IPC per thread. */
++ Stats::Formula ipc;
++ /** Stat for the total IPC. */
++ Stats::Formula totalIpc;
+};
+
++#endif // __CPU_O3_CPU_HH__
--- /dev/null
- #ifndef __CPU_O3_CPU_CPU_POLICY_HH__
- #define __CPU_O3_CPU_CPU_POLICY_HH__
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- #include "cpu/o3/ldstq.hh"
++#ifndef __CPU_O3_CPU_POLICY_HH__
++#define __CPU_O3_CPU_POLICY_HH__
+
+#include "cpu/o3/bpred_unit.hh"
+#include "cpu/o3/free_list.hh"
+#include "cpu/o3/inst_queue.hh"
- typedef LDSTQ<Impl> LDSTQ;
++#include "cpu/o3/lsq.hh"
++#include "cpu/o3/lsq_unit.hh"
+#include "cpu/o3/mem_dep_unit.hh"
+#include "cpu/o3/regfile.hh"
+#include "cpu/o3/rename_map.hh"
+#include "cpu/o3/rob.hh"
+#include "cpu/o3/store_set.hh"
+
+#include "cpu/o3/commit.hh"
+#include "cpu/o3/decode.hh"
+#include "cpu/o3/fetch.hh"
+#include "cpu/o3/iew.hh"
+#include "cpu/o3/rename.hh"
+
+#include "cpu/o3/comm.hh"
+
+template<class Impl>
+struct SimpleCPUPolicy
+{
+ typedef TwobitBPredUnit<Impl> BPredUnit;
+ typedef PhysRegFile<Impl> RegFile;
+ typedef SimpleFreeList FreeList;
+ typedef SimpleRenameMap RenameMap;
+ typedef ROB<Impl> ROB;
+ typedef InstructionQueue<Impl> IQ;
+ typedef MemDepUnit<StoreSet, Impl> MemDepUnit;
- typedef SimpleFetch<Impl> Fetch;
- typedef SimpleDecode<Impl> Decode;
- typedef SimpleRename<Impl> Rename;
- typedef SimpleIEW<Impl> IEW;
- typedef SimpleCommit<Impl> Commit;
++ typedef LSQ<Impl> LSQ;
++ typedef LSQUnit<Impl> LSQUnit;
+
- typedef SimpleFetchSimpleDecode<Impl> FetchStruct;
++
++ typedef DefaultFetch<Impl> Fetch;
++ typedef DefaultDecode<Impl> Decode;
++ typedef DefaultRename<Impl> Rename;
++ typedef DefaultIEW<Impl> IEW;
++ typedef DefaultCommit<Impl> Commit;
+
+ /** The struct for communication between fetch and decode. */
- typedef SimpleDecodeSimpleRename<Impl> DecodeStruct;
++ typedef DefaultFetchDefaultDecode<Impl> FetchStruct;
+
+ /** The struct for communication between decode and rename. */
- typedef SimpleRenameSimpleIEW<Impl> RenameStruct;
++ typedef DefaultDecodeDefaultRename<Impl> DecodeStruct;
+
+ /** The struct for communication between rename and IEW. */
- typedef SimpleIEWSimpleCommit<Impl> IEWStruct;
++ typedef DefaultRenameDefaultIEW<Impl> RenameStruct;
+
+ /** The struct for communication between IEW and commit. */
- typedef TimeBufStruct TimeStruct;
++ typedef DefaultIEWDefaultCommit<Impl> IEWStruct;
+
+ /** The struct for communication within the IEW stage. */
+ typedef IssueStruct<Impl> IssueStruct;
+
+ /** The struct for all backwards communication. */
- #endif //__CPU_O3_CPU_CPU_POLICY_HH__
++ typedef TimeBufStruct<Impl> TimeStruct;
+
+};
+
++#endif //__CPU_O3_CPU_POLICY_HH__
--- /dev/null
- template class SimpleDecode<AlphaSimpleImpl>;
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/o3/alpha_dyn_inst.hh"
+#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/decode_impl.hh"
+
++template class DefaultDecode<AlphaSimpleImpl>;
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- #ifndef __CPU_O3_CPU_SIMPLE_DECODE_HH__
- #define __CPU_O3_CPU_SIMPLE_DECODE_HH__
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- class SimpleDecode
++#ifndef __CPU_O3_DECODE_HH__
++#define __CPU_O3_DECODE_HH__
+
+#include <queue>
+
+#include "base/statistics.hh"
+#include "base/timebuf.hh"
+
++/**
++ * DefaultDecode class handles both single threaded and SMT
++ * decode. Its width is specified by the parameters; each cycles it
++ * tries to decode that many instructions. Because instructions are
++ * actually decoded when the StaticInst is created, this stage does
++ * not do much other than check any PC-relative branches.
++ */
+template<class Impl>
- // The only time decode will become blocked is if dispatch becomes
- // blocked, which means IQ or ROB is probably full.
- enum Status {
++class DefaultDecode
+{
+ private:
+ // Typedefs from the Impl.
+ typedef typename Impl::FullCPU FullCPU;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::Params Params;
+ typedef typename Impl::CPUPol CPUPol;
+
+ // Typedefs from the CPU policy.
+ typedef typename CPUPol::FetchStruct FetchStruct;
+ typedef typename CPUPol::DecodeStruct DecodeStruct;
+ typedef typename CPUPol::TimeStruct TimeStruct;
+
+ public:
- // May eventually need statuses on a per thread basis.
- Status _status;
++ /** Overall decode stage status. Used to determine if the CPU can
++ * deschedule itself due to a lack of activity.
++ */
++ enum DecodeStatus {
++ Active,
++ Inactive
++ };
++
++ /** Individual thread status. */
++ enum ThreadStatus {
+ Running,
+ Idle,
++ StartSquash,
+ Squashing,
+ Blocked,
+ Unblocking
+ };
+
+ private:
- SimpleDecode(Params ¶ms);
++ /** Decode status. */
++ DecodeStatus _status;
++
++ /** Per-thread status. */
++ ThreadStatus decodeStatus[Impl::MaxThreads];
+
+ public:
- void decode();
++ /** DefaultDecode constructor. */
++ DefaultDecode(Params *params);
+
++ /** Returns the name of decode. */
++ std::string name() const;
++
++ /** Registers statistics. */
+ void regStats();
+
++ /** Sets CPU pointer. */
+ void setCPU(FullCPU *cpu_ptr);
+
++ /** Sets the main backwards communication time buffer pointer. */
+ void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
+
++ /** Sets pointer to time buffer used to communicate to the next stage. */
+ void setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr);
+
++ /** Sets pointer to time buffer coming from fetch. */
+ void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr);
+
++ /** Sets pointer to list of active threads. */
++ void setActiveThreads(std::list<unsigned> *at_ptr);
++
++ void switchOut();
++
++ void takeOverFrom();
++ /** Ticks decode, processing all input signals and decoding as many
++ * instructions as possible.
++ */
+ void tick();
+
- void block();
++ /** Determines what to do based on decode's current status.
++ * @param status_change decode() sets this variable if there was a status
++ * change (ie switching from from blocking to unblocking).
++ * @param tid Thread id to decode instructions from.
++ */
++ void decode(bool &status_change, unsigned tid);
++
++ /** Processes instructions from fetch and passes them on to rename.
++ * Decoding of instructions actually happens when they are created in
++ * fetch, so this function mostly checks if PC-relative branches are
++ * correct.
++ */
++ void decodeInsts(unsigned tid);
+
+ private:
++ /** Inserts a thread's instructions into the skid buffer, to be decoded
++ * once decode unblocks.
++ */
++ void skidInsert(unsigned tid);
++
++ /** Returns if all of the skid buffers are empty. */
++ bool skidsEmpty();
++
++ /** Updates overall decode status based on all of the threads' statuses. */
++ void updateStatus();
++
++ /** Separates instructions from fetch into individual lists of instructions
++ * sorted by thread.
++ */
++ void sortInsts();
++
++ /** Reads all stall signals from the backwards communication timebuffer. */
++ void readStallSignals(unsigned tid);
++
++ /** Checks all input signals and updates decode's status appropriately. */
++ bool checkSignalsAndUpdate(unsigned tid);
++
++ /** Checks all stall signals, and returns if any are true. */
++ bool checkStall(unsigned tid) const;
++
++ /** Returns if there any instructions from fetch on this cycle. */
+ inline bool fetchInstsValid();
+
- inline void unblock();
++ /** Switches decode to blocking, and signals back that decode has
++ * become blocked.
++ * @return Returns true if there is a status change.
++ */
++ bool block(unsigned tid);
+
- void squash(DynInstPtr &inst);
++ /** Switches decode to unblocking if the skid buffer is empty, and
++ * signals back that decode has unblocked.
++ * @return Returns true if there is a status change.
++ */
++ bool unblock(unsigned tid);
+
- // Might want to make squash a friend function.
- void squash();
++ /** Squashes if there is a PC-relative branch that was predicted
++ * incorrectly. Sends squash information back to fetch.
++ */
++ void squash(DynInstPtr &inst, unsigned tid);
+
+ public:
- std::queue<FetchStruct> skidBuffer;
++ /** Squashes due to commit signalling a squash. Changes status to
++ * squashing and clears block/unblock signals as needed.
++ */
++ unsigned squash(unsigned tid);
+
+ private:
+ // Interfaces to objects outside of decode.
+ /** CPU interface. */
+ FullCPU *cpu;
+
+ /** Time buffer interface. */
+ TimeBuffer<TimeStruct> *timeBuffer;
+
+ /** Wire to get rename's output from backwards time buffer. */
+ typename TimeBuffer<TimeStruct>::wire fromRename;
+
+ /** Wire to get iew's information from backwards time buffer. */
+ typename TimeBuffer<TimeStruct>::wire fromIEW;
+
+ /** Wire to get commit's information from backwards time buffer. */
+ typename TimeBuffer<TimeStruct>::wire fromCommit;
+
+ /** Wire to write information heading to previous stages. */
+ // Might not be the best name as not only fetch will read it.
+ typename TimeBuffer<TimeStruct>::wire toFetch;
+
+ /** Decode instruction queue. */
+ TimeBuffer<DecodeStruct> *decodeQueue;
+
+ /** Wire used to write any information heading to rename. */
+ typename TimeBuffer<DecodeStruct>::wire toRename;
+
+ /** Fetch instruction queue interface. */
+ TimeBuffer<FetchStruct> *fetchQueue;
+
+ /** Wire to get fetch's output from fetch queue. */
+ typename TimeBuffer<FetchStruct>::wire fromFetch;
+
++ /** Queue of all instructions coming from fetch this cycle. */
++ std::queue<DynInstPtr> insts[Impl::MaxThreads];
++
+ /** Skid buffer between fetch and decode. */
- //Consider making these unsigned to avoid any confusion.
++ std::queue<DynInstPtr> skidBuffer[Impl::MaxThreads];
++
++ /** Variable that tracks if decode has written to the time buffer this
++ * cycle. Used to tell CPU if there is activity this cycle.
++ */
++ bool wroteToTimeBuffer;
++
++ /** Source of possible stalls. */
++ struct Stalls {
++ bool rename;
++ bool iew;
++ bool commit;
++ };
++
++ /** Tracks which stages are telling decode to stall. */
++ Stalls stalls[Impl::MaxThreads];
+
- /** The instruction that decode is currently on. It needs to have
- * persistent state so that when a stall occurs in the middle of a
- * group of instructions, it can restart at the proper instruction.
- */
- unsigned numInst;
+ /** Rename to decode delay, in ticks. */
+ unsigned renameToDecodeDelay;
+
+ /** IEW to decode delay, in ticks. */
+ unsigned iewToDecodeDelay;
+
+ /** Commit to decode delay, in ticks. */
+ unsigned commitToDecodeDelay;
+
+ /** Fetch to decode delay, in ticks. */
+ unsigned fetchToDecodeDelay;
+
+ /** The width of decode, in instructions. */
+ unsigned decodeWidth;
+
- #endif // __CPU_O3_CPU_SIMPLE_DECODE_HH__
++ /** Index of instructions being sent to rename. */
++ unsigned toRenameIndex;
++
++ /** number of Active Threads*/
++ unsigned numThreads;
+
++ /** List of active thread ids */
++ std::list<unsigned> *activeThreads;
++
++ /** Number of branches in flight. */
++ unsigned branchCount[Impl::MaxThreads];
++
++ /** Maximum size of the skid buffer. */
++ unsigned skidBufferMax;
++
++ /** Stat for total number of idle cycles. */
+ Stats::Scalar<> decodeIdleCycles;
++ /** Stat for total number of blocked cycles. */
+ Stats::Scalar<> decodeBlockedCycles;
++ /** Stat for total number of normal running cycles. */
++ Stats::Scalar<> decodeRunCycles;
++ /** Stat for total number of unblocking cycles. */
+ Stats::Scalar<> decodeUnblockCycles;
++ /** Stat for total number of squashing cycles. */
+ Stats::Scalar<> decodeSquashCycles;
++ /** Stat for number of times a branch is resolved at decode. */
++ Stats::Scalar<> decodeBranchResolved;
++ /** Stat for number of times a branch mispredict is detected. */
+ Stats::Scalar<> decodeBranchMispred;
++ /** Stat for number of times decode detected a non-control instruction
++ * incorrectly predicted as a branch.
++ */
+ Stats::Scalar<> decodeControlMispred;
++ /** Stat for total number of decoded instructions. */
+ Stats::Scalar<> decodeDecodedInsts;
++ /** Stat for total number of squashed instructions. */
+ Stats::Scalar<> decodeSquashedInsts;
+};
+
++#endif // __CPU_O3_DECODE_HH__
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- SimpleDecode<Impl>::SimpleDecode(Params ¶ms)
- : renameToDecodeDelay(params.renameToDecodeDelay),
- iewToDecodeDelay(params.iewToDecodeDelay),
- commitToDecodeDelay(params.commitToDecodeDelay),
- fetchToDecodeDelay(params.fetchToDecodeDelay),
- decodeWidth(params.decodeWidth),
- numInst(0)
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/o3/decode.hh"
+
++using namespace std;
++
+template<class Impl>
- DPRINTF(Decode, "Decode: decodeWidth=%i.\n", decodeWidth);
- _status = Idle;
++DefaultDecode<Impl>::DefaultDecode(Params *params)
++ : renameToDecodeDelay(params->renameToDecodeDelay),
++ iewToDecodeDelay(params->iewToDecodeDelay),
++ commitToDecodeDelay(params->commitToDecodeDelay),
++ fetchToDecodeDelay(params->fetchToDecodeDelay),
++ decodeWidth(params->decodeWidth),
++ numThreads(params->numberOfThreads)
+{
- SimpleDecode<Impl>::regStats()
++ _status = Inactive;
++
++ for (int i = 0; i < numThreads; ++i) {
++ decodeStatus[i] = Idle;
++
++ stalls[i].rename = false;
++ stalls[i].iew = false;
++ stalls[i].commit = false;
++ }
++
++ // @todo: Make into a parameter
++ skidBufferMax = (fetchToDecodeDelay * params->fetchWidth) + decodeWidth;
++}
++
++template <class Impl>
++std::string
++DefaultDecode<Impl>::name() const
++{
++ return cpu->name() + ".decode";
+}
+
+template <class Impl>
+void
- .name(name() + ".decodeIdleCycles")
++DefaultDecode<Impl>::regStats()
+{
+ decodeIdleCycles
- .name(name() + ".decodeBlockedCycles")
++ .name(name() + ".DECODE:IdleCycles")
+ .desc("Number of cycles decode is idle")
+ .prereq(decodeIdleCycles);
+ decodeBlockedCycles
- .name(name() + ".decodeUnblockCycles")
++ .name(name() + ".DECODE:BlockedCycles")
+ .desc("Number of cycles decode is blocked")
+ .prereq(decodeBlockedCycles);
++ decodeRunCycles
++ .name(name() + ".DECODE:RunCycles")
++ .desc("Number of cycles decode is running")
++ .prereq(decodeRunCycles);
+ decodeUnblockCycles
- .name(name() + ".decodeSquashCycles")
++ .name(name() + ".DECODE:UnblockCycles")
+ .desc("Number of cycles decode is unblocking")
+ .prereq(decodeUnblockCycles);
+ decodeSquashCycles
- .name(name() + ".decodeBranchMispred")
++ .name(name() + ".DECODE:SquashCycles")
+ .desc("Number of cycles decode is squashing")
+ .prereq(decodeSquashCycles);
++ decodeBranchResolved
++ .name(name() + ".DECODE:BranchResolved")
++ .desc("Number of times decode resolved a branch")
++ .prereq(decodeBranchResolved);
+ decodeBranchMispred
- .name(name() + ".decodeControlMispred")
++ .name(name() + ".DECODE:BranchMispred")
+ .desc("Number of times decode detected a branch misprediction")
+ .prereq(decodeBranchMispred);
+ decodeControlMispred
- .name(name() + ".decodeDecodedInsts")
++ .name(name() + ".DECODE:ControlMispred")
+ .desc("Number of times decode detected an instruction incorrectly"
+ " predicted as a control")
+ .prereq(decodeControlMispred);
+ decodeDecodedInsts
- .name(name() + ".decodeSquashedInsts")
++ .name(name() + ".DECODE:DecodedInsts")
+ .desc("Number of instructions handled by decode")
+ .prereq(decodeDecodedInsts);
+ decodeSquashedInsts
- SimpleDecode<Impl>::setCPU(FullCPU *cpu_ptr)
++ .name(name() + ".DECODE:SquashedInsts")
+ .desc("Number of squashed instructions handled by decode")
+ .prereq(decodeSquashedInsts);
+}
+
+template<class Impl>
+void
- DPRINTF(Decode, "Decode: Setting CPU pointer.\n");
++DefaultDecode<Impl>::setCPU(FullCPU *cpu_ptr)
+{
- SimpleDecode<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
++ DPRINTF(Decode, "Setting CPU pointer.\n");
+ cpu = cpu_ptr;
+}
+
+template<class Impl>
+void
- DPRINTF(Decode, "Decode: Setting time buffer pointer.\n");
++DefaultDecode<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
+{
- SimpleDecode<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
++ DPRINTF(Decode, "Setting time buffer pointer.\n");
+ timeBuffer = tb_ptr;
+
+ // Setup wire to write information back to fetch.
+ toFetch = timeBuffer->getWire(0);
+
+ // Create wires to get information from proper places in time buffer.
+ fromRename = timeBuffer->getWire(-renameToDecodeDelay);
+ fromIEW = timeBuffer->getWire(-iewToDecodeDelay);
+ fromCommit = timeBuffer->getWire(-commitToDecodeDelay);
+}
+
+template<class Impl>
+void
- DPRINTF(Decode, "Decode: Setting decode queue pointer.\n");
++DefaultDecode<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
+{
- SimpleDecode<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
++ DPRINTF(Decode, "Setting decode queue pointer.\n");
+ decodeQueue = dq_ptr;
+
+ // Setup wire to write information to proper place in decode queue.
+ toRename = decodeQueue->getWire(0);
+}
+
+template<class Impl>
+void
- DPRINTF(Decode, "Decode: Setting fetch queue pointer.\n");
++DefaultDecode<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
+{
- SimpleDecode<Impl>::fetchInstsValid()
++ DPRINTF(Decode, "Setting fetch queue pointer.\n");
+ fetchQueue = fq_ptr;
+
+ // Setup wire to read information from fetch queue.
+ fromFetch = fetchQueue->getWire(-fetchToDecodeDelay);
+}
+
++template<class Impl>
++void
++DefaultDecode<Impl>::setActiveThreads(list<unsigned> *at_ptr)
++{
++ DPRINTF(Decode, "Setting active threads list pointer.\n");
++ activeThreads = at_ptr;
++}
++
++template <class Impl>
++void
++DefaultDecode<Impl>::switchOut()
++{
++ cpu->signalSwitched();
++}
++
++template <class Impl>
++void
++DefaultDecode<Impl>::takeOverFrom()
++{
++ _status = Inactive;
++
++ for (int i = 0; i < numThreads; ++i) {
++ decodeStatus[i] = Idle;
++
++ stalls[i].rename = false;
++ stalls[i].iew = false;
++ stalls[i].commit = false;
++ while (!insts[i].empty())
++ insts[i].pop();
++ while (!skidBuffer[i].empty())
++ skidBuffer[i].pop();
++ branchCount[i] = 0;
++ }
++ wroteToTimeBuffer = false;
++}
++
++template<class Impl>
++bool
++DefaultDecode<Impl>::checkStall(unsigned tid) const
++{
++ bool ret_val = false;
++
++ if (stalls[tid].rename) {
++ DPRINTF(Decode,"[tid:%i]: Stall fom Rename stage detected.\n", tid);
++ ret_val = true;
++ } else if (stalls[tid].iew) {
++ DPRINTF(Decode,"[tid:%i]: Stall fom IEW stage detected.\n", tid);
++ ret_val = true;
++ } else if (stalls[tid].commit) {
++ DPRINTF(Decode,"[tid:%i]: Stall fom Commit stage detected.\n", tid);
++ ret_val = true;
++ }
++
++ return ret_val;
++}
++
+template<class Impl>
+inline bool
- void
- SimpleDecode<Impl>::block()
++DefaultDecode<Impl>::fetchInstsValid()
+{
+ return fromFetch->size > 0;
+}
+
+template<class Impl>
- DPRINTF(Decode, "Decode: Blocking.\n");
-
- // Set the status to Blocked.
- _status = Blocked;
++bool
++DefaultDecode<Impl>::block(unsigned tid)
+{
- skidBuffer.push(*fromFetch);
++ DPRINTF(Decode, "[tid:%u]: Blocking.\n", tid);
++
++ // If the decode status is blocked or unblocking then decode has not yet
++ // signalled fetch to unblock. In that case, there is no need to tell
++ // fetch to block.
++ if (decodeStatus[tid] != Blocked &&
++ decodeStatus[tid] != Unblocking) {
++ toFetch->decodeBlock[tid] = true;
++ wroteToTimeBuffer = true;
++ }
+
+ // Add the current inputs to the skid buffer so they can be
+ // reprocessed when this stage unblocks.
- // Note that this stage only signals previous stages to stall when
- // it is the cause of the stall originates at this stage. Otherwise
- // the previous stages are expected to check all possible stall signals.
++ skidInsert(tid);
++
++ if (decodeStatus[tid] != Blocked) {
++ // Set the status to Blocked.
++ decodeStatus[tid] = Blocked;
++ return true;
++ }
+
- inline void
- SimpleDecode<Impl>::unblock()
++ return false;
+}
+
+template<class Impl>
- DPRINTF(Decode, "Decode: Unblocking, going to remove "
- "instructions from skid buffer.\n");
- // Remove the now processed instructions from the skid buffer.
- skidBuffer.pop();
-
- // If there's still information in the skid buffer, then
- // continue to tell previous stages to stall. They will be
- // able to restart once the skid buffer is empty.
- if (!skidBuffer.empty()) {
- toFetch->decodeInfo.stall = true;
- } else {
- DPRINTF(Decode, "Decode: Finished unblocking.\n");
- _status = Running;
++bool
++DefaultDecode<Impl>::unblock(unsigned tid)
+{
- // This squash is specifically for when Decode detects a PC-relative branch
- // was predicted incorrectly.
++ // Decode is done unblocking only if the skid buffer is empty.
++ if (skidBuffer[tid].empty()) {
++ DPRINTF(Decode, "[tid:%u]: Done unblocking.\n", tid);
++ toFetch->decodeUnblock[tid] = true;
++ wroteToTimeBuffer = true;
++
++ decodeStatus[tid] = Running;
++ return true;
+ }
++
++ DPRINTF(Decode, "[tid:%u]: Currently unblocking.\n", tid);
++
++ return false;
+}
+
- SimpleDecode<Impl>::squash(DynInstPtr &inst)
+template<class Impl>
+void
- DPRINTF(Decode, "Decode: Squashing due to incorrect branch prediction "
- "detected at decode.\n");
- Addr new_PC = inst->readNextPC();
-
- toFetch->decodeInfo.branchMispredict = true;
- toFetch->decodeInfo.doneSeqNum = inst->seqNum;
- toFetch->decodeInfo.predIncorrect = true;
- toFetch->decodeInfo.squash = true;
- toFetch->decodeInfo.nextPC = new_PC;
- toFetch->decodeInfo.branchTaken = true;
++DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
+{
- _status = Squashing;
++ DPRINTF(Decode, "[tid:%i]: Squashing due to incorrect branch prediction "
++ "detected at decode.\n", tid);
++
++ toFetch->decodeInfo[tid].branchMispredict = true;
++ toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
++ toFetch->decodeInfo[tid].predIncorrect = true;
++ toFetch->decodeInfo[tid].squash = true;
++ toFetch->decodeInfo[tid].nextPC = inst->readNextPC();
++ toFetch->decodeInfo[tid].branchTaken = true;
++
++ if (decodeStatus[tid] == Blocked ||
++ decodeStatus[tid] == Unblocking) {
++ toFetch->decodeUnblock[tid] = 1;
++ }
+
+ // Set status to squashing.
- while (!skidBuffer.empty()) {
- skidBuffer.pop();
++ decodeStatus[tid] = Squashing;
++
++ for (int i=0; i<fromFetch->size; i++) {
++ if (fromFetch->insts[i]->threadNumber == tid &&
++ fromFetch->insts[i]->seqNum > inst->seqNum) {
++ fromFetch->insts[i]->squashed = true;
++ }
++ }
++
++ while (!insts[tid].empty()) {
++ insts[tid].pop();
++ }
+
+ // Clear the skid buffer in case it has any data in it.
- // Slightly unrealistic!
- cpu->removeInstsUntil(inst->seqNum);
++ while (!skidBuffer[tid].empty()) {
++ skidBuffer[tid].pop();
+ }
+
+ // Squash instructions up until this one
- void
- SimpleDecode<Impl>::squash()
++ cpu->removeInstsUntil(inst->seqNum, tid);
+}
+
+template<class Impl>
- DPRINTF(Decode, "Decode: Squashing.\n");
++unsigned
++DefaultDecode<Impl>::squash(unsigned tid)
+{
- _status = Squashing;
++ DPRINTF(Decode, "[tid:%i]: Squashing.\n",tid);
++
++ if (decodeStatus[tid] == Blocked ||
++ decodeStatus[tid] == Unblocking) {
++#if !FULL_SYSTEM
++ // In syscall emulation, we can have both a block and a squash due
++ // to a syscall in the same cycle. This would cause both signals to
++ // be high. This shouldn't happen in full system.
++ // @todo: Determine if this still happens.
++ if (toFetch->decodeBlock[tid]) {
++ toFetch->decodeBlock[tid] = 0;
++ } else {
++ toFetch->decodeUnblock[tid] = 1;
++ }
++#else
++ toFetch->decodeUnblock[tid] = 1;
++#endif
++ }
++
+ // Set status to squashing.
- // Maybe advance the time buffer? Not sure what to do in the normal
- // case.
++ decodeStatus[tid] = Squashing;
+
- while (!skidBuffer.empty())
- {
- skidBuffer.pop();
++ // Go through incoming instructions from fetch and squash them.
++ unsigned squash_count = 0;
++
++ for (int i=0; i<fromFetch->size; i++) {
++ if (fromFetch->insts[i]->threadNumber == tid) {
++ fromFetch->insts[i]->squashed = true;
++ squash_count++;
++ }
++ }
++
++ while (!insts[tid].empty()) {
++ insts[tid].pop();
++ }
+
+ // Clear the skid buffer in case it has any data in it.
- SimpleDecode<Impl>::tick()
++ while (!skidBuffer[tid].empty()) {
++ skidBuffer[tid].pop();
+ }
++
++ return squash_count;
+}
+
+template<class Impl>
+void
- // Decode should try to execute as many instructions as its bandwidth
- // will allow, as long as it is not currently blocked.
- if (_status != Blocked && _status != Squashing) {
- DPRINTF(Decode, "Decode: Not blocked, so attempting to run "
- "stage.\n");
- // Make sure that the skid buffer has something in it if the
- // status is unblocking.
- assert(_status == Unblocking ? !skidBuffer.empty() : 1);
++DefaultDecode<Impl>::skidInsert(unsigned tid)
+{
- decode();
++ DynInstPtr inst = NULL;
+
- // If the status was unblocking, then instructions from the skid
- // buffer were used. Remove those instructions and handle
- // the rest of unblocking.
- if (_status == Unblocking) {
- ++decodeUnblockCycles;
++ while (!insts[tid].empty()) {
++ inst = insts[tid].front();
+
- if (fetchInstsValid()) {
- // Add the current inputs to the skid buffer so they can be
- // reprocessed when this stage unblocks.
- skidBuffer.push(*fromFetch);
- }
++ insts[tid].pop();
+
- unblock();
- }
- } else if (_status == Blocked) {
- ++decodeBlockedCycles;
++ assert(tid == inst->threadNumber);
+
- if (fetchInstsValid()) {
- block();
- }
++ DPRINTF(Decode,"Inserting [sn:%lli] PC:%#x into decode skidBuffer %i\n",
++ inst->seqNum, inst->readPC(), inst->threadNumber);
+
- if (!fromRename->renameInfo.stall &&
- !fromIEW->iewInfo.stall &&
- !fromCommit->commitInfo.stall) {
- DPRINTF(Decode, "Decode: Stall signals cleared, going to "
- "unblock.\n");
- _status = Unblocking;
++ skidBuffer[tid].push(inst);
++ }
+
- // Continue to tell previous stage to block until this
- // stage is done unblocking.
- toFetch->decodeInfo.stall = true;
- } else {
- DPRINTF(Decode, "Decode: Still blocked.\n");
- toFetch->decodeInfo.stall = true;
++ // @todo: Eventually need to enforce this by not letting a thread
++ // fetch past its skidbuffer
++ assert(skidBuffer[tid].size() <= skidBufferMax);
++}
+
- if (fromCommit->commitInfo.squash ||
- fromCommit->commitInfo.robSquashing) {
- squash();
++template<class Impl>
++bool
++DefaultDecode<Impl>::skidsEmpty()
++{
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++
++ while (threads != (*activeThreads).end()) {
++ if (!skidBuffer[*threads++].empty())
++ return false;
++ }
++
++ return true;
++}
++
++template<class Impl>
++void
++DefaultDecode<Impl>::updateStatus()
++{
++ bool any_unblocking = false;
++
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++
++ threads = (*activeThreads).begin();
++
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = *threads++;
++
++ if (decodeStatus[tid] == Unblocking) {
++ any_unblocking = true;
++ break;
+ }
++ }
++
++ // Decode will have activity if it's unblocking.
++ if (any_unblocking) {
++ if (_status == Inactive) {
++ _status = Active;
++
++ DPRINTF(Activity, "Activating stage.\n");
+
- } else if (_status == Squashing) {
- if (!fromCommit->commitInfo.squash &&
- !fromCommit->commitInfo.robSquashing) {
- _status = Running;
- } else if (fromCommit->commitInfo.squash) {
- ++decodeSquashCycles;
-
- squash();
++ cpu->activateStage(FullCPU::DecodeIdx);
+ }
- SimpleDecode<Impl>::decode()
++ } else {
++ // If it's not unblocking, then decode will not have any internal
++ // activity. Switch it to inactive.
++ if (_status == Active) {
++ _status = Inactive;
++ DPRINTF(Activity, "Deactivating stage.\n");
++
++ cpu->deactivateStage(FullCPU::DecodeIdx);
+ }
+ }
+}
+
++template <class Impl>
++void
++DefaultDecode<Impl>::sortInsts()
++{
++ int insts_from_fetch = fromFetch->size;
++#ifdef DEBUG
++ for (int i=0; i < numThreads; i++)
++ assert(insts[i].empty());
++#endif
++ for (int i = 0; i < insts_from_fetch; ++i) {
++ insts[fromFetch->insts[i]->threadNumber].push(fromFetch->insts[i]);
++ }
++}
++
+template<class Impl>
+void
- // Check time buffer if being told to squash.
- if (fromCommit->commitInfo.squash) {
- squash();
- return;
++DefaultDecode<Impl>::readStallSignals(unsigned tid)
+{
- // Check time buffer if being told to stall.
- if (fromRename->renameInfo.stall ||
- fromIEW->iewInfo.stall ||
- fromCommit->commitInfo.stall) {
- block();
- return;
++ if (fromRename->renameBlock[tid]) {
++ stalls[tid].rename = true;
+ }
+
- // Check fetch queue to see if instructions are available.
- // If no available instructions, do nothing, unless this stage is
- // currently unblocking.
- if (!fetchInstsValid() && _status != Unblocking) {
- DPRINTF(Decode, "Decode: Nothing to do, breaking out early.\n");
++ if (fromRename->renameUnblock[tid]) {
++ assert(stalls[tid].rename);
++ stalls[tid].rename = false;
++ }
++
++ if (fromIEW->iewBlock[tid]) {
++ stalls[tid].iew = true;
++ }
++
++ if (fromIEW->iewUnblock[tid]) {
++ assert(stalls[tid].iew);
++ stalls[tid].iew = false;
++ }
++
++ if (fromCommit->commitBlock[tid]) {
++ stalls[tid].commit = true;
++ }
++
++ if (fromCommit->commitUnblock[tid]) {
++ assert(stalls[tid].commit);
++ stalls[tid].commit = false;
++ }
++}
++
++template <class Impl>
++bool
++DefaultDecode<Impl>::checkSignalsAndUpdate(unsigned tid)
++{
++ // Check if there's a squash signal, squash if there is.
++ // Check stall signals, block if necessary.
++ // If status was blocked
++ // Check if stall conditions have passed
++ // if so then go to unblocking
++ // If status was Squashing
++ // check if squashing is not high. Switch to running this cycle.
++
++ // Update the per thread stall statuses.
++ readStallSignals(tid);
++
++ // Check squash signals from commit.
++ if (fromCommit->commitInfo[tid].squash) {
++
++ DPRINTF(Decode, "[tid:%u]: Squashing instructions due to squash "
++ "from commit.\n", tid);
++
++ squash(tid);
++
++ return true;
++ }
++
++ // Check ROB squash signals from commit.
++ if (fromCommit->commitInfo[tid].robSquashing) {
++ DPRINTF(Decode, "[tid:%]: ROB is still squashing.\n",tid);
++
++ // Continue to squash.
++ decodeStatus[tid] = Squashing;
++
++ return true;
++ }
++
++ if (checkStall(tid)) {
++ return block(tid);
+ }
+
- // Might be better to use a base DynInst * instead?
++ if (decodeStatus[tid] == Blocked) {
++ DPRINTF(Decode, "[tid:%u]: Done blocking, switching to unblocking.\n",
++ tid);
++
++ decodeStatus[tid] = Unblocking;
++
++ unblock(tid);
++
++ return true;
++ }
++
++ if (decodeStatus[tid] == Squashing) {
++ // Switch status to running if decode isn't being told to block or
++ // squash this cycle.
++ DPRINTF(Decode, "[tid:%u]: Done squashing, switching to running.\n",
++ tid);
++
++ decodeStatus[tid] = Running;
++
++ return false;
++ }
++
++ // If we've reached this point, we have not gotten any signals that
++ // cause decode to change its status. Decode remains the same as before.
++ return false;
++}
++
++template<class Impl>
++void
++DefaultDecode<Impl>::tick()
++{
++ wroteToTimeBuffer = false;
++
++ bool status_change = false;
++
++ toRenameIndex = 0;
++
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++
++ sortInsts();
++
++ //Check stall and squash signals.
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = *threads++;
++
++ DPRINTF(Decode,"Processing [tid:%i]\n",tid);
++ status_change = checkSignalsAndUpdate(tid) || status_change;
++
++ decode(status_change, tid);
++ }
++
++ if (status_change) {
++ updateStatus();
++ }
++
++ if (wroteToTimeBuffer) {
++ DPRINTF(Activity, "Activity this cycle.\n");
++
++ cpu->activityThisCycle();
++ }
++}
++
++template<class Impl>
++void
++DefaultDecode<Impl>::decode(bool &status_change, unsigned tid)
++{
++ // If status is Running or idle,
++ // call decodeInsts()
++ // If status is Unblocking,
++ // buffer any instructions coming from fetch
++ // continue trying to empty skid buffer
++ // check if stall conditions have passed
++
++ if (decodeStatus[tid] == Blocked) {
++ ++decodeBlockedCycles;
++ } else if (decodeStatus[tid] == Squashing) {
++ ++decodeSquashCycles;
++ }
++
++ // Decode should try to decode as many instructions as its bandwidth
++ // will allow, as long as it is not currently blocked.
++ if (decodeStatus[tid] == Running ||
++ decodeStatus[tid] == Idle) {
++ DPRINTF(Decode, "[tid:%u] Not blocked, so attempting to run "
++ "stage.\n",tid);
++
++ decodeInsts(tid);
++ } else if (decodeStatus[tid] == Unblocking) {
++ // Make sure that the skid buffer has something in it if the
++ // status is unblocking.
++ assert(!skidsEmpty());
++
++ // If the status was unblocking, then instructions from the skid
++ // buffer were used. Remove those instructions and handle
++ // the rest of unblocking.
++ decodeInsts(tid);
++
++ if (fetchInstsValid()) {
++ // Add the current inputs to the skid buffer so they can be
++ // reprocessed when this stage unblocks.
++ skidInsert(tid);
++ }
++
++ status_change = unblock(tid) || status_change;
++ }
++}
++
++template <class Impl>
++void
++DefaultDecode<Impl>::decodeInsts(unsigned tid)
++{
++ // Instructions can come either from the skid buffer or the list of
++ // instructions coming from fetch, depending on decode's status.
++ int insts_available = decodeStatus[tid] == Unblocking ?
++ skidBuffer[tid].size() : insts[tid].size();
++
++ if (insts_available == 0) {
++ DPRINTF(Decode, "[tid:%u] Nothing to do, breaking out"
++ " early.\n",tid);
+ // Should I change the status to idle?
+ ++decodeIdleCycles;
+ return;
++ } else if (decodeStatus[tid] == Unblocking) {
++ DPRINTF(Decode, "[tid:%u] Unblocking, removing insts from skid "
++ "buffer.\n",tid);
++ ++decodeUnblockCycles;
++ } else if (decodeStatus[tid] == Running) {
++ ++decodeRunCycles;
+ }
+
- unsigned to_rename_index = 0;
+ DynInstPtr inst;
+
- int insts_available = _status == Unblocking ?
- skidBuffer.front().size - numInst :
- fromFetch->size;
++ std::queue<DynInstPtr>
++ &insts_to_decode = decodeStatus[tid] == Unblocking ?
++ skidBuffer[tid] : insts[tid];
+
- // Debug block...
- #if 0
- if (insts_available) {
- DPRINTF(Decode, "Decode: Instructions available.\n");
- } else {
- if (_status == Unblocking && skidBuffer.empty()) {
- DPRINTF(Decode, "Decode: No instructions available, skid buffer "
- "empty.\n");
- } else if (_status != Unblocking &&
- !fromFetch->insts[0]) {
- DPRINTF(Decode, "Decode: No instructions available, fetch queue "
- "empty.\n");
- } else {
- panic("Decode: No instructions available, unexpected condition!"
- "\n");
- }
- }
- #endif
++ DPRINTF(Decode, "[tid:%u]: Sending instruction to rename.\n",tid);
+
- while (insts_available > 0)
- {
- DPRINTF(Decode, "Decode: Sending instruction to rename.\n");
++ while (insts_available > 0 && toRenameIndex < decodeWidth) {
++ assert(!insts_to_decode.empty());
+
- inst = _status == Unblocking ? skidBuffer.front().insts[numInst] :
- fromFetch->insts[numInst];
++ inst = insts_to_decode.front();
+
- DPRINTF(Decode, "Decode: Processing instruction %i with PC %#x\n",
- inst->seqNum, inst->readPC());
++ insts_to_decode.pop();
+
- DPRINTF(Decode, "Decode: Instruction %i with PC %#x is "
++ DPRINTF(Decode, "[tid:%u]: Processing instruction [sn:%lli] with "
++ "PC %#x\n",
++ tid, inst->seqNum, inst->readPC());
+
+ if (inst->isSquashed()) {
- inst->seqNum, inst->readPC());
++ DPRINTF(Decode, "[tid:%u]: Instruction %i with PC %#x is "
+ "squashed, skipping.\n",
- ++numInst;
++ tid, inst->seqNum, inst->readPC());
+
+ ++decodeSquashedInsts;
+
-
+ --insts_available;
+
+ continue;
+ }
+
- // Isn't this handled by the inst queue?
+ // Also check if instructions have no source registers. Mark
+ // them as ready to issue at any time. Not sure if this check
+ // should exist here or at a later stage; however it doesn't matter
+ // too much for function correctness.
- toRename->insts[to_rename_index] = inst;
+ if (inst->numSrcRegs() == 0) {
+ inst->setCanIssue();
+ }
+
+ // This current instruction is valid, so add it into the decode
+ // queue. The next instruction may not be valid, so check to
+ // see if branches were predicted correctly.
- squash(inst);
++ toRename->insts[toRenameIndex] = inst;
+
+ ++(toRename->size);
++ ++toRenameIndex;
++ ++decodeDecodedInsts;
++ --insts_available;
+
+ // Ensure that if it was predicted as a branch, it really is a
+ // branch.
+ if (inst->predTaken() && !inst->isControl()) {
+ panic("Instruction predicted as a branch!");
+
+ ++decodeControlMispred;
++
+ // Might want to set some sort of boolean and just do
+ // a check at the end
-
++ squash(inst, inst->threadNumber);
++
+ break;
+ }
+
+ // Go ahead and compute any PC-relative branches.
-
+ if (inst->isDirectCtrl() && inst->isUncondCtrl()) {
- squash(inst);
++ ++decodeBranchResolved;
+ inst->setNextPC(inst->branchTarget());
+
+ if (inst->mispredicted()) {
+ ++decodeBranchMispred;
++
+ // Might want to set some sort of boolean and just do
+ // a check at the end
- // Normally can check if a direct branch has the right target
- // addr (either the immediate, or the branch PC + 4) and redirect
- // fetch if it's incorrect.
-
- // Increment which instruction we're looking at.
- ++numInst;
- ++to_rename_index;
- ++decodeDecodedInsts;
-
- --insts_available;
++ squash(inst, inst->threadNumber);
++
+ break;
+ }
+ }
++ }
+
- numInst = 0;
++ // If we didn't process all instructions, then we will need to block
++ // and put all those instructions into the skid buffer.
++ if (!insts_to_decode.empty()) {
++ block(tid);
+ }
+
++ // Record that decode has written to the time buffer for activity
++ // tracking.
++ if (toRenameIndex) {
++ wroteToTimeBuffer = true;
++ }
+}
--- /dev/null
- template class SimpleFetch<AlphaSimpleImpl>;
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/o3/alpha_dyn_inst.hh"
+#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/fetch_impl.hh"
+
++template class DefaultFetch<AlphaSimpleImpl>;
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- // Todo: SMT fetch,
- // Add a way to get a stage's current status.
-
- #ifndef __CPU_O3_CPU_SIMPLE_FETCH_HH__
- #define __CPU_O3_CPU_SIMPLE_FETCH_HH__
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- * SimpleFetch class to fetch a single instruction each cycle. SimpleFetch
- * will stall if there's an Icache miss, but otherwise assumes a one cycle
- * Icache hit.
++#ifndef __CPU_O3_FETCH_HH__
++#define __CPU_O3_FETCH_HH__
+
+#include "base/statistics.hh"
+#include "base/timebuf.hh"
+#include "cpu/pc_event.hh"
+#include "mem/mem_interface.hh"
+#include "sim/eventq.hh"
+
++class Sampler;
++
+/**
-
++ * DefaultFetch class handles both single threaded and SMT fetch. Its
++ * width is specified by the parameters; each cycle it tries to fetch
++ * that many instructions. It supports using a branch predictor to
++ * predict direction and targets.
++ * It supports the idling functionalitiy of the CPU by indicating to
++ * the CPU when it is active and inactive.
+ */
- class SimpleFetch
+template <class Impl>
- enum Status {
++class DefaultFetch
+{
+ public:
+ /** Typedefs from Impl. */
+ typedef typename Impl::CPUPol CPUPol;
+ typedef typename Impl::DynInst DynInst;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::FullCPU FullCPU;
+ typedef typename Impl::Params Params;
+
++ /** Typedefs from the CPU policy. */
+ typedef typename CPUPol::BPredUnit BPredUnit;
+ typedef typename CPUPol::FetchStruct FetchStruct;
+ typedef typename CPUPol::TimeStruct TimeStruct;
+
+ /** Typedefs from ISA. */
+ typedef TheISA::MachInst MachInst;
++ typedef TheISA::ExtMachInst ExtMachInst;
+
+ public:
- // May eventually need statuses on a per thread basis.
- Status _status;
++ /** Overall fetch status. Used to determine if the CPU can
++ * deschedule itsef due to a lack of activity.
++ */
++ enum FetchStatus {
++ Active,
++ Inactive
++ };
++
++ /** Individual thread status. */
++ enum ThreadStatus {
+ Running,
+ Idle,
+ Squashing,
+ Blocked,
++ Fetching,
++ TrapPending,
++ QuiescePending,
++ SwitchOut,
+ IcacheMissStall,
+ IcacheMissComplete
+ };
+
- bool stalled;
++ /** Fetching Policy, Add new policies here.*/
++ enum FetchPriority {
++ SingleThread,
++ RoundRobin,
++ Branch,
++ IQ,
++ LSQ
++ };
++
++ private:
++ /** Fetch status. */
++ FetchStatus _status;
++
++ /** Per-thread status. */
++ ThreadStatus fetchStatus[Impl::MaxThreads];
++
++ /** Fetch policy. */
++ FetchPriority fetchPolicy;
+
- SimpleFetch *fetch;
++ /** List that has the threads organized by priority. */
++ std::list<unsigned> priorityList;
+
+ public:
+ class CacheCompletionEvent : public Event
+ {
+ private:
- CacheCompletionEvent(SimpleFetch *_fetch);
++ MemReqPtr req;
++ /** Pointer to fetch. */
++ DefaultFetch *fetch;
++ /** Thread id. */
++// unsigned threadId;
+
+ public:
- /** SimpleFetch constructor. */
- SimpleFetch(Params ¶ms);
++ /** Constructs a cache completion event, which tells fetch when the
++ * cache miss is complete.
++ */
++ CacheCompletionEvent(MemReqPtr &_req, DefaultFetch *_fetch);
+
++ /** Processes cache completion event. */
+ virtual void process();
++ /** Returns the description of the cache completion event. */
+ virtual const char *description();
+ };
+
+ public:
- void processCacheCompletion();
++ /** DefaultFetch constructor. */
++ DefaultFetch(Params *params);
+
++ /** Returns the name of fetch. */
++ std::string name() const;
++
++ /** Registers statistics. */
+ void regStats();
+
++ /** Sets CPU pointer. */
+ void setCPU(FullCPU *cpu_ptr);
+
++ /** Sets the main backwards communication time buffer pointer. */
+ void setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer);
+
++ /** Sets pointer to list of active threads. */
++ void setActiveThreads(std::list<unsigned> *at_ptr);
++
++ /** Sets pointer to time buffer used to communicate to the next stage. */
+ void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr);
+
- Fault fetchCacheLine(Addr fetch_PC);
++ /** Sets pointer to page table. */
++// void setPageTable(PageTable *pt_ptr);
++
++ /** Initialize stage. */
++ void initStage();
++
++ /** Processes cache completion event. */
++ void processCacheCompletion(MemReqPtr &req);
++
++ void switchOut();
++
++ void doSwitchOut();
++
++ void takeOverFrom();
++
++ bool isSwitchedOut() { return switchedOut; }
++
++ void wakeFromQuiesce();
+
+ private:
++ /** Changes the status of this stage to active, and indicates this
++ * to the CPU.
++ */
++ inline void switchToActive();
++
++ /** Changes the status of this stage to inactive, and indicates
++ * this to the CPU.
++ */
++ inline void switchToInactive();
++
+ /**
+ * Looks up in the branch predictor to see if the next PC should be
+ * either next PC+=MachInst or a branch target.
+ * @param next_PC Next PC variable passed in by reference. It is
+ * expected to be set to the current PC; it will be updated with what
+ * the next PC will be.
+ * @return Whether or not a branch was predicted as taken.
+ */
+ bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC);
+
+ /**
+ * Fetches the cache line that contains fetch_PC. Returns any
+ * fault that happened. Puts the data into the class variable
+ * cacheData.
+ * @param fetch_PC The PC address that is being fetched from.
++ * @param ret_fault The fault reference that will be set to the result of
++ * the icache access.
++ * @param tid Thread id.
+ * @return Any fault that occured.
+ */
- inline void doSquash(const Addr &new_PC);
++ bool fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid);
+
- void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num);
++ /** Squashes a specific thread and resets the PC. */
++ inline void doSquash(const Addr &new_PC, unsigned tid);
+
- // Figure out PC vs next PC and how it should be updated
- void squash(const Addr &new_PC);
++ /** Squashes a specific thread and resets the PC. Also tells the CPU to
++ * remove any instructions between fetch and decode that should be sqaushed.
++ */
++ void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num,
++ unsigned tid);
++
++ /** Checks if a thread is stalled. */
++ bool checkStall(unsigned tid) const;
++
++ /** Updates overall fetch stage status; to be called at the end of each
++ * cycle. */
++ FetchStatus updateFetchStatus();
+
+ public:
- void fetch();
++ /** Squashes a specific thread and resets the PC. Also tells the CPU to
++ * remove any instructions that are not in the ROB. The source of this
++ * squash should be the commit stage.
++ */
++ void squash(const Addr &new_PC, unsigned tid);
+
++ /** Ticks the fetch stage, processing all inputs signals and fetching
++ * as many instructions as possible.
++ */
+ void tick();
+
- // Align an address (typically a PC) to the start of an I-cache block.
- // We fold in the PISA 64- to 32-bit conversion here as well.
++ /** Checks all input signals and updates the status as necessary.
++ * @return: Returns if the status has changed due to input signals.
++ */
++ bool checkSignalsAndUpdate(unsigned tid);
++
++ /** Does the actual fetching of instructions and passing them on to the
++ * next stage.
++ * @param status_change fetch() sets this variable if there was a status
++ * change (ie switching to IcacheMissStall).
++ */
++ void fetch(bool &status_change);
+
- MemReqPtr memReq;
++ /** Align a PC to the start of an I-cache block. */
+ Addr icacheBlockAlignPC(Addr addr)
+ {
+ addr = TheISA::realPCToFetchPC(addr);
+ return (addr & ~(cacheBlkMask));
+ }
+
++ private:
++ /** Returns the appropriate thread to fetch, given the fetch policy. */
++ int getFetchingThread(FetchPriority &fetch_priority);
++
++ /** Returns the appropriate thread to fetch using a round robin policy. */
++ int roundRobin();
++
++ /** Returns the appropriate thread to fetch using the IQ count policy. */
++ int iqCount();
++
++ /** Returns the appropriate thread to fetch using the LSQ count policy. */
++ int lsqCount();
++
++ /** Returns the appropriate thread to fetch using the branch count policy. */
++ int branchCount();
++
+ private:
+ /** Pointer to the FullCPU. */
+ FullCPU *cpu;
+
+ /** Time buffer interface. */
+ TimeBuffer<TimeStruct> *timeBuffer;
+
+ /** Wire to get decode's information from backwards time buffer. */
+ typename TimeBuffer<TimeStruct>::wire fromDecode;
+
+ /** Wire to get rename's information from backwards time buffer. */
+ typename TimeBuffer<TimeStruct>::wire fromRename;
+
+ /** Wire to get iew's information from backwards time buffer. */
+ typename TimeBuffer<TimeStruct>::wire fromIEW;
+
+ /** Wire to get commit's information from backwards time buffer. */
+ typename TimeBuffer<TimeStruct>::wire fromCommit;
+
+ /** Internal fetch instruction queue. */
+ TimeBuffer<FetchStruct> *fetchQueue;
+
+ //Might be annoying how this name is different than the queue.
+ /** Wire used to write any information heading to decode. */
+ typename TimeBuffer<FetchStruct>::wire toDecode;
+
+ /** Icache interface. */
+ MemInterface *icacheInterface;
+
+ /** BPredUnit. */
+ BPredUnit branchPred;
+
++ Addr PC[Impl::MaxThreads];
++
++ Addr nextPC[Impl::MaxThreads];
++
+ /** Memory request used to access cache. */
- uint8_t *cacheData;
++ MemReqPtr memReq[Impl::MaxThreads];
++
++ /** Variable that tracks if fetch has written to the time buffer this
++ * cycle. Used to tell CPU if there is activity this cycle.
++ */
++ bool wroteToTimeBuffer;
++
++ /** Tracks how many instructions has been fetched this cycle. */
++ int numInst;
++
++ /** Source of possible stalls. */
++ struct Stalls {
++ bool decode;
++ bool rename;
++ bool iew;
++ bool commit;
++ };
++
++ /** Tracks which stages are telling fetch to stall. */
++ Stalls stalls[Impl::MaxThreads];
+
+ /** Decode to fetch delay, in ticks. */
+ unsigned decodeToFetchDelay;
+
+ /** Rename to fetch delay, in ticks. */
+ unsigned renameToFetchDelay;
+
+ /** IEW to fetch delay, in ticks. */
+ unsigned iewToFetchDelay;
+
+ /** Commit to fetch delay, in ticks. */
+ unsigned commitToFetchDelay;
+
+ /** The width of fetch in instructions. */
+ unsigned fetchWidth;
+
+ /** Cache block size. */
+ int cacheBlkSize;
+
+ /** Mask to get a cache block's address. */
+ Addr cacheBlkMask;
+
+ /** The cache line being fetched. */
- Counter lastIcacheStall;
++ uint8_t *cacheData[Impl::MaxThreads];
+
+ /** Size of instructions. */
+ int instSize;
+
+ /** Icache stall statistics. */
- Stats::Distribution<> fetch_nisn_dist;
++ Counter lastIcacheStall[Impl::MaxThreads];
++
++ /** List of Active Threads */
++ std::list<unsigned> *activeThreads;
++
++ /** Number of threads. */
++ unsigned numThreads;
++
++ /** Number of threads that are actively fetching. */
++ unsigned numFetchingThreads;
+
++ /** Thread ID being fetched. */
++ int threadFetched;
++
++ bool interruptPending;
++
++ bool switchedOut;
++
++#if !FULL_SYSTEM
++ /** Page table pointer. */
++// PageTable *pTable;
++#endif
++
++ // @todo: Consider making these vectors and tracking on a per thread basis.
++ /** Stat for total number of cycles stalled due to an icache miss. */
+ Stats::Scalar<> icacheStallCycles;
++ /** Stat for total number of fetched instructions. */
+ Stats::Scalar<> fetchedInsts;
++ Stats::Scalar<> fetchedBranches;
++ /** Stat for total number of predicted branches. */
+ Stats::Scalar<> predictedBranches;
++ /** Stat for total number of cycles spent fetching. */
+ Stats::Scalar<> fetchCycles;
++ /** Stat for total number of cycles spent squashing. */
+ Stats::Scalar<> fetchSquashCycles;
++ /** Stat for total number of cycles spent blocked due to other stages in
++ * the pipeline.
++ */
++ Stats::Scalar<> fetchIdleCycles;
+ Stats::Scalar<> fetchBlockedCycles;
++
++ Stats::Scalar<> fetchMiscStallCycles;
++ /** Stat for total number of fetched cache lines. */
+ Stats::Scalar<> fetchedCacheLines;
+
- #endif //__CPU_O3_CPU_SIMPLE_FETCH_HH__
++ Stats::Scalar<> fetchIcacheSquashes;
++ /** Distribution of number of instructions fetched each cycle. */
++ Stats::Distribution<> fetchNisnDist;
++ Stats::Formula idleRate;
++ Stats::Formula branchRate;
++ Stats::Formula fetchRate;
+};
+
++#endif //__CPU_O3_FETCH_HH__
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- // Remove this later; used only for debugging.
- #define OPCODE(X) (X >> 26) & 0x3f
-
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- #include "sim/byteswap.hh"
+#include "arch/isa_traits.hh"
- #include "cpu/o3/fetch.hh"
-
+#include "cpu/exetrace.hh"
++#include "cpu/o3/fetch.hh"
+#include "mem/base_mem.hh"
+#include "mem/mem_interface.hh"
+#include "mem/mem_req.hh"
- SimpleFetch<Impl>::CacheCompletionEvent
- ::CacheCompletionEvent(SimpleFetch *_fetch)
- : Event(&mainEventQueue),
++#include "sim/byteswap.hh"
+#include "sim/root.hh"
+
++#if FULL_SYSTEM
++#include "arch/tlb.hh"
++#include "arch/vtophys.hh"
++#include "base/remote_gdb.hh"
++#include "mem/functional/memory_control.hh"
++#include "mem/functional/physical.hh"
++#include "sim/system.hh"
++#else // !FULL_SYSTEM
++#include "mem/functional/functional.hh"
++#endif // FULL_SYSTEM
++
++#include <algorithm>
++
++using namespace std;
++
+template<class Impl>
- SimpleFetch<Impl>::CacheCompletionEvent::process()
++DefaultFetch<Impl>::CacheCompletionEvent::CacheCompletionEvent(MemReqPtr &_req,
++ DefaultFetch *_fetch)
++ : Event(&mainEventQueue, Delayed_Writeback_Pri),
++ req(_req),
+ fetch(_fetch)
+{
++ this->setFlags(Event::AutoDelete);
+}
+
+template<class Impl>
+void
- fetch->processCacheCompletion();
++DefaultFetch<Impl>::CacheCompletionEvent::process()
+{
- SimpleFetch<Impl>::CacheCompletionEvent::description()
++ fetch->processCacheCompletion(req);
+}
+
+template<class Impl>
+const char *
- return "SimpleFetch cache completion event";
++DefaultFetch<Impl>::CacheCompletionEvent::description()
+{
- SimpleFetch<Impl>::SimpleFetch(Params ¶ms)
- : icacheInterface(params.icacheInterface),
++ return "DefaultFetch cache completion event";
+}
+
+template<class Impl>
- decodeToFetchDelay(params.decodeToFetchDelay),
- renameToFetchDelay(params.renameToFetchDelay),
- iewToFetchDelay(params.iewToFetchDelay),
- commitToFetchDelay(params.commitToFetchDelay),
- fetchWidth(params.fetchWidth)
++DefaultFetch<Impl>::DefaultFetch(Params *params)
++ : icacheInterface(params->icacheInterface),
+ branchPred(params),
- DPRINTF(Fetch, "Fetch: Fetch constructor called\n");
-
- // Set status to idle.
- _status = Idle;
-
- // Create a new memory request.
- memReq = new MemReq();
- // Not sure of this parameter. I think it should be based on the
- // thread number.
- #if !FULL_SYSTEM
- memReq->asid = 0;
- #else
- memReq->asid = 0;
- #endif // FULL_SYSTEM
- memReq->data = new uint8_t[64];
++ decodeToFetchDelay(params->decodeToFetchDelay),
++ renameToFetchDelay(params->renameToFetchDelay),
++ iewToFetchDelay(params->iewToFetchDelay),
++ commitToFetchDelay(params->commitToFetchDelay),
++ fetchWidth(params->fetchWidth),
++ numThreads(params->numberOfThreads),
++ numFetchingThreads(params->smtNumFetchingThreads),
++ interruptPending(false)
+{
- // Create space to store a cache line.
- cacheData = new uint8_t[cacheBlkSize];
++ if (numThreads > Impl::MaxThreads)
++ fatal("numThreads is not a valid value\n");
++
++ DPRINTF(Fetch, "Fetch constructor called\n");
++
++ // Set fetch stage's status to inactive.
++ _status = Inactive;
++
++ string policy = params->smtFetchPolicy;
++
++ // Convert string to lowercase
++ std::transform(policy.begin(), policy.end(), policy.begin(),
++ (int(*)(int)) tolower);
++
++ // Figure out fetch policy
++ if (policy == "singlethread") {
++ fetchPolicy = SingleThread;
++ } else if (policy == "roundrobin") {
++ fetchPolicy = RoundRobin;
++ DPRINTF(Fetch, "Fetch policy set to Round Robin\n");
++ } else if (policy == "branch") {
++ fetchPolicy = Branch;
++ DPRINTF(Fetch, "Fetch policy set to Branch Count\n");
++ } else if (policy == "iqcount") {
++ fetchPolicy = IQ;
++ DPRINTF(Fetch, "Fetch policy set to IQ count\n");
++ } else if (policy == "lsqcount") {
++ fetchPolicy = LSQ;
++ DPRINTF(Fetch, "Fetch policy set to LSQ count\n");
++ } else {
++ fatal("Invalid Fetch Policy. Options Are: {SingleThread,"
++ " RoundRobin,LSQcount,IQcount}\n");
++ }
+
+ // Size of cache block.
+ cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;
+
+ // Create mask to get rid of offset bits.
+ cacheBlkMask = (cacheBlkSize - 1);
+
++ for (int tid=0; tid < numThreads; tid++) {
++
++ fetchStatus[tid] = Running;
++
++ priorityList.push_back(tid);
++
++ // Create a new memory request.
++ memReq[tid] = NULL;
++
++ // Create space to store a cache line.
++ cacheData[tid] = new uint8_t[cacheBlkSize];
++
++ stalls[tid].decode = 0;
++ stalls[tid].rename = 0;
++ stalls[tid].iew = 0;
++ stalls[tid].commit = 0;
++ }
++
+ // Get the size of an instruction.
+ instSize = sizeof(MachInst);
++}
+
- SimpleFetch<Impl>::regStats()
++template <class Impl>
++std::string
++DefaultFetch<Impl>::name() const
++{
++ return cpu->name() + ".fetch";
+}
+
+template <class Impl>
+void
- .name(name() + ".icacheStallCycles")
++DefaultFetch<Impl>::regStats()
+{
+ icacheStallCycles
- .name(name() + ".fetchedInsts")
++ .name(name() + ".FETCH:icacheStallCycles")
+ .desc("Number of cycles fetch is stalled on an Icache miss")
+ .prereq(icacheStallCycles);
+
+ fetchedInsts
- .name(name() + ".predictedBranches")
++ .name(name() + ".FETCH:Insts")
+ .desc("Number of instructions fetch has processed")
+ .prereq(fetchedInsts);
++
++ fetchedBranches
++ .name(name() + ".FETCH:Branches")
++ .desc("Number of branches that fetch encountered")
++ .prereq(fetchedBranches);
++
+ predictedBranches
- .name(name() + ".fetchCycles")
++ .name(name() + ".FETCH:predictedBranches")
+ .desc("Number of branches that fetch has predicted taken")
+ .prereq(predictedBranches);
++
+ fetchCycles
- .name(name() + ".fetchSquashCycles")
++ .name(name() + ".FETCH:Cycles")
+ .desc("Number of cycles fetch has run and was not squashing or"
+ " blocked")
+ .prereq(fetchCycles);
++
+ fetchSquashCycles
- .name(name() + ".fetchBlockedCycles")
++ .name(name() + ".FETCH:SquashCycles")
+ .desc("Number of cycles fetch has spent squashing")
+ .prereq(fetchSquashCycles);
++
++ fetchIdleCycles
++ .name(name() + ".FETCH:IdleCycles")
++ .desc("Number of cycles fetch was idle")
++ .prereq(fetchIdleCycles);
++
+ fetchBlockedCycles
- .name(name() + ".fetchedCacheLines")
++ .name(name() + ".FETCH:BlockedCycles")
+ .desc("Number of cycles fetch has spent blocked")
+ .prereq(fetchBlockedCycles);
++
+ fetchedCacheLines
- fetch_nisn_dist
++ .name(name() + ".FETCH:CacheLines")
+ .desc("Number of cache lines fetched")
+ .prereq(fetchedCacheLines);
+
- .name(name() + ".FETCH:rate_dist")
++ fetchMiscStallCycles
++ .name(name() + ".FETCH:MiscStallCycles")
++ .desc("Number of cycles fetch has spent waiting on interrupts, or "
++ "bad addresses, or out of MSHRs")
++ .prereq(fetchMiscStallCycles);
++
++ fetchIcacheSquashes
++ .name(name() + ".FETCH:IcacheSquashes")
++ .desc("Number of outstanding Icache misses that were squashed")
++ .prereq(fetchIcacheSquashes);
++
++ fetchNisnDist
+ .init(/* base value */ 0,
+ /* last value */ fetchWidth,
+ /* bucket size */ 1)
- .flags(Stats::pdf)
- ;
++ .name(name() + ".FETCH:rateDist")
+ .desc("Number of instructions fetched each cycle (Total)")
- SimpleFetch<Impl>::setCPU(FullCPU *cpu_ptr)
++ .flags(Stats::pdf);
++
++ idleRate
++ .name(name() + ".FETCH:idleRate")
++ .desc("Percent of cycles fetch was idle")
++ .prereq(idleRate);
++ idleRate = fetchIdleCycles * 100 / cpu->numCycles;
++
++ branchRate
++ .name(name() + ".FETCH:branchRate")
++ .desc("Number of branch fetches per cycle")
++ .flags(Stats::total);
++ branchRate = predictedBranches / cpu->numCycles;
++
++ fetchRate
++ .name(name() + ".FETCH:rate")
++ .desc("Number of inst fetches per cycle")
++ .flags(Stats::total);
++ fetchRate = fetchedInsts / cpu->numCycles;
+
+ branchPred.regStats();
+}
+
+template<class Impl>
+void
- DPRINTF(Fetch, "Fetch: Setting the CPU pointer.\n");
++DefaultFetch<Impl>::setCPU(FullCPU *cpu_ptr)
+{
- // This line will be removed eventually.
- memReq->xc = cpu->xcBase();
++ DPRINTF(Fetch, "Setting the CPU pointer.\n");
+ cpu = cpu_ptr;
- SimpleFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
++
++ // Fetch needs to start fetching instructions at the very beginning,
++ // so it must start up in active state.
++ switchToActive();
+}
+
+template<class Impl>
+void
- DPRINTF(Fetch, "Fetch: Setting the time buffer pointer.\n");
++DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
+{
- SimpleFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
++ DPRINTF(Fetch, "Setting the time buffer pointer.\n");
+ timeBuffer = time_buffer;
+
+ // Create wires to get information from proper places in time buffer.
+ fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
+ fromRename = timeBuffer->getWire(-renameToFetchDelay);
+ fromIEW = timeBuffer->getWire(-iewToFetchDelay);
+ fromCommit = timeBuffer->getWire(-commitToFetchDelay);
+}
+
+template<class Impl>
+void
- DPRINTF(Fetch, "Fetch: Setting the fetch queue pointer.\n");
++DefaultFetch<Impl>::setActiveThreads(list<unsigned> *at_ptr)
++{
++ DPRINTF(Fetch, "Setting active threads list pointer.\n");
++ activeThreads = at_ptr;
++}
++
++template<class Impl>
++void
++DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
+{
- SimpleFetch<Impl>::processCacheCompletion()
++ DPRINTF(Fetch, "Setting the fetch queue pointer.\n");
+ fetchQueue = fq_ptr;
+
+ // Create wire to write information to proper place in fetch queue.
+ toDecode = fetchQueue->getWire(0);
+}
+
++#if 0
++template<class Impl>
++void
++DefaultFetch<Impl>::setPageTable(PageTable *pt_ptr)
++{
++ DPRINTF(Fetch, "Setting the page table pointer.\n");
++#if !FULL_SYSTEM
++ pTable = pt_ptr;
++#endif
++}
++#endif
++
++template<class Impl>
++void
++DefaultFetch<Impl>::initStage()
++{
++ for (int tid = 0; tid < numThreads; tid++) {
++ PC[tid] = cpu->readPC(tid);
++ nextPC[tid] = cpu->readNextPC(tid);
++ }
++}
++
+template<class Impl>
+void
- DPRINTF(Fetch, "Fetch: Waking up from cache miss.\n");
++DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
+{
- if (_status == IcacheMissStall)
- _status = IcacheMissComplete;
++ unsigned tid = req->thread_num;
++
++ DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n",tid);
+
+ // Only change the status if it's still waiting on the icache access
+ // to return.
+ // Can keep track of how many cache accesses go unused due to
+ // misspeculation here.
- SimpleFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC)
++ if (fetchStatus[tid] != IcacheMissStall ||
++ req != memReq[tid] ||
++ isSwitchedOut()) {
++ ++fetchIcacheSquashes;
++ return;
++ }
++
++ // Wake up the CPU (if it went to sleep and was waiting on this completion
++ // event).
++ cpu->wakeCPU();
++
++ DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
++ tid);
++
++ switchToActive();
++
++ // Only switch to IcacheMissComplete if we're not stalled as well.
++ if (checkStall(tid)) {
++ fetchStatus[tid] = Blocked;
++ } else {
++ fetchStatus[tid] = IcacheMissComplete;
++ }
++
++// memcpy(cacheData[tid], memReq[tid]->data, memReq[tid]->size);
++
++ // Reset the mem req to NULL.
++ memReq[tid] = NULL;
++}
++
++template <class Impl>
++void
++DefaultFetch<Impl>::switchOut()
++{
++ switchedOut = true;
++ cpu->signalSwitched();
++}
++
++template <class Impl>
++void
++DefaultFetch<Impl>::doSwitchOut()
++{
++ branchPred.switchOut();
++}
++
++template <class Impl>
++void
++DefaultFetch<Impl>::takeOverFrom()
++{
++ // Reset all state
++ for (int i = 0; i < Impl::MaxThreads; ++i) {
++ stalls[i].decode = 0;
++ stalls[i].rename = 0;
++ stalls[i].iew = 0;
++ stalls[i].commit = 0;
++ PC[i] = cpu->readPC(i);
++ nextPC[i] = cpu->readNextPC(i);
++ fetchStatus[i] = Running;
++ }
++ numInst = 0;
++ wroteToTimeBuffer = false;
++ _status = Inactive;
++ switchedOut = false;
++ branchPred.takeOverFrom();
++}
++
++template <class Impl>
++void
++DefaultFetch<Impl>::wakeFromQuiesce()
++{
++ DPRINTF(Fetch, "Waking up from quiesce\n");
++ // Hopefully this is safe
++ fetchStatus[0] = Running;
++}
++
++template <class Impl>
++inline void
++DefaultFetch<Impl>::switchToActive()
++{
++ if (_status == Inactive) {
++ DPRINTF(Activity, "Activating stage.\n");
++
++ cpu->activateStage(FullCPU::FetchIdx);
++
++ _status = Active;
++ }
++}
++
++template <class Impl>
++inline void
++DefaultFetch<Impl>::switchToInactive()
++{
++ if (_status == Active) {
++ DPRINTF(Activity, "Deactivating stage.\n");
++
++ cpu->deactivateStage(FullCPU::FetchIdx);
++
++ _status = Inactive;
++ }
+}
+
+template <class Impl>
+bool
- predict_taken = branchPred.predict(inst, next_PC);
++DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC)
+{
+ // Do branch prediction check here.
+ // A bit of a misnomer...next_PC is actually the current PC until
+ // this function updates it.
+ bool predict_taken;
+
+ if (!inst->isControl()) {
+ next_PC = next_PC + instSize;
+ inst->setPredTarg(next_PC);
+ return false;
+ }
+
- Fault
- SimpleFetch<Impl>::fetchCacheLine(Addr fetch_PC)
++ predict_taken = branchPred.predict(inst, next_PC, inst->threadNumber);
++
++ ++fetchedBranches;
+
+ if (predict_taken) {
+ ++predictedBranches;
+ }
+
+ return predict_taken;
+}
+
+template <class Impl>
- // Check if the instruction exists within the cache.
- // If it does, then proceed on to read the instruction and the rest
- // of the instructions in the cache line until either the end of the
- // cache line or a predicted taken branch is encountered.
++bool
++DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid)
+{
- unsigned flags = cpu->inPalMode() ? PHYSICAL : 0;
++ Fault fault = NoFault;
+
+#if FULL_SYSTEM
+ // Flag to say whether or not address is physical addr.
- Fault fault = NoFault;
++ unsigned flags = cpu->inPalMode(fetch_PC) ? PHYSICAL : 0;
+#else
+ unsigned flags = 0;
+#endif // FULL_SYSTEM
+
- // Setup the memReq to do a read of the first isntruction's address.
++ if (interruptPending && flags == 0 || switchedOut) {
++ // Hold off fetch from getting new instructions while an interrupt
++ // is pending.
++ return false;
++ }
+
+ // Align the fetch PC so it's at the start of a cache block.
+ fetch_PC = icacheBlockAlignPC(fetch_PC);
+
- memReq->cmd = Read;
- memReq->reset(fetch_PC, cacheBlkSize, flags);
++ // Setup the memReq to do a read of the first instruction's address.
+ // Set the appropriate read size and flags as well.
- // Translate the instruction request.
- // Should this function be
- // in the CPU class ? Probably...ITB/DTB should exist within the
- // CPU.
++ memReq[tid] = new MemReq();
+
- fault = cpu->translateInstReq(memReq);
++ memReq[tid]->asid = tid;
++ memReq[tid]->thread_num = tid;
++ memReq[tid]->data = new uint8_t[64];
++ memReq[tid]->xc = cpu->xcBase(tid);
++ memReq[tid]->cmd = Read;
++ memReq[tid]->reset(fetch_PC, cacheBlkSize, flags);
+
- // on what caused the fetch (ITB or Icache miss).
++ // Translate the instruction request.
++//#if FULL_SYSTEM
++ fault = cpu->translateInstReq(memReq[tid]);
++//#else
++// fault = pTable->translate(memReq[tid]);
++//#endif
+
+ // In the case of faults, the fetch stage may need to stall and wait
- fault = cpu->mem->read(memReq, cacheData);
++ // for the ITB miss to be handled.
+
+ // If translation was successful, attempt to read the first
+ // instruction.
+ if (fault == NoFault) {
++#if FULL_SYSTEM
++ if (cpu->system->memctrl->badaddr(memReq[tid]->paddr) ||
++ memReq[tid]->flags & UNCACHEABLE) {
++ DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a "
++ "misspeculating path)!",
++ memReq[tid]->paddr);
++ ret_fault = TheISA::genMachineCheckFault();
++ return false;
++ }
++#endif
++
+ DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
- fetchedCacheLines++;
- }
++ fault = cpu->mem->read(memReq[tid], cacheData[tid]);
+ // This read may change when the mem interface changes.
+
- // Now do the timing access to see whether or not the instruction
- // exists within the cache.
- if (icacheInterface && fault == NoFault) {
- DPRINTF(Fetch, "Fetch: Doing timing memory access.\n");
- memReq->completionEvent = NULL;
++ // Now do the timing access to see whether or not the instruction
++ // exists within the cache.
++ if (icacheInterface && !icacheInterface->isBlocked()) {
++ DPRINTF(Fetch, "Doing cache access.\n");
+
- memReq->time = curTick;
++ memReq[tid]->completionEvent = NULL;
+
- MemAccessResult result = icacheInterface->access(memReq);
++ memReq[tid]->time = curTick;
+
- // If the cache missed (in this model functional and timing
- // memories are different), then schedule an event to wake
- // up this stage once the cache miss completes.
- if (result != MA_HIT && icacheInterface->doEvents()) {
- memReq->completionEvent = new CacheCompletionEvent(this);
++ MemAccessResult result = icacheInterface->access(memReq[tid]);
+
- // How does current model work as far as individual
- // stages scheduling/unscheduling?
- // Perhaps have only the main CPU scheduled/unscheduled,
- // and have it choose what stages to run appropriately.
++ fetchedCacheLines++;
+
- DPRINTF(Fetch, "Fetch: Stalling due to icache miss.\n");
- _status = IcacheMissStall;
++ // If the cache missed, then schedule an event to wake
++ // up this stage once the cache miss completes.
++ // @todo: Possibly allow for longer than 1 cycle cache hits.
++ if (result != MA_HIT && icacheInterface->doEvents()) {
+
- return fault;
++ memReq[tid]->completionEvent =
++ new CacheCompletionEvent(memReq[tid], this);
++
++ lastIcacheStall[tid] = curTick;
++
++ DPRINTF(Activity, "[tid:%i]: Activity: Stalling due to I-cache "
++ "miss.\n", tid);
++
++ fetchStatus[tid] = IcacheMissStall;
++ } else {
++ DPRINTF(Fetch, "[tid:%i]: I-Cache hit. Doing Instruction "
++ "read.\n", tid);
++
++// memcpy(cacheData[tid], memReq[tid]->data, memReq[tid]->size);
++ }
++ } else {
++ DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
++ ret_fault = NoFault;
++ return false;
+ }
+ }
+
- SimpleFetch<Impl>::doSquash(const Addr &new_PC)
++ ret_fault = fault;
++ return true;
+}
+
+template <class Impl>
+inline void
- DPRINTF(Fetch, "Fetch: Squashing, setting PC to: %#x.\n", new_PC);
++DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
+{
- cpu->setNextPC(new_PC + instSize);
- cpu->setPC(new_PC);
++ DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x.\n",
++ tid, new_PC);
+
- if (_status == IcacheMissStall && icacheInterface) {
- DPRINTF(Fetch, "Fetch: Squashing outstanding Icache miss.\n");
- // @todo: Use an actual thread number here.
- icacheInterface->squash(0);
++ PC[tid] = new_PC;
++ nextPC[tid] = new_PC + instSize;
+
+ // Clear the icache miss if it's outstanding.
- _status = Squashing;
++ if (fetchStatus[tid] == IcacheMissStall && icacheInterface) {
++ DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",
++ tid);
++ memReq[tid] = NULL;
+ }
+
- SimpleFetch<Impl>::squashFromDecode(const Addr &new_PC,
- const InstSeqNum &seq_num)
++ fetchStatus[tid] = Squashing;
+
+ ++fetchSquashCycles;
+}
+
+template<class Impl>
+void
- DPRINTF(Fetch, "Fetch: Squashing from decode.\n");
++DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC,
++ const InstSeqNum &seq_num,
++ unsigned tid)
+{
- doSquash(new_PC);
++ DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid);
+
- cpu->removeInstsUntil(seq_num);
++ doSquash(new_PC, tid);
+
+ // Tell the CPU to remove any instructions that are in flight between
+ // fetch and decode.
- SimpleFetch<Impl>::squash(const Addr &new_PC)
++ cpu->removeInstsUntil(seq_num, tid);
++}
++
++template<class Impl>
++bool
++DefaultFetch<Impl>::checkStall(unsigned tid) const
++{
++ bool ret_val = false;
++
++ if (cpu->contextSwitch) {
++ DPRINTF(Fetch,"[tid:%i]: Stalling for a context switch.\n",tid);
++ ret_val = true;
++ } else if (stalls[tid].decode) {
++ DPRINTF(Fetch,"[tid:%i]: Stall from Decode stage detected.\n",tid);
++ ret_val = true;
++ } else if (stalls[tid].rename) {
++ DPRINTF(Fetch,"[tid:%i]: Stall from Rename stage detected.\n",tid);
++ ret_val = true;
++ } else if (stalls[tid].iew) {
++ DPRINTF(Fetch,"[tid:%i]: Stall from IEW stage detected.\n",tid);
++ ret_val = true;
++ } else if (stalls[tid].commit) {
++ DPRINTF(Fetch,"[tid:%i]: Stall from Commit stage detected.\n",tid);
++ ret_val = true;
++ }
++
++ return ret_val;
++}
++
++template<class Impl>
++typename DefaultFetch<Impl>::FetchStatus
++DefaultFetch<Impl>::updateFetchStatus()
++{
++ //Check Running
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++
++ while (threads != (*activeThreads).end()) {
++
++ unsigned tid = *threads++;
++
++ if (fetchStatus[tid] == Running ||
++ fetchStatus[tid] == Squashing ||
++ fetchStatus[tid] == IcacheMissComplete) {
++
++ if (_status == Inactive) {
++ DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid);
++
++ if (fetchStatus[tid] == IcacheMissComplete) {
++ DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache"
++ "completion\n",tid);
++ }
++
++ cpu->activateStage(FullCPU::FetchIdx);
++ }
++
++ return Active;
++ }
++ }
++
++ // Stage is switching from active to inactive, notify CPU of it.
++ if (_status == Active) {
++ DPRINTF(Activity, "Deactivating stage.\n");
++
++ cpu->deactivateStage(FullCPU::FetchIdx);
++ }
++
++ return Inactive;
+}
+
+template <class Impl>
+void
- DPRINTF(Fetch, "Fetch: Squash from commit.\n");
++DefaultFetch<Impl>::squash(const Addr &new_PC, unsigned tid)
+{
- doSquash(new_PC);
++ DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);
+
- cpu->removeInstsNotInROB();
++ doSquash(new_PC, tid);
+
+ // Tell the CPU to remove any instructions that are not in the ROB.
- template<class Impl>
++ cpu->removeInstsNotInROB(tid);
+}
+
- SimpleFetch<Impl>::tick()
++template <class Impl>
+void
- if (fromCommit->commitInfo.squash) {
- DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
- "from commit.\n");
++DefaultFetch<Impl>::tick()
+{
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++ bool status_change = false;
++
++ wroteToTimeBuffer = false;
++
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = *threads++;
++
++ // Check the signals for each thread to determine the proper status
++ // for each thread.
++ bool updated_status = checkSignalsAndUpdate(tid);
++ status_change = status_change || updated_status;
++ }
++
++ DPRINTF(Fetch, "Running stage.\n");
++
++ // Reset the number of the instruction we're fetching.
++ numInst = 0;
++
++ if (fromCommit->commitInfo[0].interruptPending) {
++ interruptPending = true;
++ }
++ if (fromCommit->commitInfo[0].clearInterrupt) {
++ interruptPending = false;
++ }
++
++ for (threadFetched = 0; threadFetched < numFetchingThreads;
++ threadFetched++) {
++ // Fetch each of the actively fetching threads.
++ fetch(status_change);
++ }
++
++ // Record number of instructions fetched this cycle for distribution.
++ fetchNisnDist.sample(numInst);
++
++ if (status_change) {
++ // Change the fetch stage status if there was a status change.
++ _status = updateFetchStatus();
++ }
++
++ // If there was activity this cycle, inform the CPU of it.
++ if (wroteToTimeBuffer || cpu->contextSwitch) {
++ DPRINTF(Activity, "Activity this cycle.\n");
++
++ cpu->activityThisCycle();
++ }
++}
++
++template <class Impl>
++bool
++DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
++{
++ // Update the per thread stall statuses.
++ if (fromDecode->decodeBlock[tid]) {
++ stalls[tid].decode = true;
++ }
++
++ if (fromDecode->decodeUnblock[tid]) {
++ assert(stalls[tid].decode);
++ assert(!fromDecode->decodeBlock[tid]);
++ stalls[tid].decode = false;
++ }
++
++ if (fromRename->renameBlock[tid]) {
++ stalls[tid].rename = true;
++ }
++
++ if (fromRename->renameUnblock[tid]) {
++ assert(stalls[tid].rename);
++ assert(!fromRename->renameBlock[tid]);
++ stalls[tid].rename = false;
++ }
++
++ if (fromIEW->iewBlock[tid]) {
++ stalls[tid].iew = true;
++ }
++
++ if (fromIEW->iewUnblock[tid]) {
++ assert(stalls[tid].iew);
++ assert(!fromIEW->iewBlock[tid]);
++ stalls[tid].iew = false;
++ }
++
++ if (fromCommit->commitBlock[tid]) {
++ stalls[tid].commit = true;
++ }
++
++ if (fromCommit->commitUnblock[tid]) {
++ assert(stalls[tid].commit);
++ assert(!fromCommit->commitBlock[tid]);
++ stalls[tid].commit = false;
++ }
++
+ // Check squash signals from commit.
- squash(fromCommit->commitInfo.nextPC);
++ if (fromCommit->commitInfo[tid].squash) {
++
++ DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
++ "from commit.\n",tid);
+
+ // In any case, squash.
- if (fromCommit->commitInfo.branchMispredict) {
- branchPred.squash(fromCommit->commitInfo.doneSeqNum,
- fromCommit->commitInfo.nextPC,
- fromCommit->commitInfo.branchTaken);
++ squash(fromCommit->commitInfo[tid].nextPC,tid);
+
+ // Also check if there's a mispredict that happened.
- branchPred.squash(fromCommit->commitInfo.doneSeqNum);
++ if (fromCommit->commitInfo[tid].branchMispredict) {
++ branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum,
++ fromCommit->commitInfo[tid].nextPC,
++ fromCommit->commitInfo[tid].branchTaken,
++ tid);
+ } else {
- return;
- } else if (fromCommit->commitInfo.doneSeqNum) {
++ branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum,
++ tid);
+ }
+
- // that was braodcasted.
- branchPred.update(fromCommit->commitInfo.doneSeqNum);
++ return true;
++ } else if (fromCommit->commitInfo[tid].doneSeqNum) {
+ // Update the branch predictor if it wasn't a squashed instruction
- if (fromCommit->commitInfo.robSquashing) {
- DPRINTF(Fetch, "Fetch: ROB is still squashing.\n");
++ // that was broadcasted.
++ branchPred.update(fromCommit->commitInfo[tid].doneSeqNum, tid);
+ }
+
+ // Check ROB squash signals from commit.
- _status = Squashing;
++ if (fromCommit->commitInfo[tid].robSquashing) {
++ DPRINTF(Fetch, "[tid:%u]: ROB is still squashing Thread %u.\n", tid);
+
+ // Continue to squash.
- ++fetchSquashCycles;
- return;
++ fetchStatus[tid] = Squashing;
+
- if (fromDecode->decodeInfo.squash) {
- DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
- "from decode.\n");
++ return true;
+ }
+
+ // Check squash signals from decode.
- if (fromDecode->decodeInfo.branchMispredict) {
- branchPred.squash(fromDecode->decodeInfo.doneSeqNum,
- fromDecode->decodeInfo.nextPC,
- fromDecode->decodeInfo.branchTaken);
++ if (fromDecode->decodeInfo[tid].squash) {
++ DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
++ "from decode.\n",tid);
+
+ // Update the branch predictor.
- branchPred.squash(fromDecode->decodeInfo.doneSeqNum);
++ if (fromDecode->decodeInfo[tid].branchMispredict) {
++ branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum,
++ fromDecode->decodeInfo[tid].nextPC,
++ fromDecode->decodeInfo[tid].branchTaken,
++ tid);
+ } else {
- if (_status != Squashing) {
- // Squash unless we're already squashing?
- squashFromDecode(fromDecode->decodeInfo.nextPC,
- fromDecode->decodeInfo.doneSeqNum);
- return;
++ branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum,
++ tid);
+ }
+
- // Check if any of the stall signals are high.
- if (fromDecode->decodeInfo.stall ||
- fromRename->renameInfo.stall ||
- fromIEW->iewInfo.stall ||
- fromCommit->commitInfo.stall)
- {
- // Block stage, regardless of current status.
++ if (fetchStatus[tid] != Squashing) {
++ // Squash unless we're already squashing
++ squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
++ fromDecode->decodeInfo[tid].doneSeqNum,
++ tid);
++
++ return true;
+ }
+ }
+
- DPRINTF(Fetch, "Fetch: Stalling stage.\n");
- DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "
- "Commit: %i\n",
- fromDecode->decodeInfo.stall,
- fromRename->renameInfo.stall,
- fromIEW->iewInfo.stall,
- fromCommit->commitInfo.stall);
++ if (checkStall(tid) && fetchStatus[tid] != IcacheMissStall) {
++ DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid);
+
- _status = Blocked;
-
- ++fetchBlockedCycles;
- return;
- } else if (_status == Blocked) {
- // Unblock stage if status is currently blocked and none of the
- // stall signals are being held high.
- _status = Running;
-
- ++fetchBlockedCycles;
- return;
++ fetchStatus[tid] = Blocked;
+
- // If fetch has reached this point, then there are no squash signals
- // still being held high. Check if fetch is in the squashing state;
- // if so, fetch can switch to running.
- // Similarly, there are no blocked signals still being held high.
- // Check if fetch is in the blocked state; if so, fetch can switch to
- // running.
- if (_status == Squashing) {
- DPRINTF(Fetch, "Fetch: Done squashing, switching to running.\n");
-
- // Switch status to running
- _status = Running;
++ return true;
+ }
+
- ++fetchCycles;
++ if (fetchStatus[tid] == Blocked ||
++ fetchStatus[tid] == Squashing) {
++ // Switch status to running if fetch isn't being told to block or
++ // squash this cycle.
++ DPRINTF(Fetch, "[tid:%i]: Done squashing, switching to running.\n",
++ tid);
+
- fetch();
- } else if (_status != IcacheMissStall) {
- DPRINTF(Fetch, "Fetch: Running stage.\n");
-
- ++fetchCycles;
-
- fetch();
++ fetchStatus[tid] = Running;
+
- SimpleFetch<Impl>::fetch()
++ return true;
+ }
++
++ // If we've reached this point, we have not gotten any signals that
++ // cause fetch to change its status. Fetch remains the same as before.
++ return false;
+}
+
+template<class Impl>
+void
- Addr fetch_PC = cpu->readPC();
++DefaultFetch<Impl>::fetch(bool &status_change)
+{
+ //////////////////////////////////////////
+ // Start actual fetch
+ //////////////////////////////////////////
++ int tid = getFetchingThread(fetchPolicy);
++
++ if (tid == -1) {
++ DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
++
++ // Breaks looping condition in tick()
++ threadFetched = numFetchingThreads;
++ return;
++ }
+
+ // The current PC.
- if (_status == IcacheMissComplete) {
- DPRINTF(Fetch, "Fetch: Icache miss is complete.\n");
-
- // Reset the completion event to NULL.
- memReq->completionEvent = NULL;
-
- _status = Running;
++ Addr &fetch_PC = PC[tid];
+
+ // Fault code for memory access.
+ Fault fault = NoFault;
+
+ // If returning from the delay of a cache miss, then update the status
+ // to running, otherwise do the cache access. Possibly move this up
+ // to tick() function.
- DPRINTF(Fetch, "Fetch: Attempting to translate and read "
- "instruction, starting at PC %08p.\n",
- fetch_PC);
++ if (fetchStatus[tid] == IcacheMissComplete) {
++ DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n",
++ tid);
++
++ fetchStatus[tid] = Running;
++ status_change = true;
++ } else if (fetchStatus[tid] == Running) {
++ DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read "
++ "instruction, starting at PC %08p.\n",
++ tid, fetch_PC);
++
++ bool fetch_success = fetchCacheLine(fetch_PC, fault, tid);
++ if (!fetch_success) {
++ ++fetchMiscStallCycles;
++ return;
++ }
+ } else {
- fault = fetchCacheLine(fetch_PC);
++ if (fetchStatus[tid] == Idle) {
++ ++fetchIdleCycles;
++ } else if (fetchStatus[tid] == Blocked) {
++ ++fetchBlockedCycles;
++ } else if (fetchStatus[tid] == Squashing) {
++ ++fetchSquashCycles;
++ } else if (fetchStatus[tid] == IcacheMissStall) {
++ ++icacheStallCycles;
++ }
+
- // If we had a stall due to an icache miss, then return. It'd
- // be nicer if this were handled through the kind of fault that
- // is returned by the function.
- if (_status == IcacheMissStall) {
++ // Status is Idle, Squashing, Blocked, or IcacheMissStall, so
++ // fetch should do nothing.
++ return;
+ }
+
- // As far as timing goes, the CPU will need to send an event through
- // the MemReq in order to be woken up once the memory access completes.
- // Probably have a status on a per thread basis so each thread can
- // block independently and be woken up independently.
-
++ ++fetchCycles;
++
++ // If we had a stall due to an icache miss, then return.
++ if (fetchStatus[tid] == IcacheMissStall) {
++ ++icacheStallCycles;
++ status_change = true;
+ return;
+ }
+
- unsigned offset = fetch_PC & cacheBlkMask;
- unsigned fetched;
+ Addr next_PC = fetch_PC;
+ InstSeqNum inst_seq;
+ MachInst inst;
- DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n");
-
- //////////////////////////
- // Fetch first instruction
- //////////////////////////
++ ExtMachInst ext_inst;
++ // @todo: Fix this hack.
++ unsigned offset = (fetch_PC & cacheBlkMask) & ~3;
+
+ if (fault == NoFault) {
+ // If the read of the first instruction was successful, then grab the
+ // instructions from the rest of the cache line and put them into the
+ // queue heading to decode.
+
- for (fetched = 0;
++ DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to "
++ "decode.\n",tid);
+
+ // Need to keep track of whether or not a predicted branch
+ // ended this fetch block.
+ bool predicted_branch = false;
+
- fetched < fetchWidth &&
++ for (;
+ offset < cacheBlkSize &&
- ++fetched)
- {
++ numInst < fetchWidth &&
+ !predicted_branch;
- (&cacheData[offset]));
++ ++numInst) {
+
+ // Get a sequence number.
+ inst_seq = cpu->getAndIncrementInstSeq();
+
+ // Make sure this is a valid index.
+ assert(offset <= cacheBlkSize - instSize);
+
+ // Get the instruction from the array of the cache line.
+ inst = gtoh(*reinterpret_cast<MachInst *>
- DynInstPtr instruction = new DynInst(inst, fetch_PC, next_PC,
++ (&cacheData[tid][offset]));
++
++ ext_inst = TheISA::makeExtMI(inst, fetch_PC);
+
+ // Create a new DynInst from the instruction fetched.
- DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n",
- inst_seq, instruction->readPC());
++ DynInstPtr instruction = new DynInst(ext_inst, fetch_PC,
++ next_PC,
+ inst_seq, cpu);
++ instruction->setThread(tid);
++
++ instruction->setASID(tid);
+
- DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n",
- OPCODE(inst));
++ instruction->setState(cpu->thread[tid]);
+
- Trace::getInstRecord(curTick, cpu->xcBase(), cpu,
++ DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created "
++ "[sn:%lli]\n",
++ tid, instruction->readPC(), inst_seq);
++
++ DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n",
++ tid, instruction->staticInst->disassemble(fetch_PC));
+
+ instruction->traceData =
- instruction->readPC(), 0);
++ Trace::getInstRecord(curTick, cpu->xcBase(tid), cpu,
+ instruction->staticInst,
- cpu->addInst(instruction);
++ instruction->readPC(),tid);
+
+ predicted_branch = lookupAndUpdateNextPC(instruction, next_PC);
+
+ // Add instruction to the CPU's list of instructions.
- toDecode->insts[fetched] = instruction;
++ instruction->setInstListIt(cpu->addInst(instruction));
+
+ // Write the instruction to the first slot in the queue
+ // that heads to decode.
- fetch_nisn_dist.sample(fetched);
++ toDecode->insts[numInst] = instruction;
+
+ toDecode->size++;
+
+ // Increment stat of fetched instructions.
+ ++fetchedInsts;
+
+ // Move to the next instruction, unless we have a branch.
+ fetch_PC = next_PC;
+
++ if (instruction->isQuiesce()) {
++ warn("%lli: Quiesce instruction encountered, halting fetch!",
++ curTick);
++ fetchStatus[tid] = QuiescePending;
++ ++numInst;
++ status_change = true;
++ break;
++ }
++
+ offset+= instSize;
+ }
++ }
+
- // cycle will be. Might want to move this to the beginning of this
- // function so that the PC updates at the beginning of everything.
- // Or might want to leave setting the PC to the main CPU, with fetch
- // only changing the nextPC (will require correct determination of
- // next PC).
++ if (numInst > 0) {
++ wroteToTimeBuffer = true;
+ }
+
+ // Now that fetching is completed, update the PC to signify what the next
- DPRINTF(Fetch, "Fetch: Setting PC to %08p.\n", next_PC);
- cpu->setPC(next_PC);
- cpu->setNextPC(next_PC + instSize);
++ // cycle will be.
+ if (fault == NoFault) {
- // If the issue was an icache miss, then we can just return and
- // wait until it is handled.
- if (_status == IcacheMissStall) {
- return;
++ DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC);
++
++ PC[tid] = next_PC;
++ nextPC[tid] = next_PC + instSize;
+ } else {
- // Handle the fault.
- // This stage will not be able to continue until all the ROB
- // slots are empty, at which point the fault can be handled.
- // The only other way it can wake up is if a squash comes along
- // and changes the PC. Not sure how to handle that case...perhaps
- // have it handled by the upper level CPU class which peeks into the
- // time buffer and sees if a squash comes along, in which case it
- // changes the status.
++ // We shouldn't be in an icache miss and also have a fault (an ITB
++ // miss)
++ if (fetchStatus[tid] == IcacheMissStall) {
++ panic("Fetch should have exited prior to this!");
+ }
+
- DPRINTF(Fetch, "Fetch: Blocked, need to handle the trap.\n");
++ // Send the fault to commit. This thread will not do anything
++ // until commit handles the fault. The only other way it can
++ // wake up is if a squash comes along and changes the PC.
++#if FULL_SYSTEM
++ assert(numInst != fetchWidth);
++ // Get a sequence number.
++ inst_seq = cpu->getAndIncrementInstSeq();
++ // We will use a nop in order to carry the fault.
++ ext_inst = TheISA::NoopMachInst;
+
- _status = Blocked;
- #if FULL_SYSTEM
- // cpu->trap(fault);
- // Send a signal to the ROB indicating that there's a trap from the
- // fetch stage that needs to be handled. Need to indicate that
- // there's a fault, and the fault type.
++ // Create a new DynInst from the dummy nop.
++ DynInstPtr instruction = new DynInst(ext_inst, fetch_PC,
++ next_PC,
++ inst_seq, cpu);
++ instruction->setPredTarg(next_PC + instSize);
++ instruction->setThread(tid);
+
- fatal("fault (%d) detected @ PC %08p", fault, cpu->readPC());
++ instruction->setASID(tid);
++
++ instruction->setState(cpu->thread[tid]);
++
++ instruction->traceData = NULL;
++
++ instruction->setInstListIt(cpu->addInst(instruction));
++
++ instruction->fault = fault;
++
++ toDecode->insts[numInst] = instruction;
++ toDecode->size++;
++
++ DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n",tid);
++
++ fetchStatus[tid] = TrapPending;
++ status_change = true;
++
++ warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
+#else // !FULL_SYSTEM
++ fatal("fault (%d) detected @ PC %08p", fault, PC[tid]);
+#endif // FULL_SYSTEM
+ }
+}
++
++
++///////////////////////////////////////
++// //
++// SMT FETCH POLICY MAINTAINED HERE //
++// //
++///////////////////////////////////////
++template<class Impl>
++int
++DefaultFetch<Impl>::getFetchingThread(FetchPriority &fetch_priority)
++{
++ if (numThreads > 1) {
++ switch (fetch_priority) {
++
++ case SingleThread:
++ return 0;
++
++ case RoundRobin:
++ return roundRobin();
++
++ case IQ:
++ return iqCount();
++
++ case LSQ:
++ return lsqCount();
++
++ case Branch:
++ return branchCount();
++
++ default:
++ return -1;
++ }
++ } else {
++ int tid = *((*activeThreads).begin());
++
++ if (fetchStatus[tid] == Running ||
++ fetchStatus[tid] == IcacheMissComplete ||
++ fetchStatus[tid] == Idle) {
++ return tid;
++ } else {
++ return -1;
++ }
++ }
++
++}
++
++
++template<class Impl>
++int
++DefaultFetch<Impl>::roundRobin()
++{
++ list<unsigned>::iterator pri_iter = priorityList.begin();
++ list<unsigned>::iterator end = priorityList.end();
++
++ int high_pri;
++
++ while (pri_iter != end) {
++ high_pri = *pri_iter;
++
++ assert(high_pri <= numThreads);
++
++ if (fetchStatus[high_pri] == Running ||
++ fetchStatus[high_pri] == IcacheMissComplete ||
++ fetchStatus[high_pri] == Idle) {
++
++ priorityList.erase(pri_iter);
++ priorityList.push_back(high_pri);
++
++ return high_pri;
++ }
++
++ pri_iter++;
++ }
++
++ return -1;
++}
++
++template<class Impl>
++int
++DefaultFetch<Impl>::iqCount()
++{
++ priority_queue<unsigned> PQ;
++
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = *threads++;
++
++ PQ.push(fromIEW->iewInfo[tid].iqCount);
++ }
++
++ while (!PQ.empty()) {
++
++ unsigned high_pri = PQ.top();
++
++ if (fetchStatus[high_pri] == Running ||
++ fetchStatus[high_pri] == IcacheMissComplete ||
++ fetchStatus[high_pri] == Idle)
++ return high_pri;
++ else
++ PQ.pop();
++
++ }
++
++ return -1;
++}
++
++template<class Impl>
++int
++DefaultFetch<Impl>::lsqCount()
++{
++ priority_queue<unsigned> PQ;
++
++
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = *threads++;
++
++ PQ.push(fromIEW->iewInfo[tid].ldstqCount);
++ }
++
++ while (!PQ.empty()) {
++
++ unsigned high_pri = PQ.top();
++
++ if (fetchStatus[high_pri] == Running ||
++ fetchStatus[high_pri] == IcacheMissComplete ||
++ fetchStatus[high_pri] == Idle)
++ return high_pri;
++ else
++ PQ.pop();
++
++ }
++
++ return -1;
++}
++
++template<class Impl>
++int
++DefaultFetch<Impl>::branchCount()
++{
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++
++ return *threads;
++}
--- /dev/null
- SimpleFreeList::SimpleFreeList(unsigned _numLogicalIntRegs,
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "base/trace.hh"
+
+#include "cpu/o3/free_list.hh"
+
- DPRINTF(FreeList, "FreeList: Creating new free list object.\n");
-
- // DEBUG stuff.
- freeIntRegsScoreboard.resize(numPhysicalIntRegs);
-
- freeFloatRegsScoreboard.resize(numPhysicalRegs);
-
- for (PhysRegIndex i = 0; i < numLogicalIntRegs; ++i) {
- freeIntRegsScoreboard[i] = 0;
- }
++SimpleFreeList::SimpleFreeList(unsigned activeThreads,
++ unsigned _numLogicalIntRegs,
+ unsigned _numPhysicalIntRegs,
+ unsigned _numLogicalFloatRegs,
+ unsigned _numPhysicalFloatRegs)
+ : numLogicalIntRegs(_numLogicalIntRegs),
+ numPhysicalIntRegs(_numPhysicalIntRegs),
+ numLogicalFloatRegs(_numLogicalFloatRegs),
+ numPhysicalFloatRegs(_numPhysicalFloatRegs),
+ numPhysicalRegs(numPhysicalIntRegs + numPhysicalFloatRegs)
+{
- for (PhysRegIndex i = numLogicalIntRegs;
++ DPRINTF(FreeList, "Creating new free list object.\n");
+
+ // Put all of the extra physical registers onto the free list. This
+ // means excluding all of the base logical registers.
-
- freeIntRegsScoreboard[i] = 1;
- }
-
- for (PhysRegIndex i = 0; i < numPhysicalIntRegs + numLogicalFloatRegs;
- ++i)
- {
- freeFloatRegsScoreboard[i] = 0;
++ for (PhysRegIndex i = numLogicalIntRegs * activeThreads;
+ i < numPhysicalIntRegs; ++i)
+ {
+ freeIntRegs.push(i);
- for (PhysRegIndex i = numPhysicalIntRegs + numLogicalFloatRegs;
- i < numPhysicalRegs; ++i)
+ }
+
+ // Put all of the extra physical registers onto the free list. This
+ // means excluding all of the base logical registers. Because the
+ // float registers' indices start where the physical registers end,
+ // some math must be done to determine where the free registers start.
-
- freeFloatRegsScoreboard[i] = 1;
++ PhysRegIndex i = numPhysicalIntRegs + (numLogicalFloatRegs * activeThreads);
++
++ for ( ; i < numPhysicalRegs; ++i)
+ {
+ freeFloatRegs.push(i);
+ }
+}
+
++std::string
++SimpleFreeList::name() const
++{
++ return "cpu.freelist";
++}
--- /dev/null
- #ifndef __CPU_O3_CPU_FREE_LIST_HH__
- #define __CPU_O3_CPU_FREE_LIST_HH__
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- * Note that
- * while this most likely should be called FreeList, the name "FreeList"
- * is used in a typedef within the CPU Policy, and therefore no class
- * can be named simply "FreeList".
++#ifndef __CPU_O3_FREE_LIST_HH__
++#define __CPU_O3_FREE_LIST_HH__
+
+#include <iostream>
+#include <queue>
+
+#include "arch/isa_traits.hh"
+#include "base/trace.hh"
+#include "base/traceflags.hh"
+#include "cpu/o3/comm.hh"
+
+/**
+ * FreeList class that simply holds the list of free integer and floating
+ * point registers. Can request for a free register of either type, and
+ * also send back free registers of either type. This is a very simple
+ * class, but it should be sufficient for most implementations. Like all
+ * other classes, it assumes that the indices for the floating point
+ * registers starts after the integer registers end. Hence the variable
+ * numPhysicalIntRegs is logically equivalent to the baseFP dependency.
- /** DEBUG stuff below. */
- std::vector<int> freeIntRegsScoreboard;
-
- std::vector<bool> freeFloatRegsScoreboard;
-
++ * Note that while this most likely should be called FreeList, the name
++ * "FreeList" is used in a typedef within the CPU Policy, and therefore no
++ * class can be named simply "FreeList".
+ * @todo: Give a better name to the base FP dependency.
+ */
+class SimpleFreeList
+{
+ private:
+ /** The list of free integer registers. */
+ std::queue<PhysRegIndex> freeIntRegs;
+
+ /** The list of free floating point registers. */
+ std::queue<PhysRegIndex> freeFloatRegs;
+
+ /** Number of logical integer registers. */
+ int numLogicalIntRegs;
+
+ /** Number of physical integer registers. */
+ int numPhysicalIntRegs;
+
+ /** Number of logical floating point registers. */
+ int numLogicalFloatRegs;
+
+ /** Number of physical floating point registers. */
+ int numPhysicalFloatRegs;
+
+ /** Total number of physical registers. */
+ int numPhysicalRegs;
+
- SimpleFreeList(unsigned _numLogicalIntRegs,
+ public:
- DPRINTF(Rename, "FreeList: Trying to get free integer register.\n");
++ /** Constructs a free list.
++ * @param activeThreads Number of active threads.
++ * @param _numLogicalIntRegs Number of logical integer registers.
++ * @param _numPhysicalIntRegs Number of physical integer registers.
++ * @param _numLogicalFloatRegs Number of logical fp registers.
++ * @param _numPhysicalFloatRegs Number of physical fp registers.
++ */
++ SimpleFreeList(unsigned activeThreads,
++ unsigned _numLogicalIntRegs,
+ unsigned _numPhysicalIntRegs,
+ unsigned _numLogicalFloatRegs,
+ unsigned _numPhysicalFloatRegs);
+
++ /** Gives the name of the freelist. */
++ std::string name() const;
++
++ /** Gets a free integer register. */
+ inline PhysRegIndex getIntReg();
+
++ /** Gets a free fp register. */
+ inline PhysRegIndex getFloatReg();
+
++ /** Adds a register back to the free list. */
+ inline void addReg(PhysRegIndex freed_reg);
+
++ /** Adds an integer register back to the free list. */
+ inline void addIntReg(PhysRegIndex freed_reg);
+
++ /** Adds a fp register back to the free list. */
+ inline void addFloatReg(PhysRegIndex freed_reg);
+
++ /** Checks if there are any free integer registers. */
+ bool hasFreeIntRegs()
+ { return !freeIntRegs.empty(); }
+
++ /** Checks if there are any free fp registers. */
+ bool hasFreeFloatRegs()
+ { return !freeFloatRegs.empty(); }
+
++ /** Returns the number of free integer registers. */
+ int numFreeIntRegs()
+ { return freeIntRegs.size(); }
+
++ /** Returns the number of free fp registers. */
+ int numFreeFloatRegs()
+ { return freeFloatRegs.size(); }
+};
+
+inline PhysRegIndex
+SimpleFreeList::getIntReg()
+{
- // DEBUG
- assert(freeIntRegsScoreboard[free_reg]);
- freeIntRegsScoreboard[free_reg] = 0;
-
++ DPRINTF(FreeList, "Trying to get free integer register.\n");
++
+ if (freeIntRegs.empty()) {
+ panic("No free integer registers!");
+ }
+
+ PhysRegIndex free_reg = freeIntRegs.front();
+
+ freeIntRegs.pop();
+
- DPRINTF(Rename, "FreeList: Trying to get free float register.\n");
+ return(free_reg);
+}
+
+inline PhysRegIndex
+SimpleFreeList::getFloatReg()
+{
- // DEBUG
- assert(freeFloatRegsScoreboard[free_reg]);
- freeFloatRegsScoreboard[free_reg] = 0;
-
++ DPRINTF(FreeList, "Trying to get free float register.\n");
++
+ if (freeFloatRegs.empty()) {
+ panic("No free integer registers!");
+ }
+
+ PhysRegIndex free_reg = freeFloatRegs.front();
+
+ freeFloatRegs.pop();
+
- DPRINTF(Rename, "Freelist: Freeing register %i.\n", freed_reg);
+ return(free_reg);
+}
+
+inline void
+SimpleFreeList::addReg(PhysRegIndex freed_reg)
+{
- freeIntRegs.push(freed_reg);
-
- // DEBUG
- assert(freeIntRegsScoreboard[freed_reg] == false);
- freeIntRegsScoreboard[freed_reg] = 1;
++ DPRINTF(FreeList,"Freeing register %i.\n", freed_reg);
+ //Might want to add in a check for whether or not this register is
+ //already in there. A bit vector or something similar would be useful.
+ if (freed_reg < numPhysicalIntRegs) {
- freeFloatRegs.push(freed_reg);
-
- // DEBUG
- assert(freeFloatRegsScoreboard[freed_reg] == false);
- freeFloatRegsScoreboard[freed_reg] = 1;
++ if (freed_reg != TheISA::ZeroReg)
++ freeIntRegs.push(freed_reg);
+ } else if (freed_reg < numPhysicalRegs) {
- DPRINTF(Rename, "Freelist: Freeing int register %i.\n", freed_reg);
-
- // DEBUG
- assert(!freeIntRegsScoreboard[freed_reg]);
- freeIntRegsScoreboard[freed_reg] = 1;
++ if (freed_reg != (TheISA::ZeroReg + numPhysicalIntRegs))
++ freeFloatRegs.push(freed_reg);
+ }
+}
+
+inline void
+SimpleFreeList::addIntReg(PhysRegIndex freed_reg)
+{
- DPRINTF(Rename, "Freelist: Freeing float register %i.\n", freed_reg);
-
- // DEBUG
- assert(!freeFloatRegsScoreboard[freed_reg]);
- freeFloatRegsScoreboard[freed_reg] = 1;
++ DPRINTF(FreeList,"Freeing int register %i.\n", freed_reg);
+
+ freeIntRegs.push(freed_reg);
+}
+
+inline void
+SimpleFreeList::addFloatReg(PhysRegIndex freed_reg)
+{
- #endif // __CPU_O3_CPU_FREE_LIST_HH__
++ DPRINTF(FreeList,"Freeing float register %i.\n", freed_reg);
+
+ freeFloatRegs.push(freed_reg);
+}
+
++#endif // __CPU_O3_FREE_LIST_HH__
--- /dev/null
- template class SimpleIEW<AlphaSimpleImpl>;
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/o3/alpha_dyn_inst.hh"
+#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/iew_impl.hh"
+#include "cpu/o3/inst_queue.hh"
+
++template class DefaultIEW<AlphaSimpleImpl>;
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- //Todo: Update with statuses.
- //Need to handle delaying writes to the writeback bus if it's full at the
- //given time.
-
- #ifndef __CPU_O3_CPU_SIMPLE_IEW_HH__
- #define __CPU_O3_CPU_SIMPLE_IEW_HH__
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- #include "config/full_system.hh"
++#ifndef __CPU_O3_IEW_HH__
++#define __CPU_O3_IEW_HH__
+
+#include <queue>
+
-
+#include "base/statistics.hh"
+#include "base/timebuf.hh"
++#include "config/full_system.hh"
+#include "cpu/o3/comm.hh"
- class SimpleIEW
++#include "cpu/o3/scoreboard.hh"
++#include "cpu/o3/lsq.hh"
++
++class FUPool;
++
++/**
++ * DefaultIEW handles both single threaded and SMT IEW
++ * (issue/execute/writeback). It handles the dispatching of
++ * instructions to the LSQ/IQ as part of the issue stage, and has the
++ * IQ try to issue instructions each cycle. The execute latency is
++ * actually tied into the issue latency to allow the IQ to be able to
++ * do back-to-back scheduling without having to speculatively schedule
++ * instructions. This happens by having the IQ have access to the
++ * functional units, and the IQ gets the execution latencies from the
++ * FUs when it issues instructions. Instructions reach the execute
++ * stage on the last cycle of their execution, which is when the IQ
++ * knows to wake up any dependent instructions, allowing back to back
++ * scheduling. The execute portion of IEW separates memory
++ * instructions from non-memory instructions, either telling the LSQ
++ * to execute the instruction, or executing the instruction directly.
++ * The writeback portion of IEW completes the instructions by waking
++ * up any dependents, and marking the register ready on the
++ * scoreboard.
++ */
+template<class Impl>
- typedef typename CPUPol::LDSTQ LDSTQ;
++class DefaultIEW
+{
+ private:
+ //Typedefs from Impl
+ typedef typename Impl::CPUPol CPUPol;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::FullCPU FullCPU;
+ typedef typename Impl::Params Params;
+
+ typedef typename CPUPol::IQ IQ;
+ typedef typename CPUPol::RenameMap RenameMap;
- Status _issueStatus;
- Status _exeStatus;
- Status _wbStatus;
++ typedef typename CPUPol::LSQ LSQ;
+
+ typedef typename CPUPol::TimeStruct TimeStruct;
+ typedef typename CPUPol::IEWStruct IEWStruct;
+ typedef typename CPUPol::RenameStruct RenameStruct;
+ typedef typename CPUPol::IssueStruct IssueStruct;
+
+ friend class Impl::FullCPU;
++ friend class CPUPol::IQ;
++
+ public:
++ /** Overall IEW stage status. Used to determine if the CPU can
++ * deschedule itself due to a lack of activity.
++ */
+ enum Status {
++ Active,
++ Inactive
++ };
++
++ /** Status for Issue, Execute, and Writeback stages. */
++ enum StageStatus {
+ Running,
+ Blocked,
+ Idle,
++ StartSquash,
+ Squashing,
+ Unblocking
+ };
+
+ private:
++ /** Overall stage status. */
+ Status _status;
- class WritebackEvent : public Event {
++ /** Dispatch status. */
++ StageStatus dispatchStatus[Impl::MaxThreads];
++ /** Execute status. */
++ StageStatus exeStatus;
++ /** Writeback status. */
++ StageStatus wbStatus;
+
+ public:
- SimpleIEW<Impl> *iewStage;
++ /** LdWriteback event for a load completion. */
++ class LdWritebackEvent : public Event {
+ private:
++ /** Instruction that is writing back data to the register file. */
+ DynInstPtr inst;
- WritebackEvent(DynInstPtr &_inst, SimpleIEW<Impl> *_iew);
++ /** Pointer to IEW stage. */
++ DefaultIEW<Impl> *iewStage;
+
+ public:
- SimpleIEW(Params ¶ms);
++ /** Constructs a load writeback event. */
++ LdWritebackEvent(DynInstPtr &_inst, DefaultIEW<Impl> *_iew);
+
++ /** Processes writeback event. */
+ virtual void process();
++ /** Returns the description of the writeback event. */
+ virtual const char *description();
+ };
+
+ public:
- void setRenameMap(RenameMap *rm_ptr);
++ /** Constructs a DefaultIEW with the given parameters. */
++ DefaultIEW(Params *params);
++
++ /** Returns the name of the DefaultIEW stage. */
++ std::string name() const;
+
++ /** Registers statistics. */
+ void regStats();
+
++ /** Initializes stage; sends back the number of free IQ and LSQ entries. */
++ void initStage();
++
++ /** Sets CPU pointer for IEW, IQ, and LSQ. */
+ void setCPU(FullCPU *cpu_ptr);
+
++ /** Sets main time buffer used for backwards communication. */
+ void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
+
++ /** Sets time buffer for getting instructions coming from rename. */
+ void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr);
+
++ /** Sets time buffer to pass on instructions to commit. */
+ void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr);
+
- void squash();
++ /** Sets pointer to list of active threads. */
++ void setActiveThreads(std::list<unsigned> *at_ptr);
++
++ /** Sets pointer to the scoreboard. */
++ void setScoreboard(Scoreboard *sb_ptr);
++
++ void switchOut();
+
- void squashDueToBranch(DynInstPtr &inst);
++ void doSwitchOut();
+
- void squashDueToMem(DynInstPtr &inst);
++ void takeOverFrom();
+
- void block();
++ bool isSwitchedOut() { return switchedOut; }
+
- inline void unblock();
++ /** Sets page table pointer within LSQ. */
++// void setPageTable(PageTable *pt_ptr);
+
- void dispatchInsts();
++ /** Squashes instructions in IEW for a specific thread. */
++ void squash(unsigned tid);
+
++ /** Wakes all dependents of a completed instruction. */
+ void wakeDependents(DynInstPtr &inst);
+
++ /** Tells memory dependence unit that a memory instruction needs to be
++ * rescheduled. It will re-execute once replayMemInst() is called.
++ */
++ void rescheduleMemInst(DynInstPtr &inst);
++
++ /** Re-executes all rescheduled memory instructions. */
++ void replayMemInst(DynInstPtr &inst);
++
++ /** Sends an instruction to commit through the time buffer. */
+ void instToCommit(DynInstPtr &inst);
+
++ /** Inserts unused instructions of a thread into the skid buffer. */
++ void skidInsert(unsigned tid);
++
++ /** Returns the max of the number of entries in all of the skid buffers. */
++ int skidCount();
++
++ /** Returns if all of the skid buffers are empty. */
++ bool skidsEmpty();
++
++ /** Updates overall IEW status based on all of the stages' statuses. */
++ void updateStatus();
++
++ /** Resets entries of the IQ and the LSQ. */
++ void resetEntries();
++
++ /** Tells the CPU to wakeup if it has descheduled itself due to no
++ * activity. Used mainly by the LdWritebackEvent.
++ */
++ void wakeCPU();
++
++ /** Reports to the CPU that there is activity this cycle. */
++ void activityThisCycle();
++
++ /** Tells CPU that the IEW stage is active and running. */
++ inline void activateStage();
++
++ /** Tells CPU that the IEW stage is inactive and idle. */
++ inline void deactivateStage();
++
++ /** Returns if the LSQ has any stores to writeback. */
++ bool hasStoresToWB() { return ldstQueue.hasStoresToWB(); }
++
+ private:
- void iew();
++ /** Sends commit proper information for a squash due to a branch
++ * mispredict.
++ */
++ void squashDueToBranch(DynInstPtr &inst, unsigned thread_id);
+
++ /** Sends commit proper information for a squash due to a memory order
++ * violation.
++ */
++ void squashDueToMemOrder(DynInstPtr &inst, unsigned thread_id);
++
++ /** Sends commit proper information for a squash due to memory becoming
++ * blocked (younger issued instructions must be retried).
++ */
++ void squashDueToMemBlocked(DynInstPtr &inst, unsigned thread_id);
++
++ /** Sets Dispatch to blocked, and signals back to other stages to block. */
++ void block(unsigned thread_id);
++
++ /** Unblocks Dispatch if the skid buffer is empty, and signals back to
++ * other stages to unblock.
++ */
++ void unblock(unsigned thread_id);
++
++ /** Determines proper actions to take given Dispatch's status. */
++ void dispatch(unsigned tid);
++
++ /** Dispatches instructions to IQ and LSQ. */
++ void dispatchInsts(unsigned tid);
++
++ /** Executes instructions. In the case of memory operations, it informs the
++ * LSQ to execute the instructions. Also handles any redirects that occur
++ * due to the executed instructions.
++ */
+ void executeInsts();
+
++ /** Writebacks instructions. In our model, the instruction's execute()
++ * function atomically reads registers, executes, and writes registers.
++ * Thus this writeback only wakes up dependent instructions, and informs
++ * the scoreboard of registers becoming ready.
++ */
++ void writebackInsts();
++
++ /** Returns the number of valid, non-squashed instructions coming from
++ * rename to dispatch.
++ */
++ unsigned validInstsFromRename();
++
++ /** Reads the stall signals. */
++ void readStallSignals(unsigned tid);
++
++ /** Checks if any of the stall conditions are currently true. */
++ bool checkStall(unsigned tid);
++
++ /** Processes inputs and changes state accordingly. */
++ void checkSignalsAndUpdate(unsigned tid);
++
++ /** Sorts instructions coming from rename into lists separated by thread. */
++ void sortInsts();
++
+ public:
++ /** Ticks IEW stage, causing Dispatch, the IQ, the LSQ, Execute, and
++ * Writeback to run for one cycle.
++ */
+ void tick();
+
- //Interfaces to objects inside and outside of IEW.
- /** Time buffer interface. */
++ private:
++ void updateExeInstStats(DynInstPtr &inst);
+
- //Will need internal queue to hold onto instructions coming from
- //the rename stage in case of a stall.
++ /** Pointer to main time buffer used for backwards communication. */
+ TimeBuffer<TimeStruct> *timeBuffer;
+
++ /** Wire to write information heading to previous stages. */
++ typename TimeBuffer<TimeStruct>::wire toFetch;
++
+ /** Wire to get commit's output from backwards time buffer. */
+ typename TimeBuffer<TimeStruct>::wire fromCommit;
+
+ /** Wire to write information heading to previous stages. */
+ typename TimeBuffer<TimeStruct>::wire toRename;
+
+ /** Rename instruction queue interface. */
+ TimeBuffer<RenameStruct> *renameQueue;
+
+ /** Wire to get rename's output from rename queue. */
+ typename TimeBuffer<RenameStruct>::wire fromRename;
+
+ /** Issue stage queue. */
+ TimeBuffer<IssueStruct> issueToExecQueue;
+
+ /** Wire to read information from the issue stage time queue. */
+ typename TimeBuffer<IssueStruct>::wire fromIssue;
+
+ /**
+ * IEW stage time buffer. Holds ROB indices of instructions that
+ * can be marked as completed.
+ */
+ TimeBuffer<IEWStruct> *iewQueue;
+
+ /** Wire to write infromation heading to commit. */
+ typename TimeBuffer<IEWStruct>::wire toCommit;
+
- std::queue<RenameStruct> skidBuffer;
++ /** Queue of all instructions coming from rename this cycle. */
++ std::queue<DynInstPtr> insts[Impl::MaxThreads];
++
+ /** Skid buffer between rename and IEW. */
- protected:
++ std::queue<DynInstPtr> skidBuffer[Impl::MaxThreads];
+
- LDSTQ ldstQueue;
++ /** Scoreboard pointer. */
++ Scoreboard* scoreboard;
++
++ public:
+ /** Instruction queue. */
+ IQ instQueue;
+
- #if !FULL_SYSTEM
- public:
- void lsqWriteback();
- #endif
++ /** Load / store queue. */
++ LSQ ldstQueue;
+
- /** Pointer to rename map. Might not want this stage to directly
- * access this though...
++ /** Pointer to the functional unit pool. */
++ FUPool *fuPool;
+
+ private:
- RenameMap *renameMap;
++ /** CPU pointer. */
++ FullCPU *cpu;
++
++ /** Records if IEW has written to the time buffer this cycle, so that the
++ * CPU can deschedule itself if there is no activity.
+ */
- /** CPU interface. */
- FullCPU *cpu;
++ bool wroteToTimeBuffer;
+
- /** Number of cycles stage has been squashing. Used so that the stage
- * knows when it can start unblocking, which is when the previous stage
- * has received the stall signal and clears up its outputs.
++ /** Source of possible stalls. */
++ struct Stalls {
++ bool commit;
++ };
++
++ /** Stages that are telling IEW to stall. */
++ Stalls stalls[Impl::MaxThreads];
++
++ /** Debug function to print instructions that are issued this cycle. */
++ void printAvailableInsts();
++
++ public:
++ /** Records if the LSQ needs to be updated on the next cycle, so that
++ * IEW knows if there will be activity on the next cycle.
++ */
++ bool updateLSQNextCycle;
+
+ private:
++ /** Records if there is a fetch redirect on this cycle for each thread. */
++ bool fetchRedirect[Impl::MaxThreads];
++
++ /** Used to track if all instructions have been dispatched this cycle.
++ * If they have not, then blocking must have occurred, and the instructions
++ * would already be added to the skid buffer.
++ * @todo: Fix this hack.
++ */
++ bool dispatchedAllInsts;
++
++ /** Records if the queues have been changed (inserted or issued insts),
++ * so that IEW knows to broadcast the updated amount of free entries.
++ */
++ bool updatedQueues;
++
+ /** Commit to IEW delay, in ticks. */
+ unsigned commitToIEWDelay;
+
+ /** Rename to IEW delay, in ticks. */
+ unsigned renameToIEWDelay;
+
+ /**
+ * Issue to execute delay, in ticks. What this actually represents is
+ * the amount of time it takes for an instruction to wake up, be
+ * scheduled, and sent to a FU for execution.
+ */
+ unsigned issueToExecuteDelay;
+
+ /** Width of issue's read path, in instructions. The read path is both
+ * the skid buffer and the rename instruction queue.
+ * Note to self: is this really different than issueWidth?
+ */
+ unsigned issueReadWidth;
+
+ /** Width of issue, in instructions. */
+ unsigned issueWidth;
+
+ /** Width of execute, in instructions. Might make more sense to break
+ * down into FP vs int.
+ */
+ unsigned executeWidth;
+
- unsigned cyclesSquashing;
++ /** Index into queue of instructions being written back. */
++ unsigned wbNumInst;
++
++ /** Cycle number within the queue of instructions being written back.
++ * Used in case there are too many instructions writing back at the current
++ * cycle and writesbacks need to be scheduled for the future. See comments
++ * in instToCommit().
+ */
- // Stats::Scalar<> iewWBInsts;
++ unsigned wbCycle;
++
++ /** Number of active threads. */
++ unsigned numThreads;
++
++ /** Pointer to list of active threads. */
++ std::list<unsigned> *activeThreads;
++
++ /** Maximum size of the skid buffer. */
++ unsigned skidBufferMax;
+
++ bool switchedOut;
++
++ /** Stat for total number of idle cycles. */
+ Stats::Scalar<> iewIdleCycles;
++ /** Stat for total number of squashing cycles. */
+ Stats::Scalar<> iewSquashCycles;
++ /** Stat for total number of blocking cycles. */
+ Stats::Scalar<> iewBlockCycles;
++ /** Stat for total number of unblocking cycles. */
+ Stats::Scalar<> iewUnblockCycles;
- Stats::Scalar<> iewExecLoadInsts;
- Stats::Scalar<> iewExecStoreInsts;
++ /** Stat for total number of instructions dispatched. */
+ Stats::Scalar<> iewDispatchedInsts;
++ /** Stat for total number of squashed instructions dispatch skips. */
+ Stats::Scalar<> iewDispSquashedInsts;
++ /** Stat for total number of dispatched load instructions. */
+ Stats::Scalar<> iewDispLoadInsts;
++ /** Stat for total number of dispatched store instructions. */
+ Stats::Scalar<> iewDispStoreInsts;
++ /** Stat for total number of dispatched non speculative instructions. */
+ Stats::Scalar<> iewDispNonSpecInsts;
++ /** Stat for number of times the IQ becomes full. */
+ Stats::Scalar<> iewIQFullEvents;
++ /** Stat for number of times the LSQ becomes full. */
++ Stats::Scalar<> iewLSQFullEvents;
++ /** Stat for total number of executed instructions. */
+ Stats::Scalar<> iewExecutedInsts;
- #endif // __CPU_O3_CPU_IEW_HH__
++ /** Stat for total number of executed load instructions. */
++ Stats::Vector<> iewExecLoadInsts;
++ /** Stat for total number of executed store instructions. */
++// Stats::Scalar<> iewExecStoreInsts;
++ /** Stat for total number of squashed instructions skipped at execute. */
+ Stats::Scalar<> iewExecSquashedInsts;
++ /** Stat for total number of memory ordering violation events. */
+ Stats::Scalar<> memOrderViolationEvents;
++ /** Stat for total number of incorrect predicted taken branches. */
+ Stats::Scalar<> predictedTakenIncorrect;
++ /** Stat for total number of incorrect predicted not taken branches. */
++ Stats::Scalar<> predictedNotTakenIncorrect;
++ /** Stat for total number of mispredicted branches detected at execute. */
++ Stats::Formula branchMispredicts;
++
++ Stats::Vector<> exeSwp;
++ Stats::Vector<> exeNop;
++ Stats::Vector<> exeRefs;
++ Stats::Vector<> exeBranches;
++
++// Stats::Vector<> issued_ops;
++/*
++ Stats::Vector<> stat_fu_busy;
++ Stats::Vector2d<> stat_fuBusy;
++ Stats::Vector<> dist_unissued;
++ Stats::Vector2d<> stat_issued_inst_type;
++*/
++ Stats::Formula issueRate;
++ Stats::Formula iewExecStoreInsts;
++// Stats::Formula issue_op_rate;
++// Stats::Formula fu_busy_rate;
++
++ Stats::Vector<> iewInstsToCommit;
++ Stats::Vector<> writebackCount;
++ Stats::Vector<> producerInst;
++ Stats::Vector<> consumerInst;
++ Stats::Vector<> wbPenalized;
++
++ Stats::Formula wbRate;
++ Stats::Formula wbFanout;
++ Stats::Formula wbPenalizedRate;
+};
+
++#endif // __CPU_O3_IEW_HH__
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- // Update the statuses for each stage.
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// @todo: Fix the instantaneous communication among all the stages within
+// iew. There's a clear delay between issue and execute, yet backwards
+// communication happens simultaneously.
- SimpleIEW<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst,
- SimpleIEW<Impl> *_iew)
- : Event(&mainEventQueue, CPU_Tick_Pri), inst(_inst), iewStage(_iew)
+
+#include <queue>
+
+#include "base/timebuf.hh"
++#include "cpu/o3/fu_pool.hh"
+#include "cpu/o3/iew.hh"
+
++using namespace std;
++
+template<class Impl>
- SimpleIEW<Impl>::WritebackEvent::process()
++DefaultIEW<Impl>::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst,
++ DefaultIEW<Impl> *_iew)
++ : Event(&mainEventQueue), inst(_inst), iewStage(_iew)
+{
+ this->setFlags(Event::AutoDelete);
+}
+
+template<class Impl>
+void
- DPRINTF(IEW, "IEW: WRITEBACK EVENT!!!!\n");
++DefaultIEW<Impl>::LdWritebackEvent::process()
+{
- // Need to execute second half of the instruction, do actual writing to
- // registers and such
- inst->execute();
++ DPRINTF(IEW, "Load writeback event [sn:%lli]\n", inst->seqNum);
++ DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
++
++ //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
++
++ if (iewStage->isSwitchedOut()) {
++ inst = NULL;
++ return;
++ } else if (inst->isSquashed()) {
++ iewStage->wakeCPU();
++ inst = NULL;
++ return;
++ }
++
++ iewStage->wakeCPU();
++
++ if (!inst->isExecuted()) {
++ inst->setExecuted();
++
++ // Complete access to copy data to proper place.
++ if (inst->isStore()) {
++ inst->completeAcc();
++ }
++ }
+
+ // Need to insert instruction into queue to commit
+ iewStage->instToCommit(inst);
- SimpleIEW<Impl>::WritebackEvent::description()
++
++ iewStage->activityThisCycle();
++
++ inst = NULL;
+}
+
+template<class Impl>
+const char *
- return "LSQ writeback event";
++DefaultIEW<Impl>::LdWritebackEvent::description()
+{
- SimpleIEW<Impl>::SimpleIEW(Params ¶ms)
- : // Just make this time buffer really big for now
++ return "Load writeback event";
+}
+
+template<class Impl>
- commitToIEWDelay(params.commitToIEWDelay),
- renameToIEWDelay(params.renameToIEWDelay),
- issueToExecuteDelay(params.issueToExecuteDelay),
- issueReadWidth(params.issueWidth),
- issueWidth(params.issueWidth),
- executeWidth(params.executeWidth)
- {
- DPRINTF(IEW, "IEW: executeIntWidth: %i.\n", params.executeIntWidth);
- _status = Idle;
- _issueStatus = Idle;
- _exeStatus = Idle;
- _wbStatus = Idle;
++DefaultIEW<Impl>::DefaultIEW(Params *params)
++ : // @todo: Make this into a parameter.
+ issueToExecQueue(5, 5),
+ instQueue(params),
+ ldstQueue(params),
- SimpleIEW<Impl>::regStats()
++ fuPool(params->fuPool),
++ commitToIEWDelay(params->commitToIEWDelay),
++ renameToIEWDelay(params->renameToIEWDelay),
++ issueToExecuteDelay(params->issueToExecuteDelay),
++ issueReadWidth(params->issueWidth),
++ issueWidth(params->issueWidth),
++ executeWidth(params->executeWidth),
++ numThreads(params->numberOfThreads),
++ switchedOut(false)
++{
++ _status = Active;
++ exeStatus = Running;
++ wbStatus = Idle;
+
+ // Setup wire to read instructions coming from issue.
+ fromIssue = issueToExecQueue.getWire(-issueToExecuteDelay);
+
+ // Instruction queue needs the queue between issue and execute.
+ instQueue.setIssueToExecuteQueue(&issueToExecQueue);
+
++ instQueue.setIEW(this);
+ ldstQueue.setIEW(this);
++
++ for (int i=0; i < numThreads; i++) {
++ dispatchStatus[i] = Running;
++ stalls[i].commit = false;
++ fetchRedirect[i] = false;
++ }
++
++ updateLSQNextCycle = false;
++
++ skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth;
++}
++
++template <class Impl>
++std::string
++DefaultIEW<Impl>::name() const
++{
++ return cpu->name() + ".iew";
+}
+
+template <class Impl>
+void
- // iewWBInsts;
-
++DefaultIEW<Impl>::regStats()
+{
++ using namespace Stats;
++
+ instQueue.regStats();
+
+ iewIdleCycles
+ .name(name() + ".iewIdleCycles")
+ .desc("Number of cycles IEW is idle");
+
+ iewSquashCycles
+ .name(name() + ".iewSquashCycles")
+ .desc("Number of cycles IEW is squashing");
+
+ iewBlockCycles
+ .name(name() + ".iewBlockCycles")
+ .desc("Number of cycles IEW is blocking");
+
+ iewUnblockCycles
+ .name(name() + ".iewUnblockCycles")
+ .desc("Number of cycles IEW is unblocking");
+
- .desc("Number of load instructions executed");
-
- iewExecStoreInsts
- .name(name() + ".iewExecStoreInsts")
- .desc("Number of store instructions executed");
+ iewDispatchedInsts
+ .name(name() + ".iewDispatchedInsts")
+ .desc("Number of instructions dispatched to IQ");
+
+ iewDispSquashedInsts
+ .name(name() + ".iewDispSquashedInsts")
+ .desc("Number of squashed instructions skipped by dispatch");
+
+ iewDispLoadInsts
+ .name(name() + ".iewDispLoadInsts")
+ .desc("Number of dispatched load instructions");
+
+ iewDispStoreInsts
+ .name(name() + ".iewDispStoreInsts")
+ .desc("Number of dispatched store instructions");
+
+ iewDispNonSpecInsts
+ .name(name() + ".iewDispNonSpecInsts")
+ .desc("Number of dispatched non-speculative instructions");
+
+ iewIQFullEvents
+ .name(name() + ".iewIQFullEvents")
+ .desc("Number of times the IQ has become full, causing a stall");
+
++ iewLSQFullEvents
++ .name(name() + ".iewLSQFullEvents")
++ .desc("Number of times the LSQ has become full, causing a stall");
++
+ iewExecutedInsts
+ .name(name() + ".iewExecutedInsts")
+ .desc("Number of executed instructions");
+
+ iewExecLoadInsts
++ .init(cpu->number_of_threads)
+ .name(name() + ".iewExecLoadInsts")
- SimpleIEW<Impl>::setCPU(FullCPU *cpu_ptr)
++ .desc("Number of load instructions executed")
++ .flags(total);
+
+ iewExecSquashedInsts
+ .name(name() + ".iewExecSquashedInsts")
+ .desc("Number of squashed instructions skipped in execute");
+
+ memOrderViolationEvents
+ .name(name() + ".memOrderViolationEvents")
+ .desc("Number of memory order violations");
+
+ predictedTakenIncorrect
+ .name(name() + ".predictedTakenIncorrect")
+ .desc("Number of branches that were predicted taken incorrectly");
++
++ predictedNotTakenIncorrect
++ .name(name() + ".predictedNotTakenIncorrect")
++ .desc("Number of branches that were predicted not taken incorrectly");
++
++ branchMispredicts
++ .name(name() + ".branchMispredicts")
++ .desc("Number of branch mispredicts detected at execute");
++
++ branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect;
++
++ exeSwp
++ .init(cpu->number_of_threads)
++ .name(name() + ".EXEC:swp")
++ .desc("number of swp insts executed")
++ .flags(total)
++ ;
++
++ exeNop
++ .init(cpu->number_of_threads)
++ .name(name() + ".EXEC:nop")
++ .desc("number of nop insts executed")
++ .flags(total)
++ ;
++
++ exeRefs
++ .init(cpu->number_of_threads)
++ .name(name() + ".EXEC:refs")
++ .desc("number of memory reference insts executed")
++ .flags(total)
++ ;
++
++ exeBranches
++ .init(cpu->number_of_threads)
++ .name(name() + ".EXEC:branches")
++ .desc("Number of branches executed")
++ .flags(total)
++ ;
++
++ issueRate
++ .name(name() + ".EXEC:rate")
++ .desc("Inst execution rate")
++ .flags(total)
++ ;
++ issueRate = iewExecutedInsts / cpu->numCycles;
++
++ iewExecStoreInsts
++ .name(name() + ".EXEC:stores")
++ .desc("Number of stores executed")
++ .flags(total)
++ ;
++ iewExecStoreInsts = exeRefs - iewExecLoadInsts;
++/*
++ for (int i=0; i<Num_OpClasses; ++i) {
++ stringstream subname;
++ subname << opClassStrings[i] << "_delay";
++ issue_delay_dist.subname(i, subname.str());
++ }
++*/
++ //
++ // Other stats
++ //
++
++ iewInstsToCommit
++ .init(cpu->number_of_threads)
++ .name(name() + ".WB:sent")
++ .desc("cumulative count of insts sent to commit")
++ .flags(total)
++ ;
++
++ writebackCount
++ .init(cpu->number_of_threads)
++ .name(name() + ".WB:count")
++ .desc("cumulative count of insts written-back")
++ .flags(total)
++ ;
++
++ producerInst
++ .init(cpu->number_of_threads)
++ .name(name() + ".WB:producers")
++ .desc("num instructions producing a value")
++ .flags(total)
++ ;
++
++ consumerInst
++ .init(cpu->number_of_threads)
++ .name(name() + ".WB:consumers")
++ .desc("num instructions consuming a value")
++ .flags(total)
++ ;
++
++ wbPenalized
++ .init(cpu->number_of_threads)
++ .name(name() + ".WB:penalized")
++ .desc("number of instrctions required to write to 'other' IQ")
++ .flags(total)
++ ;
++
++ wbPenalizedRate
++ .name(name() + ".WB:penalized_rate")
++ .desc ("fraction of instructions written-back that wrote to 'other' IQ")
++ .flags(total)
++ ;
++
++ wbPenalizedRate = wbPenalized / writebackCount;
++
++ wbFanout
++ .name(name() + ".WB:fanout")
++ .desc("average fanout of values written-back")
++ .flags(total)
++ ;
++
++ wbFanout = producerInst / consumerInst;
++
++ wbRate
++ .name(name() + ".WB:rate")
++ .desc("insts written-back per cycle")
++ .flags(total)
++ ;
++ wbRate = writebackCount / cpu->numCycles;
+}
+
+template<class Impl>
+void
- DPRINTF(IEW, "IEW: Setting CPU pointer.\n");
++DefaultIEW<Impl>::initStage()
+{
- SimpleIEW<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
++ for (int tid=0; tid < numThreads; tid++) {
++ toRename->iewInfo[tid].usedIQ = true;
++ toRename->iewInfo[tid].freeIQEntries =
++ instQueue.numFreeEntries(tid);
++
++ toRename->iewInfo[tid].usedLSQ = true;
++ toRename->iewInfo[tid].freeLSQEntries =
++ ldstQueue.numFreeEntries(tid);
++ }
++}
++
++template<class Impl>
++void
++DefaultIEW<Impl>::setCPU(FullCPU *cpu_ptr)
++{
++ DPRINTF(IEW, "Setting CPU pointer.\n");
+ cpu = cpu_ptr;
+
+ instQueue.setCPU(cpu_ptr);
+ ldstQueue.setCPU(cpu_ptr);
++
++ cpu->activateStage(FullCPU::IEWIdx);
+}
+
+template<class Impl>
+void
- DPRINTF(IEW, "IEW: Setting time buffer pointer.\n");
++DefaultIEW<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
+{
- SimpleIEW<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
++ DPRINTF(IEW, "Setting time buffer pointer.\n");
+ timeBuffer = tb_ptr;
+
+ // Setup wire to read information from time buffer, from commit.
+ fromCommit = timeBuffer->getWire(-commitToIEWDelay);
+
+ // Setup wire to write information back to previous stages.
+ toRename = timeBuffer->getWire(0);
+
++ toFetch = timeBuffer->getWire(0);
++
+ // Instruction queue also needs main time buffer.
+ instQueue.setTimeBuffer(tb_ptr);
+}
+
+template<class Impl>
+void
- DPRINTF(IEW, "IEW: Setting rename queue pointer.\n");
++DefaultIEW<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
+{
- SimpleIEW<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
++ DPRINTF(IEW, "Setting rename queue pointer.\n");
+ renameQueue = rq_ptr;
+
+ // Setup wire to read information from rename queue.
+ fromRename = renameQueue->getWire(-renameToIEWDelay);
+}
+
+template<class Impl>
+void
- DPRINTF(IEW, "IEW: Setting IEW queue pointer.\n");
++DefaultIEW<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
+{
- SimpleIEW<Impl>::setRenameMap(RenameMap *rm_ptr)
++ DPRINTF(IEW, "Setting IEW queue pointer.\n");
+ iewQueue = iq_ptr;
+
+ // Setup wire to write instructions to commit.
+ toCommit = iewQueue->getWire(0);
+}
+
+template<class Impl>
+void
- DPRINTF(IEW, "IEW: Setting rename map pointer.\n");
- renameMap = rm_ptr;
++DefaultIEW<Impl>::setActiveThreads(list<unsigned> *at_ptr)
++{
++ DPRINTF(IEW, "Setting active threads list pointer.\n");
++ activeThreads = at_ptr;
++
++ ldstQueue.setActiveThreads(at_ptr);
++ instQueue.setActiveThreads(at_ptr);
++}
++
++template<class Impl>
++void
++DefaultIEW<Impl>::setScoreboard(Scoreboard *sb_ptr)
++{
++ DPRINTF(IEW, "Setting scoreboard pointer.\n");
++ scoreboard = sb_ptr;
++}
++
++#if 0
++template<class Impl>
++void
++DefaultIEW<Impl>::setPageTable(PageTable *pt_ptr)
++{
++ ldstQueue.setPageTable(pt_ptr);
++}
++#endif
++
++template <class Impl>
++void
++DefaultIEW<Impl>::switchOut()
++{
++ cpu->signalSwitched();
++}
++
++template <class Impl>
++void
++DefaultIEW<Impl>::doSwitchOut()
+{
- SimpleIEW<Impl>::squash()
++ switchedOut = true;
++
++ instQueue.switchOut();
++ ldstQueue.switchOut();
++ fuPool->switchOut();
++
++ for (int i = 0; i < numThreads; i++) {
++ while (!insts[i].empty())
++ insts[i].pop();
++ while (!skidBuffer[i].empty())
++ skidBuffer[i].pop();
++ }
++}
++
++template <class Impl>
++void
++DefaultIEW<Impl>::takeOverFrom()
++{
++ _status = Active;
++ exeStatus = Running;
++ wbStatus = Idle;
++ switchedOut = false;
++
++ instQueue.takeOverFrom();
++ ldstQueue.takeOverFrom();
++ fuPool->takeOverFrom();
++
++ initStage();
++ cpu->activityThisCycle();
++
++ for (int i=0; i < numThreads; i++) {
++ dispatchStatus[i] = Running;
++ stalls[i].commit = false;
++ fetchRedirect[i] = false;
++ }
++
++ updateLSQNextCycle = false;
++
++ // @todo: Fix hardcoded number
++ for (int i = 0; i < 6; ++i) {
++ issueToExecQueue.advance();
++ }
+}
+
+template<class Impl>
+void
- DPRINTF(IEW, "IEW: Squashing all instructions.\n");
- _status = Squashing;
++DefaultIEW<Impl>::squash(unsigned tid)
+{
- instQueue.squash();
++ DPRINTF(IEW, "[tid:%i]: Squashing all instructions.\n",
++ tid);
+
+ // Tell the IQ to start squashing.
- ldstQueue.squash(fromCommit->commitInfo.doneSeqNum);
++ instQueue.squash(tid);
+
+ // Tell the LDSTQ to start squashing.
- SimpleIEW<Impl>::squashDueToBranch(DynInstPtr &inst)
- {
- DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n",
- inst->PC);
- // Perhaps leave the squashing up to the ROB stage to tell it when to
- // squash?
- _status = Squashing;
-
- // Tell rename to squash through the time buffer.
- toCommit->squash = true;
- // Also send PC update information back to prior stages.
- toCommit->squashedSeqNum = inst->seqNum;
- toCommit->mispredPC = inst->readPC();
- toCommit->nextPC = inst->readNextPC();
- toCommit->branchMispredict = true;
- // Prediction was incorrect, so send back inverse.
- toCommit->branchTaken = inst->readNextPC() !=
++ ldstQueue.squash(fromCommit->commitInfo[tid].doneSeqNum, tid);
++
++ updatedQueues = true;
++
++ // Clear the skid buffer in case it has any data in it.
++ while (!skidBuffer[tid].empty()) {
++
++ if (skidBuffer[tid].front()->isLoad() ||
++ skidBuffer[tid].front()->isStore() ) {
++ toRename->iewInfo[tid].dispatchedToLSQ++;
++ }
++
++ toRename->iewInfo[tid].dispatched++;
++
++ skidBuffer[tid].pop();
++ }
++
++ while (!insts[tid].empty()) {
++ if (insts[tid].front()->isLoad() ||
++ insts[tid].front()->isStore() ) {
++ toRename->iewInfo[tid].dispatchedToLSQ++;
++ }
++
++ toRename->iewInfo[tid].dispatched++;
++
++ insts[tid].pop();
++ }
+}
+
+template<class Impl>
+void
- SimpleIEW<Impl>::squashDueToMem(DynInstPtr &inst)
++DefaultIEW<Impl>::squashDueToBranch(DynInstPtr &inst, unsigned tid)
++{
++ DPRINTF(IEW, "[tid:%i]: Squashing from a specific instruction, PC: %#x "
++ "[sn:%i].\n", tid, inst->readPC(), inst->seqNum);
++
++ toCommit->squash[tid] = true;
++ toCommit->squashedSeqNum[tid] = inst->seqNum;
++ toCommit->mispredPC[tid] = inst->readPC();
++ toCommit->nextPC[tid] = inst->readNextPC();
++ toCommit->branchMispredict[tid] = true;
++ toCommit->branchTaken[tid] = inst->readNextPC() !=
+ (inst->readPC() + sizeof(TheISA::MachInst));
++
++ toCommit->includeSquashInst[tid] = false;
++
++ wroteToTimeBuffer = true;
++}
++
++template<class Impl>
++void
++DefaultIEW<Impl>::squashDueToMemOrder(DynInstPtr &inst, unsigned tid)
++{
++ DPRINTF(IEW, "[tid:%i]: Squashing from a specific instruction, "
++ "PC: %#x [sn:%i].\n", tid, inst->readPC(), inst->seqNum);
++
++ toCommit->squash[tid] = true;
++ toCommit->squashedSeqNum[tid] = inst->seqNum;
++ toCommit->nextPC[tid] = inst->readNextPC();
++
++ toCommit->includeSquashInst[tid] = false;
++
++ wroteToTimeBuffer = true;
+}
+
+template<class Impl>
+void
- DPRINTF(IEW, "IEW: Squashing from a specific instruction, PC: %#x.\n",
- inst->PC);
- // Perhaps leave the squashing up to the ROB stage to tell it when to
- // squash?
- _status = Squashing;
++DefaultIEW<Impl>::squashDueToMemBlocked(DynInstPtr &inst, unsigned tid)
+{
- // Tell rename to squash through the time buffer.
- toCommit->squash = true;
- // Also send PC update information back to prior stages.
- toCommit->squashedSeqNum = inst->seqNum;
- toCommit->nextPC = inst->readNextPC();
++ DPRINTF(IEW, "[tid:%i]: Memory blocked, squashing load and younger insts, "
++ "PC: %#x [sn:%i].\n", tid, inst->readPC(), inst->seqNum);
++
++ toCommit->squash[tid] = true;
++ toCommit->squashedSeqNum[tid] = inst->seqNum;
++ toCommit->nextPC[tid] = inst->readPC();
++
++ toCommit->includeSquashInst[tid] = true;
+
- SimpleIEW<Impl>::block()
++ ldstQueue.setLoadBlockedHandled(tid);
++
++ wroteToTimeBuffer = true;
+}
+
+template<class Impl>
+void
- DPRINTF(IEW, "IEW: Blocking.\n");
- // Set the status to Blocked.
- _status = Blocked;
++DefaultIEW<Impl>::block(unsigned tid)
+{
- skidBuffer.push(*fromRename);
++ DPRINTF(IEW, "[tid:%u]: Blocking.\n", tid);
++
++ if (dispatchStatus[tid] != Blocked &&
++ dispatchStatus[tid] != Unblocking) {
++ toRename->iewBlock[tid] = true;
++ wroteToTimeBuffer = true;
++ }
+
+ // Add the current inputs to the skid buffer so they can be
+ // reprocessed when this stage unblocks.
- // Note that this stage only signals previous stages to stall when
- // it is the cause of the stall originates at this stage. Otherwise
- // the previous stages are expected to check all possible stall signals.
++ skidInsert(tid);
+
- inline void
- SimpleIEW<Impl>::unblock()
++ dispatchStatus[tid] = Blocked;
+}
+
+template<class Impl>
- // Check if there's information in the skid buffer. If there is, then
- // set status to unblocking, otherwise set it directly to running.
- DPRINTF(IEW, "IEW: Reading instructions out of the skid "
- "buffer.\n");
- // Remove the now processed instructions from the skid buffer.
- skidBuffer.pop();
-
- // If there's still information in the skid buffer, then
- // continue to tell previous stages to stall. They will be
- // able to restart once the skid buffer is empty.
- if (!skidBuffer.empty()) {
- toRename->iewInfo.stall = true;
- } else {
- DPRINTF(IEW, "IEW: Stage is done unblocking.\n");
- _status = Running;
++void
++DefaultIEW<Impl>::unblock(unsigned tid)
+{
- SimpleIEW<Impl>::wakeDependents(DynInstPtr &inst)
++ DPRINTF(IEW, "[tid:%i]: Reading instructions out of the skid "
++ "buffer %u.\n",tid, tid);
++
++ // If the skid bufffer is empty, signal back to previous stages to unblock.
++ // Also switch status to running.
++ if (skidBuffer[tid].empty()) {
++ toRename->iewUnblock[tid] = true;
++ wroteToTimeBuffer = true;
++ DPRINTF(IEW, "[tid:%i]: Done unblocking.\n",tid);
++ dispatchStatus[tid] = Running;
+ }
+}
+
+template<class Impl>
+void
- SimpleIEW<Impl>::instToCommit(DynInstPtr &inst)
++DefaultIEW<Impl>::wakeDependents(DynInstPtr &inst)
+{
+ instQueue.wakeDependents(inst);
+}
+
++template<class Impl>
++void
++DefaultIEW<Impl>::rescheduleMemInst(DynInstPtr &inst)
++{
++ instQueue.rescheduleMemInst(inst);
++}
++
++template<class Impl>
++void
++DefaultIEW<Impl>::replayMemInst(DynInstPtr &inst)
++{
++ instQueue.replayMemInst(inst);
++}
+
+template<class Impl>
+void
- SimpleIEW<Impl>::dispatchInsts()
- {
- ////////////////////////////////////////
- // DISPATCH/ISSUE stage
- ////////////////////////////////////////
-
- //Put into its own function?
- //Add instructions to IQ if there are any instructions there
-
- // Check if there are any instructions coming from rename, and we're.
- // not squashing.
- if (fromRename->size > 0) {
- int insts_to_add = fromRename->size;
-
- // Loop through the instructions, putting them in the instruction
- // queue.
- for (int inst_num = 0; inst_num < insts_to_add; ++inst_num)
- {
- DynInstPtr inst = fromRename->insts[inst_num];
-
- // Make sure there's a valid instruction there.
- assert(inst);
-
- DPRINTF(IEW, "IEW: Issue: Adding PC %#x to IQ.\n",
- inst->readPC());
-
- // Be sure to mark these instructions as ready so that the
- // commit stage can go ahead and execute them, and mark
- // them as issued so the IQ doesn't reprocess them.
- if (inst->isSquashed()) {
- ++iewDispSquashedInsts;
- continue;
- } else if (instQueue.isFull()) {
- DPRINTF(IEW, "IEW: Issue: IQ has become full.\n");
- // Call function to start blocking.
- block();
- // Tell previous stage to stall.
- toRename->iewInfo.stall = true;
-
- ++iewIQFullEvents;
- break;
- } else if (inst->isLoad()) {
- DPRINTF(IEW, "IEW: Issue: Memory instruction "
- "encountered, adding to LDSTQ.\n");
-
- // Reserve a spot in the load store queue for this
- // memory access.
- ldstQueue.insertLoad(inst);
-
- ++iewDispLoadInsts;
- } else if (inst->isStore()) {
- ldstQueue.insertStore(inst);
++DefaultIEW<Impl>::instToCommit(DynInstPtr &inst)
+{
++ // First check the time slot that this instruction will write
++ // to. If there are free write ports at the time, then go ahead
++ // and write the instruction to that time. If there are not,
++ // keep looking back to see where's the first time there's a
++ // free slot.
++ while ((*iewQueue)[wbCycle].insts[wbNumInst]) {
++ ++wbNumInst;
++ if (wbNumInst == issueWidth) {
++ ++wbCycle;
++ wbNumInst = 0;
++ }
+
++ assert(wbCycle < 5);
++ }
++
++ // Add finished instruction to queue to commit.
++ (*iewQueue)[wbCycle].insts[wbNumInst] = inst;
++ (*iewQueue)[wbCycle].size++;
+}
+
+template <class Impl>
++unsigned
++DefaultIEW<Impl>::validInstsFromRename()
++{
++ unsigned inst_count = 0;
++
++ for (int i=0; i<fromRename->size; i++) {
++ if (!fromRename->insts[i]->squashed)
++ inst_count++;
++ }
++
++ return inst_count;
++}
++
++template<class Impl>
+void
- ++iewDispStoreInsts;
- } else if (inst->isNonSpeculative()) {
- DPRINTF(IEW, "IEW: Issue: Nonspeculative instruction "
- "encountered, skipping.\n");
++DefaultIEW<Impl>::skidInsert(unsigned tid)
++{
++ DynInstPtr inst = NULL;
+
- // Same hack as with stores.
- inst->setCanCommit();
++ while (!insts[tid].empty()) {
++ inst = insts[tid].front();
+
- // Specificall insert it as nonspeculative.
++ insts[tid].pop();
++
++ DPRINTF(Decode,"[tid:%i]: Inserting [sn:%lli] PC:%#x into "
++ "dispatch skidBuffer %i\n",tid, inst->seqNum,
++ inst->readPC(),tid);
++
++ skidBuffer[tid].push(inst);
++ }
++
++ assert(skidBuffer[tid].size() <= skidBufferMax &&
++ "Skidbuffer Exceeded Max Size");
++}
++
++template<class Impl>
++int
++DefaultIEW<Impl>::skidCount()
++{
++ int max=0;
++
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++
++ while (threads != (*activeThreads).end()) {
++ unsigned thread_count = skidBuffer[*threads++].size();
++ if (max < thread_count)
++ max = thread_count;
++ }
++
++ return max;
++}
++
++template<class Impl>
++bool
++DefaultIEW<Impl>::skidsEmpty()
++{
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++
++ while (threads != (*activeThreads).end()) {
++ if (!skidBuffer[*threads++].empty())
++ return false;
++ }
++
++ return true;
++}
++
++template <class Impl>
++void
++DefaultIEW<Impl>::updateStatus()
++{
++ bool any_unblocking = false;
++
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++
++ threads = (*activeThreads).begin();
++
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = *threads++;
++
++ if (dispatchStatus[tid] == Unblocking) {
++ any_unblocking = true;
++ break;
++ }
++ }
++
++ // If there are no ready instructions waiting to be scheduled by the IQ,
++ // and there's no stores waiting to write back, and dispatch is not
++ // unblocking, then there is no internal activity for the IEW stage.
++ if (_status == Active && !instQueue.hasReadyInsts() &&
++ !ldstQueue.willWB() && !any_unblocking) {
++ DPRINTF(IEW, "IEW switching to idle\n");
++
++ deactivateStage();
++
++ _status = Inactive;
++ } else if (_status == Inactive && (instQueue.hasReadyInsts() ||
++ ldstQueue.willWB() ||
++ any_unblocking)) {
++ // Otherwise there is internal activity. Set to active.
++ DPRINTF(IEW, "IEW switching to active\n");
++
++ activateStage();
++
++ _status = Active;
++ }
++}
++
++template <class Impl>
++void
++DefaultIEW<Impl>::resetEntries()
++{
++ instQueue.resetEntries();
++ ldstQueue.resetEntries();
++}
++
++template <class Impl>
++void
++DefaultIEW<Impl>::readStallSignals(unsigned tid)
++{
++ if (fromCommit->commitBlock[tid]) {
++ stalls[tid].commit = true;
++ }
++
++ if (fromCommit->commitUnblock[tid]) {
++ assert(stalls[tid].commit);
++ stalls[tid].commit = false;
++ }
++}
++
++template <class Impl>
++bool
++DefaultIEW<Impl>::checkStall(unsigned tid)
++{
++ bool ret_val(false);
++
++ if (stalls[tid].commit) {
++ DPRINTF(IEW,"[tid:%i]: Stall from Commit stage detected.\n",tid);
++ ret_val = true;
++ } else if (instQueue.isFull(tid)) {
++ DPRINTF(IEW,"[tid:%i]: Stall: IQ is full.\n",tid);
++ ret_val = true;
++ } else if (ldstQueue.isFull(tid)) {
++ DPRINTF(IEW,"[tid:%i]: Stall: LSQ is full\n",tid);
++
++ if (ldstQueue.numLoads(tid) > 0 ) {
++
++ DPRINTF(IEW,"[tid:%i]: LSQ oldest load: [sn:%i] \n",
++ tid,ldstQueue.getLoadHeadSeqNum(tid));
++ }
++
++ if (ldstQueue.numStores(tid) > 0) {
++
++ DPRINTF(IEW,"[tid:%i]: LSQ oldest store: [sn:%i] \n",
++ tid,ldstQueue.getStoreHeadSeqNum(tid));
++ }
++
++ ret_val = true;
++ } else if (ldstQueue.isStalled(tid)) {
++ DPRINTF(IEW,"[tid:%i]: Stall: LSQ stall detected.\n",tid);
++ ret_val = true;
++ }
++
++ return ret_val;
++}
++
++template <class Impl>
++void
++DefaultIEW<Impl>::checkSignalsAndUpdate(unsigned tid)
++{
++ // Check if there's a squash signal, squash if there is
++ // Check stall signals, block if there is.
++ // If status was Blocked
++ // if so then go to unblocking
++ // If status was Squashing
++ // check if squashing is not high. Switch to running this cycle.
++
++ readStallSignals(tid);
++
++ if (fromCommit->commitInfo[tid].squash) {
++ squash(tid);
++
++ if (dispatchStatus[tid] == Blocked ||
++ dispatchStatus[tid] == Unblocking) {
++ toRename->iewUnblock[tid] = true;
++ wroteToTimeBuffer = true;
++ }
++
++ dispatchStatus[tid] = Squashing;
++
++ fetchRedirect[tid] = false;
++ return;
++ }
++
++ if (fromCommit->commitInfo[tid].robSquashing) {
++ DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n");
++
++ dispatchStatus[tid] = Squashing;
++
++ return;
++ }
++
++ if (checkStall(tid)) {
++ block(tid);
++ dispatchStatus[tid] = Blocked;
++ return;
++ }
++
++ if (dispatchStatus[tid] == Blocked) {
++ // Status from previous cycle was blocked, but there are no more stall
++ // conditions. Switch over to unblocking.
++ DPRINTF(IEW, "[tid:%i]: Done blocking, switching to unblocking.\n",
++ tid);
++
++ dispatchStatus[tid] = Unblocking;
+
- continue;
- } else if (inst->isNop()) {
- DPRINTF(IEW, "IEW: Issue: Nop instruction encountered "
- ", skipping.\n");
++ unblock(tid);
++
++ return;
++ }
++
++ if (dispatchStatus[tid] == Squashing) {
++ // Switch status to running if rename isn't being told to block or
++ // squash this cycle.
++ DPRINTF(IEW, "[tid:%i]: Done squashing, switching to running.\n",
++ tid);
++
++ dispatchStatus[tid] = Running;
++
++ return;
++ }
++}
++
++template <class Impl>
++void
++DefaultIEW<Impl>::sortInsts()
++{
++ int insts_from_rename = fromRename->size;
++#ifdef DEBUG
++ for (int i = 0; i < numThreads; i++)
++ assert(insts[i].empty());
++#endif
++ for (int i = 0; i < insts_from_rename; ++i) {
++ insts[fromRename->insts[i]->threadNumber].push(fromRename->insts[i]);
++ }
++}
++
++template <class Impl>
++void
++DefaultIEW<Impl>::wakeCPU()
++{
++ cpu->wakeCPU();
++}
++
++template <class Impl>
++void
++DefaultIEW<Impl>::activityThisCycle()
++{
++ DPRINTF(Activity, "Activity this cycle.\n");
++ cpu->activityThisCycle();
++}
++
++template <class Impl>
++inline void
++DefaultIEW<Impl>::activateStage()
++{
++ DPRINTF(Activity, "Activating stage.\n");
++ cpu->activateStage(FullCPU::IEWIdx);
++}
++
++template <class Impl>
++inline void
++DefaultIEW<Impl>::deactivateStage()
++{
++ DPRINTF(Activity, "Deactivating stage.\n");
++ cpu->deactivateStage(FullCPU::IEWIdx);
++}
++
++template<class Impl>
++void
++DefaultIEW<Impl>::dispatch(unsigned tid)
++{
++ // If status is Running or idle,
++ // call dispatchInsts()
++ // If status is Unblocking,
++ // buffer any instructions coming from rename
++ // continue trying to empty skid buffer
++ // check if stall conditions have passed
++
++ if (dispatchStatus[tid] == Blocked) {
++ ++iewBlockCycles;
++
++ } else if (dispatchStatus[tid] == Squashing) {
++ ++iewSquashCycles;
++ }
++
++ // Dispatch should try to dispatch as many instructions as its bandwidth
++ // will allow, as long as it is not currently blocked.
++ if (dispatchStatus[tid] == Running ||
++ dispatchStatus[tid] == Idle) {
++ DPRINTF(IEW, "[tid:%i] Not blocked, so attempting to run "
++ "dispatch.\n", tid);
++
++ dispatchInsts(tid);
++ } else if (dispatchStatus[tid] == Unblocking) {
++ // Make sure that the skid buffer has something in it if the
++ // status is unblocking.
++ assert(!skidsEmpty());
++
++ // If the status was unblocking, then instructions from the skid
++ // buffer were used. Remove those instructions and handle
++ // the rest of unblocking.
++ dispatchInsts(tid);
++
++ ++iewUnblockCycles;
++
++ if (validInstsFromRename() && dispatchedAllInsts) {
++ // Add the current inputs to the skid buffer so they can be
++ // reprocessed when this stage unblocks.
++ skidInsert(tid);
++ }
++
++ unblock(tid);
++ }
++}
++
++template <class Impl>
++void
++DefaultIEW<Impl>::dispatchInsts(unsigned tid)
++{
++ dispatchedAllInsts = true;
++
++ // Obtain instructions from skid buffer if unblocking, or queue from rename
++ // otherwise.
++ std::queue<DynInstPtr> &insts_to_dispatch =
++ dispatchStatus[tid] == Unblocking ?
++ skidBuffer[tid] : insts[tid];
++
++ int insts_to_add = insts_to_dispatch.size();
++
++ DynInstPtr inst;
++ bool add_to_iq = false;
++ int dis_num_inst = 0;
++
++ // Loop through the instructions, putting them in the instruction
++ // queue.
++ for ( ; dis_num_inst < insts_to_add &&
++ dis_num_inst < issueReadWidth;
++ ++dis_num_inst)
++ {
++ inst = insts_to_dispatch.front();
++
++ if (dispatchStatus[tid] == Unblocking) {
++ DPRINTF(IEW, "[tid:%i]: Issue: Examining instruction from skid "
++ "buffer\n", tid);
++ }
++
++ // Make sure there's a valid instruction there.
++ assert(inst);
++
++ DPRINTF(IEW, "[tid:%i]: Issue: Adding PC %#x [sn:%lli] [tid:%i] to "
++ "IQ.\n",
++ tid, inst->readPC(), inst->seqNum, inst->threadNumber);
++
++ // Be sure to mark these instructions as ready so that the
++ // commit stage can go ahead and execute them, and mark
++ // them as issued so the IQ doesn't reprocess them.
++
++ // Check for squashed instructions.
++ if (inst->isSquashed()) {
++ DPRINTF(IEW, "[tid:%i]: Issue: Squashed instruction encountered, "
++ "not adding to IQ.\n", tid);
++
++ ++iewDispSquashedInsts;
++
++ insts_to_dispatch.pop();
++
++ //Tell Rename That An Instruction has been processed
++ if (inst->isLoad() || inst->isStore()) {
++ toRename->iewInfo[tid].dispatchedToLSQ++;
++ }
++ toRename->iewInfo[tid].dispatched++;
++
++ continue;
++ }
++
++ // Check for full conditions.
++ if (instQueue.isFull(tid)) {
++ DPRINTF(IEW, "[tid:%i]: Issue: IQ has become full.\n", tid);
++
++ // Call function to start blocking.
++ block(tid);
++
++ // Set unblock to false. Special case where we are using
++ // skidbuffer (unblocking) instructions but then we still
++ // get full in the IQ.
++ toRename->iewUnblock[tid] = false;
++
++ dispatchedAllInsts = false;
++
++ ++iewIQFullEvents;
++ break;
++ } else if (ldstQueue.isFull(tid)) {
++ DPRINTF(IEW, "[tid:%i]: Issue: LSQ has become full.\n",tid);
++
++ // Call function to start blocking.
++ block(tid);
++
++ // Set unblock to false. Special case where we are using
++ // skidbuffer (unblocking) instructions but then we still
++ // get full in the IQ.
++ toRename->iewUnblock[tid] = false;
++
++ dispatchedAllInsts = false;
++
++ ++iewLSQFullEvents;
++ break;
++ }
++
++ // Otherwise issue the instruction just fine.
++ if (inst->isLoad()) {
++ DPRINTF(IEW, "[tid:%i]: Issue: Memory instruction "
++ "encountered, adding to LSQ.\n", tid);
++
++ // Reserve a spot in the load store queue for this
++ // memory access.
++ ldstQueue.insertLoad(inst);
++
++ ++iewDispLoadInsts;
++
++ add_to_iq = true;
++
++ toRename->iewInfo[tid].dispatchedToLSQ++;
++ } else if (inst->isStore()) {
++ DPRINTF(IEW, "[tid:%i]: Issue: Memory instruction "
++ "encountered, adding to LSQ.\n", tid);
++
++ ldstQueue.insertStore(inst);
++
++ ++iewDispStoreInsts;
++
++ if (inst->isStoreConditional()) {
++ // Store conditionals need to be set as "canCommit()"
++ // so that commit can process them when they reach the
++ // head of commit.
++ inst->setCanCommit();
+ instQueue.insertNonSpec(inst);
++ add_to_iq = false;
+
+ ++iewDispNonSpecInsts;
++ } else {
++ add_to_iq = true;
++ }
+
- inst->setIssued();
- inst->setExecuted();
- inst->setCanCommit();
++ toRename->iewInfo[tid].dispatchedToLSQ++;
++#if FULL_SYSTEM
++ } else if (inst->isMemBarrier() || inst->isWriteBarrier()) {
++ // Same as non-speculative stores.
++ inst->setCanCommit();
++ instQueue.insertBarrier(inst);
++ add_to_iq = false;
++#endif
++ } else if (inst->isNonSpeculative()) {
++ DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction "
++ "encountered, skipping.\n", tid);
+
- instQueue.advanceTail(inst);
++ // Same as non-speculative stores.
++ inst->setCanCommit();
+
- continue;
- } else if (inst->isExecuted()) {
- assert(0 && "Instruction shouldn't be executed.\n");
- DPRINTF(IEW, "IEW: Issue: Executed branch encountered, "
- "skipping.\n");
++ // Specifically insert it as nonspeculative.
++ instQueue.insertNonSpec(inst);
+
- inst->setIssued();
- inst->setCanCommit();
++ ++iewDispNonSpecInsts;
+
- instQueue.advanceTail(inst);
++ add_to_iq = false;
++ } else if (inst->isNop()) {
++ DPRINTF(IEW, "[tid:%i]: Issue: Nop instruction encountered, "
++ "skipping.\n", tid);
+
- continue;
- }
++ inst->setIssued();
++ inst->setExecuted();
++ inst->setCanCommit();
+
- // If the instruction queue is not full, then add the
- // instruction.
- instQueue.insert(fromRename->insts[inst_num]);
++ instQueue.recordProducer(inst);
+
- ++iewDispatchedInsts;
++ exeNop[tid]++;
++
++ add_to_iq = false;
++ } else if (inst->isExecuted()) {
++ assert(0 && "Instruction shouldn't be executed.\n");
++ DPRINTF(IEW, "Issue: Executed branch encountered, "
++ "skipping.\n");
++
++ inst->setIssued();
++ inst->setCanCommit();
++
++ instQueue.recordProducer(inst);
++
++ add_to_iq = false;
++ } else {
++ add_to_iq = true;
++ }
+
- SimpleIEW<Impl>::executeInsts()
++ // If the instruction queue is not full, then add the
++ // instruction.
++ if (add_to_iq) {
++ instQueue.insert(inst);
+ }
++
++ insts_to_dispatch.pop();
++
++ toRename->iewInfo[tid].dispatched++;
++
++ ++iewDispatchedInsts;
++ }
++
++ if (!insts_to_dispatch.empty()) {
++ DPRINTF(IEW,"[tid:%i]: Issue: Bandwidth Full. Blocking.\n");
++ block(tid);
++ toRename->iewUnblock[tid] = false;
++ }
++
++ if (dispatchStatus[tid] == Idle && dis_num_inst) {
++ dispatchStatus[tid] = Running;
++
++ updatedQueues = true;
+ }
++
++ dis_num_inst = 0;
+}
+
+template <class Impl>
+void
- ////////////////////////////////////////
- //EXECUTE/WRITEBACK stage
- ////////////////////////////////////////
++DefaultIEW<Impl>::printAvailableInsts()
+{
- //Put into its own function?
- //Similarly should probably have separate execution for int vs FP.
- // Above comment is handled by the issue queue only issuing a valid
- // mix of int/fp instructions.
- //Actually okay to just have one execution, buuuuuut will need
- //somewhere that defines the execution latency of all instructions.
- // @todo: Move to the FU pool used in the current full cpu.
++ int inst = 0;
++
++ cout << "Available Instructions: ";
++
++ while (fromIssue->insts[inst]) {
+
- int fu_usage = 0;
- bool fetch_redirect = false;
- int inst_slot = 0;
- int time_slot = 0;
++ if (inst%3==0) cout << "\n\t";
+
- for (int inst_num = 0;
- fu_usage < executeWidth && /* Haven't exceeded available FU's. */
- inst_num < issueWidth &&
- fromIssue->insts[inst_num];
- ++inst_num) {
++ cout << "PC: " << fromIssue->insts[inst]->readPC()
++ << " TN: " << fromIssue->insts[inst]->threadNumber
++ << " SN: " << fromIssue->insts[inst]->seqNum << " | ";
++
++ inst++;
++
++ }
++
++ cout << "\n";
++}
++
++template <class Impl>
++void
++DefaultIEW<Impl>::executeInsts()
++{
++ wbNumInst = 0;
++ wbCycle = 0;
++
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = *threads++;
++ fetchRedirect[tid] = false;
++ }
++
++#if 0
++ printAvailableInsts();
++#endif
+
+ // Execute/writeback any instructions that are available.
- DPRINTF(IEW, "IEW: Execute: Executing instructions from IQ.\n");
++ int insts_to_execute = fromIssue->size;
++ int inst_num = 0;
++ for (; inst_num < insts_to_execute;
++ ++inst_num) {
+
- // Get instruction from issue's queue.
- DynInstPtr inst = fromIssue->insts[inst_num];
++ DPRINTF(IEW, "Execute: Executing instructions from IQ.\n");
+
- DPRINTF(IEW, "IEW: Execute: Processing PC %#x.\n", inst->readPC());
++ DynInstPtr inst = instQueue.getInstToExecute();
+
- // and don't count it towards the FU usage.
++ DPRINTF(IEW, "Execute: Processing PC %#x, [tid:%i] [sn:%i].\n",
++ inst->readPC(), inst->threadNumber,inst->seqNum);
+
+ // Check if the instruction is squashed; if so then skip it
- DPRINTF(IEW, "IEW: Execute: Instruction was squashed.\n");
+ if (inst->isSquashed()) {
- toCommit->insts[inst_num] = inst;
++ DPRINTF(IEW, "Execute: Instruction was squashed.\n");
+
+ // Consider this instruction executed so that commit can go
+ // ahead and retire the instruction.
+ inst->setExecuted();
+
- inst->setExecuted();
-
- // If an instruction is executed, then count it towards FU usage.
- ++fu_usage;
++ // Not sure if I should set this here or just let commit try to
++ // commit any squashed instructions. I like the latter a bit more.
++ inst->setCanCommit();
+
+ ++iewExecSquashedInsts;
+
+ continue;
+ }
+
- if (inst->isMemRef()) {
- DPRINTF(IEW, "IEW: Execute: Calculating address for memory "
++ Fault fault = NoFault;
+
+ // Execute instruction.
+ // Note that if the instruction faults, it will be handled
+ // at the commit stage.
- ldstQueue.executeLoad(inst);
-
- ++iewExecLoadInsts;
++ if (inst->isMemRef() &&
++ (!inst->isDataPrefetch() && !inst->isInstPrefetch())) {
++ DPRINTF(IEW, "Execute: Calculating address for memory "
+ "reference.\n");
+
+ // Tell the LDSTQ to execute this instruction (if it is a load).
+ if (inst->isLoad()) {
- ++iewExecStoreInsts;
++ // Loads will mark themselves as executed, and their writeback
++ // event adds the instruction to the queue to commit
++ fault = ldstQueue.executeLoad(inst);
+ } else if (inst->isStore()) {
+ ldstQueue.executeStore(inst);
+
- panic("IEW: Unexpected memory type!\n");
++ // If the store had a fault then it may not have a mem req
++ if (inst->req && !(inst->req->flags & LOCKED)) {
++ inst->setExecuted();
++
++ instToCommit(inst);
++ }
++
++ // Store conditionals will mark themselves as
++ // executed, and their writeback event will add the
++ // instruction to the queue to commit.
+ } else {
- ++iewExecutedInsts;
++ panic("Unexpected memory type!\n");
+ }
+
+ } else {
+ inst->execute();
+
- // First check the time slot that this instruction will write
- // to. If there are free write ports at the time, then go ahead
- // and write the instruction to that time. If there are not,
- // keep looking back to see where's the first time there's a
- // free slot. What happens if you run out of free spaces?
- // For now naively assume that all instructions take one cycle.
- // Otherwise would have to look into the time buffer based on the
- // latency of the instruction.
- (*iewQueue)[time_slot].insts[inst_slot];
- while ((*iewQueue)[time_slot].insts[inst_slot]) {
- if (inst_slot < issueWidth) {
- ++inst_slot;
- } else {
- ++time_slot;
- inst_slot = 0;
- }
++ inst->setExecuted();
++
++ instToCommit(inst);
+ }
+
- assert(time_slot < 5);
- }
++ updateExeInstStats(inst);
+
- // May actually have to work this out, especially with loads and stores
++ // Check if branch prediction was correct, if not then we need
++ // to tell commit to squash in flight instructions. Only
++ // handle this if there hasn't already been something that
++ // redirects fetch in this group of instructions.
+
- // Add finished instruction to queue to commit.
- (*iewQueue)[time_slot].insts[inst_slot] = inst;
- (*iewQueue)[time_slot].size++;
++ // This probably needs to prioritize the redirects if a different
++ // scheduler is used. Currently the scheduler schedules the oldest
++ // instruction first, so the branch resolution order will be correct.
++ unsigned tid = inst->threadNumber;
+
- // Check if branch was correct. This check happens after the
- // instruction is added to the queue because even if the branch
- // is mispredicted, the branch instruction itself is still valid.
- // Only handle this if there hasn't already been something that
- // redirects fetch in this group of instructions.
- if (!fetch_redirect) {
++ if (!fetchRedirect[tid]) {
+
- fetch_redirect = true;
+ if (inst->mispredicted()) {
- DPRINTF(IEW, "IEW: Execute: Branch mispredict detected.\n");
- DPRINTF(IEW, "IEW: Execute: Redirecting fetch to PC: %#x.\n",
++ fetchRedirect[tid] = true;
+
- squashDueToBranch(inst);
++ DPRINTF(IEW, "Execute: Branch mispredict detected.\n");
++ DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x.\n",
+ inst->nextPC);
+
+ // If incorrect, then signal the ROB that it must be squashed.
- } else if (ldstQueue.violation()) {
- fetch_redirect = true;
++ squashDueToBranch(inst, tid);
+
+ if (inst->predTaken()) {
+ predictedTakenIncorrect++;
++ } else {
++ predictedNotTakenIncorrect++;
+ }
- // Get the DynInst that caused the violation.
- DynInstPtr violator = ldstQueue.getMemDepViolator();
++ } else if (ldstQueue.violation(tid)) {
++ fetchRedirect[tid] = true;
+
- DPRINTF(IEW, "IEW: LDSTQ detected a violation. Violator PC: "
++ // If there was an ordering violation, then get the
++ // DynInst that caused the violation. Note that this
++ // clears the violation signal.
++ DynInstPtr violator;
++ violator = ldstQueue.getMemDepViolator(tid);
+
- squashDueToMem(inst);
++ DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: "
+ "%#x, inst PC: %#x. Addr is: %#x.\n",
+ violator->readPC(), inst->readPC(), inst->physEffAddr);
+
+ // Tell the instruction queue that a violation has occured.
+ instQueue.violation(inst, violator);
+
+ // Squash.
- SimpleIEW<Impl>::tick()
++ squashDueToMemOrder(inst,tid);
+
+ ++memOrderViolationEvents;
++ } else if (ldstQueue.loadBlocked(tid) &&
++ !ldstQueue.isLoadBlockedHandled(tid)) {
++ fetchRedirect[tid] = true;
++
++ DPRINTF(IEW, "Load operation couldn't execute because the "
++ "memory system is blocked. PC: %#x [sn:%lli]\n",
++ inst->readPC(), inst->seqNum);
++
++ squashDueToMemBlocked(inst, tid);
+ }
+ }
+ }
++
++ if (inst_num) {
++ if (exeStatus == Idle) {
++ exeStatus = Running;
++ }
++
++ updatedQueues = true;
++
++ cpu->activityThisCycle();
++ }
++
++ // Need to reset this in case a writeback event needs to write into the
++ // iew queue. That way the writeback event will write into the correct
++ // spot in the queue.
++ wbNumInst = 0;
++}
++
++template <class Impl>
++void
++DefaultIEW<Impl>::writebackInsts()
++{
++ // Loop through the head of the time buffer and wake any
++ // dependents. These instructions are about to write back. Also
++ // mark scoreboard that this instruction is finally complete.
++ // Either have IEW have direct access to scoreboard, or have this
++ // as part of backwards communication.
++ for (int inst_num = 0; inst_num < issueWidth &&
++ toCommit->insts[inst_num]; inst_num++) {
++ DynInstPtr inst = toCommit->insts[inst_num];
++ int tid = inst->threadNumber;
++
++ DPRINTF(IEW, "Sending instructions to commit, PC %#x.\n",
++ inst->readPC());
++
++ iewInstsToCommit[tid]++;
++
++ // Some instructions will be sent to commit without having
++ // executed because they need commit to handle them.
++ // E.g. Uncached loads have not actually executed when they
++ // are first sent to commit. Instead commit must tell the LSQ
++ // when it's ready to execute the uncached load.
++ if (!inst->isSquashed() && inst->isExecuted()) {
++ int dependents = instQueue.wakeDependents(inst);
++
++ for (int i = 0; i < inst->numDestRegs(); i++) {
++ //mark as Ready
++ DPRINTF(IEW,"Setting Destination Register %i\n",
++ inst->renamedDestRegIdx(i));
++ scoreboard->setReg(inst->renamedDestRegIdx(i));
++ }
++
++ producerInst[tid]++;
++ consumerInst[tid]+= dependents;
++ writebackCount[tid]++;
++ }
++ }
+}
+
+template<class Impl>
+void
- // Considering putting all the state-determining stuff in this section.
++DefaultIEW<Impl>::tick()
+{
- // Try to fill up issue queue with as many instructions as bandwidth
- // allows.
- // Decode should try to execute as many instructions as its bandwidth
- // will allow, as long as it is not currently blocked.
++ wbNumInst = 0;
++ wbCycle = 0;
+
- // Check if the stage is in a running status.
- if (_status != Blocked && _status != Squashing) {
- DPRINTF(IEW, "IEW: Status is not blocked, attempting to run "
- "stage.\n");
- iew();
++ wroteToTimeBuffer = false;
++ updatedQueues = false;
+
- // If it's currently unblocking, check to see if it should switch
- // to running.
- if (_status == Unblocking) {
- unblock();
++ sortInsts();
+
- ++iewUnblockCycles;
- }
- } else if (_status == Squashing) {
++ // Free function units marked as being freed this cycle.
++ fuPool->processFreeUnits();
+
- DPRINTF(IEW, "IEW: Still squashing.\n");
++ list<unsigned>::iterator threads = (*activeThreads).begin();
+
- // Check if stage should remain squashing. Stop squashing if the
- // squash signal clears.
- if (!fromCommit->commitInfo.squash &&
- !fromCommit->commitInfo.robSquashing) {
- DPRINTF(IEW, "IEW: Done squashing, changing status to "
- "running.\n");
++ // Check stall and squash signals, dispatch any instructions.
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = *threads++;
+
- _status = Running;
- instQueue.stopSquash();
- } else {
- instQueue.doSquash();
- }
++ DPRINTF(IEW,"Issue: Processing [tid:%i]\n",tid);
+
- ++iewSquashCycles;
- } else if (_status == Blocked) {
- // Continue to tell previous stage to stall.
- toRename->iewInfo.stall = true;
-
- // Check if possible stall conditions have cleared.
- if (!fromCommit->commitInfo.stall &&
- !instQueue.isFull()) {
- DPRINTF(IEW, "IEW: Stall signals cleared, going to unblock.\n");
- _status = Unblocking;
- }
++ checkSignalsAndUpdate(tid);
++ dispatch(tid);
++ }
+
- // If there's still instructions coming from rename, continue to
- // put them on the skid buffer.
- if (fromRename->size == 0) {
- block();
- }
++ if (exeStatus != Squashing) {
++ executeInsts();
+
- if (fromCommit->commitInfo.squash ||
- fromCommit->commitInfo.robSquashing) {
- squash();
- }
++ writebackInsts();
+
- ++iewBlockCycles;
++ // Have the instruction queue try to schedule any ready instructions.
++ // (In actuality, this scheduling is for instructions that will
++ // be executed next cycle.)
++ instQueue.scheduleReadyInsts();
+
- // @todo: Maybe put these at the beginning, so if it's idle it can
- // return early.
- // Write back number of free IQ entries here.
- toRename->iewInfo.freeIQEntries = instQueue.numFreeEntries();
++ // Also should advance its own time buffers if the stage ran.
++ // Not the best place for it, but this works (hopefully).
++ issueToExecQueue.advance();
+ }
+
- if (!fromCommit->commitInfo.squash &&
- !fromCommit->commitInfo.robSquashing) {
- ldstQueue.commitStores(fromCommit->commitInfo.doneSeqNum);
- ldstQueue.commitLoads(fromCommit->commitInfo.doneSeqNum);
- }
++ bool broadcast_free_entries = false;
++
++ if (updatedQueues || exeStatus == Running || updateLSQNextCycle) {
++ exeStatus = Idle;
++ updateLSQNextCycle = false;
++
++ broadcast_free_entries = true;
++ }
+
++ // Writeback any stores using any leftover bandwidth.
+ ldstQueue.writebackStores();
+
+ // Check the committed load/store signals to see if there's a load
+ // or store to commit. Also check if it's being told to execute a
+ // nonspeculative instruction.
+ // This is pretty inefficient...
- if (fromCommit->commitInfo.nonSpecSeqNum != 0) {
- instQueue.scheduleNonSpec(fromCommit->commitInfo.nonSpecSeqNum);
- }
+
- DPRINTF(IEW, "IEW: IQ has %i free entries.\n",
- instQueue.numFreeEntries());
- }
++ threads = (*activeThreads).begin();
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = (*threads++);
+
- template<class Impl>
- void
- SimpleIEW<Impl>::iew()
- {
- // Might want to put all state checks in the tick() function.
- // Check if being told to stall from commit.
- if (fromCommit->commitInfo.stall) {
- block();
- return;
- } else if (fromCommit->commitInfo.squash ||
- fromCommit->commitInfo.robSquashing) {
- // Also check if commit is telling this stage to squash.
- squash();
- return;
- }
++ DPRINTF(IEW,"Processing [tid:%i]\n",tid);
+
- dispatchInsts();
++ if (fromCommit->commitInfo[tid].doneSeqNum != 0 &&
++ !fromCommit->commitInfo[tid].squash &&
++ !fromCommit->commitInfo[tid].robSquashing) {
+
- // Have the instruction queue try to schedule any ready instructions.
- instQueue.scheduleReadyInsts();
++ ldstQueue.commitStores(fromCommit->commitInfo[tid].doneSeqNum,tid);
+
- executeInsts();
++ ldstQueue.commitLoads(fromCommit->commitInfo[tid].doneSeqNum,tid);
+
- // Loop through the head of the time buffer and wake any dependents.
- // These instructions are about to write back. In the simple model
- // this loop can really happen within the previous loop, but when
- // instructions have actual latencies, this loop must be separate.
- // Also mark scoreboard that this instruction is finally complete.
- // Either have IEW have direct access to rename map, or have this as
- // part of backwards communication.
- for (int inst_num = 0; inst_num < issueWidth &&
- toCommit->insts[inst_num]; inst_num++)
- {
- DynInstPtr inst = toCommit->insts[inst_num];
++ updateLSQNextCycle = true;
++ instQueue.commit(fromCommit->commitInfo[tid].doneSeqNum,tid);
++ }
+
- DPRINTF(IEW, "IEW: Sending instructions to commit, PC %#x.\n",
- inst->readPC());
++ if (fromCommit->commitInfo[tid].nonSpecSeqNum != 0) {
+
- if(!inst->isSquashed()) {
- instQueue.wakeDependents(inst);
++ //DPRINTF(IEW,"NonspecInst from thread %i",tid);
++ if (fromCommit->commitInfo[tid].uncached) {
++ instQueue.replayMemInst(fromCommit->commitInfo[tid].uncachedLoad);
++ } else {
++ instQueue.scheduleNonSpec(
++ fromCommit->commitInfo[tid].nonSpecSeqNum);
++ }
++ }
+
- for (int i = 0; i < inst->numDestRegs(); i++)
- {
- renameMap->markAsReady(inst->renamedDestRegIdx(i));
- }
++ if (broadcast_free_entries) {
++ toFetch->iewInfo[tid].iqCount =
++ instQueue.getCount(tid);
++ toFetch->iewInfo[tid].ldstqCount =
++ ldstQueue.getCount(tid);
+
- // Also should advance its own time buffers if the stage ran.
- // Not the best place for it, but this works (hopefully).
- issueToExecQueue.advance();
++ toRename->iewInfo[tid].usedIQ = true;
++ toRename->iewInfo[tid].freeIQEntries =
++ instQueue.numFreeEntries();
++ toRename->iewInfo[tid].usedLSQ = true;
++ toRename->iewInfo[tid].freeLSQEntries =
++ ldstQueue.numFreeEntries(tid);
++
++ wroteToTimeBuffer = true;
+ }
++
++ DPRINTF(IEW, "[tid:%i], Dispatch dispatched %i instructions.\n",
++ tid, toRename->iewInfo[tid].dispatched);
+ }
+
- #if !FULL_SYSTEM
- template<class Impl>
++ DPRINTF(IEW, "IQ has %i free entries (Can schedule: %i). "
++ "LSQ has %i free entries.\n",
++ instQueue.numFreeEntries(), instQueue.hasReadyInsts(),
++ ldstQueue.numFreeEntries());
++
++ updateStatus();
++
++ if (wroteToTimeBuffer) {
++ DPRINTF(Activity, "Activity this cycle.\n");
++ cpu->activityThisCycle();
++ }
+}
+
- SimpleIEW<Impl>::lsqWriteback()
++template <class Impl>
+void
- ldstQueue.writebackAllInsts();
- }
++DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst)
+{
++ int thread_number = inst->threadNumber;
++
++ //
++ // Pick off the software prefetches
++ //
++#ifdef TARGET_ALPHA
++ if (inst->isDataPrefetch())
++ exeSwp[thread_number]++;
++ else
++ iewExecutedInsts++;
++#else
++ iewExecutedInsts[thread_number]++;
+#endif
++
++ //
++ // Control operations
++ //
++ if (inst->isControl())
++ exeBranches[thread_number]++;
++
++ //
++ // Memory operations
++ //
++ if (inst->isMemRef()) {
++ exeRefs[thread_number]++;
++
++ if (inst->isLoad()) {
++ iewExecLoadInsts[thread_number]++;
++ }
++ }
++}
--- /dev/null
-
- template<>
- unsigned
- InstructionQueue<AlphaSimpleImpl>::DependencyEntry::mem_alloc_counter = 0;
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/o3/alpha_dyn_inst.hh"
+#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/inst_queue_impl.hh"
+
+// Force instantiation of InstructionQueue.
+template class InstructionQueue<AlphaSimpleImpl>;
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- #ifndef __CPU_O3_CPU_INST_QUEUE_HH__
- #define __CPU_O3_CPU_INST_QUEUE_HH__
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- * physical register indices.
++#ifndef __CPU_O3_INST_QUEUE_HH__
++#define __CPU_O3_INST_QUEUE_HH__
+
+#include <list>
+#include <map>
+#include <queue>
+#include <vector>
+
+#include "base/statistics.hh"
+#include "base/timebuf.hh"
+#include "cpu/inst_seq.hh"
++#include "cpu/o3/dep_graph.hh"
++#include "encumbered/cpu/full/op_class.hh"
+#include "sim/host.hh"
+
++class FUPool;
++class MemInterface;
++
+/**
+ * A standard instruction queue class. It holds ready instructions, in
+ * order, in seperate priority queues to facilitate the scheduling of
+ * instructions. The IQ uses a separate linked list to track dependencies.
+ * Similar to the rename map and the free list, it expects that
+ * floating point registers have their indices start after the integer
+ * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer
+ * and 96-191 are fp). This remains true even for both logical and
- // Typedef of iterator through the list of instructions. Might be
- // better to untie this from the FullCPU or pass its information to
- // the stages.
++ * physical register indices. The IQ depends on the memory dependence unit to
++ * track when memory operations are ready in terms of ordering; register
++ * dependencies are tracked normally. Right now the IQ also handles the
++ * execution timing; this is mainly to allow back-to-back scheduling without
++ * requiring IEW to be able to peek into the IQ. At the end of the execution
++ * latency, the instruction is put into the queue to execute, where it will
++ * have the execute() function called on it.
++ * @todo: Make IQ able to handle multiple FU pools.
+ */
+template <class Impl>
+class InstructionQueue
+{
+ public:
+ //Typedefs from the Impl.
+ typedef typename Impl::FullCPU FullCPU;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::Params Params;
+
++ typedef typename Impl::CPUPol::IEW IEW;
+ typedef typename Impl::CPUPol::MemDepUnit MemDepUnit;
+ typedef typename Impl::CPUPol::IssueStruct IssueStruct;
+ typedef typename Impl::CPUPol::TimeStruct TimeStruct;
+
- /**
- * Struct for comparing entries to be added to the priority queue. This
- * gives reverse ordering to the instructions in terms of sequence
- * numbers: the instructions with smaller sequence numbers (and hence
- * are older) will be at the top of the priority queue.
- */
- struct pqCompare
- {
- bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
- {
- return lhs->seqNum > rhs->seqNum;
- }
- };
++ // Typedef of iterator through the list of instructions.
+ typedef typename std::list<DynInstPtr>::iterator ListIt;
+
- /**
- * Struct for comparing entries to be added to the set. This gives
- * standard ordering in terms of sequence numbers.
- */
- struct setCompare
- {
- bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
- {
- return lhs->seqNum < rhs->seqNum;
- }
++ friend class Impl::FullCPU;
+
- typedef std::priority_queue<DynInstPtr, vector<DynInstPtr>, pqCompare>
- ReadyInstQueue;
++ /** FU completion event class. */
++ class FUCompletion : public Event {
++ private:
++ /** Executing instruction. */
++ DynInstPtr inst;
++
++ /** Index of the FU used for executing. */
++ int fuIdx;
++
++ /** Pointer back to the instruction queue. */
++ InstructionQueue<Impl> *iqPtr;
++
++ bool freeFU;
++
++ public:
++ /** Construct a FU completion event. */
++ FUCompletion(DynInstPtr &_inst, int fu_idx,
++ InstructionQueue<Impl> *iq_ptr);
++
++ virtual void process();
++ virtual const char *description();
++ void setFreeFU() { freeFU = true; }
+ };
+
- InstructionQueue(Params ¶ms);
++ /** Constructs an IQ. */
++ InstructionQueue(Params *params);
+
- void setCPU(FullCPU *cpu);
++ /** Destructs the IQ. */
++ ~InstructionQueue();
+
++ /** Returns the name of the IQ. */
++ std::string name() const;
++
++ /** Registers statistics. */
+ void regStats();
+
- void advanceTail(DynInstPtr &inst);
++ void resetState();
++
++ /** Sets CPU pointer. */
++ void setCPU(FullCPU *_cpu) { cpu = _cpu; }
+
++ /** Sets active threads list. */
++ void setActiveThreads(std::list<unsigned> *at_ptr);
++
++ /** Sets the IEW pointer. */
++ void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; }
++
++ /** Sets the timer buffer between issue and execute. */
+ void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue);
+
++ /** Sets the global time buffer. */
+ void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
+
++ void switchOut();
++
++ void takeOverFrom();
++
++ bool isSwitchedOut() { return switchedOut; }
++
++ /** Number of entries needed for given amount of threads. */
++ int entryAmount(int num_threads);
++
++ /** Resets max entries for all threads. */
++ void resetEntries();
++
++ /** Returns total number of free entries. */
+ unsigned numFreeEntries();
+
++ /** Returns number of free entries for a thread. */
++ unsigned numFreeEntries(unsigned tid);
++
++ /** Returns whether or not the IQ is full. */
+ bool isFull();
+
++ /** Returns whether or not the IQ is full for a specific thread. */
++ bool isFull(unsigned tid);
++
++ /** Returns if there are any ready instructions in the IQ. */
++ bool hasReadyInsts();
++
++ /** Inserts a new instruction into the IQ. */
+ void insert(DynInstPtr &new_inst);
+
++ /** Inserts a new, non-speculative instruction into the IQ. */
+ void insertNonSpec(DynInstPtr &new_inst);
+
- void wakeDependents(DynInstPtr &completed_inst);
++ /** Inserts a memory or write barrier into the IQ to make sure
++ * loads and stores are ordered properly.
++ */
++ void insertBarrier(DynInstPtr &barr_inst);
+
++ DynInstPtr getInstToExecute();
++
++ /**
++ * Records the instruction as the producer of a register without
++ * adding it to the rest of the IQ.
++ */
++ void recordProducer(DynInstPtr &inst)
++ { addToProducers(inst); }
++
++ /** Process FU completion event. */
++ void processFUCompletion(DynInstPtr &inst, int fu_idx);
++
++ /**
++ * Schedules ready instructions, adding the ready ones (oldest first) to
++ * the queue to execute.
++ */
+ void scheduleReadyInsts();
+
++ /** Schedules a single specific non-speculative instruction. */
+ void scheduleNonSpec(const InstSeqNum &inst);
+
- // Change this to take in the sequence number
- void squash();
++ /**
++ * Commits all instructions up to and including the given sequence number,
++ * for a specific thread.
++ */
++ void commit(const InstSeqNum &inst, unsigned tid = 0);
++
++ /** Wakes all dependents of a completed instruction. */
++ int wakeDependents(DynInstPtr &completed_inst);
++
++ /** Adds a ready memory instruction to the ready list. */
++ void addReadyMemInst(DynInstPtr &ready_inst);
++
++ /**
++ * Reschedules a memory instruction. It will be ready to issue once
++ * replayMemInst() is called.
++ */
++ void rescheduleMemInst(DynInstPtr &resched_inst);
++
++ /** Replays a memory instruction. It must be rescheduled first. */
++ void replayMemInst(DynInstPtr &replay_inst);
+
++ /** Completes a memory operation. */
++ void completeMemInst(DynInstPtr &completed_inst);
++
++ /** Indicates an ordering violation between a store and a load. */
+ void violation(DynInstPtr &store, DynInstPtr &faulting_load);
+
- void doSquash();
++ /**
++ * Squashes instructions for a thread. Squashing information is obtained
++ * from the time buffer.
++ */
++ void squash(unsigned tid);
+
- void stopSquash();
++ /** Returns the number of used entries for a thread. */
++ unsigned getCount(unsigned tid) { return count[tid]; };
+
- MemDepUnit memDepUnit;
++ /** Debug function to print all instructions. */
++ void printInsts();
+
+ private:
++ /** Does the actual squashing. */
++ void doSquash(unsigned tid);
++
++ /////////////////////////
++ // Various pointers
++ /////////////////////////
++
+ /** Pointer to the CPU. */
+ FullCPU *cpu;
+
++ /** Cache interface. */
++ MemInterface *dcacheInterface;
++
++ /** Pointer to IEW stage. */
++ IEW *iewStage;
++
+ /** The memory dependence unit, which tracks/predicts memory dependences
+ * between instructions.
+ */
- enum InstList {
- Int,
- Float,
- Branch,
- Memory,
- Misc,
- Squashed,
- None
- };
++ MemDepUnit memDepUnit[Impl::MaxThreads];
+
+ /** The queue to the execute stage. Issued instructions will be written
+ * into it.
+ */
+ TimeBuffer<IssueStruct> *issueToExecuteQueue;
+
+ /** The backwards time buffer. */
+ TimeBuffer<TimeStruct> *timeBuffer;
+
+ /** Wire to read information from timebuffer. */
+ typename TimeBuffer<TimeStruct>::wire fromCommit;
+
- /** List of ready int instructions. Used to keep track of the order in
- * which instructions should issue.
- */
- ReadyInstQueue readyIntInsts;
++ /** Function unit pool. */
++ FUPool *fuPool;
+
- /** List of ready floating point instructions. */
- ReadyInstQueue readyFloatInsts;
++ //////////////////////////////////////
++ // Instruction lists, ready queues, and ordering
++ //////////////////////////////////////
+
- /** List of ready branch instructions. */
- ReadyInstQueue readyBranchInsts;
++ /** List of all the instructions in the IQ (some of which may be issued). */
++ std::list<DynInstPtr> instList[Impl::MaxThreads];
+
- /** List of ready miscellaneous instructions. */
- ReadyInstQueue readyMiscInsts;
++ std::list<DynInstPtr> instsToExecute;
+
- /** List of squashed instructions (which are still valid and in IQ).
- * Implemented using a priority queue; the entries must contain both
- * the IQ index and sequence number of each instruction so that
- * ordering based on sequence numbers can be used.
++ /**
++ * Struct for comparing entries to be added to the priority queue. This
++ * gives reverse ordering to the instructions in terms of sequence
++ * numbers: the instructions with smaller sequence numbers (and hence
++ * are older) will be at the top of the priority queue.
++ */
++ struct pqCompare {
++ bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
++ {
++ return lhs->seqNum > rhs->seqNum;
++ }
++ };
++
++ typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare>
++ ReadyInstQueue;
+
- ReadyInstQueue squashedInsts;
++ /** List of ready instructions, per op class. They are separated by op
++ * class to allow for easy mapping to FUs.
+ */
- typedef typename std::map<InstSeqNum, DynInstPtr>::iterator non_spec_it_t;
++ ReadyInstQueue readyInsts[Num_OpClasses];
+
+ /** List of non-speculative instructions that will be scheduled
+ * once the IQ gets a signal from commit. While it's redundant to
+ * have the key be a part of the value (the sequence number is stored
+ * inside of DynInst), when these instructions are woken up only
+ * the sequence number will be available. Thus it is most efficient to be
+ * able to search by the sequence number alone.
+ */
+ std::map<InstSeqNum, DynInstPtr> nonSpecInsts;
+
- /** Number of free IQ entries left. */
- unsigned freeEntries;
++ typedef typename std::map<InstSeqNum, DynInstPtr>::iterator NonSpecMapIt;
+
- /** The number of entries in the instruction queue. */
- unsigned numEntries;
++ /** Entry for the list age ordering by op class. */
++ struct ListOrderEntry {
++ OpClass queueType;
++ InstSeqNum oldestInst;
++ };
+
- /** The number of integer instructions that can be issued in one
- * cycle.
++ /** List that contains the age order of the oldest instruction of each
++ * ready queue. Used to select the oldest instruction available
++ * among op classes.
++ * @todo: Might be better to just move these entries around instead
++ * of creating new ones every time the position changes due to an
++ * instruction issuing. Not sure std::list supports this.
++ */
++ std::list<ListOrderEntry> listOrder;
++
++ typedef typename std::list<ListOrderEntry>::iterator ListOrderIt;
++
++ /** Tracks if each ready queue is on the age order list. */
++ bool queueOnList[Num_OpClasses];
+
- unsigned intWidth;
++ /** Iterators of each ready queue. Points to their spot in the age order
++ * list.
+ */
- /** The number of floating point instructions that can be issued
- * in one cycle.
++ ListOrderIt readyIt[Num_OpClasses];
+
- unsigned floatWidth;
++ /** Add an op class to the age order list. */
++ void addToOrderList(OpClass op_class);
++
++ /**
++ * Called when the oldest instruction has been removed from a ready queue;
++ * this places that ready queue into the proper spot in the age order list.
+ */
- /** The number of branches that can be issued in one cycle. */
- unsigned branchWidth;
++ void moveToYoungerInst(ListOrderIt age_order_it);
++
++ DependencyGraph<DynInstPtr> dependGraph;
++
++ //////////////////////////////////////
++ // Various parameters
++ //////////////////////////////////////
++
++ /** IQ Resource Sharing Policy */
++ enum IQPolicy {
++ Dynamic,
++ Partitioned,
++ Threshold
++ };
++
++ /** IQ sharing policy for SMT. */
++ IQPolicy iqPolicy;
+
- /** The number of memory instructions that can be issued in one cycle. */
- unsigned memoryWidth;
++ /** Number of Total Threads*/
++ unsigned numThreads;
+
- //The number of physical registers in the CPU.
++ /** Pointer to list of active threads. */
++ std::list<unsigned> *activeThreads;
++
++ /** Per Thread IQ count */
++ unsigned count[Impl::MaxThreads];
++
++ /** Max IQ Entries Per Thread */
++ unsigned maxEntries[Impl::MaxThreads];
++
++ /** Number of free IQ entries left. */
++ unsigned freeEntries;
++
++ /** The number of entries in the instruction queue. */
++ unsigned numEntries;
+
+ /** The total number of instructions that can be issued in one cycle. */
+ unsigned totalWidth;
+
- //////////////////////////////////
- // Variables needed for squashing
- //////////////////////////////////
++ /** The number of physical registers in the CPU. */
+ unsigned numPhysRegs;
+
+ /** The number of physical integer registers in the CPU. */
+ unsigned numPhysIntRegs;
+
+ /** The number of floating point registers in the CPU. */
+ unsigned numPhysFloatRegs;
+
+ /** Delay between commit stage and the IQ.
+ * @todo: Make there be a distinction between the delays within IEW.
+ */
+ unsigned commitToIEWDelay;
+
- InstSeqNum squashedSeqNum;
-
- /** Iterator that points to the youngest instruction in the IQ. */
- ListIt tail;
-
- /** Iterator that points to the last instruction that has been squashed.
- * This will not be valid unless the IQ is in the process of squashing.
- */
- ListIt squashIt;
-
- ///////////////////////////////////
- // Dependency graph stuff
- ///////////////////////////////////
-
- class DependencyEntry
- {
- public:
- DynInstPtr inst;
- //Might want to include data about what arch. register the
- //dependence is waiting on.
- DependencyEntry *next;
-
- //This function, and perhaps this whole class, stand out a little
- //bit as they don't fit a classification well. I want access
- //to the underlying structure of the linked list, yet at
- //the same time it feels like this should be something abstracted
- //away. So for now it will sit here, within the IQ, until
- //a better implementation is decided upon.
- // This function probably shouldn't be within the entry...
- void insert(DynInstPtr &new_inst);
-
- void remove(DynInstPtr &inst_to_remove);
-
- // Debug variable, remove when done testing.
- static unsigned mem_alloc_counter;
- };
-
- /** Array of linked lists. Each linked list is a list of all the
- * instructions that depend upon a given register. The actual
- * register's index is used to index into the graph; ie all
- * instructions in flight that are dependent upon r34 will be
- * in the linked list of dependGraph[34].
- */
- DependencyEntry *dependGraph;
++ bool switchedOut;
+
+ /** The sequence number of the squashed instruction. */
- vector<bool> regScoreboard;
++ InstSeqNum squashedSeqNum[Impl::MaxThreads];
+
+ /** A cache of the recently woken registers. It is 1 if the register
+ * has been woken up recently, and 0 if the register has been added
+ * to the dependency graph and has not yet received its value. It
+ * is basically a secondary scoreboard, and should pretty much mirror
+ * the scoreboard that exists in the rename map.
+ */
- void insertDependency(DynInstPtr &new_inst);
- void createDependency(DynInstPtr &new_inst);
++ std::vector<bool> regScoreboard;
+
++ /** Adds an instruction to the dependency graph, as a consumer. */
+ bool addToDependents(DynInstPtr &new_inst);
- private:
+
++ /** Adds an instruction to the dependency graph, as a producer. */
++ void addToProducers(DynInstPtr &new_inst);
++
++ /** Moves an instruction to the ready queue if it is ready. */
+ void addIfReady(DynInstPtr &inst);
+
- /** Debugging function to dump out the dependency graph.
- */
- void dumpDependGraph();
-
+ /** Debugging function to count how many entries are in the IQ. It does
+ * a linear walk through the instructions, so do not call this function
+ * during normal execution.
+ */
+ int countInsts();
+
- // Stats::Scalar<> iqIntInstsAdded;
+ /** Debugging function to dump all the list sizes, as well as print
+ * out the list of nonspeculative instructions. Should not be used
+ * in any other capacity, but it has no harmful sideaffects.
+ */
+ void dumpLists();
+
++ /** Debugging function to dump out all instructions that are in the
++ * IQ.
++ */
++ void dumpInsts();
++
++ /** Stat for number of instructions added. */
+ Stats::Scalar<> iqInstsAdded;
++ /** Stat for number of non-speculative instructions added. */
+ Stats::Scalar<> iqNonSpecInstsAdded;
- // Stats::Scalar<> iqFloatInstsAdded;
++
++ Stats::Scalar<> iqInstsIssued;
++ /** Stat for number of integer instructions issued. */
+ Stats::Scalar<> iqIntInstsIssued;
- // Stats::Scalar<> iqBranchInstsAdded;
++ /** Stat for number of floating point instructions issued. */
+ Stats::Scalar<> iqFloatInstsIssued;
- // Stats::Scalar<> iqMemInstsAdded;
++ /** Stat for number of branch instructions issued. */
+ Stats::Scalar<> iqBranchInstsIssued;
- // Stats::Scalar<> iqMiscInstsAdded;
++ /** Stat for number of memory instructions issued. */
+ Stats::Scalar<> iqMemInstsIssued;
- Stats::Scalar<> iqLoopSquashStalls;
++ /** Stat for number of miscellaneous instructions issued. */
+ Stats::Scalar<> iqMiscInstsIssued;
++ /** Stat for number of squashed instructions that were ready to issue. */
+ Stats::Scalar<> iqSquashedInstsIssued;
- #endif //__CPU_O3_CPU_INST_QUEUE_HH__
++ /** Stat for number of squashed instructions examined when squashing. */
+ Stats::Scalar<> iqSquashedInstsExamined;
++ /** Stat for number of squashed instruction operands examined when
++ * squashing.
++ */
+ Stats::Scalar<> iqSquashedOperandsExamined;
++ /** Stat for number of non-speculative instructions removed due to a squash.
++ */
+ Stats::Scalar<> iqSquashedNonSpecRemoved;
+
++ Stats::VectorDistribution<> queueResDist;
++ Stats::Distribution<> numIssuedDist;
++ Stats::VectorDistribution<> issueDelayDist;
++
++ Stats::Vector<> statFuBusy;
++// Stats::Vector<> dist_unissued;
++ Stats::Vector2d<> statIssuedInstType;
++
++ Stats::Formula issueRate;
++// Stats::Formula issue_stores;
++// Stats::Formula issue_op_rate;
++ Stats::Vector<> fuBusy; //cumulative fu busy
++
++ Stats::Formula fuBusyRate;
+};
+
++#endif //__CPU_O3_INST_QUEUE_HH__
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- // Todo:
- // Current ordering allows for 0 cycle added-to-scheduled. Could maybe fake
- // it; either do in reverse order, or have added instructions put into a
- // different ready queue that, in scheduleRreadyInsts(), gets put onto the
- // normal ready queue. This would however give only a one cycle delay,
- // but probably is more flexible to actually add in a delay parameter than
- // just running it backwards.
-
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- // Either compile error or max int due to sign extension.
- // Hack to avoid compile warnings.
- const InstSeqNum MaxInstSeqNum = std::numeric_limits<InstSeqNum>::max();
+#include <limits>
+#include <vector>
+
+#include "sim/root.hh"
+
++#include "cpu/o3/fu_pool.hh"
+#include "cpu/o3/inst_queue.hh"
+
- InstructionQueue<Impl>::InstructionQueue(Params ¶ms)
- : memDepUnit(params),
- numEntries(params.numIQEntries),
- intWidth(params.executeIntWidth),
- floatWidth(params.executeFloatWidth),
- branchWidth(params.executeBranchWidth),
- memoryWidth(params.executeMemoryWidth),
- totalWidth(params.issueWidth),
- numPhysIntRegs(params.numPhysIntRegs),
- numPhysFloatRegs(params.numPhysFloatRegs),
- commitToIEWDelay(params.commitToIEWDelay)
++using namespace std;
+
+template <class Impl>
- // Initialize the number of free IQ entries.
- freeEntries = numEntries;
++InstructionQueue<Impl>::FUCompletion::FUCompletion(DynInstPtr &_inst,
++ int fu_idx,
++ InstructionQueue<Impl> *iq_ptr)
++ : Event(&mainEventQueue, Stat_Event_Pri),
++ inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), freeFU(false)
+{
- DPRINTF(IQ, "IQ: There are %i physical registers.\n", numPhysRegs);
++ this->setFlags(Event::AutoDelete);
++}
++
++template <class Impl>
++void
++InstructionQueue<Impl>::FUCompletion::process()
++{
++ iqPtr->processFUCompletion(inst, freeFU ? fuIdx : -1);
++ inst = NULL;
++}
++
++
++template <class Impl>
++const char *
++InstructionQueue<Impl>::FUCompletion::description()
++{
++ return "Functional unit completion event";
++}
++
++template <class Impl>
++InstructionQueue<Impl>::InstructionQueue(Params *params)
++ : dcacheInterface(params->dcacheInterface),
++ fuPool(params->fuPool),
++ numEntries(params->numIQEntries),
++ totalWidth(params->issueWidth),
++ numPhysIntRegs(params->numPhysIntRegs),
++ numPhysFloatRegs(params->numPhysFloatRegs),
++ commitToIEWDelay(params->commitToIEWDelay)
++{
++ assert(fuPool);
++
++ switchedOut = false;
++
++ numThreads = params->numberOfThreads;
+
+ // Set the number of physical registers as the number of int + float
+ numPhysRegs = numPhysIntRegs + numPhysFloatRegs;
+
- dependGraph = new DependencyEntry[numPhysRegs];
++ DPRINTF(IQ, "There are %i physical registers.\n", numPhysRegs);
+
+ //Create an entry for each physical register within the
+ //dependency graph.
- // Initialize all the head pointers to point to NULL, and all the
- // entries as unready.
- // Note that in actuality, the registers corresponding to the logical
- // registers start off as ready. However this doesn't matter for the
- // IQ as the instruction should have been correctly told if those
- // registers are ready in rename. Thus it can all be initialized as
- // unready.
- for (int i = 0; i < numPhysRegs; ++i)
- {
- dependGraph[i].next = NULL;
- dependGraph[i].inst = NULL;
- regScoreboard[i] = false;
++ dependGraph.resize(numPhysRegs);
+
+ // Resize the register scoreboard.
+ regScoreboard.resize(numPhysRegs);
+
- // iqIntInstsAdded;
++ //Initialize Mem Dependence Units
++ for (int i = 0; i < numThreads; i++) {
++ memDepUnit[i].init(params,i);
++ memDepUnit[i].setIQ(this);
+ }
+
++ resetState();
++
++ string policy = params->smtIQPolicy;
++
++ //Convert string to lowercase
++ std::transform(policy.begin(), policy.end(), policy.begin(),
++ (int(*)(int)) tolower);
++
++ //Figure out resource sharing policy
++ if (policy == "dynamic") {
++ iqPolicy = Dynamic;
++
++ //Set Max Entries to Total ROB Capacity
++ for (int i = 0; i < numThreads; i++) {
++ maxEntries[i] = numEntries;
++ }
++
++ } else if (policy == "partitioned") {
++ iqPolicy = Partitioned;
++
++ //@todo:make work if part_amt doesnt divide evenly.
++ int part_amt = numEntries / numThreads;
++
++ //Divide ROB up evenly
++ for (int i = 0; i < numThreads; i++) {
++ maxEntries[i] = part_amt;
++ }
++
++ DPRINTF(Fetch, "IQ sharing policy set to Partitioned:"
++ "%i entries per thread.\n",part_amt);
++
++ } else if (policy == "threshold") {
++ iqPolicy = Threshold;
++
++ double threshold = (double)params->smtIQThreshold / 100;
++
++ int thresholdIQ = (int)((double)threshold * numEntries);
++
++ //Divide up by threshold amount
++ for (int i = 0; i < numThreads; i++) {
++ maxEntries[i] = thresholdIQ;
++ }
++
++ DPRINTF(Fetch, "IQ sharing policy set to Threshold:"
++ "%i entries per thread.\n",thresholdIQ);
++ } else {
++ assert(0 && "Invalid IQ Sharing Policy.Options Are:{Dynamic,"
++ "Partitioned, Threshold}");
++ }
++}
++
++template <class Impl>
++InstructionQueue<Impl>::~InstructionQueue()
++{
++ dependGraph.reset();
++ cprintf("Nodes traversed: %i, removed: %i\n",
++ dependGraph.nodesTraversed, dependGraph.nodesRemoved);
++}
++
++template <class Impl>
++std::string
++InstructionQueue<Impl>::name() const
++{
++ return cpu->name() + ".iq";
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::regStats()
+{
++ using namespace Stats;
+ iqInstsAdded
+ .name(name() + ".iqInstsAdded")
+ .desc("Number of instructions added to the IQ (excludes non-spec)")
+ .prereq(iqInstsAdded);
+
+ iqNonSpecInstsAdded
+ .name(name() + ".iqNonSpecInstsAdded")
+ .desc("Number of non-speculative instructions added to the IQ")
+ .prereq(iqNonSpecInstsAdded);
+
- // iqFloatInstsAdded;
-
++ iqInstsIssued
++ .name(name() + ".iqInstsIssued")
++ .desc("Number of instructions issued")
++ .prereq(iqInstsIssued);
+
+ iqIntInstsIssued
+ .name(name() + ".iqIntInstsIssued")
+ .desc("Number of integer instructions issued")
+ .prereq(iqIntInstsIssued);
+
- // iqBranchInstsAdded;
-
+ iqFloatInstsIssued
+ .name(name() + ".iqFloatInstsIssued")
+ .desc("Number of float instructions issued")
+ .prereq(iqFloatInstsIssued);
+
- // iqMemInstsAdded;
-
+ iqBranchInstsIssued
+ .name(name() + ".iqBranchInstsIssued")
+ .desc("Number of branch instructions issued")
+ .prereq(iqBranchInstsIssued);
+
- // iqMiscInstsAdded;
-
+ iqMemInstsIssued
+ .name(name() + ".iqMemInstsIssued")
+ .desc("Number of memory instructions issued")
+ .prereq(iqMemInstsIssued);
+
- iqLoopSquashStalls
- .name(name() + ".iqLoopSquashStalls")
- .desc("Number of times issue loop had to restart due to squashed "
- "inst; mainly for profiling")
- .prereq(iqLoopSquashStalls);
-
+ iqMiscInstsIssued
+ .name(name() + ".iqMiscInstsIssued")
+ .desc("Number of miscellaneous instructions issued")
+ .prereq(iqMiscInstsIssued);
+
+ iqSquashedInstsIssued
+ .name(name() + ".iqSquashedInstsIssued")
+ .desc("Number of squashed instructions issued")
+ .prereq(iqSquashedInstsIssued);
+
- // Tell mem dependence unit to reg stats as well.
- memDepUnit.regStats();
+ iqSquashedInstsExamined
+ .name(name() + ".iqSquashedInstsExamined")
+ .desc("Number of squashed instructions iterated over during squash;"
+ " mainly for profiling")
+ .prereq(iqSquashedInstsExamined);
+
+ iqSquashedOperandsExamined
+ .name(name() + ".iqSquashedOperandsExamined")
+ .desc("Number of squashed operands that are examined and possibly "
+ "removed from graph")
+ .prereq(iqSquashedOperandsExamined);
+
+ iqSquashedNonSpecRemoved
+ .name(name() + ".iqSquashedNonSpecRemoved")
+ .desc("Number of squashed non-spec instructions that were removed")
+ .prereq(iqSquashedNonSpecRemoved);
+
- InstructionQueue<Impl>::setCPU(FullCPU *cpu_ptr)
++ queueResDist
++ .init(Num_OpClasses, 0, 99, 2)
++ .name(name() + ".IQ:residence:")
++ .desc("cycles from dispatch to issue")
++ .flags(total | pdf | cdf )
++ ;
++ for (int i = 0; i < Num_OpClasses; ++i) {
++ queueResDist.subname(i, opClassStrings[i]);
++ }
++ numIssuedDist
++ .init(0,totalWidth,1)
++ .name(name() + ".ISSUE:issued_per_cycle")
++ .desc("Number of insts issued each cycle")
++ .flags(pdf)
++ ;
++/*
++ dist_unissued
++ .init(Num_OpClasses+2)
++ .name(name() + ".ISSUE:unissued_cause")
++ .desc("Reason ready instruction not issued")
++ .flags(pdf | dist)
++ ;
++ for (int i=0; i < (Num_OpClasses + 2); ++i) {
++ dist_unissued.subname(i, unissued_names[i]);
++ }
++*/
++ statIssuedInstType
++ .init(numThreads,Num_OpClasses)
++ .name(name() + ".ISSUE:FU_type")
++ .desc("Type of FU issued")
++ .flags(total | pdf | dist)
++ ;
++ statIssuedInstType.ysubnames(opClassStrings);
++
++ //
++ // How long did instructions for a particular FU type wait prior to issue
++ //
++
++ issueDelayDist
++ .init(Num_OpClasses,0,99,2)
++ .name(name() + ".ISSUE:")
++ .desc("cycles from operands ready to issue")
++ .flags(pdf | cdf)
++ ;
++
++ for (int i=0; i<Num_OpClasses; ++i) {
++ stringstream subname;
++ subname << opClassStrings[i] << "_delay";
++ issueDelayDist.subname(i, subname.str());
++ }
++
++ issueRate
++ .name(name() + ".ISSUE:rate")
++ .desc("Inst issue rate")
++ .flags(total)
++ ;
++ issueRate = iqInstsIssued / cpu->numCycles;
++/*
++ issue_stores
++ .name(name() + ".ISSUE:stores")
++ .desc("Number of stores issued")
++ .flags(total)
++ ;
++ issue_stores = exe_refs - exe_loads;
++*/
++/*
++ issue_op_rate
++ .name(name() + ".ISSUE:op_rate")
++ .desc("Operation issue rate")
++ .flags(total)
++ ;
++ issue_op_rate = issued_ops / numCycles;
++*/
++ statFuBusy
++ .init(Num_OpClasses)
++ .name(name() + ".ISSUE:fu_full")
++ .desc("attempts to use FU when none available")
++ .flags(pdf | dist)
++ ;
++ for (int i=0; i < Num_OpClasses; ++i) {
++ statFuBusy.subname(i, opClassStrings[i]);
++ }
++
++ fuBusy
++ .init(numThreads)
++ .name(name() + ".ISSUE:fu_busy_cnt")
++ .desc("FU busy when requested")
++ .flags(total)
++ ;
++
++ fuBusyRate
++ .name(name() + ".ISSUE:fu_busy_rate")
++ .desc("FU busy rate (busy events/executed inst)")
++ .flags(total)
++ ;
++ fuBusyRate = fuBusy / iqInstsIssued;
++
++ for ( int i=0; i < numThreads; i++) {
++ // Tell mem dependence unit to reg stats as well.
++ memDepUnit[i].regStats();
++ }
+}
+
+template <class Impl>
+void
- cpu = cpu_ptr;
++InstructionQueue<Impl>::resetState()
+{
- tail = cpu->instList.begin();
++ //Initialize thread IQ counts
++ for (int i = 0; i <numThreads; i++) {
++ count[i] = 0;
++ instList[i].clear();
++ }
++
++ // Initialize the number of free IQ entries.
++ freeEntries = numEntries;
++
++ // Note that in actuality, the registers corresponding to the logical
++ // registers start off as ready. However this doesn't matter for the
++ // IQ as the instruction should have been correctly told if those
++ // registers are ready in rename. Thus it can all be initialized as
++ // unready.
++ for (int i = 0; i < numPhysRegs; ++i) {
++ regScoreboard[i] = false;
++ }
++
++ for (int i = 0; i < numThreads; ++i) {
++ squashedSeqNum[i] = 0;
++ }
++
++ for (int i = 0; i < Num_OpClasses; ++i) {
++ while (!readyInsts[i].empty())
++ readyInsts[i].pop();
++ queueOnList[i] = false;
++ readyIt[i] = listOrder.end();
++ }
++ nonSpecInsts.clear();
++ listOrder.clear();
++}
+
- InstructionQueue<Impl>::setIssueToExecuteQueue(
- TimeBuffer<IssueStruct> *i2e_ptr)
++template <class Impl>
++void
++InstructionQueue<Impl>::setActiveThreads(list<unsigned> *at_ptr)
++{
++ DPRINTF(IQ, "Setting active threads list pointer.\n");
++ activeThreads = at_ptr;
+}
+
+template <class Impl>
+void
- DPRINTF(IQ, "IQ: Set the issue to execute queue.\n");
++InstructionQueue<Impl>::setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2e_ptr)
+{
- DPRINTF(IQ, "IQ: Set the time buffer.\n");
++ DPRINTF(IQ, "Set the issue to execute queue.\n");
+ issueToExecuteQueue = i2e_ptr;
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
+{
- DPRINTF(IQ, "IQ: Adding instruction PC %#x to the IQ.\n",
- new_inst->readPC());
++ DPRINTF(IQ, "Set the time buffer.\n");
+ timeBuffer = tb_ptr;
+
+ fromCommit = timeBuffer->getWire(-commitToIEWDelay);
+}
+
++template <class Impl>
++void
++InstructionQueue<Impl>::switchOut()
++{
++ resetState();
++ dependGraph.reset();
++ switchedOut = true;
++ for (int i = 0; i < numThreads; ++i) {
++ memDepUnit[i].switchOut();
++ }
++}
++
++template <class Impl>
++void
++InstructionQueue<Impl>::takeOverFrom()
++{
++ switchedOut = false;
++}
++
++template <class Impl>
++int
++InstructionQueue<Impl>::entryAmount(int num_threads)
++{
++ if (iqPolicy == Partitioned) {
++ return numEntries / num_threads;
++ } else {
++ return 0;
++ }
++}
++
++
++template <class Impl>
++void
++InstructionQueue<Impl>::resetEntries()
++{
++ if (iqPolicy != Dynamic || numThreads > 1) {
++ int active_threads = (*activeThreads).size();
++
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++ list<unsigned>::iterator list_end = (*activeThreads).end();
++
++ while (threads != list_end) {
++ if (iqPolicy == Partitioned) {
++ maxEntries[*threads++] = numEntries / active_threads;
++ } else if(iqPolicy == Threshold && active_threads == 1) {
++ maxEntries[*threads++] = numEntries;
++ }
++ }
++ }
++}
++
+template <class Impl>
+unsigned
+InstructionQueue<Impl>::numFreeEntries()
+{
+ return freeEntries;
+}
+
++template <class Impl>
++unsigned
++InstructionQueue<Impl>::numFreeEntries(unsigned tid)
++{
++ return maxEntries[tid] - count[tid];
++}
++
+// Might want to do something more complex if it knows how many instructions
+// will be issued this cycle.
+template <class Impl>
+bool
+InstructionQueue<Impl>::isFull()
+{
+ if (freeEntries == 0) {
+ return(true);
+ } else {
+ return(false);
+ }
+}
+
++template <class Impl>
++bool
++InstructionQueue<Impl>::isFull(unsigned tid)
++{
++ if (numFreeEntries(tid) == 0) {
++ return(true);
++ } else {
++ return(false);
++ }
++}
++
++template <class Impl>
++bool
++InstructionQueue<Impl>::hasReadyInsts()
++{
++ if (!listOrder.empty()) {
++ return true;
++ }
++
++ for (int i = 0; i < Num_OpClasses; ++i) {
++ if (!readyInsts[i].empty()) {
++ return true;
++ }
++ }
++
++ return false;
++}
++
+template <class Impl>
+void
+InstructionQueue<Impl>::insert(DynInstPtr &new_inst)
+{
+ // Make sure the instruction is valid
+ assert(new_inst);
+
- // Check if there are any free entries. Panic if there are none.
- // Might want to have this return a fault in the future instead of
- // panicing.
++ DPRINTF(IQ, "Adding instruction [sn:%lli] PC %#x to the IQ.\n",
++ new_inst->seqNum, new_inst->readPC());
+
- // If the IQ currently has nothing in it, then there's a possibility
- // that the tail iterator is invalid (might have been pointing at an
- // instruction that was retired). Reset the tail iterator.
- if (freeEntries == numEntries) {
- tail = cpu->instList.begin();
- }
-
- // Move the tail iterator. Instructions may not have been issued
- // to the IQ, so we may have to increment the iterator more than once.
- while ((*tail) != new_inst) {
- tail++;
-
- // Make sure the tail iterator points at something legal.
- assert(tail != cpu->instList.end());
- }
-
+ assert(freeEntries != 0);
+
- // Decrease the number of free entries.
++ instList[new_inst->threadNumber].push_back(new_inst);
+
- createDependency(new_inst);
+ --freeEntries;
+
++ new_inst->setInIQ();
++
+ // Look through its source registers (physical regs), and mark any
+ // dependencies.
+ addToDependents(new_inst);
+
+ // Have this instruction set itself as the producer of its destination
+ // register(s).
- // If it's a memory instruction, add it to the memory dependency
- // unit.
++ addToProducers(new_inst);
+
- memDepUnit.insert(new_inst);
- // Uh..forgot to look it up and put it on the proper dependency list
- // if the instruction should not go yet.
+ if (new_inst->isMemRef()) {
- // If the instruction is ready then add it to the ready list.
++ memDepUnit[new_inst->threadNumber].insert(new_inst);
+ } else {
- InstructionQueue<Impl>::insertNonSpec(DynInstPtr &inst)
+ addIfReady(new_inst);
+ }
+
+ ++iqInstsAdded;
+
++ count[new_inst->threadNumber]++;
++
+ assert(freeEntries == (numEntries - countInsts()));
+}
+
+template <class Impl>
+void
- nonSpecInsts[inst->seqNum] = inst;
-
++InstructionQueue<Impl>::insertNonSpec(DynInstPtr &new_inst)
+{
- // Make sure the instruction is valid
- assert(inst);
-
- DPRINTF(IQ, "IQ: Adding instruction PC %#x to the IQ.\n",
- inst->readPC());
+ // @todo: Clean up this code; can do it by setting inst as unable
+ // to issue, then calling normal insert on the inst.
+
- // Check if there are any free entries. Panic if there are none.
- // Might want to have this return a fault in the future instead of
- // panicing.
- assert(freeEntries != 0);
++ assert(new_inst);
+
- // If the IQ currently has nothing in it, then there's a possibility
- // that the tail iterator is invalid (might have been pointing at an
- // instruction that was retired). Reset the tail iterator.
- if (freeEntries == numEntries) {
- tail = cpu->instList.begin();
- }
++ nonSpecInsts[new_inst->seqNum] = new_inst;
+
- // Move the tail iterator. Instructions may not have been issued
- // to the IQ, so we may have to increment the iterator more than once.
- while ((*tail) != inst) {
- tail++;
++ DPRINTF(IQ, "Adding non-speculative instruction [sn:%lli] PC %#x "
++ "to the IQ.\n",
++ new_inst->seqNum, new_inst->readPC());
+
- // Make sure the tail iterator points at something legal.
- assert(tail != cpu->instList.end());
- }
++ assert(freeEntries != 0);
+
- // Decrease the number of free entries.
++ instList[new_inst->threadNumber].push_back(new_inst);
+
- createDependency(inst);
+ --freeEntries;
+
++ new_inst->setInIQ();
++
+ // Have this instruction set itself as the producer of its destination
+ // register(s).
- if (inst->isMemRef()) {
- memDepUnit.insertNonSpec(inst);
++ addToProducers(new_inst);
+
+ // If it's a memory instruction, add it to the memory dependency
+ // unit.
- // Slightly hack function to advance the tail iterator in the case that
- // the IEW stage issues an instruction that is not added to the IQ. This
- // is needed in case a long chain of such instructions occurs.
- // I don't think this is used anymore.
++ if (new_inst->isMemRef()) {
++ memDepUnit[new_inst->threadNumber].insertNonSpec(new_inst);
+ }
+
+ ++iqNonSpecInstsAdded;
++
++ count[new_inst->threadNumber]++;
++
++ assert(freeEntries == (numEntries - countInsts()));
+}
+
- InstructionQueue<Impl>::advanceTail(DynInstPtr &inst)
+template <class Impl>
+void
- // Make sure the instruction is valid
- assert(inst);
-
- DPRINTF(IQ, "IQ: Adding instruction PC %#x to the IQ.\n",
- inst->readPC());
-
- // Check if there are any free entries. Panic if there are none.
- // Might want to have this return a fault in the future instead of
- // panicing.
- assert(freeEntries != 0);
-
- // If the IQ currently has nothing in it, then there's a possibility
- // that the tail iterator is invalid (might have been pointing at an
- // instruction that was retired). Reset the tail iterator.
- if (freeEntries == numEntries) {
- tail = cpu->instList.begin();
- }
-
- // Move the tail iterator. Instructions may not have been issued
- // to the IQ, so we may have to increment the iterator more than once.
- while ((*tail) != inst) {
- tail++;
-
- // Make sure the tail iterator points at something legal.
- assert(tail != cpu->instList.end());
- }
++InstructionQueue<Impl>::insertBarrier(DynInstPtr &barr_inst)
+{
- assert(freeEntries <= numEntries);
++ memDepUnit[barr_inst->threadNumber].insertBarrier(barr_inst);
+
- // Have this instruction set itself as the producer of its destination
- // register(s).
- createDependency(inst);
++ insertNonSpec(barr_inst);
++}
+
- // Need to make sure the number of float and integer instructions
- // issued does not exceed the total issue bandwidth.
- // @todo: Figure out a better way to remove the squashed items from the
- // lists. Checking the top item of each list to see if it's squashed
- // wastes time and forces jumps.
++template <class Impl>
++typename Impl::DynInstPtr
++InstructionQueue<Impl>::getInstToExecute()
++{
++ assert(!instsToExecute.empty());
++ DynInstPtr inst = instsToExecute.front();
++ instsToExecute.pop_front();
++ return inst;
+}
+
- InstructionQueue<Impl>::scheduleReadyInsts()
+template <class Impl>
+void
- DPRINTF(IQ, "IQ: Attempting to schedule ready instructions from "
- "the IQ.\n");
-
- int int_issued = 0;
- int float_issued = 0;
- int branch_issued = 0;
- int memory_issued = 0;
- int squashed_issued = 0;
- int total_issued = 0;
-
- IssueStruct *i2e_info = issueToExecuteQueue->access(0);
++InstructionQueue<Impl>::addToOrderList(OpClass op_class)
+{
- bool insts_available = !readyBranchInsts.empty() ||
- !readyIntInsts.empty() ||
- !readyFloatInsts.empty() ||
- !memDepUnit.empty() ||
- !readyMiscInsts.empty() ||
- !squashedInsts.empty();
-
- // Note: Requires a globally defined constant.
- InstSeqNum oldest_inst = MaxInstSeqNum;
- InstList list_with_oldest = None;
-
- // Temporary values.
- DynInstPtr int_head_inst;
- DynInstPtr float_head_inst;
- DynInstPtr branch_head_inst;
- DynInstPtr mem_head_inst;
- DynInstPtr misc_head_inst;
- DynInstPtr squashed_head_inst;
-
- // Somewhat nasty code to look at all of the lists where issuable
- // instructions are located, and choose the oldest instruction among
- // those lists. Consider a rewrite in the future.
- while (insts_available && total_issued < totalWidth)
- {
- // Set this to false. Each if-block is required to set it to true
- // if there were instructions available this check. This will cause
- // this loop to run once more than necessary, but avoids extra calls.
- insts_available = false;
-
- oldest_inst = MaxInstSeqNum;
-
- list_with_oldest = None;
-
- if (!readyIntInsts.empty() &&
- int_issued < intWidth) {
++ assert(!readyInsts[op_class].empty());
+
- insts_available = true;
++ ListOrderEntry queue_entry;
+
- int_head_inst = readyIntInsts.top();
++ queue_entry.queueType = op_class;
+
- if (int_head_inst->isSquashed()) {
- readyIntInsts.pop();
++ queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
+
- ++iqLoopSquashStalls;
-
- continue;
- }
-
- oldest_inst = int_head_inst->seqNum;
-
- list_with_oldest = Int;
++ ListOrderIt list_it = listOrder.begin();
++ ListOrderIt list_end_it = listOrder.end();
+
- if (!readyFloatInsts.empty() &&
- float_issued < floatWidth) {
-
- insts_available = true;
-
- float_head_inst = readyFloatInsts.top();
-
- if (float_head_inst->isSquashed()) {
- readyFloatInsts.pop();
-
- ++iqLoopSquashStalls;
++ while (list_it != list_end_it) {
++ if ((*list_it).oldestInst > queue_entry.oldestInst) {
++ break;
+ }
+
- continue;
- } else if (float_head_inst->seqNum < oldest_inst) {
- oldest_inst = float_head_inst->seqNum;
++ list_it++;
++ }
+
- list_with_oldest = Float;
- }
- }
++ readyIt[op_class] = listOrder.insert(list_it, queue_entry);
++ queueOnList[op_class] = true;
++}
+
- if (!readyBranchInsts.empty() &&
- branch_issued < branchWidth) {
++template <class Impl>
++void
++InstructionQueue<Impl>::moveToYoungerInst(ListOrderIt list_order_it)
++{
++ // Get iterator of next item on the list
++ // Delete the original iterator
++ // Determine if the next item is either the end of the list or younger
++ // than the new instruction. If so, then add in a new iterator right here.
++ // If not, then move along.
++ ListOrderEntry queue_entry;
++ OpClass op_class = (*list_order_it).queueType;
++ ListOrderIt next_it = list_order_it;
++
++ ++next_it;
++
++ queue_entry.queueType = op_class;
++ queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
++
++ while (next_it != listOrder.end() &&
++ (*next_it).oldestInst < queue_entry.oldestInst) {
++ ++next_it;
++ }
+
- insts_available = true;
++ readyIt[op_class] = listOrder.insert(next_it, queue_entry);
++}
+
- branch_head_inst = readyBranchInsts.top();
++template <class Impl>
++void
++InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
++{
++ // The CPU could have been sleeping until this op completed (*extremely*
++ // long latency op). Wake it if it was. This may be overkill.
++ if (isSwitchedOut()) {
++ return;
++ }
+
- if (branch_head_inst->isSquashed()) {
- readyBranchInsts.pop();
++ iewStage->wakeCPU();
+
- ++iqLoopSquashStalls;
++ if (fu_idx > -1)
++ fuPool->freeUnitNextCycle(fu_idx);
+
- continue;
- } else if (branch_head_inst->seqNum < oldest_inst) {
- oldest_inst = branch_head_inst->seqNum;
++ // @todo: Ensure that these FU Completions happen at the beginning
++ // of a cycle, otherwise they could add too many instructions to
++ // the queue.
++ // @todo: This could break if there's multiple multi-cycle ops
++ // finishing on this cycle. Maybe implement something like
++ // instToCommit in iew_impl.hh.
++ issueToExecuteQueue->access(0)->size++;
++ instsToExecute.push_back(inst);
++// int &size = issueToExecuteQueue->access(0)->size;
+
- list_with_oldest = Branch;
- }
++// issueToExecuteQueue->access(0)->insts[size++] = inst;
++}
+
- }
++// @todo: Figure out a better way to remove the squashed items from the
++// lists. Checking the top item of each list to see if it's squashed
++// wastes time and forces jumps.
++template <class Impl>
++void
++InstructionQueue<Impl>::scheduleReadyInsts()
++{
++ DPRINTF(IQ, "Attempting to schedule ready instructions from "
++ "the IQ.\n");
+
- if (!memDepUnit.empty() &&
- memory_issued < memoryWidth) {
++ IssueStruct *i2e_info = issueToExecuteQueue->access(0);
+
- insts_available = true;
++ // Have iterator to head of the list
++ // While I haven't exceeded bandwidth or reached the end of the list,
++ // Try to get a FU that can do what this op needs.
++ // If successful, change the oldestInst to the new top of the list, put
++ // the queue in the proper place in the list.
++ // Increment the iterator.
++ // This will avoid trying to schedule a certain op class if there are no
++ // FUs that handle it.
++ ListOrderIt order_it = listOrder.begin();
++ ListOrderIt order_end_it = listOrder.end();
++ int total_issued = 0;
+
- mem_head_inst = memDepUnit.top();
++ while (total_issued < totalWidth &&
++ order_it != order_end_it) {
++ OpClass op_class = (*order_it).queueType;
+
- if (mem_head_inst->isSquashed()) {
- memDepUnit.pop();
++ assert(!readyInsts[op_class].empty());
+
- ++iqLoopSquashStalls;
++ DynInstPtr issuing_inst = readyInsts[op_class].top();
+
- continue;
- } else if (mem_head_inst->seqNum < oldest_inst) {
- oldest_inst = mem_head_inst->seqNum;
++ assert(issuing_inst->seqNum == (*order_it).oldestInst);
+
- list_with_oldest = Memory;
++ if (issuing_inst->isSquashed()) {
++ readyInsts[op_class].pop();
+
- }
-
- if (!readyMiscInsts.empty()) {
++ if (!readyInsts[op_class].empty()) {
++ moveToYoungerInst(order_it);
++ } else {
++ readyIt[op_class] = listOrder.end();
++ queueOnList[op_class] = false;
+ }
- insts_available = true;
+
- misc_head_inst = readyMiscInsts.top();
++ listOrder.erase(order_it++);
+
- if (misc_head_inst->isSquashed()) {
- readyMiscInsts.pop();
-
- ++iqLoopSquashStalls;
-
- continue;
- } else if (misc_head_inst->seqNum < oldest_inst) {
- oldest_inst = misc_head_inst->seqNum;
-
- list_with_oldest = Misc;
- }
++ ++iqSquashedInstsIssued;
+
- if (!squashedInsts.empty()) {
++ continue;
+ }
+
- insts_available = true;
++ int idx = -2;
++ int op_latency = 1;
++ int tid = issuing_inst->threadNumber;
+
- squashed_head_inst = squashedInsts.top();
-
- if (squashed_head_inst->seqNum < oldest_inst) {
- list_with_oldest = Squashed;
++ if (op_class != No_OpClass) {
++ idx = fuPool->getUnit(op_class);
+
-
++ if (idx > -1) {
++ op_latency = fuPool->getOpLatency(op_class);
+ }
- DynInstPtr issuing_inst = NULL;
-
- switch (list_with_oldest) {
- case None:
- DPRINTF(IQ, "IQ: Not able to schedule any instructions. Issuing "
- "inst is %#x.\n", issuing_inst);
- break;
+ }
+
- case Int:
- issuing_inst = int_head_inst;
- readyIntInsts.pop();
- ++int_issued;
- DPRINTF(IQ, "IQ: Issuing integer instruction PC %#x.\n",
- issuing_inst->readPC());
- break;
-
- case Float:
- issuing_inst = float_head_inst;
- readyFloatInsts.pop();
- ++float_issued;
- DPRINTF(IQ, "IQ: Issuing float instruction PC %#x.\n",
- issuing_inst->readPC());
- break;
-
- case Branch:
- issuing_inst = branch_head_inst;
- readyBranchInsts.pop();
- ++branch_issued;
- DPRINTF(IQ, "IQ: Issuing branch instruction PC %#x.\n",
- issuing_inst->readPC());
- break;
-
- case Memory:
- issuing_inst = mem_head_inst;
++ if (idx == -2 || idx != -1) {
++ if (op_latency == 1) {
++// i2e_info->insts[exec_queue_slot++] = issuing_inst;
++ i2e_info->size++;
++ instsToExecute.push_back(issuing_inst);
+
- memDepUnit.pop();
- ++memory_issued;
- DPRINTF(IQ, "IQ: Issuing memory instruction PC %#x.\n",
- issuing_inst->readPC());
- break;
++ // Add the FU onto the list of FU's to be freed next
++ // cycle if we used one.
++ if (idx >= 0)
++ fuPool->freeUnitNextCycle(idx);
++ } else {
++ int issue_latency = fuPool->getIssueLatency(op_class);
++ // Generate completion event for the FU
++ FUCompletion *execution = new FUCompletion(issuing_inst,
++ idx, this);
++
++ execution->schedule(curTick + cpu->cycles(issue_latency - 1));
++
++ // @todo: Enforce that issue_latency == 1 or op_latency
++ if (issue_latency > 1) {
++ execution->setFreeFU();
++ } else {
++ // @todo: Not sure I'm accounting for the
++ // multi-cycle op in a pipelined FU properly, or
++ // the number of instructions issued in one cycle.
++// i2e_info->insts[exec_queue_slot++] = issuing_inst;
++// i2e_info->size++;
++
++ // Add the FU onto the list of FU's to be freed next cycle.
++ fuPool->freeUnitNextCycle(idx);
++ }
++ }
+
- case Misc:
- issuing_inst = misc_head_inst;
- readyMiscInsts.pop();
++ DPRINTF(IQ, "Thread %i: Issuing instruction PC %#x "
++ "[sn:%lli]\n",
++ tid, issuing_inst->readPC(),
++ issuing_inst->seqNum);
+
- ++iqMiscInstsIssued;
++ readyInsts[op_class].pop();
+
- DPRINTF(IQ, "IQ: Issuing a miscellaneous instruction PC %#x.\n",
- issuing_inst->readPC());
- break;
++ if (!readyInsts[op_class].empty()) {
++ moveToYoungerInst(order_it);
++ } else {
++ readyIt[op_class] = listOrder.end();
++ queueOnList[op_class] = false;
++ }
+
- case Squashed:
- assert(0 && "Squashed insts should not issue any more!");
- squashedInsts.pop();
- // Set the squashed instruction as able to commit so that commit
- // can just drop it from the ROB. This is a bit faked.
- ++squashed_issued;
- ++freeEntries;
++ issuing_inst->setIssued();
++ ++total_issued;
+
- DPRINTF(IQ, "IQ: Issuing squashed instruction PC %#x.\n",
- squashed_head_inst->readPC());
- break;
++ if (!issuing_inst->isMemRef()) {
++ // Memory instructions can not be freed from the IQ until they
++ // complete.
++ ++freeEntries;
++ count[tid]--;
++ issuing_inst->removeInIQ();
++ } else {
++ memDepUnit[tid].issue(issuing_inst);
++ }
+
- if (list_with_oldest != None && list_with_oldest != Squashed) {
- i2e_info->insts[total_issued] = issuing_inst;
- i2e_info->size++;
-
- issuing_inst->setIssued();
-
- ++freeEntries;
- ++total_issued;
- }
++ listOrder.erase(order_it++);
++ statIssuedInstType[tid][op_class]++;
++ } else {
++ statFuBusy[op_class]++;
++ fuBusy[tid]++;
++ ++order_it;
+ }
++ }
+
- assert(freeEntries == (numEntries - countInsts()));
++ numIssuedDist.sample(total_issued);
++ iqInstsIssued+= total_issued;
+
-
- iqIntInstsIssued += int_issued;
- iqFloatInstsIssued += float_issued;
- iqBranchInstsIssued += branch_issued;
- iqMemInstsIssued += memory_issued;
- iqSquashedInstsIssued += squashed_issued;
++ if (total_issued) {
++ cpu->activityThisCycle();
++ } else {
++ DPRINTF(IQ, "Not able to schedule any instructions.\n");
+ }
- DPRINTF(IQ, "IQ: Marking nonspeculative instruction with sequence "
- "number %i as ready to execute.\n", inst);
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst)
+{
- non_spec_it_t inst_it = nonSpecInsts.find(inst);
++ DPRINTF(IQ, "Marking nonspeculative instruction [sn:%lli] as ready "
++ "to execute.\n", inst);
+
- // Mark this instruction as ready to issue.
++ NonSpecMapIt inst_it = nonSpecInsts.find(inst);
+
+ assert(inst_it != nonSpecInsts.end());
+
- // Now schedule the instruction.
++ unsigned tid = (*inst_it).second->threadNumber;
++
+ (*inst_it).second->setCanIssue();
+
- memDepUnit.nonSpecInstReady((*inst_it).second);
+ if (!(*inst_it).second->isMemRef()) {
+ addIfReady((*inst_it).second);
+ } else {
- DPRINTF(IQ, "IQ: Waking dependents of completed instruction.\n");
- //Look at the physical destination register of the DynInst
- //and look it up on the dependency graph. Then mark as ready
- //any instructions within the instruction queue.
- DependencyEntry *curr;
++ memDepUnit[tid].nonSpecInstReady((*inst_it).second);
+ }
+
++ (*inst_it).second = NULL;
++
+ nonSpecInsts.erase(inst_it);
+}
+
+template <class Impl>
+void
++InstructionQueue<Impl>::commit(const InstSeqNum &inst, unsigned tid)
++{
++ DPRINTF(IQ, "[tid:%i]: Committing instructions older than [sn:%i]\n",
++ tid,inst);
++
++ ListIt iq_it = instList[tid].begin();
++
++ while (iq_it != instList[tid].end() &&
++ (*iq_it)->seqNum <= inst) {
++ ++iq_it;
++ instList[tid].pop_front();
++ }
++
++ assert(freeEntries == (numEntries - countInsts()));
++}
++
++template <class Impl>
++int
+InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
+{
- // Tell the memory dependence unit to wake any dependents on this
- // instruction if it is a memory instruction.
++ int dependents = 0;
+
- memDepUnit.wakeDependents(completed_inst);
++ DPRINTF(IQ, "Waking dependents of completed instruction.\n");
+
++ assert(!completed_inst->isSquashed());
++
++ // Tell the memory dependence unit to wake any dependents on this
++ // instruction if it is a memory instruction. Also complete the memory
++ // instruction at this point since we know it executed without issues.
++ // @todo: Might want to rename "completeMemInst" to something that
++ // indicates that it won't need to be replayed, and call this
++ // earlier. Might not be a big deal.
+ if (completed_inst->isMemRef()) {
- DPRINTF(IQ, "IQ: Waking any dependents on register %i.\n",
++ memDepUnit[completed_inst->threadNumber].wakeDependents(completed_inst);
++ completeMemInst(completed_inst);
++ } else if (completed_inst->isMemBarrier() ||
++ completed_inst->isWriteBarrier()) {
++ memDepUnit[completed_inst->threadNumber].completeBarrier(completed_inst);
+ }
+
+ for (int dest_reg_idx = 0;
+ dest_reg_idx < completed_inst->numDestRegs();
+ dest_reg_idx++)
+ {
+ PhysRegIndex dest_reg =
+ completed_inst->renamedDestRegIdx(dest_reg_idx);
+
+ // Special case of uniq or control registers. They are not
+ // handled by the IQ and thus have no dependency graph entry.
+ // @todo Figure out a cleaner way to handle this.
+ if (dest_reg >= numPhysRegs) {
+ continue;
+ }
+
- //Maybe abstract this part into a function.
- //Go through the dependency chain, marking the registers as ready
- //within the waiting instructions.
- while (dependGraph[dest_reg].next) {
-
- curr = dependGraph[dest_reg].next;
++ DPRINTF(IQ, "Waking any dependents on register %i.\n",
+ (int) dest_reg);
+
- DPRINTF(IQ, "IQ: Waking up a dependent instruction, PC%#x.\n",
- curr->inst->readPC());
++ //Go through the dependency chain, marking the registers as
++ //ready within the waiting instructions.
++ DynInstPtr dep_inst = dependGraph.pop(dest_reg);
+
- // so that it knows which of its source registers is ready.
- // However that would mean that the dependency graph entries
- // would need to hold the src_reg_idx.
- curr->inst->markSrcRegReady();
++ while (dep_inst) {
++ DPRINTF(IQ, "Waking up a dependent instruction, PC%#x.\n",
++ dep_inst->readPC());
+
+ // Might want to give more information to the instruction
- addIfReady(curr->inst);
++ // so that it knows which of its source registers is
++ // ready. However that would mean that the dependency
++ // graph entries would need to hold the src_reg_idx.
++ dep_inst->markSrcRegReady();
+
- dependGraph[dest_reg].next = curr->next;
++ addIfReady(dep_inst);
+
- DependencyEntry::mem_alloc_counter--;
-
- curr->inst = NULL;
-
- delete curr;
++ dep_inst = dependGraph.pop(dest_reg);
+
- // Reset the head node now that all of its dependents have been woken
- // up.
- dependGraph[dest_reg].next = NULL;
- dependGraph[dest_reg].inst = NULL;
++ ++dependents;
+ }
+
- memDepUnit.violation(store, faulting_load);
++ // Reset the head node now that all of its dependents have
++ // been woken up.
++ assert(dependGraph.empty(dest_reg));
++ dependGraph.clearInst(dest_reg);
+
+ // Mark the scoreboard as having that register ready.
+ regScoreboard[dest_reg] = true;
+ }
++ return dependents;
++}
++
++template <class Impl>
++void
++InstructionQueue<Impl>::addReadyMemInst(DynInstPtr &ready_inst)
++{
++ OpClass op_class = ready_inst->opClass();
++
++ readyInsts[op_class].push(ready_inst);
++
++ // Will need to reorder the list if either a queue is not on the list,
++ // or it has an older instruction than last time.
++ if (!queueOnList[op_class]) {
++ addToOrderList(op_class);
++ } else if (readyInsts[op_class].top()->seqNum <
++ (*readyIt[op_class]).oldestInst) {
++ listOrder.erase(readyIt[op_class]);
++ addToOrderList(op_class);
++ }
++
++ DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
++ "the ready list, PC %#x opclass:%i [sn:%lli].\n",
++ ready_inst->readPC(), op_class, ready_inst->seqNum);
++}
++
++template <class Impl>
++void
++InstructionQueue<Impl>::rescheduleMemInst(DynInstPtr &resched_inst)
++{
++ memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
++}
++
++template <class Impl>
++void
++InstructionQueue<Impl>::replayMemInst(DynInstPtr &replay_inst)
++{
++ memDepUnit[replay_inst->threadNumber].replay(replay_inst);
++}
++
++template <class Impl>
++void
++InstructionQueue<Impl>::completeMemInst(DynInstPtr &completed_inst)
++{
++ int tid = completed_inst->threadNumber;
++
++ DPRINTF(IQ, "Completing mem instruction PC:%#x [sn:%lli]\n",
++ completed_inst->readPC(), completed_inst->seqNum);
++
++ ++freeEntries;
++
++ completed_inst->memOpDone = true;
++
++ memDepUnit[tid].completed(completed_inst);
++
++ count[tid]--;
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::violation(DynInstPtr &store,
+ DynInstPtr &faulting_load)
+{
- InstructionQueue<Impl>::squash()
++ memDepUnit[store->threadNumber].violation(store, faulting_load);
+}
+
+template <class Impl>
+void
- DPRINTF(IQ, "IQ: Starting to squash instructions in the IQ.\n");
++InstructionQueue<Impl>::squash(unsigned tid)
+{
- squashedSeqNum = fromCommit->commitInfo.doneSeqNum;
-
- // Setup the squash iterator to point to the tail.
- squashIt = tail;
++ DPRINTF(IQ, "[tid:%i]: Starting to squash instructions in "
++ "the IQ.\n", tid);
+
+ // Read instruction sequence number of last instruction out of the
+ // time buffer.
- if (freeEntries != numEntries) {
- doSquash();
++ squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum;
+
+ // Call doSquash if there are insts in the IQ
- memDepUnit.squash(squashedSeqNum);
++ if (count[tid] > 0) {
++ doSquash(tid);
+ }
+
+ // Also tell the memory dependence unit to squash.
- InstructionQueue<Impl>::doSquash()
++ memDepUnit[tid].squash(squashedSeqNum[tid], tid);
+}
+
+template <class Impl>
+void
- // Make sure the squash iterator isn't pointing to nothing.
- assert(squashIt != cpu->instList.end());
- // Make sure the squashed sequence number is valid.
- assert(squashedSeqNum != 0);
++InstructionQueue<Impl>::doSquash(unsigned tid)
+{
- DPRINTF(IQ, "IQ: Squashing instructions in the IQ.\n");
++ // Start at the tail.
++ ListIt squash_it = instList[tid].end();
++ --squash_it;
+
- while ((*squashIt)->seqNum > squashedSeqNum) {
- DynInstPtr squashed_inst = (*squashIt);
++ DPRINTF(IQ, "[tid:%i]: Squashing until sequence number %i!\n",
++ tid, squashedSeqNum[tid]);
+
+ // Squash any instructions younger than the squashed sequence number
+ // given.
- if (!squashed_inst->isIssued() &&
- !squashed_inst->isSquashedInIQ()) {
++ while (squash_it != instList[tid].end() &&
++ (*squash_it)->seqNum > squashedSeqNum[tid]) {
++
++ DynInstPtr squashed_inst = (*squash_it);
+
+ // Only handle the instruction if it actually is in the IQ and
+ // hasn't already been squashed in the IQ.
- // Hack for now: These below don't add themselves to the
- // dependency list, so don't try to remove them.
- if (!squashed_inst->isNonSpeculative()/* &&
- !squashed_inst->isStore()*/
- ) {
++ if (squashed_inst->threadNumber != tid ||
++ squashed_inst->isSquashedInIQ()) {
++ --squash_it;
++ continue;
++ }
++
++ if (!squashed_inst->isIssued() ||
++ (squashed_inst->isMemRef() &&
++ !squashed_inst->memOpDone)) {
+
+ // Remove the instruction from the dependency list.
- // Only remove it from the dependency graph if it was
- // placed there in the first place.
- // HACK: This assumes that instructions woken up from the
- // dependency chain aren't informed that a specific src
- // register has become ready. This may not always be true
- // in the future.
++ if (!squashed_inst->isNonSpeculative() &&
++ !squashed_inst->isStoreConditional() &&
++ !squashed_inst->isMemBarrier() &&
++ !squashed_inst->isWriteBarrier()) {
+
+ for (int src_reg_idx = 0;
+ src_reg_idx < squashed_inst->numSrcRegs();
+ src_reg_idx++)
+ {
+ PhysRegIndex src_reg =
+ squashed_inst->renamedSrcRegIdx(src_reg_idx);
+
- dependGraph[src_reg].remove(squashed_inst);
++ // Only remove it from the dependency graph if it
++ // was placed there in the first place.
++
++ // Instead of doing a linked list traversal, we
++ // can just remove these squashed instructions
++ // either at issue time, or when the register is
++ // overwritten. The only downside to this is it
++ // leaves more room for error.
++
+ if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) &&
+ src_reg < numPhysRegs) {
-
- // Might want to remove producers as well.
++ dependGraph.remove(src_reg, squashed_inst);
+ }
+
++
+ ++iqSquashedOperandsExamined;
+ }
- nonSpecInsts[squashed_inst->seqNum] = NULL;
+ } else {
- nonSpecInsts.erase(squashed_inst->seqNum);
++ NonSpecMapIt ns_inst_it =
++ nonSpecInsts.find(squashed_inst->seqNum);
++ assert(ns_inst_it != nonSpecInsts.end());
++
++ (*ns_inst_it).second = NULL;
+
- // squashedInsts.push(squashed_inst);
++ nonSpecInsts.erase(ns_inst_it);
+
+ ++iqSquashedNonSpecRemoved;
+ }
+
+ // Might want to also clear out the head of the dependency graph.
+
+ // Mark it as squashed within the IQ.
+ squashed_inst->setSquashedInIQ();
+
- DPRINTF(IQ, "IQ: Instruction PC %#x squashed.\n",
- squashed_inst->readPC());
++ // @todo: Remove this hack where several statuses are set so the
++ // inst will flow through the rest of the pipeline.
+ squashed_inst->setIssued();
+ squashed_inst->setCanCommit();
++ squashed_inst->removeInIQ();
++
++ //Update Thread IQ Count
++ count[squashed_inst->threadNumber]--;
+
+ ++freeEntries;
+
- --squashIt;
++ DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x "
++ "squashed.\n",
++ tid, squashed_inst->seqNum, squashed_inst->readPC());
+ }
+
-
- assert(freeEntries <= numEntries);
-
- if (freeEntries == numEntries) {
- tail = cpu->instList.end();
- }
-
- }
-
- template <class Impl>
- void
- InstructionQueue<Impl>::stopSquash()
- {
- // Clear up the squash variables to ensure that squashing doesn't
- // get called improperly.
- squashedSeqNum = 0;
-
- squashIt = cpu->instList.end();
- }
-
- template <class Impl>
- void
- InstructionQueue<Impl>::DependencyEntry::insert(DynInstPtr &new_inst)
- {
- //Add this new, dependent instruction at the head of the dependency
- //chain.
-
- // First create the entry that will be added to the head of the
- // dependency chain.
- DependencyEntry *new_entry = new DependencyEntry;
- new_entry->next = this->next;
- new_entry->inst = new_inst;
-
- // Then actually add it to the chain.
- this->next = new_entry;
-
- ++mem_alloc_counter;
- }
-
- template <class Impl>
- void
- InstructionQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove)
- {
- DependencyEntry *prev = this;
- DependencyEntry *curr = this->next;
-
- // Make sure curr isn't NULL. Because this instruction is being
- // removed from a dependency list, it must have been placed there at
- // an earlier time. The dependency chain should not be empty,
- // unless the instruction dependent upon it is already ready.
- if (curr == NULL) {
- return;
- }
-
- // Find the instruction to remove within the dependency linked list.
- while(curr->inst != inst_to_remove)
- {
- prev = curr;
- curr = curr->next;
-
- assert(curr != NULL);
- }
-
- // Now remove this instruction from the list.
- prev->next = curr->next;
-
- --mem_alloc_counter;
-
- // Could push this off to the destructor of DependencyEntry
- curr->inst = NULL;
-
- delete curr;
++ instList[tid].erase(squash_it--);
+ ++iqSquashedInstsExamined;
+ }
- DPRINTF(IQ, "IQ: Instruction PC %#x has src reg %i that "
+}
+
+template <class Impl>
+bool
+InstructionQueue<Impl>::addToDependents(DynInstPtr &new_inst)
+{
+ // Loop through the instruction's source registers, adding
+ // them to the dependency list if they are not ready.
+ int8_t total_src_regs = new_inst->numSrcRegs();
+ bool return_val = false;
+
+ for (int src_reg_idx = 0;
+ src_reg_idx < total_src_regs;
+ src_reg_idx++)
+ {
+ // Only add it to the dependency graph if it's not ready.
+ if (!new_inst->isReadySrcRegIdx(src_reg_idx)) {
+ PhysRegIndex src_reg = new_inst->renamedSrcRegIdx(src_reg_idx);
+
+ // Check the IQ's scoreboard to make sure the register
+ // hasn't become ready while the instruction was in flight
+ // between stages. Only if it really isn't ready should
+ // it be added to the dependency graph.
+ if (src_reg >= numPhysRegs) {
+ continue;
+ } else if (regScoreboard[src_reg] == false) {
- dependGraph[src_reg].insert(new_inst);
++ DPRINTF(IQ, "Instruction PC %#x has src reg %i that "
+ "is being added to the dependency chain.\n",
+ new_inst->readPC(), src_reg);
+
- DPRINTF(IQ, "IQ: Instruction PC %#x has src reg %i that "
++ dependGraph.insert(src_reg, new_inst);
+
+ // Change the return value to indicate that something
+ // was added to the dependency graph.
+ return_val = true;
+ } else {
- new_inst->markSrcRegReady();
++ DPRINTF(IQ, "Instruction PC %#x has src reg %i that "
+ "became ready before it reached the IQ.\n",
+ new_inst->readPC(), src_reg);
+ // Mark a register ready within the instruction.
- InstructionQueue<Impl>::createDependency(DynInstPtr &new_inst)
++ new_inst->markSrcRegReady(src_reg_idx);
+ }
+ }
+ }
+
+ return return_val;
+}
+
+template <class Impl>
+void
- //Actually nothing really needs to be marked when an
- //instruction becomes the producer of a register's value,
- //but for convenience a ptr to the producing instruction will
- //be placed in the head node of the dependency links.
++InstructionQueue<Impl>::addToProducers(DynInstPtr &new_inst)
+{
- dependGraph[dest_reg].inst = new_inst;
-
- if (dependGraph[dest_reg].next) {
- dumpDependGraph();
- panic("IQ: Dependency graph not empty!");
++ // Nothing really needs to be marked when an instruction becomes
++ // the producer of a register's value, but for convenience a ptr
++ // to the producing instruction will be placed in the head node of
++ // the dependency links.
+ int8_t total_dest_regs = new_inst->numDestRegs();
+
+ for (int dest_reg_idx = 0;
+ dest_reg_idx < total_dest_regs;
+ dest_reg_idx++)
+ {
+ PhysRegIndex dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx);
+
+ // Instructions that use the misc regs will have a reg number
+ // higher than the normal physical registers. In this case these
+ // registers are not renamed, and there is no need to track
+ // dependencies as these instructions must be executed at commit.
+ if (dest_reg >= numPhysRegs) {
+ continue;
+ }
+
- //If the instruction now has all of its source registers
++ if (!dependGraph.empty(dest_reg)) {
++ dependGraph.dump();
++ panic("Dependency graph %i not empty!", dest_reg);
+ }
+
++ dependGraph.setInst(dest_reg, new_inst);
++
+ // Mark the scoreboard to say it's not yet ready.
+ regScoreboard[dest_reg] = false;
+ }
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::addIfReady(DynInstPtr &inst)
+{
- if (inst->isControl()) {
++ // If the instruction now has all of its source registers
+ // available, then add it to the list of ready instructions.
+ if (inst->readyToIssue()) {
+
+ //Add the instruction to the proper ready list.
- DPRINTF(IQ, "IQ: Branch instruction is ready to issue, "
- "putting it onto the ready list, PC %#x.\n",
- inst->readPC());
- readyBranchInsts.push(inst);
-
- } else if (inst->isMemRef()) {
-
- DPRINTF(IQ, "IQ: Checking if memory instruction can issue.\n");
++ if (inst->isMemRef()) {
+
- memDepUnit.regsReady(inst);
-
- #if 0
- if (memDepUnit.readyToIssue(inst)) {
- DPRINTF(IQ, "IQ: Memory instruction is ready to issue, "
- "putting it onto the ready list, PC %#x.\n",
- inst->readPC());
- readyMemInsts.push(inst);
- } else {
- // Make dependent on the store.
- // Will need some way to get the store instruction it should
- // be dependent upon; then when the store issues it can
- // put the instruction on the ready list.
- // Yet another tree?
- assert(0 && "Instruction has no way to actually issue");
- }
- #endif
-
- } else if (inst->isInteger()) {
++ DPRINTF(IQ, "Checking if memory instruction can issue.\n");
+
+ // Message to the mem dependence unit that this instruction has
+ // its registers ready.
++ memDepUnit[inst->threadNumber].regsReady(inst);
+
- DPRINTF(IQ, "IQ: Integer instruction is ready to issue, "
- "putting it onto the ready list, PC %#x.\n",
- inst->readPC());
- readyIntInsts.push(inst);
++ return;
++ }
+
- } else if (inst->isFloating()) {
++ OpClass op_class = inst->opClass();
+
- DPRINTF(IQ, "IQ: Floating instruction is ready to issue, "
- "putting it onto the ready list, PC %#x.\n",
- inst->readPC());
- readyFloatInsts.push(inst);
++ DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
++ "the ready list, PC %#x opclass:%i [sn:%lli].\n",
++ inst->readPC(), op_class, inst->seqNum);
+
- } else {
- DPRINTF(IQ, "IQ: Miscellaneous instruction is ready to issue, "
- "putting it onto the ready list, PC %#x..\n",
- inst->readPC());
-
- readyMiscInsts.push(inst);
++ readyInsts[op_class].push(inst);
+
- /*
- * Caution, this function must not be called prior to tail being updated at
- * least once, otherwise it will fail the assertion. This is because
- * instList.begin() actually changes upon the insertion of an element into the
- * list when the list is empty.
- */
++ // Will need to reorder the list if either a queue is not on the list,
++ // or it has an older instruction than last time.
++ if (!queueOnList[op_class]) {
++ addToOrderList(op_class);
++ } else if (readyInsts[op_class].top()->seqNum <
++ (*readyIt[op_class]).oldestInst) {
++ listOrder.erase(readyIt[op_class]);
++ addToOrderList(op_class);
+ }
+ }
+}
+
- ListIt count_it = cpu->instList.begin();
+template <class Impl>
+int
+InstructionQueue<Impl>::countInsts()
+{
- if (tail == cpu->instList.end())
- return 0;
++ //ksewell:This works but definitely could use a cleaner write
++ //with a more intuitive way of counting. Right now it's
++ //just brute force ....
++
++#if 0
+ int total_insts = 0;
+
- while (count_it != tail) {
- if (!(*count_it)->isIssued()) {
- ++total_insts;
++ for (int i = 0; i < numThreads; ++i) {
++ ListIt count_it = instList[i].begin();
++
++ while (count_it != instList[i].end()) {
++ if (!(*count_it)->isSquashed() && !(*count_it)->isSquashedInIQ()) {
++ if (!(*count_it)->isIssued()) {
++ ++total_insts;
++ } else if ((*count_it)->isMemRef() &&
++ !(*count_it)->memOpDone) {
++ // Loads that have not been marked as executed still count
++ // towards the total instructions.
++ ++total_insts;
++ }
++ }
+
-
- ++count_it;
-
- assert(count_it != cpu->instList.end());
- }
-
- // Need to count the tail iterator as well.
- if (count_it != cpu->instList.end() &&
- (*count_it) &&
- !(*count_it)->isIssued()) {
- ++total_insts;
++ ++count_it;
+ }
- InstructionQueue<Impl>::dumpDependGraph()
+ }
+
+ return total_insts;
++#else
++ return numEntries - freeEntries;
++#endif
+}
+
+template <class Impl>
+void
- DependencyEntry *curr;
++InstructionQueue<Impl>::dumpLists()
+{
- for (int i = 0; i < numPhysRegs; ++i)
- {
- curr = &dependGraph[i];
++ for (int i = 0; i < Num_OpClasses; ++i) {
++ cprintf("Ready list %i size: %i\n", i, readyInsts[i].size());
+
- if (curr->inst) {
- cprintf("dependGraph[%i]: producer: %#x consumer: ", i,
- curr->inst->readPC());
- } else {
- cprintf("dependGraph[%i]: No producer. consumer: ", i);
- }
++ cprintf("\n");
++ }
+
- while (curr->next != NULL) {
- curr = curr->next;
++ cprintf("Non speculative list size: %i\n", nonSpecInsts.size());
+
- cprintf("%#x ", curr->inst->readPC());
- }
++ NonSpecMapIt non_spec_it = nonSpecInsts.begin();
++ NonSpecMapIt non_spec_end_it = nonSpecInsts.end();
+
- cprintf("\n");
++ cprintf("Non speculative list: ");
+
- }
++ while (non_spec_it != non_spec_end_it) {
++ cprintf("%#x [sn:%lli]", (*non_spec_it).second->readPC(),
++ (*non_spec_it).second->seqNum);
++ ++non_spec_it;
+ }
- template <class Impl>
- void
- InstructionQueue<Impl>::dumpLists()
- {
- cprintf("Ready integer list size: %i\n", readyIntInsts.size());
+
- cprintf("Ready float list size: %i\n", readyFloatInsts.size());
++ cprintf("\n");
+
- cprintf("Ready branch list size: %i\n", readyBranchInsts.size());
++ ListOrderIt list_order_it = listOrder.begin();
++ ListOrderIt list_order_end_it = listOrder.end();
++ int i = 1;
+
- cprintf("Ready misc list size: %i\n", readyMiscInsts.size());
++ cprintf("List order: ");
+
- cprintf("Squashed list size: %i\n", squashedInsts.size());
++ while (list_order_it != list_order_end_it) {
++ cprintf("%i OpClass:%i [sn:%lli] ", i, (*list_order_it).queueType,
++ (*list_order_it).oldestInst);
+
- cprintf("Non speculative list size: %i\n", nonSpecInsts.size());
++ ++list_order_it;
++ ++i;
++ }
+
- non_spec_it_t non_spec_it = nonSpecInsts.begin();
++ cprintf("\n");
++}
+
- cprintf("Non speculative list: ");
+
- while (non_spec_it != nonSpecInsts.end()) {
- cprintf("%#x ", (*non_spec_it).second->readPC());
- ++non_spec_it;
- }
++template <class Impl>
++void
++InstructionQueue<Impl>::dumpInsts()
++{
++ for (int i = 0; i < numThreads; ++i) {
++ int num = 0;
++ int valid_num = 0;
++ ListIt inst_list_it = instList[i].begin();
++
++ while (inst_list_it != instList[i].end())
++ {
++ cprintf("Instruction:%i\n",
++ num);
++ if (!(*inst_list_it)->isSquashed()) {
++ if (!(*inst_list_it)->isIssued()) {
++ ++valid_num;
++ cprintf("Count:%i\n", valid_num);
++ } else if ((*inst_list_it)->isMemRef() &&
++ !(*inst_list_it)->memOpDone) {
++ // Loads that have not been marked as executed
++ // still count towards the total instructions.
++ ++valid_num;
++ cprintf("Count:%i\n", valid_num);
++ }
++ }
+
- cprintf("\n");
++ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
++ "Issued:%i\nSquashed:%i\n",
++ (*inst_list_it)->readPC(),
++ (*inst_list_it)->seqNum,
++ (*inst_list_it)->threadNumber,
++ (*inst_list_it)->isIssued(),
++ (*inst_list_it)->isSquashed());
+
++ if ((*inst_list_it)->isMemRef()) {
++ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
++ }
+
++ cprintf("\n");
++
++ inst_list_it++;
++ ++num;
++ }
++ }
+}
--- /dev/null
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/o3/alpha_dyn_inst.hh"
+#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/store_set.hh"
+#include "cpu/o3/mem_dep_unit_impl.hh"
+
+// Force instantation of memory dependency unit using store sets and
+// AlphaSimpleImpl.
+template class MemDepUnit<StoreSet, AlphaSimpleImpl>;
++
++template <>
++int
++MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_count = 0;
++template <>
++int
++MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_insert = 0;
++template <>
++int
++MemDepUnit<StoreSet, AlphaSimpleImpl>::MemDepEntry::memdep_erase = 0;
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- #ifndef __CPU_O3_CPU_MEM_DEP_UNIT_HH__
- #define __CPU_O3_CPU_MEM_DEP_UNIT_HH__
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- #include <map>
++#ifndef __CPU_O3_MEM_DEP_UNIT_HH__
++#define __CPU_O3_MEM_DEP_UNIT_HH__
+
- public:
- MemDepUnit(Params ¶ms);
++#include <list>
+#include <set>
+
++#include "base/hashmap.hh"
++#include "base/refcnt.hh"
+#include "base/statistics.hh"
+#include "cpu/inst_seq.hh"
+
++struct SNHash {
++ size_t operator() (const InstSeqNum &seq_num) const {
++ unsigned a = (unsigned)seq_num;
++ unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF;
++
++ return hash;
++ }
++};
++
++template <class Impl>
++class InstructionQueue;
++
+/**
+ * Memory dependency unit class. This holds the memory dependence predictor.
+ * As memory operations are issued to the IQ, they are also issued to this
+ * unit, which then looks up the prediction as to what they are dependent
+ * upon. This unit must be checked prior to a memory operation being able
+ * to issue. Although this is templated, it's somewhat hard to make a generic
+ * memory dependence unit. This one is mostly for store sets; it will be
+ * quite limited in what other memory dependence predictions it can also
+ * utilize. Thus this class should be most likely be rewritten for other
+ * dependence prediction schemes.
+ */
+template <class MemDepPred, class Impl>
+class MemDepUnit {
+ public:
+ typedef typename Impl::Params Params;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
- // Will want to make this operation relatively fast. Right now it
- // is somewhat slow.
- DynInstPtr &top();
-
- void pop();
++ /** Empty constructor. Must call init() prior to using in this case. */
++ MemDepUnit() {}
++
++ /** Constructs a MemDepUnit with given parameters. */
++ MemDepUnit(Params *params);
+
++ /** Frees up any memory allocated. */
++ ~MemDepUnit();
++
++ /** Returns the name of the memory dependence unit. */
++ std::string name() const;
++
++ /** Initializes the unit with parameters and a thread id. */
++ void init(Params *params, int tid);
++
++ /** Registers statistics. */
+ void regStats();
+
++ void switchOut();
++
++ void takeOverFrom();
++
++ /** Sets the pointer to the IQ. */
++ void setIQ(InstructionQueue<Impl> *iq_ptr);
++
++ /** Inserts a memory instruction. */
+ void insert(DynInstPtr &inst);
+
++ /** Inserts a non-speculative memory instruction. */
+ void insertNonSpec(DynInstPtr &inst);
+
- void issue(DynInstPtr &inst);
++ /** Inserts a barrier instruction. */
++ void insertBarrier(DynInstPtr &barr_inst);
+
++ /** Indicate that an instruction has its registers ready. */
+ void regsReady(DynInstPtr &inst);
+
++ /** Indicate that a non-speculative instruction is ready. */
+ void nonSpecInstReady(DynInstPtr &inst);
+
- void squash(const InstSeqNum &squashed_num);
++ /** Reschedules an instruction to be re-executed. */
++ void reschedule(DynInstPtr &inst);
++
++ /** Replays all instructions that have been rescheduled by moving them to
++ * the ready list.
++ */
++ void replay(DynInstPtr &inst);
++
++ /** Completes a memory instruction. */
++ void completed(DynInstPtr &inst);
+
++ /** Completes a barrier instruction. */
++ void completeBarrier(DynInstPtr &inst);
++
++ /** Wakes any dependents of a memory instruction. */
+ void wakeDependents(DynInstPtr &inst);
+
- inline bool empty()
- { return readyInsts.empty(); }
++ /** Squashes all instructions up until a given sequence number for a
++ * specific thread.
++ */
++ void squash(const InstSeqNum &squashed_num, unsigned tid);
+
++ /** Indicates an ordering violation between a store and a younger load. */
+ void violation(DynInstPtr &store_inst, DynInstPtr &violating_load);
+
- typedef typename std::set<InstSeqNum>::iterator sn_it_t;
- typedef typename std::map<InstSeqNum, DynInstPtr>::iterator dyn_it_t;
-
- // Forward declarations so that the following two typedefs work.
- class Dependency;
- class ltDependency;
-
- typedef typename std::set<Dependency, ltDependency>::iterator dep_it_t;
- typedef typename std::map<InstSeqNum, vector<dep_it_t> >::iterator
- sd_it_t;
-
- struct Dependency {
- Dependency(const InstSeqNum &_seqNum)
- : seqNum(_seqNum), regsReady(0), memDepReady(0)
- { }
-
- Dependency(const InstSeqNum &_seqNum, bool _regsReady,
- bool _memDepReady)
- : seqNum(_seqNum), regsReady(_regsReady),
- memDepReady(_memDepReady)
- { }
-
- InstSeqNum seqNum;
- mutable bool regsReady;
- mutable bool memDepReady;
- mutable sd_it_t storeDep;
- };
++ /** Issues the given instruction */
++ void issue(DynInstPtr &inst);
++
++ /** Debugging function to dump the lists of instructions. */
++ void dumpLists();
+
+ private:
- struct ltDependency {
- bool operator() (const Dependency &lhs, const Dependency &rhs)
++ typedef typename std::list<DynInstPtr>::iterator ListIt;
+
- return lhs.seqNum < rhs.seqNum;
++ class MemDepEntry;
++
++ typedef RefCountingPtr<MemDepEntry> MemDepEntryPtr;
++
++ /** Memory dependence entries that track memory operations, marking
++ * when the instruction is ready to execute and what instructions depend
++ * upon it.
++ */
++ class MemDepEntry : public RefCounted {
++ public:
++ /** Constructs a memory dependence entry. */
++ MemDepEntry(DynInstPtr &new_inst)
++ : inst(new_inst), regsReady(false), memDepReady(false),
++ completed(false), squashed(false)
+ {
- inline void moveToReady(dep_it_t &woken_inst);
++ ++memdep_count;
++
++ DPRINTF(MemDepUnit, "Memory dependency entry created. "
++ "memdep_count=%i\n", memdep_count);
+ }
++
++ /** Frees any pointers. */
++ ~MemDepEntry()
++ {
++ for (int i = 0; i < dependInsts.size(); ++i) {
++ dependInsts[i] = NULL;
++ }
++
++ --memdep_count;
++
++ DPRINTF(MemDepUnit, "Memory dependency entry deleted. "
++ "memdep_count=%i\n", memdep_count);
++ }
++
++ /** Returns the name of the memory dependence entry. */
++ std::string name() const { return "memdepentry"; }
++
++ /** The instruction being tracked. */
++ DynInstPtr inst;
++
++ /** The iterator to the instruction's location inside the list. */
++ ListIt listIt;
++
++ /** A vector of any dependent instructions. */
++ std::vector<MemDepEntryPtr> dependInsts;
++
++ /** If the registers are ready or not. */
++ bool regsReady;
++ /** If all memory dependencies have been satisfied. */
++ bool memDepReady;
++ /** If the instruction is completed. */
++ bool completed;
++ /** If the instruction is squashed. */
++ bool squashed;
++
++ /** For debugging. */
++ static int memdep_count;
++ static int memdep_insert;
++ static int memdep_erase;
+ };
+
- /** List of instructions that have passed through rename, yet are still
- * waiting on either a memory dependence to resolve or source registers to
- * become available before they can issue.
- */
- std::set<Dependency, ltDependency> waitingInsts;
++ /** Finds the memory dependence entry in the hash map. */
++ inline MemDepEntryPtr &findInHash(const DynInstPtr &inst);
+
- /** List of instructions that have all their predicted memory dependences
- * resolved and their source registers ready.
- */
- std::set<InstSeqNum> readyInsts;
++ /** Moves an entry to the ready list. */
++ inline void moveToReady(MemDepEntryPtr &ready_inst_entry);
+
- // Change this to hold a vector of iterators, which will point to the
- // entry of the waiting instructions.
- /** List of stores' sequence numbers, each of which has a vector of
- * iterators. The iterators point to the appropriate node within
- * waitingInsts that has the depenendent instruction.
- */
- std::map<InstSeqNum, vector<dep_it_t> > storeDependents;
++ typedef m5::hash_map<InstSeqNum, MemDepEntryPtr, SNHash> MemDepHash;
+
- // For now will implement this as a map...hash table might not be too
- // bad, or could move to something that mimics the current dependency
- // graph.
- std::map<InstSeqNum, DynInstPtr> memInsts;
++ typedef typename MemDepHash::iterator MemDepHashIt;
++
++ /** A hash map of all memory dependence entries. */
++ MemDepHash memDepHash;
+
- // Iterator pointer to the top instruction which has is ready.
- // Is set by the top() call.
- dyn_it_t topInst;
++ /** A list of all instructions in the memory dependence unit. */
++ std::list<DynInstPtr> instList[Impl::MaxThreads];
+
- #endif // __CPU_O3_CPU_MEM_DEP_UNIT_HH__
++ /** A list of all instructions that are going to be replayed. */
++ std::list<DynInstPtr> instsToReplay;
+
+ /** The memory dependence predictor. It is accessed upon new
+ * instructions being added to the IQ, and responds by telling
+ * this unit what instruction the newly added instruction is dependent
+ * upon.
+ */
+ MemDepPred depPred;
+
++ bool loadBarrier;
++ InstSeqNum loadBarrierSN;
++ bool storeBarrier;
++ InstSeqNum storeBarrierSN;
++
++ /** Pointer to the IQ. */
++ InstructionQueue<Impl> *iqPtr;
++
++ /** The thread id of this memory dependence unit. */
++ int id;
++
++ /** Stat for number of inserted loads. */
+ Stats::Scalar<> insertedLoads;
++ /** Stat for number of inserted stores. */
+ Stats::Scalar<> insertedStores;
++ /** Stat for number of conflicting loads that had to wait for a store. */
+ Stats::Scalar<> conflictingLoads;
++ /** Stat for number of conflicting stores that had to wait for a store. */
+ Stats::Scalar<> conflictingStores;
+};
+
++#endif // __CPU_O3_MEM_DEP_UNIT_HH__
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- MemDepUnit<MemDepPred, Impl>::MemDepUnit(Params ¶ms)
- : depPred(params.SSITSize, params.LFSTSize)
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <map>
+
++#include "cpu/o3/inst_queue.hh"
+#include "cpu/o3/mem_dep_unit.hh"
+
+template <class MemDepPred, class Impl>
- DPRINTF(MemDepUnit, "MemDepUnit: Creating MemDepUnit object.\n");
++MemDepUnit<MemDepPred, Impl>::MemDepUnit(Params *params)
++ : depPred(params->SSITSize, params->LFSTSize), loadBarrier(false),
++ loadBarrierSN(0), storeBarrier(false), storeBarrierSN(0), iqPtr(NULL)
+{
- InstSeqNum inst_seq_num = inst->seqNum;
++ DPRINTF(MemDepUnit, "Creating MemDepUnit object.\n");
++}
++
++template <class MemDepPred, class Impl>
++MemDepUnit<MemDepPred, Impl>::~MemDepUnit()
++{
++ for (int tid=0; tid < Impl::MaxThreads; tid++) {
++
++ ListIt inst_list_it = instList[tid].begin();
++
++ MemDepHashIt hash_it;
++
++ while (!instList[tid].empty()) {
++ hash_it = memDepHash.find((*inst_list_it)->seqNum);
++
++ assert(hash_it != memDepHash.end());
++
++ memDepHash.erase(hash_it);
++
++ instList[tid].erase(inst_list_it++);
++ }
++ }
++
++ assert(MemDepEntry::memdep_count == 0);
++}
++
++template <class MemDepPred, class Impl>
++std::string
++MemDepUnit<MemDepPred, Impl>::name() const
++{
++ return "memdepunit";
++}
++
++template <class MemDepPred, class Impl>
++void
++MemDepUnit<MemDepPred, Impl>::init(Params *params, int tid)
++{
++ DPRINTF(MemDepUnit, "Creating MemDepUnit %i object.\n",tid);
++
++ id = tid;
++
++ depPred.init(params->SSITSize, params->LFSTSize);
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::regStats()
+{
+ insertedLoads
+ .name(name() + ".memDep.insertedLoads")
+ .desc("Number of loads inserted to the mem dependence unit.");
+
+ insertedStores
+ .name(name() + ".memDep.insertedStores")
+ .desc("Number of stores inserted to the mem dependence unit.");
+
+ conflictingLoads
+ .name(name() + ".memDep.conflictingLoads")
+ .desc("Number of conflicting loads.");
+
+ conflictingStores
+ .name(name() + ".memDep.conflictingStores")
+ .desc("Number of conflicting stores.");
+}
+
++template <class MemDepPred, class Impl>
++void
++MemDepUnit<MemDepPred, Impl>::switchOut()
++{
++ for (int i = 0; i < Impl::MaxThreads; ++i) {
++ instList[i].clear();
++ }
++ instsToReplay.clear();
++ memDepHash.clear();
++}
++
++template <class MemDepPred, class Impl>
++void
++MemDepUnit<MemDepPred, Impl>::takeOverFrom()
++{
++ loadBarrier = storeBarrier = false;
++ loadBarrierSN = storeBarrierSN = 0;
++ depPred.clear();
++}
++
++template <class MemDepPred, class Impl>
++void
++MemDepUnit<MemDepPred, Impl>::setIQ(InstructionQueue<Impl> *iq_ptr)
++{
++ iqPtr = iq_ptr;
++}
++
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
+{
- Dependency unresolved_dependencies(inst_seq_num);
++ unsigned tid = inst->threadNumber;
++
++ MemDepEntryPtr inst_entry = new MemDepEntry(inst);
++
++ // Add the MemDepEntry to the hash.
++ memDepHash.insert(
++ std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
++ MemDepEntry::memdep_insert++;
++
++ instList[tid].push_back(inst);
++
++ inst_entry->listIt = --(instList[tid].end());
++
++ // Check any barriers and the dependence predictor for any
++ // producing stores.
++ InstSeqNum producing_store;
++ if (inst->isLoad() && loadBarrier) {
++ producing_store = loadBarrierSN;
++ } else if (inst->isStore() && storeBarrier) {
++ producing_store = storeBarrierSN;
++ } else {
++ producing_store = depPred.checkInst(inst->readPC());
++ }
+
- InstSeqNum producing_store = depPred.checkInst(inst->readPC());
++ MemDepEntryPtr store_entry = NULL;
+
- if (producing_store == 0 ||
- storeDependents.find(producing_store) == storeDependents.end()) {
++ // If there is a producing store, try to find the entry.
++ if (producing_store != 0) {
++ MemDepHashIt hash_it = memDepHash.find(producing_store);
+
- DPRINTF(MemDepUnit, "MemDepUnit: No dependency for inst PC "
- "%#x.\n", inst->readPC());
++ if (hash_it != memDepHash.end()) {
++ store_entry = (*hash_it).second;
++ }
++ }
+
- unresolved_dependencies.storeDep = storeDependents.end();
++ // If no store entry, then instruction can issue as soon as the registers
++ // are ready.
++ if (!store_entry) {
++ DPRINTF(MemDepUnit, "No dependency for inst PC "
++ "%#x [sn:%lli].\n", inst->readPC(), inst->seqNum);
+
- readyInsts.insert(inst_seq_num);
- } else {
- unresolved_dependencies.memDepReady = true;
++ inst_entry->memDepReady = true;
+
+ if (inst->readyToIssue()) {
- waitingInsts.insert(unresolved_dependencies);
++ inst_entry->regsReady = true;
+
- DPRINTF(MemDepUnit, "MemDepUnit: Adding to dependency list; "
- "inst PC %#x is dependent on seq num %i.\n",
++ moveToReady(inst_entry);
+ }
+ } else {
- unresolved_dependencies.regsReady = true;
++ // Otherwise make the instruction dependent on the store/barrier.
++ DPRINTF(MemDepUnit, "Adding to dependency list; "
++ "inst PC %#x is dependent on [sn:%lli].\n",
+ inst->readPC(), producing_store);
+
+ if (inst->readyToIssue()) {
- // Find the store that this instruction is dependent on.
- sd_it_t store_loc = storeDependents.find(producing_store);
-
- assert(store_loc != storeDependents.end());
-
- // Record the location of the store that this instruction is
- // dependent on.
- unresolved_dependencies.storeDep = store_loc;
-
- // If it's not already ready, then add it to the renamed
- // list and the dependencies.
- dep_it_t inst_loc =
- (waitingInsts.insert(unresolved_dependencies)).first;
-
++ inst_entry->regsReady = true;
+ }
+
- (*store_loc).second.push_back(inst_loc);
-
- assert(!(*store_loc).second.empty());
+ // Add this instruction to the list of dependents.
- DPRINTF(MemDepUnit, "MemDepUnit: Inserting store PC %#x.\n",
- inst->readPC());
-
- depPred.insertStore(inst->readPC(), inst_seq_num);
-
- // Make sure this store isn't already in this list.
- assert(storeDependents.find(inst_seq_num) == storeDependents.end());
-
- // Put a dependency entry in at the store's sequence number.
- // Uh, not sure how this works...I want to create an entry but
- // I don't have anything to put into the value yet.
- storeDependents[inst_seq_num];
++ store_entry->dependInsts.push_back(inst_entry);
+
+ if (inst->isLoad()) {
+ ++conflictingLoads;
+ } else {
+ ++conflictingStores;
+ }
+ }
+
+ if (inst->isStore()) {
- assert(storeDependents.size() != 0);
++ DPRINTF(MemDepUnit, "Inserting store PC %#x [sn:%lli].\n",
++ inst->readPC(), inst->seqNum);
+
-
++ depPred.insertStore(inst->readPC(), inst->seqNum, inst->threadNumber);
+
+ ++insertedStores;
- panic("MemDepUnit: Unknown type! (most likely a barrier).");
+ } else if (inst->isLoad()) {
+ ++insertedLoads;
+ } else {
-
- memInsts[inst_seq_num] = inst;
++ panic("Unknown type! (most likely a barrier).");
+ }
- InstSeqNum inst_seq_num = inst->seqNum;
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::insertNonSpec(DynInstPtr &inst)
+{
- Dependency non_spec_inst(inst_seq_num);
++ unsigned tid = inst->threadNumber;
++
++ MemDepEntryPtr inst_entry = new MemDepEntry(inst);
+
- non_spec_inst.storeDep = storeDependents.end();
++ // Insert the MemDepEntry into the hash.
++ memDepHash.insert(
++ std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
++ MemDepEntry::memdep_insert++;
+
- waitingInsts.insert(non_spec_inst);
++ // Add the instruction to the list.
++ instList[tid].push_back(inst);
+
- DPRINTF(MemDepUnit, "MemDepUnit: Inserting store PC %#x.\n",
- inst->readPC());
-
- depPred.insertStore(inst->readPC(), inst_seq_num);
-
- // Make sure this store isn't already in this list.
- assert(storeDependents.find(inst_seq_num) == storeDependents.end());
-
- // Put a dependency entry in at the store's sequence number.
- // Uh, not sure how this works...I want to create an entry but
- // I don't have anything to put into the value yet.
- storeDependents[inst_seq_num];
++ inst_entry->listIt = --(instList[tid].end());
+
+ // Might want to turn this part into an inline function or something.
+ // It's shared between both insert functions.
+ if (inst->isStore()) {
- assert(storeDependents.size() != 0);
++ DPRINTF(MemDepUnit, "Inserting store PC %#x [sn:%lli].\n",
++ inst->readPC(), inst->seqNum);
+
-
++ depPred.insertStore(inst->readPC(), inst->seqNum, inst->threadNumber);
+
+ ++insertedStores;
- panic("MemDepUnit: Unknown type! (most likely a barrier).");
+ } else if (inst->isLoad()) {
+ ++insertedLoads;
+ } else {
-
- memInsts[inst_seq_num] = inst;
++ panic("Unknown type! (most likely a barrier).");
+ }
- typename Impl::DynInstPtr &
- MemDepUnit<MemDepPred, Impl>::top()
+}
+
+template <class MemDepPred, class Impl>
- topInst = memInsts.find( (*readyInsts.begin()) );
++void
++MemDepUnit<MemDepPred, Impl>::insertBarrier(DynInstPtr &barr_inst)
+{
- DPRINTF(MemDepUnit, "MemDepUnit: Top instruction is PC %#x.\n",
- (*topInst).second->readPC());
++ InstSeqNum barr_sn = barr_inst->seqNum;
++ if (barr_inst->isMemBarrier()) {
++ loadBarrier = true;
++ loadBarrierSN = barr_sn;
++ storeBarrier = true;
++ storeBarrierSN = barr_sn;
++ DPRINTF(MemDepUnit, "Inserted a memory barrier\n");
++ } else if (barr_inst->isWriteBarrier()) {
++ storeBarrier = true;
++ storeBarrierSN = barr_sn;
++ DPRINTF(MemDepUnit, "Inserted a write barrier\n");
++ }
++
++ unsigned tid = barr_inst->threadNumber;
++
++ MemDepEntryPtr inst_entry = new MemDepEntry(barr_inst);
++
++ // Add the MemDepEntry to the hash.
++ memDepHash.insert(
++ std::pair<InstSeqNum, MemDepEntryPtr>(barr_sn, inst_entry));
++ MemDepEntry::memdep_insert++;
+
- return (*topInst).second;
++ // Add the instruction to the instruction list.
++ instList[tid].push_back(barr_inst);
+
- MemDepUnit<MemDepPred, Impl>::pop()
++ inst_entry->listIt = --(instList[tid].end());
+}
+
+template <class MemDepPred, class Impl>
+void
- DPRINTF(MemDepUnit, "MemDepUnit: Removing instruction PC %#x.\n",
- (*topInst).second->readPC());
++MemDepUnit<MemDepPred, Impl>::regsReady(DynInstPtr &inst)
+{
- wakeDependents((*topInst).second);
++ DPRINTF(MemDepUnit, "Marking registers as ready for "
++ "instruction PC %#x [sn:%lli].\n",
++ inst->readPC(), inst->seqNum);
+
- issue((*topInst).second);
++ MemDepEntryPtr inst_entry = findInHash(inst);
+
- memInsts.erase(topInst);
++ inst_entry->regsReady = true;
+
- topInst = memInsts.end();
++ if (inst_entry->memDepReady) {
++ DPRINTF(MemDepUnit, "Instruction has its memory "
++ "dependencies resolved, adding it to the ready list.\n");
+
- MemDepUnit<MemDepPred, Impl>::regsReady(DynInstPtr &inst)
++ moveToReady(inst_entry);
++ } else {
++ DPRINTF(MemDepUnit, "Instruction still waiting on "
++ "memory dependency.\n");
++ }
+}
+
+template <class MemDepPred, class Impl>
+void
- DPRINTF(MemDepUnit, "MemDepUnit: Marking registers as ready for "
- "instruction PC %#x.\n",
- inst->readPC());
++MemDepUnit<MemDepPred, Impl>::nonSpecInstReady(DynInstPtr &inst)
+{
- InstSeqNum inst_seq_num = inst->seqNum;
++ DPRINTF(MemDepUnit, "Marking non speculative "
++ "instruction PC %#x as ready [sn:%lli].\n",
++ inst->readPC(), inst->seqNum);
+
- Dependency inst_to_find(inst_seq_num);
++ MemDepEntryPtr inst_entry = findInHash(inst);
+
- dep_it_t waiting_inst = waitingInsts.find(inst_to_find);
++ moveToReady(inst_entry);
++}
+
- assert(waiting_inst != waitingInsts.end());
++template <class MemDepPred, class Impl>
++void
++MemDepUnit<MemDepPred, Impl>::reschedule(DynInstPtr &inst)
++{
++ instsToReplay.push_back(inst);
++}
+
- if ((*waiting_inst).memDepReady) {
- DPRINTF(MemDepUnit, "MemDepUnit: Instruction has its memory "
- "dependencies resolved, adding it to the ready list.\n");
++template <class MemDepPred, class Impl>
++void
++MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst)
++{
++ DynInstPtr temp_inst;
++ bool found_inst = false;
+
- moveToReady(waiting_inst);
- } else {
- DPRINTF(MemDepUnit, "MemDepUnit: Instruction still waiting on "
- "memory dependency.\n");
++ while (!instsToReplay.empty()) {
++ temp_inst = instsToReplay.front();
+
- (*waiting_inst).regsReady = true;
++ MemDepEntryPtr inst_entry = findInHash(temp_inst);
+
- MemDepUnit<MemDepPred, Impl>::nonSpecInstReady(DynInstPtr &inst)
++ DPRINTF(MemDepUnit, "Replaying mem instruction PC %#x "
++ "[sn:%lli].\n",
++ temp_inst->readPC(), temp_inst->seqNum);
++
++ moveToReady(inst_entry);
++
++ if (temp_inst == inst) {
++ found_inst = true;
++ }
++
++ instsToReplay.pop_front();
+ }
++
++ assert(found_inst);
+}
+
+template <class MemDepPred, class Impl>
+void
- DPRINTF(MemDepUnit, "MemDepUnit: Marking non speculative "
- "instruction PC %#x as ready.\n",
- inst->readPC());
++MemDepUnit<MemDepPred, Impl>::completed(DynInstPtr &inst)
+{
- InstSeqNum inst_seq_num = inst->seqNum;
++ DPRINTF(MemDepUnit, "Completed mem instruction PC %#x "
++ "[sn:%lli].\n",
++ inst->readPC(), inst->seqNum);
++
++ unsigned tid = inst->threadNumber;
+
- Dependency inst_to_find(inst_seq_num);
++ // Remove the instruction from the hash and the list.
++ MemDepHashIt hash_it = memDepHash.find(inst->seqNum);
+
- dep_it_t waiting_inst = waitingInsts.find(inst_to_find);
++ assert(hash_it != memDepHash.end());
+
- assert(waiting_inst != waitingInsts.end());
++ instList[tid].erase((*hash_it).second->listIt);
+
- moveToReady(waiting_inst);
++ (*hash_it).second = NULL;
+
- MemDepUnit<MemDepPred, Impl>::issue(DynInstPtr &inst)
++ memDepHash.erase(hash_it);
++ MemDepEntry::memdep_erase++;
+}
+
+template <class MemDepPred, class Impl>
+void
- assert(readyInsts.find(inst->seqNum) != readyInsts.end());
-
- DPRINTF(MemDepUnit, "MemDepUnit: Issuing instruction PC %#x.\n",
- inst->readPC());
-
- // Remove the instruction from the ready list.
- readyInsts.erase(inst->seqNum);
-
- depPred.issued(inst->readPC(), inst->seqNum, inst->isStore());
++MemDepUnit<MemDepPred, Impl>::completeBarrier(DynInstPtr &inst)
+{
- // Only stores have dependents.
- if (!inst->isStore()) {
++ wakeDependents(inst);
++ completed(inst);
++
++ InstSeqNum barr_sn = inst->seqNum;
++
++ if (inst->isMemBarrier()) {
++ assert(loadBarrier && storeBarrier);
++ if (loadBarrierSN == barr_sn)
++ loadBarrier = false;
++ if (storeBarrierSN == barr_sn)
++ storeBarrier = false;
++ } else if (inst->isWriteBarrier()) {
++ assert(storeBarrier);
++ if (storeBarrierSN == barr_sn)
++ storeBarrier = false;
++ }
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::wakeDependents(DynInstPtr &inst)
+{
- // Wake any dependencies.
- sd_it_t sd_it = storeDependents.find(inst->seqNum);
++ // Only stores and barriers have dependents.
++ if (!inst->isStore() && !inst->isMemBarrier() && !inst->isWriteBarrier()) {
+ return;
+ }
+
- // If there's no entry, then return. Really there should only be
- // no entry if the instruction is a load.
- if (sd_it == storeDependents.end()) {
- DPRINTF(MemDepUnit, "MemDepUnit: Instruction PC %#x, sequence "
- "number %i has no dependents.\n",
- inst->readPC(), inst->seqNum);
++ MemDepEntryPtr inst_entry = findInHash(inst);
+
- return;
- }
-
- for (int i = 0; i < (*sd_it).second.size(); ++i ) {
- dep_it_t woken_inst = (*sd_it).second[i];
-
- DPRINTF(MemDepUnit, "MemDepUnit: Waking up a dependent inst, "
- "sequence number %i.\n",
- (*woken_inst).seqNum);
- #if 0
- // Should we have reached instructions that are actually squashed,
- // there will be no more useful instructions in this dependency
- // list. Break out early.
- if (waitingInsts.find(woken_inst) == waitingInsts.end()) {
- DPRINTF(MemDepUnit, "MemDepUnit: Dependents on inst PC %#x "
- "are squashed, starting at SN %i. Breaking early.\n",
- inst->readPC(), woken_inst);
- break;
++ for (int i = 0; i < inst_entry->dependInsts.size(); ++i ) {
++ MemDepEntryPtr woken_inst = inst_entry->dependInsts[i];
+
- #endif
++ if (!woken_inst->inst) {
++ // Potentially removed mem dep entries could be on this list
++ continue;
+ }
- if ((*woken_inst).regsReady) {
+
- (*woken_inst).memDepReady = true;
++ DPRINTF(MemDepUnit, "Waking up a dependent inst, "
++ "[sn:%lli].\n",
++ woken_inst->inst->seqNum);
++
++ if (woken_inst->regsReady && !woken_inst->squashed) {
+ moveToReady(woken_inst);
+ } else {
- storeDependents.erase(sd_it);
++ woken_inst->memDepReady = true;
+ }
+ }
+
- MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num)
++ inst_entry->dependInsts.clear();
+}
+
+template <class MemDepPred, class Impl>
+void
-
- if (!waitingInsts.empty()) {
- dep_it_t waiting_it = waitingInsts.end();
-
- --waiting_it;
-
- // Remove entries from the renamed list as long as we haven't reached
- // the end and the entries continue to be younger than the squashed.
- while (!waitingInsts.empty() &&
- (*waiting_it).seqNum > squashed_num)
- {
- if (!(*waiting_it).memDepReady &&
- (*waiting_it).storeDep != storeDependents.end()) {
- sd_it_t sd_it = (*waiting_it).storeDep;
-
- // Make sure the iterator that the store has pointing
- // back is actually to this instruction.
- assert((*sd_it).second.back() == waiting_it);
-
- // Now remove this from the store's list of dependent
- // instructions.
- (*sd_it).second.pop_back();
++MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num,
++ unsigned tid)
+{
-
- waitingInsts.erase(waiting_it--);
++ if (!instsToReplay.empty()) {
++ ListIt replay_it = instsToReplay.begin();
++ while (replay_it != instsToReplay.end()) {
++ if ((*replay_it)->threadNumber == tid &&
++ (*replay_it)->seqNum > squashed_num) {
++ instsToReplay.erase(replay_it++);
++ } else {
++ ++replay_it;
+ }
- if (!readyInsts.empty()) {
- sn_it_t ready_it = readyInsts.end();
+ }
+ }
+
- --ready_it;
++ ListIt squash_it = instList[tid].end();
++ --squash_it;
+
- // Same for the ready list.
- while (!readyInsts.empty() &&
- (*ready_it) > squashed_num)
- {
- readyInsts.erase(ready_it--);
- }
- }
++ MemDepHashIt hash_it;
+
- if (!storeDependents.empty()) {
- sd_it_t dep_it = storeDependents.end();
++ while (!instList[tid].empty() &&
++ (*squash_it)->seqNum > squashed_num) {
+
- --dep_it;
++ DPRINTF(MemDepUnit, "Squashing inst [sn:%lli]\n",
++ (*squash_it)->seqNum);
+
- // Same for the dependencies list.
- while (!storeDependents.empty() &&
- (*dep_it).first > squashed_num)
- {
- // This store's list of dependent instructions should be empty.
- assert((*dep_it).second.empty());
++ hash_it = memDepHash.find((*squash_it)->seqNum);
+
- storeDependents.erase(dep_it--);
- }
++ assert(hash_it != memDepHash.end());
+
- depPred.squash(squashed_num);
++ (*hash_it).second->squashed = true;
++
++ (*hash_it).second = NULL;
++
++ memDepHash.erase(hash_it);
++ MemDepEntry::memdep_erase++;
++
++ instList[tid].erase(squash_it--);
+ }
+
+ // Tell the dependency predictor to squash as well.
- DPRINTF(MemDepUnit, "MemDepUnit: Passing violating PCs to store sets,"
++ depPred.squash(squashed_num, tid);
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::violation(DynInstPtr &store_inst,
+ DynInstPtr &violating_load)
+{
- MemDepUnit<MemDepPred, Impl>::moveToReady(dep_it_t &woken_inst)
++ DPRINTF(MemDepUnit, "Passing violating PCs to store sets,"
+ " load: %#x, store: %#x\n", violating_load->readPC(),
+ store_inst->readPC());
+ // Tell the memory dependence unit of the violation.
+ depPred.violation(violating_load->readPC(), store_inst->readPC());
+}
+
++template <class MemDepPred, class Impl>
++void
++MemDepUnit<MemDepPred, Impl>::issue(DynInstPtr &inst)
++{
++ DPRINTF(MemDepUnit, "Issuing instruction PC %#x [sn:%lli].\n",
++ inst->readPC(), inst->seqNum);
++
++ depPred.issued(inst->readPC(), inst->seqNum, inst->isStore());
++}
++
++template <class MemDepPred, class Impl>
++inline typename MemDepUnit<MemDepPred,Impl>::MemDepEntryPtr &
++MemDepUnit<MemDepPred, Impl>::findInHash(const DynInstPtr &inst)
++{
++ MemDepHashIt hash_it = memDepHash.find(inst->seqNum);
++
++ assert(hash_it != memDepHash.end());
++
++ return (*hash_it).second;
++}
++
+template <class MemDepPred, class Impl>
+inline void
- DPRINTF(MemDepUnit, "MemDepUnit: Adding instruction sequence number %i "
- "to the ready list.\n", (*woken_inst).seqNum);
++MemDepUnit<MemDepPred, Impl>::moveToReady(MemDepEntryPtr &woken_inst_entry)
++{
++ DPRINTF(MemDepUnit, "Adding instruction [sn:%lli] "
++ "to the ready list.\n", woken_inst_entry->inst->seqNum);
++
++ assert(!woken_inst_entry->squashed);
++
++ iqPtr->addReadyMemInst(woken_inst_entry->inst);
++}
++
++
++template <class MemDepPred, class Impl>
++void
++MemDepUnit<MemDepPred, Impl>::dumpLists()
+{
- // Add it to the ready list.
- readyInsts.insert((*woken_inst).seqNum);
++ for (unsigned tid=0; tid < Impl::MaxThreads; tid++) {
++ cprintf("Instruction list %i size: %i\n",
++ tid, instList[tid].size());
++
++ ListIt inst_list_it = instList[tid].begin();
++ int num = 0;
++
++ while (inst_list_it != instList[tid].end()) {
++ cprintf("Instruction:%i\nPC:%#x\n[sn:%i]\n[tid:%i]\nIssued:%i\n"
++ "Squashed:%i\n\n",
++ num, (*inst_list_it)->readPC(),
++ (*inst_list_it)->seqNum,
++ (*inst_list_it)->threadNumber,
++ (*inst_list_it)->isIssued(),
++ (*inst_list_it)->isSquashed());
++ inst_list_it++;
++ ++num;
++ }
++ }
+
- // Remove it from the waiting instructions.
- waitingInsts.erase(woken_inst);
++ cprintf("Memory dependence hash size: %i\n", memDepHash.size());
+
++ cprintf("Memory dependence entries: %i\n", MemDepEntry::memdep_count);
+}
--- /dev/null
- ReturnAddrStack::ReturnAddrStack(unsigned _numEntries)
- : numEntries(_numEntries), usedEntries(0),
- tos(0)
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/o3/ras.hh"
+
- addrStack = new Addr[numEntries];
++void
++ReturnAddrStack::init(unsigned _numEntries)
+{
- // Not sure it's possible to really track usedEntries properly.
- // assert(usedEntries > 0);
-
++ numEntries = _numEntries;
++ usedEntries = 0;
++ tos = 0;
++
++ addrStack.resize(numEntries);
++
++ for (int i = 0; i < numEntries; ++i)
++ addrStack[i] = 0;
++}
+
++void
++ReturnAddrStack::reset()
++{
++ usedEntries = 0;
++ tos = 0;
+ for (int i = 0; i < numEntries; ++i)
+ addrStack[i] = 0;
+}
+
+void
+ReturnAddrStack::push(const Addr &return_addr)
+{
+ incrTos();
+
+ addrStack[tos] = return_addr;
+
+ if (usedEntries != numEntries) {
+ ++usedEntries;
+ }
+}
+
+void
+ReturnAddrStack::pop()
+{
+ if (usedEntries > 0) {
+ --usedEntries;
+ }
+
+ decrTos();
+}
+
+void
+ReturnAddrStack::restore(unsigned top_entry_idx,
+ const Addr &restored_target)
+{
+ tos = top_entry_idx;
+
+ addrStack[tos] = restored_target;
+}
--- /dev/null
- #ifndef __CPU_O3_CPU_RAS_HH__
- #define __CPU_O3_CPU_RAS_HH__
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- ReturnAddrStack(unsigned numEntries);
++#ifndef __CPU_O3_RAS_HH__
++#define __CPU_O3_RAS_HH__
+
+// For Addr type.
+#include "arch/isa_traits.hh"
++#include <vector>
+
++/** Return address stack class, implements a simple RAS. */
+class ReturnAddrStack
+{
+ public:
- Addr *addrStack;
++ /** Creates a return address stack, but init() must be called prior to
++ * use.
++ */
++ ReturnAddrStack() {}
+
++ /** Initializes RAS with a specified number of entries.
++ * @param numEntries Number of entries in the RAS.
++ */
++ void init(unsigned numEntries);
++
++ void reset();
++
++ /** Returns the top address on the RAS. */
+ Addr top()
+ { return addrStack[tos]; }
+
++ /** Returns the index of the top of the RAS. */
+ unsigned topIdx()
+ { return tos; }
+
++ /** Pushes an address onto the RAS. */
+ void push(const Addr &return_addr);
+
++ /** Pops the top address from the RAS. */
+ void pop();
+
++ /** Changes index to the top of the RAS, and replaces the top address with
++ * a new target.
++ * @param top_entry_idx The index of the RAS that will now be the top.
++ * @param restored_target The new target address of the new top of the RAS.
++ */
+ void restore(unsigned top_entry_idx, const Addr &restored_target);
+
+ private:
++ /** Increments the top of stack index. */
+ inline void incrTos()
+ { if (++tos == numEntries) tos = 0; }
+
++ /** Decrements the top of stack index. */
+ inline void decrTos()
+ { tos = (tos == 0 ? numEntries - 1 : tos - 1); }
+
- #endif // __CPU_O3_CPU_RAS_HH__
++ /** The RAS itself. */
++ std::vector<Addr> addrStack;
+
++ /** The number of entries in the RAS. */
+ unsigned numEntries;
+
++ /** The number of used entries in the RAS. */
+ unsigned usedEntries;
+
++ /** The top of stack index. */
+ unsigned tos;
+};
+
++#endif // __CPU_O3_RAS_HH__
--- /dev/null
- #ifndef __CPU_O3_CPU_REGFILE_HH__
- #define __CPU_O3_CPU_REGFILE_HH__
-
- // @todo: Destructor
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- // This really only depends on the ISA, and not the Impl. It might be nicer
- // to see if I can make it depend on nothing...
- // Things that are in the ifdef FULL_SYSTEM are pretty dependent on the ISA,
- // and should go in the AlphaFullCPU.
++#ifndef __CPU_O3_REGFILE_HH__
++#define __CPU_O3_REGFILE_HH__
+
+#include "arch/isa_traits.hh"
+#include "arch/faults.hh"
+#include "base/trace.hh"
+#include "config/full_system.hh"
+#include "cpu/o3/comm.hh"
+
+#if FULL_SYSTEM
+#include "kern/kernel_stats.hh"
+
+#endif
+
- //Note that most of the definitions of the IntReg, FloatReg, etc. exist
- //within the Impl/ISA class and not within this PhysRegFile class.
-
- //Will need some way to allow stuff like swap_palshadow to access the
- //correct registers. Might require code changes to swap_palshadow and
- //other execution contexts.
-
- //Will make these registers public for now, but they probably should
- //be private eventually with some accessor functions.
++#include <vector>
+
++/**
++ * Simple physical register file class.
++ * This really only depends on the ISA, and not the Impl. Things that are
++ * in the ifdef FULL_SYSTEM are pretty dependent on the ISA, and probably
++ * should go in the AlphaFullCPU.
++ */
+template <class Impl>
+class PhysRegFile
+{
+ protected:
+ typedef TheISA::IntReg IntReg;
+ typedef TheISA::FloatReg FloatReg;
+ typedef TheISA::MiscRegFile MiscRegFile;
+ typedef TheISA::MiscReg MiscReg;
++ // Note that most of the definitions of the IntReg, FloatReg, etc. exist
++ // within the Impl/ISA class and not within this PhysRegFile class.
+
- intRegFile[reg_idx] = val;
++ // Will make these registers public for now, but they probably should
++ // be private eventually with some accessor functions.
+ public:
+ typedef typename Impl::FullCPU FullCPU;
+
++ /**
++ * Constructs a physical register file with the specified amount of
++ * integer and floating point registers.
++ */
+ PhysRegFile(unsigned _numPhysicalIntRegs,
+ unsigned _numPhysicalFloatRegs);
+
+ //Everything below should be pretty well identical to the normal
+ //register file that exists within AlphaISA class.
+ //The duplication is unfortunate but it's better than having
+ //different ways to access certain registers.
+
+ //Add these in later when everything else is in place
+// void serialize(std::ostream &os);
+// void unserialize(Checkpoint *cp, const std::string §ion);
+
++ /** Reads an integer register. */
+ uint64_t readIntReg(PhysRegIndex reg_idx)
+ {
+ assert(reg_idx < numPhysicalIntRegs);
+
+ DPRINTF(IEW, "RegFile: Access to int register %i, has data "
+ "%i\n", int(reg_idx), intRegFile[reg_idx]);
+ return intRegFile[reg_idx];
+ }
+
+ FloatReg readFloatReg(PhysRegIndex reg_idx, int width)
+ {
+ // Remove the base Float reg dependency.
+ reg_idx = reg_idx - numPhysicalIntRegs;
+
+ assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
+
+ FloatReg floatReg = floatRegFile.readReg(reg_idx, width);
+
+ DPRINTF(IEW, "RegFile: Access to %d byte float register %i, has "
+ "data %8.8d\n", int(reg_idx), (double)floatReg);
+
+ return floatReg;
+ }
+
++ /** Reads a floating point register (double precision). */
+ FloatReg readFloatReg(PhysRegIndex reg_idx)
+ {
+ // Remove the base Float reg dependency.
+ reg_idx = reg_idx - numPhysicalIntRegs;
+
+ assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
+
+ FloatReg floatReg = floatRegFile.readReg(reg_idx);
+
+ DPRINTF(IEW, "RegFile: Access to float register %i, has "
+ "data %8.8d\n", int(reg_idx), (double)floatReg);
+
+ return floatReg;
+ }
+
++ /** Reads a floating point register as an integer. */
+ FloatRegBits readFloatRegBits(PhysRegIndex reg_idx, int width)
+ {
+ // Remove the base Float reg dependency.
+ reg_idx = reg_idx - numPhysicalIntRegs;
+
+ assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
+
+ FloatRegBits floatRegBits = floatRegFile.readRegBits(reg_idx, width);
+
+ DPRINTF(IEW, "RegFile: Access to %d byte float register %i as int, "
+ "has data %lli\n", int(reg_idx), (uint64_t)floatRegBits);
+
+ return floatRegBits;
+ }
+
+ FloatRegBits readFloatRegBits(PhysRegIndex reg_idx)
+ {
+ // Remove the base Float reg dependency.
+ reg_idx = reg_idx - numPhysicalIntRegs;
+
+ assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
+
+ FloatRegBits floatRegBits = floatRegFile.readRegBits(reg_idx);
+
+ DPRINTF(IEW, "RegFile: Access to float register %i as int, "
+ "has data %lli\n", int(reg_idx), (uint64_t)floatRegBits);
+
+ return floatRegBits;
+ }
+
++ /** Sets an integer register to the given value. */
+ void setIntReg(PhysRegIndex reg_idx, uint64_t val)
+ {
+ assert(reg_idx < numPhysicalIntRegs);
+
+ DPRINTF(IEW, "RegFile: Setting int register %i to %lli\n",
+ int(reg_idx), val);
+
- floatRegFile.setReg(reg_idx, val, width);
++ if (reg_idx != TheISA::ZeroReg)
++ intRegFile[reg_idx] = val;
+ }
+
++ /** Sets a single precision floating point register to the given value. */
+ void setFloatReg(PhysRegIndex reg_idx, FloatReg val, int width)
+ {
+ // Remove the base Float reg dependency.
+ reg_idx = reg_idx - numPhysicalIntRegs;
+
+ assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
+
+ DPRINTF(IEW, "RegFile: Setting float register %i to %8.8d\n",
+ int(reg_idx), (double)val);
+
- floatRegFile.setReg(reg_idx, val);
++ if (reg_idx != TheISA::ZeroReg)
++ floatRegFile.setReg(reg_idx, val, width);
+ }
+
++ /** Sets a double precision floating point register to the given value. */
+ void setFloatReg(PhysRegIndex reg_idx, FloatReg val)
+ {
+ // Remove the base Float reg dependency.
+ reg_idx = reg_idx - numPhysicalIntRegs;
+
+ assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
+
+ DPRINTF(IEW, "RegFile: Setting float register %i to %8.8d\n",
+ int(reg_idx), (double)val);
+
-
- floatRegFile.setRegBits(reg_idx, val);
- }
-
- uint64_t readPC()
- {
- return pc;
- }
-
- void setPC(uint64_t val)
- {
- pc = val;
++ if (reg_idx != TheISA::ZeroReg)
++ floatRegFile.setReg(reg_idx, val);
+ }
+
++ /** Sets a floating point register to the given integer value. */
+ void setFloatRegBits(PhysRegIndex reg_idx, FloatRegBits val, int width)
+ {
+ // Remove the base Float reg dependency.
+ reg_idx = reg_idx - numPhysicalIntRegs;
+
+ assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
+
+ DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n",
+ int(reg_idx), (uint64_t)val);
+
+ floatRegFile.setRegBits(reg_idx, val, width);
+ }
+
+ void setFloatRegBits(PhysRegIndex reg_idx, FloatRegBits val)
+ {
+ // Remove the base Float reg dependency.
+ reg_idx = reg_idx - numPhysicalIntRegs;
+
+ assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
+
+ DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n",
+ int(reg_idx), (uint64_t)val);
- void setNextPC(uint64_t val)
+ }
+
- npc = val;
++ MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault,
++ unsigned thread_id)
+ {
- //Consider leaving this stuff and below in some implementation specific
- //file as opposed to the general register file. Or have a derived class.
- MiscReg readMiscReg(int misc_reg)
++ return miscRegs[thread_id].readRegWithEffect(misc_reg, fault,
++ cpu->xcBase(thread_id));
+ }
+
- // Dummy function for now.
- // @todo: Fix this once proxy XC is used.
- return 0;
++ Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned thread_id)
+ {
- Fault setMiscReg(int misc_reg, const MiscReg &val)
++ return miscRegs[thread_id].setReg(misc_reg, val);
+ }
+
- // Dummy function for now.
- // @todo: Fix this once proxy XC is used.
- return NoFault;
++ Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val,
++ unsigned thread_id)
+ {
- // These should be private eventually, but will be public for now
- // so that I can hack around the initregs issue.
++ return miscRegs[thread_id].setRegWithEffect(misc_reg, val,
++ cpu->xcBase(thread_id));
+ }
+
+#if FULL_SYSTEM
+ int readIntrFlag() { return intrflag; }
++ /** Sets an interrupt flag. */
+ void setIntrFlag(int val) { intrflag = val; }
+#endif
+
- IntReg *intRegFile;
+ public:
+ /** (signed) integer register file. */
- FloatReg *floatRegFile;
++ std::vector<IntReg> intRegFile;
+
+ /** Floating point register file. */
- MiscRegFile miscRegs;
-
- /** Program counter. */
- Addr pc;
-
- /** Next-cycle program counter. */
- Addr npc;
++ std::vector<FloatReg> floatRegFile;
+
+ /** Miscellaneous register file. */
- // This is ISA specifc stuff; remove it eventually once ISAImpl is used
- // IntReg palregs[NumIntRegs]; // PAL shadow registers
++ MiscRegFile miscRegs[Impl::MaxThreads];
+
+#if FULL_SYSTEM
+ private:
- bool pal_shadow; // using pal_shadow registers
+ int intrflag; // interrupt flag
- intRegFile = new IntReg[numPhysicalIntRegs];
- floatRegFile = new FloatReg[numPhysicalFloatRegs];
+#endif
+
+ private:
++ /** CPU pointer. */
+ FullCPU *cpu;
+
+ public:
++ /** Sets the CPU pointer. */
+ void setCPU(FullCPU *cpu_ptr) { cpu = cpu_ptr; }
+
++ /** Number of physical integer registers. */
+ unsigned numPhysicalIntRegs;
++ /** Number of physical floating point registers. */
+ unsigned numPhysicalFloatRegs;
+};
+
+template <class Impl>
+PhysRegFile<Impl>::PhysRegFile(unsigned _numPhysicalIntRegs,
+ unsigned _numPhysicalFloatRegs)
+ : numPhysicalIntRegs(_numPhysicalIntRegs),
+ numPhysicalFloatRegs(_numPhysicalFloatRegs)
+{
- memset(intRegFile, 0, sizeof(*intRegFile));
- memset(floatRegFile, 0, sizeof(*floatRegFile));
++ intRegFile.resize(numPhysicalIntRegs);
++ floatRegFile.resize(numPhysicalFloatRegs);
+
- #endif // __CPU_O3_CPU_REGFILE_HH__
++ //memset(intRegFile, 0, sizeof(*intRegFile));
++ //memset(floatRegFile, 0, sizeof(*floatRegFile));
+}
+
++#endif
--- /dev/null
- template class SimpleRename<AlphaSimpleImpl>;
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/o3/alpha_dyn_inst.hh"
+#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/rename_impl.hh"
+
++template class DefaultRename<AlphaSimpleImpl>;
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- // Todo:
- // Fix up trap and barrier handling.
- // May want to have different statuses to differentiate the different stall
- // conditions.
-
- #ifndef __CPU_O3_CPU_SIMPLE_RENAME_HH__
- #define __CPU_O3_CPU_SIMPLE_RENAME_HH__
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- // Will need rename maps for both the int reg file and fp reg file.
- // Or change rename map class to handle both. (RegFile handles both.)
++#ifndef __CPU_O3_RENAME_HH__
++#define __CPU_O3_RENAME_HH__
+
+#include <list>
+
+#include "base/statistics.hh"
+#include "base/timebuf.hh"
+
- class SimpleRename
++/**
++ * DefaultRename handles both single threaded and SMT rename. Its
++ * width is specified by the parameters; each cycle it tries to rename
++ * that many instructions. It holds onto the rename history of all
++ * instructions with destination registers, storing the
++ * arch. register, the new physical register, and the old physical
++ * register, to allow for undoing of mappings if squashing happens, or
++ * freeing up registers upon commit. Rename handles blocking if the
++ * ROB, IQ, or LSQ is going to be full. Rename also handles barriers,
++ * and does so by stalling on the instruction until the ROB is empty
++ * and there are no instructions in flight to the ROB.
++ */
+template<class Impl>
- typedef typename CPUPol::FetchStruct FetchStruct;
++class DefaultRename
+{
+ public:
+ // Typedefs from the Impl.
+ typedef typename Impl::CPUPol CPUPol;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::FullCPU FullCPU;
+ typedef typename Impl::Params Params;
+
-
- // Typedefs from the CPUPol
++ // Typedefs from the CPUPol
+ typedef typename CPUPol::DecodeStruct DecodeStruct;
+ typedef typename CPUPol::RenameStruct RenameStruct;
+ typedef typename CPUPol::TimeStruct TimeStruct;
- // Rename will block if ROB becomes full or issue queue becomes full,
- // or there are no free registers to rename to.
- // Only case where rename squashes is if IEW squashes.
- enum Status {
+ typedef typename CPUPol::FreeList FreeList;
+ typedef typename CPUPol::RenameMap RenameMap;
++ // These are used only for initialization.
++ typedef typename CPUPol::IEW IEW;
++ typedef typename CPUPol::Commit Commit;
+
+ // Typedefs from the ISA.
+ typedef TheISA::RegIndex RegIndex;
+
++ // A list is used to queue the instructions. Barrier insts must
++ // be added to the front of the list, which is the only reason for
++ // using a list instead of a queue. (Most other stages use a
++ // queue)
++ typedef std::list<DynInstPtr> InstQueue;
++
+ public:
- BarrierStall
++ /** Overall rename status. Used to determine if the CPU can
++ * deschedule itself due to a lack of activity.
++ */
++ enum RenameStatus {
++ Active,
++ Inactive
++ };
++
++ /** Individual thread status. */
++ enum ThreadStatus {
+ Running,
+ Idle,
++ StartSquash,
+ Squashing,
+ Blocked,
+ Unblocking,
- Status _status;
++ SerializeStall
+ };
+
+ private:
- SimpleRename(Params ¶ms);
++ /** Rename status. */
++ RenameStatus _status;
++
++ /** Per-thread status. */
++ ThreadStatus renameStatus[Impl::MaxThreads];
+
+ public:
- void setRenameMap(RenameMap *rm_ptr);
++ /** DefaultRename constructor. */
++ DefaultRename(Params *params);
+
++ /** Returns the name of rename. */
++ std::string name() const;
++
++ /** Registers statistics. */
+ void regStats();
+
++ /** Sets CPU pointer. */
+ void setCPU(FullCPU *cpu_ptr);
+
++ /** Sets the main backwards communication time buffer pointer. */
+ void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
+
++ /** Sets pointer to time buffer used to communicate to the next stage. */
+ void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr);
+
++ /** Sets pointer to time buffer coming from decode. */
+ void setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr);
+
- void dumpHistory();
++ /** Sets pointer to IEW stage. Used only for initialization. */
++ void setIEWStage(IEW *iew_stage)
++ { iew_ptr = iew_stage; }
++
++ /** Sets pointer to commit stage. Used only for initialization. */
++ void setCommitStage(Commit *commit_stage)
++ { commit_ptr = commit_stage; }
++
++ private:
++ /** Pointer to IEW stage. Used only for initialization. */
++ IEW *iew_ptr;
++
++ /** Pointer to commit stage. Used only for initialization. */
++ Commit *commit_ptr;
+
++ public:
++ /** Initializes variables for the stage. */
++ void initStage();
++
++ /** Sets pointer to list of active threads. */
++ void setActiveThreads(std::list<unsigned> *at_ptr);
++
++ /** Sets pointer to rename maps (per-thread structures). */
++ void setRenameMap(RenameMap rm_ptr[Impl::MaxThreads]);
++
++ /** Sets pointer to the free list. */
+ void setFreeList(FreeList *fl_ptr);
+
- void tick();
++ /** Sets pointer to the scoreboard. */
++ void setScoreboard(Scoreboard *_scoreboard);
+
- void rename();
++ void switchOut();
++
++ void doSwitchOut();
++
++ void takeOverFrom();
+
- void squash();
++ /** Squashes all instructions in a thread. */
++ void squash(unsigned tid);
++
++ /** Ticks rename, which processes all input signals and attempts to rename
++ * as many instructions as possible.
++ */
++ void tick();
+
- void block();
++ /** Debugging function used to dump history buffer of renamings. */
++ void dumpHistory();
+
+ private:
- inline void unblock();
++ /** Determines what to do based on rename's current status.
++ * @param status_change rename() sets this variable if there was a status
++ * change (ie switching from blocking to unblocking).
++ * @param tid Thread id to rename instructions from.
++ */
++ void rename(bool &status_change, unsigned tid);
++
++ /** Renames instructions for the given thread. Also handles serializing
++ * instructions.
++ */
++ void renameInsts(unsigned tid);
++
++ /** Inserts unused instructions from a given thread into the skid buffer,
++ * to be renamed once rename unblocks.
++ */
++ void skidInsert(unsigned tid);
++
++ /** Separates instructions from decode into individual lists of instructions
++ * sorted by thread.
++ */
++ void sortInsts();
++
++ /** Returns if all of the skid buffers are empty. */
++ bool skidsEmpty();
++
++ /** Updates overall rename status based on all of the threads' statuses. */
++ void updateStatus();
++
++ /** Switches rename to blocking, and signals back that rename has become
++ * blocked.
++ * @return Returns true if there is a status change.
++ */
++ bool block(unsigned tid);
++
++ /** Switches rename to unblocking if the skid buffer is empty, and signals
++ * back that rename has unblocked.
++ * @return Returns true if there is a status change.
++ */
++ bool unblock(unsigned tid);
+
- void doSquash();
++ /** Executes actual squash, removing squashed instructions. */
++ void doSquash(unsigned tid);
+
- void removeFromHistory(InstSeqNum inst_seq_num);
++ /** Removes a committed instruction's rename history. */
++ void removeFromHistory(InstSeqNum inst_seq_num, unsigned tid);
+
- inline void renameSrcRegs(DynInstPtr &inst);
++ /** Renames the source registers of an instruction. */
++ inline void renameSrcRegs(DynInstPtr &inst, unsigned tid);
+
- inline void renameDestRegs(DynInstPtr &inst);
++ /** Renames the destination registers of an instruction. */
++ inline void renameDestRegs(DynInstPtr &inst, unsigned tid);
+
- inline int calcFreeROBEntries();
++ /** Calculates the number of free ROB entries for a specific thread. */
++ inline int calcFreeROBEntries(unsigned tid);
+
- inline int calcFreeIQEntries();
++ /** Calculates the number of free IQ entries for a specific thread. */
++ inline int calcFreeIQEntries(unsigned tid);
+
- /** Holds the previous information for each rename.
- * Note that often times the inst may have been deleted, so only access
- * the pointer for the address and do not dereference it.
++ /** Calculates the number of free LSQ entries for a specific thread. */
++ inline int calcFreeLSQEntries(unsigned tid);
+
- newPhysReg(_newPhysReg), prevPhysReg(_prevPhysReg),
- placeHolder(false)
- {
- }
-
- /** Constructor used specifically for cases where a place holder
- * rename history entry is being made.
- */
- RenameHistory(InstSeqNum _instSeqNum)
- : instSeqNum(_instSeqNum), archReg(0), newPhysReg(0),
- prevPhysReg(0), placeHolder(true)
++ /** Returns the number of valid instructions coming from decode. */
++ unsigned validInsts();
++
++ /** Reads signals telling rename to block/unblock. */
++ void readStallSignals(unsigned tid);
++
++ /** Checks if any stages are telling rename to block. */
++ bool checkStall(unsigned tid);
++
++ void readFreeEntries(unsigned tid);
++
++ bool checkSignalsAndUpdate(unsigned tid);
++
++ /** Either serializes on the next instruction available in the InstQueue,
++ * or records that it must serialize on the next instruction to enter
++ * rename.
++ * @param inst_list The list of younger, unprocessed instructions for the
++ * thread that has the serializeAfter instruction.
++ * @param tid The thread id.
++ */
++ void serializeAfter(InstQueue &inst_list, unsigned tid);
++
++ /** Holds the information for each destination register rename. It holds
++ * the instruction's sequence number, the arch register, the old physical
++ * register for that arch. register, and the new physical register.
+ */
+ struct RenameHistory {
+ RenameHistory(InstSeqNum _instSeqNum, RegIndex _archReg,
+ PhysRegIndex _newPhysReg, PhysRegIndex _prevPhysReg)
+ : instSeqNum(_instSeqNum), archReg(_archReg),
- bool placeHolder;
++ newPhysReg(_newPhysReg), prevPhysReg(_prevPhysReg)
+ {
+ }
+
++ /** The sequence number of the instruction that renamed. */
+ InstSeqNum instSeqNum;
++ /** The architectural register index that was renamed. */
+ RegIndex archReg;
++ /** The new physical register that the arch. register is renamed to. */
+ PhysRegIndex newPhysReg;
++ /** The old physical register that the arch. register was renamed to. */
+ PhysRegIndex prevPhysReg;
- std::list<RenameHistory> historyBuffer;
+ };
+
- /** CPU interface. */
++ /** A per-thread list of all destination register renames, used to either
++ * undo rename mappings or free old physical registers.
++ */
++ std::list<RenameHistory> historyBuffer[Impl::MaxThreads];
+
- // Interfaces to objects outside of rename.
- /** Time buffer interface. */
++ /** Pointer to CPU. */
+ FullCPU *cpu;
+
- // Might not be the best name as not only decode will read it.
++ /** Pointer to main time buffer used for backwards communication. */
+ TimeBuffer<TimeStruct> *timeBuffer;
+
+ /** Wire to get IEW's output from backwards time buffer. */
+ typename TimeBuffer<TimeStruct>::wire fromIEW;
+
+ /** Wire to get commit's output from backwards time buffer. */
+ typename TimeBuffer<TimeStruct>::wire fromCommit;
+
+ /** Wire to write information heading to previous stages. */
- std::queue<DecodeStruct> skidBuffer;
+ typename TimeBuffer<TimeStruct>::wire toDecode;
+
+ /** Rename instruction queue. */
+ TimeBuffer<RenameStruct> *renameQueue;
+
+ /** Wire to write any information heading to IEW. */
+ typename TimeBuffer<RenameStruct>::wire toIEW;
+
+ /** Decode instruction queue interface. */
+ TimeBuffer<DecodeStruct> *decodeQueue;
+
+ /** Wire to get decode's output from decode queue. */
+ typename TimeBuffer<DecodeStruct>::wire fromDecode;
+
++ /** Queue of all instructions coming from decode this cycle. */
++ InstQueue insts[Impl::MaxThreads];
++
+ /** Skid buffer between rename and decode. */
- SimpleRenameMap *renameMap;
++ InstQueue skidBuffer[Impl::MaxThreads];
+
+ /** Rename map interface. */
- /** The instruction that rename is currently on. It needs to have
- * persistent state so that when a stall occurs in the middle of a
- * group of instructions, it can restart at the proper instruction.
++ RenameMap *renameMap[Impl::MaxThreads];
+
+ /** Free list interface. */
+ FreeList *freeList;
+
++ /** Pointer to the list of active threads. */
++ std::list<unsigned> *activeThreads;
++
++ /** Pointer to the scoreboard. */
++ Scoreboard *scoreboard;
++
++ /** Count of instructions in progress that have been sent off to the IQ
++ * and ROB, but are not yet included in their occupancy counts.
++ */
++ int instsInProgress[Impl::MaxThreads];
++
++ /** Variable that tracks if decode has written to the time buffer this
++ * cycle. Used to tell CPU if there is activity this cycle.
++ */
++ bool wroteToTimeBuffer;
++
++ /** Structures whose free entries impact the amount of instructions that
++ * can be renamed.
++ */
++ struct FreeEntries {
++ unsigned iqEntries;
++ unsigned lsqEntries;
++ unsigned robEntries;
++ };
++
++ /** Per-thread tracking of the number of free entries of back-end
++ * structures.
++ */
++ FreeEntries freeEntries[Impl::MaxThreads];
++
++ /** Records if the ROB is empty. In SMT mode the ROB may be dynamically
++ * partitioned between threads, so the ROB must tell rename when it is
++ * empty.
++ */
++ bool emptyROB[Impl::MaxThreads];
++
++ /** Source of possible stalls. */
++ struct Stalls {
++ bool iew;
++ bool commit;
++ };
++
++ /** Tracks which stages are telling rename to stall. */
++ Stalls stalls[Impl::MaxThreads];
++
++ /** The serialize instruction that rename has stalled on. */
++ DynInstPtr serializeInst[Impl::MaxThreads];
++
++ /** Records if rename needs to serialize on the next instruction for any
++ * thread.
++ */
++ bool serializeOnNextInst[Impl::MaxThreads];
++
+ /** Delay between iew and rename, in ticks. */
+ int iewToRenameDelay;
+
+ /** Delay between decode and rename, in ticks. */
+ int decodeToRenameDelay;
+
+ /** Delay between commit and rename, in ticks. */
+ unsigned commitToRenameDelay;
+
+ /** Rename width, in instructions. */
+ unsigned renameWidth;
+
+ /** Commit width, in instructions. Used so rename knows how many
+ * instructions might have freed registers in the previous cycle.
+ */
+ unsigned commitWidth;
+
- unsigned numInst;
++ /** The index of the instruction in the time buffer to IEW that rename is
++ * currently using.
++ */
++ unsigned toIEWIndex;
++
++ /** Whether or not rename needs to block this cycle. */
++ bool blockThisCycle;
++
++ /** The number of threads active in rename. */
++ unsigned numThreads;
++
++ /** The maximum skid buffer size. */
++ unsigned skidBufferMax;
++
++ /** Enum to record the source of a structure full stall. Can come from
++ * either ROB, IQ, LSQ, and it is prioritized in that order.
++ */
++ enum FullSource {
++ ROB,
++ IQ,
++ LSQ,
++ NONE
++ };
++
++ /** Function used to increment the stat that corresponds to the source of
++ * the stall.
+ */
- Stats::Scalar<> renameHBPlaceHolders;
++ inline void incrFullStat(const FullSource &source);
+
++ /** Stat for total number of cycles spent squashing. */
+ Stats::Scalar<> renameSquashCycles;
++ /** Stat for total number of cycles spent idle. */
+ Stats::Scalar<> renameIdleCycles;
++ /** Stat for total number of cycles spent blocking. */
+ Stats::Scalar<> renameBlockCycles;
++ /** Stat for total number of cycles spent stalling for a serializing inst. */
++ Stats::Scalar<> renameSerializeStallCycles;
++ /** Stat for total number of cycles spent running normally. */
++ Stats::Scalar<> renameRunCycles;
++ /** Stat for total number of cycles spent unblocking. */
+ Stats::Scalar<> renameUnblockCycles;
++ /** Stat for total number of renamed instructions. */
+ Stats::Scalar<> renameRenamedInsts;
++ /** Stat for total number of squashed instructions that rename discards. */
+ Stats::Scalar<> renameSquashedInsts;
++ /** Stat for total number of times that the ROB starts a stall in rename. */
+ Stats::Scalar<> renameROBFullEvents;
++ /** Stat for total number of times that the IQ starts a stall in rename. */
+ Stats::Scalar<> renameIQFullEvents;
++ /** Stat for total number of times that the LSQ starts a stall in rename. */
++ Stats::Scalar<> renameLSQFullEvents;
++ /** Stat for total number of times that rename runs out of free registers
++ * to use to rename. */
+ Stats::Scalar<> renameFullRegistersEvents;
++ /** Stat for total number of renamed destination registers. */
+ Stats::Scalar<> renameRenamedOperands;
++ /** Stat for total number of source register rename lookups. */
+ Stats::Scalar<> renameRenameLookups;
- Stats::Scalar<> renameValidUndoneMaps;
++ /** Stat for total number of committed renaming mappings. */
+ Stats::Scalar<> renameCommittedMaps;
++ /** Stat for total number of mappings that were undone due to a squash. */
+ Stats::Scalar<> renameUndoneMaps;
- #endif // __CPU_O3_CPU_SIMPLE_RENAME_HH__
++ Stats::Scalar<> renamedSerializing;
++ Stats::Scalar<> renamedTempSerializing;
++ Stats::Scalar<> renameSkidInsts;
+};
+
++#endif // __CPU_O3_RENAME_HH__
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- SimpleRename<Impl>::SimpleRename(Params ¶ms)
- : iewToRenameDelay(params.iewToRenameDelay),
- decodeToRenameDelay(params.decodeToRenameDelay),
- commitToRenameDelay(params.commitToRenameDelay),
- renameWidth(params.renameWidth),
- commitWidth(params.commitWidth),
- numInst(0)
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <list>
+
+#include "config/full_system.hh"
+#include "cpu/o3/rename.hh"
+
++using namespace std;
++
++template <class Impl>
++DefaultRename<Impl>::DefaultRename(Params *params)
++ : iewToRenameDelay(params->iewToRenameDelay),
++ decodeToRenameDelay(params->decodeToRenameDelay),
++ commitToRenameDelay(params->commitToRenameDelay),
++ renameWidth(params->renameWidth),
++ commitWidth(params->commitWidth),
++ numThreads(params->numberOfThreads)
++{
++ _status = Inactive;
++
++ for (int i=0; i< numThreads; i++) {
++ renameStatus[i] = Idle;
++
++ freeEntries[i].iqEntries = 0;
++ freeEntries[i].lsqEntries = 0;
++ freeEntries[i].robEntries = 0;
++
++ stalls[i].iew = false;
++ stalls[i].commit = false;
++ serializeInst[i] = NULL;
++
++ instsInProgress[i] = 0;
++
++ emptyROB[i] = true;
++
++ serializeOnNextInst[i] = false;
++ }
++
++ // @todo: Make into a parameter.
++ skidBufferMax = (2 * (iewToRenameDelay * params->decodeWidth)) + renameWidth;
++}
++
+template <class Impl>
- _status = Idle;
++std::string
++DefaultRename<Impl>::name() const
+{
- SimpleRename<Impl>::regStats()
++ return cpu->name() + ".rename";
+}
+
+template <class Impl>
+void
- .name(name() + ".renameSquashCycles")
++DefaultRename<Impl>::regStats()
+{
+ renameSquashCycles
- .name(name() + ".renameIdleCycles")
++ .name(name() + ".RENAME:SquashCycles")
+ .desc("Number of cycles rename is squashing")
+ .prereq(renameSquashCycles);
+ renameIdleCycles
- .name(name() + ".renameBlockCycles")
++ .name(name() + ".RENAME:IdleCycles")
+ .desc("Number of cycles rename is idle")
+ .prereq(renameIdleCycles);
+ renameBlockCycles
- .name(name() + ".renameUnblockCycles")
++ .name(name() + ".RENAME:BlockCycles")
+ .desc("Number of cycles rename is blocking")
+ .prereq(renameBlockCycles);
++ renameSerializeStallCycles
++ .name(name() + ".RENAME:serializeStallCycles")
++ .desc("count of cycles rename stalled for serializing inst")
++ .flags(Stats::total);
++ renameRunCycles
++ .name(name() + ".RENAME:RunCycles")
++ .desc("Number of cycles rename is running")
++ .prereq(renameIdleCycles);
+ renameUnblockCycles
- .name(name() + ".renameRenamedInsts")
++ .name(name() + ".RENAME:UnblockCycles")
+ .desc("Number of cycles rename is unblocking")
+ .prereq(renameUnblockCycles);
+ renameRenamedInsts
- .name(name() + ".renameSquashedInsts")
++ .name(name() + ".RENAME:RenamedInsts")
+ .desc("Number of instructions processed by rename")
+ .prereq(renameRenamedInsts);
+ renameSquashedInsts
- .name(name() + ".renameROBFullEvents")
- .desc("Number of times rename has considered the ROB 'full'")
++ .name(name() + ".RENAME:SquashedInsts")
+ .desc("Number of squashed instructions processed by rename")
+ .prereq(renameSquashedInsts);
+ renameROBFullEvents
- .name(name() + ".renameIQFullEvents")
- .desc("Number of times rename has considered the IQ 'full'")
++ .name(name() + ".RENAME:ROBFullEvents")
++ .desc("Number of times rename has blocked due to ROB full")
+ .prereq(renameROBFullEvents);
+ renameIQFullEvents
- .name(name() + ".renameFullRegisterEvents")
++ .name(name() + ".RENAME:IQFullEvents")
++ .desc("Number of times rename has blocked due to IQ full")
+ .prereq(renameIQFullEvents);
++ renameLSQFullEvents
++ .name(name() + ".RENAME:LSQFullEvents")
++ .desc("Number of times rename has blocked due to LSQ full")
++ .prereq(renameLSQFullEvents);
+ renameFullRegistersEvents
- .name(name() + ".renameRenamedOperands")
++ .name(name() + ".RENAME:FullRegisterEvents")
+ .desc("Number of times there has been no free registers")
+ .prereq(renameFullRegistersEvents);
+ renameRenamedOperands
- .name(name() + ".renameRenameLookups")
++ .name(name() + ".RENAME:RenamedOperands")
+ .desc("Number of destination operands rename has renamed")
+ .prereq(renameRenamedOperands);
+ renameRenameLookups
- renameHBPlaceHolders
- .name(name() + ".renameHBPlaceHolders")
- .desc("Number of place holders added to the history buffer")
- .prereq(renameHBPlaceHolders);
++ .name(name() + ".RENAME:RenameLookups")
+ .desc("Number of register rename lookups that rename has made")
+ .prereq(renameRenameLookups);
- .name(name() + ".renameCommittedMaps")
+ renameCommittedMaps
- .name(name() + ".renameUndoneMaps")
++ .name(name() + ".RENAME:CommittedMaps")
+ .desc("Number of HB maps that are committed")
+ .prereq(renameCommittedMaps);
+ renameUndoneMaps
- renameValidUndoneMaps
- .name(name() + ".renameValidUndoneMaps")
- .desc("Number of HB maps that are undone, and are not place holders")
- .prereq(renameValidUndoneMaps);
++ .name(name() + ".RENAME:UndoneMaps")
+ .desc("Number of HB maps that are undone due to squashing")
+ .prereq(renameUndoneMaps);
- SimpleRename<Impl>::setCPU(FullCPU *cpu_ptr)
++ renamedSerializing
++ .name(name() + ".RENAME:serializingInsts")
++ .desc("count of serializing insts renamed")
++ .flags(Stats::total)
++ ;
++ renamedTempSerializing
++ .name(name() + ".RENAME:tempSerializingInsts")
++ .desc("count of temporary serializing insts renamed")
++ .flags(Stats::total)
++ ;
++ renameSkidInsts
++ .name(name() + ".RENAME:skidInsts")
++ .desc("count of insts added to the skid buffer")
++ .flags(Stats::total)
++ ;
+}
+
+template <class Impl>
+void
- DPRINTF(Rename, "Rename: Setting CPU pointer.\n");
++DefaultRename<Impl>::setCPU(FullCPU *cpu_ptr)
+{
- SimpleRename<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
++ DPRINTF(Rename, "Setting CPU pointer.\n");
+ cpu = cpu_ptr;
+}
+
+template <class Impl>
+void
- DPRINTF(Rename, "Rename: Setting time buffer pointer.\n");
++DefaultRename<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
+{
- SimpleRename<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
++ DPRINTF(Rename, "Setting time buffer pointer.\n");
+ timeBuffer = tb_ptr;
+
+ // Setup wire to read information from time buffer, from IEW stage.
+ fromIEW = timeBuffer->getWire(-iewToRenameDelay);
+
+ // Setup wire to read information from time buffer, from commit stage.
+ fromCommit = timeBuffer->getWire(-commitToRenameDelay);
+
+ // Setup wire to write information to previous stages.
+ toDecode = timeBuffer->getWire(0);
+}
+
+template <class Impl>
+void
- DPRINTF(Rename, "Rename: Setting rename queue pointer.\n");
++DefaultRename<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
+{
- SimpleRename<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
++ DPRINTF(Rename, "Setting rename queue pointer.\n");
+ renameQueue = rq_ptr;
+
+ // Setup wire to write information to future stages.
+ toIEW = renameQueue->getWire(0);
+}
+
+template <class Impl>
+void
- DPRINTF(Rename, "Rename: Setting decode queue pointer.\n");
++DefaultRename<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
+{
- SimpleRename<Impl>::setRenameMap(RenameMap *rm_ptr)
++ DPRINTF(Rename, "Setting decode queue pointer.\n");
+ decodeQueue = dq_ptr;
+
+ // Setup wire to get information from decode.
+ fromDecode = decodeQueue->getWire(-decodeToRenameDelay);
+}
+
+template <class Impl>
+void
- DPRINTF(Rename, "Rename: Setting rename map pointer.\n");
- renameMap = rm_ptr;
++DefaultRename<Impl>::initStage()
++{
++ // Grab the number of free entries directly from the stages.
++ for (int tid=0; tid < numThreads; tid++) {
++ freeEntries[tid].iqEntries = iew_ptr->instQueue.numFreeEntries(tid);
++ freeEntries[tid].lsqEntries = iew_ptr->ldstQueue.numFreeEntries(tid);
++ freeEntries[tid].robEntries = commit_ptr->numROBFreeEntries(tid);
++ emptyROB[tid] = true;
++ }
++}
++
++template<class Impl>
++void
++DefaultRename<Impl>::setActiveThreads(list<unsigned> *at_ptr)
+{
- SimpleRename<Impl>::setFreeList(FreeList *fl_ptr)
++ DPRINTF(Rename, "Setting active threads list pointer.\n");
++ activeThreads = at_ptr;
+}
+
++
+template <class Impl>
+void
- DPRINTF(Rename, "Rename: Setting free list pointer.\n");
++DefaultRename<Impl>::setRenameMap(RenameMap rm_ptr[])
+{
- SimpleRename<Impl>::dumpHistory()
++ DPRINTF(Rename, "Setting rename map pointers.\n");
++
++ for (int i=0; i<numThreads; i++) {
++ renameMap[i] = &rm_ptr[i];
++ }
++}
++
++template <class Impl>
++void
++DefaultRename<Impl>::setFreeList(FreeList *fl_ptr)
++{
++ DPRINTF(Rename, "Setting free list pointer.\n");
+ freeList = fl_ptr;
+}
+
++template<class Impl>
++void
++DefaultRename<Impl>::setScoreboard(Scoreboard *_scoreboard)
++{
++ DPRINTF(Rename, "Setting scoreboard pointer.\n");
++ scoreboard = _scoreboard;
++}
++
+template <class Impl>
+void
- typename list<RenameHistory>::iterator buf_it = historyBuffer.begin();
++DefaultRename<Impl>::switchOut()
+{
- while (buf_it != historyBuffer.end())
- {
- cprintf("Seq num: %i\nArch reg: %i New phys reg: %i Old phys "
- "reg: %i\n", (*buf_it).instSeqNum, (int)(*buf_it).archReg,
- (int)(*buf_it).newPhysReg, (int)(*buf_it).prevPhysReg);
++ cpu->signalSwitched();
++}
++
++template <class Impl>
++void
++DefaultRename<Impl>::doSwitchOut()
++{
++ for (int i = 0; i < numThreads; i++) {
++ typename list<RenameHistory>::iterator hb_it = historyBuffer[i].begin();
++
++ while (!historyBuffer[i].empty()) {
++ assert(hb_it != historyBuffer[i].end());
+
- buf_it++;
++ DPRINTF(Rename, "[tid:%u]: Removing history entry with sequence "
++ "number %i.\n", i, (*hb_it).instSeqNum);
+
- SimpleRename<Impl>::block()
++ // Tell the rename map to set the architected register to the
++ // previous physical register that it was renamed to.
++ renameMap[i]->setEntry(hb_it->archReg, hb_it->prevPhysReg);
++
++ // Put the renamed physical register back on the free list.
++ freeList->addReg(hb_it->newPhysReg);
++
++ historyBuffer[i].erase(hb_it++);
++ }
++ insts[i].clear();
++ skidBuffer[i].clear();
+ }
+}
+
+template <class Impl>
+void
- DPRINTF(Rename, "Rename: Blocking.\n");
- // Set status to Blocked.
- _status = Blocked;
++DefaultRename<Impl>::takeOverFrom()
+{
- // Add the current inputs onto the skid buffer, so they can be
- // reprocessed when this stage unblocks.
- skidBuffer.push(*fromDecode);
++ _status = Inactive;
++ initStage();
+
- // Note that this stage only signals previous stages to stall when
- // it is the cause of the stall originates at this stage. Otherwise
- // the previous stages are expected to check all possible stall signals.
++ // Reset all state prior to taking over from the other CPU.
++ for (int i=0; i< numThreads; i++) {
++ renameStatus[i] = Idle;
++
++ stalls[i].iew = false;
++ stalls[i].commit = false;
++ serializeInst[i] = NULL;
++
++ instsInProgress[i] = 0;
++
++ emptyROB[i] = true;
+
- inline void
- SimpleRename<Impl>::unblock()
- {
- DPRINTF(Rename, "Rename: Read instructions out of skid buffer this "
- "cycle.\n");
- // Remove the now processed instructions from the skid buffer.
- skidBuffer.pop();
-
- // If there's still information in the skid buffer, then
- // continue to tell previous stages to stall. They will be
- // able to restart once the skid buffer is empty.
- if (!skidBuffer.empty()) {
- toDecode->renameInfo.stall = true;
- } else {
- DPRINTF(Rename, "Rename: Done unblocking.\n");
- _status = Running;
++ serializeOnNextInst[i] = false;
++ }
+}
+
+template <class Impl>
- SimpleRename<Impl>::doSquash()
++void
++DefaultRename<Impl>::squash(unsigned tid)
++{
++ DPRINTF(Rename, "[tid:%u]: Squashing instructions.\n",tid);
++
++ // Clear the stall signal if rename was blocked or unblocking before.
++ // If it still needs to block, the blocking should happen the next
++ // cycle and there should be space to hold everything due to the squash.
++ if (renameStatus[tid] == Blocked ||
++ renameStatus[tid] == Unblocking ||
++ renameStatus[tid] == SerializeStall) {
++#if 0
++ // In syscall emulation, we can have both a block and a squash due
++ // to a syscall in the same cycle. This would cause both signals to
++ // be high. This shouldn't happen in full system.
++ if (toDecode->renameBlock[tid]) {
++ toDecode->renameBlock[tid] = 0;
++ } else {
++ toDecode->renameUnblock[tid] = 1;
++ }
++#else
++ toDecode->renameUnblock[tid] = 1;
++#endif
++ serializeInst[tid] = NULL;
++ }
++
++ // Set the status to Squashing.
++ renameStatus[tid] = Squashing;
++
++ // Squash any instructions from decode.
++ unsigned squashCount = 0;
++
++ for (int i=0; i<fromDecode->size; i++) {
++ if (fromDecode->insts[i]->threadNumber == tid) {
++ fromDecode->insts[i]->squashed = true;
++ wroteToTimeBuffer = true;
++ squashCount++;
++ }
+ }
++
++ insts[tid].clear();
++
++ // Clear the skid buffer in case it has any data in it.
++ skidBuffer[tid].clear();
++
++ doSquash(tid);
+}
+
+template <class Impl>
+void
- typename list<RenameHistory>::iterator hb_it = historyBuffer.begin();
++DefaultRename<Impl>::tick()
+{
- InstSeqNum squashed_seq_num = fromCommit->commitInfo.doneSeqNum;
++ wroteToTimeBuffer = false;
+
- #if FULL_SYSTEM
- assert(!historyBuffer.empty());
- #else
- // After a syscall squashes everything, the history buffer may be empty
- // but the ROB may still be squashing instructions.
- if (historyBuffer.empty()) {
++ blockThisCycle = false;
+
- #endif // FULL_SYSTEM
++ bool status_change = false;
++
++ toIEWIndex = 0;
++
++ sortInsts();
++
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++
++ // Check stall and squash signals.
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = *threads++;
++
++ DPRINTF(Rename, "Processing [tid:%i]\n", tid);
++
++ status_change = checkSignalsAndUpdate(tid) || status_change;
++
++ rename(status_change, tid);
++ }
++
++ if (status_change) {
++ updateStatus();
++ }
++
++ if (wroteToTimeBuffer) {
++ DPRINTF(Activity, "Activity this cycle.\n");
++ cpu->activityThisCycle();
++ }
++
++ threads = (*activeThreads).begin();
++
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = *threads++;
++
++ // If we committed this cycle then doneSeqNum will be > 0
++ if (fromCommit->commitInfo[tid].doneSeqNum != 0 &&
++ !fromCommit->commitInfo[tid].squash &&
++ renameStatus[tid] != Squashing) {
++
++ removeFromHistory(fromCommit->commitInfo[tid].doneSeqNum,
++ tid);
++ }
++ }
++
++ // @todo: make into updateProgress function
++ for (int tid=0; tid < numThreads; tid++) {
++ instsInProgress[tid] -= fromIEW->iewInfo[tid].dispatched;
++
++ assert(instsInProgress[tid] >=0);
++ }
++
++}
++
++template<class Impl>
++void
++DefaultRename<Impl>::rename(bool &status_change, unsigned tid)
++{
++ // If status is Running or idle,
++ // call renameInsts()
++ // If status is Unblocking,
++ // buffer any instructions coming from decode
++ // continue trying to empty skid buffer
++ // check if stall conditions have passed
++
++ if (renameStatus[tid] == Blocked) {
++ ++renameBlockCycles;
++ } else if (renameStatus[tid] == Squashing) {
++ ++renameSquashCycles;
++ } else if (renameStatus[tid] == SerializeStall) {
++ ++renameSerializeStallCycles;
++ }
++
++ if (renameStatus[tid] == Running ||
++ renameStatus[tid] == Idle) {
++ DPRINTF(Rename, "[tid:%u]: Not blocked, so attempting to run "
++ "stage.\n", tid);
++
++ renameInsts(tid);
++ } else if (renameStatus[tid] == Unblocking) {
++ renameInsts(tid);
++
++ if (validInsts()) {
++ // Add the current inputs to the skid buffer so they can be
++ // reprocessed when this stage unblocks.
++ skidInsert(tid);
++ }
++
++ // If we switched over to blocking, then there's a potential for
++ // an overall status change.
++ status_change = unblock(tid) || status_change || blockThisCycle;
++ }
++}
++
++template <class Impl>
++void
++DefaultRename<Impl>::renameInsts(unsigned tid)
++{
++ // Instructions can be either in the skid buffer or the queue of
++ // instructions coming from decode, depending on the status.
++ int insts_available = renameStatus[tid] == Unblocking ?
++ skidBuffer[tid].size() : insts[tid].size();
++
++ // Check the decode queue to see if instructions are available.
++ // If there are no available instructions to rename, then do nothing.
++ if (insts_available == 0) {
++ DPRINTF(Rename, "[tid:%u]: Nothing to do, breaking out early.\n",
++ tid);
++ // Should I change status to idle?
++ ++renameIdleCycles;
+ return;
++ } else if (renameStatus[tid] == Unblocking) {
++ ++renameUnblockCycles;
++ } else if (renameStatus[tid] == Running) {
++ ++renameRunCycles;
+ }
- // Go through the most recent instructions, undoing the mappings
- // they did and freeing up the registers.
- while ((*hb_it).instSeqNum > squashed_seq_num)
- {
- assert(hb_it != historyBuffer.end());
+
- DPRINTF(Rename, "Rename: Removing history entry with sequence "
- "number %i.\n", (*hb_it).instSeqNum);
++ DynInstPtr inst;
+
- // If it's not simply a place holder, then add the registers.
- if (!(*hb_it).placeHolder) {
- // Tell the rename map to set the architected register to the
- // previous physical register that it was renamed to.
- renameMap->setEntry(hb_it->archReg, hb_it->prevPhysReg);
++ // Will have to do a different calculation for the number of free
++ // entries.
++ int free_rob_entries = calcFreeROBEntries(tid);
++ int free_iq_entries = calcFreeIQEntries(tid);
++ int free_lsq_entries = calcFreeLSQEntries(tid);
++ int min_free_entries = free_rob_entries;
+
- // Put the renamed physical register back on the free list.
- freeList->addReg(hb_it->newPhysReg);
++ FullSource source = ROB;
+
- ++renameValidUndoneMaps;
++ if (free_iq_entries < min_free_entries) {
++ min_free_entries = free_iq_entries;
++ source = IQ;
++ }
+
- historyBuffer.erase(hb_it++);
++ if (free_lsq_entries < min_free_entries) {
++ min_free_entries = free_lsq_entries;
++ source = LSQ;
++ }
++
++ // Check if there's any space left.
++ if (min_free_entries <= 0) {
++ DPRINTF(Rename, "[tid:%u]: Blocking due to no free ROB/IQ/LSQ "
++ "entries.\n"
++ "ROB has %i free entries.\n"
++ "IQ has %i free entries.\n"
++ "LSQ has %i free entries.\n",
++ tid,
++ free_rob_entries,
++ free_iq_entries,
++ free_lsq_entries);
++
++ blockThisCycle = true;
++
++ block(tid);
++
++ incrFullStat(source);
++
++ return;
++ } else if (min_free_entries < insts_available) {
++ DPRINTF(Rename, "[tid:%u]: Will have to block this cycle."
++ "%i insts available, but only %i insts can be "
++ "renamed due to ROB/IQ/LSQ limits.\n",
++ tid, insts_available, min_free_entries);
++
++ insts_available = min_free_entries;
++
++ blockThisCycle = true;
++
++ incrFullStat(source);
++ }
++
++ InstQueue &insts_to_rename = renameStatus[tid] == Unblocking ?
++ skidBuffer[tid] : insts[tid];
++
++ DPRINTF(Rename, "[tid:%u]: %i available instructions to "
++ "send iew.\n", tid, insts_available);
++
++ DPRINTF(Rename, "[tid:%u]: %i insts pipelining from Rename | %i insts "
++ "dispatched to IQ last cycle.\n",
++ tid, instsInProgress[tid], fromIEW->iewInfo[tid].dispatched);
++
++ // Handle serializing the next instruction if necessary.
++ if (serializeOnNextInst[tid]) {
++ if (emptyROB[tid] && instsInProgress[tid] == 0) {
++ // ROB already empty; no need to serialize.
++ serializeOnNextInst[tid] = false;
++ } else if (!insts_to_rename.empty()) {
++ insts_to_rename.front()->setSerializeBefore();
+ }
++ }
+
- ++renameUndoneMaps;
++ int renamed_insts = 0;
+
- SimpleRename<Impl>::squash()
++ while (insts_available > 0 && toIEWIndex < renameWidth) {
++ DPRINTF(Rename, "[tid:%u]: Sending instructions to IEW.\n", tid);
++
++ assert(!insts_to_rename.empty());
++
++ inst = insts_to_rename.front();
++
++ insts_to_rename.pop_front();
++
++ if (renameStatus[tid] == Unblocking) {
++ DPRINTF(Rename,"[tid:%u]: Removing [sn:%lli] PC:%#x from rename "
++ "skidBuffer\n",
++ tid, inst->seqNum, inst->readPC());
++ }
++
++ if (inst->isSquashed()) {
++ DPRINTF(Rename, "[tid:%u]: instruction %i with PC %#x is "
++ "squashed, skipping.\n",
++ tid, inst->seqNum, inst->threadNumber,inst->readPC());
++
++ ++renameSquashedInsts;
++
++ // Decrement how many instructions are available.
++ --insts_available;
++
++ continue;
++ }
++
++ DPRINTF(Rename, "[tid:%u]: Processing instruction [sn:%lli] with "
++ "PC %#x.\n",
++ tid, inst->seqNum, inst->readPC());
++
++ // Handle serializeAfter/serializeBefore instructions.
++ // serializeAfter marks the next instruction as serializeBefore.
++ // serializeBefore makes the instruction wait in rename until the ROB
++ // is empty.
++
++ // In this model, IPR accesses are serialize before
++ // instructions, and store conditionals are serialize after
++ // instructions. This is mainly due to lack of support for
++ // out-of-order operations of either of those classes of
++ // instructions.
++ if ((inst->isIprAccess() || inst->isSerializeBefore()) &&
++ !inst->isSerializeHandled()) {
++ DPRINTF(Rename, "Serialize before instruction encountered.\n");
++
++ if (!inst->isTempSerializeBefore()) {
++ renamedSerializing++;
++ inst->setSerializeHandled();
++ } else {
++ renamedTempSerializing++;
++ }
++
++ // Change status over to SerializeStall so that other stages know
++ // what this is blocked on.
++ renameStatus[tid] = SerializeStall;
++
++ serializeInst[tid] = inst;
++
++ blockThisCycle = true;
++
++ break;
++ } else if ((inst->isStoreConditional() || inst->isSerializeAfter()) &&
++ !inst->isSerializeHandled()) {
++ DPRINTF(Rename, "Serialize after instruction encountered.\n");
++
++ renamedSerializing++;
++
++ inst->setSerializeHandled();
++
++ serializeAfter(insts_to_rename, tid);
++ }
++
++ // Check here to make sure there are enough destination registers
++ // to rename to. Otherwise block.
++ if (renameMap[tid]->numFreeEntries() < inst->numDestRegs()) {
++ DPRINTF(Rename, "Blocking due to lack of free "
++ "physical registers to rename to.\n");
++ blockThisCycle = true;
++
++ ++renameFullRegistersEvents;
++
++ break;
++ }
++
++ renameSrcRegs(inst, inst->threadNumber);
++
++ renameDestRegs(inst, inst->threadNumber);
++
++ ++renamed_insts;
++
++ // Put instruction in rename queue.
++ toIEW->insts[toIEWIndex] = inst;
++ ++(toIEW->size);
++
++ // Increment which instruction we're on.
++ ++toIEWIndex;
++
++ // Decrement how many instructions are available.
++ --insts_available;
++ }
++
++ instsInProgress[tid] += renamed_insts;
++ renameRenamedInsts += renamed_insts;
++
++ // If we wrote to the time buffer, record this.
++ if (toIEWIndex) {
++ wroteToTimeBuffer = true;
++ }
++
++ // Check if there's any instructions left that haven't yet been renamed.
++ // If so then block.
++ if (insts_available) {
++ blockThisCycle = true;
++ }
++
++ if (blockThisCycle) {
++ block(tid);
++ toDecode->renameUnblock[tid] = false;
+ }
+}
+
++template<class Impl>
++void
++DefaultRename<Impl>::skidInsert(unsigned tid)
++{
++ DynInstPtr inst = NULL;
++
++ while (!insts[tid].empty()) {
++ inst = insts[tid].front();
++
++ insts[tid].pop_front();
++
++ assert(tid == inst->threadNumber);
++
++ DPRINTF(Rename, "[tid:%u]: Inserting [sn:%lli] PC:%#x into Rename "
++ "skidBuffer\n", tid, inst->seqNum, inst->readPC());
++
++ ++renameSkidInsts;
++
++ skidBuffer[tid].push_back(inst);
++ }
++
++ if (skidBuffer[tid].size() > skidBufferMax)
++ panic("Skidbuffer Exceeded Max Size");
++}
++
+template <class Impl>
+void
- DPRINTF(Rename, "Rename: Squashing instructions.\n");
- // Set the status to Squashing.
- _status = Squashing;
++DefaultRename<Impl>::sortInsts()
+{
- numInst = 0;
++ int insts_from_decode = fromDecode->size;
++#ifdef DEBUG
++ for (int i=0; i < numThreads; i++)
++ assert(insts[i].empty());
++#endif
++ for (int i = 0; i < insts_from_decode; ++i) {
++ DynInstPtr inst = fromDecode->insts[i];
++ insts[inst->threadNumber].push_back(inst);
++ }
++}
+
- // Clear the skid buffer in case it has any data in it.
- while (!skidBuffer.empty())
- {
- skidBuffer.pop();
++template<class Impl>
++bool
++DefaultRename<Impl>::skidsEmpty()
++{
++ list<unsigned>::iterator threads = (*activeThreads).begin();
+
- doSquash();
++ while (threads != (*activeThreads).end()) {
++ if (!skidBuffer[*threads++].empty())
++ return false;
+ }
+
- SimpleRename<Impl>::removeFromHistory(InstSeqNum inst_seq_num)
++ return true;
+}
+
+template<class Impl>
+void
- DPRINTF(Rename, "Rename: Removing a committed instruction from the "
- "history buffer, until sequence number %lli.\n", inst_seq_num);
- typename list<RenameHistory>::iterator hb_it = historyBuffer.end();
++DefaultRename<Impl>::updateStatus()
+{
- --hb_it;
++ bool any_unblocking = false;
+
- if (hb_it->instSeqNum > inst_seq_num) {
- DPRINTF(Rename, "Rename: Old sequence number encountered. Ensure "
- "that a syscall happened recently.\n");
- return;
++ list<unsigned>::iterator threads = (*activeThreads).begin();
+
- while ((*hb_it).instSeqNum != inst_seq_num)
- {
- // Make sure we haven't gone off the end of the list.
- assert(hb_it != historyBuffer.end());
-
- // In theory instructions at the end of the history buffer
- // should be older than the instruction being removed, which
- // means they will have a lower sequence number. Also the
- // instruction being removed from the history really should
- // be the last instruction in the list, as it is the instruction
- // that was just committed that is being removed.
- assert(hb_it->instSeqNum < inst_seq_num);
- DPRINTF(Rename, "Rename: Freeing up older rename of reg %i, sequence"
- " number %i.\n",
- (*hb_it).prevPhysReg, (*hb_it).instSeqNum);
-
- if (!(*hb_it).placeHolder) {
- freeList->addReg((*hb_it).prevPhysReg);
- ++renameCommittedMaps;
++ threads = (*activeThreads).begin();
++
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = *threads++;
++
++ if (renameStatus[tid] == Unblocking) {
++ any_unblocking = true;
++ break;
++ }
+ }
+
- historyBuffer.erase(hb_it--);
++ // Rename will have activity if it's unblocking.
++ if (any_unblocking) {
++ if (_status == Inactive) {
++ _status = Active;
++
++ DPRINTF(Activity, "Activating stage.\n");
++
++ cpu->activateStage(FullCPU::RenameIdx);
+ }
++ } else {
++ // If it's not unblocking, then rename will not have any internal
++ // activity. Switch it to inactive.
++ if (_status == Active) {
++ _status = Inactive;
++ DPRINTF(Activity, "Deactivating stage.\n");
+
- // Finally free up the previous register of the finished instruction
- // itself.
- if (!(*hb_it).placeHolder) {
- freeList->addReg(hb_it->prevPhysReg);
- ++renameCommittedMaps;
++ cpu->deactivateStage(FullCPU::RenameIdx);
++ }
+ }
++}
+
- historyBuffer.erase(hb_it);
++template <class Impl>
++bool
++DefaultRename<Impl>::block(unsigned tid)
++{
++ DPRINTF(Rename, "[tid:%u]: Blocking.\n", tid);
++
++ // Add the current inputs onto the skid buffer, so they can be
++ // reprocessed when this stage unblocks.
++ skidInsert(tid);
++
++ // Only signal backwards to block if the previous stages do not think
++ // rename is already blocked.
++ if (renameStatus[tid] != Blocked) {
++ if (renameStatus[tid] != Unblocking) {
++ toDecode->renameBlock[tid] = true;
++ toDecode->renameUnblock[tid] = false;
++ wroteToTimeBuffer = true;
++ }
++
++ // Rename can not go from SerializeStall to Blocked, otherwise
++ // it would not know to complete the serialize stall.
++ if (renameStatus[tid] != SerializeStall) {
++ // Set status to Blocked.
++ renameStatus[tid] = Blocked;
++ return true;
++ }
++ }
++
++ return false;
++}
++
++template <class Impl>
++bool
++DefaultRename<Impl>::unblock(unsigned tid)
++{
++ DPRINTF(Rename, "[tid:%u]: Trying to unblock.\n", tid);
++
++ // Rename is done unblocking if the skid buffer is empty.
++ if (skidBuffer[tid].empty() && renameStatus[tid] != SerializeStall) {
++
++ DPRINTF(Rename, "[tid:%u]: Done unblocking.\n", tid);
++
++ toDecode->renameUnblock[tid] = true;
++ wroteToTimeBuffer = true;
++
++ renameStatus[tid] = Running;
++ return true;
+ }
+
- SimpleRename<Impl>::renameSrcRegs(DynInstPtr &inst)
++ return false;
++}
++
++template <class Impl>
++void
++DefaultRename<Impl>::doSquash(unsigned tid)
++{
++ typename list<RenameHistory>::iterator hb_it = historyBuffer[tid].begin();
++
++ InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum;
++
++ // After a syscall squashes everything, the history buffer may be empty
++ // but the ROB may still be squashing instructions.
++ if (historyBuffer[tid].empty()) {
++ return;
++ }
++
++ // Go through the most recent instructions, undoing the mappings
++ // they did and freeing up the registers.
++ while (!historyBuffer[tid].empty() &&
++ (*hb_it).instSeqNum > squashed_seq_num) {
++ assert(hb_it != historyBuffer[tid].end());
++
++ DPRINTF(Rename, "[tid:%u]: Removing history entry with sequence "
++ "number %i.\n", tid, (*hb_it).instSeqNum);
++
++ // Tell the rename map to set the architected register to the
++ // previous physical register that it was renamed to.
++ renameMap[tid]->setEntry(hb_it->archReg, hb_it->prevPhysReg);
++
++ // Put the renamed physical register back on the free list.
++ freeList->addReg(hb_it->newPhysReg);
++
++ historyBuffer[tid].erase(hb_it++);
++
++ ++renameUndoneMaps;
++ }
++}
++
++template<class Impl>
++void
++DefaultRename<Impl>::removeFromHistory(InstSeqNum inst_seq_num, unsigned tid)
++{
++ DPRINTF(Rename, "[tid:%u]: Removing a committed instruction from the "
++ "history buffer %u (size=%i), until [sn:%lli].\n",
++ tid, tid, historyBuffer[tid].size(), inst_seq_num);
++
++ typename list<RenameHistory>::iterator hb_it = historyBuffer[tid].end();
++
++ --hb_it;
++
++ if (historyBuffer[tid].empty()) {
++ DPRINTF(Rename, "[tid:%u]: History buffer is empty.\n", tid);
++ return;
++ } else if (hb_it->instSeqNum > inst_seq_num) {
++ DPRINTF(Rename, "[tid:%u]: Old sequence number encountered. Ensure "
++ "that a syscall happened recently.\n", tid);
++ return;
++ }
++
++ // Commit all the renames up until (and including) the committed sequence
++ // number. Some or even all of the committed instructions may not have
++ // rename histories if they did not have destination registers that were
++ // renamed.
++ while (!historyBuffer[tid].empty() &&
++ hb_it != historyBuffer[tid].end() &&
++ (*hb_it).instSeqNum <= inst_seq_num) {
++
++ DPRINTF(Rename, "[tid:%u]: Freeing up older rename of reg %i, "
++ "[sn:%lli].\n",
++ tid, (*hb_it).prevPhysReg, (*hb_it).instSeqNum);
++
++ freeList->addReg((*hb_it).prevPhysReg);
++ ++renameCommittedMaps;
++
++ historyBuffer[tid].erase(hb_it--);
++ }
+}
+
+template <class Impl>
+inline void
- for (int src_idx = 0; src_idx < num_src_regs; src_idx++)
- {
++DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst,unsigned tid)
+{
++ assert(renameMap[tid] != 0);
++
+ unsigned num_src_regs = inst->numSrcRegs();
+
+ // Get the architectual register numbers from the source and
+ // destination operands, and redirect them to the right register.
+ // Will need to mark dependencies though.
- PhysRegIndex renamed_reg = renameMap->lookup(src_reg);
++ for (int src_idx = 0; src_idx < num_src_regs; src_idx++) {
+ RegIndex src_reg = inst->srcRegIdx(src_idx);
+
+ // Look up the source registers to get the phys. register they've
+ // been renamed to, and set the sources to those registers.
- DPRINTF(Rename, "Rename: Looking up arch reg %i, got "
- "physical reg %i.\n", (int)src_reg, (int)renamed_reg);
++ PhysRegIndex renamed_reg = renameMap[tid]->lookup(src_reg);
+
- // Either incorporate it into the info passed back,
- // or make another function call to see if that register is
- // ready or not.
- if (renameMap->isReady(renamed_reg)) {
- DPRINTF(Rename, "Rename: Register is ready.\n");
++ DPRINTF(Rename, "[tid:%u]: Looking up arch reg %i, got "
++ "physical reg %i.\n", tid, (int)src_reg,
++ (int)renamed_reg);
+
+ inst->renameSrcReg(src_idx, renamed_reg);
+
- SimpleRename<Impl>::renameDestRegs(DynInstPtr &inst)
++ // See if the register is ready or not.
++ if (scoreboard->getReg(renamed_reg) == true) {
++ DPRINTF(Rename, "[tid:%u]: Register is ready.\n", tid);
+
+ inst->markSrcRegReady(src_idx);
+ }
+
+ ++renameRenameLookups;
+ }
+}
+
+template <class Impl>
+inline void
- typename SimpleRenameMap::RenameInfo rename_result;
++DefaultRename<Impl>::renameDestRegs(DynInstPtr &inst,unsigned tid)
+{
- // If it's an instruction with no destination registers, then put
- // a placeholder within the history buffer. It might be better
- // to not put it in the history buffer at all (other than branches,
- // which always need at least a place holder), and differentiate
- // between instructions with and without destination registers
- // when getting from commit the instructions that committed.
- if (num_dest_regs == 0) {
- RenameHistory hb_entry(inst->seqNum);
++ typename RenameMap::RenameInfo rename_result;
+
+ unsigned num_dest_regs = inst->numDestRegs();
+
- historyBuffer.push_front(hb_entry);
++ // Rename the destination registers.
++ for (int dest_idx = 0; dest_idx < num_dest_regs; dest_idx++) {
++ RegIndex dest_reg = inst->destRegIdx(dest_idx);
+
- DPRINTF(Rename, "Rename: Adding placeholder instruction to "
- "history buffer, sequence number %lli.\n",
- inst->seqNum);
++ // Get the physical register that the destination will be
++ // renamed to.
++ rename_result = renameMap[tid]->rename(dest_reg);
+
- ++renameHBPlaceHolders;
- } else {
++ //Mark Scoreboard entry as not ready
++ scoreboard->unsetReg(rename_result.first);
+
- // Rename the destination registers.
- for (int dest_idx = 0; dest_idx < num_dest_regs; dest_idx++)
- {
- RegIndex dest_reg = inst->destRegIdx(dest_idx);
++ DPRINTF(Rename, "[tid:%u]: Renaming arch reg %i to physical "
++ "reg %i.\n", tid, (int)dest_reg,
++ (int)rename_result.first);
+
- // Get the physical register that the destination will be
- // renamed to.
- rename_result = renameMap->rename(dest_reg);
++ // Record the rename information so that a history can be kept.
++ RenameHistory hb_entry(inst->seqNum, dest_reg,
++ rename_result.first,
++ rename_result.second);
+
- DPRINTF(Rename, "Rename: Renaming arch reg %i to physical "
- "reg %i.\n", (int)dest_reg,
- (int)rename_result.first);
++ historyBuffer[tid].push_front(hb_entry);
+
- // Record the rename information so that a history can be kept.
- RenameHistory hb_entry(inst->seqNum, dest_reg,
- rename_result.first,
- rename_result.second);
++ DPRINTF(Rename, "[tid:%u]: Adding instruction to history buffer, "
++ "[sn:%lli].\n",tid,
++ (*historyBuffer[tid].begin()).instSeqNum);
+
- historyBuffer.push_front(hb_entry);
++ // Tell the instruction to rename the appropriate destination
++ // register (dest_idx) to the new physical register
++ // (rename_result.first), and record the previous physical
++ // register that the same logical register was renamed to
++ // (rename_result.second).
++ inst->renameDestReg(dest_idx,
++ rename_result.first,
++ rename_result.second);
+
- DPRINTF(Rename, "Rename: Adding instruction to history buffer, "
- "sequence number %lli.\n",
- (*historyBuffer.begin()).instSeqNum);
++ ++renameRenamedOperands;
++ }
++}
+
- // Tell the instruction to rename the appropriate destination
- // register (dest_idx) to the new physical register
- // (rename_result.first), and record the previous physical
- // register that the same logical register was renamed to
- // (rename_result.second).
- inst->renameDestReg(dest_idx,
- rename_result.first,
- rename_result.second);
++template <class Impl>
++inline int
++DefaultRename<Impl>::calcFreeROBEntries(unsigned tid)
++{
++ int num_free = freeEntries[tid].robEntries -
++ (instsInProgress[tid] - fromIEW->iewInfo[tid].dispatched);
+
- ++renameRenamedOperands;
- }
- }
++ //DPRINTF(Rename,"[tid:%i]: %i rob free\n",tid,num_free);
+
- SimpleRename<Impl>::calcFreeROBEntries()
++ return num_free;
+}
+
+template <class Impl>
+inline int
- return fromCommit->commitInfo.freeROBEntries -
- renameWidth * iewToRenameDelay;
++DefaultRename<Impl>::calcFreeIQEntries(unsigned tid)
+{
- SimpleRename<Impl>::calcFreeIQEntries()
++ int num_free = freeEntries[tid].iqEntries -
++ (instsInProgress[tid] - fromIEW->iewInfo[tid].dispatched);
++
++ //DPRINTF(Rename,"[tid:%i]: %i iq free\n",tid,num_free);
++
++ return num_free;
+}
+
+template <class Impl>
+inline int
- return fromIEW->iewInfo.freeIQEntries - renameWidth * iewToRenameDelay;
- }
++DefaultRename<Impl>::calcFreeLSQEntries(unsigned tid)
+{
- template<class Impl>
- void
- SimpleRename<Impl>::tick()
- {
- // Rename will need to try to rename as many instructions as it
- // has bandwidth, unless it is blocked.
-
- // Check if _status is BarrierStall. If so, then check if the number
- // of free ROB entries is equal to the number of total ROB entries.
- // Once equal then wake this stage up. Set status to unblocking maybe.
-
- if (_status != Blocked && _status != Squashing) {
- DPRINTF(Rename, "Rename: Status is not blocked, will attempt to "
- "run stage.\n");
- // Make sure that the skid buffer has something in it if the
- // status is unblocking.
- assert(_status == Unblocking ? !skidBuffer.empty() : 1);
-
- rename();
-
- // If the status was unblocking, then instructions from the skid
- // buffer were used. Remove those instructions and handle
- // the rest of unblocking.
- if (_status == Unblocking) {
- ++renameUnblockCycles;
-
- if (fromDecode->size > 0) {
- // Add the current inputs onto the skid buffer, so they can be
- // reprocessed when this stage unblocks.
- skidBuffer.push(*fromDecode);
- }
++ int num_free = freeEntries[tid].lsqEntries -
++ (instsInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToLSQ);
+
- unblock();
- }
- } else if (_status == Blocked) {
- ++renameBlockCycles;
++ //DPRINTF(Rename,"[tid:%i]: %i lsq free\n",tid,num_free);
+
- // If stage is blocked and still receiving valid instructions,
- // make sure to store them in the skid buffer.
- if (fromDecode->size > 0) {
++ return num_free;
++}
+
- block();
++template <class Impl>
++unsigned
++DefaultRename<Impl>::validInsts()
++{
++ unsigned inst_count = 0;
+
- // Continue to tell previous stage to stall.
- toDecode->renameInfo.stall = true;
- }
++ for (int i=0; i<fromDecode->size; i++) {
++ if (!fromDecode->insts[i]->squashed)
++ inst_count++;
++ }
+
- if (!fromIEW->iewInfo.stall &&
- !fromCommit->commitInfo.stall &&
- calcFreeROBEntries() > 0 &&
- calcFreeIQEntries() > 0 &&
- renameMap->numFreeEntries() > 0) {
-
- // Need to be sure to check all blocking conditions above.
- // If they have cleared, then start unblocking.
- DPRINTF(Rename, "Rename: Stall signals cleared, going to "
- "unblock.\n");
- _status = Unblocking;
-
- // Continue to tell previous stage to block until this stage
- // is done unblocking.
- toDecode->renameInfo.stall = true;
- } else {
- // Otherwise no conditions have changed. Tell previous
- // stage to continue blocking.
- toDecode->renameInfo.stall = true;
- }
++ return inst_count;
++}
+
- if (fromCommit->commitInfo.squash ||
- fromCommit->commitInfo.robSquashing) {
- squash();
- return;
- }
- } else if (_status == Squashing) {
- ++renameSquashCycles;
++template <class Impl>
++void
++DefaultRename<Impl>::readStallSignals(unsigned tid)
++{
++ if (fromIEW->iewBlock[tid]) {
++ stalls[tid].iew = true;
++ }
+
- if (fromCommit->commitInfo.squash) {
- squash();
- } else if (!fromCommit->commitInfo.squash &&
- !fromCommit->commitInfo.robSquashing) {
++ if (fromIEW->iewUnblock[tid]) {
++ assert(stalls[tid].iew);
++ stalls[tid].iew = false;
++ }
+
- DPRINTF(Rename, "Rename: Done squashing, going to running.\n");
- _status = Running;
- rename();
- } else {
- doSquash();
- }
++ if (fromCommit->commitBlock[tid]) {
++ stalls[tid].commit = true;
++ }
+
- // Ugly code, revamp all of the tick() functions eventually.
- if (fromCommit->commitInfo.doneSeqNum != 0 && _status != Squashing) {
- #if !FULL_SYSTEM
- if (!fromCommit->commitInfo.squash) {
- removeFromHistory(fromCommit->commitInfo.doneSeqNum);
- }
- #else
- removeFromHistory(fromCommit->commitInfo.doneSeqNum);
- #endif
++ if (fromCommit->commitUnblock[tid]) {
++ assert(stalls[tid].commit);
++ stalls[tid].commit = false;
+ }
++}
+
- template<class Impl>
++template <class Impl>
++bool
++DefaultRename<Impl>::checkStall(unsigned tid)
++{
++ bool ret_val = false;
++
++ if (stalls[tid].iew) {
++ DPRINTF(Rename,"[tid:%i]: Stall from IEW stage detected.\n", tid);
++ ret_val = true;
++ } else if (stalls[tid].commit) {
++ DPRINTF(Rename,"[tid:%i]: Stall from Commit stage detected.\n", tid);
++ ret_val = true;
++ } else if (calcFreeROBEntries(tid) <= 0) {
++ DPRINTF(Rename,"[tid:%i]: Stall: ROB has 0 free entries.\n", tid);
++ ret_val = true;
++ } else if (calcFreeIQEntries(tid) <= 0) {
++ DPRINTF(Rename,"[tid:%i]: Stall: IQ has 0 free entries.\n", tid);
++ ret_val = true;
++ } else if (calcFreeLSQEntries(tid) <= 0) {
++ DPRINTF(Rename,"[tid:%i]: Stall: LSQ has 0 free entries.\n", tid);
++ ret_val = true;
++ } else if (renameMap[tid]->numFreeEntries() <= 0) {
++ DPRINTF(Rename,"[tid:%i]: Stall: RenameMap has 0 free entries.\n", tid);
++ ret_val = true;
++ } else if (renameStatus[tid] == SerializeStall &&
++ (!emptyROB[tid] || instsInProgress[tid])) {
++ DPRINTF(Rename,"[tid:%i]: Stall: Serialize stall and ROB is not "
++ "empty.\n",
++ tid);
++ ret_val = true;
+ }
+
++ return ret_val;
+}
+
- SimpleRename<Impl>::rename()
- {
- // Check if any of the stages ahead of rename are telling rename
- // to squash. The squash() function will also take care of fixing up
- // the rename map and the free list.
- if (fromCommit->commitInfo.squash ||
- fromCommit->commitInfo.robSquashing) {
- DPRINTF(Rename, "Rename: Receiving signal from Commit to squash.\n");
- squash();
- return;
++template <class Impl>
+void
- // Check if time buffer is telling this stage to stall.
- if (fromIEW->iewInfo.stall ||
- fromCommit->commitInfo.stall) {
- DPRINTF(Rename, "Rename: Receiving signal from IEW/Commit to "
- "stall.\n");
- block();
- return;
++DefaultRename<Impl>::readFreeEntries(unsigned tid)
++{
++ bool updated = false;
++ if (fromIEW->iewInfo[tid].usedIQ) {
++ freeEntries[tid].iqEntries =
++ fromIEW->iewInfo[tid].freeIQEntries;
++ updated = true;
+ }
+
- // Check if the current status is squashing. If so, set its status
- // to running and resume execution the next cycle.
- if (_status == Squashing) {
- DPRINTF(Rename, "Rename: Done squashing.\n");
- _status = Running;
- return;
++ if (fromIEW->iewInfo[tid].usedLSQ) {
++ freeEntries[tid].lsqEntries =
++ fromIEW->iewInfo[tid].freeLSQEntries;
++ updated = true;
+ }
+
- // Check the decode queue to see if instructions are available.
- // If there are no available instructions to rename, then do nothing.
- // Or, if the stage is currently unblocking, then go ahead and run it.
- if (fromDecode->size == 0 && _status != Unblocking) {
- DPRINTF(Rename, "Rename: Nothing to do, breaking out early.\n");
- // Should I change status to idle?
- return;
- }
-
- ////////////////////////////////////
- // Actual rename part.
- ////////////////////////////////////
-
- DynInstPtr inst;
++ if (fromCommit->commitInfo[tid].usedROB) {
++ freeEntries[tid].robEntries =
++ fromCommit->commitInfo[tid].freeROBEntries;
++ emptyROB[tid] = fromCommit->commitInfo[tid].emptyROB;
++ updated = true;
+ }
+
- // If we're unblocking, then we may be in the middle of an instruction
- // group. Subtract off numInst to get the proper number of instructions
- // left.
- int insts_available = _status == Unblocking ?
- skidBuffer.front().size - numInst :
- fromDecode->size;
++ DPRINTF(Rename, "[tid:%i]: Free IQ: %i, Free ROB: %i, Free LSQ: %i\n",
++ tid,
++ freeEntries[tid].iqEntries,
++ freeEntries[tid].robEntries,
++ freeEntries[tid].lsqEntries);
+
- bool block_this_cycle = false;
++ DPRINTF(Rename, "[tid:%i]: %i instructions not yet in ROB\n",
++ tid, instsInProgress[tid]);
++}
+
- // Will have to do a different calculation for the number of free
- // entries. Number of free entries recorded on this cycle -
- // renameWidth * renameToDecodeDelay
- int free_rob_entries = calcFreeROBEntries();
- int free_iq_entries = calcFreeIQEntries();
- int min_iq_rob = min(free_rob_entries, free_iq_entries);
++template <class Impl>
++bool
++DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid)
++{
++ // Check if there's a squash signal, squash if there is
++ // Check stall signals, block if necessary.
++ // If status was blocked
++ // check if stall conditions have passed
++ // if so then go to unblocking
++ // If status was Squashing
++ // check if squashing is not high. Switch to running this cycle.
++ // If status was serialize stall
++ // check if ROB is empty and no insts are in flight to the ROB
++
++ readFreeEntries(tid);
++ readStallSignals(tid);
++
++ if (fromCommit->commitInfo[tid].squash) {
++ DPRINTF(Rename, "[tid:%u]: Squashing instructions due to squash from "
++ "commit.\n", tid);
++
++ squash(tid);
++
++ return true;
++ }
+
- unsigned to_iew_index = 0;
++ if (fromCommit->commitInfo[tid].robSquashing) {
++ DPRINTF(Rename, "[tid:%u]: ROB is still squashing.\n", tid);
+
- // Check if there's any space left.
- if (min_iq_rob <= 0) {
- DPRINTF(Rename, "Rename: Blocking due to no free ROB or IQ "
- "entries.\n"
- "Rename: ROB has %d free entries.\n"
- "Rename: IQ has %d free entries.\n",
- free_rob_entries,
- free_iq_entries);
- block();
- // Tell previous stage to stall.
- toDecode->renameInfo.stall = true;
++ renameStatus[tid] = Squashing;
+
- if (free_rob_entries <= 0) {
- ++renameROBFullEvents;
- } else {
- ++renameIQFullEvents;
- }
++ return true;
++ }
+
- return;
- } else if (min_iq_rob < insts_available) {
- DPRINTF(Rename, "Rename: Will have to block this cycle. Only "
- "%i insts can be renamed due to IQ/ROB limits.\n",
- min_iq_rob);
++ if (checkStall(tid)) {
++ return block(tid);
++ }
+
- insts_available = min_iq_rob;
++ if (renameStatus[tid] == Blocked) {
++ DPRINTF(Rename, "[tid:%u]: Done blocking, switching to unblocking.\n",
++ tid);
+
- block_this_cycle = true;
++ renameStatus[tid] = Unblocking;
+
- if (free_rob_entries < free_iq_entries) {
- ++renameROBFullEvents;
- } else {
- ++renameIQFullEvents;
- }
++ unblock(tid);
+
- while (insts_available > 0) {
- DPRINTF(Rename, "Rename: Sending instructions to iew.\n");
++ return true;
+ }
+
- // Get the next instruction either from the skid buffer or the
- // decode queue.
- inst = _status == Unblocking ? skidBuffer.front().insts[numInst] :
- fromDecode->insts[numInst];
++ if (renameStatus[tid] == Squashing) {
++ // Switch status to running if rename isn't being told to block or
++ // squash this cycle.
++ DPRINTF(Rename, "[tid:%u]: Done squashing, switching to running.\n",
++ tid);
+
- if (inst->isSquashed()) {
- DPRINTF(Rename, "Rename: instruction %i with PC %#x is "
- "squashed, skipping.\n",
- inst->seqNum, inst->readPC());
++ renameStatus[tid] = Running;
+
- // Go to the next instruction.
- ++numInst;
++ return false;
++ }
+
- ++renameSquashedInsts;
++ if (renameStatus[tid] == SerializeStall) {
++ // Stall ends once the ROB is free.
++ DPRINTF(Rename, "[tid:%u]: Done with serialize stall, switching to "
++ "unblocking.\n", tid);
+
- // Decrement how many instructions are available.
- --insts_available;
++ DynInstPtr serial_inst = serializeInst[tid];
+
- continue;
- }
++ renameStatus[tid] = Unblocking;
+
- DPRINTF(Rename, "Rename: Processing instruction %i with PC %#x.\n",
- inst->seqNum, inst->readPC());
-
- // If it's a trap instruction, then it needs to wait here within
- // rename until the ROB is empty. Needs a way to detect that the
- // ROB is empty. Maybe an event?
- // Would be nice if it could be avoided putting this into a
- // specific stage and instead just put it into the AlphaFullCPU.
- // Might not really be feasible though...
- // (EXCB, TRAPB)
- if (inst->isSerializing()) {
- panic("Rename: Serializing instruction encountered.\n");
- DPRINTF(Rename, "Rename: Serializing instruction "
- "encountered.\n");
-
- // Change status over to BarrierStall so that other stages know
- // what this is blocked on.
- _status = BarrierStall;
++ unblock(tid);
+
- block_this_cycle = true;
++ DPRINTF(Rename, "[tid:%u]: Processing instruction [%lli] with "
++ "PC %#x.\n",
++ tid, serial_inst->seqNum, serial_inst->readPC());
+
- break;
++ // Put instruction into queue here.
++ serial_inst->clearSerializeBefore();
+
- // Check here to make sure there are enough destination registers
- // to rename to. Otherwise block.
- if (renameMap->numFreeEntries() < inst->numDestRegs())
- {
- DPRINTF(Rename, "Rename: Blocking due to lack of free "
- "physical registers to rename to.\n");
- // Need some sort of event based on a register being freed.
-
- block_this_cycle = true;
++ if (!skidBuffer[tid].empty()) {
++ skidBuffer[tid].push_front(serial_inst);
++ } else {
++ insts[tid].push_front(serial_inst);
+ }
+
- ++renameFullRegistersEvents;
++ DPRINTF(Rename, "[tid:%u]: Instruction must be processed by rename."
++ " Adding to front of list.", tid);
+
- break;
- }
++ serializeInst[tid] = NULL;
+
- renameSrcRegs(inst);
++ return true;
++ }
+
- renameDestRegs(inst);
++ // If we've reached this point, we have not gotten any signals that
++ // cause rename to change its status. Rename remains the same as before.
++ return false;
++}
+
- // Put instruction in rename queue.
- toIEW->insts[to_iew_index] = inst;
- ++(toIEW->size);
++template<class Impl>
++void
++DefaultRename<Impl>::serializeAfter(InstQueue &inst_list,
++ unsigned tid)
++{
++ if (inst_list.empty()) {
++ // Mark a bit to say that I must serialize on the next instruction.
++ serializeOnNextInst[tid] = true;
++ return;
++ }
+
- // Decrease the number of free ROB and IQ entries.
- --free_rob_entries;
- --free_iq_entries;
++ // Set the next instruction as serializing.
++ inst_list.front()->setSerializeBefore();
++}
+
- // Increment which instruction we're on.
- ++to_iew_index;
- ++numInst;
++template <class Impl>
++inline void
++DefaultRename<Impl>::incrFullStat(const FullSource &source)
++{
++ switch (source) {
++ case ROB:
++ ++renameROBFullEvents;
++ break;
++ case IQ:
++ ++renameIQFullEvents;
++ break;
++ case LSQ:
++ ++renameLSQFullEvents;
++ break;
++ default:
++ panic("Rename full stall stat should be incremented for a reason!");
++ break;
++ }
++}
+
- ++renameRenamedInsts;
++template <class Impl>
++void
++DefaultRename<Impl>::dumpHistory()
++{
++ typename list<RenameHistory>::iterator buf_it;
+
- // Decrement how many instructions are available.
- --insts_available;
- }
++ for (int i = 0; i < numThreads; i++) {
+
- // Check if there's any instructions left that haven't yet been renamed.
- // If so then block.
- if (block_this_cycle) {
- block();
++ buf_it = historyBuffer[i].begin();
+
- toDecode->renameInfo.stall = true;
- } else {
- // If we had a successful rename and didn't have to exit early, then
- // reset numInst so it will refer to the correct instruction on next
- // run.
- numInst = 0;
++ while (buf_it != historyBuffer[i].end()) {
++ cprintf("Seq num: %i\nArch reg: %i New phys reg: %i Old phys "
++ "reg: %i\n", (*buf_it).instSeqNum, (int)(*buf_it).archReg,
++ (int)(*buf_it).newPhysReg, (int)(*buf_it).prevPhysReg);
+
++ buf_it++;
++ }
+ }
+}
--- /dev/null
- // Todo: Consider making functions inline. Avoid having things that are
- // using the zero register or misc registers from adding on the registers
- // to the free list. Possibly remove the direct communication between
- // this and the freelist. Considering making inline bool functions that
- // determine if the register is a logical int, logical fp, physical int,
- // physical fp, etc.
-
- SimpleRenameMap::SimpleRenameMap(unsigned _numLogicalIntRegs,
- unsigned _numPhysicalIntRegs,
- unsigned _numLogicalFloatRegs,
- unsigned _numPhysicalFloatRegs,
- unsigned _numMiscRegs,
- RegIndex _intZeroReg,
- RegIndex _floatZeroReg)
- : numLogicalIntRegs(_numLogicalIntRegs),
- numPhysicalIntRegs(_numPhysicalIntRegs),
- numLogicalFloatRegs(_numLogicalFloatRegs),
- numPhysicalFloatRegs(_numPhysicalFloatRegs),
- numMiscRegs(_numMiscRegs),
- intZeroReg(_intZeroReg),
- floatZeroReg(_floatZeroReg)
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <vector>
+
+#include "cpu/o3/rename_map.hh"
+
+using namespace std;
+
- DPRINTF(Rename, "Rename: Creating rename map. Phys: %i / %i, Float: "
- "%i / %i.\n", numLogicalIntRegs, numPhysicalIntRegs,
++// @todo: Consider making inline bool functions that determine if the
++// register is a logical int, logical fp, physical int, physical fp,
++// etc.
++
++SimpleRenameMap::~SimpleRenameMap()
++{
++}
++
++void
++SimpleRenameMap::init(unsigned _numLogicalIntRegs,
++ unsigned _numPhysicalIntRegs,
++ PhysRegIndex &ireg_idx,
++
++ unsigned _numLogicalFloatRegs,
++ unsigned _numPhysicalFloatRegs,
++ PhysRegIndex &freg_idx,
++
++ unsigned _numMiscRegs,
++
++ RegIndex _intZeroReg,
++ RegIndex _floatZeroReg,
++
++ int map_id,
++ bool bindRegs)
+{
- //Create the rename maps, and their scoreboards.
- intRenameMap = new RenameEntry[numLogicalIntRegs];
- floatRenameMap = new RenameEntry[numLogicalRegs];
-
- // Should combine this into one scoreboard.
- intScoreboard.resize(numPhysicalIntRegs);
- floatScoreboard.resize(numPhysicalRegs);
- miscScoreboard.resize(numPhysicalRegs + numMiscRegs);
-
- // Initialize the entries in the integer rename map to point to the
- // physical registers of the same index, and consider each register
- // ready until the first rename occurs.
- for (RegIndex index = 0; index < numLogicalIntRegs; ++index)
- {
- intRenameMap[index].physical_reg = index;
- intScoreboard[index] = 1;
- }
++ id = map_id;
++
++ numLogicalIntRegs = _numLogicalIntRegs;
++
++ numLogicalFloatRegs = _numLogicalFloatRegs;
++
++ numPhysicalIntRegs = _numPhysicalIntRegs;
++
++ numPhysicalFloatRegs = _numPhysicalFloatRegs;
++
++ numMiscRegs = _numMiscRegs;
++
++ intZeroReg = _intZeroReg;
++ floatZeroReg = _floatZeroReg;
++
++ DPRINTF(Rename, "Creating rename map %i. Phys: %i / %i, Float: "
++ "%i / %i.\n", id, numLogicalIntRegs, numPhysicalIntRegs,
+ numLogicalFloatRegs, numPhysicalFloatRegs);
+
+ numLogicalRegs = numLogicalIntRegs + numLogicalFloatRegs;
+
+ numPhysicalRegs = numPhysicalIntRegs + numPhysicalFloatRegs;
+
- // Initialize the rest of the physical registers (the ones that don't
- // directly map to a logical register) as unready.
- for (PhysRegIndex index = numLogicalIntRegs;
- index < numPhysicalIntRegs;
- ++index)
- {
- intScoreboard[index] = 0;
- }
++ //Create the rename maps
++ intRenameMap.resize(numLogicalIntRegs);
++ floatRenameMap.resize(numLogicalRegs);
+
- int float_reg_idx = numPhysicalIntRegs;
-
- // Initialize the entries in the floating point rename map to point to
- // the physical registers of the same index, and consider each register
- // ready until the first rename occurs.
- // Although the index refers purely to architected registers, because
- // the floating reg indices come after the integer reg indices, they
- // may exceed the size of a normal RegIndex (short).
- for (PhysRegIndex index = numLogicalIntRegs;
- index < numLogicalRegs; ++index)
- {
- floatRenameMap[index].physical_reg = float_reg_idx++;
- }
++ if (bindRegs) {
++ DPRINTF(Rename, "Binding registers into rename map %i",id);
+
- for (PhysRegIndex index = numPhysicalIntRegs;
- index < numPhysicalIntRegs + numLogicalFloatRegs; ++index)
- {
- floatScoreboard[index] = 1;
- }
++ // Initialize the entries in the integer rename map to point to the
++ // physical registers of the same index
++ for (RegIndex index = 0; index < numLogicalIntRegs; ++index)
++ {
++ intRenameMap[index].physical_reg = ireg_idx++;
++ }
+
- // Initialize the rest of the physical registers (the ones that don't
- // directly map to a logical register) as unready.
- for (PhysRegIndex index = numPhysicalIntRegs + numLogicalFloatRegs;
- index < numPhysicalRegs;
- ++index)
- {
- floatScoreboard[index] = 0;
- }
++ // Initialize the entries in the floating point rename map to point to
++ // the physical registers of the same index
++ // Although the index refers purely to architected registers, because
++ // the floating reg indices come after the integer reg indices, they
++ // may exceed the size of a normal RegIndex (short).
++ for (PhysRegIndex index = numLogicalIntRegs;
++ index < numLogicalRegs; ++index)
++ {
++ floatRenameMap[index].physical_reg = freg_idx++;
++ }
++ } else {
++ DPRINTF(Rename, "Binding registers into rename map %i",id);
+
- // Initialize the entries in the misc register scoreboard to be ready.
- for (PhysRegIndex index = numPhysicalRegs;
- index < numPhysicalRegs + numMiscRegs; ++index)
- {
- miscScoreboard[index] = 1;
- }
- }
++ PhysRegIndex temp_ireg = ireg_idx;
+
- SimpleRenameMap::~SimpleRenameMap()
- {
- // Delete the rename maps as they were allocated with new.
- delete [] intRenameMap;
- delete [] floatRenameMap;
++ for (RegIndex index = 0; index < numLogicalIntRegs; ++index)
++ {
++ intRenameMap[index].physical_reg = temp_ireg++;
++ }
+
- //Setup the interface to the freelist.
++ PhysRegIndex temp_freg = freg_idx;
++
++ for (PhysRegIndex index = numLogicalIntRegs;
++ index < numLogicalRegs; ++index)
++ {
++ floatRenameMap[index].physical_reg = temp_freg++;
++ }
++ }
+}
+
+void
+SimpleRenameMap::setFreeList(SimpleFreeList *fl_ptr)
+{
- // Don't allow this stage to fault; force that check to the rename stage.
- // Simply ask to rename a logical register and get back a new physical
- // register index.
+ freeList = fl_ptr;
+}
+
+
- // If it's not referencing the zero register, then mark the register
- // as not ready.
+SimpleRenameMap::RenameInfo
+SimpleRenameMap::rename(RegIndex arch_reg)
+{
+ PhysRegIndex renamed_reg;
+ PhysRegIndex prev_reg;
+
+ if (arch_reg < numLogicalIntRegs) {
+
+ // Record the current physical register that is renamed to the
+ // requested architected register.
+ prev_reg = intRenameMap[arch_reg].physical_reg;
+
- // Get a free physical register to rename to.
++ // If it's not referencing the zero register, then rename the
++ // register.
+ if (arch_reg != intZeroReg) {
- // Update the integer rename map.
+ renamed_reg = freeList->getIntReg();
+
- // Mark register as not ready.
- intScoreboard[renamed_reg] = false;
+ intRenameMap[arch_reg].physical_reg = renamed_reg;
+
+ assert(renamed_reg >= 0 && renamed_reg < numPhysicalIntRegs);
+
- // Subtract off the base offset for floating point registers.
- // arch_reg = arch_reg - numLogicalIntRegs;
-
+ } else {
+ // Otherwise return the zero register so nothing bad happens.
+ renamed_reg = intZeroReg;
+ }
+ } else if (arch_reg < numLogicalRegs) {
- // If it's not referencing the zero register, then mark the register
- // as not ready.
+ // Record the current physical register that is renamed to the
+ // requested architected register.
+ prev_reg = floatRenameMap[arch_reg].physical_reg;
+
- // Get a free floating point register to rename to.
++ // If it's not referencing the zero register, then rename the
++ // register.
+ if (arch_reg != floatZeroReg) {
- // Update the floating point rename map.
+ renamed_reg = freeList->getFloatReg();
+
-
- // Mark register as not ready.
- floatScoreboard[renamed_reg] = false;
+ floatRenameMap[arch_reg].physical_reg = renamed_reg;
+
+ assert(renamed_reg < numPhysicalRegs &&
+ renamed_reg >= numPhysicalIntRegs);
- // No renaming happens to the misc. registers. They are simply the
- // registers that come after all the physical registers; thus
- // take the base architected register and add the physical registers
- // to it.
+ } else {
+ // Otherwise return the zero register so nothing bad happens.
+ renamed_reg = floatZeroReg;
+ }
+ } else {
+ // Subtract off the base offset for miscellaneous registers.
+ arch_reg = arch_reg - numLogicalRegs;
+
-
- miscScoreboard[renamed_reg] = false;
++ // No renaming happens to the misc. registers. They are
++ // simply the registers that come after all the physical
++ // registers; thus take the base architected register and add
++ // the physical registers to it.
+ renamed_reg = arch_reg + numPhysicalRegs;
+
+ // Set the previous register to the same register; mainly it must be
+ // known that the prev reg was outside the range of normal registers
+ // so the free list can avoid adding it.
+ prev_reg = renamed_reg;
+
+ assert(renamed_reg < numPhysicalRegs + numMiscRegs);
- //Perhaps give this a pair as a return value, of the physical register
- //and whether or not it's ready.
+ }
+
+ return RenameInfo(renamed_reg, prev_reg);
+}
+
- // Subtract off the base FP offset.
- // arch_reg = arch_reg - numLogicalIntRegs;
-
+PhysRegIndex
+SimpleRenameMap::lookup(RegIndex arch_reg)
+{
+ if (arch_reg < numLogicalIntRegs) {
+ return intRenameMap[arch_reg].physical_reg;
+ } else if (arch_reg < numLogicalRegs) {
- bool
- SimpleRenameMap::isReady(PhysRegIndex phys_reg)
- {
- if (phys_reg < numPhysicalIntRegs) {
- return intScoreboard[phys_reg];
- } else if (phys_reg < numPhysicalRegs) {
-
- // Subtract off the base FP offset.
- // phys_reg = phys_reg - numPhysicalIntRegs;
-
- return floatScoreboard[phys_reg];
- } else {
- // Subtract off the misc registers offset.
- // phys_reg = phys_reg - numPhysicalRegs;
-
- return miscScoreboard[phys_reg];
- }
- }
-
- // In this implementation the miscellaneous registers do not actually rename,
- // so this function does not allow you to try to change their mappings.
+ return floatRenameMap[arch_reg].physical_reg;
+ } else {
+ // Subtract off the misc registers offset.
+ arch_reg = arch_reg - numLogicalRegs;
+
+ // Misc. regs don't rename, so simply add the base arch reg to
+ // the number of physical registers.
+ return numPhysicalRegs + arch_reg;
+ }
+}
+
- } else {
- assert(arch_reg < (numLogicalIntRegs + numLogicalFloatRegs));
-
+void
+SimpleRenameMap::setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg)
+{
++ // In this implementation the miscellaneous registers do not
++ // actually rename, so this function does not allow you to try to
++ // change their mappings.
+ if (arch_reg < numLogicalIntRegs) {
+ DPRINTF(Rename, "Rename Map: Integer register %i being set to %i.\n",
+ (int)arch_reg, renamed_reg);
+
+ intRenameMap[arch_reg].physical_reg = renamed_reg;
- void
- SimpleRenameMap::squash(vector<RegIndex> freed_regs,
- vector<UnmapInfo> unmaps)
- {
- panic("Not sure this function should be called.");
-
- // Not sure the rename map should be able to access the free list
- // like this.
- while (!freed_regs.empty()) {
- RegIndex free_register = freed_regs.back();
-
- if (free_register < numPhysicalIntRegs) {
- freeList->addIntReg(free_register);
- } else {
- // Subtract off the base FP dependence tag.
- free_register = free_register - numPhysicalIntRegs;
- freeList->addFloatReg(free_register);
- }
-
- freed_regs.pop_back();
- }
-
- // Take unmap info and roll back the rename map.
- }
-
- void
- SimpleRenameMap::markAsReady(PhysRegIndex ready_reg)
- {
- DPRINTF(Rename, "Rename map: Marking register %i as ready.\n",
- (int)ready_reg);
-
- if (ready_reg < numPhysicalIntRegs) {
- assert(ready_reg >= 0);
-
- intScoreboard[ready_reg] = 1;
- } else if (ready_reg < numPhysicalRegs) {
-
- // Subtract off the base FP offset.
- // ready_reg = ready_reg - numPhysicalIntRegs;
-
- floatScoreboard[ready_reg] = 1;
- } else {
- //Subtract off the misc registers offset.
- // ready_reg = ready_reg - numPhysicalRegs;
-
- miscScoreboard[ready_reg] = 1;
- }
- }
-
++ } else if (arch_reg < numLogicalIntRegs + numLogicalFloatRegs) {
+ DPRINTF(Rename, "Rename Map: Float register %i being set to %i.\n",
+ (int)arch_reg - numLogicalIntRegs, renamed_reg);
+
+ floatRenameMap[arch_reg].physical_reg = renamed_reg;
+ }
+}
+
+int
+SimpleRenameMap::numFreeEntries()
+{
+ int free_int_regs = freeList->numFreeIntRegs();
+ int free_float_regs = freeList->numFreeFloatRegs();
+
+ if (free_int_regs < free_float_regs) {
+ return free_int_regs;
+ } else {
+ return free_float_regs;
+ }
+}
--- /dev/null
- #ifndef __CPU_O3_CPU_RENAME_MAP_HH__
- #define __CPU_O3_CPU_RENAME_MAP_HH__
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// Todo: Create destructor.
+// Have it so that there's a more meaningful name given to the variable
+// that marks the beginning of the FP registers.
+
- SimpleRenameMap(unsigned _numLogicalIntRegs,
- unsigned _numPhysicalIntRegs,
- unsigned _numLogicalFloatRegs,
- unsigned _numPhysicalFloatRegs,
- unsigned _numMiscRegs,
- RegIndex _intZeroReg,
- RegIndex _floatZeroReg);
++#ifndef __CPU_O3_RENAME_MAP_HH__
++#define __CPU_O3_RENAME_MAP_HH__
+
+#include <iostream>
+#include <utility>
+#include <vector>
+
+#include "cpu/o3/free_list.hh"
+//For RegIndex
+#include "arch/isa_traits.hh"
+
+class SimpleRenameMap
+{
+ protected:
+ typedef TheISA::RegIndex RegIndex;
+ public:
+ /**
+ * Pair of a logical register and a physical register. Tells the
+ * previous mapping of a logical register to a physical register.
+ * Used to roll back the rename map to a previous state.
+ */
+ typedef std::pair<RegIndex, PhysRegIndex> UnmapInfo;
+
+ /**
+ * Pair of a physical register and a physical register. Used to
+ * return the physical register that a logical register has been
+ * renamed to, and the previous physical register that the same
+ * logical register was previously mapped to.
+ */
+ typedef std::pair<PhysRegIndex, PhysRegIndex> RenameInfo;
+
+ public:
+ //Constructor
- bool isReady(PhysRegIndex arch_reg);
-
++ SimpleRenameMap() {};
+
+ /** Destructor. */
+ ~SimpleRenameMap();
+
++ void init(unsigned _numLogicalIntRegs,
++ unsigned _numPhysicalIntRegs,
++ PhysRegIndex &_int_reg_start,
++
++ unsigned _numLogicalFloatRegs,
++ unsigned _numPhysicalFloatRegs,
++ PhysRegIndex &_float_reg_start,
++
++ unsigned _numMiscRegs,
++
++ RegIndex _intZeroReg,
++ RegIndex _floatZeroReg,
++
++ int id,
++ bool bindRegs);
++
+ void setFreeList(SimpleFreeList *fl_ptr);
+
+ //Tell rename map to get a free physical register for a given
+ //architected register. Not sure it should have a return value,
+ //but perhaps it should have some sort of fault in case there are
+ //no free registers.
+ RenameInfo rename(RegIndex arch_reg);
+
+ PhysRegIndex lookup(RegIndex phys_reg);
+
- void markAsReady(PhysRegIndex ready_reg);
-
+ /**
+ * Marks the given register as ready, meaning that its value has been
+ * calculated and written to the register file.
+ * @param ready_reg The index of the physical register that is now ready.
+ */
- void squash(std::vector<RegIndex> freed_regs,
- std::vector<UnmapInfo> unmaps);
-
+ void setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg);
+
- RenameEntry *intRenameMap;
+ int numFreeEntries();
+
+ private:
++ /** Rename Map ID */
++ int id;
++
+ /** Number of logical integer registers. */
+ int numLogicalIntRegs;
+
+ /** Number of physical integer registers. */
+ int numPhysicalIntRegs;
+
+ /** Number of logical floating point registers. */
+ int numLogicalFloatRegs;
+
+ /** Number of physical floating point registers. */
+ int numPhysicalFloatRegs;
+
+ /** Number of miscellaneous registers. */
+ int numMiscRegs;
+
+ /** Number of logical integer + float registers. */
+ int numLogicalRegs;
+
+ /** Number of physical integer + float registers. */
+ int numPhysicalRegs;
+
+ /** The integer zero register. This implementation assumes it is always
+ * zero and never can be anything else.
+ */
+ RegIndex intZeroReg;
+
+ /** The floating point zero register. This implementation assumes it is
+ * always zero and never can be anything else.
+ */
+ RegIndex floatZeroReg;
+
+ class RenameEntry
+ {
+ public:
+ PhysRegIndex physical_reg;
+ bool valid;
+
+ RenameEntry()
+ : physical_reg(0), valid(false)
+ { }
+ };
+
++ //Change this to private
++ private:
+ /** Integer rename map. */
- RenameEntry *floatRenameMap;
++ std::vector<RenameEntry> intRenameMap;
+
+ /** Floating point rename map. */
-
- // Might want to make all these scoreboards into one large scoreboard.
-
- /** Scoreboard of physical integer registers, saying whether or not they
- * are ready.
- */
- std::vector<bool> intScoreboard;
-
- /** Scoreboard of physical floating registers, saying whether or not they
- * are ready.
- */
- std::vector<bool> floatScoreboard;
-
- /** Scoreboard of miscellaneous registers, saying whether or not they
- * are ready.
- */
- std::vector<bool> miscScoreboard;
++ std::vector<RenameEntry> floatRenameMap;
+
++ private:
+ /** Free list interface. */
+ SimpleFreeList *freeList;
- #endif //__CPU_O3_CPU_RENAME_MAP_HH__
+};
+
++#endif //__CPU_O3_RENAME_MAP_HH__
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- // Todo: Probably add in support for scheduling events (more than one as
- // well) on the case of the ROB being empty or full. Considering tracking
- // free entries instead of insts in ROB. Differentiate between squashing
- // all instructions after the instruction, and all instructions after *and*
- // including that instruction.
-
- #ifndef __CPU_O3_CPU_ROB_HH__
- #define __CPU_O3_CPU_ROB_HH__
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- * ROB class. Uses the instruction list that exists within the CPU to
- * represent the ROB. This class doesn't contain that list, but instead
- * a pointer to the CPU to get access to the list. The ROB, in this first
- * implementation, is largely what drives squashing.
++#ifndef __CPU_O3_ROB_HH__
++#define __CPU_O3_ROB_HH__
+
++#include <string>
+#include <utility>
+#include <vector>
+
+/**
- typedef std::pair<RegIndex, PhysRegIndex> UnmapInfo_t;
- typedef typename list<DynInstPtr>::iterator InstIt_t;
++ * ROB class. The ROB is largely what drives squashing.
+ */
+template <class Impl>
+class ROB
+{
+ protected:
+ typedef TheISA::RegIndex RegIndex;
+ public:
+ //Typedefs from the Impl.
+ typedef typename Impl::FullCPU FullCPU;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
- * @param _numEntries Number of entries in ROB.
- * @param _squashWidth Number of instructions that can be squashed in a
- * single cycle.
++ typedef std::pair<RegIndex, PhysRegIndex> UnmapInfo;
++ typedef typename std::list<DynInstPtr>::iterator InstIt;
++
++ /** Possible ROB statuses. */
++ enum Status {
++ Running,
++ Idle,
++ ROBSquashing
++ };
++
++ /** SMT ROB Sharing Policy */
++ enum ROBPolicy{
++ Dynamic,
++ Partitioned,
++ Threshold
++ };
++
++ private:
++ /** Per-thread ROB status. */
++ Status robStatus[Impl::MaxThreads];
++
++ /** ROB resource sharing policy for SMT mode. */
++ ROBPolicy robPolicy;
+
+ public:
+ /** ROB constructor.
- ROB(unsigned _numEntries, unsigned _squashWidth);
++ * @param _numEntries Number of entries in ROB.
++ * @param _squashWidth Number of instructions that can be squashed in a
++ * single cycle.
++ * @param _smtROBPolicy ROB Partitioning Scheme for SMT.
++ * @param _smtROBThreshold Max Resources(by %) a thread can have in the ROB.
++ * @param _numThreads The number of active threads.
+ */
- /** Function to insert an instruction into the ROB. The parameter inst is
- * not truly required, but is useful for checking correctness. Note
- * that whatever calls this function must ensure that there is enough
- * space within the ROB for the new instruction.
++ ROB(unsigned _numEntries, unsigned _squashWidth, std::string smtROBPolicy,
++ unsigned _smtROBThreshold, unsigned _numThreads);
++
++ std::string name() const;
+
+ /** Function to set the CPU pointer, necessary due to which object the ROB
+ * is created within.
+ * @param cpu_ptr Pointer to the implementation specific full CPU object.
+ */
+ void setCPU(FullCPU *cpu_ptr);
+
- * @todo Remove the parameter once correctness is ensured.
++ /** Sets pointer to the list of active threads.
++ * @param at_ptr Pointer to the list of active threads.
++ */
++ void setActiveThreads(std::list<unsigned>* at_ptr);
++
++ void switchOut();
++
++ void takeOverFrom();
++
++ /** Function to insert an instruction into the ROB. Note that whatever
++ * calls this function must ensure that there is enough space within the
++ * ROB for the new instruction.
+ * @param inst The instruction being inserted into the ROB.
- DynInstPtr readHeadInst() { return cpu->instList.front(); }
+ */
+ void insertInst(DynInstPtr &inst);
+
+ /** Returns pointer to the head instruction within the ROB. There is
+ * no guarantee as to the return value if the ROB is empty.
+ * @retval Pointer to the DynInst that is at the head of the ROB.
+ */
- DynInstPtr readTailInst() { return (*tail); }
++// DynInstPtr readHeadInst();
++
++ /** Returns a pointer to the head instruction of a specific thread within
++ * the ROB.
++ * @return Pointer to the DynInst that is at the head of the ROB.
++ */
++ DynInstPtr readHeadInst(unsigned tid);
++
++ /** Returns pointer to the tail instruction within the ROB. There is
++ * no guarantee as to the return value if the ROB is empty.
++ * @retval Pointer to the DynInst that is at the tail of the ROB.
++ */
++// DynInstPtr readTailInst();
++
++ /** Returns a pointer to the tail instruction of a specific thread within
++ * the ROB.
++ * @return Pointer to the DynInst that is at the tail of the ROB.
++ */
++ DynInstPtr readTailInst(unsigned tid);
++
++ /** Retires the head instruction, removing it from the ROB. */
++// void retireHead();
++
++ /** Retires the head instruction of a specific thread, removing it from the
++ * ROB.
++ */
++ void retireHead(unsigned tid);
+
- void retireHead();
++ /** Is the oldest instruction across all threads ready. */
++// bool isHeadReady();
+
- bool isHeadReady();
++ /** Is the oldest instruction across a particular thread ready. */
++ bool isHeadReady(unsigned tid);
+
- void doSquash();
++ /** Is there any commitable head instruction across all threads ready. */
++ bool canCommit();
+
++ /** Re-adjust ROB partitioning. */
++ void resetEntries();
++
++ /** Number of entries needed For 'num_threads' amount of threads. */
++ int entryAmount(int num_threads);
++
++ /** Returns the number of total free entries in the ROB. */
+ unsigned numFreeEntries();
+
++ /** Returns the number of free entries in a specific ROB paritition. */
++ unsigned numFreeEntries(unsigned tid);
++
++ /** Returns the maximum number of entries for a specific thread. */
++ unsigned getMaxEntries(unsigned tid)
++ { return maxEntries[tid]; }
++
++ /** Returns the number of entries being used by a specific thread. */
++ unsigned getThreadEntries(unsigned tid)
++ { return threadEntries[tid]; }
++
++ /** Returns if the ROB is full. */
+ bool isFull()
+ { return numInstsInROB == numEntries; }
+
++ /** Returns if a specific thread's partition is full. */
++ bool isFull(unsigned tid)
++ { return threadEntries[tid] == numEntries; }
++
++ /** Returns if the ROB is empty. */
+ bool isEmpty()
+ { return numInstsInROB == 0; }
+
- void squash(InstSeqNum squash_num);
++ /** Returns if a specific thread's partition is empty. */
++ bool isEmpty(unsigned tid)
++ { return threadEntries[tid] == 0; }
+
- uint64_t readHeadPC();
++ /** Executes the squash, marking squashed instructions. */
++ void doSquash(unsigned tid);
+
- uint64_t readHeadNextPC();
++ /** Squashes all instructions younger than the given sequence number for
++ * the specific thread.
++ */
++ void squash(InstSeqNum squash_num, unsigned tid);
+
- InstSeqNum readHeadSeqNum();
++ /** Updates the head instruction with the new oldest instruction. */
++ void updateHead();
+
- uint64_t readTailPC();
++ /** Updates the tail instruction with the new youngest instruction. */
++ void updateTail();
+
- InstSeqNum readTailSeqNum();
++ /** Reads the PC of the oldest head instruction. */
++// uint64_t readHeadPC();
+
- bool isDoneSquashing() const { return doneSquashing; }
++ /** Reads the PC of the head instruction of a specific thread. */
++// uint64_t readHeadPC(unsigned tid);
++
++ /** Reads the next PC of the oldest head instruction. */
++// uint64_t readHeadNextPC();
++
++ /** Reads the next PC of the head instruction of a specific thread. */
++// uint64_t readHeadNextPC(unsigned tid);
++
++ /** Reads the sequence number of the oldest head instruction. */
++// InstSeqNum readHeadSeqNum();
++
++ /** Reads the sequence number of the head instruction of a specific thread.
++ */
++// InstSeqNum readHeadSeqNum(unsigned tid);
++
++ /** Reads the PC of the youngest tail instruction. */
++// uint64_t readTailPC();
++
++ /** Reads the PC of the tail instruction of a specific thread. */
++// uint64_t readTailPC(unsigned tid);
++
++ /** Reads the sequence number of the youngest tail instruction. */
++// InstSeqNum readTailSeqNum();
++
++ /** Reads the sequence number of tail instruction of a specific thread. */
++// InstSeqNum readTailSeqNum(unsigned tid);
+
+ /** Checks if the ROB is still in the process of squashing instructions.
+ * @retval Whether or not the ROB is done squashing.
+ */
- private:
++ bool isDoneSquashing(unsigned tid) const
++ { return doneSquashing[tid]; }
++
++ /** Checks if the ROB is still in the process of squashing instructions for
++ * any thread.
++ */
++ bool isDoneSquashing();
+
+ /** This is more of a debugging function than anything. Use
+ * numInstsInROB to get the instructions in the ROB unless you are
+ * double checking that variable.
+ */
+ int countInsts();
+
- InstIt_t tail;
++ /** This is more of a debugging function than anything. Use
++ * threadEntries to get the instructions in the ROB unless you are
++ * double checking that variable.
++ */
++ int countInsts(unsigned tid);
+
++ private:
+ /** Pointer to the CPU. */
+ FullCPU *cpu;
+
++ /** Active Threads in CPU */
++ std::list<unsigned>* activeThreads;
++
+ /** Number of instructions in the ROB. */
+ unsigned numEntries;
+
++ /** Entries Per Thread */
++ unsigned threadEntries[Impl::MaxThreads];
++
++ /** Max Insts a Thread Can Have in the ROB */
++ unsigned maxEntries[Impl::MaxThreads];
++
++ /** ROB List of Instructions */
++ std::list<DynInstPtr> instList[Impl::MaxThreads];
++
+ /** Number of instructions that can be squashed in a single cycle. */
+ unsigned squashWidth;
+
++ public:
+ /** Iterator pointing to the instruction which is the last instruction
+ * in the ROB. This may at times be invalid (ie when the ROB is empty),
+ * however it should never be incorrect.
+ */
- InstIt_t squashIt;
++ InstIt tail;
+
++ /** Iterator pointing to the instruction which is the first instruction in
++ * in the ROB*/
++ InstIt head;
++
++ private:
+ /** Iterator used for walking through the list of instructions when
+ * squashing. Used so that there is persistent state between cycles;
+ * when squashing, the instructions are marked as squashed but not
+ * immediately removed, meaning the tail iterator remains the same before
+ * and after a squash.
+ * This will always be set to cpu->instList.end() if it is invalid.
+ */
- bool doneSquashing;
++ InstIt squashIt[Impl::MaxThreads];
+
++ public:
+ /** Number of instructions in the ROB. */
+ int numInstsInROB;
+
++ DynInstPtr dummyInst;
++
++ private:
+ /** The sequence number of the squashed instruction. */
+ InstSeqNum squashedSeqNum;
+
+ /** Is the ROB done squashing. */
- #endif //__CPU_O3_CPU_ROB_HH__
++ bool doneSquashing[Impl::MaxThreads];
++
++ /** Number of active threads. */
++ unsigned numThreads;
+};
+
++#endif //__CPU_O3_ROB_HH__
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- #ifndef __CPU_O3_CPU_ROB_IMPL_HH__
- #define __CPU_O3_CPU_ROB_IMPL_HH__
-
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth)
+#include "config/full_system.hh"
+#include "cpu/o3/rob.hh"
+
++using namespace std;
++
+template <class Impl>
- squashedSeqNum(0)
++ROB<Impl>::ROB(unsigned _numEntries, unsigned _squashWidth,
++ string _smtROBPolicy, unsigned _smtROBThreshold,
++ unsigned _numThreads)
+ : numEntries(_numEntries),
+ squashWidth(_squashWidth),
+ numInstsInROB(0),
- doneSquashing = true;
++ squashedSeqNum(0),
++ numThreads(_numThreads)
++{
++ for (int tid=0; tid < numThreads; tid++) {
++ doneSquashing[tid] = true;
++ threadEntries[tid] = 0;
++ }
++
++ string policy = _smtROBPolicy;
++
++ //Convert string to lowercase
++ std::transform(policy.begin(), policy.end(), policy.begin(),
++ (int(*)(int)) tolower);
++
++ //Figure out rob policy
++ if (policy == "dynamic") {
++ robPolicy = Dynamic;
++
++ //Set Max Entries to Total ROB Capacity
++ for (int i = 0; i < numThreads; i++) {
++ maxEntries[i]=numEntries;
++ }
++
++ } else if (policy == "partitioned") {
++ robPolicy = Partitioned;
++ DPRINTF(Fetch, "ROB sharing policy set to Partitioned\n");
++
++ //@todo:make work if part_amt doesnt divide evenly.
++ int part_amt = numEntries / numThreads;
++
++ //Divide ROB up evenly
++ for (int i = 0; i < numThreads; i++) {
++ maxEntries[i]=part_amt;
++ }
++
++ } else if (policy == "threshold") {
++ robPolicy = Threshold;
++ DPRINTF(Fetch, "ROB sharing policy set to Threshold\n");
++
++ int threshold = _smtROBThreshold;;
++
++ //Divide up by threshold amount
++ for (int i = 0; i < numThreads; i++) {
++ maxEntries[i]=threshold;
++ }
++ } else {
++ assert(0 && "Invalid ROB Sharing Policy.Options Are:{Dynamic,"
++ "Partitioned, Threshold}");
++ }
++}
++
++template <class Impl>
++std::string
++ROB<Impl>::name() const
+{
- // Set the tail to the beginning of the CPU instruction list so that
- // upon the first instruction being inserted into the ROB, the tail
- // iterator can simply be incremented.
- tail = cpu->instList.begin();
++ return cpu->name() + ".rob";
+}
+
+template <class Impl>
+void
+ROB<Impl>::setCPU(FullCPU *cpu_ptr)
+{
+ cpu = cpu_ptr;
+
- // Set the squash iterator to the end of the instruction list.
- squashIt = cpu->instList.end();
++ // Set the per-thread iterators to the end of the instruction list.
++ for (int i=0; i < numThreads;i++) {
++ squashIt[i] = instList[i].end();
++ }
+
- int
- ROB<Impl>::countInsts()
++ // Initialize the "universal" ROB head & tail point to invalid
++ // pointers
++ head = instList[0].end();
++ tail = instList[0].end();
+}
+
+template <class Impl>
- // Start at 1; if the tail matches cpu->instList.begin(), then there is
- // one inst in the ROB.
- int return_val = 1;
++void
++ROB<Impl>::setActiveThreads(list<unsigned> *at_ptr)
+{
- // There are quite a few special cases. Do not use this function other
- // than for debugging purposes.
- if (cpu->instList.begin() == cpu->instList.end()) {
- // In this case there are no instructions in the list. The ROB
- // must be empty.
- return 0;
- } else if (tail == cpu->instList.end()) {
- // In this case, the tail is not yet pointing to anything valid.
- // The ROB must be empty.
- return 0;
++ DPRINTF(ROB, "Setting active threads list pointer.\n");
++ activeThreads = at_ptr;
++}
+
- // Iterate through the ROB from the head to the tail, counting the
- // entries.
- for (InstIt_t i = cpu->instList.begin(); i != tail; ++i)
- {
- assert(i != cpu->instList.end());
- ++return_val;
++template <class Impl>
++void
++ROB<Impl>::switchOut()
++{
++ for (int tid = 0; tid < numThreads; tid++) {
++ instList[tid].clear();
+ }
++}
+
- return return_val;
++template <class Impl>
++void
++ROB<Impl>::takeOverFrom()
++{
++ for (int tid=0; tid < numThreads; tid++) {
++ doneSquashing[tid] = true;
++ threadEntries[tid] = 0;
++ squashIt[tid] = instList[tid].end();
++ }
++ numInstsInROB = 0;
++
++ // Initialize the "universal" ROB head & tail point to invalid
++ // pointers
++ head = instList[0].end();
++ tail = instList[0].end();
++}
++
++template <class Impl>
++void
++ROB<Impl>::resetEntries()
++{
++ if (robPolicy != Dynamic || numThreads > 1) {
++ int active_threads = (*activeThreads).size();
++
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++ list<unsigned>::iterator list_end = (*activeThreads).end();
++
++ while (threads != list_end) {
++ if (robPolicy == Partitioned) {
++ maxEntries[*threads++] = numEntries / active_threads;
++ } else if (robPolicy == Threshold && active_threads == 1) {
++ maxEntries[*threads++] = numEntries;
++ }
++ }
+ }
++}
++
++template <class Impl>
++int
++ROB<Impl>::entryAmount(int num_threads)
++{
++ if (robPolicy == Partitioned) {
++ return numEntries / num_threads;
++ } else {
++ return 0;
++ }
++}
++
++template <class Impl>
++int
++ROB<Impl>::countInsts()
++{
++ int total=0;
+
- // Because the head won't be tracked properly until the ROB gets the
- // first instruction, and any time that the ROB is empty and has not
- // yet gotten the instruction, this function doesn't work.
- // return numInstsInROB;
++ for (int i=0;i < numThreads;i++)
++ total += countInsts(i);
+
- // Make sure we have the right number of instructions.
- assert(numInstsInROB == countInsts());
- // Make sure the instruction is valid.
++ return total;
++}
++
++template <class Impl>
++int
++ROB<Impl>::countInsts(unsigned tid)
++{
++ return instList[tid].size();
+}
+
+template <class Impl>
+void
+ROB<Impl>::insertInst(DynInstPtr &inst)
+{
- DPRINTF(ROB, "ROB: Adding inst PC %#x to the ROB.\n", inst->readPC());
++ //assert(numInstsInROB == countInsts());
+ assert(inst);
+
- // If the ROB is full then exit.
++ DPRINTF(ROB, "Adding inst PC %#x to the ROB.\n", inst->readPC());
+
- ++numInstsInROB;
+ assert(numInstsInROB != numEntries);
+
- // Increment the tail iterator, moving it one instruction back.
- // There is a special case if the ROB was empty prior to this insertion,
- // in which case the tail will be pointing at instList.end(). If that
- // happens, then reset the tail to the beginning of the list.
- if (tail != cpu->instList.end()) {
- ++tail;
- } else {
- tail = cpu->instList.begin();
++ int tid = inst->threadNumber;
+
- // Make sure the tail iterator is actually pointing at the instruction
- // added.
- assert((*tail) == inst);
++ instList[tid].push_back(inst);
++
++ //Set Up head iterator if this is the 1st instruction in the ROB
++ if (numInstsInROB == 0) {
++ head = instList[tid].begin();
++ assert((*head) == inst);
+ }
+
- DPRINTF(ROB, "ROB: Now has %d instructions.\n", numInstsInROB);
++ //Must Decrement for iterator to actually be valid since __.end()
++ //actually points to 1 after the last inst
++ tail = instList[tid].end();
++ tail--;
+
- assert(numInstsInROB == countInsts());
++ inst->setInROB();
++
++ ++numInstsInROB;
++ ++threadEntries[tid];
+
++ assert((*tail) == inst);
++
++ DPRINTF(ROB, "[tid:%i] Now has %d instructions.\n", tid, threadEntries[tid]);
+}
+
+// Whatever calls this function needs to ensure that it properly frees up
+// registers prior to this function.
++/*
+template <class Impl>
+void
+ROB<Impl>::retireHead()
+{
- DynInstPtr head_inst = cpu->instList.front();
++ //assert(numInstsInROB == countInsts());
++ assert(numInstsInROB > 0);
++
++ int tid = (*head)->threadNumber;
++
++ retireHead(tid);
++
++ if (numInstsInROB == 0) {
++ tail = instList[tid].end();
++ }
++}
++*/
++
++template <class Impl>
++void
++ROB<Impl>::retireHead(unsigned tid)
++{
++ //assert(numInstsInROB == countInsts());
+ assert(numInstsInROB > 0);
+
+ // Get the head ROB instruction.
- // Make certain this can retire.
++ InstIt head_it = instList[tid].begin();
++
++ DynInstPtr head_inst = (*head_it);
+
- DPRINTF(ROB, "ROB: Retiring head instruction of the ROB, "
- "instruction PC %#x, seq num %i\n", head_inst->readPC(),
+ assert(head_inst->readyToCommit());
+
- // Keep track of how many instructions are in the ROB.
++ DPRINTF(ROB, "[tid:%u]: Retiring head instruction, "
++ "instruction PC %#x,[sn:%lli]\n", tid, head_inst->readPC(),
+ head_inst->seqNum);
+
- // Tell CPU to remove the instruction from the list of instructions.
- // A special case is needed if the instruction being retired is the
- // only instruction in the ROB; otherwise the tail iterator will become
- // invalidated.
+ --numInstsInROB;
++ --threadEntries[tid];
++
++ head_inst->removeInROB();
++ head_inst->setCommitted();
++
++ instList[tid].erase(head_it);
+
- if (numInstsInROB == 0) {
- tail = cpu->instList.end();
++ //Update "Global" Head of ROB
++ updateHead();
++
++ // @todo: A special case is needed if the instruction being
++ // retired is the only instruction in the ROB; otherwise the tail
++ // iterator will become invalidated.
+ cpu->removeFrontInst(head_inst);
++}
++/*
++template <class Impl>
++bool
++ROB<Impl>::isHeadReady()
++{
++ if (numInstsInROB != 0) {
++ return (*head)->readyToCommit();
++ }
+
- ROB<Impl>::isHeadReady()
++ return false;
++}
++*/
++template <class Impl>
++bool
++ROB<Impl>::isHeadReady(unsigned tid)
++{
++ if (threadEntries[tid] != 0) {
++ return instList[tid].front()->readyToCommit();
+ }
++
++ return false;
+}
+
+template <class Impl>
+bool
- if (numInstsInROB != 0) {
- return cpu->instList.front()->readyToCommit();
++ROB<Impl>::canCommit()
+{
- assert(numInstsInROB == countInsts());
++ //@todo: set ActiveThreads through ROB or CPU
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = *threads++;
++
++ if (isHeadReady(tid)) {
++ return true;
++ }
+ }
+
+ return false;
+}
+
+template <class Impl>
+unsigned
+ROB<Impl>::numFreeEntries()
+{
- ROB<Impl>::doSquash()
++ //assert(numInstsInROB == countInsts());
+
+ return numEntries - numInstsInROB;
+}
+
++template <class Impl>
++unsigned
++ROB<Impl>::numFreeEntries(unsigned tid)
++{
++ return maxEntries[tid] - threadEntries[tid];
++}
++
+template <class Impl>
+void
- DPRINTF(ROB, "ROB: Squashing instructions.\n");
++ROB<Impl>::doSquash(unsigned tid)
+{
- assert(squashIt != cpu->instList.end());
++ DPRINTF(ROB, "[tid:%u]: Squashing instructions until [sn:%i].\n",
++ tid, squashedSeqNum);
++
++ assert(squashIt[tid] != instList[tid].end());
++
++ if ((*squashIt[tid])->seqNum < squashedSeqNum) {
++ DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n",
++ tid);
+
- numSquashed < squashWidth && (*squashIt)->seqNum != squashedSeqNum;
++ squashIt[tid] = instList[tid].end();
++
++ doneSquashing[tid] = true;
++ return;
++ }
++
++ bool robTailUpdate = false;
+
+ for (int numSquashed = 0;
- // Ensure that the instruction is younger.
- assert((*squashIt)->seqNum > squashedSeqNum);
-
- DPRINTF(ROB, "ROB: Squashing instruction PC %#x, seq num %i.\n",
- (*squashIt)->readPC(), (*squashIt)->seqNum);
++ numSquashed < squashWidth &&
++ squashIt[tid] != instList[tid].end() &&
++ (*squashIt[tid])->seqNum > squashedSeqNum;
+ ++numSquashed)
+ {
- (*squashIt)->setSquashed();
-
- (*squashIt)->setCanCommit();
-
- // Special case for when squashing due to a syscall. It's possible
- // that the squash happened after the head instruction was already
- // committed, meaning that (*squashIt)->seqNum != squashedSeqNum
- // will never be false. Normally the squash would never be able
- // to go past the head of the ROB; in this case it might, so it
- // must be handled otherwise it will segfault.
- #if !FULL_SYSTEM
- if (squashIt == cpu->instList.begin()) {
- DPRINTF(ROB, "ROB: Reached head of instruction list while "
++ DPRINTF(ROB, "[tid:%u]: Squashing instruction PC %#x, seq num %i.\n",
++ (*squashIt[tid])->threadNumber,
++ (*squashIt[tid])->readPC(),
++ (*squashIt[tid])->seqNum);
+
+ // Mark the instruction as squashed, and ready to commit so that
+ // it can drain out of the pipeline.
- squashIt = cpu->instList.end();
++ (*squashIt[tid])->setSquashed();
++
++ (*squashIt[tid])->setCanCommit();
++
++
++ if (squashIt[tid] == instList[tid].begin()) {
++ DPRINTF(ROB, "Reached head of instruction list while "
+ "squashing.\n");
+
- doneSquashing = true;
++ squashIt[tid] = instList[tid].end();
+
- #endif
++ doneSquashing[tid] = true;
+
+ return;
+ }
- // Move the tail iterator to the next instruction.
- squashIt--;
+
- if ((*squashIt)->seqNum == squashedSeqNum) {
- DPRINTF(ROB, "ROB: Done squashing instructions.\n");
++ InstIt tail_thread = instList[tid].end();
++ tail_thread--;
++
++ if ((*squashIt[tid]) == (*tail_thread))
++ robTailUpdate = true;
++
++ squashIt[tid]--;
+ }
+
+
+ // Check if ROB is done squashing.
- squashIt = cpu->instList.end();
++ if ((*squashIt[tid])->seqNum <= squashedSeqNum) {
++ DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n",
++ tid);
++
++ squashIt[tid] = instList[tid].end();
++
++ doneSquashing[tid] = true;
++ }
++
++ if (robTailUpdate) {
++ updateTail();
++ }
++}
++
++
++template <class Impl>
++void
++ROB<Impl>::updateHead()
++{
++ DynInstPtr head_inst;
++ InstSeqNum lowest_num = 0;
++ bool first_valid = true;
++
++ // @todo: set ActiveThreads through ROB or CPU
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++
++ while (threads != (*activeThreads).end()) {
++ unsigned thread_num = *threads++;
++
++ if (instList[thread_num].empty())
++ continue;
++
++ if (first_valid) {
++ head = instList[thread_num].begin();
++ lowest_num = (*head)->seqNum;
++ first_valid = false;
++ continue;
++ }
++
++ InstIt head_thread = instList[thread_num].begin();
++
++ DynInstPtr head_inst = (*head_thread);
++
++ assert(head_inst != 0);
++
++ if (head_inst->seqNum < lowest_num) {
++ head = head_thread;
++ lowest_num = head_inst->seqNum;
++ }
++ }
++
++ if (first_valid) {
++ head = instList[0].end();
++ }
+
- doneSquashing = true;
++}
++
++template <class Impl>
++void
++ROB<Impl>::updateTail()
++{
++ tail = instList[0].end();
++ bool first_valid = true;
++
++ list<unsigned>::iterator threads = (*activeThreads).begin();
++
++ while (threads != (*activeThreads).end()) {
++ unsigned tid = *threads++;
++
++ if (instList[tid].empty()) {
++ continue;
++ }
++
++ // If this is the first valid then assign w/out
++ // comparison
++ if (first_valid) {
++ tail = instList[tid].end();
++ tail--;
++ first_valid = false;
++ continue;
++ }
+
- ROB<Impl>::squash(InstSeqNum squash_num)
++ // Assign new tail if this thread's tail is younger
++ // than our current "tail high"
++ InstIt tail_thread = instList[tid].end();
++ tail_thread--;
++
++ if ((*tail_thread)->seqNum > (*tail)->seqNum) {
++ tail = tail_thread;
++ }
+ }
+}
+
++
+template <class Impl>
+void
- DPRINTF(ROB, "ROB: Starting to squash within the ROB.\n");
- doneSquashing = false;
++ROB<Impl>::squash(InstSeqNum squash_num,unsigned tid)
+{
- assert(tail != cpu->instList.end());
++ if (isEmpty()) {
++ DPRINTF(ROB, "Does not need to squash due to being empty "
++ "[sn:%i]\n",
++ squash_num);
++
++ return;
++ }
++
++ DPRINTF(ROB, "Starting to squash within the ROB.\n");
++
++ robStatus[tid] = ROBSquashing;
++
++ doneSquashing[tid] = false;
+
+ squashedSeqNum = squash_num;
+
- squashIt = tail;
++ if (!instList[tid].empty()) {
++ InstIt tail_thread = instList[tid].end();
++ tail_thread--;
+
- doSquash();
++ squashIt[tid] = tail_thread;
+
- assert(numInstsInROB == countInsts());
++ doSquash(tid);
++ }
++}
++/*
++template <class Impl>
++typename Impl::DynInstPtr
++ROB<Impl>::readHeadInst()
++{
++ if (numInstsInROB != 0) {
++ assert((*head)->isInROB()==true);
++ return *head;
++ } else {
++ return dummyInst;
++ }
+}
++*/
++template <class Impl>
++typename Impl::DynInstPtr
++ROB<Impl>::readHeadInst(unsigned tid)
++{
++ if (threadEntries[tid] != 0) {
++ InstIt head_thread = instList[tid].begin();
+
++ assert((*head_thread)->isInROB()==true);
++
++ return *head_thread;
++ } else {
++ return dummyInst;
++ }
++}
++/*
+template <class Impl>
+uint64_t
+ROB<Impl>::readHeadPC()
+{
- DynInstPtr head_inst = cpu->instList.front();
++ //assert(numInstsInROB == countInsts());
+
- assert(numInstsInROB == countInsts());
++ DynInstPtr head_inst = *head;
+
+ return head_inst->readPC();
+}
+
++template <class Impl>
++uint64_t
++ROB<Impl>::readHeadPC(unsigned tid)
++{
++ //assert(numInstsInROB == countInsts());
++ InstIt head_thread = instList[tid].begin();
++
++ return (*head_thread)->readPC();
++}
++
++
+template <class Impl>
+uint64_t
+ROB<Impl>::readHeadNextPC()
+{
- DynInstPtr head_inst = cpu->instList.front();
++ //assert(numInstsInROB == countInsts());
+
- // Return the last sequence number that has not been squashed. Other
- // stages can use it to squash any instructions younger than the current
- // tail.
- DynInstPtr head_inst = cpu->instList.front();
++ DynInstPtr head_inst = *head;
+
+ return head_inst->readNextPC();
+}
+
++template <class Impl>
++uint64_t
++ROB<Impl>::readHeadNextPC(unsigned tid)
++{
++ //assert(numInstsInROB == countInsts());
++ InstIt head_thread = instList[tid].begin();
++
++ return (*head_thread)->readNextPC();
++}
++
+template <class Impl>
+InstSeqNum
+ROB<Impl>::readHeadSeqNum()
+{
- assert(numInstsInROB == countInsts());
++ //assert(numInstsInROB == countInsts());
++ DynInstPtr head_inst = *head;
+
+ return head_inst->seqNum;
+}
+
++template <class Impl>
++InstSeqNum
++ROB<Impl>::readHeadSeqNum(unsigned tid)
++{
++ InstIt head_thread = instList[tid].begin();
++
++ return ((*head_thread)->seqNum);
++}
++
++template <class Impl>
++typename Impl::DynInstPtr
++ROB<Impl>::readTailInst()
++{
++ //assert(numInstsInROB == countInsts());
++ //assert(tail != instList[0].end());
++
++ return (*tail);
++}
++*/
++template <class Impl>
++typename Impl::DynInstPtr
++ROB<Impl>::readTailInst(unsigned tid)
++{
++ //assert(tail_thread[tid] != instList[tid].end());
++
++ InstIt tail_thread = instList[tid].end();
++ tail_thread--;
++
++ return *tail_thread;
++}
++
++/*
+template <class Impl>
+uint64_t
+ROB<Impl>::readTailPC()
+{
- assert(tail != cpu->instList.end());
++ //assert(numInstsInROB == countInsts());
+
- #endif // __CPU_O3_CPU_ROB_IMPL_HH__
++ //assert(tail != instList[0].end());
+
+ return (*tail)->readPC();
+}
+
++template <class Impl>
++uint64_t
++ROB<Impl>::readTailPC(unsigned tid)
++{
++ //assert(tail_thread[tid] != instList[tid].end());
++
++ InstIt tail_thread = instList[tid].end();
++ tail_thread--;
++
++ return (*tail_thread)->readPC();
++}
++
+template <class Impl>
+InstSeqNum
+ROB<Impl>::readTailSeqNum()
+{
+ // Return the last sequence number that has not been squashed. Other
+ // stages can use it to squash any instructions younger than the current
+ // tail.
+ return (*tail)->seqNum;
+}
+
++template <class Impl>
++InstSeqNum
++ROB<Impl>::readTailSeqNum(unsigned tid)
++{
++ // Return the last sequence number that has not been squashed. Other
++ // stages can use it to squash any instructions younger than the current
++ // tail.
++ // assert(tail_thread[tid] != instList[tid].end());
++
++ InstIt tail_thread = instList[tid].end();
++ tail_thread--;
++
++ return (*tail_thread)->seqNum;
++}
++*/
--- /dev/null
- : maxVal(0), counter(0)
+/*
+ * Copyright (c) 2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "base/misc.hh"
+#include "cpu/o3/sat_counter.hh"
+
+SatCounter::SatCounter()
- : maxVal((1 << bits) - 1), counter(0)
++ : initialVal(0), counter(0)
+{
+}
+
+SatCounter::SatCounter(unsigned bits)
- SatCounter::SatCounter(unsigned bits, unsigned initial_val)
- : maxVal((1 << bits) - 1), counter(initial_val)
++ : initialVal(0), maxVal((1 << bits) - 1), counter(0)
+{
+}
+
- panic("BP: Initial counter value exceeds max size.");
++SatCounter::SatCounter(unsigned bits, uint8_t initial_val)
++ : initialVal(initial_val), maxVal((1 << bits) - 1), counter(initial_val)
+{
+ // Check to make sure initial value doesn't exceed the max counter value.
+ if (initial_val > maxVal) {
-
- void
- SatCounter::increment()
- {
- if(counter < maxVal) {
- ++counter;
- }
- }
-
- void
- SatCounter::decrement()
- {
- if(counter > 0) {
- --counter;
- }
- }
++ fatal("BP: Initial counter value exceeds max size.");
+ }
+}
+
+void
+SatCounter::setBits(unsigned bits)
+{
+ maxVal = (1 << bits) - 1;
+}
--- /dev/null
- * Copyright (c) 2005 The Regents of The University of Michigan
+/*
- #ifndef __CPU_O3_CPU_SAT_COUNTER_HH__
- #define __CPU_O3_CPU_SAT_COUNTER_HH__
++ * Copyright (c) 2005-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- SatCounter();
++#ifndef __CPU_O3_SAT_COUNTER_HH__
++#define __CPU_O3_SAT_COUNTER_HH__
+
+#include "sim/host.hh"
+
+/**
+ * Private counter class for the internal saturating counters.
+ * Implements an n bit saturating counter and provides methods to
+ * increment, decrement, and read it.
+ * @todo Consider making this something that more closely mimics a
+ * built in class so you can use ++ or --.
+ */
+class SatCounter
+{
+ public:
+ /**
+ * Constructor for the counter.
+ */
- SatCounter(unsigned bits);
++ SatCounter()
++ : initialVal(0), counter(0)
++ { }
+
+ /**
+ * Constructor for the counter.
+ * @param bits How many bits the counter will have.
+ */
- SatCounter(unsigned bits, unsigned initial_val);
++ SatCounter(unsigned bits)
++ : initialVal(0), maxVal((1 << bits) - 1), counter(0)
++ { }
+
+ /**
+ * Constructor for the counter.
+ * @param bits How many bits the counter will have.
+ * @param initial_val Starting value for each counter.
+ */
- void setBits(unsigned bits);
++ SatCounter(unsigned bits, uint8_t initial_val)
++ : initialVal(initial_val), maxVal((1 << bits) - 1), counter(initial_val)
++ {
++ // Check to make sure initial value doesn't exceed the max
++ // counter value.
++ if (initial_val > maxVal) {
++ fatal("BP: Initial counter value exceeds max size.");
++ }
++ }
+
+ /**
+ * Sets the number of bits.
+ */
- void increment();
++ void setBits(unsigned bits) { maxVal = (1 << bits) - 1; }
++
++ void reset() { counter = initialVal; }
+
+ /**
+ * Increments the counter's current value.
+ */
- void decrement();
++ void increment()
++ {
++ if (counter < maxVal) {
++ ++counter;
++ }
++ }
+
+ /**
+ * Decrements the counter's current value.
+ */
- {
- return counter;
- }
++ void decrement()
++ {
++ if (counter > 0) {
++ --counter;
++ }
++ }
+
+ /**
+ * Read the counter's value.
+ */
+ const uint8_t read() const
- #endif // __CPU_O3_CPU_SAT_COUNTER_HH__
++ { return counter; }
+
+ private:
++ uint8_t initialVal;
+ uint8_t maxVal;
+ uint8_t counter;
+};
+
++#endif // __CPU_O3_SAT_COUNTER_HH__
--- /dev/null
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+/*
- : SSIT_size(_SSIT_size), LFST_size(_LFST_size)
++ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "base/trace.hh"
+#include "cpu/o3/store_set.hh"
+
+StoreSet::StoreSet(int _SSIT_size, int _LFST_size)
- SSIT_size, LFST_size);
++ : SSITSize(_SSIT_size), LFSTSize(_LFST_size)
+{
+ DPRINTF(StoreSet, "StoreSet: Creating store set object.\n");
+ DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n",
- SSIT = new SSID[SSIT_size];
++ SSITSize, LFSTSize);
+
- validSSIT.resize(SSIT_size);
++ SSIT.resize(SSITSize);
+
- for (int i = 0; i < SSIT_size; ++i)
++ validSSIT.resize(SSITSize);
+
- LFST = new InstSeqNum[LFST_size];
++ for (int i = 0; i < SSITSize; ++i)
+ validSSIT[i] = false;
+
- validLFST.resize(LFST_size);
++ LFST.resize(LFSTSize);
+
- SSCounters = new int[LFST_size];
++ validLFST.resize(LFSTSize);
+
- for (int i = 0; i < LFST_size; ++i)
- {
++ for (int i = 0; i < LFSTSize; ++i) {
++ validLFST[i] = false;
++ LFST[i] = 0;
++ }
++
++ indexMask = SSITSize - 1;
++
++ offsetBits = 2;
++}
++
++StoreSet::~StoreSet()
++{
++}
++
++void
++StoreSet::init(int _SSIT_size, int _LFST_size)
++{
++ SSITSize = _SSIT_size;
++ LFSTSize = _LFST_size;
+
- SSCounters[i] = 0;
++ DPRINTF(StoreSet, "StoreSet: Creating store set object.\n");
++ DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n",
++ SSITSize, LFSTSize);
++
++ SSIT.resize(SSITSize);
++
++ validSSIT.resize(SSITSize);
++
++ for (int i = 0; i < SSITSize; ++i)
++ validSSIT[i] = false;
++
++ LFST.resize(LFSTSize);
++
++ validLFST.resize(LFSTSize);
++
++ for (int i = 0; i < LFSTSize; ++i) {
+ validLFST[i] = false;
- index_mask = SSIT_size - 1;
++ LFST[i] = 0;
+ }
+
- offset_bits = 2;
++ indexMask = SSITSize - 1;
+
- assert(load_index < SSIT_size && store_index < SSIT_size);
++ offsetBits = 2;
+}
+
++
+void
+StoreSet::violation(Addr store_PC, Addr load_PC)
+{
+ int load_index = calcIndex(load_PC);
+ int store_index = calcIndex(store_PC);
+
- assert(new_set < LFST_size);
-
- SSCounters[new_set]++;
-
++ assert(load_index < SSITSize && store_index < SSITSize);
+
+ bool valid_load_SSID = validSSIT[load_index];
+ bool valid_store_SSID = validSSIT[store_index];
+
+ if (!valid_load_SSID && !valid_store_SSID) {
+ // Calculate a new SSID here.
+ SSID new_set = calcSSID(load_PC);
+
+ validSSIT[load_index] = true;
+
+ SSIT[load_index] = new_set;
+
+ validSSIT[store_index] = true;
+
+ SSIT[store_index] = new_set;
+
- assert(load_SSID < LFST_size);
-
- SSCounters[load_SSID]++;
++ assert(new_set < LFSTSize);
+
+ DPRINTF(StoreSet, "StoreSet: Neither load nor store had a valid "
+ "storeset, creating a new one: %i for load %#x, store %#x\n",
+ new_set, load_PC, store_PC);
+ } else if (valid_load_SSID && !valid_store_SSID) {
+ SSID load_SSID = SSIT[load_index];
+
+ validSSIT[store_index] = true;
+
+ SSIT[store_index] = load_SSID;
+
- // Because we are having a load point to an already existing set,
- // the size of the store set is not incremented.
-
++ assert(load_SSID < LFSTSize);
+
+ DPRINTF(StoreSet, "StoreSet: Load had a valid store set. Adding "
+ "store to that set: %i for load %#x, store %#x\n",
+ load_SSID, load_PC, store_PC);
+ } else if (!valid_load_SSID && valid_store_SSID) {
+ SSID store_SSID = SSIT[store_index];
+
+ validSSIT[load_index] = true;
+
+ SSIT[load_index] = store_SSID;
+
- assert(load_SSID < LFST_size && store_SSID < LFST_size);
-
- int load_SS_size = SSCounters[load_SSID];
- int store_SS_size = SSCounters[store_SSID];
+ DPRINTF(StoreSet, "StoreSet: Store had a valid store set: %i for "
+ "load %#x, store %#x\n",
+ store_SSID, load_PC, store_PC);
+ } else {
+ SSID load_SSID = SSIT[load_index];
+ SSID store_SSID = SSIT[store_index];
+
- // If the load has the bigger store set, then assign the store
- // to the same store set as the load. Otherwise vice-versa.
- if (load_SS_size > store_SS_size) {
++ assert(load_SSID < LFSTSize && store_SSID < LFSTSize);
+
- SSCounters[load_SSID]++;
- SSCounters[store_SSID]--;
-
- DPRINTF(StoreSet, "StoreSet: Load had bigger store set: %i; "
++ // The store set with the lower number wins
++ if (store_SSID > load_SSID) {
+ SSIT[store_index] = load_SSID;
+
- SSCounters[store_SSID]++;
- SSCounters[load_SSID]--;
-
- DPRINTF(StoreSet, "StoreSet: Store had bigger store set: %i; "
++ DPRINTF(StoreSet, "StoreSet: Load had smaller store set: %i; "
+ "for load %#x, store %#x\n",
+ load_SSID, load_PC, store_PC);
+ } else {
+ SSIT[load_index] = store_SSID;
+
- StoreSet::insertStore(Addr store_PC, InstSeqNum store_seq_num)
++ DPRINTF(StoreSet, "StoreSet: Store had smaller store set: %i; "
+ "for load %#x, store %#x\n",
+ store_SSID, load_PC, store_PC);
+ }
+ }
+}
+
+void
+StoreSet::insertLoad(Addr load_PC, InstSeqNum load_seq_num)
+{
+ // Does nothing.
+ return;
+}
+
+void
- assert(index < SSIT_size);
++StoreSet::insertStore(Addr store_PC, InstSeqNum store_seq_num,
++ unsigned tid)
+{
+ int index = calcIndex(store_PC);
+
+ int store_SSID;
+
- assert(store_SSID < LFST_size);
++ assert(index < SSITSize);
+
+ if (!validSSIT[index]) {
+ // Do nothing if there's no valid entry.
+ return;
+ } else {
+ store_SSID = SSIT[index];
+
- assert(index < SSIT_size);
++ assert(store_SSID < LFSTSize);
+
+ // Update the last store that was fetched with the current one.
+ LFST[store_SSID] = store_seq_num;
+
+ validLFST[store_SSID] = 1;
+
++ storeList[store_seq_num] = store_SSID;
++
+ DPRINTF(StoreSet, "Store %#x updated the LFST, SSID: %i\n",
+ store_PC, store_SSID);
+ }
+}
+
+InstSeqNum
+StoreSet::checkInst(Addr PC)
+{
+ int index = calcIndex(PC);
+
+ int inst_SSID;
+
- assert(inst_SSID < LFST_size);
++ assert(index < SSITSize);
+
+ if (!validSSIT[index]) {
+ DPRINTF(StoreSet, "Inst %#x with index %i had no SSID\n",
+ PC, index);
+
+ // Return 0 if there's no valid entry.
+ return 0;
+ } else {
+ inst_SSID = SSIT[index];
+
- assert(index < SSIT_size);
++ assert(inst_SSID < LFSTSize);
+
+ if (!validLFST[inst_SSID]) {
+
+ DPRINTF(StoreSet, "Inst %#x with index %i and SSID %i had no "
+ "dependency\n", PC, index, inst_SSID);
+
+ return 0;
+ } else {
+ DPRINTF(StoreSet, "Inst %#x with index %i and SSID %i had LFST "
+ "inum of %i\n", PC, index, inst_SSID, LFST[inst_SSID]);
+
+ return LFST[inst_SSID];
+ }
+ }
+}
+
+void
+StoreSet::issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store)
+{
+ // This only is updated upon a store being issued.
+ if (!is_store) {
+ return;
+ }
+
+ int index = calcIndex(issued_PC);
+
+ int store_SSID;
+
- assert(store_SSID < LFST_size);
++ assert(index < SSITSize);
++
++ SeqNumMapIt store_list_it = storeList.find(issued_seq_num);
++
++ if (store_list_it != storeList.end()) {
++ storeList.erase(store_list_it);
++ }
+
+ // Make sure the SSIT still has a valid entry for the issued store.
+ if (!validSSIT[index]) {
+ return;
+ }
+
+ store_SSID = SSIT[index];
+
- StoreSet::squash(InstSeqNum squashed_num)
++ assert(store_SSID < LFSTSize);
+
+ // If the last fetched store in the store set refers to the store that
+ // was just issued, then invalidate the entry.
+ if (validLFST[store_SSID] && LFST[store_SSID] == issued_seq_num) {
+ DPRINTF(StoreSet, "StoreSet: store invalidated itself in LFST.\n");
+ validLFST[store_SSID] = false;
+ }
+}
+
+void
- // Not really sure how to do this well.
- // Generally this is small enough that it should be okay; short circuit
- // evaluation should take care of invalid entries.
-
++StoreSet::squash(InstSeqNum squashed_num, unsigned tid)
+{
- for (int i = 0; i < LFST_size; ++i) {
- if (validLFST[i] && LFST[i] < squashed_num) {
- validLFST[i] = false;
+ DPRINTF(StoreSet, "StoreSet: Squashing until inum %i\n",
+ squashed_num);
+
- for (int i = 0; i < SSIT_size; ++i) {
++ int idx;
++ SeqNumMapIt store_list_it = storeList.begin();
++
++ //@todo:Fix to only delete from correct thread
++ while (!storeList.empty()) {
++ idx = (*store_list_it).second;
++
++ if ((*store_list_it).first <= squashed_num) {
++ break;
++ }
++
++ bool younger = LFST[idx] > squashed_num;
++
++ if (validLFST[idx] && younger) {
++ DPRINTF(StoreSet, "Squashed [sn:%lli]\n", LFST[idx]);
++ validLFST[idx] = false;
++
++ storeList.erase(store_list_it++);
++ } else if (!validLFST[idx] && younger) {
++ storeList.erase(store_list_it++);
+ }
+ }
+}
+
+void
+StoreSet::clear()
+{
- for (int i = 0; i < LFST_size; ++i) {
++ for (int i = 0; i < SSITSize; ++i) {
+ validSSIT[i] = false;
+ }
+
- }
++ for (int i = 0; i < LFSTSize; ++i) {
+ validLFST[i] = false;
+ }
+
++ storeList.clear();
++}
--- /dev/null
- #ifndef __CPU_O3_CPU_STORE_SET_HH__
- #define __CPU_O3_CPU_STORE_SET_HH__
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- void insertStore(Addr store_PC, InstSeqNum store_seq_num);
++#ifndef __CPU_O3_STORE_SET_HH__
++#define __CPU_O3_STORE_SET_HH__
+
++#include <list>
++#include <map>
++#include <utility>
+#include <vector>
+
+#include "arch/isa_traits.hh"
+#include "cpu/inst_seq.hh"
+
++struct ltseqnum {
++ bool operator()(const InstSeqNum &lhs, const InstSeqNum &rhs) const
++ {
++ return lhs > rhs;
++ }
++};
++
+class StoreSet
+{
+ public:
+ typedef unsigned SSID;
+
+ public:
++ StoreSet() { };
++
+ StoreSet(int SSIT_size, int LFST_size);
+
++ ~StoreSet();
++
++ void init(int SSIT_size, int LFST_size);
++
+ void violation(Addr store_PC, Addr load_PC);
+
+ void insertLoad(Addr load_PC, InstSeqNum load_seq_num);
+
- void squash(InstSeqNum squashed_num);
++ void insertStore(Addr store_PC, InstSeqNum store_seq_num,
++ unsigned tid);
+
+ InstSeqNum checkInst(Addr PC);
+
+ void issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store);
+
- { return (PC >> offset_bits) & index_mask; }
++ void squash(InstSeqNum squashed_num, unsigned tid);
+
+ void clear();
+
+ private:
+ inline int calcIndex(Addr PC)
- { return ((PC ^ (PC >> 10)) % LFST_size); }
++ { return (PC >> offsetBits) & indexMask; }
+
+ inline SSID calcSSID(Addr PC)
- SSID *SSIT;
++ { return ((PC ^ (PC >> 10)) % LFSTSize); }
+
- InstSeqNum *LFST;
++ std::vector<SSID> SSIT;
+
+ std::vector<bool> validSSIT;
+
- int *SSCounters;
++ std::vector<InstSeqNum> LFST;
+
+ std::vector<bool> validLFST;
+
- int SSIT_size;
++ std::map<InstSeqNum, int, ltseqnum> storeList;
++
++ typedef std::map<InstSeqNum, int, ltseqnum>::iterator SeqNumMapIt;
+
- int LFST_size;
++ int SSITSize;
+
- int index_mask;
++ int LFSTSize;
+
- int offset_bits;
++ int indexMask;
+
+ // HACK: Hardcoded for now.
- #endif // __CPU_O3_CPU_STORE_SET_HH__
++ int offsetBits;
+};
+
++#endif // __CPU_O3_STORE_SET_HH__
--- /dev/null
- TournamentBP::TournamentBP(unsigned _local_predictor_size,
- unsigned _local_ctr_bits,
- unsigned _local_history_table_size,
- unsigned _local_history_bits,
- unsigned _global_predictor_size,
- unsigned _global_ctr_bits,
- unsigned _global_history_bits,
- unsigned _choice_predictor_size,
- unsigned _choice_ctr_bits,
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/o3/tournament_pred.hh"
+
- : localPredictorSize(_local_predictor_size),
- localCtrBits(_local_ctr_bits),
- localHistoryTableSize(_local_history_table_size),
- localHistoryBits(_local_history_bits),
- globalPredictorSize(_global_predictor_size),
- globalCtrBits(_global_ctr_bits),
- globalHistoryBits(_global_history_bits),
- choicePredictorSize(_global_predictor_size),
- choiceCtrBits(_choice_ctr_bits),
++TournamentBP::TournamentBP(unsigned _localPredictorSize,
++ unsigned _localCtrBits,
++ unsigned _localHistoryTableSize,
++ unsigned _localHistoryBits,
++ unsigned _globalPredictorSize,
++ unsigned _globalCtrBits,
++ unsigned _globalHistoryBits,
++ unsigned _choicePredictorSize,
++ unsigned _choiceCtrBits,
+ unsigned _instShiftAmt)
- localCtrs = new SatCounter[localPredictorSize];
++ : localPredictorSize(_localPredictorSize),
++ localCtrBits(_localCtrBits),
++ localHistoryTableSize(_localHistoryTableSize),
++ localHistoryBits(_localHistoryBits),
++ globalPredictorSize(_globalPredictorSize),
++ globalCtrBits(_globalCtrBits),
++ globalHistoryBits(_globalHistoryBits),
++ choicePredictorSize(_globalPredictorSize),
++ choiceCtrBits(_choiceCtrBits),
+ instShiftAmt(_instShiftAmt)
+{
+ //Should do checks here to make sure sizes are correct (powers of 2)
+
+ //Setup the array of counters for the local predictor
- localHistoryTable = new unsigned[localHistoryTableSize];
++ localCtrs.resize(localPredictorSize);
+
+ for (int i = 0; i < localPredictorSize; ++i)
+ localCtrs[i].setBits(localCtrBits);
+
+ //Setup the history table for the local table
- globalCtrs = new SatCounter[globalPredictorSize];
++ localHistoryTable.resize(localHistoryTableSize);
+
+ for (int i = 0; i < localHistoryTableSize; ++i)
+ localHistoryTable[i] = 0;
+
+ // Setup the local history mask
+ localHistoryMask = (1 << localHistoryBits) - 1;
+
+ //Setup the array of counters for the global predictor
- choiceCtrs = new SatCounter[choicePredictorSize];
++ globalCtrs.resize(globalPredictorSize);
+
+ for (int i = 0; i < globalPredictorSize; ++i)
+ globalCtrs[i].setBits(globalCtrBits);
+
+ //Clear the global history
+ globalHistory = 0;
+ // Setup the global history mask
+ globalHistoryMask = (1 << globalHistoryBits) - 1;
+
+ //Setup the array of counters for the choice predictor
- }
- else {
++ choiceCtrs.resize(choicePredictorSize);
+
+ for (int i = 0; i < choicePredictorSize; ++i)
+ choiceCtrs[i].setBits(choiceCtrBits);
+
+ threshold = (1 << (localCtrBits - 1)) - 1;
+ threshold = threshold / 2;
+}
+
+inline
+unsigned
+TournamentBP::calcLocHistIdx(Addr &branch_addr)
+{
+ return (branch_addr >> instShiftAmt) & (localHistoryTableSize - 1);
+}
+
+inline
+void
+TournamentBP::updateHistoriesTaken(unsigned local_history_idx)
+{
+ globalHistory = (globalHistory << 1) | 1;
+ globalHistory = globalHistory & globalHistoryMask;
+
+ localHistoryTable[local_history_idx] =
+ (localHistoryTable[local_history_idx] << 1) | 1;
+}
+
+inline
+void
+TournamentBP::updateHistoriesNotTaken(unsigned local_history_idx)
+{
+ globalHistory = (globalHistory << 1);
+ globalHistory = globalHistory & globalHistoryMask;
+
+ localHistoryTable[local_history_idx] =
+ (localHistoryTable[local_history_idx] << 1);
+}
+
+bool
+TournamentBP::lookup(Addr &branch_addr)
+{
+ uint8_t local_prediction;
+ unsigned local_history_idx;
+ unsigned local_predictor_idx;
+
+ uint8_t global_prediction;
+ uint8_t choice_prediction;
+
+ //Lookup in the local predictor to get its branch prediction
+ local_history_idx = calcLocHistIdx(branch_addr);
+ local_predictor_idx = localHistoryTable[local_history_idx]
+ & localHistoryMask;
+ local_prediction = localCtrs[local_predictor_idx].read();
+
+ //Lookup in the global predictor to get its branch prediction
+ global_prediction = globalCtrs[globalHistory].read();
+
+ //Lookup in the choice predictor to see which one to use
+ choice_prediction = choiceCtrs[globalHistory].read();
+
+ //@todo Put a threshold value in for the three predictors that can
+ // be set through the constructor (so this isn't hard coded).
+ //Also should put some of this code into functions.
+ if (choice_prediction > threshold) {
+ if (global_prediction > threshold) {
+ updateHistoriesTaken(local_history_idx);
+
+ assert(globalHistory < globalPredictorSize &&
+ local_history_idx < localPredictorSize);
+
+ globalCtrs[globalHistory].increment();
+ localCtrs[local_history_idx].increment();
+
+ return true;
+ } else {
+ updateHistoriesNotTaken(local_history_idx);
+
+ assert(globalHistory < globalPredictorSize &&
+ local_history_idx < localPredictorSize);
+
+ globalCtrs[globalHistory].decrement();
+ localCtrs[local_history_idx].decrement();
+
+ return false;
+ }
+ } else {
+ if (local_prediction > threshold) {
+ updateHistoriesTaken(local_history_idx);
+
+ assert(globalHistory < globalPredictorSize &&
+ local_history_idx < localPredictorSize);
+
+ globalCtrs[globalHistory].increment();
+ localCtrs[local_history_idx].increment();
+
+ return true;
+ } else {
+ updateHistoriesNotTaken(local_history_idx);
+
+ assert(globalHistory < globalPredictorSize &&
+ local_history_idx < localPredictorSize);
+
+ globalCtrs[globalHistory].decrement();
+ localCtrs[local_history_idx].decrement();
+
+ return false;
+ }
+ }
+}
+
+// Update the branch predictor if it predicted a branch wrong.
+void
+TournamentBP::update(Addr &branch_addr, unsigned correct_gh, bool taken)
+{
+
+ uint8_t local_prediction;
+ unsigned local_history_idx;
+ unsigned local_predictor_idx;
+ bool local_pred_taken;
+
+ uint8_t global_prediction;
+ bool global_pred_taken;
+
+ // Load the correct global history into the register.
+ globalHistory = correct_gh;
+
+ // Get the local predictor's current prediction, remove the incorrect
+ // update, and update the local predictor
+ local_history_idx = calcLocHistIdx(branch_addr);
+ local_predictor_idx = localHistoryTable[local_history_idx];
+ local_predictor_idx = (local_predictor_idx >> 1) & localHistoryMask;
+
+ local_prediction = localCtrs[local_predictor_idx].read();
+ local_pred_taken = local_prediction > threshold;
+
+ //Get the global predictor's current prediction, and update the
+ //global predictor
+ global_prediction = globalCtrs[globalHistory].read();
+ global_pred_taken = global_prediction > threshold;
+
+ //Update the choice predictor to tell it which one was correct
+ if (local_pred_taken != global_pred_taken) {
+ //If the local prediction matches the actual outcome, decrement
+ //the counter. Otherwise increment the counter.
+ if (local_pred_taken == taken) {
+ choiceCtrs[globalHistory].decrement();
+ } else {
+ choiceCtrs[globalHistory].increment();
+ }
+ }
+
+ if (taken) {
+ assert(globalHistory < globalPredictorSize &&
+ local_predictor_idx < localPredictorSize);
+
+ localCtrs[local_predictor_idx].increment();
+ globalCtrs[globalHistory].increment();
+
+ globalHistory = (globalHistory << 1) | 1;
+ globalHistory = globalHistory & globalHistoryMask;
+
+ localHistoryTable[local_history_idx] |= 1;
++ } else {
+ assert(globalHistory < globalPredictorSize &&
+ local_predictor_idx < localPredictorSize);
+
+ localCtrs[local_predictor_idx].decrement();
+ globalCtrs[globalHistory].decrement();
+
+ globalHistory = (globalHistory << 1);
+ globalHistory = globalHistory & globalHistoryMask;
+
+ localHistoryTable[local_history_idx] &= ~1;
+ }
+}
--- /dev/null
- #ifndef __CPU_O3_CPU_TOURNAMENT_PRED_HH__
- #define __CPU_O3_CPU_TOURNAMENT_PRED_HH__
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- TournamentBP(unsigned local_predictor_size,
- unsigned local_ctr_bits,
- unsigned local_history_table_size,
- unsigned local_history_bits,
- unsigned global_predictor_size,
- unsigned global_history_bits,
- unsigned global_ctr_bits,
- unsigned choice_predictor_size,
- unsigned choice_ctr_bits,
++#ifndef __CPU_O3_TOURNAMENT_PRED_HH__
++#define __CPU_O3_TOURNAMENT_PRED_HH__
+
+// For Addr type.
+#include "arch/isa_traits.hh"
+#include "cpu/o3/sat_counter.hh"
++#include <vector>
+
+class TournamentBP
+{
+ public:
+ /**
+ * Default branch predictor constructor.
+ */
- SatCounter *localCtrs;
++ TournamentBP(unsigned localPredictorSize,
++ unsigned localCtrBits,
++ unsigned localHistoryTableSize,
++ unsigned localHistoryBits,
++ unsigned globalPredictorSize,
++ unsigned globalCtrBits,
++ unsigned globalHistoryBits,
++ unsigned choicePredictorSize,
++ unsigned choiceCtrBits,
+ unsigned instShiftAmt);
+
+ /**
+ * Looks up the given address in the branch predictor and returns
+ * a true/false value as to whether it is taken.
+ * @param branch_addr The address of the branch to look up.
+ * @return Whether or not the branch is taken.
+ */
+ bool lookup(Addr &branch_addr);
+
+ /**
+ * Updates the branch predictor with the actual result of a branch.
+ * @param branch_addr The address of the branch to update.
+ * @param taken Whether or not the branch was taken.
+ */
+ void update(Addr &branch_addr, unsigned global_history, bool taken);
+
+ inline unsigned readGlobalHist() { return globalHistory; }
+
+ private:
+
+ inline bool getPrediction(uint8_t &count);
+
+ inline unsigned calcLocHistIdx(Addr &branch_addr);
+
+ inline void updateHistoriesTaken(unsigned local_history_idx);
+
+ inline void updateHistoriesNotTaken(unsigned local_history_idx);
+
+ /** Local counters. */
- unsigned *localHistoryTable;
++ std::vector<SatCounter> localCtrs;
+
+ /** Size of the local predictor. */
+ unsigned localPredictorSize;
+
+ /** Number of bits of the local predictor's counters. */
+ unsigned localCtrBits;
+
+ /** Array of local history table entries. */
- SatCounter *globalCtrs;
++ std::vector<unsigned> localHistoryTable;
+
+ /** Size of the local history table. */
+ unsigned localHistoryTableSize;
+
+ /** Number of bits for each entry of the local history table.
+ * @todo Doesn't this come from the size of the local predictor?
+ */
+ unsigned localHistoryBits;
+
+ /** Mask to get the proper local history. */
+ unsigned localHistoryMask;
+
+
+ /** Array of counters that make up the global predictor. */
- SatCounter *choiceCtrs;
++ std::vector<SatCounter> globalCtrs;
+
+ /** Size of the global predictor. */
+ unsigned globalPredictorSize;
+
+ /** Number of bits of the global predictor's counters. */
+ unsigned globalCtrBits;
+
+ /** Global history register. */
+ unsigned globalHistory;
+
+ /** Number of bits for the global history. */
+ unsigned globalHistoryBits;
+
+ /** Mask to get the proper global history. */
+ unsigned globalHistoryMask;
+
+
+ /** Array of counters that make up the choice predictor. */
- #endif // __CPU_O3_CPU_TOURNAMENT_PRED_HH__
++ std::vector<SatCounter> choiceCtrs;
+
+ /** Size of the choice predictor (identical to the global predictor). */
+ unsigned choicePredictorSize;
+
+ /** Number of bits of the choice predictor's counters. */
+ unsigned choiceCtrBits;
+
+ /** Number of bits to shift the instruction over to get rid of the word
+ * offset.
+ */
+ unsigned instShiftAmt;
+
+ /** Threshold for the counter value; above the threshold is taken,
+ * equal to or below the threshold is not taken.
+ */
+ unsigned threshold;
+};
+
++#endif // __CPU_O3_TOURNAMENT_PRED_HH__
--- /dev/null
- #include "cpu/ooo_cpu/ooo_cpu_impl.hh"
- #include "cpu/ooo_cpu/ooo_dyn_inst.hh"
- #include "cpu/ooo_cpu/ooo_impl.hh"
+/*
+ * Copyright (c) 2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- template class OoOCPU<OoOImpl>;
++#include "cpu/ozone/cpu_impl.hh"
++#include "cpu/ozone/ozone_impl.hh"
++#include "cpu/ozone/simple_impl.hh"
+
++template class OzoneCPU<SimpleImpl>;
++template class OzoneCPU<OzoneImpl>;
--- /dev/null
- #ifndef __CPU_OOO_CPU_OOO_CPU_HH__
- #define __CPU_OOO_CPU_OOO_CPU_HH__
+/*
+ * Copyright (c) 2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- #include "encumbered/cpu/full/fu_pool.hh"
- #include "cpu/ooo_cpu/ea_list.hh"
++#ifndef __CPU_OZONE_CPU_HH__
++#define __CPU_OZONE_CPU_HH__
++
++#include <set>
+
+#include "base/statistics.hh"
++#include "base/timebuf.hh"
+#include "config/full_system.hh"
+#include "cpu/base.hh"
+#include "cpu/exec_context.hh"
- class Processor;
++#include "cpu/inst_seq.hh"
++#include "cpu/ozone/rename_table.hh"
++#include "cpu/ozone/thread_state.hh"
+#include "cpu/pc_event.hh"
+#include "cpu/static_inst.hh"
+#include "mem/mem_interface.hh"
+#include "sim/eventq.hh"
+
+// forward declarations
+#if FULL_SYSTEM
- class OoOCPU : public BaseCPU
++#include "arch/alpha/tlb.hh"
++
+class AlphaITB;
+class AlphaDTB;
+class PhysicalMemory;
++class MemoryController;
+
++class Sampler;
+class RemoteGDB;
+class GDBListener;
+
++namespace Kernel {
++ class Statistics;
++};
++
+#else
+
+class Process;
+
+#endif // FULL_SYSTEM
+
+class Checkpoint;
++class EndQuiesceEvent;
+class MemInterface;
+
+namespace Trace {
+ class InstRecord;
+}
+
++template <class>
++class Checker;
++
+/**
+ * Declaration of Out-of-Order CPU class. Basically it is a SimpleCPU with
+ * simple out-of-order capabilities added to it. It is still a 1 CPI machine
+ * (?), but is capable of handling cache misses. Basically it models having
+ * a ROB/IQ by only allowing a certain amount of instructions to execute while
+ * the cache miss is outstanding.
+ */
+
+template <class Impl>
- OoOCPU *cpu;
++class OzoneCPU : public BaseCPU
+{
+ private:
++ typedef typename Impl::FrontEnd FrontEnd;
++ typedef typename Impl::BackEnd BackEnd;
+ typedef typename Impl::DynInst DynInst;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
++ typedef TheISA::MiscReg MiscReg;
++
++ public:
++ class OzoneXC : public ExecContext {
++ public:
++ OzoneCPU<Impl> *cpu;
++
++ OzoneThreadState<Impl> *thread;
++
++ BaseCPU *getCpuPtr();
++
++ void setCpuId(int id);
++
++ int readCpuId() { return thread->cpuId; }
++
++ FunctionalMemory *getMemPtr() { return thread->mem; }
++
++#if FULL_SYSTEM
++ System *getSystemPtr() { return cpu->system; }
++
++ PhysicalMemory *getPhysMemPtr() { return cpu->physmem; }
++
++ AlphaITB *getITBPtr() { return cpu->itb; }
++
++ AlphaDTB * getDTBPtr() { return cpu->dtb; }
++
++ Kernel::Statistics *getKernelStats() { return thread->kernelStats; }
++#else
++ Process *getProcessPtr() { return thread->process; }
++#endif
++
++ Status status() const { return thread->_status; }
++
++ void setStatus(Status new_status);
++
++ /// Set the status to Active. Optional delay indicates number of
++ /// cycles to wait before beginning execution.
++ void activate(int delay = 1);
++
++ /// Set the status to Suspended.
++ void suspend();
++
++ /// Set the status to Unallocated.
++ void deallocate();
++
++ /// Set the status to Halted.
++ void halt();
++
++#if FULL_SYSTEM
++ void dumpFuncProfile();
++#endif
++
++ void takeOverFrom(ExecContext *old_context);
++
++ void regStats(const std::string &name);
++
++ void serialize(std::ostream &os);
++ void unserialize(Checkpoint *cp, const std::string &section);
++
++#if FULL_SYSTEM
++ EndQuiesceEvent *getQuiesceEvent();
++
++ Tick readLastActivate();
++ Tick readLastSuspend();
++
++ void profileClear();
++ void profileSample();
++#endif
++
++ int getThreadNum();
++
++ // Also somewhat obnoxious. Really only used for the TLB fault.
++ TheISA::MachInst getInst();
++
++ void copyArchRegs(ExecContext *xc);
++
++ void clearArchRegs();
++
++ uint64_t readIntReg(int reg_idx);
++
++ float readFloatRegSingle(int reg_idx);
++
++ double readFloatRegDouble(int reg_idx);
++
++ uint64_t readFloatRegInt(int reg_idx);
++
++ void setIntReg(int reg_idx, uint64_t val);
++
++ void setFloatRegSingle(int reg_idx, float val);
++
++ void setFloatRegDouble(int reg_idx, double val);
++
++ void setFloatRegInt(int reg_idx, uint64_t val);
++
++ uint64_t readPC() { return thread->PC; }
++ void setPC(Addr val);
++
++ uint64_t readNextPC() { return thread->nextPC; }
++ void setNextPC(Addr val);
++
++ public:
++ // ISA stuff:
++ MiscReg readMiscReg(int misc_reg);
++
++ MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault);
++
++ Fault setMiscReg(int misc_reg, const MiscReg &val);
++
++ Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val);
++
++ unsigned readStCondFailures()
++ { return thread->storeCondFailures; }
++
++ void setStCondFailures(unsigned sc_failures)
++ { thread->storeCondFailures = sc_failures; }
++
++#if FULL_SYSTEM
++ bool inPalMode() { return cpu->inPalMode(); }
++#endif
++
++ bool misspeculating() { return false; }
++
++#if !FULL_SYSTEM
++ TheISA::IntReg getSyscallArg(int i)
++ { return thread->renameTable[TheISA::ArgumentReg0 + i]->readIntResult(); }
++
++ // used to shift args for indirect syscall
++ void setSyscallArg(int i, TheISA::IntReg val)
++ { thread->renameTable[TheISA::ArgumentReg0 + i]->setIntResult(i); }
++
++ void setSyscallReturn(SyscallReturn return_value)
++ { cpu->setSyscallReturn(return_value, thread->tid); }
++
++ Counter readFuncExeInst() { return thread->funcExeInst; }
++
++ void setFuncExeInst(Counter new_val)
++ { thread->funcExeInst = new_val; }
++#endif
++ };
++
++ // execution context proxy
++ OzoneXC ozoneXC;
++ ExecContext *xcProxy;
++ ExecContext *checkerXC;
++
++ typedef OzoneThreadState<Impl> ImplState;
++
++ private:
++ OzoneThreadState<Impl> thread;
++
+ public:
+ // main simulation loop (one cycle)
+ void tick();
+
++ std::set<InstSeqNum> snList;
++ std::set<Addr> lockAddrList;
+ private:
+ struct TickEvent : public Event
+ {
- TickEvent(OoOCPU *c, int w);
++ OzoneCPU *cpu;
+ int width;
+
- tickEvent.reschedule(curTick + delay);
++ TickEvent(OzoneCPU *c, int w);
+ void process();
+ const char *description();
+ };
+
+ TickEvent tickEvent;
+
+ /// Schedule tick event, regardless of its current state.
+ void scheduleTickEvent(int delay)
+ {
+ if (tickEvent.squashed())
- tickEvent.schedule(curTick + delay);
++ tickEvent.reschedule(curTick + cycles(delay));
+ else if (!tickEvent.scheduled())
- //
++ tickEvent.schedule(curTick + cycles(delay));
+ }
+
+ /// Unschedule tick event, regardless of its current state.
+ void unscheduleTickEvent()
+ {
+ if (tickEvent.scheduled())
+ tickEvent.squash();
+ }
+
+ private:
+ Trace::InstRecord *traceData;
+
+ template<typename T>
+ void trace_data(T data);
+
+ public:
- IcacheMiss,
- IcacheMissComplete,
- DcacheMissStall,
+ enum Status {
+ Running,
+ Idle,
- private:
+ SwitchedOut
+ };
+
- struct Params : public BaseCPU::Params
- {
- MemInterface *icache_interface;
- MemInterface *dcache_interface;
- int width;
- #if FULL_SYSTEM
- AlphaITB *itb;
- AlphaDTB *dtb;
- FunctionalMemory *mem;
- #else
- Process *process;
- #endif
- int issueWidth;
- };
+ Status _status;
+
+ public:
++ bool checkInterrupts;
++
+ void post_interrupt(int int_num, int index);
+
+ void zero_fill_64(Addr addr) {
+ static int warned = 0;
+ if (!warned) {
+ warn ("WH64 is not implemented");
+ warned = 1;
+ }
+ };
+
- OoOCPU(Params *params);
++ typedef typename Impl::Params Params;
+
- virtual ~OoOCPU();
++ OzoneCPU(Params *params);
+
- private:
- void copyFromXC();
-
++ virtual ~OzoneCPU();
+
+ void init();
+
- // execution context
- ExecContext *xc;
+ public:
- void switchOut();
++ BaseCPU *getCpuPtr() { return this; }
++
++ void setCpuId(int id) { cpuId = id; }
++
++ int readCpuId() { return cpuId; }
++
++ int cpuId;
+
- FuncUnitPool *fuPool;
++ void switchOut(Sampler *sampler);
++ void signalSwitched();
+ void takeOverFrom(BaseCPU *oldCPU);
+
++ Sampler *sampler;
++
++ int switchCount;
++
+#if FULL_SYSTEM
+ Addr dbg_vtophys(Addr addr);
+
+ bool interval_stats;
++
++ AlphaITB *itb;
++ AlphaDTB *dtb;
++ System *system;
++
++ // the following two fields are redundant, since we can always
++ // look them up through the system pointer, but we'll leave them
++ // here for now for convenience
++ MemoryController *memctrl;
++ PhysicalMemory *physmem;
+#endif
+
+ // L1 instruction cache
+ MemInterface *icacheInterface;
+
+ // L1 data cache
+ MemInterface *dcacheInterface;
+
- // Refcounted pointer to the one memory request.
- MemReqPtr cacheMemReq;
-
- class ICacheCompletionEvent : public Event
- {
- private:
- OoOCPU *cpu;
-
- public:
- ICacheCompletionEvent(OoOCPU *_cpu);
-
- virtual void process();
- virtual const char *description();
- };
-
- // Will need to create a cache completion event upon any memory miss.
- ICacheCompletionEvent iCacheCompletionEvent;
-
- class DCacheCompletionEvent;
-
- typedef typename
- std::list<DCacheCompletionEvent>::iterator DCacheCompEventIt;
-
- class DCacheCompletionEvent : public Event
- {
- private:
- OoOCPU *cpu;
- DynInstPtr inst;
- DCacheCompEventIt dcceIt;
-
- public:
- DCacheCompletionEvent(OoOCPU *_cpu, DynInstPtr &_inst,
- DCacheCompEventIt &_dcceIt);
-
- virtual void process();
- virtual const char *description();
- };
-
- friend class DCacheCompletionEvent;
-
- protected:
- std::list<DCacheCompletionEvent> dCacheCompList;
- DCacheCompEventIt dcceIt;
++ /** Pointer to memory. */
++ FunctionalMemory *mem;
+
- Stats::Scalar<> numInsts;
++ FrontEnd *frontEnd;
+
++ BackEnd *backEnd;
+ private:
+ Status status() const { return _status; }
++ void setStatus(Status new_status) { _status = new_status; }
+
+ virtual void activateContext(int thread_num, int delay);
+ virtual void suspendContext(int thread_num);
+ virtual void deallocateContext(int thread_num);
+ virtual void haltContext(int thread_num);
+
+ // statistics
+ virtual void regStats();
+ virtual void resetStats();
+
+ // number of simulated instructions
++ public:
+ Counter numInst;
+ Counter startNumInst;
- // number of simulated memory references
- Stats::Scalar<> numMemRefs;
-
+
+ virtual Counter totalInstructions() const
+ {
+ return numInst - startNumInst;
+ }
+
-
- // number of cycles stalled for I-cache misses
- Stats::Scalar<> icacheStallCycles;
- Counter lastIcacheStall;
-
- // number of cycles stalled for D-cache misses
- Stats::Scalar<> dcacheStallCycles;
- Counter lastDcacheStall;
-
- void processICacheCompletion();
-
++ private:
+ // number of simulated loads
+ Counter numLoad;
+ Counter startNumLoad;
+
+ // number of idle cycles
+ Stats::Average<> notIdleFraction;
+ Stats::Formula idleFraction;
- int getInstAsid() { return xc->regs.instAsid(); }
- int getDataAsid() { return xc->regs.dataAsid(); }
+ public:
+
+ virtual void serialize(std::ostream &os);
+ virtual void unserialize(Checkpoint *cp, const std::string &section);
+
++
+#if FULL_SYSTEM
+ bool validInstAddr(Addr addr) { return true; }
+ bool validDataAddr(Addr addr) { return true; }
- { return xc->validInstAddr(addr); }
+
+ Fault translateInstReq(MemReqPtr &req)
+ {
+ return itb->translate(req);
+ }
+
+ Fault translateDataReadReq(MemReqPtr &req)
+ {
+ return dtb->translate(req, false);
+ }
+
+ Fault translateDataWriteReq(MemReqPtr &req)
+ {
+ return dtb->translate(req, true);
+ }
+
+#else
+ bool validInstAddr(Addr addr)
- { return xc->validDataAddr(addr); }
++ { return true; }
+
+ bool validDataAddr(Addr addr)
- int getInstAsid() { return xc->asid; }
- int getDataAsid() { return xc->asid; }
++ { return true; }
+
-
++ int getInstAsid() { return thread.asid; }
++ int getDataAsid() { return thread.asid; }
+
+ Fault dummyTranslation(MemReqPtr &req)
+ {
+#if 0
+ assert((req->vaddr >> 48 & 0xffff) == 0);
+#endif
+
+ // put the asid in the upper 16 bits of the paddr
+ req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16);
+ req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16;
+ return NoFault;
+ }
++
++ /** Translates instruction requestion in syscall emulation mode. */
+ Fault translateInstReq(MemReqPtr &req)
+ {
+ return dummyTranslation(req);
+ }
++
++ /** Translates data read request in syscall emulation mode. */
+ Fault translateDataReadReq(MemReqPtr &req)
+ {
+ return dummyTranslation(req);
+ }
++
++ /** Translates data write request in syscall emulation mode. */
+ Fault translateDataWriteReq(MemReqPtr &req)
+ {
+ return dummyTranslation(req);
+ }
- Fault read(Addr addr, T &data, unsigned flags, DynInstPtr inst);
-
- template <class T>
- Fault write(T data, Addr addr, unsigned flags,
- uint64_t *res, DynInstPtr inst);
-
- void prefetch(Addr addr, unsigned flags)
+#endif
+
++ /** Old CPU read from memory function. No longer used. */
+ template <class T>
- // need to do this...
- }
-
- void writeHint(Addr addr, int size, unsigned flags)
- {
- // need to do this...
- }
-
- Fault copySrcTranslate(Addr src);
-
- Fault copy(Addr dest);
-
- private:
- bool executeInst(DynInstPtr &inst);
-
- void renameInst(DynInstPtr &inst);
-
- void addInst(DynInstPtr &inst);
-
- void commitHeadInst();
-
- bool getOneInst();
-
- Fault fetchCacheLine();
-
- InstSeqNum getAndIncrementInstSeq();
-
- bool ambigMemAddr;
-
- private:
- InstSeqNum globalSeqNum;
-
- DynInstPtr renameTable[TheISA::TotalNumRegs];
- DynInstPtr commitTable[TheISA::TotalNumRegs];
-
- // Might need a table of the shadow registers as well.
- #if FULL_SYSTEM
- DynInstPtr palShadowTable[TheISA::NumIntRegs];
++ Fault read(MemReqPtr &req, T &data)
+ {
- public:
- // The register accessor methods provide the index of the
- // instruction's operand (e.g., 0 or 1), not the architectural
- // register index, to simplify the implementation of register
- // renaming. We find the architectural register index by indexing
- // into the instruction's own operand index table. Note that a
- // raw pointer to the StaticInst is provided instead of a
- // ref-counted StaticInstPtr to redice overhead. This is fine as
- // long as these methods don't copy the pointer into any long-term
- // storage (which is pretty hard to imagine they would have reason
- // to do).
-
- // In the OoO case these shouldn't read from the XC but rather from the
- // rename table of DynInsts. Also these likely shouldn't be called very
- // often, other than when adding things into the xc during say a syscall.
-
- uint64_t readIntReg(StaticInst *si, int idx)
- {
- return xc->readIntReg(si->srcRegIdx(idx));
++#if 0
++#if FULL_SYSTEM && defined(TARGET_ALPHA)
++ if (req->flags & LOCKED) {
++ req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr);
++ req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true);
++ }
++#endif
+#endif
++ Fault error;
++ if (req->flags & LOCKED) {
++ lockAddrList.insert(req->paddr);
++ lockFlag = true;
++ }
+
- FloatReg readFloatReg(StaticInst *si, int idx, width)
- {
- int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
- return xc->readFloatReg(reg_idx, width);
- }
++ error = this->mem->read(req, data);
++ data = gtoh(data);
++ return error;
+ }
+
- FloatReg readFloatReg(StaticInst *si, int idx)
+
- int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
- return xc->readFloatReg(reg_idx);
++ /** CPU read function, forwards read to LSQ. */
++ template <class T>
++ Fault read(MemReqPtr &req, T &data, int load_idx)
+ {
- FloatRegBits readFloatRegBits(StaticInst *si, int idx, int width)
++ return backEnd->read(req, data, load_idx);
+ }
+
- int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
- return xc->readFloatRegBits(reg_idx, width);
- }
++ /** Old CPU write to memory function. No longer used. */
++ template <class T>
++ Fault write(MemReqPtr &req, T &data)
+ {
- FloatRegBits readFloatRegBits(StaticInst *si, int idx)
- {
- int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
- return xc->readFloatRegBits(reg_idx);
- }
++#if 0
++#if FULL_SYSTEM && defined(TARGET_ALPHA)
++ ExecContext *xc;
++
++ // If this is a store conditional, act appropriately
++ if (req->flags & LOCKED) {
++ xc = req->xc;
++
++ if (req->flags & UNCACHEABLE) {
++ // Don't update result register (see stq_c in isa_desc)
++ req->result = 2;
++ xc->setStCondFailures(0);//Needed? [RGD]
++ } else {
++ bool lock_flag = xc->readMiscReg(TheISA::Lock_Flag_DepTag);
++ Addr lock_addr = xc->readMiscReg(TheISA::Lock_Addr_DepTag);
++ req->result = lock_flag;
++ if (!lock_flag ||
++ ((lock_addr & ~0xf) != (req->paddr & ~0xf))) {
++ xc->setMiscReg(TheISA::Lock_Flag_DepTag, false);
++ xc->setStCondFailures(xc->readStCondFailures() + 1);
++ if (((xc->readStCondFailures()) % 100000) == 0) {
++ std::cerr << "Warning: "
++ << xc->readStCondFailures()
++ << " consecutive store conditional failures "
++ << "on cpu " << req->xc->readCpuId()
++ << std::endl;
++ }
++ return NoFault;
++ }
++ else xc->setStCondFailures(0);
++ }
++ }
+
- void setIntReg(StaticInst *si, int idx, uint64_t val)
- {
- xc->setIntReg(si->destRegIdx(idx), val);
- }
++ // Need to clear any locked flags on other proccessors for
++ // this address. Only do this for succsful Store Conditionals
++ // and all other stores (WH64?). Unsuccessful Store
++ // Conditionals would have returned above, and wouldn't fall
++ // through.
++ for (int i = 0; i < this->system->execContexts.size(); i++){
++ xc = this->system->execContexts[i];
++ if ((xc->readMiscReg(TheISA::Lock_Addr_DepTag) & ~0xf) ==
++ (req->paddr & ~0xf)) {
++ xc->setMiscReg(TheISA::Lock_Flag_DepTag, false);
++ }
++ }
+
- void setFloatReg(StaticInst *si, int idx, FloatReg val, int width)
- {
- int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
- xc->setFloatReg(reg_idx, val, width);
- }
++#endif
++#endif
+
- void setFloatReg(StaticInst *si, int idx, FloatReg val)
- {
- int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
- xc->setFloatReg(reg_idx, val);
++ if (req->flags & LOCKED) {
++ if (req->flags & UNCACHEABLE) {
++ req->result = 2;
++ } else {
++ if (this->lockFlag) {
++ if (lockAddrList.find(req->paddr) !=
++ lockAddrList.end()) {
++ req->result = 1;
++ } else {
++ req->result = 0;
++ return NoFault;
++ }
++ } else {
++ req->result = 0;
++ return NoFault;
++ }
++ }
++ }
+
- void setFloatRegBits(StaticInst *si, int idx, FloatRegBits val, int width)
++ return this->mem->write(req, (T)htog(data));
+ }
+
- int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
- xc->setFloatRegBits(reg_idx, val, width);
++ /** CPU write function, forwards write to LSQ. */
++ template <class T>
++ Fault write(MemReqPtr &req, T &data, int store_idx)
+ {
- void setFloatRegBits(StaticInst *si, int idx, FloatRegBits val)
++ return backEnd->write(req, data, store_idx);
+ }
+
- int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
- xc->setFloatRegBits(reg_idx, val);
++ void prefetch(Addr addr, unsigned flags)
+ {
- uint64_t readPC() { return PC; }
- void setNextPC(Addr val) { nextPC = val; }
-
- private:
- Addr PC;
- Addr nextPC;
-
- unsigned issueWidth;
-
- bool fetchRedirExcp;
- bool fetchRedirBranch;
-
- /** Mask to get a cache block's address. */
- Addr cacheBlkMask;
-
- unsigned cacheBlkSize;
-
- Addr cacheBlkPC;
-
- /** The cache line being fetched. */
- uint8_t *cacheData;
-
- protected:
- bool cacheBlkValid;
-
- private:
-
- // Align an address (typically a PC) to the start of an I-cache block.
- // We fold in the PISA 64- to 32-bit conversion here as well.
- Addr icacheBlockAlignPC(Addr addr)
++ // need to do this...
+ }
+
- addr = TheISA::realPCToFetchPC(addr);
- return (addr & ~(cacheBlkMask));
++ void writeHint(Addr addr, int size, unsigned flags)
+ {
- unsigned instSize;
++ // need to do this...
+ }
+
- // ROB tracking stuff.
- DynInstPtr robHeadPtr;
- DynInstPtr robTailPtr;
- unsigned robSize;
- unsigned robInsts;
++ Fault copySrcTranslate(Addr src);
+
- // List of outstanding EA instructions.
- protected:
- EAList eaList;
++ Fault copy(Addr dest);
+
- void branchToTarget(Addr val)
- {
- if (!fetchRedirExcp) {
- fetchRedirBranch = true;
- PC = val;
- }
- }
-
- // ISA stuff:
- uint64_t readUniq() { return xc->readUniq(); }
- void setUniq(uint64_t val) { xc->setUniq(val); }
++ InstSeqNum globalSeqNum;
+
+ public:
- uint64_t readFpcr() { return xc->readFpcr(); }
- void setFpcr(uint64_t val) { xc->setFpcr(val); }
++ void squashFromXC();
+
- uint64_t readIpr(int idx, Fault &fault) { return xc->readIpr(idx, fault); }
- Fault setIpr(int idx, uint64_t val) { return xc->setIpr(idx, val); }
- Fault hwrei() { return xc->hwrei(); }
- int readIntrFlag() { return xc->readIntrFlag(); }
- void setIntrFlag(int val) { xc->setIntrFlag(val); }
- bool inPalMode() { return xc->inPalMode(); }
- void trap(Fault fault) { fault->invoke(xc); }
- bool simPalCheck(int palFunc) { return xc->simPalCheck(palFunc); }
++ // @todo: This can be a useful debug function. Implement it.
++ void dumpInsts() { frontEnd->dumpInsts(); }
+
+#if FULL_SYSTEM
- void syscall() { xc->syscall(); }
- #endif
-
- ExecContext *xcBase() { return xc; }
- };
-
-
- // precise architected memory state accessor macros
- template <class Impl>
- template <class T>
- Fault
- OoOCPU<Impl>::read(Addr addr, T &data, unsigned flags, DynInstPtr inst)
- {
- MemReqPtr readReq = new MemReq();
- readReq->xc = xc;
- readReq->asid = 0;
- readReq->data = new uint8_t[64];
-
- readReq->reset(addr, sizeof(T), flags);
-
- // translate to physical address - This might be an ISA impl call
- Fault fault = translateDataReadReq(readReq);
-
- // do functional access
- if (fault == NoFault)
- fault = xc->mem->read(readReq, data);
- #if 0
- if (traceData) {
- traceData->setAddr(addr);
- if (fault == NoFault)
- traceData->setData(data);
- }
++ Fault hwrei();
++ int readIntrFlag() { return thread.regs.intrflag; }
++ void setIntrFlag(int val) { thread.regs.intrflag = val; }
++ bool inPalMode() { return AlphaISA::PcPAL(thread.PC); }
++ bool inPalMode(Addr pc) { return AlphaISA::PcPAL(pc); }
++ bool simPalCheck(int palFunc);
++ void processInterrupts();
+#else
- // if we have a cache, do cache access too
- if (fault == NoFault && dcacheInterface) {
- readReq->cmd = Read;
- readReq->completionEvent = NULL;
- readReq->time = curTick;
- /*MemAccessResult result = */dcacheInterface->access(readReq);
++ void syscall();
++ void setSyscallReturn(SyscallReturn return_value, int tid);
+#endif
+
- if (dcacheInterface->doEvents()) {
- readReq->completionEvent = new DCacheCompletionEvent(this, inst,
- dcceIt);
- }
- }
-
- if (!dcacheInterface && (readReq->flags & UNCACHEABLE))
- recordEvent("Uncached Read");
-
- return fault;
- }
-
- template <class Impl>
- template <class T>
- Fault
- OoOCPU<Impl>::write(T data, Addr addr, unsigned flags,
- uint64_t *res, DynInstPtr inst)
- {
- MemReqPtr writeReq = new MemReq();
- writeReq->xc = xc;
- writeReq->asid = 0;
- writeReq->data = new uint8_t[64];
++ ExecContext *xcBase() { return xcProxy; }
+
- #if 0
- if (traceData) {
- traceData->setAddr(addr);
- traceData->setData(data);
- }
- #endif
-
- writeReq->reset(addr, sizeof(T), flags);
-
- // translate to physical address
- Fault fault = translateDataWriteReq(writeReq);
-
- // do functional access
- if (fault == NoFault)
- fault = xc->write(writeReq, data);
-
- if (fault == NoFault && dcacheInterface) {
- writeReq->cmd = Write;
- memcpy(writeReq->data,(uint8_t *)&data,writeReq->size);
- writeReq->completionEvent = NULL;
- writeReq->time = curTick;
- /*MemAccessResult result = */dcacheInterface->access(writeReq);
-
- if (dcacheInterface->doEvents()) {
- writeReq->completionEvent = new DCacheCompletionEvent(this, inst,
- dcceIt);
- }
- }
-
- if (res && (fault == NoFault))
- *res = writeReq->result;
++ bool decoupledFrontEnd;
++ struct CommStruct {
++ InstSeqNum doneSeqNum;
++ InstSeqNum nonSpecSeqNum;
++ bool uncached;
++ unsigned lqIdx;
+
- if (!dcacheInterface && (writeReq->flags & UNCACHEABLE))
- recordEvent("Uncached Write");
++ bool stall;
++ };
++ TimeBuffer<CommStruct> comm;
+
- return fault;
- }
++ bool lockFlag;
+
- #endif // __CPU_OOO_CPU_OOO_CPU_HH__
++ Stats::Scalar<> quiesceCycles;
+
++ Checker<DynInstPtr> *checker;
++};
+
++#endif // __CPU_OZONE_CPU_HH__
--- /dev/null
- * Copyright (c) 2005 The Regents of The University of Michigan
+/*
- #ifndef __CPU_OOO_CPU_OOO_IMPL_HH__
- #define __CPU_OOO_CPU_OOO_IMPL_HH__
++ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
- #include "arch/isa_traits.hh"
++//#include <cstdio>
++//#include <cstdlib>
+
- class OoOCPU;
++#include "arch/isa_traits.hh" // For MachInst
++#include "base/trace.hh"
++#include "config/full_system.hh"
++#include "cpu/base.hh"
++#include "cpu/checker/exec_context.hh"
++#include "cpu/exec_context.hh"
++#include "cpu/exetrace.hh"
++#include "cpu/ozone/cpu.hh"
++#include "cpu/quiesce_event.hh"
++#include "cpu/static_inst.hh"
++//#include "mem/base_mem.hh"
++#include "mem/mem_interface.hh"
++#include "sim/sim_object.hh"
++#include "sim/stats.hh"
++
++#if FULL_SYSTEM
++#include "arch/faults.hh"
++#include "arch/alpha/osfpal.hh"
++#include "arch/alpha/tlb.hh"
++#include "arch/vtophys.hh"
++#include "base/callback.hh"
++//#include "base/remote_gdb.hh"
++#include "cpu/profile.hh"
++#include "kern/kernel_stats.hh"
++#include "mem/functional/memory_control.hh"
++#include "mem/functional/physical.hh"
++#include "sim/faults.hh"
++#include "sim/sim_events.hh"
++#include "sim/sim_exit.hh"
++#include "sim/system.hh"
++#else // !FULL_SYSTEM
++#include "mem/functional/functional.hh"
++#include "sim/process.hh"
++#endif // FULL_SYSTEM
++
++using namespace TheISA;
++
++template <class Impl>
++template<typename T>
++void
++OzoneCPU<Impl>::trace_data(T data) {
++ if (traceData) {
++ traceData->setData(data);
++ }
++}
++
++template <class Impl>
++OzoneCPU<Impl>::TickEvent::TickEvent(OzoneCPU *c, int w)
++ : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c), width(w)
++{
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::TickEvent::process()
++{
++ cpu->tick();
++}
++
++template <class Impl>
++const char *
++OzoneCPU<Impl>::TickEvent::description()
++{
++ return "OzoneCPU tick event";
++}
++
++template <class Impl>
++OzoneCPU<Impl>::OzoneCPU(Params *p)
++#if FULL_SYSTEM
++ : BaseCPU(p), thread(this, 0, p->mem), tickEvent(this, p->width),
++ mem(p->mem),
++#else
++ : BaseCPU(p), thread(this, 0, p->workload[0], 0), tickEvent(this, p->width),
++ mem(p->workload[0]->getMemory()),
++#endif
++ comm(5, 5)
++{
++ frontEnd = new FrontEnd(p);
++ backEnd = new BackEnd(p);
++
++ _status = Idle;
++
++ if (p->checker) {
++ BaseCPU *temp_checker = p->checker;
++ checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker);
++ checker->setMemory(mem);
++#if FULL_SYSTEM
++ checker->setSystem(p->system);
++#endif
++ checkerXC = new CheckerExecContext<OzoneXC>(&ozoneXC, checker);
++ thread.xcProxy = checkerXC;
++ xcProxy = checkerXC;
++ } else {
++ checker = NULL;
++ thread.xcProxy = &ozoneXC;
++ xcProxy = &ozoneXC;
++ }
++
++ ozoneXC.cpu = this;
++ ozoneXC.thread = &thread;
++
++ thread.inSyscall = false;
++
++ thread.setStatus(ExecContext::Suspended);
++#if FULL_SYSTEM
++ /***** All thread state stuff *****/
++ thread.cpu = this;
++ thread.tid = 0;
++ thread.mem = p->mem;
++
++ thread.quiesceEvent = new EndQuiesceEvent(xcProxy);
++
++ system = p->system;
++ itb = p->itb;
++ dtb = p->dtb;
++ memctrl = p->system->memctrl;
++ physmem = p->system->physmem;
++
++ if (p->profile) {
++ thread.profile = new FunctionProfile(p->system->kernelSymtab);
++ // @todo: This might be better as an ExecContext instead of OzoneXC
++ Callback *cb =
++ new MakeCallback<OzoneXC,
++ &OzoneXC::dumpFuncProfile>(&ozoneXC);
++ registerExitCallback(cb);
++ }
++
++ // let's fill with a dummy node for now so we don't get a segfault
++ // on the first cycle when there's no node available.
++ static ProfileNode dummyNode;
++ thread.profileNode = &dummyNode;
++ thread.profilePC = 3;
++#else
++ thread.cpu = this;
++ thread.tid = 0;
++ thread.process = p->workload[0];
++ thread.asid = 0;
++#endif // !FULL_SYSTEM
++
++ numInst = 0;
++ startNumInst = 0;
++
++ execContexts.push_back(xcProxy);
++
++ frontEnd->setCPU(this);
++ backEnd->setCPU(this);
++
++ frontEnd->setXC(xcProxy);
++ backEnd->setXC(xcProxy);
++
++ frontEnd->setThreadState(&thread);
++ backEnd->setThreadState(&thread);
++
++ frontEnd->setCommBuffer(&comm);
++ backEnd->setCommBuffer(&comm);
++
++ frontEnd->setBackEnd(backEnd);
++ backEnd->setFrontEnd(frontEnd);
++
++ decoupledFrontEnd = p->decoupledFrontEnd;
++
++ globalSeqNum = 1;
++
++ checkInterrupts = false;
++
++ for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
++ thread.renameTable[i] = new DynInst(this);
++ thread.renameTable[i]->setResultReady();
++ }
++
++ frontEnd->renameTable.copyFrom(thread.renameTable);
++ backEnd->renameTable.copyFrom(thread.renameTable);
++
++#if !FULL_SYSTEM
++// pTable = p->pTable;
++#endif
++
++ lockFlag = 0;
++
++ DPRINTF(OzoneCPU, "OzoneCPU: Created Ozone cpu object.\n");
++}
++
++template <class Impl>
++OzoneCPU<Impl>::~OzoneCPU()
++{
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::switchOut(Sampler *_sampler)
++{
++ sampler = _sampler;
++ switchCount = 0;
++ // Front end needs state from back end, so switch out the back end first.
++ backEnd->switchOut();
++ frontEnd->switchOut();
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::signalSwitched()
++{
++ if (++switchCount == 2) {
++ backEnd->doSwitchOut();
++ frontEnd->doSwitchOut();
++ if (checker)
++ checker->switchOut(sampler);
++ _status = SwitchedOut;
++ if (tickEvent.scheduled())
++ tickEvent.squash();
++ sampler->signalSwitched();
++ }
++ assert(switchCount <= 2);
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
++{
++ BaseCPU::takeOverFrom(oldCPU);
++
++ backEnd->takeOverFrom();
++ frontEnd->takeOverFrom();
++ assert(!tickEvent.scheduled());
++
++ // @todo: Fix hardcoded number
++ // Clear out any old information in time buffer.
++ for (int i = 0; i < 6; ++i) {
++ comm.advance();
++ }
++
++ // if any of this CPU's ExecContexts are active, mark the CPU as
++ // running and schedule its tick event.
++ for (int i = 0; i < execContexts.size(); ++i) {
++ ExecContext *xc = execContexts[i];
++ if (xc->status() == ExecContext::Active &&
++ _status != Running) {
++ _status = Running;
++ tickEvent.schedule(curTick);
++ }
++ }
++ // Nothing running, change status to reflect that we're no longer
++ // switched out.
++ if (_status == SwitchedOut) {
++ _status = Idle;
++ }
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::activateContext(int thread_num, int delay)
++{
++ // Eventually change this in SMT.
++ assert(thread_num == 0);
++
++ assert(_status == Idle);
++ notIdleFraction++;
++ scheduleTickEvent(delay);
++ _status = Running;
++ thread._status = ExecContext::Active;
++ frontEnd->wakeFromQuiesce();
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::suspendContext(int thread_num)
++{
++ // Eventually change this in SMT.
++ assert(thread_num == 0);
++ // @todo: Figure out how to initially set the status properly so
++ // this is running.
++// assert(_status == Running);
++ notIdleFraction--;
++ unscheduleTickEvent();
++ _status = Idle;
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::deallocateContext(int thread_num)
++{
++ // for now, these are equivalent
++ suspendContext(thread_num);
++}
+
+template <class Impl>
- class OoODynInst;
++void
++OzoneCPU<Impl>::haltContext(int thread_num)
++{
++ // for now, these are equivalent
++ suspendContext(thread_num);
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::regStats()
++{
++ using namespace Stats;
++
++ BaseCPU::regStats();
++
++ thread.numInsts
++ .name(name() + ".num_insts")
++ .desc("Number of instructions executed")
++ ;
++
++ thread.numMemRefs
++ .name(name() + ".num_refs")
++ .desc("Number of memory references")
++ ;
++
++ notIdleFraction
++ .name(name() + ".not_idle_fraction")
++ .desc("Percentage of non-idle cycles")
++ ;
++
++ idleFraction
++ .name(name() + ".idle_fraction")
++ .desc("Percentage of idle cycles")
++ ;
++
++ quiesceCycles
++ .name(name() + ".quiesce_cycles")
++ .desc("Number of cycles spent in quiesce")
++ ;
++
++ idleFraction = constant(1.0) - notIdleFraction;
++
++ frontEnd->regStats();
++ backEnd->regStats();
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::resetStats()
++{
++ startNumInst = numInst;
++ notIdleFraction = (_status != Idle);
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::init()
++{
++ BaseCPU::init();
++
++ // Mark this as in syscall so it won't need to squash
++ thread.inSyscall = true;
++#if FULL_SYSTEM
++ for (int i = 0; i < execContexts.size(); ++i) {
++ ExecContext *xc = execContexts[i];
++
++ // initialize CPU, including PC
++ TheISA::initCPU(xc, xc->readCpuId());
++ }
++#endif
++ frontEnd->renameTable.copyFrom(thread.renameTable);
++ backEnd->renameTable.copyFrom(thread.renameTable);
++
++ thread.inSyscall = false;
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::serialize(std::ostream &os)
++{
++ BaseCPU::serialize(os);
++ SERIALIZE_ENUM(_status);
++ nameOut(os, csprintf("%s.xc", name()));
++ ozoneXC.serialize(os);
++ nameOut(os, csprintf("%s.tickEvent", name()));
++ tickEvent.serialize(os);
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::unserialize(Checkpoint *cp, const std::string §ion)
++{
++ BaseCPU::unserialize(cp, section);
++ UNSERIALIZE_ENUM(_status);
++ ozoneXC.unserialize(cp, csprintf("%s.xc", section));
++ tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
++}
++
++template <class Impl>
++Fault
++OzoneCPU<Impl>::copySrcTranslate(Addr src)
++{
++ panic("Copy not implemented!\n");
++ return NoFault;
++#if 0
++ static bool no_warn = true;
++ int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64;
++ // Only support block sizes of 64 atm.
++ assert(blk_size == 64);
++ int offset = src & (blk_size - 1);
++
++ // Make sure block doesn't span page
++ if (no_warn &&
++ (src & TheISA::PageMask) != ((src + blk_size) & TheISA::PageMask) &&
++ (src >> 40) != 0xfffffc) {
++ warn("Copied block source spans pages %x.", src);
++ no_warn = false;
++ }
++
++ memReq->reset(src & ~(blk_size - 1), blk_size);
++
++ // translate to physical address
++ Fault fault = xc->translateDataReadReq(memReq);
++
++ assert(fault != Alignment_Fault);
++
++ if (fault == NoFault) {
++ xc->copySrcAddr = src;
++ xc->copySrcPhysAddr = memReq->paddr + offset;
++ } else {
++ xc->copySrcAddr = 0;
++ xc->copySrcPhysAddr = 0;
++ }
++ return fault;
++#endif
++}
++
++template <class Impl>
++Fault
++OzoneCPU<Impl>::copy(Addr dest)
++{
++ panic("Copy not implemented!\n");
++ return NoFault;
++#if 0
++ static bool no_warn = true;
++ int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64;
++ // Only support block sizes of 64 atm.
++ assert(blk_size == 64);
++ uint8_t data[blk_size];
++ //assert(xc->copySrcAddr);
++ int offset = dest & (blk_size - 1);
++
++ // Make sure block doesn't span page
++ if (no_warn &&
++ (dest & TheISA::PageMask) != ((dest + blk_size) & TheISA::PageMask) &&
++ (dest >> 40) != 0xfffffc) {
++ no_warn = false;
++ warn("Copied block destination spans pages %x. ", dest);
++ }
++
++ memReq->reset(dest & ~(blk_size -1), blk_size);
++ // translate to physical address
++ Fault fault = xc->translateDataWriteReq(memReq);
++
++ assert(fault != Alignment_Fault);
++
++ if (fault == NoFault) {
++ Addr dest_addr = memReq->paddr + offset;
++ // Need to read straight from memory since we have more than 8 bytes.
++ memReq->paddr = xc->copySrcPhysAddr;
++ xc->mem->read(memReq, data);
++ memReq->paddr = dest_addr;
++ xc->mem->write(memReq, data);
++ if (dcacheInterface) {
++ memReq->cmd = Copy;
++ memReq->completionEvent = NULL;
++ memReq->paddr = xc->copySrcPhysAddr;
++ memReq->dest = dest_addr;
++ memReq->size = 64;
++ memReq->time = curTick;
++ dcacheInterface->access(memReq);
++ }
++ }
++ return fault;
++#endif
++}
++
++#if FULL_SYSTEM
++template <class Impl>
++Addr
++OzoneCPU<Impl>::dbg_vtophys(Addr addr)
++{
++ return vtophys(xcProxy, addr);
++}
++#endif // FULL_SYSTEM
++
++#if FULL_SYSTEM
++template <class Impl>
++void
++OzoneCPU<Impl>::post_interrupt(int int_num, int index)
++{
++ BaseCPU::post_interrupt(int_num, index);
++
++ if (_status == Idle) {
++ DPRINTF(IPI,"Suspended Processor awoke\n");
++// thread.activate();
++ // Hack for now. Otherwise might have to go through the xcProxy, or
++ // I need to figure out what's the right thing to call.
++ activateContext(thread.tid, 1);
++ }
++}
++#endif // FULL_SYSTEM
++
++/* start simulation, program loaded, processor precise state initialized */
++template <class Impl>
++void
++OzoneCPU<Impl>::tick()
++{
++ DPRINTF(OzoneCPU, "\n\nOzoneCPU: Ticking cpu.\n");
++
++ _status = Running;
++ thread.renameTable[ZeroReg]->setIntResult(0);
++ thread.renameTable[ZeroReg+TheISA::FP_Base_DepTag]->
++ setDoubleResult(0.0);
++
++ comm.advance();
++ frontEnd->tick();
++ backEnd->tick();
++
++ // check for instruction-count-based events
++ comInstEventQueue[0]->serviceEvents(numInst);
++
++ if (!tickEvent.scheduled() && _status == Running)
++ tickEvent.schedule(curTick + cycles(1));
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::squashFromXC()
++{
++ thread.inSyscall = true;
++ backEnd->generateXCEvent();
++}
++
++#if !FULL_SYSTEM
++template <class Impl>
++void
++OzoneCPU<Impl>::syscall()
++{
++ // Not sure this copy is needed, depending on how the XC proxy is made.
++ thread.renameTable.copyFrom(backEnd->renameTable);
++
++ thread.inSyscall = true;
++
++ thread.funcExeInst++;
++
++ DPRINTF(OzoneCPU, "FuncExeInst: %i\n", thread.funcExeInst);
++
++ thread.process->syscall(xcProxy);
++
++ thread.funcExeInst--;
++
++ thread.inSyscall = false;
++
++ frontEnd->renameTable.copyFrom(thread.renameTable);
++ backEnd->renameTable.copyFrom(thread.renameTable);
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid)
++{
++ // check for error condition. Alpha syscall convention is to
++ // indicate success/failure in reg a3 (r19) and put the
++ // return value itself in the standard return value reg (v0).
++ if (return_value.successful()) {
++ // no error
++ thread.renameTable[SyscallSuccessReg]->setIntResult(0);
++ thread.renameTable[ReturnValueReg]->setIntResult(
++ return_value.value());
++ } else {
++ // got an error, return details
++ thread.renameTable[SyscallSuccessReg]->setIntResult((IntReg) -1);
++ thread.renameTable[ReturnValueReg]->setIntResult(
++ -return_value.value());
++ }
++}
++#else
++template <class Impl>
++Fault
++OzoneCPU<Impl>::hwrei()
++{
++ // Need to move this to ISA code
++ // May also need to make this per thread
++
++ lockFlag = false;
++ lockAddrList.clear();
++ thread.kernelStats->hwrei();
++
++ checkInterrupts = true;
++
++ // FIXME: XXX check for interrupts? XXX
++ return NoFault;
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::processInterrupts()
++{
++ // Check for interrupts here. For now can copy the code that
++ // exists within isa_fullsys_traits.hh. Also assume that thread 0
++ // is the one that handles the interrupts.
++
++ // Check if there are any outstanding interrupts
++ //Handle the interrupts
++ int ipl = 0;
++ int summary = 0;
++
++ checkInterrupts = false;
++
++ if (thread.readMiscReg(IPR_ASTRR))
++ panic("asynchronous traps not implemented\n");
++
++ if (thread.readMiscReg(IPR_SIRR)) {
++ for (int i = INTLEVEL_SOFTWARE_MIN;
++ i < INTLEVEL_SOFTWARE_MAX; i++) {
++ if (thread.readMiscReg(IPR_SIRR) & (ULL(1) << i)) {
++ // See table 4-19 of the 21164 hardware reference
++ ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1;
++ summary |= (ULL(1) << i);
++ }
++ }
++ }
++
++ uint64_t interrupts = intr_status();
++
++ if (interrupts) {
++ for (int i = INTLEVEL_EXTERNAL_MIN;
++ i < INTLEVEL_EXTERNAL_MAX; i++) {
++ if (interrupts & (ULL(1) << i)) {
++ // See table 4-19 of the 21164 hardware reference
++ ipl = i;
++ summary |= (ULL(1) << i);
++ }
++ }
++ }
++
++ if (ipl && ipl > thread.readMiscReg(IPR_IPLR)) {
++ thread.setMiscReg(IPR_ISR, summary);
++ thread.setMiscReg(IPR_INTID, ipl);
++ // @todo: Make this more transparent
++ if (checker) {
++ checker->cpuXCBase()->setMiscReg(IPR_ISR, summary);
++ checker->cpuXCBase()->setMiscReg(IPR_INTID, ipl);
++ }
++ Fault fault = new InterruptFault;
++ fault->invoke(thread.getXCProxy());
++ DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
++ thread.readMiscReg(IPR_IPLR), ipl, summary);
++ }
++}
++
++template <class Impl>
++bool
++OzoneCPU<Impl>::simPalCheck(int palFunc)
++{
++ // Need to move this to ISA code
++ // May also need to make this per thread
++ thread.kernelStats->callpal(palFunc, xcProxy);
++
++ switch (palFunc) {
++ case PAL::halt:
++ haltContext(thread.tid);
++ if (--System::numSystemsRunning == 0)
++ new SimExitEvent("all cpus halted");
++ break;
++
++ case PAL::bpt:
++ case PAL::bugchk:
++ if (system->breakpoint())
++ return false;
++ break;
++ }
++
++ return true;
++}
++#endif
++
++template <class Impl>
++BaseCPU *
++OzoneCPU<Impl>::OzoneXC::getCpuPtr()
++{
++ return cpu;
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::OzoneXC::setCpuId(int id)
++{
++ cpu->cpuId = id;
++ thread->cpuId = id;
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::OzoneXC::setStatus(Status new_status)
++{
++ thread->_status = new_status;
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::OzoneXC::activate(int delay)
++{
++ cpu->activateContext(thread->tid, delay);
++}
++
++/// Set the status to Suspended.
++template <class Impl>
++void
++OzoneCPU<Impl>::OzoneXC::suspend()
++{
++ cpu->suspendContext(thread->tid);
++}
++
++/// Set the status to Unallocated.
++template <class Impl>
++void
++OzoneCPU<Impl>::OzoneXC::deallocate()
++{
++ cpu->deallocateContext(thread->tid);
++}
++
++/// Set the status to Halted.
++template <class Impl>
++void
++OzoneCPU<Impl>::OzoneXC::halt()
++{
++ cpu->haltContext(thread->tid);
++}
++
++#if FULL_SYSTEM
++template <class Impl>
++void
++OzoneCPU<Impl>::OzoneXC::dumpFuncProfile()
++{ }
++#endif
++
++template <class Impl>
++void
++OzoneCPU<Impl>::OzoneXC::takeOverFrom(ExecContext *old_context)
++{
++ // some things should already be set up
++ assert(getMemPtr() == old_context->getMemPtr());
++#if FULL_SYSTEM
++ assert(getSystemPtr() == old_context->getSystemPtr());
++#else
++ assert(getProcessPtr() == old_context->getProcessPtr());
++#endif
++
++ // copy over functional state
++ setStatus(old_context->status());
++ copyArchRegs(old_context);
++ setCpuId(old_context->readCpuId());
++
++#if !FULL_SYSTEM
++ setFuncExeInst(old_context->readFuncExeInst());
++#else
++ EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent();
++ if (other_quiesce) {
++ // Point the quiesce event's XC at this XC so that it wakes up
++ // the proper CPU.
++ other_quiesce->xc = this;
++ }
++ if (thread->quiesceEvent) {
++ thread->quiesceEvent->xc = this;
++ }
++
++ thread->kernelStats = old_context->getKernelStats();
++// storeCondFailures = 0;
++ cpu->lockFlag = false;
++#endif
++
++ old_context->setStatus(ExecContext::Unallocated);
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::OzoneXC::regStats(const std::string &name)
++{
++#if FULL_SYSTEM
++ thread->kernelStats = new Kernel::Statistics(cpu->system);
++ thread->kernelStats->regStats(name + ".kern");
++#endif
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::OzoneXC::serialize(std::ostream &os)
++{ }
++
++template <class Impl>
++void
++OzoneCPU<Impl>::OzoneXC::unserialize(Checkpoint *cp, const std::string §ion)
++{ }
++
++#if FULL_SYSTEM
++template <class Impl>
++EndQuiesceEvent *
++OzoneCPU<Impl>::OzoneXC::getQuiesceEvent()
++{
++ return thread->quiesceEvent;
++}
++
++template <class Impl>
++Tick
++OzoneCPU<Impl>::OzoneXC::readLastActivate()
++{
++ return thread->lastActivate;
++}
++
++template <class Impl>
++Tick
++OzoneCPU<Impl>::OzoneXC::readLastSuspend()
++{
++ return thread->lastSuspend;
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::OzoneXC::profileClear()
++{
++ if (thread->profile)
++ thread->profile->clear();
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::OzoneXC::profileSample()
++{
++ if (thread->profile)
++ thread->profile->sample(thread->profileNode, thread->profilePC);
++}
++#endif
++
++template <class Impl>
++int
++OzoneCPU<Impl>::OzoneXC::getThreadNum()
++{
++ return thread->tid;
++}
++
++// Also somewhat obnoxious. Really only used for the TLB fault.
++template <class Impl>
++TheISA::MachInst
++OzoneCPU<Impl>::OzoneXC::getInst()
++{
++ return thread->inst;
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::OzoneXC::copyArchRegs(ExecContext *xc)
++{
++ thread->PC = xc->readPC();
++ thread->nextPC = xc->readNextPC();
++
++ cpu->frontEnd->setPC(thread->PC);
++ cpu->frontEnd->setNextPC(thread->nextPC);
++
++ for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
++ if (i < TheISA::FP_Base_DepTag) {
++ thread->renameTable[i]->setIntResult(xc->readIntReg(i));
++ } else if (i < (TheISA::FP_Base_DepTag + TheISA::NumFloatRegs)) {
++ int fp_idx = i - TheISA::FP_Base_DepTag;
++ thread->renameTable[i]->setDoubleResult(
++ xc->readFloatRegDouble(fp_idx));
++ }
++ }
++
++#if !FULL_SYSTEM
++ thread->funcExeInst = xc->readFuncExeInst();
++#endif
++
++ // Need to copy the XC values into the current rename table,
++ // copy the misc regs.
++ thread->regs.miscRegs.copyMiscRegs(xc);
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::OzoneXC::clearArchRegs()
++{
++ panic("Unimplemented!");
++}
++
++template <class Impl>
++uint64_t
++OzoneCPU<Impl>::OzoneXC::readIntReg(int reg_idx)
++{
++ return thread->renameTable[reg_idx]->readIntResult();
++}
++
++template <class Impl>
++float
++OzoneCPU<Impl>::OzoneXC::readFloatRegSingle(int reg_idx)
++{
++ int idx = reg_idx + TheISA::FP_Base_DepTag;
++ return thread->renameTable[idx]->readFloatResult();
++}
++
++template <class Impl>
++double
++OzoneCPU<Impl>::OzoneXC::readFloatRegDouble(int reg_idx)
++{
++ int idx = reg_idx + TheISA::FP_Base_DepTag;
++ return thread->renameTable[idx]->readDoubleResult();
++}
++
++template <class Impl>
++uint64_t
++OzoneCPU<Impl>::OzoneXC::readFloatRegInt(int reg_idx)
++{
++ int idx = reg_idx + TheISA::FP_Base_DepTag;
++ return thread->renameTable[idx]->readIntResult();
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::OzoneXC::setIntReg(int reg_idx, uint64_t val)
++{
++ thread->renameTable[reg_idx]->setIntResult(val);
++
++ if (!thread->inSyscall) {
++ cpu->squashFromXC();
++ }
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::OzoneXC::setFloatRegSingle(int reg_idx, float val)
++{
++ panic("Unimplemented!");
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::OzoneXC::setFloatRegDouble(int reg_idx, double val)
++{
++ int idx = reg_idx + TheISA::FP_Base_DepTag;
++
++ thread->renameTable[idx]->setDoubleResult(val);
++
++ if (!thread->inSyscall) {
++ cpu->squashFromXC();
++ }
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::OzoneXC::setFloatRegInt(int reg_idx, uint64_t val)
++{
++ panic("Unimplemented!");
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::OzoneXC::setPC(Addr val)
++{
++ thread->PC = val;
++ cpu->frontEnd->setPC(val);
++
++ if (!thread->inSyscall) {
++ cpu->squashFromXC();
++ }
++}
++
++template <class Impl>
++void
++OzoneCPU<Impl>::OzoneXC::setNextPC(Addr val)
++{
++ thread->nextPC = val;
++ cpu->frontEnd->setNextPC(val);
++
++ if (!thread->inSyscall) {
++ cpu->squashFromXC();
++ }
++}
++
++template <class Impl>
++TheISA::MiscReg
++OzoneCPU<Impl>::OzoneXC::readMiscReg(int misc_reg)
++{
++ return thread->regs.miscRegs.readReg(misc_reg);
++}
++
++template <class Impl>
++TheISA::MiscReg
++OzoneCPU<Impl>::OzoneXC::readMiscRegWithEffect(int misc_reg, Fault &fault)
++{
++ return thread->regs.miscRegs.readRegWithEffect(misc_reg,
++ fault, this);
++}
++
++template <class Impl>
++Fault
++OzoneCPU<Impl>::OzoneXC::setMiscReg(int misc_reg, const MiscReg &val)
++{
++ // Needs to setup a squash event unless we're in syscall mode
++ Fault ret_fault = thread->regs.miscRegs.setReg(misc_reg, val);
++
++ if (!thread->inSyscall) {
++ cpu->squashFromXC();
++ }
++
++ return ret_fault;
++}
+
+template <class Impl>
- struct OoOImpl {
- typedef AlphaISA ISA;
- typedef OoOCPU<OoOImpl> OoOCPU;
- typedef OoOCPU FullCPU;
- typedef OoODynInst<OoOImpl> DynInst;
- typedef RefCountingPtr<DynInst> DynInstPtr;
- };
++Fault
++OzoneCPU<Impl>::OzoneXC::setMiscRegWithEffect(int misc_reg, const MiscReg &val)
++{
++ // Needs to setup a squash event unless we're in syscall mode
++ Fault ret_fault = thread->regs.miscRegs.setRegWithEffect(misc_reg, val,
++ this);
+
- #endif // __CPU_OOO_CPU_OOO_IMPL_HH__
++ if (!thread->inSyscall) {
++ cpu->squashFromXC();
++ }
+
++ return ret_fault;
++}
--- /dev/null
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_STATIC_INST_HH__
+#define __CPU_STATIC_INST_HH__
+
+#include <bitset>
+#include <string>
+
+#include "base/hashmap.hh"
+#include "base/misc.hh"
+#include "base/refcnt.hh"
+#include "cpu/op_class.hh"
+#include "sim/host.hh"
+#include "arch/isa_traits.hh"
+
+// forward declarations
+struct AlphaSimpleImpl;
++struct OzoneImpl;
++struct SimpleImpl;
+class ExecContext;
+class DynInst;
+class Packet;
+
+template <class Impl>
+class AlphaDynInst;
+
++template <class Impl>
++class OzoneDynInst;
++
++class CheckerCPU;
+class FastCPU;
+class AtomicSimpleCPU;
+class TimingSimpleCPU;
+class InorderCPU;
+class SymbolTable;
+
+namespace Trace {
+ class InstRecord;
+}
+
+/**
+ * Base, ISA-independent static instruction class.
+ *
+ * The main component of this class is the vector of flags and the
+ * associated methods for reading them. Any object that can rely
+ * solely on these flags can process instructions without being
+ * recompiled for multiple ISAs.
+ */
+class StaticInstBase : public RefCounted
+{
+ protected:
+
+ /// Set of boolean static instruction properties.
+ ///
+ /// Notes:
+ /// - The IsInteger and IsFloating flags are based on the class of
+ /// registers accessed by the instruction. Although most
+ /// instructions will have exactly one of these two flags set, it
+ /// is possible for an instruction to have neither (e.g., direct
+ /// unconditional branches, memory barriers) or both (e.g., an
+ /// FP/int conversion).
+ /// - If IsMemRef is set, then exactly one of IsLoad or IsStore
+ /// will be set.
+ /// - If IsControl is set, then exactly one of IsDirectControl or
+ /// IsIndirect Control will be set, and exactly one of
+ /// IsCondControl or IsUncondControl will be set.
+ /// - IsSerializing, IsMemBarrier, and IsWriteBarrier are
+ /// implemented as flags since in the current model there's no
+ /// other way for instructions to inject behavior into the
+ /// pipeline outside of fetch. Once we go to an exec-in-exec CPU
+ /// model we should be able to get rid of these flags and
+ /// implement this behavior via the execute() methods.
+ ///
+ enum Flags {
+ IsNop, ///< Is a no-op (no effect at all).
+
+ IsInteger, ///< References integer regs.
+ IsFloating, ///< References FP regs.
+
+ IsMemRef, ///< References memory (load, store, or prefetch).
+ IsLoad, ///< Reads from memory (load or prefetch).
+ IsStore, ///< Writes to memory.
++ IsStoreConditional, ///< Store conditional instruction.
+ IsInstPrefetch, ///< Instruction-cache prefetch.
+ IsDataPrefetch, ///< Data-cache prefetch.
+ IsCopy, ///< Fast Cache block copy
+
+ IsControl, ///< Control transfer instruction.
+ IsDirectControl, ///< PC relative control transfer.
+ IsIndirectControl, ///< Register indirect control transfer.
+ IsCondControl, ///< Conditional control transfer.
+ IsUncondControl, ///< Unconditional control transfer.
+ IsCall, ///< Subroutine call.
+ IsReturn, ///< Subroutine return.
+
+ IsCondDelaySlot,///< Conditional Delay-Slot Instruction
+
+ IsThreadSync, ///< Thread synchronization operation.
+
+ IsSerializing, ///< Serializes pipeline: won't execute until all
+ /// older instructions have committed.
+ IsSerializeBefore,
+ IsSerializeAfter,
+ IsMemBarrier, ///< Is a memory barrier
+ IsWriteBarrier, ///< Is a write barrier
+
+ IsNonSpeculative, ///< Should not be executed speculatively
++ IsQuiesce, ///< Is a quiesce instruction
++
++ IsIprAccess, ///< Accesses IPRs
++ IsUnverifiable, ///< Can't be verified by a checker
+
+ NumFlags
+ };
+
+ /// Flag values for this instruction.
+ std::bitset<NumFlags> flags;
+
+ /// See opClass().
+ OpClass _opClass;
+
+ /// See numSrcRegs().
+ int8_t _numSrcRegs;
+
+ /// See numDestRegs().
+ int8_t _numDestRegs;
+
+ /// The following are used to track physical register usage
+ /// for machines with separate int & FP reg files.
+ //@{
+ int8_t _numFPDestRegs;
+ int8_t _numIntDestRegs;
+ //@}
+
+ /// Constructor.
+ /// It's important to initialize everything here to a sane
+ /// default, since the decoder generally only overrides
+ /// the fields that are meaningful for the particular
+ /// instruction.
+ StaticInstBase(OpClass __opClass)
+ : _opClass(__opClass), _numSrcRegs(0), _numDestRegs(0),
+ _numFPDestRegs(0), _numIntDestRegs(0)
+ {
+ }
+
+ public:
+
+ /// @name Register information.
+ /// The sum of numFPDestRegs() and numIntDestRegs() equals
+ /// numDestRegs(). The former two functions are used to track
+ /// physical register usage for machines with separate int & FP
+ /// reg files.
+ //@{
+ /// Number of source registers.
+ int8_t numSrcRegs() const { return _numSrcRegs; }
+ /// Number of destination registers.
+ int8_t numDestRegs() const { return _numDestRegs; }
+ /// Number of floating-point destination regs.
+ int8_t numFPDestRegs() const { return _numFPDestRegs; }
+ /// Number of integer destination regs.
+ int8_t numIntDestRegs() const { return _numIntDestRegs; }
+ //@}
+
+ /// @name Flag accessors.
+ /// These functions are used to access the values of the various
+ /// instruction property flags. See StaticInstBase::Flags for descriptions
+ /// of the individual flags.
+ //@{
+
+ bool isNop() const { return flags[IsNop]; }
+
+ bool isMemRef() const { return flags[IsMemRef]; }
+ bool isLoad() const { return flags[IsLoad]; }
+ bool isStore() const { return flags[IsStore]; }
++ bool isStoreConditional() const { return flags[IsStoreConditional]; }
+ bool isInstPrefetch() const { return flags[IsInstPrefetch]; }
+ bool isDataPrefetch() const { return flags[IsDataPrefetch]; }
+ bool isCopy() const { return flags[IsCopy];}
+
+ bool isInteger() const { return flags[IsInteger]; }
+ bool isFloating() const { return flags[IsFloating]; }
+
+ bool isControl() const { return flags[IsControl]; }
+ bool isCall() const { return flags[IsCall]; }
+ bool isReturn() const { return flags[IsReturn]; }
+ bool isDirectCtrl() const { return flags[IsDirectControl]; }
+ bool isIndirectCtrl() const { return flags[IsIndirectControl]; }
+ bool isCondCtrl() const { return flags[IsCondControl]; }
+ bool isUncondCtrl() const { return flags[IsUncondControl]; }
+
+ bool isThreadSync() const { return flags[IsThreadSync]; }
+ bool isSerializing() const { return flags[IsSerializing] ||
+ flags[IsSerializeBefore] ||
+ flags[IsSerializeAfter]; }
+ bool isSerializeBefore() const { return flags[IsSerializeBefore]; }
+ bool isSerializeAfter() const { return flags[IsSerializeAfter]; }
+ bool isMemBarrier() const { return flags[IsMemBarrier]; }
+ bool isWriteBarrier() const { return flags[IsWriteBarrier]; }
+ bool isNonSpeculative() const { return flags[IsNonSpeculative]; }
++ bool isQuiesce() const { return flags[IsQuiesce]; }
++ bool isIprAccess() const { return flags[IsIprAccess]; }
++ bool isUnverifiable() const { return flags[IsUnverifiable]; }
+ //@}
+
+ /// Operation class. Used to select appropriate function unit in issue.
+ OpClass opClass() const { return _opClass; }
+};
+
+
+// forward declaration
+class StaticInstPtr;
+
+/**
+ * Generic yet ISA-dependent static instruction class.
+ *
+ * This class builds on StaticInstBase, defining fields and interfaces
+ * that are generic across all ISAs but that differ in details
+ * according to the specific ISA being used.
+ */
+class StaticInst : public StaticInstBase
+{
+ public:
+
+ /// Binary machine instruction type.
+ typedef TheISA::MachInst MachInst;
+ /// Binary extended machine instruction type.
+ typedef TheISA::ExtMachInst ExtMachInst;
+ /// Logical register index type.
+ typedef TheISA::RegIndex RegIndex;
+
+ enum {
+ MaxInstSrcRegs = TheISA::MaxInstSrcRegs, //< Max source regs
+ MaxInstDestRegs = TheISA::MaxInstDestRegs, //< Max dest regs
+ };
+
+
+ /// Return logical index (architectural reg num) of i'th destination reg.
+ /// Only the entries from 0 through numDestRegs()-1 are valid.
+ RegIndex destRegIdx(int i) const { return _destRegIdx[i]; }
+
+ /// Return logical index (architectural reg num) of i'th source reg.
+ /// Only the entries from 0 through numSrcRegs()-1 are valid.
+ RegIndex srcRegIdx(int i) const { return _srcRegIdx[i]; }
+
+ /// Pointer to a statically allocated "null" instruction object.
+ /// Used to give eaCompInst() and memAccInst() something to return
+ /// when called on non-memory instructions.
+ static StaticInstPtr nullStaticInstPtr;
+
+ /**
+ * Memory references only: returns "fake" instruction representing
+ * the effective address part of the memory operation. Used to
+ * obtain the dependence info (numSrcRegs and srcRegIdx[]) for
+ * just the EA computation.
+ */
+ virtual const
+ StaticInstPtr &eaCompInst() const { return nullStaticInstPtr; }
+
+ /**
+ * Memory references only: returns "fake" instruction representing
+ * the memory access part of the memory operation. Used to
+ * obtain the dependence info (numSrcRegs and srcRegIdx[]) for
+ * just the memory access (not the EA computation).
+ */
+ virtual const
+ StaticInstPtr &memAccInst() const { return nullStaticInstPtr; }
+
+ /// The binary machine instruction.
+ const ExtMachInst machInst;
+
+ protected:
+
+ /// See destRegIdx().
+ RegIndex _destRegIdx[MaxInstDestRegs];
+ /// See srcRegIdx().
+ RegIndex _srcRegIdx[MaxInstSrcRegs];
+
+ /**
+ * Base mnemonic (e.g., "add"). Used by generateDisassembly()
+ * methods. Also useful to readily identify instructions from
+ * within the debugger when #cachedDisassembly has not been
+ * initialized.
+ */
+ const char *mnemonic;
+
+ /**
+ * String representation of disassembly (lazily evaluated via
+ * disassemble()).
+ */
+ mutable std::string *cachedDisassembly;
+
+ /**
+ * Internal function to generate disassembly string.
+ */
+ virtual std::string
+ generateDisassembly(Addr pc, const SymbolTable *symtab) const = 0;
+
+ /// Constructor.
+ StaticInst(const char *_mnemonic, ExtMachInst _machInst, OpClass __opClass)
+ : StaticInstBase(__opClass),
+ machInst(_machInst), mnemonic(_mnemonic), cachedDisassembly(0)
+ {
+ }
+
+ public:
+
+ virtual ~StaticInst()
+ {
+ if (cachedDisassembly)
+ delete cachedDisassembly;
+ }
+
+/**
+ * The execute() signatures are auto-generated by scons based on the
+ * set of CPU models we are compiling in today.
+ */
+#include "cpu/static_inst_exec_sigs.hh"
+
+ /**
+ * Return the target address for a PC-relative branch.
+ * Invalid if not a PC-relative branch (i.e. isDirectCtrl()
+ * should be true).
+ */
+ virtual Addr branchTarget(Addr branchPC) const
+ {
+ panic("StaticInst::branchTarget() called on instruction "
+ "that is not a PC-relative branch.");
+ }
+
+ /**
+ * Return the target address for an indirect branch (jump). The
+ * register value is read from the supplied execution context, so
+ * the result is valid only if the execution context is about to
+ * execute the branch in question. Invalid if not an indirect
+ * branch (i.e. isIndirectCtrl() should be true).
+ */
+ virtual Addr branchTarget(ExecContext *xc) const
+ {
+ panic("StaticInst::branchTarget() called on instruction "
+ "that is not an indirect branch.");
+ }
+
+ /**
+ * Return true if the instruction is a control transfer, and if so,
+ * return the target address as well.
+ */
+ bool hasBranchTarget(Addr pc, ExecContext *xc, Addr &tgt) const;
+
+ /**
+ * Return string representation of disassembled instruction.
+ * The default version of this function will call the internal
+ * virtual generateDisassembly() function to get the string,
+ * then cache it in #cachedDisassembly. If the disassembly
+ * should not be cached, this function should be overridden directly.
+ */
+ virtual const std::string &disassemble(Addr pc,
+ const SymbolTable *symtab = 0) const
+ {
+ if (!cachedDisassembly)
+ cachedDisassembly =
+ new std::string(generateDisassembly(pc, symtab));
+
+ return *cachedDisassembly;
+ }
+
+ /// Decoded instruction cache type.
+ /// For now we're using a generic hash_map; this seems to work
+ /// pretty well.
+ typedef m5::hash_map<ExtMachInst, StaticInstPtr> DecodeCache;
+
+ /// A cache of decoded instruction objects.
+ static DecodeCache decodeCache;
+
+ /**
+ * Dump some basic stats on the decode cache hash map.
+ * Only gets called if DECODE_CACHE_HASH_STATS is defined.
+ */
+ static void dumpDecodeCacheStats();
+
+ /// Decode a machine instruction.
+ /// @param mach_inst The binary instruction to decode.
+ /// @retval A pointer to the corresponding StaticInst object.
+ //This is defined as inline below.
+ static StaticInstPtr decode(ExtMachInst mach_inst);
+
+ //MIPS Decoder Debug Functions
+ int getOpcode() { return (machInst & 0xFC000000) >> 26 ; }//31..26
+ int getRs() { return (machInst & 0x03E00000) >> 21; } //25...21
+ int getRt() { return (machInst & 0x001F0000) >> 16; } //20...16
+ int getRd() { return (machInst & 0x0000F800) >> 11; } //15...11
+ int getImm() { return (machInst & 0x0000FFFF); } //15...0
+ int getFunction(){ return (machInst & 0x0000003F); }//5...0
+ int getBranch(){ return (machInst & 0x0000FFFF); }//15...0
+ int getJump(){ return (machInst & 0x03FFFFFF); }//25...0
+ int getHint(){ return (machInst & 0x000007C0) >> 6; } //10...6
+ std::string getName() { return mnemonic; }
+};
+
+typedef RefCountingPtr<StaticInstBase> StaticInstBasePtr;
+
+/// Reference-counted pointer to a StaticInst object.
+/// This type should be used instead of "StaticInst *" so that
+/// StaticInst objects can be properly reference-counted.
+class StaticInstPtr : public RefCountingPtr<StaticInst>
+{
+ public:
+ /// Constructor.
+ StaticInstPtr()
+ : RefCountingPtr<StaticInst>()
+ {
+ }
+
+ /// Conversion from "StaticInst *".
+ StaticInstPtr(StaticInst *p)
+ : RefCountingPtr<StaticInst>(p)
+ {
+ }
+
+ /// Copy constructor.
+ StaticInstPtr(const StaticInstPtr &r)
+ : RefCountingPtr<StaticInst>(r)
+ {
+ }
+
+ /// Construct directly from machine instruction.
+ /// Calls StaticInst::decode().
+ StaticInstPtr(TheISA::ExtMachInst mach_inst)
+ : RefCountingPtr<StaticInst>(StaticInst::decode(mach_inst))
+ {
+ }
+
+ /// Convert to pointer to StaticInstBase class.
+ operator const StaticInstBasePtr()
+ {
+ return this->get();
+ }
+};
+
+inline StaticInstPtr
+StaticInst::decode(StaticInst::ExtMachInst mach_inst)
+{
+#ifdef DECODE_CACHE_HASH_STATS
+ // Simple stats on decode hash_map. Turns out the default
+ // hash function is as good as anything I could come up with.
+ const int dump_every_n = 10000000;
+ static int decodes_til_dump = dump_every_n;
+
+ if (--decodes_til_dump == 0) {
+ dumpDecodeCacheStats();
+ decodes_til_dump = dump_every_n;
+ }
+#endif
+
+ DecodeCache::iterator iter = decodeCache.find(mach_inst);
+ if (iter != decodeCache.end()) {
+ return iter->second;
+ }
+
+ StaticInstPtr si = TheISA::decodeInst(mach_inst);
+ decodeCache[mach_inst] = si;
+ return si;
+}
+
+#endif // __CPU_STATIC_INST_HH__
--- /dev/null
- xc->getCpuPtr()->kernelStats->setIdleProcess(
- xc->readMiscReg(AlphaISA::IPR_PALtemp23), xc);
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/base.hh"
+#include "cpu/cpu_exec_context.hh"
+#include "kern/kernel_stats.hh"
+#include "kern/system_events.hh"
+#include "sim/system.hh"
+
+using namespace TheISA;
+
+void
+SkipFuncEvent::process(ExecContext *xc)
+{
+ Addr newpc = xc->readIntReg(ReturnAddressReg);
+
+ DPRINTF(PCEvent, "skipping %s: pc=%x, newpc=%x\n", description,
+ xc->readPC(), newpc);
+
+ xc->setPC(newpc);
+ xc->setNextPC(xc->readPC() + sizeof(TheISA::MachInst));
+/*
+ BranchPred *bp = xc->getCpuPtr()->getBranchPred();
+ if (bp != NULL) {
+ bp->popRAS(xc->getThreadNum());
+ }
+*/
+}
+
+
+FnEvent::FnEvent(PCEventQueue *q, const std::string &desc, Addr addr,
+ Stats::MainBin *bin)
+ : PCEvent(q, desc, addr), _name(desc), mybin(bin)
+{
+}
+
+void
+FnEvent::process(ExecContext *xc)
+{
+ if (xc->misspeculating())
+ return;
+
+ xc->getSystemPtr()->kernelBinning->call(xc, mybin);
+}
+
+void
+IdleStartEvent::process(ExecContext *xc)
+{
- xc->getCpuPtr()->kernelStats->mode(Kernel::interrupt, xc);
++ if (xc->getKernelStats())
++ xc->getKernelStats()->setIdleProcess(
++ xc->readMiscReg(AlphaISA::IPR_PALtemp23), xc);
+ remove();
+}
+
+void
+InterruptStartEvent::process(ExecContext *xc)
+{
- xc->getCpuPtr()->kernelStats->mode(Kernel::kernel, xc);
++ if (xc->getKernelStats())
++ xc->getKernelStats()->mode(Kernel::interrupt, xc);
+}
+
+void
+InterruptEndEvent::process(ExecContext *xc)
+{
+ // We go back to kernel, if we are user, inside the rti
+ // pal code we will get switched to user because of the ICM write
++ if (xc->getKernelStats())
++ xc->getKernelStats()->mode(Kernel::kernel, xc);
+}
--- /dev/null
- if (xc->status() == ExecContext::Unallocated) {
+/*
+ * Copyright (c) 2001-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __TRU64_HH__
+#define __TRU64_HH__
+#include "config/full_system.hh"
+
+#if FULL_SYSTEM
+
+class Tru64 {};
+
+#else //!FULL_SYSTEM
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#if defined(__OpenBSD__) || defined(__APPLE__) || defined(__FreeBSD__)
+#include <sys/param.h>
+#include <sys/mount.h>
+#else
+#include <sys/statfs.h>
+#endif
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h> // for memset()
+#include <unistd.h>
+
+#include "cpu/base.hh"
+#include "sim/root.hh"
+#include "sim/syscall_emul.hh"
+
+typedef struct stat global_stat;
+typedef struct statfs global_statfs;
+typedef struct dirent global_dirent;
+
+class TranslatingPort;
+
+///
+/// This class encapsulates the types, structures, constants,
+/// functions, and syscall-number mappings specific to the Alpha Tru64
+/// syscall interface.
+///
+class Tru64 {
+
+ public:
+
+ //@{
+ /// Basic Tru64 types.
+ typedef uint64_t size_t;
+ typedef uint64_t off_t;
+ typedef uint16_t nlink_t;
+ typedef int32_t dev_t;
+ typedef uint32_t uid_t;
+ typedef uint32_t gid_t;
+ typedef uint32_t time_t;
+ typedef uint32_t mode_t;
+ typedef uint32_t ino_t;
+ typedef struct { int val[2]; } quad;
+ typedef quad fsid_t;
+ //@}
+
+ /// Stat buffer. Note that Tru64 v5.0+ use a new "F64" stat
+ /// structure, and a new set of syscall numbers for stat calls.
+ /// Some hosts (notably Linux) define st_atime, st_mtime, and
+ /// st_ctime as macros, so we append an X to get around this.
+ struct F64_stat {
+ dev_t st_dev; //!< st_dev
+ int32_t st_retired1; //!< st_retired1
+ mode_t st_mode; //!< st_mode
+ nlink_t st_nlink; //!< st_nlink
+ uint16_t st_nlink_reserved; //!< st_nlink_reserved
+ uid_t st_uid; //!< st_uid
+ gid_t st_gid; //!< st_gid
+ dev_t st_rdev; //!< st_rdev
+ dev_t st_ldev; //!< st_ldev
+ off_t st_size; //!< st_size
+ time_t st_retired2; //!< st_retired2
+ int32_t st_uatime; //!< st_uatime
+ time_t st_retired3; //!< st_retired3
+ int32_t st_umtime; //!< st_umtime
+ time_t st_retired4; //!< st_retired4
+ int32_t st_uctime; //!< st_uctime
+ int32_t st_retired5; //!< st_retired5
+ int32_t st_retired6; //!< st_retired6
+ uint32_t st_flags; //!< st_flags
+ uint32_t st_gen; //!< st_gen
+ uint64_t st_spare[4]; //!< st_spare[4]
+ ino_t st_ino; //!< st_ino
+ int32_t st_ino_reserved; //!< st_ino_reserved
+ time_t st_atimeX; //!< st_atime
+ int32_t st_atime_reserved; //!< st_atime_reserved
+ time_t st_mtimeX; //!< st_mtime
+ int32_t st_mtime_reserved; //!< st_mtime_reserved
+ time_t st_ctimeX; //!< st_ctime
+ int32_t st_ctime_reserved; //!< st_ctime_reserved
+ uint64_t st_blksize; //!< st_blksize
+ uint64_t st_blocks; //!< st_blocks
+ };
+
+
+ /// Old Tru64 v4.x stat struct.
+ /// Tru64 maintains backwards compatibility with v4.x by
+ /// implementing another set of stat functions using the old
+ /// structure definition and binding them to the old syscall
+ /// numbers.
+
+ struct pre_F64_stat {
+ dev_t st_dev;
+ ino_t st_ino;
+ mode_t st_mode;
+ nlink_t st_nlink;
+ uid_t st_uid __attribute__ ((aligned(sizeof(uid_t))));
+ gid_t st_gid;
+ dev_t st_rdev;
+ off_t st_size __attribute__ ((aligned(sizeof(off_t))));
+ time_t st_atimeX;
+ int32_t st_uatime;
+ time_t st_mtimeX;
+ int32_t st_umtime;
+ time_t st_ctimeX;
+ int32_t st_uctime;
+ uint32_t st_blksize;
+ int32_t st_blocks;
+ uint32_t st_flags;
+ uint32_t st_gen;
+ };
+
+ /// For statfs().
+ struct F64_statfs {
+ int16_t f_type;
+ int16_t f_flags;
+ int32_t f_retired1;
+ int32_t f_retired2;
+ int32_t f_retired3;
+ int32_t f_retired4;
+ int32_t f_retired5;
+ int32_t f_retired6;
+ int32_t f_retired7;
+ fsid_t f_fsid;
+ int32_t f_spare[9];
+ char f_retired8[90];
+ char f_retired9[90];
+ uint64_t dummy[10]; // was union mount_info mount_info;
+ uint64_t f_flags2;
+ int64_t f_spare2[14];
+ int64_t f_fsize;
+ int64_t f_bsize;
+ int64_t f_blocks;
+ int64_t f_bfree;
+ int64_t f_bavail;
+ int64_t f_files;
+ int64_t f_ffree;
+ char f_mntonname[1024];
+ char f_mntfromname[1024];
+ };
+
+ /// For old Tru64 v4.x statfs()
+ struct pre_F64_statfs {
+ int16_t f_type;
+ int16_t f_flags;
+ int32_t f_fsize;
+ int32_t f_bsize;
+ int32_t f_blocks;
+ int32_t f_bfree;
+ int32_t f_bavail;
+ int32_t f_files;
+ int32_t f_ffree;
+ fsid_t f_fsid;
+ int32_t f_spare[9];
+ char f_mntonname[90];
+ char f_mntfromname[90];
+ uint64_t dummy[10]; // was union mount_info mount_info;
+ };
+
+ /// For getdirentries().
+ struct dirent
+ {
+ ino_t d_ino; //!< file number of entry
+ uint16_t d_reclen; //!< length of this record
+ uint16_t d_namlen; //!< length of string in d_name
+ char d_name[256]; //!< dummy name length
+ };
+
+
+ /// Length of strings in struct utsname (plus 1 for null char).
+ static const int _SYS_NMLN = 32;
+
+ /// Interface struct for uname().
+ struct utsname {
+ char sysname[_SYS_NMLN]; //!< System name.
+ char nodename[_SYS_NMLN]; //!< Node name.
+ char release[_SYS_NMLN]; //!< OS release.
+ char version[_SYS_NMLN]; //!< OS version.
+ char machine[_SYS_NMLN]; //!< Machine type.
+ };
+
+ /// Limit struct for getrlimit/setrlimit.
+ struct rlimit {
+ uint64_t rlim_cur; //!< soft limit
+ uint64_t rlim_max; //!< hard limit
+ };
+
+
+ /// For getsysinfo() GSI_CPU_INFO option.
+ struct cpu_info {
+ uint32_t current_cpu; //!< current_cpu
+ uint32_t cpus_in_box; //!< cpus_in_box
+ uint32_t cpu_type; //!< cpu_type
+ uint32_t ncpus; //!< ncpus
+ uint64_t cpus_present; //!< cpus_present
+ uint64_t cpus_running; //!< cpus_running
+ uint64_t cpu_binding; //!< cpu_binding
+ uint64_t cpu_ex_binding; //!< cpu_ex_binding
+ uint32_t mhz; //!< mhz
+ uint32_t unused[3]; //!< future expansion
+ };
+
+ /// For gettimeofday.
+ struct timeval {
+ uint32_t tv_sec; //!< seconds
+ uint32_t tv_usec; //!< microseconds
+ };
+
+ /// For getrusage().
+ struct rusage {
+ struct timeval ru_utime; //!< user time used
+ struct timeval ru_stime; //!< system time used
+ uint64_t ru_maxrss; //!< ru_maxrss
+ uint64_t ru_ixrss; //!< integral shared memory size
+ uint64_t ru_idrss; //!< integral unshared data "
+ uint64_t ru_isrss; //!< integral unshared stack "
+ uint64_t ru_minflt; //!< page reclaims - total vmfaults
+ uint64_t ru_majflt; //!< page faults
+ uint64_t ru_nswap; //!< swaps
+ uint64_t ru_inblock; //!< block input operations
+ uint64_t ru_oublock; //!< block output operations
+ uint64_t ru_msgsnd; //!< messages sent
+ uint64_t ru_msgrcv; //!< messages received
+ uint64_t ru_nsignals; //!< signals received
+ uint64_t ru_nvcsw; //!< voluntary context switches
+ uint64_t ru_nivcsw; //!< involuntary "
+ };
+
+ /// For sigreturn().
+ struct sigcontext {
+ int64_t sc_onstack; //!< sigstack state to restore
+ int64_t sc_mask; //!< signal mask to restore
+ int64_t sc_pc; //!< pc at time of signal
+ int64_t sc_ps; //!< psl to restore
+ int64_t sc_regs[32]; //!< processor regs 0 to 31
+ int64_t sc_ownedfp; //!< fp has been used
+ int64_t sc_fpregs[32]; //!< fp regs 0 to 31
+ uint64_t sc_fpcr; //!< floating point control reg
+ uint64_t sc_fp_control; //!< software fpcr
+ int64_t sc_reserved1; //!< reserved for kernel
+ uint32_t sc_kreserved1; //!< reserved for kernel
+ uint32_t sc_kreserved2; //!< reserved for kernel
+ size_t sc_ssize; //!< stack size
+ caddr_t sc_sbase; //!< stack start
+ uint64_t sc_traparg_a0; //!< a0 argument to trap on exc
+ uint64_t sc_traparg_a1; //!< a1 argument to trap on exc
+ uint64_t sc_traparg_a2; //!< a2 argument to trap on exc
+ uint64_t sc_fp_trap_pc; //!< imprecise pc
+ uint64_t sc_fp_trigger_sum; //!< Exception summary at trigger
+ uint64_t sc_fp_trigger_inst; //!< Instruction at trigger pc
+ };
+
+
+
+ /// For table().
+ struct tbl_sysinfo {
+ uint64_t si_user; //!< User time
+ uint64_t si_nice; //!< Nice time
+ uint64_t si_sys; //!< System time
+ uint64_t si_idle; //!< Idle time
+ uint64_t si_hz; //!< hz
+ uint64_t si_phz; //!< phz
+ uint64_t si_boottime; //!< Boot time in seconds
+ uint64_t wait; //!< Wait time
+ uint32_t si_max_procs; //!< rpb->rpb_numprocs
+ uint32_t pad; //!< padding
+ };
+
+
+ /// For stack_create.
+ struct vm_stack {
+ // was void *
+ Addr address; //!< address hint
+ size_t rsize; //!< red zone size
+ size_t ysize; //!< yellow zone size
+ size_t gsize; //!< green zone size
+ size_t swap; //!< amount of swap to reserve
+ size_t incr; //!< growth increment
+ uint64_t align; //!< address alignment
+ uint64_t flags; //!< MAP_FIXED etc.
+ // was struct memalloc_attr *
+ Addr attr; //!< allocation policy
+ uint64_t reserved; //!< reserved
+ };
+
+ /// Return values for nxm calls.
+ enum {
+ KERN_NOT_RECEIVER = 7,
+ KERN_NOT_IN_SET = 12
+ };
+
+ /// For nxm_task_init.
+ static const int NXM_TASK_INIT_VP = 2; //!< initial thread is VP
+
+ /// Task attribute structure.
+ struct nxm_task_attr {
+ int64_t nxm_callback; //!< nxm_callback
+ unsigned int nxm_version; //!< nxm_version
+ unsigned short nxm_uniq_offset; //!< nxm_uniq_offset
+ unsigned short flags; //!< flags
+ int nxm_quantum; //!< nxm_quantum
+ int pad1; //!< pad1
+ int64_t pad2; //!< pad2
+ };
+
+ /// Signal set.
+ typedef uint64_t sigset_t;
+
+ /// Thread state shared between user & kernel.
+ struct ushared_state {
+ sigset_t sigmask; //!< thread signal mask
+ sigset_t sig; //!< thread pending mask
+ // struct nxm_pth_state *
+ Addr pth_id; //!< out-of-line state
+ int flags; //!< shared flags
+#define US_SIGSTACK 0x1 // thread called sigaltstack
+#define US_ONSTACK 0x2 // thread is running on altstack
+#define US_PROFILE 0x4 // thread called profil
+#define US_SYSCALL 0x8 // thread in syscall
+#define US_TRAP 0x10 // thread has trapped
+#define US_YELLOW 0x20 // thread has mellowed yellow
+#define US_YZONE 0x40 // thread has zoned out
+#define US_FP_OWNED 0x80 // thread used floating point
+
+ int cancel_state; //!< thread's cancellation state
+#define US_CANCEL 0x1 // cancel pending
+#define US_NOCANCEL 0X2 // synch cancel disabled
+#define US_SYS_NOCANCEL 0x4 // syscall cancel disabled
+#define US_ASYNC_NOCANCEL 0x8 // asynch cancel disabled
+#define US_CANCEL_BITS (US_NOCANCEL|US_SYS_NOCANCEL|US_ASYNC_NOCANCEL)
+#define US_CANCEL_MASK (US_CANCEL|US_NOCANCEL|US_SYS_NOCANCEL| \
+ US_ASYNC_NOCANCEL)
+
+ // These are semi-shared. They are always visible to
+ // the kernel but are never context-switched by the library.
+
+ int nxm_ssig; //!< scheduler's synchronous signals
+ int reserved1; //!< reserved1
+ int64_t nxm_active; //!< scheduler active
+ int64_t reserved2; //!< reserved2
+ };
+
+ struct nxm_sched_state {
+ struct ushared_state nxm_u; //!< state own by user thread
+ unsigned int nxm_bits; //!< scheduler state / slot
+ int nxm_quantum; //!< quantum count-down value
+ int nxm_set_quantum; //!< quantum reset value
+ int nxm_sysevent; //!< syscall state
+ // struct nxm_upcall *
+ Addr nxm_uc_ret; //!< stack ptr of null thread
+ // void *
+ Addr nxm_tid; //!< scheduler's thread id
+ int64_t nxm_va; //!< page fault address
+ // struct nxm_pth_state *
+ Addr nxm_pthid; //!< id of null thread
+ uint64_t nxm_bound_pcs_count; //!< bound PCS thread count
+ int64_t pad[2]; //!< pad
+ };
+
+ /// nxm_shared.
+ struct nxm_shared {
+ int64_t nxm_callback; //!< address of upcall routine
+ unsigned int nxm_version; //!< version number
+ unsigned short nxm_uniq_offset; //!< correction factor for TEB
+ unsigned short pad1; //!< pad1
+ int64_t space[2]; //!< future growth
+ struct nxm_sched_state nxm_ss[1]; //!< array of shared areas
+ };
+
+ /// nxm_slot_state_t.
+ enum nxm_slot_state_t {
+ NXM_SLOT_AVAIL,
+ NXM_SLOT_BOUND,
+ NXM_SLOT_UNBOUND,
+ NXM_SLOT_EMPTY
+ };
+
+ /// nxm_config_info
+ struct nxm_config_info {
+ int nxm_nslots_per_rad; //!< max number of VP slots per RAD
+ int nxm_nrads; //!< max number of RADs
+ // nxm_slot_state_t *
+ Addr nxm_slot_state; //!< per-VP slot state
+ // struct nxm_shared *
+ Addr nxm_rad[1]; //!< per-RAD shared areas
+ };
+
+ /// For nxm_thread_create.
+ enum nxm_thread_type {
+ NXM_TYPE_SCS = 0,
+ NXM_TYPE_VP = 1,
+ NXM_TYPE_MANAGER = 2
+ };
+
+ /// Thread attributes.
+ struct nxm_thread_attr {
+ int version; //!< version
+ int type; //!< type
+ int cancel_flags; //!< cancel_flags
+ int priority; //!< priority
+ int policy; //!< policy
+ int signal_type; //!< signal_type
+ // void *
+ Addr pthid; //!< pthid
+ sigset_t sigmask; //!< sigmask
+ /// Initial register values.
+ struct {
+ uint64_t pc; //!< pc
+ uint64_t sp; //!< sp
+ uint64_t a0; //!< a0
+ } registers;
+ uint64_t pad2[2]; //!< pad2
+ };
+
+ /// Helper function to convert a host stat buffer to a target stat
+ /// buffer. Also copies the target buffer out to the simulated
+ /// memory space. Used by stat(), fstat(), and lstat().
+ template <class T>
+ static void
+ copyOutStatBuf(TranslatingPort *mem, Addr addr, global_stat *host)
+ {
+ using namespace TheISA;
+
+ TypedBufferArg<T> tgt(addr);
+
+ tgt->st_dev = htog(host->st_dev);
+ tgt->st_ino = htog(host->st_ino);
+ tgt->st_mode = htog(host->st_mode);
+ tgt->st_nlink = htog(host->st_nlink);
+ tgt->st_uid = htog(host->st_uid);
+ tgt->st_gid = htog(host->st_gid);
+ tgt->st_rdev = htog(host->st_rdev);
+ tgt->st_size = htog(host->st_size);
+ tgt->st_atimeX = htog(host->st_atime);
+ tgt->st_mtimeX = htog(host->st_mtime);
+ tgt->st_ctimeX = htog(host->st_ctime);
+ tgt->st_blksize = htog(host->st_blksize);
+ tgt->st_blocks = htog(host->st_blocks);
+
+ tgt.copyOut(mem);
+ }
+
+ /// Helper function to convert a host statfs buffer to a target statfs
+ /// buffer. Also copies the target buffer out to the simulated
+ /// memory space. Used by statfs() and fstatfs().
+ template <class T>
+ static void
+ copyOutStatfsBuf(TranslatingPort *mem, Addr addr, global_statfs *host)
+ {
+ using namespace TheISA;
+
+ TypedBufferArg<T> tgt(addr);
+
+#if defined(__OpenBSD__) || defined(__APPLE__) || defined(__FreeBSD__)
+ tgt->f_type = 0;
+#else
+ tgt->f_type = htog(host->f_type);
+#endif
+ tgt->f_bsize = htog(host->f_bsize);
+ tgt->f_blocks = htog(host->f_blocks);
+ tgt->f_bfree = htog(host->f_bfree);
+ tgt->f_bavail = htog(host->f_bavail);
+ tgt->f_files = htog(host->f_files);
+ tgt->f_ffree = htog(host->f_ffree);
+
+ // Is this normally a string?
+ memcpy(&tgt->f_fsid, &host->f_fsid, sizeof(host->f_fsid));
+
+ tgt.copyOut(mem);
+ }
+
+ class F64 {
+ public:
+ static void copyOutStatBuf(TranslatingPort *mem, Addr addr,
+ global_stat *host)
+ {
+ Tru64::copyOutStatBuf<Tru64::F64_stat>(mem, addr, host);
+ }
+
+ static void copyOutStatfsBuf(TranslatingPort *mem, Addr addr,
+ global_statfs *host)
+ {
+ Tru64::copyOutStatfsBuf<Tru64::F64_statfs>(mem, addr, host);
+ }
+ };
+
+ class PreF64 {
+ public:
+ static void copyOutStatBuf(TranslatingPort *mem, Addr addr,
+ global_stat *host)
+ {
+ Tru64::copyOutStatBuf<Tru64::pre_F64_stat>(mem, addr, host);
+ }
+
+ static void copyOutStatfsBuf(TranslatingPort *mem, Addr addr,
+ global_statfs *host)
+ {
+ Tru64::copyOutStatfsBuf<Tru64::pre_F64_statfs>(mem, addr, host);
+ }
+ };
+
+ /// Helper function to convert a host stat buffer to an old pre-F64
+ /// (4.x) target stat buffer. Also copies the target buffer out to
+ /// the simulated memory space. Used by pre_F64_stat(),
+ /// pre_F64_fstat(), and pre_F64_lstat().
+ static void
+ copyOutPreF64StatBuf(TranslatingPort *mem, Addr addr, struct stat *host)
+ {
+ using namespace TheISA;
+
+ TypedBufferArg<Tru64::pre_F64_stat> tgt(addr);
+
+ tgt->st_dev = htog(host->st_dev);
+ tgt->st_ino = htog(host->st_ino);
+ tgt->st_mode = htog(host->st_mode);
+ tgt->st_nlink = htog(host->st_nlink);
+ tgt->st_uid = htog(host->st_uid);
+ tgt->st_gid = htog(host->st_gid);
+ tgt->st_rdev = htog(host->st_rdev);
+ tgt->st_size = htog(host->st_size);
+ tgt->st_atimeX = htog(host->st_atime);
+ tgt->st_mtimeX = htog(host->st_mtime);
+ tgt->st_ctimeX = htog(host->st_ctime);
+ tgt->st_blksize = htog(host->st_blksize);
+ tgt->st_blocks = htog(host->st_blocks);
+
+ tgt.copyOut(mem);
+ }
+
+
+ /// The target system's hostname.
+ static const char *hostname;
+
+
+ /// Target getdirentries() handler.
+ static SyscallReturn
+ getdirentriesFunc(SyscallDesc *desc, int callnum, Process *process,
+ ExecContext *xc)
+ {
+ using namespace TheISA;
+
+#ifdef __CYGWIN__
+ panic("getdirent not implemented on cygwin!");
+#else
+ int fd = process->sim_fd(xc->getSyscallArg(0));
+ Addr tgt_buf = xc->getSyscallArg(1);
+ int tgt_nbytes = xc->getSyscallArg(2);
+ Addr tgt_basep = xc->getSyscallArg(3);
+
+ char * const host_buf = new char[tgt_nbytes];
+
+ // just pass basep through uninterpreted.
+ TypedBufferArg<int64_t> basep(tgt_basep);
+ basep.copyIn(xc->getMemPort());
+ long host_basep = (off_t)htog((int64_t)*basep);
+ int host_result = getdirentries(fd, host_buf, tgt_nbytes, &host_basep);
+
+ // check for error
+ if (host_result < 0) {
+ delete [] host_buf;
+ return -errno;
+ }
+
+ // no error: copy results back to target space
+ Addr tgt_buf_ptr = tgt_buf;
+ char *host_buf_ptr = host_buf;
+ char *host_buf_end = host_buf + host_result;
+ while (host_buf_ptr < host_buf_end) {
+ global_dirent *host_dp = (global_dirent *)host_buf_ptr;
+ int namelen = strlen(host_dp->d_name);
+
+ // Actual size includes padded string rounded up for alignment.
+ // Subtract 256 for dummy char array in Tru64::dirent definition.
+ // Add 1 to namelen for terminating null char.
+ int tgt_bufsize = sizeof(Tru64::dirent) - 256 + roundUp(namelen+1, 8);
+ TypedBufferArg<Tru64::dirent> tgt_dp(tgt_buf_ptr, tgt_bufsize);
+ tgt_dp->d_ino = host_dp->d_ino;
+ tgt_dp->d_reclen = tgt_bufsize;
+ tgt_dp->d_namlen = namelen;
+ strcpy(tgt_dp->d_name, host_dp->d_name);
+ tgt_dp.copyOut(xc->getMemPort());
+
+ tgt_buf_ptr += tgt_bufsize;
+ host_buf_ptr += host_dp->d_reclen;
+ }
+
+ delete [] host_buf;
+
+ *basep = htog((int64_t)host_basep);
+ basep.copyOut(xc->getMemPort());
+
+ return tgt_buf_ptr - tgt_buf;
+#endif
+ }
+
+ /// Target sigreturn() handler.
+ static SyscallReturn
+ sigreturnFunc(SyscallDesc *desc, int callnum, Process *process,
+ ExecContext *xc)
+ {
+ using namespace TheISA;
+
+ using TheISA::RegFile;
+ TypedBufferArg<Tru64::sigcontext> sc(xc->getSyscallArg(0));
+
+ sc.copyIn(xc->getMemPort());
+
+ // Restore state from sigcontext structure.
+ // Note that we'll advance PC <- NPC before the end of the cycle,
+ // so we need to restore the desired PC into NPC.
+ // The current regs->pc will get clobbered.
+ xc->setNextPC(htog(sc->sc_pc));
+
+ for (int i = 0; i < 31; ++i) {
+ xc->setIntReg(i, htog(sc->sc_regs[i]));
+ xc->setFloatRegBits(i, htog(sc->sc_fpregs[i]));
+ }
+
+ xc->setMiscReg(TheISA::Fpcr_DepTag, htog(sc->sc_fpcr));
+
+ return 0;
+ }
+
+
+ //
+ // Mach syscalls -- identified by negated syscall numbers
+ //
+
+ /// Create a stack region for a thread.
+ static SyscallReturn
+ stack_createFunc(SyscallDesc *desc, int callnum, Process *process,
+ ExecContext *xc)
+ {
+ using namespace TheISA;
+
+ TypedBufferArg<Tru64::vm_stack> argp(xc->getSyscallArg(0));
+
+ argp.copyIn(xc->getMemPort());
+
+ // if the user chose an address, just let them have it. Otherwise
+ // pick one for them.
+ if (htog(argp->address) == 0) {
+ argp->address = htog(process->next_thread_stack_base);
+ int stack_size = (htog(argp->rsize) + htog(argp->ysize) +
+ htog(argp->gsize));
+ process->next_thread_stack_base -= stack_size;
+ argp.copyOut(xc->getMemPort());
+ }
+
+ return 0;
+ }
+
+ /// NXM library version stamp.  Guest thread libraries must present
+ /// exactly this version in nxm_task_init/nxm_thread_create.
+ static
+ const int NXM_LIB_VERSION = 301003;
+
+ /// This call sets up the interface between the user and kernel
+ /// schedulers by creating a shared-memory region. The shared memory
+ /// region has several structs, some global, some per-RAD, some per-VP.
+ static SyscallReturn
+ nxm_task_initFunc(SyscallDesc *desc, int callnum, Process *process,
+ ExecContext *xc)
+ {
+ using namespace std;
+ using namespace TheISA;
+
+ // arg 0: task attributes; arg 1: guest location to receive a pointer
+ // to the shared config area we build below
+ TypedBufferArg<Tru64::nxm_task_attr> attrp(xc->getSyscallArg(0));
+ TypedBufferArg<Addr> configptr_ptr(xc->getSyscallArg(1));
+
+ attrp.copyIn(xc->getMemPort());
+
+ if (gtoh(attrp->nxm_version) != NXM_LIB_VERSION) {
+ cerr << "nxm_task_init: thread library version mismatch! "
+ << "got " << attrp->nxm_version
+ << ", expected " << NXM_LIB_VERSION << endl;
+ abort();
+ }
+
+ if (gtoh(attrp->flags) != Tru64::NXM_TASK_INIT_VP) {
+ cerr << "nxm_task_init: bad flag value " << attrp->flags
+ << " (expected " << Tru64::NXM_TASK_INIT_VP << ")" << endl;
+ abort();
+ }
+
+ // NOTE: the hard-coded 0x12000 and 0x14000 addresses laid out here
+ // are assumed again (re-hard-coded) in nxm_thread_createFunc; keep
+ // the two functions in sync.
+ const Addr base_addr = 0x12000; // was 0x3f0000000LL;
+ Addr cur_addr = base_addr; // next addresses to use
+ // first comes the config_info struct
+ Addr config_addr = cur_addr;
+ cur_addr += sizeof(Tru64::nxm_config_info);
+ // next comes the per-cpu state vector
+ Addr slot_state_addr = cur_addr;
+ int slot_state_size =
+ process->numCpus() * sizeof(Tru64::nxm_slot_state_t);
+ cur_addr += slot_state_size;
+ // now the per-RAD state struct (we only support one RAD)
+ cur_addr = 0x14000; // bump up addr for alignment
+ Addr rad_state_addr = cur_addr;
+ // nxm_shared embeds one nxm_sched_state; allocate numCpus-1 extra so
+ // nxm_ss[] effectively has numCpus entries
+ int rad_state_size =
+ (sizeof(Tru64::nxm_shared)
+ + (process->numCpus()-1) * sizeof(Tru64::nxm_sched_state));
+ cur_addr += rad_state_size;
+
+ // now initialize a config_info struct and copy it out to user space
+ TypedBufferArg<Tru64::nxm_config_info> config(config_addr);
+
+ config->nxm_nslots_per_rad = htog(process->numCpus());
+ config->nxm_nrads = htog(1); // only one RAD in our system!
+ config->nxm_slot_state = htog(slot_state_addr);
+ config->nxm_rad[0] = htog(rad_state_addr);
+
+ config.copyOut(xc->getMemPort());
+
+ // initialize the slot_state array and copy it out
+ TypedBufferArg<Tru64::nxm_slot_state_t> slot_state(slot_state_addr,
+ slot_state_size);
+ for (int i = 0; i < process->numCpus(); ++i) {
+ // CPU 0 is bound to the calling process; all others are available
+ // XXX this code should have an endian conversion, but I don't think
+ // it works anyway
+ slot_state[i] =
+ (i == 0) ? Tru64::NXM_SLOT_BOUND : Tru64::NXM_SLOT_AVAIL;
+ }
+
+ slot_state.copyOut(xc->getMemPort());
+
+ // same for the per-RAD "shared" struct. Note that we need to
+ // allocate extra bytes for the per-VP array which is embedded at
+ // the end.
+ TypedBufferArg<Tru64::nxm_shared> rad_state(rad_state_addr,
+ rad_state_size);
+
+ // these three fields stay in guest byte order: copied straight from
+ // the (guest-order) attribute struct, no conversion needed
+ rad_state->nxm_callback = attrp->nxm_callback;
+ rad_state->nxm_version = attrp->nxm_version;
+ rad_state->nxm_uniq_offset = attrp->nxm_uniq_offset;
+ for (int i = 0; i < process->numCpus(); ++i) {
+ Tru64::nxm_sched_state *ssp = &rad_state->nxm_ss[i];
+ ssp->nxm_u.sigmask = htog(0);
+ ssp->nxm_u.sig = htog(0);
+ ssp->nxm_u.flags = htog(0);
+ ssp->nxm_u.cancel_state = htog(0);
+ ssp->nxm_u.nxm_ssig = 0;
+ ssp->nxm_bits = htog(0);
+ ssp->nxm_quantum = attrp->nxm_quantum;
+ ssp->nxm_set_quantum = attrp->nxm_quantum;
+ ssp->nxm_sysevent = htog(0);
+
+ if (i == 0) {
+ // slot 0 belongs to the calling thread: derive its pthread id
+ // from the Alpha 'uniq' register and mark the slot active
+ uint64_t uniq = xc->readMiscReg(TheISA::Uniq_DepTag);
+ ssp->nxm_u.pth_id = htog(uniq + gtoh(attrp->nxm_uniq_offset));
+ ssp->nxm_u.nxm_active = htog(uniq | 1);
+ }
+ else {
+ ssp->nxm_u.pth_id = htog(0);
+ ssp->nxm_u.nxm_active = htog(0);
+ }
+ }
+
+ rad_state.copyOut(xc->getMemPort());
+
+ //
+ // copy pointer to shared config area out to user
+ //
+ *configptr_ptr = htog(config_addr);
+ configptr_ptr.copyOut(xc->getMemPort());
+
+ // Register this as a valid address range with the process
+ process->nxm_start = base_addr;
+ process->nxm_end = cur_addr;
+
+ return 0;
+ }
+
+ /// Initialize execution context for a newly created thread: clear the
+ /// architectural registers, seed a0/t12/sp/uniq from the thread
+ /// attributes, point the PC at the thread entry, and activate it.
+ static void
+ init_exec_context(ExecContext *ec,
+ Tru64::nxm_thread_attr *attrp, uint64_t uniq_val)
+ {
+ using namespace TheISA;
+
+ ec->clearArchRegs();
+
+ // a0 = user argument; t12 (r27) conventionally holds the callee's
+ // own entry address on Alpha, so set it to the entry PC as well
+ ec->setIntReg(TheISA::ArgumentReg0, gtoh(attrp->registers.a0));
+ ec->setIntReg(27/*t12*/, gtoh(attrp->registers.pc));
+ ec->setIntReg(TheISA::StackPointerReg, gtoh(attrp->registers.sp));
+ ec->setMiscReg(TheISA::Uniq_DepTag, uniq_val);
+
+ ec->setPC(gtoh(attrp->registers.pc));
+ ec->setNextPC(gtoh(attrp->registers.pc) + sizeof(TheISA::MachInst));
+
+ // make the context runnable
+ ec->activate();
+ }
+
+ /// Create thread.  For NXM_TYPE_VP requests, claims the requested VP
+ /// slot and forks the thread onto an idle simulator execution context;
+ /// MANAGER threads are acknowledged but not actually created.
+ static SyscallReturn
+ nxm_thread_createFunc(SyscallDesc *desc, int callnum, Process *process,
+ ExecContext *xc)
+ {
+ using namespace std;
+ using namespace TheISA;
+
+ TypedBufferArg<Tru64::nxm_thread_attr> attrp(xc->getSyscallArg(0));
+ TypedBufferArg<uint64_t> kidp(xc->getSyscallArg(1));
+ int thread_index = xc->getSyscallArg(2);
+
+ // get attribute args
+ attrp.copyIn(xc->getMemPort());
+
+ if (gtoh(attrp->version) != NXM_LIB_VERSION) {
+ cerr << "nxm_thread_create: thread library version mismatch! "
+ << "got " << attrp->version
+ << ", expected " << NXM_LIB_VERSION << endl;
+ abort();
+ }
+
+ // Bounds-check the VP slot index.  Valid indices are [0, numCpus):
+ // the original code used bitwise '|' where logical '||' was meant,
+ // and accepted thread_index == numCpus, which would index one past
+ // the end of the nxm_ss[] array below.
+ if (thread_index < 0 || thread_index >= process->numCpus()) {
+ cerr << "nxm_thread_create: bad thread index " << thread_index
+ << endl;
+ abort();
+ }
+
+ // On a real machine, the per-RAD shared structure is in
+ // shared memory, so both the user and kernel can get at it.
+ // We don't have that luxury, so we just copy it in and then
+ // back out again.
+ int rad_state_size =
+ (sizeof(Tru64::nxm_shared) +
+ (process->numCpus()-1) * sizeof(Tru64::nxm_sched_state));
+
+ // 0x14000 matches the rad_state address laid out in nxm_task_initFunc
+ TypedBufferArg<Tru64::nxm_shared> rad_state(0x14000,
+ rad_state_size);
+ rad_state.copyIn(xc->getMemPort());
+
+ uint64_t uniq_val = gtoh(attrp->pthid) - gtoh(rad_state->nxm_uniq_offset);
+
+ if (gtoh(attrp->type) == Tru64::NXM_TYPE_MANAGER) {
+ // DEC pthreads seems to always create one of these (in
+ // addition to N application threads), but we don't use it,
+ // so don't bother creating it.
+
+ // This is supposed to be a port number. Make something up.
+ *kidp = htog(99);
+ kidp.copyOut(xc->getMemPort());
+
+ return 0;
+ } else if (gtoh(attrp->type) == Tru64::NXM_TYPE_VP) {
+ // A real "virtual processor" kernel thread. Need to fork
+ // this thread on another CPU.
+ Tru64::nxm_sched_state *ssp = &rad_state->nxm_ss[thread_index];
+
+ if (gtoh(ssp->nxm_u.nxm_active) != 0)
+ return (int) Tru64::KERN_NOT_RECEIVER;
+
+ ssp->nxm_u.pth_id = attrp->pthid;
+ ssp->nxm_u.nxm_active = htog(uniq_val | 1);
+
+ rad_state.copyOut(xc->getMemPort());
+
+ // slot-state vector lives just past the config struct at the
+ // base address chosen in nxm_task_initFunc
+ Addr slot_state_addr = 0x12000 + sizeof(Tru64::nxm_config_info);
+ int slot_state_size =
+ process->numCpus() * sizeof(Tru64::nxm_slot_state_t);
+
+ TypedBufferArg<Tru64::nxm_slot_state_t>
+ slot_state(slot_state_addr,
+ slot_state_size);
+
+ slot_state.copyIn(xc->getMemPort());
+
+ if (slot_state[thread_index] != Tru64::NXM_SLOT_AVAIL) {
+ cerr << "nxm_thread_createFunc: requested VP slot "
+ << thread_index << " not available!" << endl;
+ fatal("");
+ }
+
+ // XXX This should have an endian conversion but I think this code
+ // doesn't work anyway
+ slot_state[thread_index] = Tru64::NXM_SLOT_BOUND;
+
+ slot_state.copyOut(xc->getMemPort());
+
+ // Find a free simulator execution context.
+ // (renamed from 'xc' to avoid shadowing the syscall argument)
+ for (int i = 0; i < process->numCpus(); ++i) {
+ ExecContext *nxc = process->execContexts[i];
+
++ if (nxc->status() == ExecContext::Suspended) {
+ // inactive context... grab it
+ init_exec_context(nxc, attrp, uniq_val);
+
+ // This is supposed to be a port number, but we'll try
+ // and get away with just sticking the thread index
+ // here.
+ *kidp = htog(thread_index);
+ kidp.copyOut(nxc->getMemPort());
+
+ return 0;
+ }
+ }
+
+ // fell out of loop... no available inactive context
+ cerr << "nxm_thread_create: no idle contexts available." << endl;
+ abort();
+ } else {
+ cerr << "nxm_thread_create: can't handle thread type "
+ << attrp->type << endl;
+ abort();
+ }
+
+ return 0;
+ }
+
+ /// Thread idle call (like yield()).  No-op in this emulation.
+ static SyscallReturn
+ nxm_idleFunc(SyscallDesc *desc, int callnum, Process *process,
+ ExecContext *xc)
+ {
+ return 0;
+ }
+
+ /// Block thread.  Stub: only traces the arguments and returns success.
+ static SyscallReturn
+ nxm_thread_blockFunc(SyscallDesc *desc, int callnum, Process *process,
+ ExecContext *xc)
+ {
+ using namespace std;
+
+ uint64_t tid = xc->getSyscallArg(0);
+ uint64_t secs = xc->getSyscallArg(1);
+ uint64_t flags = xc->getSyscallArg(2);
+ uint64_t action = xc->getSyscallArg(3);
+ uint64_t usecs = xc->getSyscallArg(4);
+
+ cout << xc->getCpuPtr()->name() << ": nxm_thread_block " << tid << " "
+ << secs << " " << flags << " " << action << " " << usecs << endl;
+
+ return 0;
+ }
+
+ /// block.  Stub: only traces the arguments and returns success.
+ static SyscallReturn
+ nxm_blockFunc(SyscallDesc *desc, int callnum, Process *process,
+ ExecContext *xc)
+ {
+ using namespace std;
+
+ Addr uaddr = xc->getSyscallArg(0);
+ uint64_t val = xc->getSyscallArg(1);
+ uint64_t secs = xc->getSyscallArg(2);
+ uint64_t usecs = xc->getSyscallArg(3);
+ uint64_t flags = xc->getSyscallArg(4);
+
+ BaseCPU *cpu = xc->getCpuPtr();
+
+ cout << cpu->name() << ": nxm_block "
+ << hex << uaddr << dec << " " << val
+ << " " << secs << " " << usecs
+ << " " << flags << endl;
+
+ return 0;
+ }
+
+ /// Unblock thread.  Stub: only traces the address and returns success.
+ static SyscallReturn
+ nxm_unblockFunc(SyscallDesc *desc, int callnum, Process *process,
+ ExecContext *xc)
+ {
+ using namespace std;
+
+ Addr uaddr = xc->getSyscallArg(0);
+
+ cout << xc->getCpuPtr()->name() << ": nxm_unblock "
+ << hex << uaddr << dec << endl;
+
+ return 0;
+ }
+
+ /// Switch thread priority.
+ static SyscallReturn
+ swtch_priFunc(SyscallDesc *desc, int callnum, Process *process,
+ ExecContext *xc)
+ {
+ // Attempts to switch to another runnable thread (if there is
+ // one). Returns false if there are no other threads to run
+ // (i.e., the thread can reasonably spin-wait) or true if there
+ // are other threads.
+ //
+ // Since we assume at most one "kernel" thread per CPU, it's
+ // always safe to return false here.
+ return 0; //false;
+ }
+
+
+ /// Activate exec context waiting on a channel. Just activate one
+ /// by default.
+ /// @param uaddr guest address used as the wait channel
+ /// @param activate_all wake every waiter (broadcast) instead of one
+ /// @return number of contexts activated
+ static int
+ activate_waiting_context(Addr uaddr, Process *process,
+ bool activate_all = false)
+ {
+ using namespace std;
+
+ int num_activated = 0;
+
+ list<Process::WaitRec>::iterator i = process->waitList.begin();
+ list<Process::WaitRec>::iterator end = process->waitList.end();
+
+ // stop after the first match unless broadcasting
+ while (i != end && (num_activated == 0 || activate_all)) {
+ if (i->waitChan == uaddr) {
+ // found waiting process: make it active
+ ExecContext *newCtx = i->waitingContext;
+ assert(newCtx->status() == ExecContext::Suspended);
+ newCtx->activate();
+
+ // get rid of this record
+ i = process->waitList.erase(i);
+
+ ++num_activated;
+ } else {
+ ++i;
+ }
+ }
+
+ return num_activated;
+ }
+
+ /// M5 hacked-up lock acquire.  Grabs the lock word if free; otherwise
+ /// queues the context on the process wait list and suspends it.
+ static void
+ m5_lock_mutex(Addr uaddr, Process *process, ExecContext *xc)
+ {
+ using namespace TheISA;
+
+ TypedBufferArg<uint64_t> lockp(uaddr);
+
+ lockp.copyIn(xc->getMemPort());
+
+ if (gtoh(*lockp) == 0) {
+ // lock is free: grab it
+ *lockp = htog(1);
+ lockp.copyOut(xc->getMemPort());
+ } else {
+ // lock is busy: disable until free
+ process->waitList.push_back(Process::WaitRec(uaddr, xc));
+ xc->suspend();
+ }
+ }
+
+ /// M5 unlock call.  Hands the lock directly to a waiter if one exists;
+ /// only clears the lock word when nobody is waiting.
+ static void
+ m5_unlock_mutex(Addr uaddr, Process *process, ExecContext *xc)
+ {
+ TypedBufferArg<uint64_t> lockp(uaddr);
+
+ lockp.copyIn(xc->getMemPort());
+ // no gtoh() needed: a zero/nonzero test is endian-invariant
+ assert(*lockp != 0);
+
+ // Check for a process waiting on the lock.
+ int num_waiting = activate_waiting_context(uaddr, process);
+
+ // clear lock field if no waiting context is taking over the lock
+ if (num_waiting == 0) {
+ *lockp = 0;
+ lockp.copyOut(xc->getMemPort());
+ }
+ }
+
+ /// Lock acquire syscall handler.
+ static SyscallReturn
+ m5_mutex_lockFunc(SyscallDesc *desc, int callnum, Process *process,
+ ExecContext *xc)
+ {
+ Addr uaddr = xc->getSyscallArg(0);
+
+ m5_lock_mutex(uaddr, process, xc);
+
+ // Return 0 since we will always return to the user with the lock
+ // acquired. We will just keep the context inactive until that is
+ // true.
+ return 0;
+ }
+
+ /// Try lock (non-blocking).  Returns 0 on acquisition, 1 if busy.
+ static SyscallReturn
+ m5_mutex_trylockFunc(SyscallDesc *desc, int callnum, Process *process,
+ ExecContext *xc)
+ {
+ using namespace TheISA;
+
+ Addr uaddr = xc->getSyscallArg(0);
+ TypedBufferArg<uint64_t> lockp(uaddr);
+
+ lockp.copyIn(xc->getMemPort());
+
+ if (gtoh(*lockp) == 0) {
+ // lock is free: grab it
+ *lockp = htog(1);
+ lockp.copyOut(xc->getMemPort());
+ return 0;
+ } else {
+ return 1;
+ }
+ }
+
+ /// Unlock syscall handler.
+ static SyscallReturn
+ m5_mutex_unlockFunc(SyscallDesc *desc, int callnum, Process *process,
+ ExecContext *xc)
+ {
+ Addr uaddr = xc->getSyscallArg(0);
+
+ m5_unlock_mutex(uaddr, process, xc);
+
+ return 0;
+ }
+
+ /// Signal condition: wake a single waiter (if any).
+ static SyscallReturn
+ m5_cond_signalFunc(SyscallDesc *desc, int callnum, Process *process,
+ ExecContext *xc)
+ {
+ Addr cond_addr = xc->getSyscallArg(0);
+
+ // Wake up one process waiting on the condition variable.
+ activate_waiting_context(cond_addr, process);
+
+ return 0;
+ }
+
+ /// Wake up all processes waiting on the condition variable.
+ static SyscallReturn
+ m5_cond_broadcastFunc(SyscallDesc *desc, int callnum, Process *process,
+ ExecContext *xc)
+ {
+ Addr cond_addr = xc->getSyscallArg(0);
+
+ activate_waiting_context(cond_addr, process, true);
+
+ return 0;
+ }
+
+ /// Wait on a condition.  Releases the caller-held mutex, then parks
+ /// the context on the condition channel.  Unlock and wait happen
+ /// within one syscall, so no wakeup can be lost in between.
+ static SyscallReturn
+ m5_cond_waitFunc(SyscallDesc *desc, int callnum, Process *process,
+ ExecContext *xc)
+ {
+ using namespace TheISA;
+
+ Addr cond_addr = xc->getSyscallArg(0);
+ Addr lock_addr = xc->getSyscallArg(1);
+ TypedBufferArg<uint64_t> condp(cond_addr);
+ TypedBufferArg<uint64_t> lockp(lock_addr);
+
+ // user is supposed to acquire lock before entering
+ lockp.copyIn(xc->getMemPort());
+ assert(gtoh(*lockp) != 0);
+
+ m5_unlock_mutex(lock_addr, process, xc);
+
+ process->waitList.push_back(Process::WaitRec(cond_addr, xc));
+ xc->suspend();
+
+ return 0;
+ }
+
+ /// Thread exit: deallocate the calling execution context.
+ static SyscallReturn
+ m5_thread_exitFunc(SyscallDesc *desc, int callnum, Process *process,
+ ExecContext *xc)
+ {
+ assert(xc->status() == ExecContext::Active);
+ xc->deallocate();
+
+ return 0;
+ }
+
+ /// Indirect syscall invocation (call #0): arg 0 names the real
+ /// syscall; the remaining args are shifted down one slot and the
+ /// named handler is dispatched in our place.
+ static SyscallReturn
+ indirectSyscallFunc(SyscallDesc *desc, int callnum, Process *process,
+ ExecContext *xc)
+ {
+ int new_callnum = xc->getSyscallArg(0);
+ LiveProcess *lp = dynamic_cast<LiveProcess*>(process);
+ assert(lp);
+
+ // shift args 1..5 down to 0..4 so the real handler sees them in
+ // the standard positions
+ for (int i = 0; i < 5; ++i)
+ xc->setSyscallArg(i, xc->getSyscallArg(i+1));
+
+ SyscallDesc *new_desc = lp->getDesc(new_callnum);
+ // bug fix: test the descriptor we just looked up (new_desc, not the
+ // caller's 'desc'), and report the indirect number that was bad
+ if (new_desc == NULL)
+ fatal("Syscall %d out of range", new_callnum);
+
+ new_desc->doSyscall(new_callnum, process, xc);
+
+ return 0;
+ }
+
+}; // class Tru64
+
+
+#endif // FULL_SYSTEM
+
+#endif // __TRU64_HH__
--- /dev/null
-
+from m5 import *
+from BaseCPU import BaseCPU
+
+class DerivAlphaFullCPU(BaseCPU):
+ # Parameter declarations for the DerivAlphaFullCPU simulation object.
+ # Parameters here are declared without defaults, so configurations
+ # must supply values explicitly (except where a default is given).
+ type = 'DerivAlphaFullCPU'
- local_predictor_size = Param.Unsigned("Size of local predictor")
- local_ctr_bits = Param.Unsigned("Bits per counter")
- local_history_table_size = Param.Unsigned("Size of local history table")
- local_history_bits = Param.Unsigned("Bits for the local history")
- global_predictor_size = Param.Unsigned("Size of global predictor")
- global_ctr_bits = Param.Unsigned("Bits per counter")
- global_history_bits = Param.Unsigned("Bits of history")
- choice_predictor_size = Param.Unsigned("Size of choice predictor")
- choice_ctr_bits = Param.Unsigned("Bits of choice counters")
++ activity = Param.Unsigned("Initial count")
+ numThreads = Param.Unsigned("number of HW thread contexts")
+
+ if not build_env['FULL_SYSTEM']:
+ mem = Param.FunctionalMemory(NULL, "memory")
+
++ checker = Param.BaseCPU(NULL, "checker")
++
++ cachePorts = Param.Unsigned("Cache Ports")
++
+ # Inter-stage communication latencies (in cycles).
+ decodeToFetchDelay = Param.Unsigned("Decode to fetch delay")
+ renameToFetchDelay = Param.Unsigned("Rename to fetch delay")
+ iewToFetchDelay = Param.Unsigned("Issue/Execute/Writeback to fetch "
+ "delay")
+ commitToFetchDelay = Param.Unsigned("Commit to fetch delay")
+ fetchWidth = Param.Unsigned("Fetch width")
+
+ renameToDecodeDelay = Param.Unsigned("Rename to decode delay")
+ iewToDecodeDelay = Param.Unsigned("Issue/Execute/Writeback to decode "
+ "delay")
+ commitToDecodeDelay = Param.Unsigned("Commit to decode delay")
+ fetchToDecodeDelay = Param.Unsigned("Fetch to decode delay")
+ decodeWidth = Param.Unsigned("Decode width")
+
+ iewToRenameDelay = Param.Unsigned("Issue/Execute/Writeback to rename "
+ "delay")
+ commitToRenameDelay = Param.Unsigned("Commit to rename delay")
+ decodeToRenameDelay = Param.Unsigned("Decode to rename delay")
+ renameWidth = Param.Unsigned("Rename width")
+
+ commitToIEWDelay = Param.Unsigned("Commit to "
+ "Issue/Execute/Writeback delay")
+ renameToIEWDelay = Param.Unsigned("Rename to "
+ "Issue/Execute/Writeback delay")
+ issueToExecuteDelay = Param.Unsigned("Issue to execute delay (internal "
+ "to the IEW stage)")
+ issueWidth = Param.Unsigned("Issue width")
+ executeWidth = Param.Unsigned("Execute width")
+ executeIntWidth = Param.Unsigned("Integer execute width")
+ executeFloatWidth = Param.Unsigned("Floating point execute width")
+ executeBranchWidth = Param.Unsigned("Branch execute width")
+ executeMemoryWidth = Param.Unsigned("Memory execute width")
++ fuPool = Param.FUPool(NULL, "Functional Unit pool")
+
+ iewToCommitDelay = Param.Unsigned("Issue/Execute/Writeback to commit "
+ "delay")
+ renameToROBDelay = Param.Unsigned("Rename to reorder buffer delay")
+ commitWidth = Param.Unsigned("Commit width")
+ squashWidth = Param.Unsigned("Squash width")
++ trapLatency = Param.Tick("Trap latency")
++ fetchTrapLatency = Param.Tick("Fetch trap latency")
+
+ # Branch predictor configuration (tournament predictor components).
++ localPredictorSize = Param.Unsigned("Size of local predictor")
++ localCtrBits = Param.Unsigned("Bits per counter")
++ localHistoryTableSize = Param.Unsigned("Size of local history table")
++ localHistoryBits = Param.Unsigned("Bits for the local history")
++ globalPredictorSize = Param.Unsigned("Size of global predictor")
++ globalCtrBits = Param.Unsigned("Bits per counter")
++ globalHistoryBits = Param.Unsigned("Bits of history")
++ choicePredictorSize = Param.Unsigned("Size of choice predictor")
++ choiceCtrBits = Param.Unsigned("Bits of choice counters")
+
+ BTBEntries = Param.Unsigned("Number of BTB entries")
+ BTBTagSize = Param.Unsigned("Size of the BTB tags, in bits")
+
+ RASSize = Param.Unsigned("RAS size")
+
+ LQEntries = Param.Unsigned("Number of load queue entries")
+ SQEntries = Param.Unsigned("Number of store queue entries")
+ LFSTSize = Param.Unsigned("Last fetched store table size")
+ SSITSize = Param.Unsigned("Store set ID table size")
+
+ # NOTE(review): trailing semicolon below is legal but unidiomatic Python
++ numRobs = Param.Unsigned("Number of Reorder Buffers");
++
+ numPhysIntRegs = Param.Unsigned("Number of physical integer registers")
+ numPhysFloatRegs = Param.Unsigned("Number of physical floating point "
+ "registers")
+ numIQEntries = Param.Unsigned("Number of instruction queue entries")
+ numROBEntries = Param.Unsigned("Number of reorder buffer entries")
+
+ instShiftAmt = Param.Unsigned("Number of bits to shift instructions by")
+
+ function_trace = Param.Bool(False, "Enable function trace")
+ function_trace_start = Param.Tick(0, "Cycle to start function trace")
++
+ # Simultaneous multithreading (SMT) resource-sharing policies.
++ smtNumFetchingThreads = Param.Unsigned("SMT Number of Fetching Threads")
++ smtFetchPolicy = Param.String("SMT Fetch policy")
++ smtLSQPolicy = Param.String("SMT LSQ Sharing Policy")
++ smtLSQThreshold = Param.String("SMT LSQ Threshold Sharing Parameter")
++ smtIQPolicy = Param.String("SMT IQ Sharing Policy")
++ smtIQThreshold = Param.String("SMT IQ Threshold Sharing Parameter")
++ smtROBPolicy = Param.String("SMT ROB Sharing Policy")
++ smtROBThreshold = Param.String("SMT ROB Threshold Sharing Parameter")
++ smtCommitPolicy = Param.String("SMT Commit Policy")
--- /dev/null
- xc->getCpuPtr()->kernelStats->arm();
+/*
+ * Copyright (c) 2003-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "sim/pseudo_inst.hh"
+#include "arch/vtophys.hh"
+#include "cpu/base.hh"
+#include "cpu/sampler/sampler.hh"
+#include "cpu/exec_context.hh"
++#include "cpu/quiesce_event.hh"
+#include "kern/kernel_stats.hh"
+#include "sim/param.hh"
+#include "sim/serialize.hh"
+#include "sim/sim_exit.hh"
+#include "sim/stat_control.hh"
+#include "sim/stats.hh"
+#include "sim/system.hh"
+#include "sim/debug.hh"
+#include "sim/vptr.hh"
+
+using namespace std;
+
+extern Sampler *SampCPU;
+
+using namespace Stats;
+using namespace TheISA;
+
+namespace AlphaPseudo
+{
+ // Runtime enables for the pseudo-instruction families, set from the
+ // "pseudo_inst" parameter context below.
+ bool doStatisticsInsts;
+ bool doCheckpointInsts;
+ bool doQuiesce;
+
+ // Record an "arm" event in kernel stats, if stats are attached.
+ void
+ arm(ExecContext *xc)
+ {
- xc->getCpuPtr()->kernelStats->quiesce();
++ if (xc->getKernelStats())
++ xc->getKernelStats()->arm();
+ }
+
+ // Suspend the context indefinitely (until externally reactivated).
+ void
+ quiesce(ExecContext *xc)
+ {
+ if (!doQuiesce)
+ return;
+
+ xc->suspend();
- Event *quiesceEvent = xc->getQuiesceEvent();
++ if (xc->getKernelStats())
++ xc->getKernelStats()->quiesce();
+ }
+
+ // Suspend the context, auto-resuming after 'ns' nanoseconds via the
+ // context's EndQuiesceEvent.
+ void
+ quiesceNs(ExecContext *xc, uint64_t ns)
+ {
+ if (!doQuiesce || ns == 0)
+ return;
+
- xc->getCpuPtr()->kernelStats->quiesce();
++ EndQuiesceEvent *quiesceEvent = xc->getQuiesceEvent();
+
+ // reschedule if a wakeup is already pending
+ if (quiesceEvent->scheduled())
+ quiesceEvent->reschedule(curTick + Clock::Int::ns * ns);
+ else
+ quiesceEvent->schedule(curTick + Clock::Int::ns * ns);
+
+ xc->suspend();
- Event *quiesceEvent = xc->getQuiesceEvent();
++ if (xc->getKernelStats())
++ xc->getKernelStats()->quiesce();
+ }
+
+ // Same as quiesceNs but the delay is given in CPU cycles.
+ void
+ quiesceCycles(ExecContext *xc, uint64_t cycles)
+ {
+ if (!doQuiesce || cycles == 0)
+ return;
+
- xc->getCpuPtr()->kernelStats->quiesce();
++ EndQuiesceEvent *quiesceEvent = xc->getQuiesceEvent();
+
+ if (quiesceEvent->scheduled())
+ quiesceEvent->reschedule(curTick +
+ xc->getCpuPtr()->cycles(cycles));
+ else
+ quiesceEvent->schedule(curTick +
+ xc->getCpuPtr()->cycles(cycles));
+
+ xc->suspend();
- xc->getCpuPtr()->kernelStats->ivlb();
++ if (xc->getKernelStats())
++ xc->getKernelStats()->quiesce();
+ }
+
+ // Nanoseconds the context spent in its most recent quiesce
+ // (last activation minus last suspension).
+ uint64_t
+ quiesceTime(ExecContext *xc)
+ {
+ return (xc->readLastActivate() - xc->readLastSuspend()) / Clock::Int::ns;
+ }
+
+ // Record an "ivlb" event in kernel stats, if stats are attached.
+ void
+ ivlb(ExecContext *xc)
+ {
++ if (xc->getKernelStats())
++ xc->getKernelStats()->ivlb();
+ }
+
+ // Intentionally a no-op.
+ void
+ ivle(ExecContext *xc)
+ {
+ }
+
+ // Legacy exit pseudo-instruction: terminate simulation immediately.
+ void
+ m5exit_old(ExecContext *xc)
+ {
+ SimExit(curTick, "m5_exit_old instruction encountered");
+ }
+
+ // Terminate simulation 'delay' nanoseconds from now.
+ void
+ m5exit(ExecContext *xc, Tick delay)
+ {
+ Tick when = curTick + delay * Clock::Int::ns;
+ SimExit(when, "m5_exit instruction encountered");
+ }
+
+ // Schedule a statistics reset after 'delay' ns, repeating every
+ // 'period' ns (0 = one-shot).
+ void
+ resetstats(ExecContext *xc, Tick delay, Tick period)
+ {
+ if (!doStatisticsInsts)
+ return;
+
+
+ Tick when = curTick + delay * Clock::Int::ns;
+ Tick repeat = period * Clock::Int::ns;
+
+ using namespace Stats;
+ SetupEvent(Reset, when, repeat);
+ }
+
+ // Schedule a statistics dump after 'delay' ns, repeating every
+ // 'period' ns (0 = one-shot).
+ void
+ dumpstats(ExecContext *xc, Tick delay, Tick period)
+ {
+ if (!doStatisticsInsts)
+ return;
+
+
+ Tick when = curTick + delay * Clock::Int::ns;
+ Tick repeat = period * Clock::Int::ns;
+
+ using namespace Stats;
+ SetupEvent(Dump, when, repeat);
+ }
+
+ // Insert a symbol (name read from guest memory at symbolAddr, up to
+ // 99 chars) into the kernel symbol table at 'addr'.
+ void
+ addsymbol(ExecContext *xc, Addr addr, Addr symbolAddr)
+ {
+ char symb[100];
+ CopyStringOut(xc, symb, symbolAddr, 100);
+ std::string symbol(symb);
+
+ DPRINTF(Loader, "Loaded symbol: %s @ %#llx\n", symbol, addr);
+
+ xc->getSystemPtr()->kernelSymtab->insert(addr,symbol);
+ }
+
+ // Schedule a combined dump-then-reset of statistics.
+ void
+ dumpresetstats(ExecContext *xc, Tick delay, Tick period)
+ {
+ if (!doStatisticsInsts)
+ return;
+
+
+ Tick when = curTick + delay * Clock::Int::ns;
+ Tick repeat = period * Clock::Int::ns;
+
+ using namespace Stats;
+ SetupEvent(Dump|Reset, when, repeat);
+ }
+
+ // Schedule a checkpoint after 'delay' ns, repeating every 'period' ns.
+ void
+ m5checkpoint(ExecContext *xc, Tick delay, Tick period)
+ {
+ if (!doCheckpointInsts)
+ return;
+
+
+ Tick when = curTick + delay * Clock::Int::ns;
+ Tick repeat = period * Clock::Int::ns;
+
+ Checkpoint::setup(when, repeat);
+ }
+
+ // Copy up to 'len' bytes, starting at 'offset', from the host file
+ // named by the system's 'readfile' parameter into guest memory at
+ // 'vaddr'.  Returns the number of bytes actually copied (0 if no
+ // readfile is configured or on short/failed reads).
+ uint64_t
+ readfile(ExecContext *xc, Addr vaddr, uint64_t len, uint64_t offset)
+ {
+ const string &file = xc->getCpuPtr()->system->params()->readfile;
+ if (file.empty()) {
+ return ULL(0);
+ }
+
+ uint64_t result = 0;
+
+ int fd = ::open(file.c_str(), O_RDONLY, 0);
+ if (fd < 0)
+ panic("could not open file %s\n", file);
+
+ if (::lseek(fd, offset, SEEK_SET) < 0)
+ panic("could not seek: %s", strerror(errno));
+
+ // read may return short; loop until 'len' bytes or EOF/error
+ char *buf = new char[len];
+ char *p = buf;
+ while (len > 0) {
+ int bytes = ::read(fd, p, len);
+ if (bytes <= 0)
+ break;
+
+ p += bytes;
+ result += bytes;
+ len -= bytes;
+ }
+
+ close(fd);
+ CopyIn(xc, vaddr, buf, result);
+ delete [] buf;
+ return result;
+ }
+
+ /// Parameter context that wires the "pseudo_inst" config section to
+ /// the three runtime enable flags above.
+ class Context : public ParamContext
+ {
+ public:
+ // bug fix: the parameter declaration had been mangled to
+ // "const string §ion" (HTML-entity corruption of "&section")
+ Context(const string &section) : ParamContext(section) {}
+ void checkParams();
+ };
+
+ Context context("pseudo_inst");
+
+ Param<bool> __quiesce(&context, "quiesce",
+ "enable quiesce instructions",
+ true);
+ Param<bool> __statistics(&context, "statistics",
+ "enable statistics pseudo instructions",
+ true);
+ Param<bool> __checkpoint(&context, "checkpoint",
+ "enable checkpoint pseudo instructions",
+ true);
+
+ // Copy the parsed parameter values into the global enables.
+ void
+ Context::checkParams()
+ {
+ doQuiesce = __quiesce;
+ doStatisticsInsts = __statistics;
+ doCheckpointInsts = __checkpoint;
+ }
+
+ // Drop into the simulator debugger.
+ void debugbreak(ExecContext *xc)
+ {
+ debug_break();
+ }
+
+ // Trigger a CPU model switch via the sampler, if one is active.
+ void switchcpu(ExecContext *xc)
+ {
+ if (SampCPU)
+ SampCPU->switchCPUs();
+ }
+}