--- /dev/null
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Steve Reinhardt
+ * Nathan Binkert
+ */
+
+#include "arch/alpha/faults.hh"
+#include "arch/alpha/isa_traits.hh"
+#include "arch/alpha/osfpal.hh"
+#include "arch/alpha/tlb.hh"
+#include "base/kgdb.h"
+#include "base/remote_gdb.hh"
+#include "base/stats/events.hh"
+#include "config/full_system.hh"
+#include "cpu/base.hh"
+#include "cpu/simple_thread.hh"
+#include "cpu/thread_context.hh"
+#include "kern/kernel_stats.hh"
+#include "sim/debug.hh"
+#include "sim/sim_exit.hh"
+
+#if FULL_SYSTEM
+
+using namespace EV5;
+
+////////////////////////////////////////////////////////////////////////
+//
+// Machine dependent functions
+//
+void
+AlphaISA::initCPU(ThreadContext *tc, int cpuId)
+{
+ initIPRs(tc, cpuId);
+
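+    // Hand the CPU ID to the boot PALcode/console in both the first
+    // argument register (r16/a0) and the return-value register (r0/v0).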
+ tc->setIntReg(16, cpuId);
+ tc->setIntReg(0, cpuId);
+
+ AlphaFault *reset = new ResetFault;
+
+ tc->setPC(tc->readMiscReg(IPR_PAL_BASE) + reset->vect());
+ tc->setNextPC(tc->readPC() + sizeof(MachInst));
+
+ delete reset;
+}
+
+////////////////////////////////////////////////////////////////////////
+//
+//
+//
+void
+AlphaISA::initIPRs(ThreadContext *tc, int cpuId)
+{
+ for (int i = 0; i < NumInternalProcRegs; ++i) {
+ tc->setMiscReg(i, 0);
+ }
+
+ tc->setMiscReg(IPR_PAL_BASE, PalBase);
+ tc->setMiscReg(IPR_MCSR, 0x6);
+ tc->setMiscReg(IPR_PALtemp16, cpuId);
+}
+
+
+template <class CPU>
+void
+AlphaISA::processInterrupts(CPU *cpu)
+{
+    // Check for any outstanding interrupts and handle them.
+ int ipl = 0;
+ int summary = 0;
+
+ cpu->checkInterrupts = false;
+
+ if (cpu->readMiscReg(IPR_ASTRR))
+ panic("asynchronous traps not implemented\n");
+
+ if (cpu->readMiscReg(IPR_SIRR)) {
+ for (int i = INTLEVEL_SOFTWARE_MIN;
+ i < INTLEVEL_SOFTWARE_MAX; i++) {
+ if (cpu->readMiscReg(IPR_SIRR) & (ULL(1) << i)) {
+ // See table 4-19 of the 21164 hardware reference
+ ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1;
+ summary |= (ULL(1) << i);
+ }
+ }
+ }
+
+ uint64_t interrupts = cpu->intr_status();
+
+ if (interrupts) {
+ for (int i = INTLEVEL_EXTERNAL_MIN;
+ i < INTLEVEL_EXTERNAL_MAX; i++) {
+ if (interrupts & (ULL(1) << i)) {
+ // See table 4-19 of the 21164 hardware reference
+ ipl = i;
+ summary |= (ULL(1) << i);
+ }
+ }
+ }
+
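+    // Both scans above run from low to high level, so ipl now holds the
+    // highest pending interrupt level; deliver it only if it exceeds the
+    // current priority level in IPLR.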
+ if (ipl && ipl > cpu->readMiscReg(IPR_IPLR)) {
+ cpu->setMiscReg(IPR_ISR, summary);
+ cpu->setMiscReg(IPR_INTID, ipl);
+ cpu->trap(new InterruptFault);
+ DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
+ cpu->readMiscReg(IPR_IPLR), ipl, summary);
+ }
+
+}
+
+template <class CPU>
+void
+AlphaISA::zeroRegisters(CPU *cpu)
+{
+    // Ensure ISA semantics
+ // (no longer very clean due to the change in setIntReg() in the
+ // cpu model. Consider changing later.)
+ cpu->thread->setIntReg(ZeroReg, 0);
+ cpu->thread->setFloatReg(ZeroReg, 0.0);
+}
+
+Fault
+SimpleThread::hwrei()
+{
+ if (!inPalMode())
+ return new UnimplementedOpcodeFault;
+
+ setNextPC(readMiscReg(AlphaISA::IPR_EXC_ADDR));
+
+ if (!misspeculating()) {
+ if (kernelStats)
+ kernelStats->hwrei();
+
+ cpu->checkInterrupts = true;
+ }
+
+ // FIXME: XXX check for interrupts? XXX
+ return NoFault;
+}
+
+int
+AlphaISA::MiscRegFile::getInstAsid()
+{
+ return EV5::ITB_ASN_ASN(ipr[IPR_ITB_ASN]);
+}
+
+int
+AlphaISA::MiscRegFile::getDataAsid()
+{
+ return EV5::DTB_ASN_ASN(ipr[IPR_DTB_ASN]);
+}
+
+AlphaISA::MiscReg
+AlphaISA::MiscRegFile::readIpr(int idx, Fault &fault, ThreadContext *tc)
+{
+ uint64_t retval = 0; // return value, default 0
+
+ switch (idx) {
+ case AlphaISA::IPR_PALtemp0:
+ case AlphaISA::IPR_PALtemp1:
+ case AlphaISA::IPR_PALtemp2:
+ case AlphaISA::IPR_PALtemp3:
+ case AlphaISA::IPR_PALtemp4:
+ case AlphaISA::IPR_PALtemp5:
+ case AlphaISA::IPR_PALtemp6:
+ case AlphaISA::IPR_PALtemp7:
+ case AlphaISA::IPR_PALtemp8:
+ case AlphaISA::IPR_PALtemp9:
+ case AlphaISA::IPR_PALtemp10:
+ case AlphaISA::IPR_PALtemp11:
+ case AlphaISA::IPR_PALtemp12:
+ case AlphaISA::IPR_PALtemp13:
+ case AlphaISA::IPR_PALtemp14:
+ case AlphaISA::IPR_PALtemp15:
+ case AlphaISA::IPR_PALtemp16:
+ case AlphaISA::IPR_PALtemp17:
+ case AlphaISA::IPR_PALtemp18:
+ case AlphaISA::IPR_PALtemp19:
+ case AlphaISA::IPR_PALtemp20:
+ case AlphaISA::IPR_PALtemp21:
+ case AlphaISA::IPR_PALtemp22:
+ case AlphaISA::IPR_PALtemp23:
+ case AlphaISA::IPR_PAL_BASE:
+
+ case AlphaISA::IPR_IVPTBR:
+ case AlphaISA::IPR_DC_MODE:
+ case AlphaISA::IPR_MAF_MODE:
+ case AlphaISA::IPR_ISR:
+ case AlphaISA::IPR_EXC_ADDR:
+ case AlphaISA::IPR_IC_PERR_STAT:
+ case AlphaISA::IPR_DC_PERR_STAT:
+ case AlphaISA::IPR_MCSR:
+ case AlphaISA::IPR_ASTRR:
+ case AlphaISA::IPR_ASTER:
+ case AlphaISA::IPR_SIRR:
+ case AlphaISA::IPR_ICSR:
+ case AlphaISA::IPR_ICM:
+ case AlphaISA::IPR_DTB_CM:
+ case AlphaISA::IPR_IPLR:
+ case AlphaISA::IPR_INTID:
+ case AlphaISA::IPR_PMCTR:
+ // no side-effect
+ retval = ipr[idx];
+ break;
+
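+      // The cycle counter reads as a split value: the upper 32 bits come
+      // from the IPR itself, the lower 32 track the CPU's current cycle.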
+ case AlphaISA::IPR_CC:
+ retval |= ipr[idx] & ULL(0xffffffff00000000);
+ retval |= tc->getCpuPtr()->curCycle() & ULL(0x00000000ffffffff);
+ break;
+
+ case AlphaISA::IPR_VA:
+ retval = ipr[idx];
+ break;
+
+ case AlphaISA::IPR_VA_FORM:
+ case AlphaISA::IPR_MM_STAT:
+ case AlphaISA::IPR_IFAULT_VA_FORM:
+ case AlphaISA::IPR_EXC_MASK:
+ case AlphaISA::IPR_EXC_SUM:
+ retval = ipr[idx];
+ break;
+
+ case AlphaISA::IPR_DTB_PTE:
+ {
+ AlphaISA::PTE &pte = tc->getDTBPtr()->index(!tc->misspeculating());
+
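+            // Repack the cached TLB entry into the EV5 DTB_PTE register layout.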
+            retval |= ((uint64_t)pte.ppn & ULL(0x7ffffff)) << 32;
+            retval |= ((uint64_t)pte.xre & ULL(0xf)) << 8;
+            retval |= ((uint64_t)pte.xwe & ULL(0xf)) << 12;
+            retval |= ((uint64_t)pte.fonr & ULL(0x1)) << 1;
+            retval |= ((uint64_t)pte.fonw & ULL(0x1)) << 2;
+            retval |= ((uint64_t)pte.asma & ULL(0x1)) << 4;
+            retval |= ((uint64_t)pte.asn & ULL(0x7f)) << 57;
+ }
+ break;
+
+ // write only registers
+ case AlphaISA::IPR_HWINT_CLR:
+ case AlphaISA::IPR_SL_XMIT:
+ case AlphaISA::IPR_DC_FLUSH:
+ case AlphaISA::IPR_IC_FLUSH:
+ case AlphaISA::IPR_ALT_MODE:
+ case AlphaISA::IPR_DTB_IA:
+ case AlphaISA::IPR_DTB_IAP:
+ case AlphaISA::IPR_ITB_IA:
+ case AlphaISA::IPR_ITB_IAP:
+ fault = new UnimplementedOpcodeFault;
+ break;
+
+ default:
+ // invalid IPR
+ fault = new UnimplementedOpcodeFault;
+ break;
+ }
+
+ return retval;
+}
+
+#ifdef DEBUG
+// Cause the simulator to break when changing to the following IPL
+int break_ipl = -1;
+#endif
+
+Fault
+AlphaISA::MiscRegFile::setIpr(int idx, uint64_t val, ThreadContext *tc)
+{
+ uint64_t old;
+
+ if (tc->misspeculating())
+ return NoFault;
+
+ switch (idx) {
+ case AlphaISA::IPR_PALtemp0:
+ case AlphaISA::IPR_PALtemp1:
+ case AlphaISA::IPR_PALtemp2:
+ case AlphaISA::IPR_PALtemp3:
+ case AlphaISA::IPR_PALtemp4:
+ case AlphaISA::IPR_PALtemp5:
+ case AlphaISA::IPR_PALtemp6:
+ case AlphaISA::IPR_PALtemp7:
+ case AlphaISA::IPR_PALtemp8:
+ case AlphaISA::IPR_PALtemp9:
+ case AlphaISA::IPR_PALtemp10:
+ case AlphaISA::IPR_PALtemp11:
+ case AlphaISA::IPR_PALtemp12:
+ case AlphaISA::IPR_PALtemp13:
+ case AlphaISA::IPR_PALtemp14:
+ case AlphaISA::IPR_PALtemp15:
+ case AlphaISA::IPR_PALtemp16:
+ case AlphaISA::IPR_PALtemp17:
+ case AlphaISA::IPR_PALtemp18:
+ case AlphaISA::IPR_PALtemp19:
+ case AlphaISA::IPR_PALtemp20:
+ case AlphaISA::IPR_PALtemp21:
+ case AlphaISA::IPR_PALtemp22:
+ case AlphaISA::IPR_PAL_BASE:
+ case AlphaISA::IPR_IC_PERR_STAT:
+ case AlphaISA::IPR_DC_PERR_STAT:
+ case AlphaISA::IPR_PMCTR:
+ // write entire quad w/ no side-effect
+ ipr[idx] = val;
+ break;
+
+ case AlphaISA::IPR_CC_CTL:
+ // This IPR resets the cycle counter. We assume this only
+ // happens once... let's verify that.
+ assert(ipr[idx] == 0);
+ ipr[idx] = 1;
+ break;
+
+ case AlphaISA::IPR_CC:
+ // This IPR only writes the upper 64 bits. It's ok to write
+ // all 64 here since we mask out the lower 32 in rpcc (see
+ // isa_desc).
+ ipr[idx] = val;
+ break;
+
+ case AlphaISA::IPR_PALtemp23:
+ // write entire quad w/ no side-effect
+ old = ipr[idx];
+ ipr[idx] = val;
+ if (tc->getKernelStats())
+ tc->getKernelStats()->context(old, val, tc);
+ break;
+
+ case AlphaISA::IPR_DTB_PTE:
+ // write entire quad w/ no side-effect, tag is forthcoming
+ ipr[idx] = val;
+ break;
+
+ case AlphaISA::IPR_EXC_ADDR:
+ // second least significant bit in PC is always zero
+ ipr[idx] = val & ~2;
+ break;
+
+ case AlphaISA::IPR_ASTRR:
+ case AlphaISA::IPR_ASTER:
+ // only write least significant four bits - privilege mask
+ ipr[idx] = val & 0xf;
+ break;
+
+ case AlphaISA::IPR_IPLR:
+#ifdef DEBUG
+ if (break_ipl != -1 && break_ipl == (val & 0x1f))
+ debug_break();
+#endif
+
+ // only write least significant five bits - interrupt level
+ ipr[idx] = val & 0x1f;
+ if (tc->getKernelStats())
+ tc->getKernelStats()->swpipl(ipr[idx]);
+ break;
+
+ case AlphaISA::IPR_DTB_CM:
+ if (val & 0x18) {
+ if (tc->getKernelStats())
+ tc->getKernelStats()->mode(Kernel::user, tc);
+ } else {
+ if (tc->getKernelStats())
+ tc->getKernelStats()->mode(Kernel::kernel, tc);
+ }
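+        // note: intentional fall-through to the IPR_ICM case below,
+        // which performs the actual mode-bit write for both IPRs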
+
+ case AlphaISA::IPR_ICM:
+ // only write two mode bits - processor mode
+ ipr[idx] = val & 0x18;
+ break;
+
+ case AlphaISA::IPR_ALT_MODE:
+ // only write two mode bits - processor mode
+ ipr[idx] = val & 0x18;
+ break;
+
+ case AlphaISA::IPR_MCSR:
+ // more here after optimization...
+ ipr[idx] = val;
+ break;
+
+ case AlphaISA::IPR_SIRR:
+ // only write software interrupt mask
+ ipr[idx] = val & 0x7fff0;
+ break;
+
+ case AlphaISA::IPR_ICSR:
+ ipr[idx] = val & ULL(0xffffff0300);
+ break;
+
+ case AlphaISA::IPR_IVPTBR:
+ case AlphaISA::IPR_MVPTBR:
+ ipr[idx] = val & ULL(0xffffffffc0000000);
+ break;
+
+ case AlphaISA::IPR_DC_TEST_CTL:
+ ipr[idx] = val & 0x1ffb;
+ break;
+
+ case AlphaISA::IPR_DC_MODE:
+ case AlphaISA::IPR_MAF_MODE:
+ ipr[idx] = val & 0x3f;
+ break;
+
+ case AlphaISA::IPR_ITB_ASN:
+ ipr[idx] = val & 0x7f0;
+ break;
+
+ case AlphaISA::IPR_DTB_ASN:
+ ipr[idx] = val & ULL(0xfe00000000000000);
+ break;
+
+ case AlphaISA::IPR_EXC_SUM:
+ case AlphaISA::IPR_EXC_MASK:
+ // any write to this register clears it
+ ipr[idx] = 0;
+ break;
+
+ case AlphaISA::IPR_INTID:
+ case AlphaISA::IPR_SL_RCV:
+ case AlphaISA::IPR_MM_STAT:
+ case AlphaISA::IPR_ITB_PTE_TEMP:
+ case AlphaISA::IPR_DTB_PTE_TEMP:
+ // read-only registers
+ return new UnimplementedOpcodeFault;
+
+ case AlphaISA::IPR_HWINT_CLR:
+ case AlphaISA::IPR_SL_XMIT:
+ case AlphaISA::IPR_DC_FLUSH:
+ case AlphaISA::IPR_IC_FLUSH:
+ // the following are write only
+ ipr[idx] = val;
+ break;
+
+ case AlphaISA::IPR_DTB_IA:
+ // really a control write
+ ipr[idx] = 0;
+
+ tc->getDTBPtr()->flushAll();
+ break;
+
+ case AlphaISA::IPR_DTB_IAP:
+ // really a control write
+ ipr[idx] = 0;
+
+ tc->getDTBPtr()->flushProcesses();
+ break;
+
+ case AlphaISA::IPR_DTB_IS:
+ // really a control write
+ ipr[idx] = val;
+
+ tc->getDTBPtr()->flushAddr(val,
+ DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]));
+ break;
+
+ case AlphaISA::IPR_DTB_TAG: {
+ struct AlphaISA::PTE pte;
+
+ // FIXME: granularity hints NYI...
+ if (DTB_PTE_GH(ipr[AlphaISA::IPR_DTB_PTE]) != 0)
+ panic("PTE GH field != 0");
+
+ // write entire quad
+ ipr[idx] = val;
+
+ // construct PTE for new entry
+ pte.ppn = DTB_PTE_PPN(ipr[AlphaISA::IPR_DTB_PTE]);
+ pte.xre = DTB_PTE_XRE(ipr[AlphaISA::IPR_DTB_PTE]);
+ pte.xwe = DTB_PTE_XWE(ipr[AlphaISA::IPR_DTB_PTE]);
+ pte.fonr = DTB_PTE_FONR(ipr[AlphaISA::IPR_DTB_PTE]);
+ pte.fonw = DTB_PTE_FONW(ipr[AlphaISA::IPR_DTB_PTE]);
+ pte.asma = DTB_PTE_ASMA(ipr[AlphaISA::IPR_DTB_PTE]);
+ pte.asn = DTB_ASN_ASN(ipr[AlphaISA::IPR_DTB_ASN]);
+
+ // insert new TAG/PTE value into data TLB
+ tc->getDTBPtr()->insert(val, pte);
+ }
+ break;
+
+ case AlphaISA::IPR_ITB_PTE: {
+ struct AlphaISA::PTE pte;
+
+ // FIXME: granularity hints NYI...
+ if (ITB_PTE_GH(val) != 0)
+ panic("PTE GH field != 0");
+
+ // write entire quad
+ ipr[idx] = val;
+
+ // construct PTE for new entry
+ pte.ppn = ITB_PTE_PPN(val);
+ pte.xre = ITB_PTE_XRE(val);
+ pte.xwe = 0;
+ pte.fonr = ITB_PTE_FONR(val);
+ pte.fonw = ITB_PTE_FONW(val);
+ pte.asma = ITB_PTE_ASMA(val);
+ pte.asn = ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]);
+
+        // insert new TAG/PTE value into instruction TLB
+ tc->getITBPtr()->insert(ipr[AlphaISA::IPR_ITB_TAG], pte);
+ }
+ break;
+
+ case AlphaISA::IPR_ITB_IA:
+ // really a control write
+ ipr[idx] = 0;
+
+ tc->getITBPtr()->flushAll();
+ break;
+
+ case AlphaISA::IPR_ITB_IAP:
+ // really a control write
+ ipr[idx] = 0;
+
+ tc->getITBPtr()->flushProcesses();
+ break;
+
+ case AlphaISA::IPR_ITB_IS:
+ // really a control write
+ ipr[idx] = val;
+
+ tc->getITBPtr()->flushAddr(val,
+ ITB_ASN_ASN(ipr[AlphaISA::IPR_ITB_ASN]));
+ break;
+
+ default:
+ // invalid IPR
+ return new UnimplementedOpcodeFault;
+ }
+
+ // no error...
+ return NoFault;
+}
+
+
+void
+AlphaISA::copyIprs(ThreadContext *src, ThreadContext *dest)
+{
+ for (int i = IPR_Base_DepTag; i < NumInternalProcRegs; ++i) {
+ dest->setMiscReg(i, src->readMiscReg(i));
+ }
+}
+
+
+/**
+ * Check for special simulator handling of specific PAL calls.
+ * If return value is false, actual PAL call will be suppressed.
+ */
+bool
+SimpleThread::simPalCheck(int palFunc)
+{
+ if (kernelStats)
+ kernelStats->callpal(palFunc, tc);
+
+ switch (palFunc) {
+ case PAL::halt:
+ halt();
+ if (--System::numSystemsRunning == 0)
+ exitSimLoop("all cpus halted");
+ break;
+
+ case PAL::bpt:
+ case PAL::bugchk:
+ if (system->breakpoint())
+ return false;
+ break;
+ }
+
+ return true;
+}
+
+#endif // FULL_SYSTEM
--- /dev/null
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ben Nash
+ */
+
+/**
+ * @file
+ * Modifications for the FreeBSD kernel.
+ * Based on kern/linux/linux_system.cc.
+ *
+ */
+
+#include "arch/alpha/system.hh"
+#include "arch/alpha/freebsd/system.hh"
+#include "base/loader/symtab.hh"
+#include "cpu/thread_context.hh"
+#include "mem/physical.hh"
+#include "mem/port.hh"
+#include "arch/isa_traits.hh"
+#include "sim/builder.hh"
+#include "sim/byteswap.hh"
+#include "arch/vtophys.hh"
+
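+// Input clock of the i8254-style interval timer, in Hz (~1.19318 MHz).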
+#define TIMER_FREQUENCY 1193180
+
+using namespace std;
+using namespace AlphaISA;
+
+FreebsdAlphaSystem::FreebsdAlphaSystem(Params *p)
+ : AlphaSystem(p)
+{
+ /**
+     * Any time DELAY is called, just skip the function.
+ * Shouldn't we actually emulate the delay?
+ */
+ skipDelayEvent = addKernelFuncEvent<SkipFuncEvent>("DELAY");
+ skipCalibrateClocks =
+ addKernelFuncEvent<SkipCalibrateClocksEvent>("calibrate_clocks");
+}
+
+
+FreebsdAlphaSystem::~FreebsdAlphaSystem()
+{
+ delete skipDelayEvent;
+ delete skipCalibrateClocks;
+}
+
+
+void
+FreebsdAlphaSystem::doCalibrateClocks(ThreadContext *tc)
+{
+ Addr ppc_vaddr = 0;
+ Addr timer_vaddr = 0;
+
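+    // The kernel's calibrate_clocks() receives pointers to its clock-rate
+    // variables in argument registers; write the simulated CPU frequency
+    // and the timer frequency straight into them so that no calibration
+    // loop ever has to run.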
+ ppc_vaddr = (Addr)tc->readIntReg(ArgumentReg1);
+ timer_vaddr = (Addr)tc->readIntReg(ArgumentReg2);
+
+ virtPort.write(ppc_vaddr, (uint32_t)Clock::Frequency);
+ virtPort.write(timer_vaddr, (uint32_t)TIMER_FREQUENCY);
+}
+
+
+void
+FreebsdAlphaSystem::SkipCalibrateClocksEvent::process(ThreadContext *tc)
+{
+ SkipFuncEvent::process(tc);
+ ((FreebsdAlphaSystem *)tc->getSystemPtr())->doCalibrateClocks(tc);
+}
+
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(FreebsdAlphaSystem)
+
+ Param<Tick> boot_cpu_frequency;
+ SimObjectParam<PhysicalMemory *> physmem;
+ SimpleEnumParam<System::MemoryMode> mem_mode;
+
+ Param<string> kernel;
+ Param<string> console;
+ Param<string> pal;
+
+ Param<string> boot_osflags;
+ Param<string> readfile;
+    Param<string> symbolfile;
+ Param<unsigned int> init_param;
+
+ Param<uint64_t> system_type;
+ Param<uint64_t> system_rev;
+
+END_DECLARE_SIM_OBJECT_PARAMS(FreebsdAlphaSystem)
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(FreebsdAlphaSystem)
+
+ INIT_PARAM(boot_cpu_frequency, "Frequency of the boot CPU"),
+    INIT_PARAM(physmem, "physical memory"),
+    INIT_ENUM_PARAM(mem_mode, "Memory Mode (1=atomic, 2=timing)",
+ System::MemoryModeStrings),
+ INIT_PARAM(kernel, "file that contains the kernel code"),
+ INIT_PARAM(console, "file that contains the console code"),
+ INIT_PARAM(pal, "file that contains palcode"),
+ INIT_PARAM_DFLT(boot_osflags, "flags to pass to the kernel during boot",
+ "a"),
+ INIT_PARAM_DFLT(readfile, "file to read startup script from", ""),
+    INIT_PARAM_DFLT(symbolfile, "file to read symbols from", ""),
+ INIT_PARAM_DFLT(init_param, "numerical value to pass into simulator", 0),
+ INIT_PARAM_DFLT(system_type, "Type of system we are emulating", 34),
+ INIT_PARAM_DFLT(system_rev, "Revision of system we are emulating", 1<<10)
+
+END_INIT_SIM_OBJECT_PARAMS(FreebsdAlphaSystem)
+
+CREATE_SIM_OBJECT(FreebsdAlphaSystem)
+{
+ AlphaSystem::Params *p = new AlphaSystem::Params;
+ p->name = getInstanceName();
+ p->boot_cpu_frequency = boot_cpu_frequency;
+ p->physmem = physmem;
+ p->mem_mode = mem_mode;
+ p->kernel_path = kernel;
+ p->console_path = console;
+ p->palcode = pal;
+ p->boot_osflags = boot_osflags;
+ p->init_param = init_param;
+ p->readfile = readfile;
+    p->symbolfile = symbolfile;
+ p->system_type = system_type;
+ p->system_rev = system_rev;
+ return new FreebsdAlphaSystem(p);
+}
+
+REGISTER_SIM_OBJECT("FreebsdAlphaSystem", FreebsdAlphaSystem)
+
--- /dev/null
+// -*- mode:c++ -*-
+
+// Copyright (c) 2003-2006 The Regents of The University of Michigan
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Steve Reinhardt
+
+////////////////////////////////////////////////////////////////////
+//
+// The actual decoder specification
+//
+
+decode OPCODE default Unknown::unknown() {
+
+ format LoadAddress {
+ 0x08: lda({{ Ra = Rb + disp; }});
+ 0x09: ldah({{ Ra = Rb + (disp << 16); }});
+ }
+
+ format LoadOrNop {
+ 0x0a: ldbu({{ Ra.uq = Mem.ub; }});
+ 0x0c: ldwu({{ Ra.uq = Mem.uw; }});
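+        // ldq_u ignores the low three address bits, loading the aligned
+        // quadword that contains the byte addressed by Rb + disp.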
+ 0x0b: ldq_u({{ Ra = Mem.uq; }}, ea_code = {{ EA = (Rb + disp) & ~7; }});
+ 0x23: ldt({{ Fa = Mem.df; }});
+ 0x2a: ldl_l({{ Ra.sl = Mem.sl; }}, mem_flags = LOCKED);
+ 0x2b: ldq_l({{ Ra.uq = Mem.uq; }}, mem_flags = LOCKED);
+#ifdef USE_COPY
+ 0x20: MiscPrefetch::copy_load({{ EA = Ra; }},
+ {{ fault = xc->copySrcTranslate(EA); }},
+ inst_flags = [IsMemRef, IsLoad, IsCopy]);
+#endif
+ }
+
+ format LoadOrPrefetch {
+ 0x28: ldl({{ Ra.sl = Mem.sl; }});
+ 0x29: ldq({{ Ra.uq = Mem.uq; }}, pf_flags = EVICT_NEXT);
+        // IsFloating flag on lds gets the prefetch to disassemble
+        // using f31 instead of r31... functionally it's unnecessary
+ 0x22: lds({{ Fa.uq = s_to_t(Mem.ul); }},
+ pf_flags = PF_EXCLUSIVE, inst_flags = IsFloating);
+ }
+
+ format Store {
+ 0x0e: stb({{ Mem.ub = Ra<7:0>; }});
+ 0x0d: stw({{ Mem.uw = Ra<15:0>; }});
+ 0x2c: stl({{ Mem.ul = Ra<31:0>; }});
+ 0x2d: stq({{ Mem.uq = Ra.uq; }});
+ 0x0f: stq_u({{ Mem.uq = Ra.uq; }}, {{ EA = (Rb + disp) & ~7; }});
+ 0x26: sts({{ Mem.ul = t_to_s(Fa.uq); }});
+ 0x27: stt({{ Mem.df = Fa; }});
+#ifdef USE_COPY
+ 0x24: MiscPrefetch::copy_store({{ EA = Rb; }},
+ {{ fault = xc->copy(EA); }},
+ inst_flags = [IsMemRef, IsStore, IsCopy]);
+#endif
+ }
+
+ format StoreCond {
+ 0x2e: stl_c({{ Mem.ul = Ra<31:0>; }},
+ {{
+ uint64_t tmp = write_result;
+ // see stq_c
+ Ra = (tmp == 0 || tmp == 1) ? tmp : Ra;
+ }}, mem_flags = LOCKED, inst_flags = IsStoreConditional);
+ 0x2f: stq_c({{ Mem.uq = Ra; }},
+ {{
+ uint64_t tmp = write_result;
+ // If the write operation returns 0 or 1, then
+ // this was a conventional store conditional,
+ // and the value indicates the success/failure
+ // of the operation. If another value is
+ // returned, then this was a Turbolaser
+ // mailbox access, and we don't update the
+ // result register at all.
+ Ra = (tmp == 0 || tmp == 1) ? tmp : Ra;
+ }}, mem_flags = LOCKED, inst_flags = IsStoreConditional);
+ }
+
+ format IntegerOperate {
+
+ 0x10: decode INTFUNC { // integer arithmetic operations
+
+ 0x00: addl({{ Rc.sl = Ra.sl + Rb_or_imm.sl; }});
+ 0x40: addlv({{
+ uint32_t tmp = Ra.sl + Rb_or_imm.sl;
+ // signed overflow occurs when operands have same sign
+ // and sign of result does not match.
+ if (Ra.sl<31:> == Rb_or_imm.sl<31:> && tmp<31:> != Ra.sl<31:>)
+ fault = new IntegerOverflowFault;
+ Rc.sl = tmp;
+ }});
+ 0x02: s4addl({{ Rc.sl = (Ra.sl << 2) + Rb_or_imm.sl; }});
+ 0x12: s8addl({{ Rc.sl = (Ra.sl << 3) + Rb_or_imm.sl; }});
+
+ 0x20: addq({{ Rc = Ra + Rb_or_imm; }});
+ 0x60: addqv({{
+ uint64_t tmp = Ra + Rb_or_imm;
+ // signed overflow occurs when operands have same sign
+ // and sign of result does not match.
+ if (Ra<63:> == Rb_or_imm<63:> && tmp<63:> != Ra<63:>)
+ fault = new IntegerOverflowFault;
+ Rc = tmp;
+ }});
+ 0x22: s4addq({{ Rc = (Ra << 2) + Rb_or_imm; }});
+ 0x32: s8addq({{ Rc = (Ra << 3) + Rb_or_imm; }});
+
+ 0x09: subl({{ Rc.sl = Ra.sl - Rb_or_imm.sl; }});
+ 0x49: sublv({{
+ uint32_t tmp = Ra.sl - Rb_or_imm.sl;
+ // signed overflow detection is same as for add,
+ // except we need to look at the *complemented*
+ // sign bit of the subtrahend (Rb), i.e., if the initial
+ // signs are the *same* then no overflow can occur
+ if (Ra.sl<31:> != Rb_or_imm.sl<31:> && tmp<31:> != Ra.sl<31:>)
+ fault = new IntegerOverflowFault;
+ Rc.sl = tmp;
+ }});
+ 0x0b: s4subl({{ Rc.sl = (Ra.sl << 2) - Rb_or_imm.sl; }});
+ 0x1b: s8subl({{ Rc.sl = (Ra.sl << 3) - Rb_or_imm.sl; }});
+
+ 0x29: subq({{ Rc = Ra - Rb_or_imm; }});
+ 0x69: subqv({{
+ uint64_t tmp = Ra - Rb_or_imm;
+ // signed overflow detection is same as for add,
+ // except we need to look at the *complemented*
+ // sign bit of the subtrahend (Rb), i.e., if the initial
+ // signs are the *same* then no overflow can occur
+ if (Ra<63:> != Rb_or_imm<63:> && tmp<63:> != Ra<63:>)
+ fault = new IntegerOverflowFault;
+ Rc = tmp;
+ }});
+ 0x2b: s4subq({{ Rc = (Ra << 2) - Rb_or_imm; }});
+ 0x3b: s8subq({{ Rc = (Ra << 3) - Rb_or_imm; }});
+
+ 0x2d: cmpeq({{ Rc = (Ra == Rb_or_imm); }});
+ 0x6d: cmple({{ Rc = (Ra.sq <= Rb_or_imm.sq); }});
+ 0x4d: cmplt({{ Rc = (Ra.sq < Rb_or_imm.sq); }});
+ 0x3d: cmpule({{ Rc = (Ra.uq <= Rb_or_imm.uq); }});
+ 0x1d: cmpult({{ Rc = (Ra.uq < Rb_or_imm.uq); }});
+
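+        // cmpbge does eight independent unsigned byte compares; result
+        // bit i is set when byte i of Ra is >= byte i of Rb_or_imm.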
+ 0x0f: cmpbge({{
+ int hi = 7;
+ int lo = 0;
+ uint64_t tmp = 0;
+ for (int i = 0; i < 8; ++i) {
+ tmp |= (Ra.uq<hi:lo> >= Rb_or_imm.uq<hi:lo>) << i;
+ hi += 8;
+ lo += 8;
+ }
+ Rc = tmp;
+ }});
+ }
+
+ 0x11: decode INTFUNC { // integer logical operations
+
+ 0x00: and({{ Rc = Ra & Rb_or_imm; }});
+ 0x08: bic({{ Rc = Ra & ~Rb_or_imm; }});
+ 0x20: bis({{ Rc = Ra | Rb_or_imm; }});
+ 0x28: ornot({{ Rc = Ra | ~Rb_or_imm; }});
+ 0x40: xor({{ Rc = Ra ^ Rb_or_imm; }});
+ 0x48: eqv({{ Rc = Ra ^ ~Rb_or_imm; }});
+
+ // conditional moves
+ 0x14: cmovlbs({{ Rc = ((Ra & 1) == 1) ? Rb_or_imm : Rc; }});
+ 0x16: cmovlbc({{ Rc = ((Ra & 1) == 0) ? Rb_or_imm : Rc; }});
+ 0x24: cmoveq({{ Rc = (Ra == 0) ? Rb_or_imm : Rc; }});
+ 0x26: cmovne({{ Rc = (Ra != 0) ? Rb_or_imm : Rc; }});
+ 0x44: cmovlt({{ Rc = (Ra.sq < 0) ? Rb_or_imm : Rc; }});
+ 0x46: cmovge({{ Rc = (Ra.sq >= 0) ? Rb_or_imm : Rc; }});
+ 0x64: cmovle({{ Rc = (Ra.sq <= 0) ? Rb_or_imm : Rc; }});
+ 0x66: cmovgt({{ Rc = (Ra.sq > 0) ? Rb_or_imm : Rc; }});
+
+ // For AMASK, RA must be R31.
+ 0x61: decode RA {
+ 31: amask({{ Rc = Rb_or_imm & ~ULL(0x17); }});
+ }
+
+ // For IMPLVER, RA must be R31 and the B operand
+ // must be the immediate value 1.
+ 0x6c: decode RA {
+ 31: decode IMM {
+ 1: decode INTIMM {
+ // return EV5 for FULL_SYSTEM and EV6 otherwise
+ 1: implver({{
+#if FULL_SYSTEM
+ Rc = 1;
+#else
+ Rc = 2;
+#endif
+ }});
+ }
+ }
+ }
+
+#if FULL_SYSTEM
+ // The mysterious 11.25...
+ 0x25: WarnUnimpl::eleven25();
+#endif
+ }
+
+ 0x12: decode INTFUNC {
+ 0x39: sll({{ Rc = Ra << Rb_or_imm<5:0>; }});
+ 0x34: srl({{ Rc = Ra.uq >> Rb_or_imm<5:0>; }});
+ 0x3c: sra({{ Rc = Ra.sq >> Rb_or_imm<5:0>; }});
+
+ 0x02: mskbl({{ Rc = Ra & ~(mask( 8) << (Rb_or_imm<2:0> * 8)); }});
+ 0x12: mskwl({{ Rc = Ra & ~(mask(16) << (Rb_or_imm<2:0> * 8)); }});
+ 0x22: mskll({{ Rc = Ra & ~(mask(32) << (Rb_or_imm<2:0> * 8)); }});
+ 0x32: mskql({{ Rc = Ra & ~(mask(64) << (Rb_or_imm<2:0> * 8)); }});
+
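+        // The *h (high) mask variants clear the bytes of an unaligned
+        // datum that spill into the next aligned quadword; a zero byte
+        // offset leaves Ra untouched.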
+ 0x52: mskwh({{
+ int bv = Rb_or_imm<2:0>;
+ Rc = bv ? (Ra & ~(mask(16) >> (64 - 8 * bv))) : Ra;
+ }});
+ 0x62: msklh({{
+ int bv = Rb_or_imm<2:0>;
+ Rc = bv ? (Ra & ~(mask(32) >> (64 - 8 * bv))) : Ra;
+ }});
+ 0x72: mskqh({{
+ int bv = Rb_or_imm<2:0>;
+ Rc = bv ? (Ra & ~(mask(64) >> (64 - 8 * bv))) : Ra;
+ }});
+
+ 0x06: extbl({{ Rc = (Ra.uq >> (Rb_or_imm<2:0> * 8))< 7:0>; }});
+ 0x16: extwl({{ Rc = (Ra.uq >> (Rb_or_imm<2:0> * 8))<15:0>; }});
+ 0x26: extll({{ Rc = (Ra.uq >> (Rb_or_imm<2:0> * 8))<31:0>; }});
+ 0x36: extql({{ Rc = (Ra.uq >> (Rb_or_imm<2:0> * 8)); }});
+
+ 0x5a: extwh({{
+ Rc = (Ra << (64 - (Rb_or_imm<2:0> * 8))<5:0>)<15:0>; }});
+ 0x6a: extlh({{
+ Rc = (Ra << (64 - (Rb_or_imm<2:0> * 8))<5:0>)<31:0>; }});
+ 0x7a: extqh({{
+ Rc = (Ra << (64 - (Rb_or_imm<2:0> * 8))<5:0>); }});
+
+ 0x0b: insbl({{ Rc = Ra< 7:0> << (Rb_or_imm<2:0> * 8); }});
+ 0x1b: inswl({{ Rc = Ra<15:0> << (Rb_or_imm<2:0> * 8); }});
+ 0x2b: insll({{ Rc = Ra<31:0> << (Rb_or_imm<2:0> * 8); }});
+ 0x3b: insql({{ Rc = Ra << (Rb_or_imm<2:0> * 8); }});
+
+ 0x57: inswh({{
+ int bv = Rb_or_imm<2:0>;
+ Rc = bv ? (Ra.uq<15:0> >> (64 - 8 * bv)) : 0;
+ }});
+ 0x67: inslh({{
+ int bv = Rb_or_imm<2:0>;
+ Rc = bv ? (Ra.uq<31:0> >> (64 - 8 * bv)) : 0;
+ }});
+ 0x77: insqh({{
+ int bv = Rb_or_imm<2:0>;
+ Rc = bv ? (Ra.uq >> (64 - 8 * bv)) : 0;
+ }});
+
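+        // zap/zapnot: each of the low eight bits of Rb_or_imm selects one
+        // byte of Ra to clear (zap) or preserve (zapnot).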
+ 0x30: zap({{
+ uint64_t zapmask = 0;
+ for (int i = 0; i < 8; ++i) {
+ if (Rb_or_imm<i:>)
+ zapmask |= (mask(8) << (i * 8));
+ }
+ Rc = Ra & ~zapmask;
+ }});
+ 0x31: zapnot({{
+ uint64_t zapmask = 0;
+ for (int i = 0; i < 8; ++i) {
+ if (!Rb_or_imm<i:>)
+ zapmask |= (mask(8) << (i * 8));
+ }
+ Rc = Ra & ~zapmask;
+ }});
+ }
+
+ 0x13: decode INTFUNC { // integer multiplies
+ 0x00: mull({{ Rc.sl = Ra.sl * Rb_or_imm.sl; }}, IntMultOp);
+ 0x20: mulq({{ Rc = Ra * Rb_or_imm; }}, IntMultOp);
+ 0x30: umulh({{
+ uint64_t hi, lo;
+ mul128(Ra, Rb_or_imm, hi, lo);
+ Rc = hi;
+ }}, IntMultOp);
+ 0x40: mullv({{
+ // 32-bit multiply with trap on overflow
+ int64_t Rax = Ra.sl; // sign extended version of Ra.sl
+ int64_t Rbx = Rb_or_imm.sl;
+ int64_t tmp = Rax * Rbx;
+ // To avoid overflow, all the upper 32 bits must match
+ // the sign bit of the lower 32. We code this as
+ // checking the upper 33 bits for all 0s or all 1s.
+ uint64_t sign_bits = tmp<63:31>;
+ if (sign_bits != 0 && sign_bits != mask(33))
+ fault = new IntegerOverflowFault;
+ Rc.sl = tmp<31:0>;
+ }}, IntMultOp);
+ 0x60: mulqv({{
+ // 64-bit multiply with trap on overflow
+ uint64_t hi, lo;
+ mul128(Ra, Rb_or_imm, hi, lo);
+ // all the upper 64 bits must match the sign bit of
+ // the lower 64
+ if (!((hi == 0 && lo<63:> == 0) ||
+ (hi == mask(64) && lo<63:> == 1)))
+ fault = new IntegerOverflowFault;
+ Rc = lo;
+ }}, IntMultOp);
+ }
+
+ 0x1c: decode INTFUNC {
+ 0x00: decode RA { 31: sextb({{ Rc.sb = Rb_or_imm< 7:0>; }}); }
+ 0x01: decode RA { 31: sextw({{ Rc.sw = Rb_or_imm<15:0>; }}); }
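+        // Count leading/trailing zeros by binary search: test half of the
+        // remaining bits at each step and accumulate the count.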
+ 0x32: ctlz({{
+ uint64_t count = 0;
+ uint64_t temp = Rb;
+ if (temp<63:32>) temp >>= 32; else count += 32;
+ if (temp<31:16>) temp >>= 16; else count += 16;
+ if (temp<15:8>) temp >>= 8; else count += 8;
+ if (temp<7:4>) temp >>= 4; else count += 4;
+ if (temp<3:2>) temp >>= 2; else count += 2;
+ if (temp<1:1>) temp >>= 1; else count += 1;
+ if ((temp<0:0>) != 0x1) count += 1;
+ Rc = count;
+ }}, IntAluOp);
+
+ 0x33: cttz({{
+ uint64_t count = 0;
+ uint64_t temp = Rb;
+ if (!(temp<31:0>)) { temp >>= 32; count += 32; }
+ if (!(temp<15:0>)) { temp >>= 16; count += 16; }
+ if (!(temp<7:0>)) { temp >>= 8; count += 8; }
+ if (!(temp<3:0>)) { temp >>= 4; count += 4; }
+ if (!(temp<1:0>)) { temp >>= 2; count += 2; }
+ if (!(temp<0:0> & ULL(0x1))) count += 1;
+ Rc = count;
+ }}, IntAluOp);
+
+ format FailUnimpl {
+ 0x30: ctpop();
+ 0x31: perr();
+ 0x34: unpkbw();
+ 0x35: unpkbl();
+ 0x36: pkwb();
+ 0x37: pklb();
+ 0x38: minsb8();
+ 0x39: minsw4();
+ 0x3a: minub8();
+ 0x3b: minuw4();
+ 0x3c: maxub8();
+ 0x3d: maxuw4();
+ 0x3e: maxsb8();
+ 0x3f: maxsw4();
+ }
+
+ format BasicOperateWithNopCheck {
+ 0x70: decode RB {
+ 31: ftoit({{ Rc = Fa.uq; }}, FloatCvtOp);
+ }
+ 0x78: decode RB {
+ 31: ftois({{ Rc.sl = t_to_s(Fa.uq); }},
+ FloatCvtOp);
+ }
+ }
+ }
+ }
+
+ // Conditional branches.
+ format CondBranch {
+ 0x39: beq({{ cond = (Ra == 0); }});
+ 0x3d: bne({{ cond = (Ra != 0); }});
+ 0x3e: bge({{ cond = (Ra.sq >= 0); }});
+ 0x3f: bgt({{ cond = (Ra.sq > 0); }});
+ 0x3b: ble({{ cond = (Ra.sq <= 0); }});
+ 0x3a: blt({{ cond = (Ra.sq < 0); }});
+ 0x38: blbc({{ cond = ((Ra & 1) == 0); }});
+ 0x3c: blbs({{ cond = ((Ra & 1) == 1); }});
+
+ 0x31: fbeq({{ cond = (Fa == 0); }});
+ 0x35: fbne({{ cond = (Fa != 0); }});
+ 0x36: fbge({{ cond = (Fa >= 0); }});
+ 0x37: fbgt({{ cond = (Fa > 0); }});
+ 0x33: fble({{ cond = (Fa <= 0); }});
+ 0x32: fblt({{ cond = (Fa < 0); }});
+ }
+
+ // unconditional branches
+ format UncondBranch {
+ 0x30: br();
+ 0x34: bsr(IsCall);
+ }
+
+ // indirect branches
+ 0x1a: decode JMPFUNC {
+ format Jump {
+ 0: jmp();
+ 1: jsr(IsCall);
+ 2: ret(IsReturn);
+ 3: jsr_coroutine(IsCall, IsReturn);
+ }
+ }
+
+ // Square root and integer-to-FP moves
+ 0x14: decode FP_SHORTFUNC {
+ // Integer to FP register moves must have RB == 31
+ 0x4: decode RB {
+ 31: decode FP_FULLFUNC {
+ format BasicOperateWithNopCheck {
+ 0x004: itofs({{ Fc.uq = s_to_t(Ra.ul); }}, FloatCvtOp);
+ 0x024: itoft({{ Fc.uq = Ra.uq; }}, FloatCvtOp);
+ 0x014: FailUnimpl::itoff(); // VAX-format conversion
+ }
+ }
+ }
+
+ // Square root instructions must have FA == 31
+ 0xb: decode FA {
+ 31: decode FP_TYPEFUNC {
+ format FloatingPointOperate {
+#if SS_COMPATIBLE_FP
+ 0x0b: sqrts({{
+ if (Fb < 0.0)
+ fault = new ArithmeticFault;
+ Fc = sqrt(Fb);
+ }}, FloatSqrtOp);
+#else
+ 0x0b: sqrts({{
+ if (Fb.sf < 0.0)
+ fault = new ArithmeticFault;
+ Fc.sf = sqrt(Fb.sf);
+ }}, FloatSqrtOp);
+#endif
+ 0x2b: sqrtt({{
+ if (Fb < 0.0)
+ fault = new ArithmeticFault;
+ Fc = sqrt(Fb);
+ }}, FloatSqrtOp);
+ }
+ }
+ }
+
+ // VAX-format sqrtf and sqrtg are not implemented
+ 0xa: FailUnimpl::sqrtfg();
+ }
+
+ // IEEE floating point
+ 0x16: decode FP_SHORTFUNC_TOP2 {
+ // The top two bits of the short function code break this
+ // space into four groups: binary ops, compares, reserved, and
+ // conversions. See Table 4-12 of AHB. There are different
+ // special cases in these different groups, so we decode on
+ // these top two bits first just to select a decode strategy.
+ // Most of these instructions may have various trapping and
+ // rounding mode flags set; these are decoded in the
+ // FloatingPointDecode template used by the
+ // FloatingPointOperate format.
+
+ // add/sub/mul/div: just decode on the short function code
+ // and source type. All valid trapping and rounding modes apply.
+ 0: decode FP_TRAPMODE {
+ // check for valid trapping modes here
+ 0,1,5,7: decode FP_TYPEFUNC {
+ format FloatingPointOperate {
+#if SS_COMPATIBLE_FP
+ 0x00: adds({{ Fc = Fa + Fb; }});
+ 0x01: subs({{ Fc = Fa - Fb; }});
+ 0x02: muls({{ Fc = Fa * Fb; }}, FloatMultOp);
+ 0x03: divs({{ Fc = Fa / Fb; }}, FloatDivOp);
+#else
+ 0x00: adds({{ Fc.sf = Fa.sf + Fb.sf; }});
+ 0x01: subs({{ Fc.sf = Fa.sf - Fb.sf; }});
+ 0x02: muls({{ Fc.sf = Fa.sf * Fb.sf; }}, FloatMultOp);
+ 0x03: divs({{ Fc.sf = Fa.sf / Fb.sf; }}, FloatDivOp);
+#endif
+
+ 0x20: addt({{ Fc = Fa + Fb; }});
+ 0x21: subt({{ Fc = Fa - Fb; }});
+ 0x22: mult({{ Fc = Fa * Fb; }}, FloatMultOp);
+ 0x23: divt({{ Fc = Fa / Fb; }}, FloatDivOp);
+ }
+ }
+ }
+
+ // Floating-point compare instructions must have the default
+ // rounding mode, and may use the default trapping mode or
+ // /SU. Both trapping modes are treated the same by M5; the
+    // only difference on the real hardware (as far as I can tell)
+ // is that without /SU you'd get an imprecise trap if you
+ // tried to compare a NaN with something else (instead of an
+ // "unordered" result).
+ 1: decode FP_FULLFUNC {
+ format BasicOperateWithNopCheck {
+ 0x0a5, 0x5a5: cmpteq({{ Fc = (Fa == Fb) ? 2.0 : 0.0; }},
+ FloatCmpOp);
+ 0x0a7, 0x5a7: cmptle({{ Fc = (Fa <= Fb) ? 2.0 : 0.0; }},
+ FloatCmpOp);
+ 0x0a6, 0x5a6: cmptlt({{ Fc = (Fa < Fb) ? 2.0 : 0.0; }},
+ FloatCmpOp);
+ 0x0a4, 0x5a4: cmptun({{ // unordered
+ Fc = (!(Fa < Fb) && !(Fa == Fb) && !(Fa > Fb)) ? 2.0 : 0.0;
+ }}, FloatCmpOp);
+ }
+ }
+
+ // The FP-to-integer and integer-to-FP conversion insts
+ // require that FA be 31.
+ 3: decode FA {
+ 31: decode FP_TYPEFUNC {
+ format FloatingPointOperate {
+ 0x2f: decode FP_ROUNDMODE {
+ format FPFixedRounding {
+ // "chopped" i.e. round toward zero
+ 0: cvttq({{ Fc.sq = (int64_t)trunc(Fb); }},
+ Chopped);
+ // round to minus infinity
+ 1: cvttq({{ Fc.sq = (int64_t)floor(Fb); }},
+ MinusInfinity);
+ }
+ default: cvttq({{ Fc.sq = (int64_t)nearbyint(Fb); }});
+ }
+
+ // The cvtts opcode is overloaded to be cvtst if the trap
+ // mode is 2 or 6 (which are not valid otherwise)
+ 0x2c: decode FP_FULLFUNC {
+ format BasicOperateWithNopCheck {
+ // trap on denorm version "cvtst/s" is
+ // simulated same as cvtst
+ 0x2ac, 0x6ac: cvtst({{ Fc = Fb.sf; }});
+ }
+ default: cvtts({{ Fc.sf = Fb; }});
+ }
+
+ // The trapping mode for integer-to-FP conversions
+ // must be /SUI or nothing; /U and /SU are not
+                // allowed. The full set of rounding modes is
+                // supported, though.
+ 0x3c: decode FP_TRAPMODE {
+ 0,7: cvtqs({{ Fc.sf = Fb.sq; }});
+ }
+ 0x3e: decode FP_TRAPMODE {
+ 0,7: cvtqt({{ Fc = Fb.sq; }});
+ }
+ }
+ }
+ }
+ }
+
+ // misc FP operate
+ 0x17: decode FP_FULLFUNC {
+ format BasicOperateWithNopCheck {
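+        // cvtlq/cvtql shuffle a 32-bit longword between its packed
+        // in-FP-register layout (the longword's top two bits at <63:62>,
+        // the remaining 30 bits at <58:29>) and a plain integer layout.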
+ 0x010: cvtlq({{
+ Fc.sl = (Fb.uq<63:62> << 30) | Fb.uq<58:29>;
+ }});
+ 0x030: cvtql({{
+ Fc.uq = (Fb.uq<31:30> << 62) | (Fb.uq<29:0> << 29);
+ }});
+
+ // We treat the precise & imprecise trapping versions of
+ // cvtql identically.
+ 0x130, 0x530: cvtqlv({{
+ // To avoid overflow, all the upper 32 bits must match
+ // the sign bit of the lower 32. We code this as
+ // checking the upper 33 bits for all 0s or all 1s.
+ uint64_t sign_bits = Fb.uq<63:31>;
+ if (sign_bits != 0 && sign_bits != mask(33))
+ fault = new IntegerOverflowFault;
+ Fc.uq = (Fb.uq<31:30> << 62) | (Fb.uq<29:0> << 29);
+ }});
+
+ 0x020: cpys({{ // copy sign
+ Fc.uq = (Fa.uq<63:> << 63) | Fb.uq<62:0>;
+ }});
+ 0x021: cpysn({{ // copy sign negated
+ Fc.uq = (~Fa.uq<63:> << 63) | Fb.uq<62:0>;
+ }});
+ 0x022: cpyse({{ // copy sign and exponent
+ Fc.uq = (Fa.uq<63:52> << 52) | Fb.uq<51:0>;
+ }});
+
+ 0x02a: fcmoveq({{ Fc = (Fa == 0) ? Fb : Fc; }});
+ 0x02b: fcmovne({{ Fc = (Fa != 0) ? Fb : Fc; }});
+ 0x02c: fcmovlt({{ Fc = (Fa < 0) ? Fb : Fc; }});
+ 0x02d: fcmovge({{ Fc = (Fa >= 0) ? Fb : Fc; }});
+ 0x02e: fcmovle({{ Fc = (Fa <= 0) ? Fb : Fc; }});
+ 0x02f: fcmovgt({{ Fc = (Fa > 0) ? Fb : Fc; }});
+
+ 0x024: mt_fpcr({{ FPCR = Fa.uq; }}, IsIprAccess);
+ 0x025: mf_fpcr({{ Fa.uq = FPCR; }}, IsIprAccess);
+ }
+ }
+
+ // miscellaneous mem-format ops
+ 0x18: decode MEMFUNC {
+ format WarnUnimpl {
+ 0x8000: fetch();
+ 0xa000: fetch_m();
+ 0xe800: ecb();
+ }
+
+ format MiscPrefetch {
+ 0xf800: wh64({{ EA = Rb & ~ULL(63); }},
+ {{ xc->writeHint(EA, 64, memAccessFlags); }},
+ mem_flags = NO_FAULT,
+ inst_flags = [IsMemRef, IsDataPrefetch,
+ IsStore, MemWriteOp]);
+ }
+
+ format BasicOperate {
+ 0xc000: rpcc({{
+#if FULL_SYSTEM
+ /* Rb is a fake dependency so here is a fun way to get
+ * the parser to understand that.
+ */
+ Ra = xc->readMiscRegWithEffect(AlphaISA::IPR_CC, fault) + (Rb & 0);
+
+#else
+ Ra = curTick;
+#endif
+ }}, IsUnverifiable);
+
+ // All of the barrier instructions below do nothing in
+ // their execute() methods (hence the empty code blocks).
+ // All of their functionality is hard-coded in the
+ // pipeline based on the flags IsSerializing,
+ // IsMemBarrier, and IsWriteBarrier. In the current
+ // detailed CPU model, the execute() function only gets
+ // called at fetch, so there's no way to generate pipeline
+ // behavior at any other stage. Once we go to an
+ // exec-in-exec CPU model we should be able to get rid of
+ // these flags and implement this behavior via the
+ // execute() methods.
+
+ // trapb is just a barrier on integer traps, where excb is
+ // a barrier on integer and FP traps. "EXCB is thus a
+ // superset of TRAPB." (Alpha ARM, Sec 4.11.4) We treat
+ // them the same though.
+ 0x0000: trapb({{ }}, IsSerializing, IsSerializeBefore, No_OpClass);
+ 0x0400: excb({{ }}, IsSerializing, IsSerializeBefore, No_OpClass);
+ 0x4000: mb({{ }}, IsMemBarrier, MemReadOp);
+ 0x4400: wmb({{ }}, IsWriteBarrier, MemWriteOp);
+ }
+
+#if FULL_SYSTEM
+ format BasicOperate {
+ 0xe000: rc({{
+ Ra = xc->readIntrFlag();
+ xc->setIntrFlag(0);
+ }}, IsNonSpeculative, IsUnverifiable);
+ 0xf000: rs({{
+ Ra = xc->readIntrFlag();
+ xc->setIntrFlag(1);
+ }}, IsNonSpeculative, IsUnverifiable);
+ }
+#else
+ format FailUnimpl {
+ 0xe000: rc();
+ 0xf000: rs();
+ }
+#endif
+ }
+
+#if FULL_SYSTEM
+ 0x00: CallPal::call_pal({{
+ if (!palValid ||
+ (palPriv
+ && xc->readMiscRegWithEffect(AlphaISA::IPR_ICM, fault) != AlphaISA::mode_kernel)) {
+ // invalid pal function code, or attempt to do privileged
+ // PAL call in non-kernel mode
+ fault = new UnimplementedOpcodeFault;
+ }
+ else {
+ // check to see if simulator wants to do something special
+ // on this PAL call (including maybe suppress it)
+ bool dopal = xc->simPalCheck(palFunc);
+
+ if (dopal) {
+ xc->setMiscRegWithEffect(AlphaISA::IPR_EXC_ADDR, NPC);
+ NPC = xc->readMiscRegWithEffect(AlphaISA::IPR_PAL_BASE, fault) + palOffset;
+ }
+ }
+ }}, IsNonSpeculative);
+#else
+ 0x00: decode PALFUNC {
+ format EmulatedCallPal {
+ 0x00: halt ({{
+ exitSimLoop(curTick, "halt instruction encountered");
+ }}, IsNonSpeculative);
+ 0x83: callsys({{
+ xc->syscall(R0);
+ }}, IsSerializeAfter, IsNonSpeculative);
+ // Read uniq reg into ABI return value register (r0)
+ 0x9e: rduniq({{ R0 = Runiq; }}, IsIprAccess);
+ // Write uniq reg with value from ABI arg register (r16)
+ 0x9f: wruniq({{ Runiq = R16; }}, IsIprAccess);
+ }
+ }
+#endif
+
+#if FULL_SYSTEM
+ 0x1b: decode PALMODE {
+ 0: OpcdecFault::hw_st_quad();
+ 1: decode HW_LDST_QUAD {
+ format HwLoad {
+ 0: hw_ld({{ EA = (Rb + disp) & ~3; }}, {{ Ra = Mem.ul; }}, L);
+ 1: hw_ld({{ EA = (Rb + disp) & ~7; }}, {{ Ra = Mem.uq; }}, Q);
+ }
+ }
+ }
+
+ 0x1f: decode PALMODE {
+ 0: OpcdecFault::hw_st_cond();
+ format HwStore {
+ 1: decode HW_LDST_COND {
+ 0: decode HW_LDST_QUAD {
+ 0: hw_st({{ EA = (Rb + disp) & ~3; }},
+ {{ Mem.ul = Ra<31:0>; }}, L);
+ 1: hw_st({{ EA = (Rb + disp) & ~7; }},
+ {{ Mem.uq = Ra.uq; }}, Q);
+ }
+
+ 1: FailUnimpl::hw_st_cond();
+ }
+ }
+ }
+
+ 0x19: decode PALMODE {
+ 0: OpcdecFault::hw_mfpr();
+ format HwMoveIPR {
+ 1: hw_mfpr({{
+ Ra = xc->readMiscRegWithEffect(ipr_index, fault);
+ }}, IsIprAccess);
+ }
+ }
+
+ 0x1d: decode PALMODE {
+ 0: OpcdecFault::hw_mtpr();
+ format HwMoveIPR {
+ 1: hw_mtpr({{
+ xc->setMiscRegWithEffect(ipr_index, Ra);
+ if (traceData) { traceData->setData(Ra); }
+ }}, IsIprAccess);
+ }
+ }
+
+ format BasicOperate {
+ 0x1e: decode PALMODE {
+ 0: OpcdecFault::hw_rei();
+            1: hw_rei({{ xc->hwrei(); }}, IsSerializing, IsSerializeBefore);
+ }
+
+ // M5 special opcodes use the reserved 0x01 opcode space
+ 0x01: decode M5FUNC {
+ 0x00: arm({{
+ AlphaPseudo::arm(xc->tcBase());
+ }}, IsNonSpeculative);
+ 0x01: quiesce({{
+ AlphaPseudo::quiesce(xc->tcBase());
+ }}, IsNonSpeculative, IsQuiesce);
+ 0x02: quiesceNs({{
+ AlphaPseudo::quiesceNs(xc->tcBase(), R16);
+ }}, IsNonSpeculative, IsQuiesce);
+ 0x03: quiesceCycles({{
+ AlphaPseudo::quiesceCycles(xc->tcBase(), R16);
+            }}, IsNonSpeculative, IsQuiesce, IsUnverifiable);
+ 0x04: quiesceTime({{
+ R0 = AlphaPseudo::quiesceTime(xc->tcBase());
+            }}, IsNonSpeculative, IsUnverifiable);
+ 0x10: ivlb({{
+ AlphaPseudo::ivlb(xc->tcBase());
+ }}, No_OpClass, IsNonSpeculative);
+ 0x11: ivle({{
+ AlphaPseudo::ivle(xc->tcBase());
+ }}, No_OpClass, IsNonSpeculative);
+ 0x20: m5exit_old({{
+ AlphaPseudo::m5exit_old(xc->tcBase());
+ }}, No_OpClass, IsNonSpeculative);
+ 0x21: m5exit({{
+ AlphaPseudo::m5exit(xc->tcBase(), R16);
+ }}, No_OpClass, IsNonSpeculative);
+            0x31: loadsymbol({{
+                AlphaPseudo::loadsymbol(xc->tcBase());
+            }}, No_OpClass, IsNonSpeculative);
+ 0x30: initparam({{ Ra = xc->tcBase()->getCpuPtr()->system->init_param; }});
+ 0x40: resetstats({{
+ AlphaPseudo::resetstats(xc->tcBase(), R16, R17);
+ }}, IsNonSpeculative);
+ 0x41: dumpstats({{
+ AlphaPseudo::dumpstats(xc->tcBase(), R16, R17);
+ }}, IsNonSpeculative);
+ 0x42: dumpresetstats({{
+ AlphaPseudo::dumpresetstats(xc->tcBase(), R16, R17);
+ }}, IsNonSpeculative);
+ 0x43: m5checkpoint({{
+ AlphaPseudo::m5checkpoint(xc->tcBase(), R16, R17);
+ }}, IsNonSpeculative);
+ 0x50: m5readfile({{
+ R0 = AlphaPseudo::readfile(xc->tcBase(), R16, R17, R18);
+ }}, IsNonSpeculative);
+ 0x51: m5break({{
+ AlphaPseudo::debugbreak(xc->tcBase());
+ }}, IsNonSpeculative);
+ 0x52: m5switchcpu({{
+ AlphaPseudo::switchcpu(xc->tcBase());
+ }}, IsNonSpeculative);
+ 0x53: m5addsymbol({{
+ AlphaPseudo::addsymbol(xc->tcBase(), R16, R17);
+ }}, IsNonSpeculative);
+ 0x54: m5panic({{
+ panic("M5 panic instruction called at pc=%#x.", xc->readPC());
+ }}, IsNonSpeculative);
+ 0x55: m5anBegin({{
+ AlphaPseudo::anBegin(xc->tcBase(), R16);
+ }}, IsNonSpeculative);
+ 0x56: m5anWait({{
+ AlphaPseudo::anWait(xc->tcBase(), R16, R17);
+ }}, IsNonSpeculative);
+ }
+ }
+#endif
+}
--- /dev/null
+// -*- mode:c++ -*-
+
+// Copyright (c) 2003-2005 The Regents of The University of Michigan
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Steve Reinhardt
+// Kevin Lim
+
+////////////////////////////////////////////////////////////////////
+//
+// Memory-format instructions: LoadAddress, Load, Store
+//
+
+output header {{
+ /**
+ * Base class for general Alpha memory-format instructions.
+ */
+ class Memory : public AlphaStaticInst
+ {
+ protected:
+
+ /// Memory request flags. See mem_req_base.hh.
+ unsigned memAccessFlags;
+ /// Pointer to EAComp object.
+ const StaticInstPtr eaCompPtr;
+ /// Pointer to MemAcc object.
+ const StaticInstPtr memAccPtr;
+
+ /// Constructor
+ Memory(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ StaticInstPtr _eaCompPtr = nullStaticInstPtr,
+ StaticInstPtr _memAccPtr = nullStaticInstPtr)
+ : AlphaStaticInst(mnem, _machInst, __opClass),
+ memAccessFlags(0), eaCompPtr(_eaCompPtr), memAccPtr(_memAccPtr)
+ {
+ }
+
+ std::string
+ generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+
+ public:
+
+ const StaticInstPtr &eaCompInst() const { return eaCompPtr; }
+ const StaticInstPtr &memAccInst() const { return memAccPtr; }
+ };
+
+ /**
+ * Base class for memory-format instructions using a 32-bit
+ * displacement (i.e. most of them).
+ */
+ class MemoryDisp32 : public Memory
+ {
+ protected:
+ /// Displacement for EA calculation (signed).
+ int32_t disp;
+
+ /// Constructor.
+ MemoryDisp32(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ StaticInstPtr _eaCompPtr = nullStaticInstPtr,
+ StaticInstPtr _memAccPtr = nullStaticInstPtr)
+ : Memory(mnem, _machInst, __opClass, _eaCompPtr, _memAccPtr),
+ disp(MEMDISP)
+ {
+ }
+ };
+
+
+ /**
+ * Base class for a few miscellaneous memory-format insts
+ * that don't interpret the disp field: wh64, fetch, fetch_m, ecb.
+ * None of these instructions has a destination register either.
+ */
+ class MemoryNoDisp : public Memory
+ {
+ protected:
+ /// Constructor
+ MemoryNoDisp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ StaticInstPtr _eaCompPtr = nullStaticInstPtr,
+ StaticInstPtr _memAccPtr = nullStaticInstPtr)
+ : Memory(mnem, _machInst, __opClass, _eaCompPtr, _memAccPtr)
+ {
+ }
+
+ std::string
+ generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+ };
+}};
+
+
+output decoder {{
+ std::string
+ Memory::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+ {
+ return csprintf("%-10s %c%d,%d(r%d)", mnemonic,
+ flags[IsFloating] ? 'f' : 'r', RA, MEMDISP, RB);
+ }
+
+ std::string
+ MemoryNoDisp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+ {
+ return csprintf("%-10s (r%d)", mnemonic, RB);
+ }
+}};
+
+def format LoadAddress(code) {{
+ iop = InstObjParams(name, Name, 'MemoryDisp32', CodeBlock(code))
+ header_output = BasicDeclare.subst(iop)
+ decoder_output = BasicConstructor.subst(iop)
+ decode_block = BasicDecode.subst(iop)
+ exec_output = BasicExecute.subst(iop)
+}};
+
+
+def template LoadStoreDeclare {{
+ /**
+ * Static instruction class for "%(mnemonic)s".
+ */
+ class %(class_name)s : public %(base_class)s
+ {
+ protected:
+
+ /**
+ * "Fake" effective address computation class for "%(mnemonic)s".
+ */
+ class EAComp : public %(base_class)s
+ {
+ public:
+ /// Constructor
+ EAComp(ExtMachInst machInst);
+
+ %(BasicExecDeclare)s
+ };
+
+ /**
+ * "Fake" memory access instruction class for "%(mnemonic)s".
+ */
+ class MemAcc : public %(base_class)s
+ {
+ public:
+ /// Constructor
+ MemAcc(ExtMachInst machInst);
+
+ %(BasicExecDeclare)s
+ };
+
+ public:
+
+ /// Constructor.
+ %(class_name)s(ExtMachInst machInst);
+
+ %(BasicExecDeclare)s
+
+ %(InitiateAccDeclare)s
+
+ %(CompleteAccDeclare)s
+ };
+}};
+
+
+def template InitiateAccDeclare {{
+ Fault initiateAcc(%(CPU_exec_context)s *, Trace::InstRecord *) const;
+}};
+
+
+def template CompleteAccDeclare {{
+ Fault completeAcc(Packet *, %(CPU_exec_context)s *,
+ Trace::InstRecord *) const;
+}};
+
+
+def template LoadStoreConstructor {{
+ /** TODO: change op_class to AddrGenOp or something (requires
+ * creating new member of OpClass enum in op_class.hh, updating
+ * config files, etc.). */
+ inline %(class_name)s::EAComp::EAComp(ExtMachInst machInst)
+ : %(base_class)s("%(mnemonic)s (EAComp)", machInst, IntAluOp)
+ {
+ %(ea_constructor)s;
+ }
+
+ inline %(class_name)s::MemAcc::MemAcc(ExtMachInst machInst)
+ : %(base_class)s("%(mnemonic)s (MemAcc)", machInst, %(op_class)s)
+ {
+ %(memacc_constructor)s;
+ }
+
+ inline %(class_name)s::%(class_name)s(ExtMachInst machInst)
+ : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+ new EAComp(machInst), new MemAcc(machInst))
+ {
+ %(constructor)s;
+ }
+}};
+
+
+def template EACompExecute {{
+ Fault
+ %(class_name)s::EAComp::execute(%(CPU_exec_context)s *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Addr EA;
+ Fault fault = NoFault;
+
+ %(fp_enable_check)s;
+ %(op_decl)s;
+ %(op_rd)s;
+ %(code)s;
+
+ if (fault == NoFault) {
+ %(op_wb)s;
+ xc->setEA(EA);
+ }
+
+ return fault;
+ }
+}};
+
+def template LoadMemAccExecute {{
+ Fault
+ %(class_name)s::MemAcc::execute(%(CPU_exec_context)s *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Addr EA;
+ Fault fault = NoFault;
+
+ %(fp_enable_check)s;
+ %(op_decl)s;
+ %(op_rd)s;
+ EA = xc->getEA();
+
+ if (fault == NoFault) {
+ fault = xc->read(EA, (uint%(mem_acc_size)d_t&)Mem, memAccessFlags);
+ %(code)s;
+ }
+
+ if (fault == NoFault) {
+ %(op_wb)s;
+ }
+
+ return fault;
+ }
+}};
+
+
+def template LoadExecute {{
+ Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Addr EA;
+ Fault fault = NoFault;
+
+ %(fp_enable_check)s;
+ %(op_decl)s;
+ %(op_rd)s;
+ %(ea_code)s;
+
+ if (fault == NoFault) {
+ fault = xc->read(EA, (uint%(mem_acc_size)d_t&)Mem, memAccessFlags);
+ %(memacc_code)s;
+ }
+
+ if (fault == NoFault) {
+ %(op_wb)s;
+ }
+
+ return fault;
+ }
+}};
+
+
+def template LoadInitiateAcc {{
+ Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Addr EA;
+ Fault fault = NoFault;
+
+ %(fp_enable_check)s;
+ %(op_src_decl)s;
+ %(op_rd)s;
+ %(ea_code)s;
+
+ if (fault == NoFault) {
+ fault = xc->read(EA, (uint%(mem_acc_size)d_t &)Mem, memAccessFlags);
+ }
+
+ return fault;
+ }
+}};
+
+
+def template LoadCompleteAcc {{
+ Fault %(class_name)s::completeAcc(Packet *pkt,
+ %(CPU_exec_context)s *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Fault fault = NoFault;
+
+ %(fp_enable_check)s;
+ %(op_decl)s;
+
+ Mem = pkt->get<typeof(Mem)>();
+
+ if (fault == NoFault) {
+ %(memacc_code)s;
+ }
+
+ if (fault == NoFault) {
+ %(op_wb)s;
+ }
+
+ return fault;
+ }
+}};
+
+
+def template StoreMemAccExecute {{
+ Fault
+ %(class_name)s::MemAcc::execute(%(CPU_exec_context)s *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Addr EA;
+ Fault fault = NoFault;
+ uint64_t write_result = 0;
+
+ %(fp_enable_check)s;
+ %(op_decl)s;
+ %(op_rd)s;
+ EA = xc->getEA();
+
+ if (fault == NoFault) {
+ %(code)s;
+ }
+
+ if (fault == NoFault) {
+ fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
+ memAccessFlags, &write_result);
+ if (traceData) { traceData->setData(Mem); }
+ }
+
+ if (fault == NoFault) {
+ %(postacc_code)s;
+ }
+
+ if (fault == NoFault) {
+ %(op_wb)s;
+ }
+
+ return fault;
+ }
+}};
+
+
+def template StoreExecute {{
+ Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Addr EA;
+ Fault fault = NoFault;
+ uint64_t write_result = 0;
+
+ %(fp_enable_check)s;
+ %(op_decl)s;
+ %(op_rd)s;
+ %(ea_code)s;
+
+ if (fault == NoFault) {
+ %(memacc_code)s;
+ }
+
+ if (fault == NoFault) {
+ fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
+ memAccessFlags, &write_result);
+ if (traceData) { traceData->setData(Mem); }
+ }
+
+ if (fault == NoFault) {
+ %(postacc_code)s;
+ }
+
+ if (fault == NoFault) {
+ %(op_wb)s;
+ }
+
+ return fault;
+ }
+}};
+
+def template StoreInitiateAcc {{
+ Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Addr EA;
+ Fault fault = NoFault;
+
+ %(fp_enable_check)s;
+ %(op_decl)s;
+ %(op_rd)s;
+ %(ea_code)s;
+
+ if (fault == NoFault) {
+ %(memacc_code)s;
+ }
+
+ if (fault == NoFault) {
+ fault = xc->write((uint%(mem_acc_size)d_t&)Mem, EA,
+ memAccessFlags, NULL);
+ if (traceData) { traceData->setData(Mem); }
+ }
+
+ return fault;
+ }
+}};
+
+
+def template StoreCompleteAcc {{
+ Fault %(class_name)s::completeAcc(Packet *pkt,
+ %(CPU_exec_context)s *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Fault fault = NoFault;
+
+ %(fp_enable_check)s;
+ %(op_dest_decl)s;
+
+ if (fault == NoFault) {
+ %(postacc_code)s;
+ }
+
+ if (fault == NoFault) {
+ %(op_wb)s;
+ }
+
+ return fault;
+ }
+}};
+
+
+def template StoreCondCompleteAcc {{
+ Fault %(class_name)s::completeAcc(Packet *pkt,
+ %(CPU_exec_context)s *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Fault fault = NoFault;
+
+ %(fp_enable_check)s;
+ %(op_dest_decl)s;
+
+ uint64_t write_result = pkt->req->getScResult();
+
+ if (fault == NoFault) {
+ %(postacc_code)s;
+ }
+
+ if (fault == NoFault) {
+ %(op_wb)s;
+ }
+
+ return fault;
+ }
+}};
+
+
+def template MiscMemAccExecute {{
+ Fault %(class_name)s::MemAcc::execute(%(CPU_exec_context)s *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Addr EA;
+ Fault fault = NoFault;
+
+ %(fp_enable_check)s;
+ %(op_decl)s;
+ %(op_rd)s;
+ EA = xc->getEA();
+
+ if (fault == NoFault) {
+ %(code)s;
+ }
+
+ return NoFault;
+ }
+}};
+
+def template MiscExecute {{
+ Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
+ Trace::InstRecord *traceData) const
+ {
+ Addr EA;
+ Fault fault = NoFault;
+
+ %(fp_enable_check)s;
+ %(op_decl)s;
+ %(op_rd)s;
+ %(ea_code)s;
+
+ if (fault == NoFault) {
+ %(memacc_code)s;
+ }
+
+ return NoFault;
+ }
+}};
+
+def template MiscInitiateAcc {{
+ Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s *xc,
+ Trace::InstRecord *traceData) const
+ {
+        warn("Misc instruction does not support split access method!");
+ return NoFault;
+ }
+}};
+
+
+def template MiscCompleteAcc {{
+ Fault %(class_name)s::completeAcc(Packet *pkt,
+ %(CPU_exec_context)s *xc,
+ Trace::InstRecord *traceData) const
+ {
++ warn("Misc instruction does not support split access method!");
+
+ return NoFault;
+ }
+}};
+
+// load instructions use Ra as dest, so check for
+// Ra == 31 to detect nops
+def template LoadNopCheckDecode {{
+ {
+ AlphaStaticInst *i = new %(class_name)s(machInst);
+ if (RA == 31) {
+ i = makeNop(i);
+ }
+ return i;
+ }
+}};
+
+
+// for some load instructions, Ra == 31 indicates a prefetch (not a nop)
+def template LoadPrefetchCheckDecode {{
+ {
+ if (RA != 31) {
+ return new %(class_name)s(machInst);
+ }
+ else {
+ return new %(class_name)sPrefetch(machInst);
+ }
+ }
+}};
+
+
+let {{
+def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
+ postacc_code = '', base_class = 'MemoryDisp32',
+ decode_template = BasicDecode, exec_template_base = ''):
+ # Make sure flags are in lists (convert to lists if not).
+ mem_flags = makeList(mem_flags)
+ inst_flags = makeList(inst_flags)
+
+ # add hook to get effective addresses into execution trace output.
+ ea_code += '\nif (traceData) { traceData->setAddr(EA); }\n'
+
+ # generate code block objects
+ ea_cblk = CodeBlock(ea_code)
+ memacc_cblk = CodeBlock(memacc_code)
+ postacc_cblk = CodeBlock(postacc_code)
+
+ # Some CPU models execute the memory operation as an atomic unit,
+ # while others want to separate them into an effective address
+ # computation and a memory access operation. As a result, we need
+ # to generate three StaticInst objects. Note that the latter two
+ # are nested inside the larger "atomic" one.
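+ # As a concrete sketch (hypothetical operand code, in the style of
+ # the formats defined below): a load declared with
+ #   ea_code     = {{ EA = Rb + disp; }}
+ #   memacc_code = {{ Ra = Mem.uq; }}
+ # yields one atomic object that does both steps in execute(), plus
+ # nested EAComp and MemAcc objects that split the same work.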
+
+ # generate InstObjParams for EAComp object
+ ea_iop = InstObjParams(name, Name, base_class, ea_cblk, inst_flags)
+
+ # generate InstObjParams for MemAcc object
+ memacc_iop = InstObjParams(name, Name, base_class, memacc_cblk, inst_flags)
+ # in the split execution model, the MemAcc portion is responsible
+ # for the post-access code.
+ memacc_iop.postacc_code = postacc_cblk.code
+
+ # generate InstObjParams for InitiateAcc, CompleteAcc object
+ # The code used depends on the template being used
+ if (exec_template_base == 'Load'):
+ initiateacc_cblk = CodeBlock(ea_code + memacc_code)
+ completeacc_cblk = CodeBlock(memacc_code + postacc_code)
+ elif (exec_template_base.startswith('Store')):
+ initiateacc_cblk = CodeBlock(ea_code + memacc_code)
+ completeacc_cblk = CodeBlock(postacc_code)
+ else:
+ initiateacc_cblk = ''
+ completeacc_cblk = ''
+
+ initiateacc_iop = InstObjParams(name, Name, base_class, initiateacc_cblk,
+ inst_flags)
+
+ completeacc_iop = InstObjParams(name, Name, base_class, completeacc_cblk,
+ inst_flags)
+
+ if (exec_template_base == 'Load'):
+ initiateacc_iop.ea_code = ea_cblk.code
+ initiateacc_iop.memacc_code = memacc_cblk.code
+ completeacc_iop.memacc_code = memacc_cblk.code
+ completeacc_iop.postacc_code = postacc_cblk.code
+ elif (exec_template_base.startswith('Store')):
+ initiateacc_iop.ea_code = ea_cblk.code
+ initiateacc_iop.memacc_code = memacc_cblk.code
+ completeacc_iop.postacc_code = postacc_cblk.code
+
+ # generate InstObjParams for unified execution
+ cblk = CodeBlock(ea_code + memacc_code + postacc_code)
+ iop = InstObjParams(name, Name, base_class, cblk, inst_flags)
+
+ iop.ea_constructor = ea_cblk.constructor
+ iop.ea_code = ea_cblk.code
+ iop.memacc_constructor = memacc_cblk.constructor
+ iop.memacc_code = memacc_cblk.code
+ iop.postacc_code = postacc_cblk.code
+
+ if mem_flags:
+ s = '\n\tmemAccessFlags = ' + string.join(mem_flags, '|') + ';'
+ iop.constructor += s
+ memacc_iop.constructor += s
+
+ # select templates
+
+ # define aliases... most StoreCond templates are the same as the
+ # corresponding Store templates (only CompleteAcc is different).
+ StoreCondMemAccExecute = StoreMemAccExecute
+ StoreCondExecute = StoreExecute
+ StoreCondInitiateAcc = StoreInitiateAcc
+
+ memAccExecTemplate = eval(exec_template_base + 'MemAccExecute')
+ fullExecTemplate = eval(exec_template_base + 'Execute')
+ initiateAccTemplate = eval(exec_template_base + 'InitiateAcc')
+ completeAccTemplate = eval(exec_template_base + 'CompleteAcc')
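+ # For example, exec_template_base = 'StoreCond' resolves to the
+ # aliased Store templates for MemAccExecute, Execute, and
+ # InitiateAcc, but to the distinct StoreCondCompleteAcc template
+ # defined above.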
+
+ # (header_output, decoder_output, decode_block, exec_output)
+ return (LoadStoreDeclare.subst(iop), LoadStoreConstructor.subst(iop),
+ decode_template.subst(iop),
+ EACompExecute.subst(ea_iop)
+ + memAccExecTemplate.subst(memacc_iop)
+ + fullExecTemplate.subst(iop)
+ + initiateAccTemplate.subst(initiateacc_iop)
+ + completeAccTemplate.subst(completeacc_iop))
+}};
+
+def format LoadOrNop(memacc_code, ea_code = {{ EA = Rb + disp; }},
+ mem_flags = [], inst_flags = []) {{
+ (header_output, decoder_output, decode_block, exec_output) = \
+ LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
+ decode_template = LoadNopCheckDecode,
+ exec_template_base = 'Load')
+}};
+
+
+// Note that the flags passed in apply only to the prefetch version
+def format LoadOrPrefetch(memacc_code, ea_code = {{ EA = Rb + disp; }},
+ mem_flags = [], pf_flags = [], inst_flags = []) {{
+ # declare the load instruction object and generate the decode block
+ (header_output, decoder_output, decode_block, exec_output) = \
+ LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
+ decode_template = LoadPrefetchCheckDecode,
+ exec_template_base = 'Load')
+
+ # Declare the prefetch instruction object.
+
+ # Make sure flag args are lists so we can mess with them.
+ mem_flags = makeList(mem_flags)
+ pf_flags = makeList(pf_flags)
+ inst_flags = makeList(inst_flags)
+
+ pf_mem_flags = mem_flags + pf_flags + ['NO_FAULT']
+ pf_inst_flags = inst_flags + ['IsMemRef', 'IsLoad',
+ 'IsDataPrefetch', 'MemReadOp']
+
+ (pf_header_output, pf_decoder_output, _, pf_exec_output) = \
+ LoadStoreBase(name, Name + 'Prefetch', ea_code,
+ 'xc->prefetch(EA, memAccessFlags);',
+ pf_mem_flags, pf_inst_flags, exec_template_base = 'Misc')
+
+ header_output += pf_header_output
+ decoder_output += pf_decoder_output
+ exec_output += pf_exec_output
+}};
+
+
+def format Store(memacc_code, ea_code = {{ EA = Rb + disp; }},
+ mem_flags = [], inst_flags = []) {{
+ (header_output, decoder_output, decode_block, exec_output) = \
+ LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
+ exec_template_base = 'Store')
+}};
+
+
+def format StoreCond(memacc_code, postacc_code,
+ ea_code = {{ EA = Rb + disp; }},
+ mem_flags = [], inst_flags = []) {{
+ (header_output, decoder_output, decode_block, exec_output) = \
+ LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
+ postacc_code, exec_template_base = 'StoreCond')
+}};
+
+
+// Use 'MemoryNoDisp' as base: for wh64, fetch, ecb
+def format MiscPrefetch(ea_code, memacc_code,
+ mem_flags = [], inst_flags = []) {{
+ (header_output, decoder_output, decode_block, exec_output) = \
+ LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
+ base_class = 'MemoryNoDisp', exec_template_base = 'Misc')
+}};
+
+
--- /dev/null
- using namespace LittleEndianGuest;
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Steve Reinhardt
+ * Gabe Black
+ */
+
+#ifndef __ARCH_ALPHA_ISA_TRAITS_HH__
+#define __ARCH_ALPHA_ISA_TRAITS_HH__
+
+namespace LittleEndianGuest {}
+
+#include "arch/alpha/types.hh"
+#include "config/full_system.hh"
+#include "sim/host.hh"
+
+class StaticInstPtr;
+
+namespace AlphaISA
+{
- StaticInstPtr decodeInst(ExtMachInst);
++
++ typedef uint32_t MachInst;
++ typedef uint64_t ExtMachInst;
++ typedef uint8_t RegIndex;
++
++ const int NumIntArchRegs = 32;
++ const int NumPALShadowRegs = 8;
++ const int NumFloatArchRegs = 32;
++ // @todo: Figure out what this number really should be.
++ const int NumMiscArchRegs = 32;
++
++ // Static instruction parameters
++ const int MaxInstSrcRegs = 3;
++ const int MaxInstDestRegs = 2;
++
++ // semantically meaningful register indices
++ const int ZeroReg = 31; // architecturally meaningful
++ // the rest of these depend on the ABI
++ const int StackPointerReg = 30;
++ const int GlobalPointerReg = 29;
++ const int ProcedureValueReg = 27;
++ const int ReturnAddressReg = 26;
++ const int ReturnValueReg = 0;
++ const int FramePointerReg = 15;
++ const int ArgumentReg0 = 16;
++ const int ArgumentReg1 = 17;
++ const int ArgumentReg2 = 18;
++ const int ArgumentReg3 = 19;
++ const int ArgumentReg4 = 20;
++ const int ArgumentReg5 = 21;
++ const int SyscallNumReg = ReturnValueReg;
++ const int SyscallPseudoReturnReg = ArgumentReg4;
++ const int SyscallSuccessReg = 19;
++
++
++
++ const int LogVMPageSize = 13; // 8K bytes
++ const int VMPageSize = (1 << LogVMPageSize);
++
++ const int BranchPredAddrShiftAmt = 2; // instructions are 4-byte aligned
++
++ const int WordBytes = 4;
++ const int HalfwordBytes = 2;
++ const int ByteBytes = 1;
++
++
++ const int NumIntRegs = NumIntArchRegs + NumPALShadowRegs;
++ const int NumFloatRegs = NumFloatArchRegs;
++ const int NumMiscRegs = NumMiscArchRegs;
+
+ // These enumerate all the registers for dependence tracking.
+ enum DependenceTags {
+ // 0..39 are the integer regs 0..31 plus the 8 PAL shadow regs
+ // 40..71 are the FP regs 0..31, i.e. use (reg + FP_Base_DepTag)
+ FP_Base_DepTag = 40,
+ Ctrl_Base_DepTag = 72,
+ Fpcr_DepTag = 72, // floating point control register
+ Uniq_DepTag = 73,
+ Lock_Flag_DepTag = 74,
+ Lock_Addr_DepTag = 75,
+ IPR_Base_DepTag = 76
+ };
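+ // Worked example: FP register f5 is tracked as dependence tag
+ // 5 + FP_Base_DepTag = 45, while the FPCR sits at the base of the
+ // control-register range (Fpcr_DepTag = 72).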
+
- // Alpha Does NOT have a delay slot
- #define ISA_HAS_DELAY_SLOT 0
++ typedef uint64_t IntReg;
++ typedef IntReg IntRegFile[NumIntRegs];
+
- const Addr PageShift = 13;
- const Addr PageBytes = ULL(1) << PageShift;
- const Addr PageMask = ~(PageBytes - 1);
- const Addr PageOffset = PageBytes - 1;
++ // floating point register file entry type
++ typedef union {
++ uint64_t q;
++ double d;
++ } FloatReg;
+
- #if FULL_SYSTEM
++ typedef union {
++ uint64_t q[NumFloatRegs]; // integer qword view
++ double d[NumFloatRegs]; // double-precision floating point view
+
- ////////////////////////////////////////////////////////////////////////
- //
- // Translation stuff
- //
++ void clear()
++ { bzero(d, sizeof(d)); }
++ } FloatRegFile;
+
- const Addr PteShift = 3;
- const Addr NPtePageShift = PageShift - PteShift;
- const Addr NPtePage = ULL(1) << NPtePageShift;
- const Addr PteMask = NPtePage - 1;
++extern const Addr PageShift;
++extern const Addr PageBytes;
++extern const Addr PageMask;
++extern const Addr PageOffset;
+
- // User Virtual
- const Addr USegBase = ULL(0x0);
- const Addr USegEnd = ULL(0x000003ffffffffff);
++// redirected register map, really only used for the full system case.
++extern const int reg_redir[NumIntRegs];
+
- // Kernel Direct Mapped
- const Addr K0SegBase = ULL(0xfffffc0000000000);
- const Addr K0SegEnd = ULL(0xfffffdffffffffff);
++#if FULL_SYSTEM
+
- // Kernel Virtual
- const Addr K1SegBase = ULL(0xfffffe0000000000);
- const Addr K1SegEnd = ULL(0xffffffffffffffff);
++ typedef uint64_t InternalProcReg;
+
- // For loading... XXX This maybe could be USegEnd?? --ali
- const Addr LoadAddrMask = ULL(0xffffffffff);
++#include "arch/alpha/isa_fullsys_traits.hh"
+
- ////////////////////////////////////////////////////////////////////////
- //
- // Interrupt levels
- //
- enum InterruptLevels
- {
- INTLEVEL_SOFTWARE_MIN = 4,
- INTLEVEL_SOFTWARE_MAX = 19,
++#else
++ const int NumInternalProcRegs = 0;
++#endif
+
- INTLEVEL_EXTERNAL_MIN = 20,
- INTLEVEL_EXTERNAL_MAX = 34,
++ // control register file contents
++ typedef uint64_t MiscReg;
++ class MiscRegFile {
++ protected:
++ uint64_t fpcr; // floating point condition codes
++ uint64_t uniq; // process-unique register
++ bool lock_flag; // lock flag for LL/SC
++ Addr lock_addr; // lock address for LL/SC
+
- INTLEVEL_IRQ0 = 20,
- INTLEVEL_IRQ1 = 21,
- INTINDEX_ETHERNET = 0,
- INTINDEX_SCSI = 1,
- INTLEVEL_IRQ2 = 22,
- INTLEVEL_IRQ3 = 23,
++ public:
++ MiscReg readReg(int misc_reg);
+
- INTLEVEL_SERIAL = 33,
++ //These functions should be removed once the simplescalar cpu model
++ //has been replaced.
++ int getInstAsid();
++ int getDataAsid();
+
- NumInterruptLevels = INTLEVEL_EXTERNAL_MAX
- };
++ MiscReg readRegWithEffect(int misc_reg, Fault &fault, ExecContext *xc);
+
- // EV5 modes
- enum mode_type
- {
- mode_kernel = 0, // kernel
- mode_executive = 1, // executive (unused by unix)
- mode_supervisor = 2, // supervisor (unused by unix)
- mode_user = 3, // user mode
- mode_number // number of modes
- };
++ Fault setReg(int misc_reg, const MiscReg &val);
+
++ Fault setRegWithEffect(int misc_reg, const MiscReg &val,
++ ExecContext *xc);
+
- #endif
++ void serialize(std::ostream &os);
+
- ////////////////////////////////////////////////////////////////////////
- //
- // Internal Processor Reigsters
- //
- enum md_ipr_names
- {
- IPR_ISR = 0x100, // interrupt summary register
- IPR_ITB_TAG = 0x101, // ITLB tag register
- IPR_ITB_PTE = 0x102, // ITLB page table entry register
- IPR_ITB_ASN = 0x103, // ITLB address space register
- IPR_ITB_PTE_TEMP = 0x104, // ITLB page table entry temp register
- IPR_ITB_IA = 0x105, // ITLB invalidate all register
- IPR_ITB_IAP = 0x106, // ITLB invalidate all process register
- IPR_ITB_IS = 0x107, // ITLB invalidate select register
- IPR_SIRR = 0x108, // software interrupt request register
- IPR_ASTRR = 0x109, // asynchronous system trap request register
- IPR_ASTER = 0x10a, // asynchronous system trap enable register
- IPR_EXC_ADDR = 0x10b, // exception address register
- IPR_EXC_SUM = 0x10c, // exception summary register
- IPR_EXC_MASK = 0x10d, // exception mask register
- IPR_PAL_BASE = 0x10e, // PAL base address register
- IPR_ICM = 0x10f, // instruction current mode
- IPR_IPLR = 0x110, // interrupt priority level register
- IPR_INTID = 0x111, // interrupt ID register
- IPR_IFAULT_VA_FORM = 0x112, // formatted faulting virtual addr register
- IPR_IVPTBR = 0x113, // virtual page table base register
- IPR_HWINT_CLR = 0x115, // H/W interrupt clear register
- IPR_SL_XMIT = 0x116, // serial line transmit register
- IPR_SL_RCV = 0x117, // serial line receive register
- IPR_ICSR = 0x118, // instruction control and status register
- IPR_IC_FLUSH = 0x119, // instruction cache flush control
- IPR_IC_PERR_STAT = 0x11a, // inst cache parity error status register
- IPR_PMCTR = 0x11c, // performance counter register
-
- // PAL temporary registers...
- // register meanings gleaned from osfpal.s source code
- IPR_PALtemp0 = 0x140, // local scratch
- IPR_PALtemp1 = 0x141, // local scratch
- IPR_PALtemp2 = 0x142, // entUna
- IPR_PALtemp3 = 0x143, // CPU specific impure area pointer
- IPR_PALtemp4 = 0x144, // memory management temp
- IPR_PALtemp5 = 0x145, // memory management temp
- IPR_PALtemp6 = 0x146, // memory management temp
- IPR_PALtemp7 = 0x147, // entIF
- IPR_PALtemp8 = 0x148, // intmask
- IPR_PALtemp9 = 0x149, // entSys
- IPR_PALtemp10 = 0x14a, // ??
- IPR_PALtemp11 = 0x14b, // entInt
- IPR_PALtemp12 = 0x14c, // entArith
- IPR_PALtemp13 = 0x14d, // reserved for platform specific PAL
- IPR_PALtemp14 = 0x14e, // reserved for platform specific PAL
- IPR_PALtemp15 = 0x14f, // reserved for platform specific PAL
- IPR_PALtemp16 = 0x150, // scratch / whami<7:0> / mces<4:0>
- IPR_PALtemp17 = 0x151, // sysval
- IPR_PALtemp18 = 0x152, // usp
- IPR_PALtemp19 = 0x153, // ksp
- IPR_PALtemp20 = 0x154, // PTBR
- IPR_PALtemp21 = 0x155, // entMM
- IPR_PALtemp22 = 0x156, // kgp
- IPR_PALtemp23 = 0x157, // PCBB
-
- IPR_DTB_ASN = 0x200, // DTLB address space number register
- IPR_DTB_CM = 0x201, // DTLB current mode register
- IPR_DTB_TAG = 0x202, // DTLB tag register
- IPR_DTB_PTE = 0x203, // DTLB page table entry register
- IPR_DTB_PTE_TEMP = 0x204, // DTLB page table entry temporary register
-
- IPR_MM_STAT = 0x205, // data MMU fault status register
- IPR_VA = 0x206, // fault virtual address register
- IPR_VA_FORM = 0x207, // formatted virtual address register
- IPR_MVPTBR = 0x208, // MTU virtual page table base register
- IPR_DTB_IAP = 0x209, // DTLB invalidate all process register
- IPR_DTB_IA = 0x20a, // DTLB invalidate all register
- IPR_DTB_IS = 0x20b, // DTLB invalidate single register
- IPR_ALT_MODE = 0x20c, // alternate mode register
- IPR_CC = 0x20d, // cycle counter register
- IPR_CC_CTL = 0x20e, // cycle counter control register
- IPR_MCSR = 0x20f, // MTU control register
-
- IPR_DC_FLUSH = 0x210,
- IPR_DC_PERR_STAT = 0x212, // Dcache parity error status register
- IPR_DC_TEST_CTL = 0x213, // Dcache test tag control register
- IPR_DC_TEST_TAG = 0x214, // Dcache test tag register
- IPR_DC_TEST_TAG_TEMP = 0x215, // Dcache test tag temporary register
- IPR_DC_MODE = 0x216, // Dcache mode register
- IPR_MAF_MODE = 0x217, // miss address file mode register
-
- NumInternalProcRegs // number of IPR registers
- };
- #else
- const int NumInternalProcRegs = 0;
- #endif
++ void unserialize(Checkpoint *cp, const std::string §ion);
++
++ void clear()
++ {
++ fpcr = uniq = 0;
++ lock_flag = 0;
++ lock_addr = 0;
++ }
+
+#if FULL_SYSTEM
- // Constants Related to the number of registers
++ protected:
++ InternalProcReg ipr[NumInternalProcRegs]; // Internal processor regs
+
- const int NumIntArchRegs = 32;
- const int NumPALShadowRegs = 8;
- const int NumFloatArchRegs = 32;
- // @todo: Figure out what this number really should be.
- const int NumMiscArchRegs = 32;
++ private:
++ MiscReg readIpr(int idx, Fault &fault, ExecContext *xc);
+
- const int NumIntRegs = NumIntArchRegs + NumPALShadowRegs;
- const int NumFloatRegs = NumFloatArchRegs;
- const int NumMiscRegs = NumMiscArchRegs;
++ Fault setIpr(int idx, uint64_t val, ExecContext *xc);
+
- // Static instruction parameters
- const int MaxInstSrcRegs = 3;
- const int MaxInstDestRegs = 2;
++ void copyIprs(ExecContext *xc);
++#endif
++ friend class RegFile;
++ };
+
+ const int TotalNumRegs = NumIntRegs + NumFloatRegs +
+ NumMiscRegs + NumInternalProcRegs;
+
+ const int TotalDataRegs = NumIntRegs + NumFloatRegs;
+
- // semantically meaningful register indices
- const int ZeroReg = 31; // architecturally meaningful
- // the rest of these depend on the ABI
- const int StackPointerReg = 30;
- const int GlobalPointerReg = 29;
- const int ProcedureValueReg = 27;
- const int ReturnAddressReg = 26;
- const int ReturnValueReg = 0;
- const int FramePointerReg = 15;
- const int ArgumentReg0 = 16;
- const int ArgumentReg1 = 17;
- const int ArgumentReg2 = 18;
- const int ArgumentReg3 = 19;
- const int ArgumentReg4 = 20;
- const int ArgumentReg5 = 21;
- const int SyscallNumReg = ReturnValueReg;
- const int SyscallPseudoReturnReg = ArgumentReg4;
- const int SyscallSuccessReg = 19;
++ typedef union {
++ IntReg intreg;
++ FloatReg fpreg;
++ MiscReg ctrlreg;
++ } AnyReg;
+
- const int LogVMPageSize = 13; // 8K bytes
- const int VMPageSize = (1 << LogVMPageSize);
++ struct RegFile {
++ IntRegFile intRegFile; // (signed) integer register file
++ FloatRegFile floatRegFile; // floating point register file
++ MiscRegFile miscRegs; // control register file
++ Addr pc; // program counter
++ Addr npc; // next-cycle program counter
++ Addr nnpc;
+
- const int BranchPredAddrShiftAmt = 2; // instructions are 4-byte aligned
++#if FULL_SYSTEM
++ int intrflag; // interrupt flag
++ inline int instAsid()
++ { return EV5::ITB_ASN_ASN(miscRegs.ipr[IPR_ITB_ASN]); }
++ inline int dataAsid()
++ { return EV5::DTB_ASN_ASN(miscRegs.ipr[IPR_DTB_ASN]); }
++#endif // FULL_SYSTEM
++
++ void serialize(std::ostream &os);
++ void unserialize(Checkpoint *cp, const std::string §ion);
++
++ void clear()
++ {
++ bzero(intRegFile, sizeof(intRegFile));
++ floatRegFile.clear();
++ miscRegs.clear();
++ }
++ };
+
- const int MachineBytes = 8;
- const int WordBytes = 4;
- const int HalfwordBytes = 2;
- const int ByteBytes = 1;
++ static inline ExtMachInst makeExtMI(MachInst inst, const uint64_t &pc);
+
- // Alpha UNOP (ldq_u r31,0(r0))
- const ExtMachInst NoopMachInst = 0x2ffe0000;
++ StaticInstPtr decodeInst(ExtMachInst);
++
++ // Alpha Does NOT have a delay slot
++ #define ISA_HAS_DELAY_SLOT 0
+
+ // return a no-op instruction... used for instruction fetch faults
- // redirected register map, really only used for the full system case.
- extern const int reg_redir[NumIntRegs];
++ extern const ExtMachInst NoopMachInst;
++
++ enum annotes {
++ ANNOTE_NONE = 0,
++ // An impossible number for instruction annotations
++ ITOUCH_ANNOTE = 0xffffffff,
++ };
++
++ static inline bool isCallerSaveIntegerRegister(unsigned int reg) {
++ panic("register classification not implemented");
++ return ((reg >= 1 && reg <= 8) || (reg >= 22 && reg <= 25) || reg == 27);
++ }
++
++ static inline bool isCalleeSaveIntegerRegister(unsigned int reg) {
++ panic("register classification not implemented");
++ return (reg >= 9 && reg <= 15);
++ }
++
++ static inline bool isCallerSaveFloatRegister(unsigned int reg) {
++ panic("register classification not implemented");
++ return false;
++ }
++
++ static inline bool isCalleeSaveFloatRegister(unsigned int reg) {
++ panic("register classification not implemented");
++ return false;
++ }
++
++ static inline Addr alignAddress(const Addr &addr,
++ unsigned int nbytes) {
++ return (addr & ~(nbytes - 1));
++ }
++
++ // Instruction address compression hooks
++ static inline Addr realPCToFetchPC(const Addr &addr) {
++ return addr;
++ }
++
++ static inline Addr fetchPCToRealPC(const Addr &addr) {
++ return addr;
++ }
++
++ // the size of "fetched" instructions (not necessarily the size
++ // of real instructions for PISA)
++ static inline size_t fetchInstSize() {
++ return sizeof(MachInst);
++ }
++
++ static inline MachInst makeRegisterCopy(int dest, int src) {
++ panic("makeRegisterCopy not implemented");
++ return 0;
++ }
++
++ // Machine operations
++
++ void saveMachineReg(AnyReg &savereg, const RegFile ®_file,
++ int regnum);
++
++ void restoreMachineReg(RegFile ®s, const AnyReg ®,
++ int regnum);
++
++#if 0
++ static void serializeSpecialRegs(const Serializable::Proxy &proxy,
++ const RegFile ®s);
++
++ static void unserializeSpecialRegs(const IniFile *db,
++ const std::string &category,
++ ConfigNode *node,
++ RegFile ®s);
++#endif
++
++ /**
++ * Function to ensure ISA semantics about zero registers.
++ * @param xc The execution context.
++ */
++ template <class XC>
++ void zeroRegisters(XC *xc);
+
++ const Addr MaxAddr = (Addr)-1;
+
++#if !FULL_SYSTEM
++ static inline void setSyscallReturn(SyscallReturn return_value, RegFile *regs)
++ {
++ // check for error condition. Alpha syscall convention is to
++ // indicate success/failure in reg a3 (r19) and put the
++ // return value itself in the standard return value reg (v0).
++ if (return_value.successful()) {
++ // no error
++ regs->intRegFile[SyscallSuccessReg] = 0;
++ regs->intRegFile[ReturnValueReg] = return_value.value();
++ } else {
++ // got an error, return details
++ regs->intRegFile[SyscallSuccessReg] = (IntReg) -1;
++ regs->intRegFile[ReturnValueReg] = -return_value.value();
++ }
++ }
++#endif
++
++ void copyRegs(ExecContext *src, ExecContext *dest);
++
++ void copyMiscRegs(ExecContext *src, ExecContext *dest);
++
++#if FULL_SYSTEM
++ void copyIprs(ExecContext *src, ExecContext *dest);
++#endif
+};
+
+#endif // __ARCH_ALPHA_ISA_TRAITS_HH__
--- /dev/null
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ali Saidi
+ * Lisa Hsu
+ * Nathan Binkert
+ * Steve Reinhardt
+ */
+
+/**
+ * @file
+ * This code loads the linux kernel, console, pal and patches certain
+ * functions. The symbol tables are loaded so that traces can show
+ * the executing function and we can skip functions. Various delay
+ * loops are skipped and their final values manually computed to speed
+ * up boot time.
+ */
+
+#include "arch/arguments.hh"
+#include "arch/vtophys.hh"
+#include "arch/alpha/linux/system.hh"
+#include "arch/alpha/linux/threadinfo.hh"
+#include "arch/alpha/system.hh"
+#include "base/loader/symtab.hh"
+#include "cpu/thread_context.hh"
+#include "cpu/base.hh"
+#include "dev/platform.hh"
+#include "kern/linux/printk.hh"
+#include "kern/linux/events.hh"
+#include "mem/physical.hh"
+#include "mem/port.hh"
+#include "sim/builder.hh"
+#include "sim/byteswap.hh"
+
+using namespace std;
+using namespace AlphaISA;
+using namespace Linux;
+
+LinuxAlphaSystem::LinuxAlphaSystem(Params *p)
+ : AlphaSystem(p)
+{
+ Addr addr = 0;
+
+ /**
+ * The symbol swapper_pg_dir marks the beginning of the kernel and
+ * the location of bootloader passed arguments
+ */
+ if (!kernelSymtab->findAddress("swapper_pg_dir", KernelStart)) {
+ panic("Could not determine start location of kernel");
+ }
+
+ /**
+ * Since we aren't using a bootloader, we have to copy the
+ * kernel arguments directly into the kernel's memory.
+ */
+ virtPort.writeBlob(CommandLine(), (uint8_t*)params()->boot_osflags.c_str(),
+ params()->boot_osflags.length()+1);
+
+ /**
+ * Find the address of the est_cycle_freq variable and set it so
+ * we don't go through the lengthy process of trying to calculate
+ * it using the PIT, RTC, etc.
+ */
+ if (kernelSymtab->findAddress("est_cycle_freq", addr))
+ virtPort.write(addr, (uint64_t)(Clock::Frequency /
+ p->boot_cpu_frequency));
+
+
+ /**
+ * EV5 only supports 127 ASNs, so we tell the kernel that the
+ * particular EV6 we have also supports only 127 ASNs.
+ * @todo At some point we should change ev5.hh and the palcode to support
+ * 255 ASNs.
+ */
+ if (kernelSymtab->findAddress("dp264_mv", addr))
+ virtPort.write(addr + 0x18, LittleEndianGuest::htog((uint32_t)127));
+ else
+ panic("could not find dp264_mv\n");
+
+#ifndef NDEBUG
+ kernelPanicEvent = addKernelFuncEvent<BreakPCEvent>("panic");
+ if (!kernelPanicEvent)
+ panic("could not find kernel symbol \'panic\'");
+
+#if 0
+ kernelDieEvent = addKernelFuncEvent<BreakPCEvent>("die_if_kernel");
+ if (!kernelDieEvent)
+ panic("could not find kernel symbol \'die_if_kernel\'");
+#endif
+
+#endif
+
+ /**
+ * Any time ide_delay_50ms, calibrate_delay, or
+ * determine_cpu_caches is called, just skip the function.
+ * Currently determine_cpu_caches is only used to put information
+ * in /proc; if that changes in the future, we will have to fill
+ * in the cache size variables appropriately.
+ */
+
+ skipIdeDelay50msEvent =
+ addKernelFuncEvent<SkipFuncEvent>("ide_delay_50ms");
+ skipDelayLoopEvent =
+ addKernelFuncEvent<SkipDelayLoopEvent>("calibrate_delay");
+ skipCacheProbeEvent =
+ addKernelFuncEvent<SkipFuncEvent>("determine_cpu_caches");
+ debugPrintkEvent = addKernelFuncEvent<DebugPrintkEvent>("dprintk");
+ idleStartEvent = addKernelFuncEvent<IdleStartEvent>("cpu_idle");
+
+ if (kernelSymtab->findAddress("alpha_switch_to", addr) && DTRACE(Thread)) {
+ printThreadEvent = new PrintThreadInfo(&pcEventQueue, "threadinfo",
+ addr + sizeof(MachInst) * 6);
+ } else {
+ printThreadEvent = NULL;
+ }
+}
+
+LinuxAlphaSystem::~LinuxAlphaSystem()
+{
+#ifndef NDEBUG
+ delete kernelPanicEvent;
+#endif
+ delete skipIdeDelay50msEvent;
+ delete skipDelayLoopEvent;
+ delete skipCacheProbeEvent;
+ delete debugPrintkEvent;
+ delete idleStartEvent;
+ delete printThreadEvent;
+}
+
+
+void
+LinuxAlphaSystem::setDelayLoop(ThreadContext *tc)
+{
+ Addr addr = 0;
+ if (kernelSymtab->findAddress("loops_per_jiffy", addr)) {
+ Tick cpuFreq = tc->getCpuPtr()->frequency();
+ Tick intrFreq = platform->intrFrequency();
+ VirtualPort *vp;
+
+ vp = tc->getVirtPort();
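+ // Rough sketch of the value written (hypothetical numbers): with
+ // a 2 GHz CPU and a 1024 Hz timer interrupt, loops_per_jiffy
+ // becomes (uint32_t)((2e9 / 1024) * 0.9988), roughly 1950781.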
+ vp->writeHtoG(addr, (uint32_t)((cpuFreq / intrFreq) * 0.9988));
+ tc->delVirtPort(vp);
+ }
+}
+
+
+void
+LinuxAlphaSystem::SkipDelayLoopEvent::process(ThreadContext *tc)
+{
+ SkipFuncEvent::process(tc);
+ // calculate and set loops_per_jiffy
+ ((LinuxAlphaSystem *)tc->getSystemPtr())->setDelayLoop(tc);
+}
+
+void
+LinuxAlphaSystem::PrintThreadInfo::process(ThreadContext *tc)
+{
+ Linux::ThreadInfo ti(tc);
+
+ DPRINTF(Thread, "Currently Executing Thread %s, pid %d, started at: %d\n",
+ ti.curTaskName(), ti.curTaskPID(), ti.curTaskStart());
+}
+
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(LinuxAlphaSystem)
+
+ Param<Tick> boot_cpu_frequency;
+ SimObjectParam<PhysicalMemory *> physmem;
+ SimpleEnumParam<System::MemoryMode> mem_mode;
+
+ Param<string> kernel;
+ Param<string> console;
+ Param<string> pal;
+
+ Param<string> boot_osflags;
+ Param<string> readfile;
++ Param<string> symbolfile;
+ Param<unsigned int> init_param;
+
+ Param<uint64_t> system_type;
+ Param<uint64_t> system_rev;
+
+END_DECLARE_SIM_OBJECT_PARAMS(LinuxAlphaSystem)
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(LinuxAlphaSystem)
+
+ INIT_PARAM(boot_cpu_frequency, "Frequency of the boot CPU"),
+ INIT_PARAM(physmem, "phsyical memory"),
+ INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)",
+ System::MemoryModeStrings),
+ INIT_PARAM(kernel, "file that contains the kernel code"),
+ INIT_PARAM(console, "file that contains the console code"),
+ INIT_PARAM(pal, "file that contains palcode"),
+ INIT_PARAM_DFLT(boot_osflags, "flags to pass to the kernel during boot",
+ "a"),
+ INIT_PARAM_DFLT(readfile, "file to read startup script from", ""),
++ INIT_PARAM_DFLT(symbolfile, "file to read symbols from", ""),
+ INIT_PARAM_DFLT(init_param, "numerical value to pass into simulator", 0),
+ INIT_PARAM_DFLT(system_type, "Type of system we are emulating", 34),
+ INIT_PARAM_DFLT(system_rev, "Revision of system we are emulating", 1<<10)
+
+END_INIT_SIM_OBJECT_PARAMS(LinuxAlphaSystem)
+
+CREATE_SIM_OBJECT(LinuxAlphaSystem)
+{
+ AlphaSystem::Params *p = new AlphaSystem::Params;
+ p->name = getInstanceName();
+ p->boot_cpu_frequency = boot_cpu_frequency;
+ p->physmem = physmem;
+ p->mem_mode = mem_mode;
+ p->kernel_path = kernel;
+ p->console_path = console;
+ p->palcode = pal;
+ p->boot_osflags = boot_osflags;
+ p->init_param = init_param;
+ p->readfile = readfile;
++ p->symbolfile = symbolfile;
+ p->system_type = system_type;
+ p->system_rev = system_rev;
+ return new LinuxAlphaSystem(p);
+}
+
+REGISTER_SIM_OBJECT("LinuxAlphaSystem", LinuxAlphaSystem)
+
--- /dev/null
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ali Saidi
+ * Nathan Binkert
+ */
+
+#include "arch/alpha/ev5.hh"
+#include "arch/alpha/system.hh"
+#include "arch/vtophys.hh"
+#include "base/remote_gdb.hh"
+#include "base/loader/object_file.hh"
+#include "base/loader/symtab.hh"
+#include "base/trace.hh"
+#include "mem/physical.hh"
+#include "sim/byteswap.hh"
+#include "sim/builder.hh"
+
+
+using namespace LittleEndianGuest;
+
+AlphaSystem::AlphaSystem(Params *p)
+ : System(p)
+{
+ consoleSymtab = new SymbolTable;
+ palSymtab = new SymbolTable;
+
+
+ /**
+ * Load the pal, and console code into memory
+ */
+ // Load Console Code
+ console = createObjectFile(params()->console_path);
+ if (console == NULL)
+ fatal("Could not load console file %s", params()->console_path);
+
+ // Load pal file
+ pal = createObjectFile(params()->palcode);
+ if (pal == NULL)
+ fatal("Could not load PALcode file %s", params()->palcode);
+
+
+ // Load program sections into memory
+ pal->loadSections(&functionalPort, AlphaISA::LoadAddrMask);
+ console->loadSections(&functionalPort, AlphaISA::LoadAddrMask);
+
+ // load symbols
+ if (!console->loadGlobalSymbols(consoleSymtab))
+ panic("could not load console symbols\n");
+
+ if (!pal->loadGlobalSymbols(palSymtab))
+ panic("could not load pal symbols\n");
+
+ if (!pal->loadLocalSymbols(palSymtab))
+ panic("could not load pal symbols\n");
+
+ if (!console->loadGlobalSymbols(debugSymbolTable))
+ panic("could not load console symbols\n");
+
+ if (!pal->loadGlobalSymbols(debugSymbolTable))
+ panic("could not load pal symbols\n");
+
+ if (!pal->loadLocalSymbols(debugSymbolTable))
+ panic("could not load pal symbols\n");
+
+ Addr addr = 0;
+#ifndef NDEBUG
+ consolePanicEvent = addConsoleFuncEvent<BreakPCEvent>("panic");
+#endif
+
+ /**
+ * Copy the osflags (kernel arguments) into the console's
+ * memory. (Presently Linux does not use the console service
+ * routine to get these command line arguments, but Tru64 and
+ * others do.)
+ */
+ if (consoleSymtab->findAddress("env_booted_osflags", addr)) {
+ virtPort.writeBlob(addr, (uint8_t*)params()->boot_osflags.c_str(),
+ strlen(params()->boot_osflags.c_str()));
+ }
+
+ /**
+ * Set the hardware reset parameter block system type and revision
+ * information to Tsunami.
+ */
+ if (consoleSymtab->findAddress("m5_rpb", addr)) {
+ uint64_t data;
+ data = htog(params()->system_type);
+ virtPort.write(addr+0x50, data);
+ data = htog(params()->system_rev);
+ virtPort.write(addr+0x58, data);
+ } else
+ panic("could not find hwrpb\n");
+
+}
+
+AlphaSystem::~AlphaSystem()
+{
+ delete consoleSymtab;
+ delete console;
+ delete pal;
+#ifndef NDEBUG
+ delete consolePanicEvent;
+#endif
+}
+
+/**
+ * This function fixes up addresses that are used to match PCs for
+ * hooking simulator events on to target function executions.
+ *
+ * Alpha binaries may have multiple global offset table (GOT)
+ * sections. A function that uses the GOT starts with a
+ * two-instruction prolog which sets the global pointer (gp == r29) to
+ * the appropriate GOT section. The proper gp value is calculated
+ * based on the function address, which must be passed by the caller
+ * in the procedure value register (pv aka t12 == r27). This sequence
+ * looks like the following:
+ *
+ * opcode Ra Rb offset
+ * ldah gp,X(pv) 09 29 27 X
+ * lda gp,Y(gp) 08 29 29 Y
+ *
+ * for some constant offsets X and Y. The catch is that the linker
+ * (or maybe even the compiler, I'm not sure) may recognize that the
+ * caller and callee are using the same GOT section, making this
+ * prolog redundant, and modify the call target to skip these
+ * instructions. If we check for execution of the first instruction
+ * of a function (the one the symbol points to) to detect when to skip
+ * it, we'll miss all these modified calls. It might work to
+ * unconditionally check for the third instruction, but not all
+ * functions have this prolog, and there's some chance that those
+ * first two instructions could have undesired consequences. So we do
+ * the Right Thing and pattern-match the first two instructions of the
+ * function to decide where to patch.
+ *
+ * Eventually this code should be moved into an ISA-specific file.
+ */
+Addr
+AlphaSystem::fixFuncEventAddr(Addr addr)
+{
+ // mask for just the opcode, Ra, and Rb fields (not the offset)
+ const uint32_t inst_mask = 0xffff0000;
+ // ldah gp,X(pv): opcode 9, Ra = 29, Rb = 27
+ const uint32_t gp_ldah_pattern = (9 << 26) | (29 << 21) | (27 << 16);
+ // lda gp,Y(gp): opcode 8, Ra = 29, rb = 29
+ const uint32_t gp_lda_pattern = (8 << 26) | (29 << 21) | (29 << 16);
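+ // Worked out by hand as a sanity check: these patterns evaluate
+ // to 0x27bb0000 (ldah gp,X(pv)) and 0x23bd0000 (lda gp,Y(gp)).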
+
+ uint32_t i1 = virtPort.read<uint32_t>(addr);
+ uint32_t i2 = virtPort.read<uint32_t>(addr + sizeof(AlphaISA::MachInst));
+
+ if ((i1 & inst_mask) == gp_ldah_pattern &&
+ (i2 & inst_mask) == gp_lda_pattern) {
+ Addr new_addr = addr + 2 * sizeof(AlphaISA::MachInst);
+ DPRINTF(Loader, "fixFuncEventAddr: %p -> %p\n", addr, new_addr);
+ return new_addr;
+ } else {
+ return addr;
+ }
+}
+
+
+void
+AlphaSystem::setAlphaAccess(Addr access)
+{
+ Addr addr = 0;
+ if (consoleSymtab->findAddress("m5AlphaAccess", addr)) {
+ virtPort.write(addr, htog(EV5::Phys2K0Seg(access)));
+ } else
+ panic("could not find m5AlphaAccess\n");
+}
+
+bool
+AlphaSystem::breakpoint()
+{
+ return remoteGDB[0]->trap(ALPHA_KENTRY_INT);
+}
+
+void
+AlphaSystem::serialize(std::ostream &os)
+{
+ System::serialize(os);
+ consoleSymtab->serialize("console_symtab", os);
+ palSymtab->serialize("pal_symtab", os);
+}
+
+
+void
+AlphaSystem::unserialize(Checkpoint *cp, const std::string §ion)
+{
+ System::unserialize(cp,section);
+ consoleSymtab->unserialize("console_symtab", cp, section);
+ palSymtab->unserialize("pal_symtab", cp, section);
+}
+
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(AlphaSystem)
+
+ Param<Tick> boot_cpu_frequency;
+ SimObjectParam<PhysicalMemory *> physmem;
+ SimpleEnumParam<System::MemoryMode> mem_mode;
+
+ Param<std::string> kernel;
+ Param<std::string> console;
+ Param<std::string> pal;
+
+ Param<std::string> boot_osflags;
+ Param<std::string> readfile;
++ Param<std::string> symbolfile;
+ Param<unsigned int> init_param;
+
+ Param<uint64_t> system_type;
+ Param<uint64_t> system_rev;
+
+END_DECLARE_SIM_OBJECT_PARAMS(AlphaSystem)
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(AlphaSystem)
+
+ INIT_PARAM(boot_cpu_frequency, "Frequency of the boot CPU"),
+ INIT_PARAM(physmem, "phsyical memory"),
+ INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)",
+ System::MemoryModeStrings),
+ INIT_PARAM(kernel, "file that contains the kernel code"),
+ INIT_PARAM(console, "file that contains the console code"),
+ INIT_PARAM(pal, "file that contains palcode"),
+ INIT_PARAM_DFLT(boot_osflags, "flags to pass to the kernel during boot",
+ "a"),
+ INIT_PARAM_DFLT(readfile, "file to read startup script from", ""),
++ INIT_PARAM_DFLT(symbolfile, "file to read symbols from", ""),
+ INIT_PARAM_DFLT(init_param, "numerical value to pass into simulator", 0),
+ INIT_PARAM_DFLT(system_type, "Type of system we are emulating", 34),
+ INIT_PARAM_DFLT(system_rev, "Revision of system we are emulating", 1<<10)
+
+END_INIT_SIM_OBJECT_PARAMS(AlphaSystem)
+
+CREATE_SIM_OBJECT(AlphaSystem)
+{
+ AlphaSystem::Params *p = new AlphaSystem::Params;
+ p->name = getInstanceName();
+ p->boot_cpu_frequency = boot_cpu_frequency;
+ p->physmem = physmem;
+ p->mem_mode = mem_mode;
+ p->kernel_path = kernel;
+ p->console_path = console;
+ p->palcode = pal;
+ p->boot_osflags = boot_osflags;
+ p->init_param = init_param;
+ p->readfile = readfile;
++ p->symbolfile = symbolfile;
+ p->system_type = system_type;
+ p->system_rev = system_rev;
+ return new AlphaSystem(p);
+}
+
+REGISTER_SIM_OBJECT("AlphaSystem", AlphaSystem)
+
+
--- /dev/null
+/*
+ * Copyright (c) 2003-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Nathan Binkert
+ * Lisa Hsu
+ */
+
+#include "arch/alpha/tru64/system.hh"
+#include "arch/isa_traits.hh"
+#include "arch/vtophys.hh"
+#include "base/loader/symtab.hh"
+#include "base/trace.hh"
+#include "cpu/base.hh"
+#include "cpu/thread_context.hh"
+#include "kern/tru64/tru64_events.hh"
+#include "kern/system_events.hh"
+#include "mem/physical.hh"
+#include "mem/port.hh"
+#include "sim/builder.hh"
+
+using namespace std;
+
+Tru64AlphaSystem::Tru64AlphaSystem(Tru64AlphaSystem::Params *p)
+ : AlphaSystem(p)
+{
+ Addr addr = 0;
+ if (kernelSymtab->findAddress("enable_async_printf", addr)) {
+ virtPort.write(addr, (uint32_t)0);
+ }
+
+#ifdef DEBUG
+ kernelPanicEvent = addKernelFuncEvent<BreakPCEvent>("panic");
+ if (!kernelPanicEvent)
+ panic("could not find kernel symbol \'panic\'");
+#endif
+
+ badaddrEvent = addKernelFuncEvent<BadAddrEvent>("badaddr");
+ if (!badaddrEvent)
+ panic("could not find kernel symbol \'badaddr\'");
+
+ skipPowerStateEvent =
+ addKernelFuncEvent<SkipFuncEvent>("tl_v48_capture_power_state");
+ skipScavengeBootEvent =
+ addKernelFuncEvent<SkipFuncEvent>("pmap_scavenge_boot");
+
+#if TRACING_ON
+ printfEvent = addKernelFuncEvent<PrintfEvent>("printf");
+ debugPrintfEvent = addKernelFuncEvent<DebugPrintfEvent>("m5printf");
+ debugPrintfrEvent = addKernelFuncEvent<DebugPrintfrEvent>("m5printfr");
+ dumpMbufEvent = addKernelFuncEvent<DumpMbufEvent>("m5_dump_mbuf");
+#endif
+}
+
+Tru64AlphaSystem::~Tru64AlphaSystem()
+{
+#ifdef DEBUG
+ delete kernelPanicEvent;
+#endif
+ delete badaddrEvent;
+ delete skipPowerStateEvent;
+ delete skipScavengeBootEvent;
+#if TRACING_ON
+ delete printfEvent;
+ delete debugPrintfEvent;
+ delete debugPrintfrEvent;
+ delete dumpMbufEvent;
+#endif
+}
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(Tru64AlphaSystem)
+
+ Param<Tick> boot_cpu_frequency;
+ SimObjectParam<PhysicalMemory *> physmem;
+ SimpleEnumParam<System::MemoryMode> mem_mode;
+
+ Param<string> kernel;
+ Param<string> console;
+ Param<string> pal;
+
+ Param<string> boot_osflags;
+ Param<string> readfile;
++ Param<string> symbolfile;
+ Param<unsigned int> init_param;
+
+ Param<uint64_t> system_type;
+ Param<uint64_t> system_rev;
+
+END_DECLARE_SIM_OBJECT_PARAMS(Tru64AlphaSystem)
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(Tru64AlphaSystem)
+
+ INIT_PARAM(boot_cpu_frequency, "frequency of the boot cpu"),
+ INIT_PARAM(physmem, "phsyical memory"),
+ INIT_ENUM_PARAM(mem_mode, "Memory Mode, (1=atomic, 2=timing)",
+ System::MemoryModeStrings),
+ INIT_PARAM(kernel, "file that contains the kernel code"),
+ INIT_PARAM(console, "file that contains the console code"),
+ INIT_PARAM(pal, "file that contains palcode"),
+ INIT_PARAM_DFLT(boot_osflags, "flags to pass to the kernel during boot",
+ "a"),
+ INIT_PARAM_DFLT(readfile, "file to read startup script from", ""),
++ INIT_PARAM_DFLT(symbolfile, "file to read symbols from", ""),
+ INIT_PARAM_DFLT(init_param, "numerical value to pass into simulator", 0),
+ INIT_PARAM_DFLT(system_type, "Type of system we are emulating", 12),
+ INIT_PARAM_DFLT(system_rev, "Revision of system we are emulating", 2<<1)
+
+END_INIT_SIM_OBJECT_PARAMS(Tru64AlphaSystem)
+
+CREATE_SIM_OBJECT(Tru64AlphaSystem)
+{
+ AlphaSystem::Params *p = new AlphaSystem::Params;
+ p->name = getInstanceName();
+ p->boot_cpu_frequency = boot_cpu_frequency;
+ p->physmem = physmem;
+ p->mem_mode = mem_mode;
+ p->kernel_path = kernel;
+ p->console_path = console;
+ p->palcode = pal;
+ p->boot_osflags = boot_osflags;
+ p->init_param = init_param;
+ p->readfile = readfile;
++ p->symbolfile = symbolfile;
+ p->system_type = system_type;
+ p->system_rev = system_rev;
+
+ return new Tru64AlphaSystem(p);
+}
+
+REGISTER_SIM_OBJECT("Tru64AlphaSystem", Tru64AlphaSystem)
--- /dev/null
-
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Steve Reinhardt
+ * Nathan Binkert
+ */
+
+#include <iostream>
+#include <string>
+#include <sstream>
+
+#include "base/cprintf.hh"
+#include "base/loader/symtab.hh"
+#include "base/misc.hh"
+#include "base/output.hh"
+#include "cpu/base.hh"
+#include "cpu/cpuevent.hh"
+#include "cpu/thread_context.hh"
+#include "cpu/profile.hh"
+#include "sim/param.hh"
+#include "sim/process.hh"
+#include "sim/sim_events.hh"
+#include "sim/system.hh"
+
+#include "base/trace.hh"
+
++// Hack
++#include "sim/stat_control.hh"
++
+using namespace std;
+
+vector<BaseCPU *> BaseCPU::cpuList;
+
+// This variable reflects the max number of threads in any CPU. Be
+// careful to only use it once all the CPUs that you care about have
+// been initialized
+int maxThreadsPerCPU = 1;
+
++void
++CPUProgressEvent::process()
++{
++ Counter temp = cpu->totalInstructions();
++#ifndef NDEBUG
++ double ipc = double(temp - lastNumInst) / (interval / cpu->cycles(1));
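++ // interval is in ticks and cpu->cycles(1) is one cycle expressed
++ // in ticks, so the divisor is the number of cycles in the
++ // interval and ipc is instructions committed per cycle.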
++
++ DPRINTFN("%s progress event, instructions committed: %lli, IPC: %0.8d\n",
++ cpu->name(), temp - lastNumInst, ipc);
++ ipc = 0.0;
++#else
++ cprintf("%lli: %s progress event, instructions committed: %lli\n",
++ curTick, cpu->name(), temp - lastNumInst);
++#endif
++ lastNumInst = temp;
++ schedule(curTick + interval);
++}
++
++const char *
++CPUProgressEvent::description()
++{
++ return "CPU Progress event";
++}
++
+#if FULL_SYSTEM
+BaseCPU::BaseCPU(Params *p)
+ : MemObject(p->name), clock(p->clock), checkInterrupts(true),
+ params(p), number_of_threads(p->numberOfThreads), system(p->system)
+#else
+BaseCPU::BaseCPU(Params *p)
+ : MemObject(p->name), clock(p->clock), params(p),
+ number_of_threads(p->numberOfThreads), system(p->system)
+#endif
+{
++// currentTick = curTick;
+ DPRINTF(FullCPU, "BaseCPU: Creating object, mem address %#x.\n", this);
+
+ // add self to global list of CPUs
+ cpuList.push_back(this);
+
+ DPRINTF(FullCPU, "BaseCPU: CPU added to cpuList, mem address %#x.\n",
+ this);
+
+ if (number_of_threads > maxThreadsPerCPU)
+ maxThreadsPerCPU = number_of_threads;
+
+ // allocate per-thread instruction-based event queues
+ comInstEventQueue = new EventQueue *[number_of_threads];
+ for (int i = 0; i < number_of_threads; ++i)
+ comInstEventQueue[i] = new EventQueue("instruction-based event queue");
+
+ //
+ // set up instruction-count-based termination events, if any
+ //
+ if (p->max_insts_any_thread != 0)
+ for (int i = 0; i < number_of_threads; ++i)
+ new SimLoopExitEvent(comInstEventQueue[i], p->max_insts_any_thread,
+ "a thread reached the max instruction count");
+
+ if (p->max_insts_all_threads != 0) {
+ // allocate & initialize shared downcounter: each event will
+ // decrement this when triggered; simulation will terminate
+ // when counter reaches 0
+ int *counter = new int;
+ *counter = number_of_threads;
+ for (int i = 0; i < number_of_threads; ++i)
+ new CountedExitEvent(comInstEventQueue[i],
+ "all threads reached the max instruction count",
+ p->max_insts_all_threads, *counter);
+ }
+
+ // allocate per-thread load-based event queues
+ comLoadEventQueue = new EventQueue *[number_of_threads];
+ for (int i = 0; i < number_of_threads; ++i)
+ comLoadEventQueue[i] = new EventQueue("load-based event queue");
+
+ //
+ // set up load-count-based termination events, if any
+ //
+ if (p->max_loads_any_thread != 0)
+ for (int i = 0; i < number_of_threads; ++i)
+ new SimLoopExitEvent(comLoadEventQueue[i], p->max_loads_any_thread,
+ "a thread reached the max load count");
+
+ if (p->max_loads_all_threads != 0) {
+ // allocate & initialize shared downcounter: each event will
+ // decrement this when triggered; simulation will terminate
+ // when counter reaches 0
+ int *counter = new int;
+ *counter = number_of_threads;
+ for (int i = 0; i < number_of_threads; ++i)
+ new CountedExitEvent(comLoadEventQueue[i],
+ "all threads reached the max load count",
+ p->max_loads_all_threads, *counter);
+ }
+
++ if (p->stats_reset_inst != 0) {
++ Stats::SetupEvent(Stats::Reset, p->stats_reset_inst, 0, comInstEventQueue[0]);
++ cprintf("Stats reset event scheduled for %lli insts\n",
++ p->stats_reset_inst);
++ }
++
+#if FULL_SYSTEM
+ memset(interrupts, 0, sizeof(interrupts));
+ intstatus = 0;
+#endif
+
+ functionTracingEnabled = false;
+ if (p->functionTrace) {
+ functionTraceStream = simout.find(csprintf("ftrace.%s", name()));
+ currentFunctionStart = currentFunctionEnd = 0;
+ functionEntryTick = p->functionTraceStart;
+
+ if (p->functionTraceStart == 0) {
+ functionTracingEnabled = true;
+ } else {
+ Event *e =
+ new EventWrapper<BaseCPU, &BaseCPU::enableFunctionTrace>(this,
+ true);
+ e->schedule(p->functionTraceStart);
+ }
+ }
+#if FULL_SYSTEM
+ profileEvent = NULL;
+ if (params->profile)
+ profileEvent = new ProfileEvent(this, params->profile);
+#endif
- panic("This CPU doesn't support sampling!");
+}
+
+BaseCPU::Params::Params()
+{
+#if FULL_SYSTEM
+ profile = false;
+#endif
+ checker = NULL;
+}
+
+void
+BaseCPU::enableFunctionTrace()
+{
+ functionTracingEnabled = true;
+}
+
+BaseCPU::~BaseCPU()
+{
+}
+
+void
+BaseCPU::init()
+{
+ if (!params->deferRegistration)
+ registerThreadContexts();
+}
+
+void
+BaseCPU::startup()
+{
+#if FULL_SYSTEM
+ if (!params->deferRegistration && profileEvent)
+ profileEvent->schedule(curTick);
+#endif
++
++ if (params->progress_interval) {
++ new CPUProgressEvent(&mainEventQueue, params->progress_interval,
++ this);
++ }
+}
+
+
+void
+BaseCPU::regStats()
+{
+ using namespace Stats;
+
+ numCycles
+ .name(name() + ".numCycles")
+ .desc("number of cpu cycles simulated")
+ ;
+
+ int size = threadContexts.size();
+ if (size > 1) {
+ for (int i = 0; i < size; ++i) {
+ stringstream namestr;
+ ccprintf(namestr, "%s.ctx%d", name(), i);
+ threadContexts[i]->regStats(namestr.str());
+ }
+ } else if (size == 1)
+ threadContexts[0]->regStats(name());
+
+#if FULL_SYSTEM
+#endif
+}
+
+
+void
+BaseCPU::registerThreadContexts()
+{
+ for (int i = 0; i < threadContexts.size(); ++i) {
+ ThreadContext *tc = threadContexts[i];
+
+#if FULL_SYSTEM
+ int id = params->cpu_id;
+ if (id != -1)
+ id += i;
+
+ tc->setCpuId(system->registerThreadContext(tc, id));
+#else
+ tc->setCpuId(tc->getProcessPtr()->registerThreadContext(tc));
+#endif
+ }
+}
+
+
+void
+BaseCPU::switchOut()
+{
- if (profileEvent)
- profileEvent->schedule(curTick);
++// panic("This CPU doesn't support sampling!");
++#if FULL_SYSTEM
++ if (profileEvent && profileEvent->scheduled())
++ profileEvent->deschedule();
++#endif
+}
+
+void
+BaseCPU::takeOverFrom(BaseCPU *oldCPU)
+{
+ assert(threadContexts.size() == oldCPU->threadContexts.size());
+
+ for (int i = 0; i < threadContexts.size(); ++i) {
+ ThreadContext *newTC = threadContexts[i];
+ ThreadContext *oldTC = oldCPU->threadContexts[i];
+
+ newTC->takeOverFrom(oldTC);
+
+ CpuEvent::replaceThreadContext(oldTC, newTC);
+
+ assert(newTC->readCpuId() == oldTC->readCpuId());
+#if FULL_SYSTEM
+ system->replaceThreadContext(newTC, newTC->readCpuId());
+#else
+ assert(newTC->getProcessPtr() == oldTC->getProcessPtr());
+ newTC->getProcessPtr()->replaceThreadContext(newTC, newTC->readCpuId());
+#endif
++
++// TheISA::compareXCs(oldXC, newXC);
+ }
+
+#if FULL_SYSTEM
+ for (int i = 0; i < TheISA::NumInterruptLevels; ++i)
+ interrupts[i] = oldCPU->interrupts[i];
+ intstatus = oldCPU->intstatus;
++ checkInterrupts = oldCPU->checkInterrupts;
+
+ for (int i = 0; i < threadContexts.size(); ++i)
+ threadContexts[i]->profileClear();
+
++ // The Sampler must take care of this!
++// if (profileEvent)
++// profileEvent->schedule(curTick);
+#endif
+}
+
+
+#if FULL_SYSTEM
+BaseCPU::ProfileEvent::ProfileEvent(BaseCPU *_cpu, int _interval)
+ : Event(&mainEventQueue), cpu(_cpu), interval(_interval)
+{ }
+
+void
+BaseCPU::ProfileEvent::process()
+{
+ for (int i = 0, size = cpu->threadContexts.size(); i < size; ++i) {
+ ThreadContext *tc = cpu->threadContexts[i];
+ tc->profileSample();
+ }
+
+ schedule(curTick + interval);
+}
+
+void
+BaseCPU::post_interrupt(int int_num, int index)
+{
+ DPRINTF(Interrupt, "Interrupt %d:%d posted\n", int_num, index);
+
+ if (int_num < 0 || int_num >= TheISA::NumInterruptLevels)
+ panic("int_num out of bounds\n");
+
+ if (index < 0 || index >= sizeof(uint64_t) * 8)
+ panic("index out of bounds\n");
+
+ checkInterrupts = true;
+ interrupts[int_num] |= ULL(1) << index;
+ intstatus |= (ULL(1) << int_num);
+}
+
+void
+BaseCPU::clear_interrupt(int int_num, int index)
+{
+ DPRINTF(Interrupt, "Interrupt %d:%d cleared\n", int_num, index);
+
+ if (int_num < 0 || int_num >= TheISA::NumInterruptLevels)
+ panic("int_num out of bounds\n");
+
+ if (index < 0 || index >= sizeof(uint64_t) * 8)
+ panic("index out of bounds\n");
+
+ interrupts[int_num] &= ~(ULL(1) << index);
+ if (interrupts[int_num] == 0)
+ intstatus &= ~(ULL(1) << int_num);
+}
+
+void
+BaseCPU::clear_interrupts()
+{
+ DPRINTF(Interrupt, "Interrupts all cleared\n");
+
+ memset(interrupts, 0, sizeof(interrupts));
+ intstatus = 0;
+}
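
The two-level encoding above keeps a per-level bit vector plus a one-bit-per-level summary word. A standalone restatement, runnable outside the simulator, that exercises the invariant (NumLevels is a stand-in for TheISA::NumInterruptLevels):

#include <cassert>
#include <cstdint>

static const int NumLevels = 8;        // stand-in for TheISA::NumInterruptLevels
static uint64_t interrupts[NumLevels];
static uint64_t intstatus;

static void post(int level, int index) {
    interrupts[level] |= uint64_t(1) << index;  // mark one source at this level
    intstatus |= uint64_t(1) << level;          // summary bit for the level
}

static void clear(int level, int index) {
    interrupts[level] &= ~(uint64_t(1) << index);
    if (interrupts[level] == 0)                 // last source at this level gone:
        intstatus &= ~(uint64_t(1) << level);   // drop the summary bit
}

int main() {
    post(3, 5);
    assert(intstatus != 0);    // check_interrupts() would report pending work
    clear(3, 5);
    assert(intstatus == 0);    // summary falls with the last source
    return 0;
}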
+
+
+void
+BaseCPU::serialize(std::ostream &os)
+{
+ SERIALIZE_ARRAY(interrupts, TheISA::NumInterruptLevels);
+ SERIALIZE_SCALAR(intstatus);
+}
+
+void
+BaseCPU::unserialize(Checkpoint *cp, const std::string §ion)
+{
+ UNSERIALIZE_ARRAY(interrupts, TheISA::NumInterruptLevels);
+ UNSERIALIZE_SCALAR(intstatus);
+}
+
+#endif // FULL_SYSTEM
+
+void
+BaseCPU::traceFunctionsInternal(Addr pc)
+{
+ if (!debugSymbolTable)
+ return;
+
+ // If the PC has entered a different function, print the new
+ // function's symbol and update the saved range. Otherwise do nothing.
+ if (pc < currentFunctionStart || pc >= currentFunctionEnd) {
+ string sym_str;
+ bool found = debugSymbolTable->findNearestSymbol(pc, sym_str,
+ currentFunctionStart,
+ currentFunctionEnd);
+
+ if (!found) {
+ // no symbol found: use addr as label
+ sym_str = csprintf("0x%x", pc);
+ currentFunctionStart = pc;
+ currentFunctionEnd = pc + 1;
+ }
+
+ ccprintf(*functionTraceStream, " (%d)\n%d: %s",
+ curTick - functionEntryTick, curTick, sym_str);
+ functionEntryTick = curTick;
+ }
+}
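
Each record closes the previous function with its elapsed ticks and opens the next with its entry tick and symbol, so the stream reads like " (120)\n3400: memcpy". A minimal sketch of the calling side, assuming a model derived from BaseCPU invokes the protected hook on every committed PC (MyCPU is illustrative, not in this patch):

// MyCPU derives from BaseCPU, so it can reach the protected hook.
void
MyCPU::commitPC(Addr pc)
{
    // Cheap when tracing is off: traceFunctions() tests
    // functionTracingEnabled before calling traceFunctionsInternal().
    traceFunctions(pc);
}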
+
+
+DEFINE_SIM_OBJECT_CLASS_NAME("BaseCPU", BaseCPU)
--- /dev/null
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Steve Reinhardt
+ * Nathan Binkert
+ */
+
+#ifndef __CPU_BASE_HH__
+#define __CPU_BASE_HH__
+
+#include <vector>
+
+#include "base/statistics.hh"
+#include "config/full_system.hh"
+#include "sim/eventq.hh"
+#include "mem/mem_object.hh"
+#include "arch/isa_traits.hh"
+
+class BranchPred;
+class CheckerCPU;
+class ThreadContext;
+class System;
+class Port;
+
++class CPUProgressEvent : public Event
++{
++ protected:
++ Tick interval;
++ Counter lastNumInst;
++ BaseCPU *cpu;
++
++ public:
++ CPUProgressEvent(EventQueue *q, Tick ival, BaseCPU *_cpu)
++ : Event(q, Event::Stat_Event_Pri), interval(ival), lastNumInst(0), cpu(_cpu)
++ { schedule(curTick + interval); }
++
++ void process();
++
++ virtual const char *description();
++};
++
+class BaseCPU : public MemObject
+{
+ protected:
+ // CPU's clock period in terms of the number of ticks of curTick.
+ Tick clock;
+
+ public:
++// Tick currentTick;
+ inline Tick frequency() const { return Clock::Frequency / clock; }
+ inline Tick cycles(int numCycles) const { return clock * numCycles; }
+ inline Tick curCycle() const { return curTick / clock; }
+
+#if FULL_SYSTEM
+ protected:
+ uint64_t interrupts[TheISA::NumInterruptLevels];
+ uint64_t intstatus;
+
+ public:
+ virtual void post_interrupt(int int_num, int index);
+ virtual void clear_interrupt(int int_num, int index);
+ virtual void clear_interrupts();
+ bool checkInterrupts;
+
+ bool check_interrupt(int int_num) const {
+ if (int_num >= TheISA::NumInterruptLevels)
+ panic("int_num out of bounds\n");
+
+ return interrupts[int_num] != 0;
+ }
+
+ bool check_interrupts() const { return intstatus != 0; }
+ uint64_t intr_status() const { return intstatus; }
+
+ class ProfileEvent : public Event
+ {
+ private:
+ BaseCPU *cpu;
+ int interval;
+
+ public:
+ ProfileEvent(BaseCPU *cpu, int interval);
+ void process();
+ };
+ ProfileEvent *profileEvent;
+#endif
+
+ protected:
+ std::vector<ThreadContext *> threadContexts;
+
+ public:
+
+ /// Notify the CPU that the indicated context is now active. The
+ /// delay parameter indicates the number of ticks to wait before
+ /// executing (typically 0 or 1).
+ virtual void activateContext(int thread_num, int delay) {}
+
+ /// Notify the CPU that the indicated context is now suspended.
+ virtual void suspendContext(int thread_num) {}
+
+ /// Notify the CPU that the indicated context is now deallocated.
+ virtual void deallocateContext(int thread_num) {}
+
+ /// Notify the CPU that the indicated context is now halted.
+ virtual void haltContext(int thread_num) {}
+
+ public:
+ struct Params
+ {
+ std::string name;
+ int numberOfThreads;
+ bool deferRegistration;
+ Counter max_insts_any_thread;
+ Counter max_insts_all_threads;
+ Counter max_loads_any_thread;
+ Counter max_loads_all_threads;
++ Counter stats_reset_inst;
+ Tick clock;
+ bool functionTrace;
+ Tick functionTraceStart;
+ System *system;
+#if FULL_SYSTEM
+ int cpu_id;
+ Tick profile;
+#endif
++ Tick progress_interval;
+ BaseCPU *checker;
+
+ Params();
+ };
+
+ const Params *params;
+
+ BaseCPU(Params *params);
+ virtual ~BaseCPU();
+
+ virtual void init();
+ virtual void startup();
+ virtual void regStats();
+
+ virtual void activateWhenReady(int tid) {};
+
+ void registerThreadContexts();
+
+ /// Prepare for another CPU to take over execution. When it is
+ /// ready (drained pipe) it signals the sampler.
+ virtual void switchOut();
+
+ /// Take over execution from the given CPU. Used for warm-up and
+ /// sampling.
+ virtual void takeOverFrom(BaseCPU *);
+
+ /**
+ * Number of threads we're actually simulating (<= SMT_MAX_THREADS).
+ * This is a constant for the duration of the simulation.
+ */
+ int number_of_threads;
+
+ /**
+ * Vector of per-thread instruction-based event queues. Used for
+ * scheduling events based on number of instructions committed by
+ * a particular thread.
+ */
+ EventQueue **comInstEventQueue;
+
+ /**
+ * Vector of per-thread load-based event queues. Used for
+ * scheduling events based on number of loads committed by
+ * a particular thread.
+ */
+ EventQueue **comLoadEventQueue;
+
+ System *system;
+
+#if FULL_SYSTEM
+ /**
+ * Serialize this object to the given output stream.
+ * @param os The stream to serialize to.
+ */
+ virtual void serialize(std::ostream &os);
+
+ /**
+ * Reconstruct the state of this object from a checkpoint.
+ * @param cp The checkpoint to use.
+ * @param section The section name of this object
+ */
+ virtual void unserialize(Checkpoint *cp, const std::string §ion);
+
+#endif
+
+ /**
+ * Return pointer to CPU's branch predictor (NULL if none).
+ * @return Branch predictor pointer.
+ */
+ virtual BranchPred *getBranchPred() { return NULL; };
+
+ virtual Counter totalInstructions() const { return 0; }
+
+ // Function tracing
+ private:
+ bool functionTracingEnabled;
+ std::ostream *functionTraceStream;
+ Addr currentFunctionStart;
+ Addr currentFunctionEnd;
+ Tick functionEntryTick;
+ void enableFunctionTrace();
+ void traceFunctionsInternal(Addr pc);
+
+ protected:
+ void traceFunctions(Addr pc)
+ {
+ if (functionTracingEnabled)
+ traceFunctionsInternal(pc);
+ }
+
+ private:
+ static std::vector<BaseCPU *> cpuList; //!< Static global cpu list
+
+ public:
+ static int numSimulatedCPUs() { return cpuList.size(); }
+ static Counter numSimulatedInstructions()
+ {
+ Counter total = 0;
+
+ int size = cpuList.size();
+ for (int i = 0; i < size; ++i)
+ total += cpuList[i]->totalInstructions();
+
+ return total;
+ }
+
+ public:
+ // Number of CPU cycles simulated
+ Stats::Scalar<> numCycles;
+};
+
+#endif // __CPU_BASE_HH__
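
A worked check of the clock arithmetic declared near the top of BaseCPU (frequency(), cycles(), curCycle()), under illustrative numbers of one tick per picosecond and 500 ticks per CPU cycle:

#include <cassert>
#include <cstdint>
typedef int64_t Tick;

int main() {
    const Tick Frequency = 1000000000000LL; // stand-in for Clock::Frequency
    const Tick clock = 500;                 // ticks per CPU cycle
    assert(Frequency / clock == 2000000000LL); // frequency(): a 2 GHz CPU
    assert(clock * 4 == 2000);                 // cycles(4), in ticks
    const Tick now = 10000;                    // stand-in for curTick
    assert(now / clock == 20);                 // curCycle()
    return 0;
}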
--- /dev/null
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_BASE_DYN_INST_HH__
+#define __CPU_BASE_DYN_INST_HH__
+
+#include <bitset>
+#include <list>
+#include <string>
+
+#include "arch/faults.hh"
+#include "base/fast_alloc.hh"
+#include "base/trace.hh"
+#include "config/full_system.hh"
+#include "cpu/exetrace.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/op_class.hh"
+#include "cpu/static_inst.hh"
+#include "mem/packet.hh"
+#include "sim/system.hh"
+
+/**
+ * @file
+ * Defines a dynamic instruction context.
+ */
+
+// Forward declaration.
+class StaticInstPtr;
+
+template <class Impl>
+class BaseDynInst : public FastAlloc, public RefCounted
+{
+ public:
+ // Typedef for the CPU.
+ typedef typename Impl::CPUType ImplCPU;
+ typedef typename ImplCPU::ImplState ImplState;
+
+ // Binary machine instruction type.
+ typedef TheISA::MachInst MachInst;
+ // Extended machine instruction type
+ typedef TheISA::ExtMachInst ExtMachInst;
+ // Logical register index type.
+ typedef TheISA::RegIndex RegIndex;
+ // Integer register type.
+ typedef TheISA::IntReg IntReg;
+ // Floating point register type.
+ typedef TheISA::FloatReg FloatReg;
+
+ // The DynInstPtr type.
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
+ // The list of instructions iterator type.
+ typedef typename std::list<DynInstPtr>::iterator ListIt;
+
+ enum {
+ MaxInstSrcRegs = TheISA::MaxInstSrcRegs, /// Max source regs
+ MaxInstDestRegs = TheISA::MaxInstDestRegs, /// Max dest regs
+ };
+
+ /** The StaticInst used by this BaseDynInst. */
+ StaticInstPtr staticInst;
+
+ ////////////////////////////////////////////
+ //
+ // INSTRUCTION EXECUTION
+ //
+ ////////////////////////////////////////////
+ /** InstRecord that tracks this instruction. */
+ Trace::InstRecord *traceData;
+
+ /**
+ * Does a read to a given address.
+ * @param addr The address to read.
+ * @param data The read's data is written into this parameter.
+ * @param flags The request's flags.
+ * @return Returns any fault due to the read.
+ */
+ template <class T>
+ Fault read(Addr addr, T &data, unsigned flags);
+
+ /**
+ * Does a write to a given address.
+ * @param data The data to be written.
+ * @param addr The address to write to.
+ * @param flags The request's flags.
+ * @param res The result of the write (for load locked/store conditionals).
+ * @return Returns any fault due to the write.
+ */
+ template <class T>
+ Fault write(T data, Addr addr, unsigned flags,
+ uint64_t *res);
+
+ void prefetch(Addr addr, unsigned flags);
+ void writeHint(Addr addr, int size, unsigned flags);
+ Fault copySrcTranslate(Addr src);
+ Fault copy(Addr dest);
+
+ /** @todo: Consider making this private. */
+ public:
+ /** The sequence number of the instruction. */
+ InstSeqNum seqNum;
+
+ enum Status {
+ IqEntry, /// Instruction is in the IQ
+ RobEntry, /// Instruction is in the ROB
+ LsqEntry, /// Instruction is in the LSQ
+ Completed, /// Instruction has completed
+ ResultReady, /// Instruction has its result
+ CanIssue, /// Instruction can issue and execute
+ Issued, /// Instruction has issued
+ Executed, /// Instruction has executed
+ CanCommit, /// Instruction can commit
+ AtCommit, /// Instruction has reached commit
+ Committed, /// Instruction has committed
+ Squashed, /// Instruction is squashed
+ SquashedInIQ, /// Instruction is squashed in the IQ
+ SquashedInLSQ, /// Instruction is squashed in the LSQ
+ SquashedInROB, /// Instruction is squashed in the ROB
+ RecoverInst, /// Is a recover instruction
+ BlockingInst, /// Is a blocking instruction
+ ThreadsyncWait, /// Is a thread synchronization instruction
+ SerializeBefore, /// Needs to serialize on
+ /// instructions ahead of it
+ SerializeAfter, /// Needs to serialize instructions behind it
+ SerializeHandled, /// Serialization has been handled
+ NumStatus
+ };
+
+ /** The status of this BaseDynInst. Several bits can be set. */
+ std::bitset<NumStatus> status;
+
+ /** The thread this instruction is from. */
+ short threadNumber;
+
+ /** data address space ID, for loads & stores. */
+ short asid;
+
+ /** How many source registers are ready. */
+ unsigned readyRegs;
+
+ /** Pointer to the Impl's CPU object. */
+ ImplCPU *cpu;
+
+ /** Pointer to the thread state. */
+ ImplState *thread;
+
+ /** The kind of fault this instruction has generated. */
+ Fault fault;
+
+ /** The memory request. */
+ Request *req;
+
+ /** Pointer to the data for the memory access. */
+ uint8_t *memData;
+
+ /** The effective virtual address (loads & stores only). */
+ Addr effAddr;
+
+ /** The effective physical address. */
+ Addr physEffAddr;
+
+ /** Effective virtual address for a copy source. */
+ Addr copySrcEffAddr;
+
+ /** Effective physical address for a copy source. */
+ Addr copySrcPhysEffAddr;
+
+ /** The memory request flags (from translation). */
+ unsigned memReqFlags;
+
+ union Result {
+ uint64_t integer;
- float fp;
++// float fp;
+ double dbl;
+ };
+
+ /** The result of the instruction; assumes for now that there's only one
+ * destination register.
+ */
+ Result instResult;
+
+ /** PC of this instruction. */
+ Addr PC;
+
+ /** Next non-speculative PC. It is not filled in at fetch, but rather
+ * once the target of the branch is truly known (either decode or
+ * execute).
+ */
+ Addr nextPC;
+
+ /** Next non-speculative NPC. Target PC for Mips or Sparc. */
+ Addr nextNPC;
+
+ /** Predicted next PC. */
+ Addr predPC;
+
+ /** Count of total number of dynamic instructions. */
+ static int instcount;
+
+#ifdef DEBUG
+ void dumpSNList();
+#endif
+
+ /** Whether or not the source register is ready.
+ * @todo: Not sure this should be here vs the derived class.
+ */
+ bool _readySrcRegIdx[MaxInstSrcRegs];
+
+ public:
+ /** BaseDynInst constructor given a binary instruction.
+ * @param inst The binary instruction.
+ * @param PC The PC of the instruction.
+ * @param pred_PC The predicted next PC.
+ * @param seq_num The sequence number of the instruction.
+ * @param cpu Pointer to the instruction's CPU.
+ */
+ BaseDynInst(ExtMachInst inst, Addr PC, Addr pred_PC, InstSeqNum seq_num,
+ ImplCPU *cpu);
+
+ /** BaseDynInst constructor given a StaticInst pointer.
+ * @param _staticInst The StaticInst for this BaseDynInst.
+ */
+ BaseDynInst(StaticInstPtr &_staticInst);
+
+ /** BaseDynInst destructor. */
+ ~BaseDynInst();
+
+ private:
+ /** Function to initialize variables in the constructors. */
+ void initVars();
+
+ public:
+ /** Dumps out contents of this BaseDynInst. */
+ void dump();
+
+ /** Dumps out contents of this BaseDynInst into given string. */
+ void dump(std::string &outstring);
+
+ /** Returns the fault type. */
+ Fault getFault() { return fault; }
+
+ /** Checks whether or not this instruction has had its branch target
+ * calculated yet. For now it is not utilized and is hacked to be
+ * always false.
+ * @todo: Actually use this instruction.
+ */
+ bool doneTargCalc() { return false; }
+
+ /** Returns the next PC. This could be the speculative next PC if it is
+ * called prior to the actual branch target being calculated.
+ */
+ Addr readNextPC() { return nextPC; }
+
+ /** Returns the next NPC. This could be the speculative next NPC if it is
+ * called prior to the actual branch target being calculated.
+ */
+ Addr readNextNPC() { return nextNPC; }
+
+ /** Set the predicted target of this current instruction. */
+ void setPredTarg(Addr predicted_PC) { predPC = predicted_PC; }
+
+ /** Returns the predicted target of the branch. */
+ Addr readPredTarg() { return predPC; }
+
+ /** Returns whether the instruction was predicted taken or not. */
+ bool predTaken()
+#if ISA_HAS_DELAY_SLOT
+ { return predPC != (nextPC + sizeof(MachInst)); }
+#else
+ { return predPC != (PC + sizeof(MachInst)); }
+#endif
+
+ /** Returns whether the instruction mispredicted. */
+ bool mispredicted()
+#if ISA_HAS_DELAY_SLOT
+ { return predPC != nextNPC; }
+#else
+ { return predPC != nextPC; }
+#endif
+ //
+ // Instruction types. Forward checks to StaticInst object.
+ //
+ bool isNop() const { return staticInst->isNop(); }
+ bool isMemRef() const { return staticInst->isMemRef(); }
+ bool isLoad() const { return staticInst->isLoad(); }
+ bool isStore() const { return staticInst->isStore(); }
+ bool isStoreConditional() const
+ { return staticInst->isStoreConditional(); }
+ bool isInstPrefetch() const { return staticInst->isInstPrefetch(); }
+ bool isDataPrefetch() const { return staticInst->isDataPrefetch(); }
+ bool isCopy() const { return staticInst->isCopy(); }
+ bool isInteger() const { return staticInst->isInteger(); }
+ bool isFloating() const { return staticInst->isFloating(); }
+ bool isControl() const { return staticInst->isControl(); }
+ bool isCall() const { return staticInst->isCall(); }
+ bool isReturn() const { return staticInst->isReturn(); }
+ bool isDirectCtrl() const { return staticInst->isDirectCtrl(); }
+ bool isIndirectCtrl() const { return staticInst->isIndirectCtrl(); }
+ bool isCondCtrl() const { return staticInst->isCondCtrl(); }
+ bool isUncondCtrl() const { return staticInst->isUncondCtrl(); }
+ bool isCondDelaySlot() const { return staticInst->isCondDelaySlot(); }
+ bool isThreadSync() const { return staticInst->isThreadSync(); }
+ bool isSerializing() const { return staticInst->isSerializing(); }
+ bool isSerializeBefore() const
+ { return staticInst->isSerializeBefore() || status[SerializeBefore]; }
+ bool isSerializeAfter() const
+ { return staticInst->isSerializeAfter() || status[SerializeAfter]; }
+ bool isMemBarrier() const { return staticInst->isMemBarrier(); }
+ bool isWriteBarrier() const { return staticInst->isWriteBarrier(); }
+ bool isNonSpeculative() const { return staticInst->isNonSpeculative(); }
+ bool isQuiesce() const { return staticInst->isQuiesce(); }
+ bool isIprAccess() const { return staticInst->isIprAccess(); }
+ bool isUnverifiable() const { return staticInst->isUnverifiable(); }
+
+ /** Temporarily sets this instruction as a serialize before instruction. */
+ void setSerializeBefore() { status.set(SerializeBefore); }
+
+ /** Clears the serializeBefore part of this instruction. */
+ void clearSerializeBefore() { status.reset(SerializeBefore); }
+
+ /** Checks if this serializeBefore is only temporarily set. */
+ bool isTempSerializeBefore() { return status[SerializeBefore]; }
+
+ /** Temporarily sets this instruction as a serialize after instruction. */
+ void setSerializeAfter() { status.set(SerializeAfter); }
+
+ /** Clears the serializeAfter part of this instruction.*/
+ void clearSerializeAfter() { status.reset(SerializeAfter); }
+
+ /** Checks if this serializeAfter is only temporarily set. */
+ bool isTempSerializeAfter() { return status[SerializeAfter]; }
+
+ /** Sets the serialization part of this instruction as handled. */
+ void setSerializeHandled() { status.set(SerializeHandled); }
+
+ /** Checks if the serialization part of this instruction has been
+ * handled. This does not apply to the temporary serializing
+ * state; it only applies to this instruction's own permanent
+ * serializing state.
+ */
+ bool isSerializeHandled() { return status[SerializeHandled]; }
+
+ /** Returns the opclass of this instruction. */
+ OpClass opClass() const { return staticInst->opClass(); }
+
+ /** Returns the branch target address. */
+ Addr branchTarget() const { return staticInst->branchTarget(PC); }
+
+ /** Returns the number of source registers. */
+ int8_t numSrcRegs() const { return staticInst->numSrcRegs(); }
+
+ /** Returns the number of destination registers. */
+ int8_t numDestRegs() const { return staticInst->numDestRegs(); }
+
+ // the following are used to track physical register usage
+ // for machines with separate int & FP reg files
+ int8_t numFPDestRegs() const { return staticInst->numFPDestRegs(); }
+ int8_t numIntDestRegs() const { return staticInst->numIntDestRegs(); }
+
+ /** Returns the logical register index of the i'th destination register. */
+ RegIndex destRegIdx(int i) const { return staticInst->destRegIdx(i); }
+
+ /** Returns the logical register index of the i'th source register. */
+ RegIndex srcRegIdx(int i) const { return staticInst->srcRegIdx(i); }
+
+ /** Returns the result of an integer instruction. */
+ uint64_t readIntResult() { return instResult.integer; }
+
+ /** Returns the result of a floating point instruction. */
- float readFloatResult() { return instResult.fp; }
++ float readFloatResult() { return (float)instResult.dbl; }
+
+ /** Returns the result of a floating point (double) instruction. */
+ double readDoubleResult() { return instResult.dbl; }
+
+ /** Records an integer register being set to a value. */
+ void setIntReg(const StaticInst *si, int idx, uint64_t val)
+ {
+ instResult.integer = val;
+ }
+
+ /** Records an fp register being set to a value. */
+ void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width)
+ {
+ if (width == 32)
- instResult.fp = val;
++ instResult.dbl = (double)val;
+ else if (width == 64)
+ instResult.dbl = val;
+ else
+ panic("Unsupported width!");
+ }
+
+ /** Records an fp register being set to a value. */
+ void setFloatReg(const StaticInst *si, int idx, FloatReg val)
+ {
- instResult.fp = val;
++// instResult.fp = val;
++ instResult.dbl = (double)val;
+ }
+
+ /** Records an fp register being set to an integer value. */
+ void setFloatRegBits(const StaticInst *si, int idx, uint64_t val, int width)
+ {
+ instResult.integer = val;
+ }
+
+ /** Records an fp register being set to an integer value. */
+ void setFloatRegBits(const StaticInst *si, int idx, uint64_t val)
+ {
+ instResult.integer = val;
+ }
+
+ /** Records that one of the source registers is ready. */
+ void markSrcRegReady();
+
+ /** Marks a specific register as ready. */
+ void markSrcRegReady(RegIndex src_idx);
+
+ /** Returns if a source register is ready. */
+ bool isReadySrcRegIdx(int idx) const
+ {
+ return this->_readySrcRegIdx[idx];
+ }
+
+ /** Sets this instruction as completed. */
+ void setCompleted() { status.set(Completed); }
+
+ /** Returns whether or not this instruction is completed. */
+ bool isCompleted() const { return status[Completed]; }
+
+ /** Marks the result as ready. */
+ void setResultReady() { status.set(ResultReady); }
+
+ /** Returns whether or not the result is ready. */
+ bool isResultReady() const { return status[ResultReady]; }
+
+ /** Sets this instruction as ready to issue. */
+ void setCanIssue() { status.set(CanIssue); }
+
+ /** Returns whether or not this instruction is ready to issue. */
+ bool readyToIssue() const { return status[CanIssue]; }
+
+ /** Sets this instruction as issued from the IQ. */
+ void setIssued() { status.set(Issued); }
+
+ /** Returns whether or not this instruction has issued. */
+ bool isIssued() const { return status[Issued]; }
+
+ /** Sets this instruction as executed. */
+ void setExecuted() { status.set(Executed); }
+
+ /** Returns whether or not this instruction has executed. */
+ bool isExecuted() const { return status[Executed]; }
+
+ /** Sets this instruction as ready to commit. */
+ void setCanCommit() { status.set(CanCommit); }
+
+ /** Clears this instruction as being ready to commit. */
+ void clearCanCommit() { status.reset(CanCommit); }
+
+ /** Returns whether or not this instruction is ready to commit. */
+ bool readyToCommit() const { return status[CanCommit]; }
+
+ void setAtCommit() { status.set(AtCommit); }
+
+ bool isAtCommit() { return status[AtCommit]; }
+
+ /** Sets this instruction as committed. */
+ void setCommitted() { status.set(Committed); }
+
+ /** Returns whether or not this instruction is committed. */
+ bool isCommitted() const { return status[Committed]; }
+
+ /** Sets this instruction as squashed. */
+ void setSquashed() { status.set(Squashed); }
+
+ /** Returns whether or not this instruction is squashed. */
+ bool isSquashed() const { return status[Squashed]; }
+
+ //Instruction Queue Entry
+ //-----------------------
+ /** Sets this instruction as an entry in the IQ. */
+ void setInIQ() { status.set(IqEntry); }
+
+ /** Clears this instruction as being an entry in the IQ. */
+ void clearInIQ() { status.reset(IqEntry); }
+
+ /** Returns whether or not this instruction is in the IQ. */
+ bool isInIQ() const { return status[IqEntry]; }
+
+ /** Sets this instruction as squashed in the IQ. */
+ void setSquashedInIQ() { status.set(SquashedInIQ); status.set(Squashed);}
+
+ /** Returns whether or not this instruction is squashed in the IQ. */
+ bool isSquashedInIQ() const { return status[SquashedInIQ]; }
+
+
+ //Load / Store Queue Functions
+ //-----------------------
+ /** Sets this instruction as an entry in the LSQ. */
+ void setInLSQ() { status.set(LsqEntry); }
+
+ /** Removes this instruction as an entry in the LSQ. */
+ void removeInLSQ() { status.reset(LsqEntry); }
+
+ /** Returns whether or not this instruction is in the LSQ. */
+ bool isInLSQ() const { return status[LsqEntry]; }
+
+ /** Sets this instruction as squashed in the LSQ. */
+ void setSquashedInLSQ() { status.set(SquashedInLSQ);}
+
+ /** Returns whether or not this instruction is squashed in the LSQ. */
+ bool isSquashedInLSQ() const { return status[SquashedInLSQ]; }
+
+
+ //Reorder Buffer Functions
+ //-----------------------
+ /** Sets this instruction as an entry in the ROB. */
+ void setInROB() { status.set(RobEntry); }
+
+ /** Clears this instruction as being an entry in the ROB. */
+ void clearInROB() { status.reset(RobEntry); }
+
+ /** Returns whether or not this instruction is in the ROB. */
+ bool isInROB() const { return status[RobEntry]; }
+
+ /** Sets this instruction as squashed in the ROB. */
+ void setSquashedInROB() { status.set(SquashedInROB); }
+
+ /** Returns whether or not this instruction is squashed in the ROB. */
+ bool isSquashedInROB() const { return status[SquashedInROB]; }
+
+ /** Read the PC of this instruction. */
+ const Addr readPC() const { return PC; }
+
+ /** Set the next PC of this instruction (its actual target). */
+ void setNextPC(uint64_t val)
+ {
+ nextPC = val;
+ }
+
+ /** Set the next NPC of this instruction (the target in Mips or Sparc).*/
+ void setNextNPC(uint64_t val)
+ {
+ nextNPC = val;
+ }
+
+ /** Sets the ASID. */
+ void setASID(short addr_space_id) { asid = addr_space_id; }
+
+ /** Sets the thread id. */
+ void setTid(unsigned tid) { threadNumber = tid; }
+
+ /** Sets the pointer to the thread state. */
+ void setThreadState(ImplState *state) { thread = state; }
+
+ /** Returns the thread context. */
+ ThreadContext *tcBase() { return thread->getTC(); }
+
+ private:
+ /** Instruction effective address.
+ * @todo: Consider if this is necessary or not.
+ */
+ Addr instEffAddr;
+
+ /** Whether or not the effective address calculation is completed.
+ * @todo: Consider if this is necessary or not.
+ */
+ bool eaCalcDone;
+
+ public:
+ /** Sets the effective address. */
+ void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; }
+
+ /** Returns the effective address. */
+ const Addr &getEA() const { return instEffAddr; }
+
+ /** Returns whether or not the eff. addr. calculation has been completed. */
+ bool doneEACalc() { return eaCalcDone; }
+
+ /** Returns whether or not the eff. addr. source registers are ready. */
+ bool eaSrcsReady();
+
+ /** Whether or not the memory operation is done. */
+ bool memOpDone;
+
+ public:
+ /** Load queue index. */
+ int16_t lqIdx;
+
+ /** Store queue index. */
+ int16_t sqIdx;
+
+ /** Iterator pointing to this BaseDynInst in the list of all insts. */
+ ListIt instListIt;
+
+ /** Returns iterator to this instruction in the list of all insts. */
+ ListIt &getInstListIt() { return instListIt; }
+
+ /** Sets iterator for this instruction in the list of all insts. */
+ void setInstListIt(ListIt _instListIt) { instListIt = _instListIt; }
+};
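
A hedged sketch of the order in which a pipeline drives the status bits above (the driver is illustrative; in the O3 model the dispatch/issue/commit stages do this work):

template <class Impl>
void
walkStatusBits(typename Impl::DynInstPtr &inst)
{
    inst->setInIQ();          // dispatched: waiting on source registers
    inst->setInROB();
    inst->setCanIssue();      // all sources ready
    inst->setIssued();
    inst->setExecuted();
    inst->setResultReady();
    inst->setCanCommit();     // reached the head of the ROB
    inst->setCommitted();     // retired; clearInIQ()/clearInROB() follow
}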
+
+template<class Impl>
+template<class T>
+inline Fault
+BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
+{
+ // Sometimes reads will get retried, so they may come through here
+ // twice.
+ if (!req) {
+ req = new Request();
+ req->setVirt(asid, addr, sizeof(T), flags, this->PC);
+ req->setThreadContext(thread->readCpuId(), threadNumber);
+ } else {
+ assert(addr == req->getVaddr());
+ }
+
+ if ((req->getVaddr() & (TheISA::VMPageSize - 1)) + req->getSize() >
+ TheISA::VMPageSize) {
+ return TheISA::genAlignmentFault();
+ }
+
+ fault = cpu->translateDataReadReq(req, thread);
+
+ if (fault == NoFault) {
+ effAddr = req->getVaddr();
+ physEffAddr = req->getPaddr();
+ memReqFlags = req->getFlags();
+
+#if 0
+ if (cpu->system->memctrl->badaddr(physEffAddr)) {
+ fault = TheISA::genMachineCheckFault();
+ data = (T)-1;
+ this->setExecuted();
+ } else {
+ fault = cpu->read(req, data, lqIdx);
+ }
+#else
+ fault = cpu->read(req, data, lqIdx);
+#endif
+ } else {
+ // Return a fixed value to keep simulation deterministic even
+ // along misspeculated paths.
+ data = (T)-1;
+
+ // Commit will have to clean up whatever happened. Set this
+ // instruction as executed.
+ this->setExecuted();
+ }
+
+ if (traceData) {
+ traceData->setAddr(addr);
+ traceData->setData(data);
+ }
+
+ return fault;
+}
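
The page-straddle test above rejects any access whose page offset plus size runs past one VM page. A standalone worked instance (8 KB pages assumed for illustration; TheISA::VMPageSize supplies the real value):

#include <cassert>
#include <cstdint>

int main() {
    const uint64_t VMPageSize = 8192;  // illustrative; ISA-defined in m5
    const uint64_t vaddr = 0x1FFE;     // two bytes below the page boundary
    const unsigned size = 4;           // a 4-byte access straddles the page
    bool straddles = (vaddr & (VMPageSize - 1)) + size > VMPageSize;
    assert(straddles);                 // read()/write() return an alignment fault
    return 0;
}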
+
+template<class Impl>
+template<class T>
+inline Fault
+BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
+{
+ if (traceData) {
+ traceData->setAddr(addr);
+ traceData->setData(data);
+ }
+
+ assert(req == NULL);
+
+ req = new Request();
+ req->setVirt(asid, addr, sizeof(T), flags, this->PC);
+ req->setThreadContext(thread->readCpuId(), threadNumber);
+
+ if ((req->getVaddr() & (TheISA::VMPageSize - 1)) + req->getSize() >
+ TheISA::VMPageSize) {
+ return TheISA::genAlignmentFault();
+ }
+
+ fault = cpu->translateDataWriteReq(req, thread);
+
+ if (fault == NoFault) {
+ effAddr = req->getVaddr();
+ physEffAddr = req->getPaddr();
+ memReqFlags = req->getFlags();
+#if 0
+ if (cpu->system->memctrl->badaddr(physEffAddr)) {
+ fault = TheISA::genMachineCheckFault();
+ } else {
+ fault = cpu->write(req, data, sqIdx);
+ }
+#else
+ fault = cpu->write(req, data, sqIdx);
+#endif
+ }
+
+ if (res) {
+ // always return some result to keep misspeculated paths
+ // (which will ignore faults) deterministic
+ *res = (fault == NoFault) ? req->getScResult() : 0;
+ }
+
+ return fault;
+}
+
+#endif // __CPU_BASE_DYN_INST_HH__
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_CHECKER_CPU_HH__
+#define __CPU_CHECKER_CPU_HH__
+
+#include <list>
+#include <queue>
+#include <map>
+
+#include "arch/types.hh"
+#include "base/statistics.hh"
+#include "config/full_system.hh"
+#include "cpu/base.hh"
+#include "cpu/base_dyn_inst.hh"
+#include "cpu/simple_thread.hh"
+#include "cpu/pc_event.hh"
+#include "cpu/static_inst.hh"
+#include "sim/eventq.hh"
+
+// forward declarations
+#if FULL_SYSTEM
+class Processor;
+class AlphaITB;
+class AlphaDTB;
+class PhysicalMemory;
+
+class RemoteGDB;
+class GDBListener;
+
+#else
+
+class Process;
+
+#endif // FULL_SYSTEM
+template <class>
+class BaseDynInst;
+class ThreadContext;
+class MemInterface;
+class Checkpoint;
+class Request;
+
+/**
+ * CheckerCPU class. Dynamically verifies instructions as they are
+ * completed by making sure that the instruction and its results match
+ * the independent execution of the benchmark inside the checker. The
+ * checker verifies instructions in order, regardless of the order in
+ * which instructions complete. There are certain results that
+ * cannot be verified, specifically the result of a store conditional
+ * or the values of uncached accesses. In these cases, and with
+ * instructions marked as "IsUnverifiable", the checker assumes that
+ * the value from the main CPU's execution is correct and simply
+ * copies that value. It provides a CheckerThreadContext (see
+ * checker/thread_context.hh) that exposes hooks for updating the
+ * Checker's state through any ThreadContext accesses. This allows the
+ * checker to correctly verify instructions, even with external
+ * accesses to the ThreadContext that change state.
+ */
+class CheckerCPU : public BaseCPU
+{
+ protected:
+ typedef TheISA::MachInst MachInst;
+ typedef TheISA::FloatReg FloatReg;
+ typedef TheISA::FloatRegBits FloatRegBits;
+ typedef TheISA::MiscReg MiscReg;
+ public:
+ virtual void init();
+
+ struct Params : public BaseCPU::Params
+ {
+#if FULL_SYSTEM
+ AlphaITB *itb;
+ AlphaDTB *dtb;
+#else
+ Process *process;
+#endif
+ bool exitOnError;
++ bool updateOnError;
+ bool warnOnlyOnLoadError;
+ };
+
+ public:
+ CheckerCPU(Params *p);
+ virtual ~CheckerCPU();
+
+ Process *process;
+
+ void setMemory(MemObject *mem);
+
+ MemObject *memPtr;
+
+ void setSystem(System *system);
+
+ System *systemPtr;
+
+ void setIcachePort(Port *icache_port);
+
+ Port *icachePort;
+
+ void setDcachePort(Port *dcache_port);
+
+ Port *dcachePort;
+
+ virtual Port *getPort(const std::string &name, int idx)
+ {
+ panic("Not supported on checker!");
+ return NULL;
+ }
+
+ public:
+ // Primary thread being run.
+ SimpleThread *thread;
+
+ ThreadContext *tc;
+
+ AlphaITB *itb;
+ AlphaDTB *dtb;
+
+#if FULL_SYSTEM
+ Addr dbg_vtophys(Addr addr);
+#endif
+
+ union Result {
+ uint64_t integer;
- float fp;
++// float fp;
+ double dbl;
+ };
+
+ Result result;
+
+ // current instruction
+ MachInst machInst;
+
+ // Pointer to the one memory request.
+ RequestPtr memReq;
+
+ StaticInstPtr curStaticInst;
+
+ // number of simulated instructions
+ Counter numInst;
+ Counter startNumInst;
+
+ std::queue<int> miscRegIdxs;
+
+ virtual Counter totalInstructions() const
+ {
+ return 0;
+ }
+
+ // number of simulated loads
+ Counter numLoad;
+ Counter startNumLoad;
+
+ virtual void serialize(std::ostream &os);
+ virtual void unserialize(Checkpoint *cp, const std::string §ion);
+
+ template <class T>
+ Fault read(Addr addr, T &data, unsigned flags);
+
+ template <class T>
+ Fault write(T data, Addr addr, unsigned flags, uint64_t *res);
+
+ // These functions are only used in CPU models that split
+ // effective address computation from the actual memory access.
+ void setEA(Addr EA) { panic("CheckerCPU::setEA() not implemented\n"); }
+ Addr getEA() { panic("CheckerCPU::getEA() not implemented\n"); }
+
+ void prefetch(Addr addr, unsigned flags)
+ {
+ // need to do this...
+ }
+
+ void writeHint(Addr addr, int size, unsigned flags)
+ {
+ // need to do this...
+ }
+
+ Fault copySrcTranslate(Addr src);
+
+ Fault copy(Addr dest);
+
+ // The register accessor methods provide the index of the
+ // instruction's operand (e.g., 0 or 1), not the architectural
+ // register index, to simplify the implementation of register
+ // renaming. We find the architectural register index by indexing
+ // into the instruction's own operand index table. Note that a
+ // raw pointer to the StaticInst is provided instead of a
+ // ref-counted StaticInstPtr to reduce overhead. This is fine as
+ // long as these methods don't copy the pointer into any long-term
+ // storage (which is pretty hard to imagine they would have reason
+ // to do).
+
+ uint64_t readIntReg(const StaticInst *si, int idx)
+ {
+ return thread->readIntReg(si->srcRegIdx(idx));
+ }
+
+ FloatReg readFloatReg(const StaticInst *si, int idx, int width)
+ {
+ int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
+ return thread->readFloatReg(reg_idx, width);
+ }
+
+ FloatReg readFloatReg(const StaticInst *si, int idx)
+ {
+ int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
+ return thread->readFloatReg(reg_idx);
+ }
+
+ FloatRegBits readFloatRegBits(const StaticInst *si, int idx, int width)
+ {
+ int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
+ return thread->readFloatRegBits(reg_idx, width);
+ }
+
+ FloatRegBits readFloatRegBits(const StaticInst *si, int idx)
+ {
+ int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
+ return thread->readFloatRegBits(reg_idx);
+ }
+
+ void setIntReg(const StaticInst *si, int idx, uint64_t val)
+ {
+ thread->setIntReg(si->destRegIdx(idx), val);
+ result.integer = val;
+ }
+
+ void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width)
+ {
+ int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
+ thread->setFloatReg(reg_idx, val, width);
+ switch(width) {
+ case 32:
- result.fp = val;
++ result.dbl = (double)val;
+ break;
+ case 64:
+ result.dbl = val;
+ break;
+ };
+ }
+
+ void setFloatReg(const StaticInst *si, int idx, FloatReg val)
+ {
+ int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
+ thread->setFloatReg(reg_idx, val);
++ result.dbl = (double)val;
+ }
+
+ void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val,
+ int width)
+ {
+ int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
+ thread->setFloatRegBits(reg_idx, val, width);
+ result.integer = val;
+ }
+
+ void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val)
+ {
+ int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
+ thread->setFloatRegBits(reg_idx, val);
+ result.integer = val;
+ }
+
+ uint64_t readPC() { return thread->readPC(); }
+
+ uint64_t readNextPC() { return thread->readNextPC(); }
+
+ void setNextPC(uint64_t val) {
+ thread->setNextPC(val);
+ }
+
+ MiscReg readMiscReg(int misc_reg)
+ {
+ return thread->readMiscReg(misc_reg);
+ }
+
+ MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
+ {
+ return thread->readMiscRegWithEffect(misc_reg, fault);
+ }
+
+ Fault setMiscReg(int misc_reg, const MiscReg &val)
+ {
+ result.integer = val;
+ miscRegIdxs.push(misc_reg);
+ return thread->setMiscReg(misc_reg, val);
+ }
+
+ Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val)
+ {
+ miscRegIdxs.push(misc_reg);
+ return thread->setMiscRegWithEffect(misc_reg, val);
+ }
+
- void recordPCChange(uint64_t val) { changedPC = true; }
++ void recordPCChange(uint64_t val) { changedPC = true; newPC = val; }
+ void recordNextPCChange(uint64_t val) { changedNextPC = true; }
+
+ bool translateInstReq(Request *req);
+ void translateDataWriteReq(Request *req);
+ void translateDataReadReq(Request *req);
+
+#if FULL_SYSTEM
+ Fault hwrei() { return thread->hwrei(); }
+ int readIntrFlag() { return thread->readIntrFlag(); }
+ void setIntrFlag(int val) { thread->setIntrFlag(val); }
+ bool inPalMode() { return thread->inPalMode(); }
+ void ev5_trap(Fault fault) { fault->invoke(tc); }
+ bool simPalCheck(int palFunc) { return thread->simPalCheck(palFunc); }
+#else
+ // Assume that the normal CPU's call to syscall was successful.
+ // The checker's state would have already been updated by the syscall.
+ void syscall(uint64_t callnum) { }
+#endif
+
+ void handleError()
+ {
+ if (exitOnError)
+ dumpAndExit();
+ }
+
+ bool checkFlags(Request *req);
+
+ void dumpAndExit();
+
+ ThreadContext *tcBase() { return tc; }
+ SimpleThread *threadBase() { return thread; }
+
+ Result unverifiedResult;
+ Request *unverifiedReq;
+ uint8_t *unverifiedMemData;
+
+ bool changedPC;
+ bool willChangePC;
+ uint64_t newPC;
+ bool changedNextPC;
+ bool exitOnError;
++ bool updateOnError;
+ bool warnOnlyOnLoadError;
+
+ InstSeqNum youngestSN;
+};
+
+/**
+ * Templated Checker class. This Checker class is templated on the
+ * DynInstPtr of the instruction type that will be verified. Proper
+ * template instantiations of the Checker must be placed at the bottom
+ * of checker/cpu.cc.
+ */
+template <class DynInstPtr>
+class Checker : public CheckerCPU
+{
+ public:
+ Checker(Params *p)
- : CheckerCPU(p)
++ : CheckerCPU(p), updateThisCycle(false), unverifiedInst(NULL)
+ { }
+
+ void switchOut();
+ void takeOverFrom(BaseCPU *oldCPU);
+
+ void verify(DynInstPtr &inst);
+
+ void validateInst(DynInstPtr &inst);
+ void validateExecution(DynInstPtr &inst);
+ void validateState();
+
+ void copyResult(DynInstPtr &inst);
+
+ private:
+ void handleError(DynInstPtr &inst)
+ {
- if (exitOnError)
++ if (exitOnError) {
+ dumpAndExit(inst);
++ } else if (updateOnError) {
++ updateThisCycle = true;
++ }
+ }
+
+ void dumpAndExit(DynInstPtr &inst);
+
++ bool updateThisCycle;
++
++ DynInstPtr unverifiedInst;
++
+ std::list<DynInstPtr> instList;
+ typedef typename std::list<DynInstPtr>::iterator InstListIt;
+ void dumpInsts();
+};
+
+#endif // __CPU_CHECKER_CPU_HH__
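
A standalone restatement of the checker's error policy as it falls out of handleError() and validateExecution() above (the flag names match Params; the enum and driver are illustrative):

#include <cstdio>

enum Action { Exit, Resync, CopyLoadResult, WarnOnly };

// isLoad applies the warnOnlyOnLoadError escape hatch: load values can
// legitimately differ under MP or DMA traffic between execute and commit.
static Action policy(bool isLoad, bool warnOnlyOnLoadError,
                     bool exitOnError, bool updateOnError)
{
    if (isLoad && warnOnlyOnLoadError)
        return CopyLoadResult;   // trust the main CPU's load value
    if (exitOnError)
        return Exit;             // dumpAndExit()
    if (updateOnError)
        return Resync;           // copy all registers next validateState()
    return WarnOnly;             // warn and keep running, possibly diverged
}

int main() {
    std::printf("action: %d\n", policy(true, true, false, true));
    return 0;
}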
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include <list>
+#include <string>
+
+#include "base/refcnt.hh"
+#include "cpu/base_dyn_inst.hh"
+#include "cpu/checker/cpu.hh"
+#include "cpu/simple_thread.hh"
+#include "cpu/thread_context.hh"
+#include "cpu/static_inst.hh"
+#include "mem/packet_impl.hh"
+#include "sim/byteswap.hh"
+#include "sim/sim_object.hh"
+#include "sim/stats.hh"
+
+#if FULL_SYSTEM
+#include "arch/vtophys.hh"
+#endif // FULL_SYSTEM
+
+using namespace std;
+// The CheckerCPU does Alpha only
+using namespace AlphaISA;
+
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::verify(DynInstPtr &completed_inst)
+{
+ DynInstPtr inst;
+
+ // Either check this instruction, or add it to a list of
+ // instructions waiting to be checked. Instructions must be
+ // checked in program order, so if a store has committed but not yet
+ // completed, there may be some instructions that are waiting
+ // behind it that have completed and must be checked.
+ if (!instList.empty()) {
+ if (youngestSN < completed_inst->seqNum) {
+ DPRINTF(Checker, "Adding instruction [sn:%lli] PC:%#x to list.\n",
+ completed_inst->seqNum, completed_inst->readPC());
+ instList.push_back(completed_inst);
+ youngestSN = completed_inst->seqNum;
+ }
+
+ if (!instList.front()->isCompleted()) {
+ return;
+ } else {
+ inst = instList.front();
+ instList.pop_front();
+ }
+ } else {
+ if (!completed_inst->isCompleted()) {
+ if (youngestSN < completed_inst->seqNum) {
+ DPRINTF(Checker, "Adding instruction [sn:%lli] PC:%#x to list.\n",
+ completed_inst->seqNum, completed_inst->readPC());
+ instList.push_back(completed_inst);
+ youngestSN = completed_inst->seqNum;
+ }
+ return;
+ } else {
+ if (youngestSN < completed_inst->seqNum) {
+ inst = completed_inst;
+ youngestSN = completed_inst->seqNum;
+ } else {
+ return;
+ }
+ }
+ }
+
++ unverifiedInst = inst;
++
+ // Try to check all instructions that are completed, ending if we
+ // run out of instructions to check or if an instruction is not
+ // yet completed.
+ while (1) {
+ DPRINTF(Checker, "Processing instruction [sn:%lli] PC:%#x.\n",
+ inst->seqNum, inst->readPC());
+ unverifiedResult.integer = inst->readIntResult();
+ unverifiedReq = inst->req;
+ unverifiedMemData = inst->memData;
+ numCycles++;
+
+ Fault fault = NoFault;
+
+ // maintain $r0 semantics
+ thread->setIntReg(ZeroReg, 0);
+#ifdef TARGET_ALPHA
+ thread->setFloatRegDouble(ZeroReg, 0.0);
+#endif // TARGET_ALPHA
+
+ // Check if any recent PC changes match up with anything we
+ // expect to happen. This is mostly to check if traps or
+ // PC-based events have occurred in both the checker and CPU.
+ if (changedPC) {
+ DPRINTF(Checker, "Changed PC recently to %#x\n",
+ thread->readPC());
+ if (willChangePC) {
+ if (newPC == thread->readPC()) {
+ DPRINTF(Checker, "Changed PC matches expected PC\n");
+ } else {
+ warn("%lli: Changed PC does not match expected PC, "
+ "changed: %#x, expected: %#x",
+ curTick, thread->readPC(), newPC);
+ CheckerCPU::handleError();
+ }
+ willChangePC = false;
+ }
+ changedPC = false;
+ }
+ if (changedNextPC) {
+ DPRINTF(Checker, "Changed NextPC recently to %#x\n",
+ thread->readNextPC());
+ changedNextPC = false;
+ }
+
+ // Try to fetch the instruction
+
+#if FULL_SYSTEM
+#define IFETCH_FLAGS(pc) ((pc) & 1) ? PHYSICAL : 0
+#else
+#define IFETCH_FLAGS(pc) 0
+#endif
+
+ uint64_t fetch_PC = thread->readPC() & ~3;
+
+ // set up memory request for instruction fetch
+ memReq = new Request(inst->threadNumber, fetch_PC,
+ sizeof(uint32_t),
+ IFETCH_FLAGS(thread->readPC()),
+ fetch_PC, thread->readCpuId(), inst->threadNumber);
+
+ bool succeeded = translateInstReq(memReq);
+
+ if (!succeeded) {
+ if (inst->getFault() == NoFault) {
+ // In this case the instruction was not a dummy
+ // instruction carrying an ITB fault. In the single
+ // threaded case the ITB should still be able to
+ // translate this instruction; in the SMT case it's
+ // possible that its ITB entry was kicked out.
+ warn("%lli: Instruction PC %#x was not found in the ITB!",
+ curTick, thread->readPC());
+ handleError(inst);
+
+ // go to the next instruction
+ thread->setPC(thread->readNextPC());
+ thread->setNextPC(thread->readNextPC() + sizeof(MachInst));
+
- return;
++ break;
+ } else {
+ // The instruction is carrying an ITB fault. Handle
+ // the fault and see if our results match the CPU on
+ // the next tick().
+ fault = inst->getFault();
+ }
+ }
+
+ if (fault == NoFault) {
+ Packet *pkt = new Packet(memReq, Packet::ReadReq,
+ Packet::Broadcast);
+
+ pkt->dataStatic(&machInst);
+
+ icachePort->sendFunctional(pkt);
+
+ delete pkt;
+
+ // keep an instruction count
+ numInst++;
+
+ // decode the instruction
+ machInst = gtoh(machInst);
+ // Checks that the instruction matches what we expected it to be.
+ // Checks both the machine instruction and the PC.
+ validateInst(inst);
+
+ curStaticInst = StaticInst::decode(makeExtMI(machInst,
+ thread->readPC()));
+
+#if FULL_SYSTEM
+ thread->setInst(machInst);
+#endif // FULL_SYSTEM
+
+ fault = inst->getFault();
+ }
+
+ // Discard fetch's memReq.
+ delete memReq;
+ memReq = NULL;
+
+ // Either the instruction was a fault and we should process the fault,
+ // or we should just go ahead and execute the instruction. This assumes
+ // that the instruction is properly marked as a fault.
+ if (fault == NoFault) {
+
+ thread->funcExeInst++;
+
- fault = curStaticInst->execute(this, NULL);
++ if (!inst->isUnverifiable())
++ fault = curStaticInst->execute(this, NULL);
+
+ // Checks to make sure instruction results are correct.
+ validateExecution(inst);
+
+ if (curStaticInst->isLoad()) {
+ ++numLoad;
+ }
+ }
+
+ if (fault != NoFault) {
+#if FULL_SYSTEM
+ fault->invoke(tc);
+ willChangePC = true;
+ newPC = thread->readPC();
+ DPRINTF(Checker, "Fault, PC is now %#x\n", newPC);
+#endif
+ } else {
+#if THE_ISA != MIPS_ISA
+ // go to the next instruction
+ thread->setPC(thread->readNextPC());
+ thread->setNextPC(thread->readNextPC() + sizeof(MachInst));
+#else
+ // go to the next instruction
+ thread->setPC(thread->readNextPC());
+ thread->setNextPC(thread->readNextNPC());
+ thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst));
+#endif
+
+ }
+
+#if FULL_SYSTEM
+ // @todo: Determine if these should happen only if the
+ // instruction hasn't faulted. In the SimpleCPU case this may
+ // not be true, but in the O3 or Ozone case this may be true.
+ Addr oldpc;
+ int count = 0;
+ do {
+ oldpc = thread->readPC();
+ system->pcEventQueue.service(tc);
+ count++;
+ } while (oldpc != thread->readPC());
+ if (count > 1) {
+ willChangePC = true;
+ newPC = thread->readPC();
+ DPRINTF(Checker, "PC Event, PC is now %#x\n", newPC);
+ }
+#endif
+
+ // @todo: Optionally can check all registers. (Or just those
+ // that have been modified).
+ validateState();
+
+ if (memReq) {
+ delete memReq;
+ memReq = NULL;
+ }
+
+ // Continue verifying instructions if there's another completed
+ // instruction waiting to be verified.
+ if (instList.empty()) {
+ break;
+ } else if (instList.front()->isCompleted()) {
+ inst = instList.front();
+ instList.pop_front();
+ } else {
+ break;
+ }
+ }
++ unverifiedInst = NULL;
+}
+
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::switchOut()
+{
+ instList.clear();
+}
+
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::takeOverFrom(BaseCPU *oldCPU)
+{
+}
+
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::validateInst(DynInstPtr &inst)
+{
+ if (inst->readPC() != thread->readPC()) {
+ warn("%lli: PCs do not match! Inst: %#x, checker: %#x",
+ curTick, inst->readPC(), thread->readPC());
+ if (changedPC) {
+ warn("%lli: Changed PCs recently, may not be an error",
+ curTick);
+ } else {
+ handleError(inst);
+ }
+ }
+
+ MachInst mi = static_cast<MachInst>(inst->staticInst->machInst);
+
+ if (mi != machInst) {
+ warn("%lli: Binary instructions do not match! Inst: %#x, "
+ "checker: %#x",
+ curTick, mi, machInst);
+ handleError(inst);
+ }
+}
+
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::validateExecution(DynInstPtr &inst)
+{
+ bool result_mismatch = false;
+ if (inst->numDestRegs()) {
+ // @todo: Support more destination registers.
+ if (inst->isUnverifiable()) {
+ // Unverifiable instructions assume they were executed
+ // properly by the CPU. Grab the result from the
+ // instruction and write it to the register.
+ copyResult(inst);
+ } else if (result.integer != inst->readIntResult()) {
+ result_mismatch = true;
+ }
+ }
+
+ if (result_mismatch) {
+ warn("%lli: Instruction results do not match! (Values may not "
+ "actually be integers) Inst: %#x, checker: %#x",
+ curTick, inst->readIntResult(), result.integer);
+
+ // It's useful to verify load values from memory, but in MP
+ // systems the value obtained at execute may be different than
+ // the value obtained at completion. Similarly DMA can
+ // present the same problem even on UP systems. Thus there is
+ // the option to only warn on loads having a result error.
+ if (inst->isLoad() && warnOnlyOnLoadError) {
+ copyResult(inst);
+ } else {
+ handleError(inst);
+ }
+ }
+
+ if (inst->readNextPC() != thread->readNextPC()) {
+ warn("%lli: Instruction next PCs do not match! Inst: %#x, "
+ "checker: %#x",
+ curTick, inst->readNextPC(), thread->readNextPC());
+ handleError(inst);
+ }
+
+ // Checking side effect registers can be difficult if they are not
+ // checked simultaneously with the execution of the instruction.
+ // This is because other valid instructions may have modified
+ // these registers in the meantime, and their values are not
+ // stored within the DynInst.
+ while (!miscRegIdxs.empty()) {
+ int misc_reg_idx = miscRegIdxs.front();
+ miscRegIdxs.pop();
+
+ if (inst->tcBase()->readMiscReg(misc_reg_idx) !=
+ thread->readMiscReg(misc_reg_idx)) {
+ warn("%lli: Misc reg idx %i (side effect) does not match! "
+ "Inst: %#x, checker: %#x",
+ curTick, misc_reg_idx,
+ inst->tcBase()->readMiscReg(misc_reg_idx),
+ thread->readMiscReg(misc_reg_idx));
+ handleError(inst);
+ }
+ }
+}
+
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::validateState()
+{
++ if (updateThisCycle) {
++ warn("%lli: Instruction PC %#x results didn't match up, copying all "
++ "registers from main CPU", curTick, unverifiedInst->readPC());
++ // Heavy-weight copying of all registers
++ thread->copyArchRegs(unverifiedInst->tcBase());
++ // Also advance the PC. Hopefully no PC-based events happened.
++#if THE_ISA != MIPS_ISA
++ // go to the next instruction
++ thread->setPC(thread->readNextPC());
++ thread->setNextPC(thread->readNextPC() + sizeof(MachInst));
++#else
++ // go to the next instruction
++ thread->setPC(thread->readNextPC());
++ thread->setNextPC(thread->readNextNPC());
++ thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst));
++#endif
++ updateThisCycle = false;
++ }
+}
+
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::copyResult(DynInstPtr &inst)
+{
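+    // Destination register indices are partitioned by dependence tag:
+    // indices below FP_Base_DepTag name integer registers, indices below
+    // Fpcr_DepTag name floating point registers, and anything higher is a
+    // miscellaneous (control) register.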
+ RegIndex idx = inst->destRegIdx(0);
+ if (idx < TheISA::FP_Base_DepTag) {
+ thread->setIntReg(idx, inst->readIntResult());
+ } else if (idx < TheISA::Fpcr_DepTag) {
+ thread->setFloatRegBits(idx, inst->readIntResult());
+ } else {
+ thread->setMiscReg(idx, inst->readIntResult());
+ }
+}
+
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::dumpAndExit(DynInstPtr &inst)
+{
+ cprintf("Error detected, instruction information:\n");
+ cprintf("PC:%#x, nextPC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Completed:%i\n",
+ inst->readPC(),
+ inst->readNextPC(),
+ inst->seqNum,
+ inst->threadNumber,
+ inst->isCompleted());
+ inst->dump();
+ CheckerCPU::dumpAndExit();
+}
+
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::dumpInsts()
+{
+    cprintf("Inst list size: %i\n", instList.size());
+
+    // Walk from the youngest instruction back to the oldest. Starting at
+    // end() and pre-decrementing avoids decrementing begin() (undefined
+    // behavior) when the list is empty.
+    int num = 0;
+    InstListIt inst_list_it = instList.end();
+
+    while (inst_list_it != instList.begin()) {
+        --inst_list_it;
+
+        cprintf("Instruction:%i\n", num);
+
+        cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+                "Completed:%i\n",
+                (*inst_list_it)->readPC(),
+                (*inst_list_it)->seqNum,
+                (*inst_list_it)->threadNumber,
+                (*inst_list_it)->isCompleted());
+
+        cprintf("\n");
+
+        ++num;
+    }
+}
--- /dev/null
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include <string>
+
+#include "cpu/base.hh"
+#include "cpu/o3/alpha/cpu.hh"
+#include "cpu/o3/alpha/impl.hh"
+#include "cpu/o3/alpha/params.hh"
+#include "cpu/o3/fu_pool.hh"
+#include "sim/builder.hh"
+
+class DerivO3CPU : public AlphaO3CPU<AlphaSimpleImpl>
+{
+ public:
+ DerivO3CPU(AlphaSimpleParams *p)
+ : AlphaO3CPU<AlphaSimpleImpl>(p)
+ { }
+};
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU)
+
+    Param<int> clock;
+    Param<int> numThreads;
+    Param<int> activity;
+
+#if FULL_SYSTEM
+    SimObjectParam<System *> system;
+    Param<int> cpu_id;
+    SimObjectParam<AlphaITB *> itb;
+    SimObjectParam<AlphaDTB *> dtb;
+    Param<Tick> profile;
+#else
+    SimObjectVectorParam<Process *> workload;
+#endif // FULL_SYSTEM
+
+    SimObjectParam<MemObject *> mem;
+
+    SimObjectParam<BaseCPU *> checker;
+
+    Param<Counter> max_insts_any_thread;
+    Param<Counter> max_insts_all_threads;
+    Param<Counter> max_loads_any_thread;
+    Param<Counter> max_loads_all_threads;
+    Param<Counter> stats_reset_inst;
+    Param<Tick> progress_interval;
+
+    Param<unsigned> cachePorts;
+
+    Param<unsigned> decodeToFetchDelay;
+    Param<unsigned> renameToFetchDelay;
+    Param<unsigned> iewToFetchDelay;
+    Param<unsigned> commitToFetchDelay;
+    Param<unsigned> fetchWidth;
+
+    Param<unsigned> renameToDecodeDelay;
+    Param<unsigned> iewToDecodeDelay;
+    Param<unsigned> commitToDecodeDelay;
+    Param<unsigned> fetchToDecodeDelay;
+    Param<unsigned> decodeWidth;
+
+    Param<unsigned> iewToRenameDelay;
+    Param<unsigned> commitToRenameDelay;
+    Param<unsigned> decodeToRenameDelay;
+    Param<unsigned> renameWidth;
+
+    Param<unsigned> commitToIEWDelay;
+    Param<unsigned> renameToIEWDelay;
+    Param<unsigned> issueToExecuteDelay;
+    Param<unsigned> dispatchWidth;
+    Param<unsigned> issueWidth;
+    Param<unsigned> wbWidth;
+    Param<unsigned> wbDepth;
+    SimObjectParam<FUPool *> fuPool;
+
+    Param<unsigned> iewToCommitDelay;
+    Param<unsigned> renameToROBDelay;
+    Param<unsigned> commitWidth;
+    Param<unsigned> squashWidth;
+    Param<Tick> trapLatency;
+
+    Param<unsigned> backComSize;
+    Param<unsigned> forwardComSize;
+
+    Param<std::string> predType;
+    Param<unsigned> localPredictorSize;
+    Param<unsigned> localCtrBits;
+    Param<unsigned> localHistoryTableSize;
+    Param<unsigned> localHistoryBits;
+    Param<unsigned> globalPredictorSize;
+    Param<unsigned> globalCtrBits;
+    Param<unsigned> globalHistoryBits;
+    Param<unsigned> choicePredictorSize;
+    Param<unsigned> choiceCtrBits;
+
+    Param<unsigned> BTBEntries;
+    Param<unsigned> BTBTagSize;
+
+    Param<unsigned> RASSize;
+
+    Param<unsigned> LQEntries;
+    Param<unsigned> SQEntries;
+    Param<unsigned> LFSTSize;
+    Param<unsigned> SSITSize;
+
+    Param<unsigned> numPhysIntRegs;
+    Param<unsigned> numPhysFloatRegs;
+    Param<unsigned> numIQEntries;
+    Param<unsigned> numROBEntries;
+
+    Param<unsigned> smtNumFetchingThreads;
+    Param<std::string> smtFetchPolicy;
+    Param<std::string> smtLSQPolicy;
+    Param<unsigned> smtLSQThreshold;
+    Param<std::string> smtIQPolicy;
+    Param<unsigned> smtIQThreshold;
+    Param<std::string> smtROBPolicy;
+    Param<unsigned> smtROBThreshold;
+    Param<std::string> smtCommitPolicy;
+
+    Param<unsigned> instShiftAmt;
+
+    Param<bool> defer_registration;
+
+    Param<bool> function_trace;
+    Param<Tick> function_trace_start;
+
+END_DECLARE_SIM_OBJECT_PARAMS(DerivO3CPU)
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
+
+ INIT_PARAM(clock, "clock speed"),
+ INIT_PARAM(numThreads, "number of HW thread contexts"),
+ INIT_PARAM_DFLT(activity, "Initial activity count", 0),
+
+#if FULL_SYSTEM
+ INIT_PARAM(system, "System object"),
+ INIT_PARAM(cpu_id, "processor ID"),
+ INIT_PARAM(itb, "Instruction translation buffer"),
+ INIT_PARAM(dtb, "Data translation buffer"),
+    INIT_PARAM(profile, "profiling interval"),
+#else
+ INIT_PARAM(workload, "Processes to run"),
+#endif // FULL_SYSTEM
+
+ INIT_PARAM(mem, "Memory"),
+
+ INIT_PARAM_DFLT(checker, "Checker CPU", NULL),
+
+ INIT_PARAM_DFLT(max_insts_any_thread,
+ "Terminate when any thread reaches this inst count",
+ 0),
+    INIT_PARAM_DFLT(max_insts_all_threads,
+                    "Terminate when all threads have reached "
+                    "this inst count",
+                    0),
+ INIT_PARAM_DFLT(max_loads_any_thread,
+ "Terminate when any thread reaches this load count",
+ 0),
+    INIT_PARAM_DFLT(max_loads_all_threads,
+                    "Terminate when all threads have reached this load "
+                    "count",
+                    0),
+    INIT_PARAM_DFLT(stats_reset_inst,
+                    "Reset stats after this many instructions",
+                    0),
+    INIT_PARAM_DFLT(progress_interval, "Progress interval", 0),
+
+ INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
+
+ INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"),
+ INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"),
+    INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch "
+               "delay"),
+ INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"),
+ INIT_PARAM(fetchWidth, "Fetch width"),
+ INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"),
+    INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode "
+               "delay"),
+ INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"),
+ INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"),
+ INIT_PARAM(decodeWidth, "Decode width"),
+
+    INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename "
+               "delay"),
+ INIT_PARAM(commitToRenameDelay, "Commit to rename delay"),
+ INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"),
+ INIT_PARAM(renameWidth, "Rename width"),
+
+ INIT_PARAM(commitToIEWDelay, "Commit to "
+ "Issue/Execute/Writeback delay"),
+ INIT_PARAM(renameToIEWDelay, "Rename to "
+ "Issue/Execute/Writeback delay"),
+    INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal "
+               "to the IEW stage)"),
+ INIT_PARAM(dispatchWidth, "Dispatch width"),
+ INIT_PARAM(issueWidth, "Issue width"),
+ INIT_PARAM(wbWidth, "Writeback width"),
+ INIT_PARAM(wbDepth, "Writeback depth (number of cycles it can buffer)"),
+ INIT_PARAM_DFLT(fuPool, "Functional unit pool", NULL),
+
+ INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit "
+ "delay"),
+ INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"),
+ INIT_PARAM(commitWidth, "Commit width"),
+ INIT_PARAM(squashWidth, "Squash width"),
+ INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6),
+
+ INIT_PARAM(backComSize, "Time buffer size for backwards communication"),
+ INIT_PARAM(forwardComSize, "Time buffer size for forward communication"),
+
+ INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"),
+ INIT_PARAM(localPredictorSize, "Size of local predictor"),
+ INIT_PARAM(localCtrBits, "Bits per counter"),
+ INIT_PARAM(localHistoryTableSize, "Size of local history table"),
+ INIT_PARAM(localHistoryBits, "Bits for the local history"),
+ INIT_PARAM(globalPredictorSize, "Size of global predictor"),
+ INIT_PARAM(globalCtrBits, "Bits per counter"),
+ INIT_PARAM(globalHistoryBits, "Bits of history"),
+ INIT_PARAM(choicePredictorSize, "Size of choice predictor"),
+ INIT_PARAM(choiceCtrBits, "Bits of choice counters"),
+
+ INIT_PARAM(BTBEntries, "Number of BTB entries"),
+ INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"),
+
+ INIT_PARAM(RASSize, "RAS size"),
+
+ INIT_PARAM(LQEntries, "Number of load queue entries"),
+ INIT_PARAM(SQEntries, "Number of store queue entries"),
+ INIT_PARAM(LFSTSize, "Last fetched store table size"),
+ INIT_PARAM(SSITSize, "Store set ID table size"),
+
+ INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"),
+ INIT_PARAM(numPhysFloatRegs, "Number of physical floating point "
+ "registers"),
+ INIT_PARAM(numIQEntries, "Number of instruction queue entries"),
+ INIT_PARAM(numROBEntries, "Number of reorder buffer entries"),
+
+ INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1),
+ INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"),
+ INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"),
+ INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100),
+ INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"),
+ INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100),
+ INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"),
+ INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100),
+ INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"),
+
+ INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"),
+ INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
+
+ INIT_PARAM(function_trace, "Enable function trace"),
+ INIT_PARAM(function_trace_start, "Cycle to start function trace")
+
+END_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
+
+CREATE_SIM_OBJECT(DerivO3CPU)
+{
+ DerivO3CPU *cpu;
+
+#if FULL_SYSTEM
+ // Full-system only supports a single thread for the moment.
+ int actual_num_threads = 1;
+#else
+ // In non-full-system mode, we infer the number of threads from
+ // the workload if it's not explicitly specified.
+ int actual_num_threads =
+ (numThreads.isValid() && numThreads >= workload.size()) ?
+ numThreads : workload.size();
+
+ if (workload.size() == 0) {
+ fatal("Must specify at least one workload!");
+ }
+#endif
+
+ AlphaSimpleParams *params = new AlphaSimpleParams;
+
+ params->clock = clock;
+
+ params->name = getInstanceName();
+ params->numberOfThreads = actual_num_threads;
+ params->activity = activity;
+
+#if FULL_SYSTEM
+ params->system = system;
+ params->cpu_id = cpu_id;
+ params->itb = itb;
+ params->dtb = dtb;
+    params->profile = profile;
+#else
+ params->workload = workload;
+#endif // FULL_SYSTEM
+
+ params->mem = mem;
+
+ params->checker = checker;
+
+ params->max_insts_any_thread = max_insts_any_thread;
+ params->max_insts_all_threads = max_insts_all_threads;
+ params->max_loads_any_thread = max_loads_any_thread;
+ params->max_loads_all_threads = max_loads_all_threads;
+    params->stats_reset_inst = stats_reset_inst;
+    params->progress_interval = progress_interval;
+
+ //
+ // Caches
+ //
+ params->cachePorts = cachePorts;
+
+ params->decodeToFetchDelay = decodeToFetchDelay;
+ params->renameToFetchDelay = renameToFetchDelay;
+ params->iewToFetchDelay = iewToFetchDelay;
+ params->commitToFetchDelay = commitToFetchDelay;
+ params->fetchWidth = fetchWidth;
+
+ params->renameToDecodeDelay = renameToDecodeDelay;
+ params->iewToDecodeDelay = iewToDecodeDelay;
+ params->commitToDecodeDelay = commitToDecodeDelay;
+ params->fetchToDecodeDelay = fetchToDecodeDelay;
+ params->decodeWidth = decodeWidth;
+
+ params->iewToRenameDelay = iewToRenameDelay;
+ params->commitToRenameDelay = commitToRenameDelay;
+ params->decodeToRenameDelay = decodeToRenameDelay;
+ params->renameWidth = renameWidth;
+
+ params->commitToIEWDelay = commitToIEWDelay;
+ params->renameToIEWDelay = renameToIEWDelay;
+ params->issueToExecuteDelay = issueToExecuteDelay;
+ params->dispatchWidth = dispatchWidth;
+ params->issueWidth = issueWidth;
+ params->wbWidth = wbWidth;
+ params->wbDepth = wbDepth;
+ params->fuPool = fuPool;
+
+ params->iewToCommitDelay = iewToCommitDelay;
+ params->renameToROBDelay = renameToROBDelay;
+ params->commitWidth = commitWidth;
+ params->squashWidth = squashWidth;
+ params->trapLatency = trapLatency;
+
+ params->backComSize = backComSize;
+ params->forwardComSize = forwardComSize;
+
+ params->predType = predType;
+ params->localPredictorSize = localPredictorSize;
+ params->localCtrBits = localCtrBits;
+ params->localHistoryTableSize = localHistoryTableSize;
+ params->localHistoryBits = localHistoryBits;
+ params->globalPredictorSize = globalPredictorSize;
+ params->globalCtrBits = globalCtrBits;
+ params->globalHistoryBits = globalHistoryBits;
+ params->choicePredictorSize = choicePredictorSize;
+ params->choiceCtrBits = choiceCtrBits;
+
+ params->BTBEntries = BTBEntries;
+ params->BTBTagSize = BTBTagSize;
+
+ params->RASSize = RASSize;
+
+ params->LQEntries = LQEntries;
+ params->SQEntries = SQEntries;
+
+ params->SSITSize = SSITSize;
+ params->LFSTSize = LFSTSize;
+
+ params->numPhysIntRegs = numPhysIntRegs;
+ params->numPhysFloatRegs = numPhysFloatRegs;
+ params->numIQEntries = numIQEntries;
+ params->numROBEntries = numROBEntries;
+
+ params->smtNumFetchingThreads = smtNumFetchingThreads;
+
+ // Default smtFetchPolicy to "RoundRobin", if necessary.
+ std::string round_robin_policy = "RoundRobin";
+ std::string single_thread = "SingleThread";
+
+ if (actual_num_threads > 1 && single_thread.compare(smtFetchPolicy) == 0)
+ params->smtFetchPolicy = round_robin_policy;
+ else
+ params->smtFetchPolicy = smtFetchPolicy;
+
+ params->smtIQPolicy = smtIQPolicy;
+ params->smtLSQPolicy = smtLSQPolicy;
+ params->smtLSQThreshold = smtLSQThreshold;
+ params->smtROBPolicy = smtROBPolicy;
+ params->smtROBThreshold = smtROBThreshold;
+ params->smtCommitPolicy = smtCommitPolicy;
+
+    params->instShiftAmt = instShiftAmt;
+
+ params->deferRegistration = defer_registration;
+
+ params->functionTrace = function_trace;
+ params->functionTraceStart = function_trace_start;
+
+ cpu = new DerivO3CPU(params);
+
+ return cpu;
+}
+
+REGISTER_SIM_OBJECT("DerivO3CPU", DerivO3CPU)
+
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include <string>
+
+#include "cpu/checker/cpu_impl.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/o3/alpha/dyn_inst.hh"
+#include "cpu/o3/alpha/impl.hh"
+#include "sim/builder.hh"
+#include "sim/process.hh"
+#include "sim/sim_object.hh"
+
+class MemObject;
+
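+// Force an explicit instantiation of the Checker template for the Alpha
+// O3 dynamic instruction type, so its definitions from cpu_impl.hh are
+// compiled into this translation unit.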
+template
+class Checker<RefCountingPtr<AlphaDynInst<AlphaSimpleImpl> > >;
+
+/**
+ * Specific non-templated derived class used for SimObject configuration.
+ */
+class O3Checker : public Checker<RefCountingPtr<AlphaDynInst<AlphaSimpleImpl> > >
+{
+ public:
+ O3Checker(Params *p)
+ : Checker<RefCountingPtr<AlphaDynInst<AlphaSimpleImpl> > >(p)
+ { }
+};
+
+////////////////////////////////////////////////////////////////////////
+//
+// CheckerCPU Simulation Object
+//
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker)
+
+ Param<Counter> max_insts_any_thread;
+ Param<Counter> max_insts_all_threads;
+ Param<Counter> max_loads_any_thread;
+ Param<Counter> max_loads_all_threads;
+    Param<Counter> stats_reset_inst;
+    Param<Tick> progress_interval;
+
+#if FULL_SYSTEM
+ SimObjectParam<AlphaITB *> itb;
+ SimObjectParam<AlphaDTB *> dtb;
+ SimObjectParam<System *> system;
+ Param<int> cpu_id;
+ Param<Tick> profile;
+#else
+ SimObjectParam<Process *> workload;
+#endif // FULL_SYSTEM
+ Param<int> clock;
+
+ Param<bool> defer_registration;
+ Param<bool> exitOnError;
+    Param<bool> updateOnError;
+ Param<bool> warnOnlyOnLoadError;
+ Param<bool> function_trace;
+ Param<Tick> function_trace_start;
+
+END_DECLARE_SIM_OBJECT_PARAMS(O3Checker)
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker)
+
+ INIT_PARAM(max_insts_any_thread,
+ "terminate when any thread reaches this inst count"),
+ INIT_PARAM(max_insts_all_threads,
+ "terminate when all threads have reached this inst count"),
+ INIT_PARAM(max_loads_any_thread,
+ "terminate when any thread reaches this load count"),
+ INIT_PARAM(max_loads_all_threads,
+ "terminate when all threads have reached this load count"),
+    INIT_PARAM(stats_reset_inst,
+               "Reset stats after this many instructions"),
+    INIT_PARAM_DFLT(progress_interval, "CPU Progress Interval", 0),
+
+#if FULL_SYSTEM
+ INIT_PARAM(itb, "Instruction TLB"),
+ INIT_PARAM(dtb, "Data TLB"),
+ INIT_PARAM(system, "system object"),
+ INIT_PARAM(cpu_id, "processor ID"),
+    INIT_PARAM(profile, "profiling interval"),
+#else
+ INIT_PARAM(workload, "processes to run"),
+#endif // FULL_SYSTEM
+
+ INIT_PARAM(clock, "clock speed"),
+
+ INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
+ INIT_PARAM(exitOnError, "exit on error"),
+    INIT_PARAM(updateOnError,
+               "Update the checker with the main CPU's state on error"),
+ INIT_PARAM_DFLT(warnOnlyOnLoadError, "warn, but don't exit, if a load "
+ "result errors", false),
+ INIT_PARAM(function_trace, "Enable function trace"),
+ INIT_PARAM(function_trace_start, "Cycle to start function trace")
+
+END_INIT_SIM_OBJECT_PARAMS(O3Checker)
+
+
+CREATE_SIM_OBJECT(O3Checker)
+{
+ O3Checker::Params *params = new O3Checker::Params();
+ params->name = getInstanceName();
+ params->numberOfThreads = 1;
+ params->max_insts_any_thread = 0;
+ params->max_insts_all_threads = 0;
+ params->max_loads_any_thread = 0;
+ params->max_loads_all_threads = 0;
+    params->stats_reset_inst = 0;
+ params->exitOnError = exitOnError;
+    params->updateOnError = updateOnError;
+ params->warnOnlyOnLoadError = warnOnlyOnLoadError;
+ params->deferRegistration = defer_registration;
+ params->functionTrace = function_trace;
+ params->functionTraceStart = function_trace_start;
+ params->clock = clock;
+    // Hack to touch all parameters. Consider not deriving Checker
+    // from BaseCPU; it's not really a CPU in the end.
+    Counter temp;
+    temp = max_insts_any_thread;
+    temp = max_insts_all_threads;
+    temp = max_loads_any_thread;
+    temp = max_loads_all_threads;
+    temp = stats_reset_inst;
+    Tick temp2 = progress_interval;
+    temp2++;
+    params->progress_interval = 0;
+
+#if FULL_SYSTEM
+ params->itb = itb;
+ params->dtb = dtb;
+ params->system = system;
+ params->cpu_id = cpu_id;
+ params->profile = profile;
+#else
+ params->process = workload;
+#endif
+
+ O3Checker *cpu = new O3Checker(params);
+ return cpu;
+}
+
+REGISTER_SIM_OBJECT("O3Checker", O3Checker)
--- /dev/null
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ * Korey Sewell
+ */
+
+#include "config/full_system.hh"
+#include "config/use_checker.hh"
+
+#include <algorithm>
+#include <string>
+
+#include "base/loader/symtab.hh"
+#include "base/timebuf.hh"
+#include "cpu/exetrace.hh"
+#include "cpu/o3/commit.hh"
+#include "cpu/o3/thread_state.hh"
+
+#if USE_CHECKER
+#include "cpu/checker/cpu.hh"
+#endif
+
+template <class Impl>
+DefaultCommit<Impl>::TrapEvent::TrapEvent(DefaultCommit<Impl> *_commit,
+ unsigned _tid)
+ : Event(&mainEventQueue, CPU_Tick_Pri), commit(_commit), tid(_tid)
+{
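+    // AutoDelete events free themselves once processed, so commit never
+    // has to track or reclaim outstanding trap events.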
+ this->setFlags(Event::AutoDelete);
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::TrapEvent::process()
+{
+ // This will get reset by commit if it was switched out at the
+ // time of this event processing.
+ commit->trapSquash[tid] = true;
+}
+
+template <class Impl>
+const char *
+DefaultCommit<Impl>::TrapEvent::description()
+{
+ return "Trap event";
+}
+
+template <class Impl>
+DefaultCommit<Impl>::DefaultCommit(Params *params)
+ : squashCounter(0),
+ iewToCommitDelay(params->iewToCommitDelay),
+ commitToIEWDelay(params->commitToIEWDelay),
+ renameToROBDelay(params->renameToROBDelay),
+ fetchToCommitDelay(params->commitToFetchDelay),
+ renameWidth(params->renameWidth),
+ commitWidth(params->commitWidth),
+ numThreads(params->numberOfThreads),
+ drainPending(false),
+ switchedOut(false),
+ trapLatency(params->trapLatency)
+{
+ _status = Active;
+ _nextStatus = Inactive;
+ std::string policy = params->smtCommitPolicy;
+
+ //Convert string to lowercase
+ std::transform(policy.begin(), policy.end(), policy.begin(),
+ (int(*)(int)) tolower);
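+    // (The function-pointer cast picks the single-argument ::tolower
+    // overload so std::transform can resolve it.)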
+
+    //Assign commit policy
+    if (policy == "aggressive") {
+        commitPolicy = Aggressive;
+
+        DPRINTF(Commit, "Commit Policy set to Aggressive.\n");
+    } else if (policy == "roundrobin") {
+        commitPolicy = RoundRobin;
+
+        //Set-Up Priority List
+        for (int tid = 0; tid < numThreads; tid++) {
+            priority_list.push_back(tid);
+        }
+
+        DPRINTF(Commit, "Commit Policy set to Round Robin.\n");
+    } else if (policy == "oldestready") {
+        commitPolicy = OldestReady;
+
+        DPRINTF(Commit, "Commit Policy set to Oldest Ready.\n");
+    } else {
+        assert(0 && "Invalid SMT Commit Policy. Options Are: {Aggressive, "
+               "RoundRobin, OldestReady}");
+    }
+
+ for (int i=0; i < numThreads; i++) {
+ commitStatus[i] = Idle;
+ changedROBNumEntries[i] = false;
+ trapSquash[i] = false;
+ tcSquash[i] = false;
+ PC[i] = nextPC[i] = nextNPC[i] = 0;
+ }
+}
+
+template <class Impl>
+std::string
+DefaultCommit<Impl>::name() const
+{
+ return cpu->name() + ".commit";
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::regStats()
+{
+ using namespace Stats;
+ commitCommittedInsts
+ .name(name() + ".commitCommittedInsts")
+ .desc("The number of committed instructions")
+ .prereq(commitCommittedInsts);
+ commitSquashedInsts
+ .name(name() + ".commitSquashedInsts")
+ .desc("The number of squashed insts skipped by commit")
+ .prereq(commitSquashedInsts);
+ commitSquashEvents
+ .name(name() + ".commitSquashEvents")
+ .desc("The number of times commit is told to squash")
+ .prereq(commitSquashEvents);
+ commitNonSpecStalls
+ .name(name() + ".commitNonSpecStalls")
+ .desc("The number of times commit has been forced to stall to "
+ "communicate backwards")
+ .prereq(commitNonSpecStalls);
+ branchMispredicts
+ .name(name() + ".branchMispredicts")
+ .desc("The number of times a branch was mispredicted")
+ .prereq(branchMispredicts);
+ numCommittedDist
+ .init(0,commitWidth,1)
+ .name(name() + ".COM:committed_per_cycle")
+ .desc("Number of insts commited each cycle")
+ .flags(Stats::pdf)
+ ;
+
+ statComInst
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:count")
+ .desc("Number of instructions committed")
+ .flags(total)
+ ;
+
+ statComSwp
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:swp_count")
+ .desc("Number of s/w prefetches committed")
+ .flags(total)
+ ;
+
+ statComRefs
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:refs")
+ .desc("Number of memory references committed")
+ .flags(total)
+ ;
+
+ statComLoads
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:loads")
+ .desc("Number of loads committed")
+ .flags(total)
+ ;
+
+ statComMembars
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:membars")
+ .desc("Number of memory barriers committed")
+ .flags(total)
+ ;
+
+ statComBranches
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:branches")
+ .desc("Number of branches committed")
+ .flags(total)
+ ;
+
+ commitEligible
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:bw_limited")
+ .desc("number of insts not committed due to BW limits")
+ .flags(total)
+ ;
+
+ commitEligibleSamples
+ .name(name() + ".COM:bw_lim_events")
+        .desc("number of cycles where commit BW limit reached")
+ ;
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::setCPU(O3CPU *cpu_ptr)
+{
+ DPRINTF(Commit, "Commit: Setting CPU pointer.\n");
+ cpu = cpu_ptr;
+
+ // Commit must broadcast the number of free entries it has at the start of
+ // the simulation, so it starts as active.
+ cpu->activateStage(O3CPU::CommitIdx);
+
+ trapLatency = cpu->cycles(trapLatency);
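+    // trapLatency arrives from the params in CPU cycles; cpu->cycles()
+    // converts it to simulator ticks for use in event scheduling.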
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::setThreads(std::vector<Thread *> &threads)
+{
+ thread = threads;
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
+{
+ DPRINTF(Commit, "Commit: Setting time buffer pointer.\n");
+ timeBuffer = tb_ptr;
+
+ // Setup wire to send information back to IEW.
+ toIEW = timeBuffer->getWire(0);
+
+ // Setup wire to read data from IEW (for the ROB).
+ robInfoFromIEW = timeBuffer->getWire(-iewToCommitDelay);
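+    // A wire fetched with getWire(-n) reads entries written n cycles
+    // earlier, which is how the inter-stage delays are modeled.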
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
+{
+ DPRINTF(Commit, "Commit: Setting fetch queue pointer.\n");
+ fetchQueue = fq_ptr;
+
+ // Setup wire to get instructions from rename (for the ROB).
+ fromFetch = fetchQueue->getWire(-fetchToCommitDelay);
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
+{
+ DPRINTF(Commit, "Commit: Setting rename queue pointer.\n");
+ renameQueue = rq_ptr;
+
+ // Setup wire to get instructions from rename (for the ROB).
+ fromRename = renameQueue->getWire(-renameToROBDelay);
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
+{
+ DPRINTF(Commit, "Commit: Setting IEW queue pointer.\n");
+ iewQueue = iq_ptr;
+
+ // Setup wire to get instructions from IEW.
+ fromIEW = iewQueue->getWire(-iewToCommitDelay);
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::setIEWStage(IEW *iew_stage)
+{
+ iewStage = iew_stage;
+}
+
+template<class Impl>
+void
+DefaultCommit<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
+{
+ DPRINTF(Commit, "Commit: Setting active threads list pointer.\n");
+ activeThreads = at_ptr;
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::setRenameMap(RenameMap rm_ptr[])
+{
+ DPRINTF(Commit, "Setting rename map pointers.\n");
+
+ for (int i=0; i < numThreads; i++) {
+ renameMap[i] = &rm_ptr[i];
+ }
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::setROB(ROB *rob_ptr)
+{
+ DPRINTF(Commit, "Commit: Setting ROB pointer.\n");
+ rob = rob_ptr;
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::initStage()
+{
+ rob->setActiveThreads(activeThreads);
+ rob->resetEntries();
+
+ // Broadcast the number of free entries.
+ for (int i=0; i < numThreads; i++) {
+ toIEW->commitInfo[i].usedROB = true;
+ toIEW->commitInfo[i].freeROBEntries = rob->numFreeEntries(i);
+ }
+
+ cpu->activityThisCycle();
+}
+
+template <class Impl>
+bool
+DefaultCommit<Impl>::drain()
+{
+ drainPending = true;
+
+ // If it's already drained, return true.
+ if (rob->isEmpty() && !iewStage->hasStoresToWB()) {
+ cpu->signalDrained();
+ return true;
+ }
+
+ return false;
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::switchOut()
+{
+ switchedOut = true;
+ drainPending = false;
+ rob->switchOut();
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::resume()
+{
+ drainPending = false;
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::takeOverFrom()
+{
+ switchedOut = false;
+ _status = Active;
+ _nextStatus = Inactive;
+ for (int i=0; i < numThreads; i++) {
+ commitStatus[i] = Idle;
+ changedROBNumEntries[i] = false;
+ trapSquash[i] = false;
+ tcSquash[i] = false;
+ }
+ squashCounter = 0;
+ rob->takeOverFrom();
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::updateStatus()
+{
+ // reset ROB changed variable
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+ while (threads != (*activeThreads).end()) {
+ unsigned tid = *threads++;
+ changedROBNumEntries[tid] = false;
+
+ // Also check if any of the threads has a trap pending
+ if (commitStatus[tid] == TrapPending ||
+ commitStatus[tid] == FetchTrapPending) {
+ _nextStatus = Active;
+ }
+ }
+
+ if (_nextStatus == Inactive && _status == Active) {
+ DPRINTF(Activity, "Deactivating stage.\n");
+ cpu->deactivateStage(O3CPU::CommitIdx);
+ } else if (_nextStatus == Active && _status == Inactive) {
+ DPRINTF(Activity, "Activating stage.\n");
+ cpu->activateStage(O3CPU::CommitIdx);
+ }
+
+ _status = _nextStatus;
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::setNextStatus()
+{
+ int squashes = 0;
+
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+
+ while (threads != (*activeThreads).end()) {
+ unsigned tid = *threads++;
+
+ if (commitStatus[tid] == ROBSquashing) {
+ squashes++;
+ }
+ }
+
+ squashCounter = squashes;
+
+ // If commit is currently squashing, then it will have activity for the
+ // next cycle. Set its next status as active.
+ if (squashCounter) {
+ _nextStatus = Active;
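+// DerivO3CPU binds the AlphaSimpleImpl template parameter so that the
+// SimObject builder macros below can work with a concrete, non-template
+// type.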
+ }
+}
+
+template <class Impl>
+bool
+DefaultCommit<Impl>::changedROBEntries()
+{
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+
+ while (threads != (*activeThreads).end()) {
+ unsigned tid = *threads++;
+
+ if (changedROBNumEntries[tid]) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+template <class Impl>
+unsigned
+DefaultCommit<Impl>::numROBFreeEntries(unsigned tid)
+{
+ return rob->numFreeEntries(tid);
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::generateTrapEvent(unsigned tid)
+{
+ DPRINTF(Commit, "Generating trap event for [tid:%i]\n", tid);
+
+ TrapEvent *trap = new TrapEvent(this, tid);
+
+ trap->schedule(curTick + trapLatency);
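+    // The squash itself happens trapLatency ticks from now, when the
+    // event's process() sets trapSquash[tid].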
+
+ thread[tid]->trapPending = true;
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::generateTCEvent(unsigned tid)
+{
+ DPRINTF(Commit, "Generating TC squash event for [tid:%i]\n", tid);
+
+ tcSquash[tid] = true;
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::squashAll(unsigned tid)
+{
+    // Use one sequence number older than the head instruction's so the
+    // squash includes it; the goal is to squash every instruction of
+    // this thread.
+    InstSeqNum squashed_inst = rob->isEmpty() ?
+        0 : rob->readHeadInst(tid)->seqNum - 1;
+
+    // All younger instructions will be squashed. Set youngestSeqNum to 0
+    // so that any subsequently arriving instruction counts as younger.
+    youngestSeqNum[tid] = 0;
+
+ rob->squash(squashed_inst, tid);
+ changedROBNumEntries[tid] = true;
+
+ // Send back the sequence number of the squashed instruction.
+ toIEW->commitInfo[tid].doneSeqNum = squashed_inst;
+
+ // Send back the squash signal to tell stages that they should
+ // squash.
+ toIEW->commitInfo[tid].squash = true;
+
+ // Send back the rob squashing signal so other stages know that
+ // the ROB is in the process of squashing.
+ toIEW->commitInfo[tid].robSquashing = true;
+
+ toIEW->commitInfo[tid].branchMispredict = false;
+
+ toIEW->commitInfo[tid].nextPC = PC[tid];
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::squashFromTrap(unsigned tid)
+{
+ squashAll(tid);
+
+ DPRINTF(Commit, "Squashing from trap, restarting at PC %#x\n", PC[tid]);
+
+ thread[tid]->trapPending = false;
+ thread[tid]->inSyscall = false;
+
+ trapSquash[tid] = false;
+
+ commitStatus[tid] = ROBSquashing;
+ cpu->activityThisCycle();
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::squashFromTC(unsigned tid)
+{
+ squashAll(tid);
+
+ DPRINTF(Commit, "Squashing from TC, restarting at PC %#x\n", PC[tid]);
+
+ thread[tid]->inSyscall = false;
+ assert(!thread[tid]->trapPending);
+
+ commitStatus[tid] = ROBSquashing;
+ cpu->activityThisCycle();
+
+ tcSquash[tid] = false;
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::tick()
+{
+ wroteToTimeBuffer = false;
+ _nextStatus = Inactive;
+
+ if (drainPending && rob->isEmpty() && !iewStage->hasStoresToWB()) {
+ cpu->signalDrained();
+ drainPending = false;
+ return;
+ }
+
+    if (activeThreads->empty())
+        return;
+
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+
+ // Check if any of the threads are done squashing. Change the
+ // status if they are done.
+ while (threads != (*activeThreads).end()) {
+ unsigned tid = *threads++;
+
+ if (commitStatus[tid] == ROBSquashing) {
+
+ if (rob->isDoneSquashing(tid)) {
+ commitStatus[tid] = Running;
+ } else {
+ DPRINTF(Commit,"[tid:%u]: Still Squashing, cannot commit any"
+ " insts this cycle.\n", tid);
+ rob->doSquash(tid);
+ toIEW->commitInfo[tid].robSquashing = true;
+ wroteToTimeBuffer = true;
+ }
+ }
+ }
+
+ commit();
+
+ markCompletedInsts();
+
+ threads = (*activeThreads).begin();
+
+ while (threads != (*activeThreads).end()) {
+ unsigned tid = *threads++;
+
+ if (!rob->isEmpty(tid) && rob->readHeadInst(tid)->readyToCommit()) {
+ // The ROB has more instructions it can commit. Its next status
+ // will be active.
+ _nextStatus = Active;
+
+ DynInstPtr inst = rob->readHeadInst(tid);
+
+ DPRINTF(Commit,"[tid:%i]: Instruction [sn:%lli] PC %#x is head of"
+ " ROB and ready to commit\n",
+ tid, inst->seqNum, inst->readPC());
+
+ } else if (!rob->isEmpty(tid)) {
+ DynInstPtr inst = rob->readHeadInst(tid);
+
+ DPRINTF(Commit,"[tid:%i]: Can't commit, Instruction [sn:%lli] PC "
+ "%#x is head of ROB and not ready\n",
+ tid, inst->seqNum, inst->readPC());
+ }
+
+ DPRINTF(Commit, "[tid:%i]: ROB has %d insts & %d free entries.\n",
+ tid, rob->countInsts(tid), rob->numFreeEntries(tid));
+ }
+
+
+ if (wroteToTimeBuffer) {
+ DPRINTF(Activity, "Activity This Cycle.\n");
+ cpu->activityThisCycle();
+ }
+
+ updateStatus();
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::commit()
+{
+
+ //////////////////////////////////////
+ // Check for interrupts
+ //////////////////////////////////////
+
+#if FULL_SYSTEM
+ // Process interrupts if interrupts are enabled, not in PAL mode,
+ // and no other traps or external squashes are currently pending.
+ // @todo: Allow other threads to handle interrupts.
+ if (cpu->checkInterrupts &&
+ cpu->check_interrupts() &&
+ !cpu->inPalMode(readPC()) &&
+ !trapSquash[0] &&
+ !tcSquash[0]) {
+ // Tell fetch that there is an interrupt pending. This will
+ // make fetch wait until it sees a non PAL-mode PC, at which
+ // point it stops fetching instructions.
+ toIEW->commitInfo[0].interruptPending = true;
+
+ // Wait until the ROB is empty and all stores have drained in
+ // order to enter the interrupt.
+ if (rob->isEmpty() && !iewStage->hasStoresToWB()) {
+ // Not sure which thread should be the one to interrupt. For now
+ // always do thread 0.
+ assert(!thread[0]->inSyscall);
+ thread[0]->inSyscall = true;
+
+ // CPU will handle implementation of the interrupt.
+ cpu->processInterrupts();
+
+ // Now squash or record that I need to squash this cycle.
+ commitStatus[0] = TrapPending;
+
+ // Exit state update mode to avoid accidental updating.
+ thread[0]->inSyscall = false;
+
+ // Generate trap squash event.
+ generateTrapEvent(0);
+
+ toIEW->commitInfo[0].clearInterrupt = true;
+
+ DPRINTF(Commit, "Interrupt detected.\n");
+ } else {
+ DPRINTF(Commit, "Interrupt pending, waiting for ROB to empty.\n");
+ }
+ }
+#endif // FULL_SYSTEM
+
+ ////////////////////////////////////
+ // Check for any possible squashes, handle them first
+ ////////////////////////////////////
+
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+
+ while (threads != (*activeThreads).end()) {
+ unsigned tid = *threads++;
+
+        // A trap squash takes priority over a TC squash; having both
+        // pending at once indicates a bug, hence the assert.
+        if (trapSquash[tid]) {
+            assert(!tcSquash[tid]);
+            squashFromTrap(tid);
+        } else if (tcSquash[tid]) {
+            squashFromTC(tid);
+        }
+
+ // Squashed sequence number must be older than youngest valid
+ // instruction in the ROB. This prevents squashes from younger
+ // instructions overriding squashes from older instructions.
+ if (fromIEW->squash[tid] &&
+ commitStatus[tid] != TrapPending &&
+ fromIEW->squashedSeqNum[tid] <= youngestSeqNum[tid]) {
+
+ DPRINTF(Commit, "[tid:%i]: Squashing due to PC %#x [sn:%i]\n",
+ tid,
+ fromIEW->mispredPC[tid],
+ fromIEW->squashedSeqNum[tid]);
+
+ DPRINTF(Commit, "[tid:%i]: Redirecting to PC %#x\n",
+ tid,
+ fromIEW->nextPC[tid]);
+
+ commitStatus[tid] = ROBSquashing;
+
+            // Squash everything younger than this sequence number; if
+            // the squashing instruction itself should be included,
+            // includeSquashInst below moves the number one older.
+            InstSeqNum squashed_inst = fromIEW->squashedSeqNum[tid];
+
+#if ISA_HAS_DELAY_SLOT
+            // Initialize both here: the cases below do not otherwise set
+            // squash_bdelay_slot on every path.
+            InstSeqNum bdelay_done_seq_num = squashed_inst;
+            bool squash_bdelay_slot = false;
+
+ if (fromIEW->branchMispredict[tid]) {
+ if (fromIEW->branchTaken[tid] &&
+ fromIEW->condDelaySlotBranch[tid]) {
+                    DPRINTF(Commit, "[tid:%i]: Cond. delay slot branch "
+                            "mispredicted as taken. Squashing after previous "
+                            "inst, [sn:%i]\n",
+                            tid, squashed_inst);
+ bdelay_done_seq_num = squashed_inst;
+ squash_bdelay_slot = true;
+ } else {
+ DPRINTF(Commit, "[tid:%i]: Branch Mispredict. Squashing "
+ "after delay slot [sn:%i]\n", tid, squashed_inst+1);
+ bdelay_done_seq_num = squashed_inst + 1;
+ squash_bdelay_slot = false;
+ }
+ } else {
+ bdelay_done_seq_num = squashed_inst;
+ }
+#endif
+
+            if (fromIEW->includeSquashInst[tid]) {
+ squashed_inst--;
+#if ISA_HAS_DELAY_SLOT
+ bdelay_done_seq_num--;
+#endif
+ }
+ // All younger instructions will be squashed. Set the sequence
+ // number as the youngest instruction in the ROB.
+ youngestSeqNum[tid] = squashed_inst;
+
+#if ISA_HAS_DELAY_SLOT
+ rob->squash(bdelay_done_seq_num, tid);
+ toIEW->commitInfo[tid].squashDelaySlot = squash_bdelay_slot;
+ toIEW->commitInfo[tid].bdelayDoneSeqNum = bdelay_done_seq_num;
+#else
+ rob->squash(squashed_inst, tid);
+ toIEW->commitInfo[tid].squashDelaySlot = true;
+#endif
+ changedROBNumEntries[tid] = true;
+
+ toIEW->commitInfo[tid].doneSeqNum = squashed_inst;
+
+ toIEW->commitInfo[tid].squash = true;
+
+ // Send back the rob squashing signal so other stages know that
+ // the ROB is in the process of squashing.
+ toIEW->commitInfo[tid].robSquashing = true;
+
+ toIEW->commitInfo[tid].branchMispredict =
+ fromIEW->branchMispredict[tid];
+
+ toIEW->commitInfo[tid].branchTaken =
+ fromIEW->branchTaken[tid];
+
+ toIEW->commitInfo[tid].nextPC = fromIEW->nextPC[tid];
+
+ toIEW->commitInfo[tid].mispredPC = fromIEW->mispredPC[tid];
+
+ if (toIEW->commitInfo[tid].branchMispredict) {
+ ++branchMispredicts;
+ }
+ }
+
+ }
+
+ setNextStatus();
+
+    if (squashCounter != numThreads) {
+        // At least one thread is not squashing, so get instructions.
+ getInsts();
+
+ // Try to commit any instructions.
+ commitInsts();
+ } else {
+#if ISA_HAS_DELAY_SLOT
+ skidInsert();
+#endif
+ }
+
+ //Check for any activity
+ threads = (*activeThreads).begin();
+
+ while (threads != (*activeThreads).end()) {
+ unsigned tid = *threads++;
+
+ if (changedROBNumEntries[tid]) {
+ toIEW->commitInfo[tid].usedROB = true;
+ toIEW->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid);
+
+ if (rob->isEmpty(tid)) {
+ toIEW->commitInfo[tid].emptyROB = true;
+ }
+
+ wroteToTimeBuffer = true;
+ changedROBNumEntries[tid] = false;
+ }
+ }
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::commitInsts()
+{
+ ////////////////////////////////////
+ // Handle commit
+ // Note that commit will be handled prior to putting new
+ // instructions in the ROB so that the ROB only tries to commit
+ // instructions it has in this current cycle, and not instructions
+ // it is writing in during this cycle. Can't commit and squash
+ // things at the same time...
+ ////////////////////////////////////
+
+ DPRINTF(Commit, "Trying to commit instructions in the ROB.\n");
+
+ unsigned num_committed = 0;
+
+ DynInstPtr head_inst;
+
+ // Commit as many instructions as possible until the commit bandwidth
+ // limit is reached, or it becomes impossible to commit any more.
+ while (num_committed < commitWidth) {
+ int commit_thread = getCommittingThread();
+
+ if (commit_thread == -1 || !rob->isHeadReady(commit_thread))
+ break;
+
+ head_inst = rob->readHeadInst(commit_thread);
+
+ int tid = head_inst->threadNumber;
+
+ assert(tid == commit_thread);
+
+ DPRINTF(Commit, "Trying to commit head instruction, [sn:%i] [tid:%i]\n",
+ head_inst->seqNum, tid);
+
+ // If the head instruction is squashed, it is ready to retire
+ // (be removed from the ROB) at any time.
+ if (head_inst->isSquashed()) {
+
+ DPRINTF(Commit, "Retiring squashed instruction from "
+ "ROB.\n");
+
+ rob->retireHead(commit_thread);
+
+ ++commitSquashedInsts;
+
+ // Record that the number of ROB entries has changed.
+ changedROBNumEntries[tid] = true;
+ } else {
+ PC[tid] = head_inst->readPC();
+ nextPC[tid] = head_inst->readNextPC();
+ nextNPC[tid] = head_inst->readNextNPC();
+
+ // Increment the total number of non-speculative instructions
+ // executed.
+ // Hack for now: it really shouldn't happen until after the
+ // commit is deemed to be successful, but this count is needed
+ // for syscalls.
+ thread[tid]->funcExeInst++;
+
+ // Try to commit the head instruction.
+ bool commit_success = commitHead(head_inst, num_committed);
+
+ if (commit_success) {
+ ++num_committed;
+
+ changedROBNumEntries[tid] = true;
+
+ // Set the doneSeqNum to the youngest committed instruction.
+ toIEW->commitInfo[tid].doneSeqNum = head_inst->seqNum;
+
+ ++commitCommittedInsts;
+
+ // To match the old model, don't count nops and instruction
+ // prefetches towards the total commit count.
+ if (!head_inst->isNop() && !head_inst->isInstPrefetch()) {
+ cpu->instDone(tid);
+ }
+
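+                // Advance the committed PC state past this instruction;
+                // with delay slots, the next PC follows nextNPC rather
+                // than a simple increment.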
+ PC[tid] = nextPC[tid];
+#if ISA_HAS_DELAY_SLOT
+ nextPC[tid] = nextNPC[tid];
+ nextNPC[tid] = nextNPC[tid] + sizeof(TheISA::MachInst);
+#else
+ nextPC[tid] = nextPC[tid] + sizeof(TheISA::MachInst);
+#endif
+
+#if FULL_SYSTEM
+ int count = 0;
+ Addr oldpc;
+ do {
+ // Debug statement. Checks to make sure we're not
+ // currently updating state while handling PC events.
+ if (count == 0)
+ assert(!thread[tid]->inSyscall &&
+ !thread[tid]->trapPending);
+ oldpc = PC[tid];
+ cpu->system->pcEventQueue.service(
+ thread[tid]->getTC());
+ count++;
+ } while (oldpc != PC[tid]);
+ if (count > 1) {
+ DPRINTF(Commit, "PC skip function event, stopping commit\n");
+ break;
+ }
+#endif
+ } else {
+ DPRINTF(Commit, "Unable to commit head instruction PC:%#x "
+ "[tid:%i] [sn:%i].\n",
+                        head_inst->readPC(), tid, head_inst->seqNum);
+ break;
+ }
+ }
+ }
+
+ DPRINTF(CommitRate, "%i\n", num_committed);
+ numCommittedDist.sample(num_committed);
+
+ if (num_committed == commitWidth) {
+ commitEligibleSamples++;
+ }
+}
+
+template <class Impl>
+bool
+DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
+{
+ assert(head_inst);
+
+ int tid = head_inst->threadNumber;
+
+ // If the instruction is not executed yet, then it will need extra
+ // handling. Signal backwards that it should be executed.
+ if (!head_inst->isExecuted()) {
+ // Keep this number correct. We have not yet actually executed
+ // and committed this instruction.
+ thread[tid]->funcExeInst--;
+
+ head_inst->setAtCommit();
+
+ if (head_inst->isNonSpeculative() ||
+ head_inst->isStoreConditional() ||
+ head_inst->isMemBarrier() ||
+ head_inst->isWriteBarrier()) {
+
+ DPRINTF(Commit, "Encountered a barrier or non-speculative "
+ "instruction [sn:%lli] at the head of the ROB, PC %#x.\n",
+ head_inst->seqNum, head_inst->readPC());
+
+#if !FULL_SYSTEM
+ // Hack to make sure syscalls/memory barriers/quiesces
+ // aren't executed until all stores write back their data.
+ // This direct communication shouldn't be used for
+ // anything other than this.
+ if (inst_num > 0 || iewStage->hasStoresToWB())
+#else
+ if ((head_inst->isMemBarrier() || head_inst->isWriteBarrier() ||
+ head_inst->isQuiesce()) &&
+ iewStage->hasStoresToWB())
+#endif
+ {
+ DPRINTF(Commit, "Waiting for all stores to writeback.\n");
+ return false;
+ }
+
+ toIEW->commitInfo[tid].nonSpecSeqNum = head_inst->seqNum;
+
+ // Change the instruction so it won't try to commit again until
+ // it is executed.
+ head_inst->clearCanCommit();
+
+ ++commitNonSpecStalls;
+
+ return false;
+ } else if (head_inst->isLoad()) {
+ DPRINTF(Commit, "[sn:%lli]: Uncached load, PC %#x.\n",
+ head_inst->seqNum, head_inst->readPC());
+
+ // Send back the non-speculative instruction's sequence
+ // number. Tell the lsq to re-execute the load.
+ toIEW->commitInfo[tid].nonSpecSeqNum = head_inst->seqNum;
+ toIEW->commitInfo[tid].uncached = true;
+ toIEW->commitInfo[tid].uncachedLoad = head_inst;
+
+ head_inst->clearCanCommit();
+
+ return false;
+ } else {
+ panic("Trying to commit un-executed instruction "
+ "of unknown type!\n");
+ }
+ }
+
+ if (head_inst->isThreadSync()) {
+ // Not handled for now.
+ panic("Thread sync instructions are not handled yet.\n");
+ }
+
+ // Stores mark themselves as completed.
+ if (!head_inst->isStore()) {
+ head_inst->setCompleted();
+ }
+
+#if USE_CHECKER
+ // Use checker prior to updating anything due to traps or PC
+ // based events.
+ if (cpu->checker) {
+ cpu->checker->verify(head_inst);
+ }
+#endif
+
+ // Check if the instruction caused a fault. If so, trap.
+ Fault inst_fault = head_inst->getFault();
+
+ // DTB will sometimes need the machine instruction for when
+ // faults happen. So we will set it here, prior to the DTB
+ // possibly needing it for its fault.
+ thread[tid]->setInst(
+ static_cast<TheISA::MachInst>(head_inst->staticInst->machInst));
+
+ if (inst_fault != NoFault) {
+ head_inst->setCompleted();
+ DPRINTF(Commit, "Inst [sn:%lli] PC %#x has a fault\n",
+ head_inst->seqNum, head_inst->readPC());
+
+ if (iewStage->hasStoresToWB() || inst_num > 0) {
+ DPRINTF(Commit, "Stores outstanding, fault must wait.\n");
+ return false;
+ }
+
+#if USE_CHECKER
+ if (cpu->checker && head_inst->isStore()) {
+ cpu->checker->verify(head_inst);
+ }
+#endif
+
+ assert(!thread[tid]->inSyscall);
+
+ // Mark that we're in state update mode so that the trap's
+ // execution doesn't generate extra squashes.
+ thread[tid]->inSyscall = true;
+
+ // Execute the trap. Although it's slightly unrealistic in
+ // terms of timing (as it doesn't wait for the full timing of
+ // the trap event to complete before updating state), it's
+ // needed to update the state as soon as possible. This
+ // prevents external agents from changing any specific state
+        // that the trap needs.
+ cpu->trap(inst_fault, tid);
+
+ // Exit state update mode to avoid accidental updating.
+ thread[tid]->inSyscall = false;
+
+ commitStatus[tid] = TrapPending;
+
+ // Generate trap squash event.
+ generateTrapEvent(tid);
+ return false;
+ }
+
+ updateComInstStats(head_inst);
+
+#if FULL_SYSTEM
+    if (thread[tid]->profile) {
+        thread[tid]->profilePC = head_inst->readPC();
+        ProfileNode *node =
+            thread[tid]->profile->consume(thread[tid]->getTC(),
+                                          head_inst->staticInst);
+
+        if (node)
+            thread[tid]->profileNode = node;
+    }
+#endif
+
+ if (head_inst->traceData) {
+ head_inst->traceData->setFetchSeq(head_inst->seqNum);
+ head_inst->traceData->setCPSeq(thread[tid]->numInst);
+ head_inst->traceData->finalize();
+ head_inst->traceData = NULL;
+ }
+
+ // Update the commit rename map
+ for (int i = 0; i < head_inst->numDestRegs(); i++) {
+ renameMap[tid]->setEntry(head_inst->destRegIdx(i),
+ head_inst->renamedDestRegIdx(i));
+ }
+
+    if (head_inst->isCopy())
+        panic("Should not commit any copy instructions!");
+
+ // Finally clear the head ROB entry.
+ rob->retireHead(tid);
+
+ // Return true to indicate that we have committed an instruction.
+ return true;
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::getInsts()
+{
+ DPRINTF(Commit, "Getting instructions from Rename stage.\n");
+
+#if ISA_HAS_DELAY_SLOT
+ // Read any renamed instructions and place them into the ROB.
+ int insts_to_process = std::min((int)renameWidth,
+ (int)(fromRename->size + skidBuffer.size()));
+ int rename_idx = 0;
+
+ DPRINTF(Commit, "%i insts available to process. Rename Insts:%i "
+ "SkidBuffer Insts:%i\n", insts_to_process, fromRename->size,
+ skidBuffer.size());
+#else
+ // Read any renamed instructions and place them into the ROB.
+ int insts_to_process = std::min((int)renameWidth, fromRename->size);
+#endif
+
+
+ for (int inst_num = 0; inst_num < insts_to_process; ++inst_num) {
+ DynInstPtr inst;
+
+#if ISA_HAS_DELAY_SLOT
+ // Get insts from skidBuffer or from Rename
+ if (skidBuffer.size() > 0) {
+ DPRINTF(Commit, "Grabbing skidbuffer inst.\n");
+ inst = skidBuffer.front();
+ skidBuffer.pop();
+ } else {
+ DPRINTF(Commit, "Grabbing rename inst.\n");
+ inst = fromRename->insts[rename_idx++];
+ }
+#else
+ inst = fromRename->insts[inst_num];
+#endif
+ int tid = inst->threadNumber;
+
+ if (!inst->isSquashed() &&
+ commitStatus[tid] != ROBSquashing) {
+ changedROBNumEntries[tid] = true;
+
+ DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into ROB.\n",
+ inst->readPC(), inst->seqNum, tid);
+
+ rob->insertInst(inst);
+
+ assert(rob->getThreadEntries(tid) <= rob->getMaxEntries(tid));
+
+ youngestSeqNum[tid] = inst->seqNum;
+ } else {
+ DPRINTF(Commit, "Instruction PC %#x [sn:%i] [tid:%i] was "
+ "squashed, skipping.\n",
+ inst->readPC(), inst->seqNum, tid);
+ }
+ }
+
+#if ISA_HAS_DELAY_SLOT
+ if (rename_idx < fromRename->size) {
+ DPRINTF(Commit,"Placing Rename Insts into skidBuffer.\n");
+
+ for (;
+ rename_idx < fromRename->size;
+ rename_idx++) {
+ DynInstPtr inst = fromRename->insts[rename_idx];
+ int tid = inst->threadNumber;
+
+ if (!inst->isSquashed()) {
+                DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into "
+                        "skidBuffer.\n", inst->readPC(), inst->seqNum, tid);
+ skidBuffer.push(inst);
+ } else {
+ DPRINTF(Commit, "Instruction PC %#x [sn:%i] [tid:%i] was "
+ "squashed, skipping.\n",
+ inst->readPC(), inst->seqNum, tid);
+ }
+ }
+ }
+#endif
+
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::skidInsert()
+{
+    DPRINTF(Commit, "Attempting to insert instructions from rename into "
+            "skidBuffer.\n");
+
+ for (int inst_num = 0; inst_num < fromRename->size; ++inst_num) {
+ DynInstPtr inst = fromRename->insts[inst_num];
+ int tid = inst->threadNumber;
+
+ if (!inst->isSquashed()) {
+            DPRINTF(Commit, "Inserting PC %#x [sn:%i] [tid:%i] into "
+                    "skidBuffer.\n", inst->readPC(), inst->seqNum, tid);
+ skidBuffer.push(inst);
+ } else {
+ DPRINTF(Commit, "Instruction PC %#x [sn:%i] [tid:%i] was "
+ "squashed, skipping.\n",
+ inst->readPC(), inst->seqNum, tid);
+ }
+ }
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::markCompletedInsts()
+{
+ // Grab completed insts out of the IEW instruction queue, and mark
+ // instructions completed within the ROB.
+ for (int inst_num = 0;
+ inst_num < fromIEW->size && fromIEW->insts[inst_num];
+ ++inst_num)
+ {
+ if (!fromIEW->insts[inst_num]->isSquashed()) {
+ DPRINTF(Commit, "[tid:%i]: Marking PC %#x, [sn:%lli] ready "
+ "within ROB.\n",
+ fromIEW->insts[inst_num]->threadNumber,
+ fromIEW->insts[inst_num]->readPC(),
+ fromIEW->insts[inst_num]->seqNum);
+
+ // Mark the instruction as ready to commit.
+ fromIEW->insts[inst_num]->setCanCommit();
+ }
+ }
+}
+
+template <class Impl>
+bool
+DefaultCommit<Impl>::robDoneSquashing()
+{
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+
+ while (threads != (*activeThreads).end()) {
+ unsigned tid = *threads++;
+
+ if (!rob->isDoneSquashing(tid))
+ return false;
+ }
+
+ return true;
+}
+
+template <class Impl>
+void
+DefaultCommit<Impl>::updateComInstStats(DynInstPtr &inst)
+{
+    // Use a local tid rather than shadowing the thread vector member.
+    unsigned tid = inst->threadNumber;
+
+    //
+    //  Pick off the software prefetches
+    //
+#ifdef TARGET_ALPHA
+    if (inst->isDataPrefetch()) {
+        statComSwp[tid]++;
+    } else {
+        statComInst[tid]++;
+    }
+#else
+    statComInst[tid]++;
+#endif
+
+    //
+    //  Control Instructions
+    //
+    if (inst->isControl())
+        statComBranches[tid]++;
+
+    //
+    //  Memory references
+    //
+    if (inst->isMemRef()) {
+        statComRefs[tid]++;
+
+        if (inst->isLoad()) {
+            statComLoads[tid]++;
+        }
+    }
+
+    if (inst->isMemBarrier()) {
+        statComMembars[tid]++;
+    }
+}
+
+////////////////////////////////////////
+// //
+// SMT COMMIT POLICY MAINTAINED HERE //
+// //
+////////////////////////////////////////
+template <class Impl>
+int
+DefaultCommit<Impl>::getCommittingThread()
+{
+ if (numThreads > 1) {
+ switch (commitPolicy) {
+
+ case Aggressive:
+ //If Policy is Aggressive, commit will call
+ //this function multiple times per
+ //cycle
+ return oldestReady();
+
+ case RoundRobin:
+ return roundRobin();
+
+ case OldestReady:
+ return oldestReady();
+
+ default:
+ return -1;
+ }
+ } else {
+ int tid = (*activeThreads).front();
+
+ if (commitStatus[tid] == Running ||
+ commitStatus[tid] == Idle ||
+ commitStatus[tid] == FetchTrapPending) {
+ return tid;
+ } else {
+ return -1;
+ }
+ }
+}
+
+template<class Impl>
+int
+DefaultCommit<Impl>::roundRobin()
+{
+ std::list<unsigned>::iterator pri_iter = priority_list.begin();
+ std::list<unsigned>::iterator end = priority_list.end();
+
+ while (pri_iter != end) {
+ unsigned tid = *pri_iter;
+
+ if (commitStatus[tid] == Running ||
+ commitStatus[tid] == Idle ||
+ commitStatus[tid] == FetchTrapPending) {
+
+ if (rob->isHeadReady(tid)) {
+ priority_list.erase(pri_iter);
+ priority_list.push_back(tid);
+
+ return tid;
+ }
+ }
+
+ pri_iter++;
+ }
+
+ return -1;
+}
+
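+// Return the committable thread whose ready ROB head has the lowest
+// (oldest) sequence number, or -1 if no active thread has a ready head.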
+template<class Impl>
+int
+DefaultCommit<Impl>::oldestReady()
+{
+ unsigned oldest = 0;
+ InstSeqNum oldest_seq_num = 0;
+ bool first = true;
+
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+
+ while (threads != (*activeThreads).end()) {
+ unsigned tid = *threads++;
+
+ if (!rob->isEmpty(tid) &&
+ (commitStatus[tid] == Running ||
+ commitStatus[tid] == Idle ||
+ commitStatus[tid] == FetchTrapPending)) {
+
+ if (rob->isHeadReady(tid)) {
+
+ DynInstPtr head_inst = rob->readHeadInst(tid);
+
+ if (first) {
+ oldest = tid;
+ oldest_seq_num = head_inst->seqNum;
+ first = false;
+ } else if (head_inst->seqNum < oldest_seq_num) {
+ // Compare sequence numbers, not thread IDs, to find the
+ // thread whose ready head instruction is oldest.
+ oldest = tid;
+ oldest_seq_num = head_inst->seqNum;
+ }
+ }
+ }
+ }
+
+ if (!first) {
+ return oldest;
+ } else {
+ return -1;
+ }
+}
--- /dev/null
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ * Korey Sewell
+ */
+
+#include "config/full_system.hh"
+#include "config/use_checker.hh"
+
+#if FULL_SYSTEM
++#include "cpu/quiesce_event.hh"
+#include "sim/system.hh"
+#else
+#include "sim/process.hh"
+#endif
+
+#include "cpu/activity.hh"
+#include "cpu/simple_thread.hh"
+#include "cpu/thread_context.hh"
+#include "cpu/o3/isa_specific.hh"
+#include "cpu/o3/cpu.hh"
+
+#include "sim/root.hh"
+#include "sim/stat_control.hh"
+
+#if USE_CHECKER
+#include "cpu/checker/cpu.hh"
+#endif
+
+using namespace std;
+using namespace TheISA;
+
+BaseO3CPU::BaseO3CPU(Params *params)
+ : BaseCPU(params), cpu_id(0)
+{
+}
+
+void
+BaseO3CPU::regStats()
+{
+ BaseCPU::regStats();
+}
+
+template <class Impl>
+FullO3CPU<Impl>::TickEvent::TickEvent(FullO3CPU<Impl> *c)
+ : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c)
+{
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::TickEvent::process()
+{
+ cpu->tick();
+}
+
+template <class Impl>
+const char *
+FullO3CPU<Impl>::TickEvent::description()
+{
+ return "FullO3CPU tick event";
+}
+
+template <class Impl>
+FullO3CPU<Impl>::ActivateThreadEvent::ActivateThreadEvent()
+ : Event(&mainEventQueue, CPU_Tick_Pri)
+{
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::ActivateThreadEvent::init(int thread_num,
+ FullO3CPU<Impl> *thread_cpu)
+{
+ tid = thread_num;
+ cpu = thread_cpu;
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::ActivateThreadEvent::process()
+{
+ cpu->activateThread(tid);
+}
+
+template <class Impl>
+const char *
+FullO3CPU<Impl>::ActivateThreadEvent::description()
+{
+ return "FullO3CPU \"Activate Thread\" event";
+}
+
+template <class Impl>
+FullO3CPU<Impl>::DeallocateContextEvent::DeallocateContextEvent()
+ : Event(&mainEventQueue, CPU_Tick_Pri)
+{
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::DeallocateContextEvent::init(int thread_num,
+ FullO3CPU<Impl> *thread_cpu)
+{
+ tid = thread_num;
+ cpu = thread_cpu;
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::DeallocateContextEvent::process()
+{
+ cpu->deactivateThread(tid);
+ cpu->removeThread(tid);
+}
+
+template <class Impl>
+const char *
+FullO3CPU<Impl>::DeallocateContextEvent::description()
+{
+ return "FullO3CPU \"Deallocate Context\" event";
+}
+
+template <class Impl>
+FullO3CPU<Impl>::FullO3CPU(Params *params)
+ : BaseO3CPU(params),
+ tickEvent(this),
+ removeInstsThisCycle(false),
+ fetch(params),
+ decode(params),
+ rename(params),
+ iew(params),
+ commit(params),
+
+ regFile(params->numPhysIntRegs, params->numPhysFloatRegs),
+
+ freeList(params->numberOfThreads,
+ TheISA::NumIntRegs, params->numPhysIntRegs,
+ TheISA::NumFloatRegs, params->numPhysFloatRegs),
+
+ rob(params->numROBEntries, params->squashWidth,
+ params->smtROBPolicy, params->smtROBThreshold,
+ params->numberOfThreads),
+
+ scoreboard(params->numberOfThreads,
+ TheISA::NumIntRegs, params->numPhysIntRegs,
+ TheISA::NumFloatRegs, params->numPhysFloatRegs,
+ TheISA::NumMiscRegs * number_of_threads,
+ TheISA::ZeroReg),
+
+ timeBuffer(params->backComSize, params->forwardComSize),
+ fetchQueue(params->backComSize, params->forwardComSize),
+ decodeQueue(params->backComSize, params->forwardComSize),
+ renameQueue(params->backComSize, params->forwardComSize),
+ iewQueue(params->backComSize, params->forwardComSize),
+ activityRec(NumStages,
+ params->backComSize + params->forwardComSize,
+ params->activity),
+
+ globalSeqNum(1),
+#if FULL_SYSTEM
+ system(params->system),
+ physmem(system->physmem),
+#endif // FULL_SYSTEM
+ mem(params->mem),
+ drainCount(0),
+ deferRegistration(params->deferRegistration),
+ numThreads(number_of_threads)
+{
+ _status = Idle;
+
+ checker = NULL;
+
+ if (params->checker) {
+#if USE_CHECKER
+ BaseCPU *temp_checker = params->checker;
+ checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker);
+ checker->setMemory(mem);
+#if FULL_SYSTEM
+ checker->setSystem(params->system);
+#endif
+#else
+ panic("Checker enabled but not compiled in!");
+#endif // USE_CHECKER
+ }
+
+#if !FULL_SYSTEM
+ thread.resize(number_of_threads);
+ tids.resize(number_of_threads);
+#endif
+
+ // The stages also need their CPU pointer setup. However this
+ // must be done at the upper level CPU because they have pointers
+ // to the upper level CPU, and not this FullO3CPU.
+
+ // Set up Pointers to the activeThreads list for each stage
+ fetch.setActiveThreads(&activeThreads);
+ decode.setActiveThreads(&activeThreads);
+ rename.setActiveThreads(&activeThreads);
+ iew.setActiveThreads(&activeThreads);
+ commit.setActiveThreads(&activeThreads);
+
+ // Give each of the stages the time buffer they will use.
+ fetch.setTimeBuffer(&timeBuffer);
+ decode.setTimeBuffer(&timeBuffer);
+ rename.setTimeBuffer(&timeBuffer);
+ iew.setTimeBuffer(&timeBuffer);
+ commit.setTimeBuffer(&timeBuffer);
+
+ // Also setup each of the stages' queues.
+ fetch.setFetchQueue(&fetchQueue);
+ decode.setFetchQueue(&fetchQueue);
+ commit.setFetchQueue(&fetchQueue);
+ decode.setDecodeQueue(&decodeQueue);
+ rename.setDecodeQueue(&decodeQueue);
+ rename.setRenameQueue(&renameQueue);
+ iew.setRenameQueue(&renameQueue);
+ iew.setIEWQueue(&iewQueue);
+ commit.setIEWQueue(&iewQueue);
+ commit.setRenameQueue(&renameQueue);
+
+ commit.setIEWStage(&iew);
+ rename.setIEWStage(&iew);
+ rename.setCommitStage(&commit);
+
+#if !FULL_SYSTEM
+ int active_threads = params->workload.size();
+
+ if (active_threads > Impl::MaxThreads) {
+ panic("Workload Size too large. Increase the 'MaxThreads'"
+ "constant in your O3CPU impl. file (e.g. o3/alpha/impl.hh) or "
+ "edit your workload size.");
+ }
+#else
+ int active_threads = 1;
+#endif
+
+ //Make Sure That This is a Valid Architecture
+ assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs);
+ assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs);
+
+ rename.setScoreboard(&scoreboard);
+ iew.setScoreboard(&scoreboard);
+
+ // Setup the rename map for whichever stages need it.
+ PhysRegIndex lreg_idx = 0;
+ PhysRegIndex freg_idx = params->numPhysIntRegs; //Index to 1 after int regs
+
+ for (int tid=0; tid < numThreads; tid++) {
+ bool bindRegs = (tid <= active_threads - 1);
+
+ commitRenameMap[tid].init(TheISA::NumIntRegs,
+ params->numPhysIntRegs,
+ lreg_idx, //Index for Logical. Regs
+
+ TheISA::NumFloatRegs,
+ params->numPhysFloatRegs,
+ freg_idx, //Index for Float Regs
+
+ TheISA::NumMiscRegs,
+
+ TheISA::ZeroReg,
+ TheISA::ZeroReg,
+
+ tid,
+ false);
+
+ renameMap[tid].init(TheISA::NumIntRegs,
+ params->numPhysIntRegs,
+ lreg_idx, //Index for Logical. Regs
+
+ TheISA::NumFloatRegs,
+ params->numPhysFloatRegs,
+ freg_idx, //Index for Float Regs
+
+ TheISA::NumMiscRegs,
+
+ TheISA::ZeroReg,
+ TheISA::ZeroReg,
+
+ tid,
+ bindRegs);
+ }
+
+ rename.setRenameMap(renameMap);
+ commit.setRenameMap(commitRenameMap);
+
+ // Give renameMap & rename stage access to the freeList;
+ for (int i=0; i < numThreads; i++) {
+ renameMap[i].setFreeList(&freeList);
+ }
+ rename.setFreeList(&freeList);
+
+ // Setup the ROB for whichever stages need it.
+ commit.setROB(&rob);
+
+ lastRunningCycle = curTick;
+
+ lastActivatedCycle = -1;
+
+ // Give renameMap & rename stage access to the freeList;
+ //for (int i=0; i < numThreads; i++) {
+ //globalSeqNum[i] = 1;
+ //}
+
+ contextSwitch = false;
+}
+
+template <class Impl>
+FullO3CPU<Impl>::~FullO3CPU()
+{
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::fullCPURegStats()
+{
+ BaseO3CPU::regStats();
+
+ // Register any of the O3CPU's stats here.
+ timesIdled
+ .name(name() + ".timesIdled")
+ .desc("Number of times that the entire CPU went into an idle state and"
+ " unscheduled itself")
+ .prereq(timesIdled);
+
+ idleCycles
+ .name(name() + ".idleCycles")
+ .desc("Total number of cycles that the CPU has spent unscheduled due "
+ "to idling")
+ .prereq(idleCycles);
+
+ // Number of Instructions simulated
+ // --------------------------------
+ // Should probably be in Base CPU but need templated
+ // MaxThreads so put in here instead
+ committedInsts
+ .init(numThreads)
+ .name(name() + ".committedInsts")
+ .desc("Number of Instructions Simulated");
+
+ totalCommittedInsts
+ .name(name() + ".committedInsts_total")
+ .desc("Number of Instructions Simulated");
+
+ cpi
+ .name(name() + ".cpi")
+ .desc("CPI: Cycles Per Instruction")
+ .precision(6);
+ cpi = simTicks / committedInsts;
+
+ totalCpi
+ .name(name() + ".cpi_total")
+ .desc("CPI: Total CPI of All Threads")
+ .precision(6);
+ totalCpi = simTicks / totalCommittedInsts;
+
+ ipc
+ .name(name() + ".ipc")
+ .desc("IPC: Instructions Per Cycle")
+ .precision(6);
+ ipc = committedInsts / simTicks;
+
+ totalIpc
+ .name(name() + ".ipc_total")
+ .desc("IPC: Total IPC of All Threads")
+ .precision(6);
+ totalIpc = totalCommittedInsts / simTicks;
+
+}
+
+template <class Impl>
+Port *
+FullO3CPU<Impl>::getPort(const std::string &if_name, int idx)
+{
+ if (if_name == "dcache_port")
+ return iew.getDcachePort();
+ else if (if_name == "icache_port")
+ return fetch.getIcachePort();
+ else
+ panic("No Such Port\n");
+}
+
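+// One CPU clock tick: run every stage, then advance the inter-stage time
+// buffers so values written this cycle become visible next cycle.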
+template <class Impl>
+void
+FullO3CPU<Impl>::tick()
+{
+ DPRINTF(O3CPU, "\n\nFullO3CPU: Ticking main, FullO3CPU.\n");
+
+ ++numCycles;
+
+// activity = false;
+
+ //Tick each of the stages
+ fetch.tick();
+
+ decode.tick();
+
+ rename.tick();
+
+ iew.tick();
+
+ commit.tick();
+
+#if !FULL_SYSTEM
+ doContextSwitch();
+#endif
+
+ // Now advance the time buffers
+ timeBuffer.advance();
+
+ fetchQueue.advance();
+ decodeQueue.advance();
+ renameQueue.advance();
+ iewQueue.advance();
+
+ activityRec.advance();
+
+ if (removeInstsThisCycle) {
+ cleanUpRemovedInsts();
+ }
+
+ if (!tickEvent.scheduled()) {
+ if (_status == SwitchedOut ||
+ getState() == SimObject::Drained) {
+ // increment stat
+ lastRunningCycle = curTick;
+ } else if (!activityRec.active()) {
+ lastRunningCycle = curTick;
+ timesIdled++;
+ } else {
+ tickEvent.schedule(curTick + cycles(1));
+ }
+ }
+
+#if !FULL_SYSTEM
+ updateThreadPriority();
+#endif
+
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::init()
+{
+ if (!deferRegistration) {
+ registerThreadContexts();
+ }
+
+ // Set inSyscall so that the CPU doesn't squash when initially
+ // setting up registers.
+ for (int i = 0; i < number_of_threads; ++i)
+ thread[i]->inSyscall = true;
+
+ for (int tid=0; tid < number_of_threads; tid++) {
+#if FULL_SYSTEM
+ ThreadContext *src_tc = threadContexts[tid];
+#else
+ ThreadContext *src_tc = thread[tid]->getTC();
+#endif
+ // Threads start in the Suspended State
+ if (src_tc->status() != ThreadContext::Suspended) {
+ continue;
+ }
+
+#if FULL_SYSTEM
+ TheISA::initCPU(src_tc, src_tc->readCpuId());
+#endif
+ }
+
+ // Clear inSyscall.
+ for (int i = 0; i < number_of_threads; ++i)
+ thread[i]->inSyscall = false;
+
+ // Initialize stages.
+ fetch.initStage();
+ iew.initStage();
+ rename.initStage();
+ commit.initStage();
+
+ commit.setThreads(thread);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::activateThread(unsigned tid)
+{
+ list<unsigned>::iterator isActive = find(
+ activeThreads.begin(), activeThreads.end(), tid);
+
+ if (isActive == activeThreads.end()) {
+ DPRINTF(O3CPU, "[tid:%i]: Adding to active threads list\n",
+ tid);
+
+ activeThreads.push_back(tid);
+ }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::deactivateThread(unsigned tid)
+{
+ //Remove From Active List, if Active
+ list<unsigned>::iterator thread_it =
+ find(activeThreads.begin(), activeThreads.end(), tid);
+
+ if (thread_it != activeThreads.end()) {
+ DPRINTF(O3CPU,"[tid:%i]: Removing from active threads list\n",
+ tid);
+ activeThreads.erase(thread_it);
+ }
+}
+
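+// Add a thread to the active list, either now or after 'delay' cycles,
+// and make sure the tick event is scheduled so the pipeline runs.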
+template <class Impl>
+void
+FullO3CPU<Impl>::activateContext(int tid, int delay)
+{
+ // Needs to set each stage to running as well.
+ if (delay){
+ DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to activate "
+ "on cycle %d\n", tid, curTick + cycles(delay));
+ scheduleActivateThreadEvent(tid, delay);
+ } else {
+ activateThread(tid);
+ }
+
+ if(lastActivatedCycle < curTick) {
+ scheduleTickEvent(delay);
+
+ // Be sure to signal that there's some activity so the CPU doesn't
+ // deschedule itself.
+ activityRec.activity();
+ fetch.wakeFromQuiesce();
+
+ lastActivatedCycle = curTick;
+
+ _status = Running;
+ }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::deallocateContext(int tid, int delay)
+{
+ // Schedule removal of thread data from CPU
+ if (delay){
+ DPRINTF(O3CPU, "[tid:%i]: Scheduling thread context to deallocate "
+ "on cycle %d\n", tid, curTick + cycles(delay));
+ scheduleDeallocateContextEvent(tid, delay);
+ } else {
+ deactivateThread(tid);
+ removeThread(tid);
+ }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::suspendContext(int tid)
+{
+ DPRINTF(O3CPU,"[tid: %i]: Suspending Thread Context.\n", tid);
+ deactivateThread(tid);
+ if (activeThreads.size() == 0)
+ unscheduleTickEvent();
+ _status = Idle;
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::haltContext(int tid)
+{
+ //For now, this is the same as deallocate
+ DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating", tid);
+ deallocateContext(tid, 1);
+}
+
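+// Bind the incoming thread's architectural registers to free physical
+// registers, seed its PC state from the thread context, and re-partition
+// the ROB and IEW queues for the new thread count.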
+template <class Impl>
+void
+FullO3CPU<Impl>::insertThread(unsigned tid)
+{
+ DPRINTF(O3CPU,"[tid:%i] Initializing thread into CPU");
+ // Will change now that the PC and thread state is internal to the CPU
+ // and not in the ThreadContext.
+#if FULL_SYSTEM
+ ThreadContext *src_tc = system->threadContexts[tid];
+#else
+ ThreadContext *src_tc = tcBase(tid);
+#endif
+
+ //Bind Int Regs to Rename Map
+ for (int ireg = 0; ireg < TheISA::NumIntRegs; ireg++) {
+ PhysRegIndex phys_reg = freeList.getIntReg();
+
+ renameMap[tid].setEntry(ireg,phys_reg);
+ scoreboard.setReg(phys_reg);
+ }
+
+ //Bind Float Regs to Rename Map
+ for (int freg = 0; freg < TheISA::NumFloatRegs; freg++) {
+ PhysRegIndex phys_reg = freeList.getFloatReg();
+
+ renameMap[tid].setEntry(freg,phys_reg);
+ scoreboard.setReg(phys_reg);
+ }
+
+ //Copy Thread Data Into RegFile
+ //this->copyFromTC(tid);
+
+ //Set PC/NPC/NNPC
+ setPC(src_tc->readPC(), tid);
+ setNextPC(src_tc->readNextPC(), tid);
+#if ISA_HAS_DELAY_SLOT
+ setNextNPC(src_tc->readNextNPC(), tid);
+#endif
+
+ src_tc->setStatus(ThreadContext::Active);
+
+ activateContext(tid,1);
+
+ //Reset ROB/IQ/LSQ Entries
+ commit.rob->resetEntries();
+ iew.resetEntries();
+}
+
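+// Undo insertThread(): return the thread's physical registers to the
+// free list and squash any of its instructions still in the pipeline.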
+template <class Impl>
+void
+FullO3CPU<Impl>::removeThread(unsigned tid)
+{
+ DPRINTF(O3CPU,"[tid:%i] Removing thread context from CPU.\n", tid);
+
+ // Copy Thread Data From RegFile
+ // If thread is suspended, it might be re-allocated
+ //this->copyToTC(tid);
+
+ // Unbind Int Regs from Rename Map
+ for (int ireg = 0; ireg < TheISA::NumIntRegs; ireg++) {
+ PhysRegIndex phys_reg = renameMap[tid].lookup(ireg);
+
+ scoreboard.unsetReg(phys_reg);
+ freeList.addReg(phys_reg);
+ }
+
+ // Unbind Float Regs from Rename Map
+ for (int freg = 0; freg < TheISA::NumFloatRegs; freg++) {
+ PhysRegIndex phys_reg = renameMap[tid].lookup(freg);
+
+ scoreboard.unsetReg(phys_reg);
+ freeList.addReg(phys_reg);
+ }
+
+ // Squash Throughout Pipeline
+ InstSeqNum squash_seq_num = commit.rob->readHeadInst(tid)->seqNum;
+ fetch.squash(0, squash_seq_num, true, tid);
+ decode.squash(tid);
+ rename.squash(squash_seq_num, tid);
+ iew.squash(tid);
+ commit.rob->squash(squash_seq_num, tid);
+
+ assert(iew.ldstQueue.getCount(tid) == 0);
+
+ // Reset ROB/IQ/LSQ Entries
+ if (activeThreads.size() >= 1) {
+ commit.rob->resetEntries();
+ iew.resetEntries();
+ }
+}
+
+
+template <class Impl>
+void
+FullO3CPU<Impl>::activateWhenReady(int tid)
+{
+ DPRINTF(O3CPU,"[tid:%i]: Checking if resources are available for incoming"
+ "(e.g. PhysRegs/ROB/IQ/LSQ) \n",
+ tid);
+
+ bool ready = true;
+
+ if (freeList.numFreeIntRegs() < TheISA::NumIntRegs) {
+ DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough "
+ "Phys. Int. Regs.\n",
+ tid);
+ ready = false;
+ } else if (freeList.numFreeFloatRegs() < TheISA::NumFloatRegs) {
+ DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough "
+ "Phys. Float. Regs.\n",
+ tid);
+ ready = false;
+ } else if (commit.rob->numFreeEntries() <
+ commit.rob->entryAmount(activeThreads.size() + 1)) {
+ DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough "
+ "ROB entries.\n",
+ tid);
+ ready = false;
+ } else if (iew.instQueue.numFreeEntries() <
+ iew.instQueue.entryAmount(activeThreads.size() + 1)) {
+ DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough "
+ "IQ entries.\n",
+ tid);
+ ready = false;
+ } else if (iew.ldstQueue.numFreeEntries() <
+ iew.ldstQueue.entryAmount(activeThreads.size() + 1)) {
+ DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough "
+ "LSQ entries.\n",
+ tid);
+ ready = false;
+ }
+
+ if (ready) {
+ insertThread(tid);
+
+ contextSwitch = false;
+
+ cpuWaitList.remove(tid);
+ } else {
+ suspendContext(tid);
+
+ //blocks fetch
+ contextSwitch = true;
+
+ //@todo: dont always add to waitlist
+ //do waitlist
+ cpuWaitList.push_back(tid);
+ }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::serialize(std::ostream &os)
+{
+ SimObject::State so_state = SimObject::getState();
+ SERIALIZE_ENUM(so_state);
+ BaseCPU::serialize(os);
+ nameOut(os, csprintf("%s.tickEvent", name()));
+ tickEvent.serialize(os);
+
+ // Use SimpleThread's ability to checkpoint to make it easier to
+ // write out the registers. Also make this static so it doesn't
+ // get instantiated multiple times (causes a panic in statistics).
+ static SimpleThread temp;
+
+ for (int i = 0; i < thread.size(); i++) {
+ nameOut(os, csprintf("%s.xc.%i", name(), i));
+ temp.copyTC(thread[i]->getTC());
+ temp.serialize(os);
+ }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string §ion)
+{
+ SimObject::State so_state;
+ UNSERIALIZE_ENUM(so_state);
+ BaseCPU::unserialize(cp, section);
+ tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
+
+ // Use SimpleThread's ability to checkpoint to make it easier to
+ // read in the registers. Also make this static so it doesn't
+ // get instantiated multiple times (causes a panic in statistics).
+ static SimpleThread temp;
+
+ for (int i = 0; i < thread.size(); i++) {
+ temp.copyTC(thread[i]->getTC());
+ temp.unserialize(cp, csprintf("%s.xc.%i", section, i));
+ thread[i]->getTC()->copyArchRegs(temp.getTC());
+ }
+}
+
+template <class Impl>
+unsigned int
+FullO3CPU<Impl>::drain(Event *drain_event)
+{
++ DPRINTF(O3CPU, "Switching out\n");
++ BaseCPU::switchOut(_sampler);
+ drainCount = 0;
+ fetch.drain();
+ decode.drain();
+ rename.drain();
+ iew.drain();
+ commit.drain();
+
+ // Wake the CPU and record activity so everything can drain out if
+ // the CPU was not able to immediately drain.
+ if (getState() != SimObject::Drained) {
+ // A bit of a hack...set the drainEvent after all the drain()
+ // calls have been made, that way if all of the stages drain
+ // immediately, the signalDrained() function knows not to call
+ // process on the drain event.
+ drainEvent = drain_event;
+
+ wakeCPU();
+ activityRec.activity();
+
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::resume()
+{
+ assert(system->getMemoryMode() == System::Timing);
+ fetch.resume();
+ decode.resume();
+ rename.resume();
+ iew.resume();
+ commit.resume();
+
+ changeState(SimObject::Running);
+
+ if (_status == SwitchedOut || _status == Idle)
+ return;
+
+ if (!tickEvent.scheduled())
+ tickEvent.schedule(curTick);
+ _status = Running;
+}
+
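+// Called once by each stage when it has fully drained; once all
+// NumStages stages have reported, squash the tick event and move the
+// CPU to the Drained state.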
+template <class Impl>
+void
+FullO3CPU<Impl>::signalDrained()
+{
+ if (++drainCount == NumStages) {
+ if (tickEvent.scheduled())
+ tickEvent.squash();
+
+ changeState(SimObject::Drained);
+
+ if (drainEvent) {
+ drainEvent->process();
+ drainEvent = NULL;
+ }
+ }
+ assert(drainCount <= NumStages);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::switchOut()
+{
+ fetch.switchOut();
+ rename.switchOut();
+ iew.switchOut();
+ commit.switchOut();
+ instList.clear();
+ while (!removeList.empty()) {
+ removeList.pop();
+ }
+
+ _status = SwitchedOut;
+#if USE_CHECKER
+ if (checker)
+ checker->switchOut();
+#endif
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
+{
+ // Flush out any old data from the time buffers.
+ for (int i = 0; i < timeBuffer.getSize(); ++i) {
+ timeBuffer.advance();
+ fetchQueue.advance();
+ decodeQueue.advance();
+ renameQueue.advance();
+ iewQueue.advance();
+ }
+
+ activityRec.reset();
+
+ BaseCPU::takeOverFrom(oldCPU);
+
+ fetch.takeOverFrom();
+ decode.takeOverFrom();
+ rename.takeOverFrom();
+ iew.takeOverFrom();
+ commit.takeOverFrom();
+
+ assert(!tickEvent.scheduled());
+
+ // @todo: Figure out how to properly select the tid to put onto
+ // the active threads list.
+ int tid = 0;
+
+ list<unsigned>::iterator isActive = find(
+ activeThreads.begin(), activeThreads.end(), tid);
+
+ if (isActive == activeThreads.end()) {
+ //May Need to Re-code this if the delay variable is the delay
+ //needed for thread to activate
+ DPRINTF(O3CPU, "Adding Thread %i to active threads list\n",
+ tid);
+
+ activeThreads.push_back(tid);
+ }
+
+ // Set all statuses to active, schedule the CPU's tick event.
+ // @todo: Fix up statuses so this is handled properly
+ for (int i = 0; i < threadContexts.size(); ++i) {
+ ThreadContext *tc = threadContexts[i];
+ if (tc->status() == ThreadContext::Active && _status != Running) {
+ _status = Running;
+ tickEvent.schedule(curTick);
+ }
+ }
+ if (!tickEvent.scheduled())
+ tickEvent.schedule(curTick);
+}
+
+template <class Impl>
+uint64_t
+FullO3CPU<Impl>::readIntReg(int reg_idx)
+{
+ return regFile.readIntReg(reg_idx);
+}
+
+template <class Impl>
+FloatReg
+FullO3CPU<Impl>::readFloatReg(int reg_idx, int width)
+{
+ return regFile.readFloatReg(reg_idx, width);
+}
+
+template <class Impl>
+FloatReg
+FullO3CPU<Impl>::readFloatReg(int reg_idx)
+{
+ return regFile.readFloatReg(reg_idx);
+}
+
+template <class Impl>
+FloatRegBits
+FullO3CPU<Impl>::readFloatRegBits(int reg_idx, int width)
+{
+ return regFile.readFloatRegBits(reg_idx, width);
+}
+
+template <class Impl>
+FloatRegBits
+FullO3CPU<Impl>::readFloatRegBits(int reg_idx)
+{
+ return regFile.readFloatRegBits(reg_idx);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setIntReg(int reg_idx, uint64_t val)
+{
+ regFile.setIntReg(reg_idx, val);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setFloatReg(int reg_idx, FloatReg val, int width)
+{
+ regFile.setFloatReg(reg_idx, val, width);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setFloatReg(int reg_idx, FloatReg val)
+{
+ regFile.setFloatReg(reg_idx, val);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val, int width)
+{
+ regFile.setFloatRegBits(reg_idx, val, width);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val)
+{
+ regFile.setFloatRegBits(reg_idx, val);
+}
+
+template <class Impl>
+uint64_t
+FullO3CPU<Impl>::readArchIntReg(int reg_idx, unsigned tid)
+{
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
+
+ return regFile.readIntReg(phys_reg);
+}
+
+template <class Impl>
+float
+FullO3CPU<Impl>::readArchFloatRegSingle(int reg_idx, unsigned tid)
+{
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
+
+ return regFile.readFloatReg(phys_reg);
+}
+
+template <class Impl>
+double
+FullO3CPU<Impl>::readArchFloatRegDouble(int reg_idx, unsigned tid)
+{
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
+
+ return regFile.readFloatReg(phys_reg, 64);
+}
+
+template <class Impl>
+uint64_t
+FullO3CPU<Impl>::readArchFloatRegInt(int reg_idx, unsigned tid)
+{
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
+
+ return regFile.readFloatRegBits(phys_reg);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setArchIntReg(int reg_idx, uint64_t val, unsigned tid)
+{
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
+
+ regFile.setIntReg(phys_reg, val);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setArchFloatRegSingle(int reg_idx, float val, unsigned tid)
+{
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
+
+ regFile.setFloatReg(phys_reg, val);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setArchFloatRegDouble(int reg_idx, double val, unsigned tid)
+{
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
+
+ regFile.setFloatReg(phys_reg, val, 64);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid)
+{
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
+
+ regFile.setFloatRegBits(phys_reg, val);
+}
+
+template <class Impl>
+uint64_t
+FullO3CPU<Impl>::readPC(unsigned tid)
+{
+ return commit.readPC(tid);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setPC(Addr new_PC,unsigned tid)
+{
+ commit.setPC(new_PC, tid);
+}
+
+template <class Impl>
+uint64_t
+FullO3CPU<Impl>::readNextPC(unsigned tid)
+{
+ return commit.readNextPC(tid);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setNextPC(uint64_t val,unsigned tid)
+{
+ commit.setNextPC(val, tid);
+}
+
+template <class Impl>
+uint64_t
+FullO3CPU<Impl>::readNextNPC(unsigned tid)
+{
+ return commit.readNextNPC(tid);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setNextNPC(uint64_t val,unsigned tid)
+{
+ commit.setNextNPC(val, tid);
+}
+
+template <class Impl>
+typename FullO3CPU<Impl>::ListIt
+FullO3CPU<Impl>::addInst(DynInstPtr &inst)
+{
+ instList.push_back(inst);
+
+ return --(instList.end());
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::instDone(unsigned tid)
+{
+ // Keep an instruction count.
+ thread[tid]->numInst++;
+ thread[tid]->numInsts++;
+ committedInsts[tid]++;
+ totalCommittedInsts++;
+
+ // Check for instruction-count-based events.
+ comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::addToRemoveList(DynInstPtr &inst)
+{
+ removeInstsThisCycle = true;
+
+ removeList.push(inst->getInstListIt());
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::removeFrontInst(DynInstPtr &inst)
+{
+ DPRINTF(O3CPU, "Removing committed instruction [tid:%i] PC %#x "
+ "[sn:%lli]\n",
+ inst->threadNumber, inst->readPC(), inst->seqNum);
+
+ removeInstsThisCycle = true;
+
+ // Remove the front instruction.
+ removeList.push(inst->getInstListIt());
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid,
+ bool squash_delay_slot,
+ const InstSeqNum &delay_slot_seq_num)
+{
+ DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction"
+ " list.\n", tid);
+
+ ListIt end_it;
+
+ bool rob_empty = false;
+
+ if (instList.empty()) {
+ return;
+ } else if (rob.isEmpty(/*tid*/)) {
+ DPRINTF(O3CPU, "ROB is empty, squashing all insts.\n");
+ end_it = instList.begin();
+ rob_empty = true;
+ } else {
+ end_it = (rob.readTailInst(tid))->getInstListIt();
+ DPRINTF(O3CPU, "ROB is not empty, squashing insts not in ROB.\n");
+ }
+
+ removeInstsThisCycle = true;
+
+ ListIt inst_it = instList.end();
+
+ inst_it--;
+
+ // Walk through the instruction list, removing any instructions
+ // that were inserted after the given instruction iterator, end_it.
+ while (inst_it != end_it) {
+ assert(!instList.empty());
+
+#if ISA_HAS_DELAY_SLOT
+ if(!squash_delay_slot &&
+ delay_slot_seq_num >= (*inst_it)->seqNum) {
+ break;
+ }
+#endif
+ squashInstIt(inst_it, tid);
+
+ inst_it--;
+ }
+
+ // If the ROB was empty, then we actually need to remove the first
+ // instruction as well.
+ if (rob_empty) {
+ squashInstIt(inst_it, tid);
+ }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num,
+ unsigned tid)
+{
+ assert(!instList.empty());
+
+ removeInstsThisCycle = true;
+
+ ListIt inst_iter = instList.end();
+
+ inst_iter--;
+
+ DPRINTF(O3CPU, "Deleting instructions from instruction "
+ "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n",
+ tid, seq_num, (*inst_iter)->seqNum);
+
+ while ((*inst_iter)->seqNum > seq_num) {
+
+ bool break_loop = (inst_iter == instList.begin());
+
+ squashInstIt(inst_iter, tid);
+
+ inst_iter--;
+
+ if (break_loop)
+ break;
+ }
+}
+
+template <class Impl>
+inline void
+FullO3CPU<Impl>::squashInstIt(const ListIt &instIt, const unsigned &tid)
+{
+ if ((*instIt)->threadNumber == tid) {
+ DPRINTF(O3CPU, "Squashing instruction, "
+ "[tid:%i] [sn:%lli] PC %#x\n",
+ (*instIt)->threadNumber,
+ (*instIt)->seqNum,
+ (*instIt)->readPC());
+
+ // Mark it as squashed.
+ (*instIt)->setSquashed();
+
+ // @todo: Formulate a consistent method for deleting
+ // instructions from the instruction list
+ // Remove the instruction from the list.
+ removeList.push(instIt);
+ }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::cleanUpRemovedInsts()
+{
+ while (!removeList.empty()) {
+ DPRINTF(O3CPU, "Removing instruction, "
+ "[tid:%i] [sn:%lli] PC %#x\n",
+ (*removeList.front())->threadNumber,
+ (*removeList.front())->seqNum,
+ (*removeList.front())->readPC());
+
+ instList.erase(removeList.front());
+
+ removeList.pop();
+ }
+
+ removeInstsThisCycle = false;
+}
+/*
+template <class Impl>
+void
+FullO3CPU<Impl>::removeAllInsts()
+{
+ instList.clear();
+}
+*/
+template <class Impl>
+void
+FullO3CPU<Impl>::dumpInsts()
+{
+ int num = 0;
+
+ ListIt inst_list_it = instList.begin();
+
+ cprintf("Dumping Instruction List\n");
+
+ while (inst_list_it != instList.end()) {
+ cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n"
+ "Squashed:%i\n\n",
+ num, (*inst_list_it)->readPC(), (*inst_list_it)->threadNumber,
+ (*inst_list_it)->seqNum, (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+ inst_list_it++;
+ ++num;
+ }
+}
+/*
+template <class Impl>
+void
+FullO3CPU<Impl>::wakeDependents(DynInstPtr &inst)
+{
+ iew.wakeDependents(inst);
+}
+*/
+template <class Impl>
+void
+FullO3CPU<Impl>::wakeCPU()
+{
+ if (activityRec.active() || tickEvent.scheduled()) {
+ DPRINTF(Activity, "CPU already running.\n");
+ return;
+ }
+
+ DPRINTF(Activity, "Waking up CPU\n");
+
+ idleCycles += (curTick - 1) - lastRunningCycle;
+
+ tickEvent.schedule(curTick);
+}
+
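+// Claim and return the first unused hardware thread slot, or -1 if all
+// slots are taken.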
+template <class Impl>
+int
+FullO3CPU<Impl>::getFreeTid()
+{
+ for (int i=0; i < numThreads; i++) {
+ if (!tids[i]) {
+ tids[i] = true;
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::doContextSwitch()
+{
+ if (contextSwitch) {
+
+ //ADD CODE TO DEACTIVATE THREAD HERE (???)
+
+ // Walk a snapshot, since activateWhenReady() edits cpuWaitList.
+ std::list<unsigned> wait_list = cpuWaitList;
+ std::list<unsigned>::iterator wait_it = wait_list.begin();
+ while (wait_it != wait_list.end()) {
+ activateWhenReady(*wait_it++);
+ }
+
+ // Wait list drained; the pending context switch is complete.
+ if (cpuWaitList.size() == 0)
+ contextSwitch = false;
+ }
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::updateThreadPriority()
+{
+ if (activeThreads.size() > 1)
+ {
+ //DEFAULT TO ROUND ROBIN SCHEME
+ //e.g. Move highest priority to end of thread list
+ list<unsigned>::iterator list_begin = activeThreads.begin();
+ list<unsigned>::iterator list_end = activeThreads.end();
+
+ unsigned high_thread = *list_begin;
+
+ activeThreads.erase(list_begin);
+
+ activeThreads.push_back(high_thread);
+ }
+}
+
+// Explicit instantiation of FullO3CPU.
+template class FullO3CPU<O3CPUImpl>;
--- /dev/null
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ * Korey Sewell
+ */
+
+#include "config/use_checker.hh"
+
+#include "arch/isa_traits.hh"
+#include "arch/utility.hh"
+#include "cpu/checker/cpu.hh"
+#include "cpu/exetrace.hh"
+#include "cpu/o3/fetch.hh"
+#include "mem/packet.hh"
+#include "mem/request.hh"
+#include "sim/byteswap.hh"
+#include "sim/host.hh"
+#include "sim/root.hh"
+
+#if FULL_SYSTEM
+#include "arch/tlb.hh"
+#include "arch/vtophys.hh"
+#include "base/remote_gdb.hh"
+#include "sim/system.hh"
+#endif // FULL_SYSTEM
+
+#include <algorithm>
+
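+// Fetch drives its instruction-cache port in timing mode only; the
+// atomic and functional callbacks below should never fire, so they panic.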
+template<class Impl>
+Tick
+DefaultFetch<Impl>::IcachePort::recvAtomic(PacketPtr pkt)
+{
+ panic("DefaultFetch doesn't expect recvAtomic callback!");
+ return curTick;
+}
+
+template<class Impl>
+void
+DefaultFetch<Impl>::IcachePort::recvFunctional(PacketPtr pkt)
+{
+ panic("DefaultFetch doesn't expect recvFunctional callback!");
+}
+
+template<class Impl>
+void
+DefaultFetch<Impl>::IcachePort::recvStatusChange(Status status)
+{
+ if (status == RangeChange)
+ return;
+
+ panic("DefaultFetch doesn't expect recvStatusChange callback!");
+}
+
+template<class Impl>
+bool
+DefaultFetch<Impl>::IcachePort::recvTiming(Packet *pkt)
+{
+ fetch->processCacheCompletion(pkt);
+ return true;
+}
+
+template<class Impl>
+void
+DefaultFetch<Impl>::IcachePort::recvRetry()
+{
+ fetch->recvRetry();
+}
+
+template<class Impl>
+DefaultFetch<Impl>::DefaultFetch(Params *params)
+ : mem(params->mem),
+ branchPred(params),
+ decodeToFetchDelay(params->decodeToFetchDelay),
+ renameToFetchDelay(params->renameToFetchDelay),
+ iewToFetchDelay(params->iewToFetchDelay),
+ commitToFetchDelay(params->commitToFetchDelay),
+ fetchWidth(params->fetchWidth),
+ cacheBlocked(false),
+ retryPkt(NULL),
+ retryTid(-1),
+ numThreads(params->numberOfThreads),
+ numFetchingThreads(params->smtNumFetchingThreads),
+ interruptPending(false),
+ drainPending(false),
+ switchedOut(false)
+{
+ if (numThreads > Impl::MaxThreads)
+ fatal("numThreads is not a valid value\n");
+
+ // Set fetch stage's status to inactive.
+ _status = Inactive;
+
+ std::string policy = params->smtFetchPolicy;
+
+ // Convert string to lowercase
+ std::transform(policy.begin(), policy.end(), policy.begin(),
+ (int(*)(int)) tolower);
+
+ // Figure out fetch policy
+ if (policy == "singlethread") {
+ fetchPolicy = SingleThread;
+ if (numThreads > 1)
+ panic("Invalid Fetch Policy for a SMT workload.");
+ } else if (policy == "roundrobin") {
+ fetchPolicy = RoundRobin;
+ DPRINTF(Fetch, "Fetch policy set to Round Robin\n");
+ } else if (policy == "branch") {
+ fetchPolicy = Branch;
+ DPRINTF(Fetch, "Fetch policy set to Branch Count\n");
+ } else if (policy == "iqcount") {
+ fetchPolicy = IQ;
+ DPRINTF(Fetch, "Fetch policy set to IQ count\n");
+ } else if (policy == "lsqcount") {
+ fetchPolicy = LSQ;
+ DPRINTF(Fetch, "Fetch policy set to LSQ count\n");
+ } else {
+ fatal("Invalid Fetch Policy. Options Are: {SingleThread,"
+ " RoundRobin,LSQcount,IQcount}\n");
+ }
+
+ // Size of cache block.
+ cacheBlkSize = 64;
+
+ // Create mask to get rid of offset bits.
+ cacheBlkMask = (cacheBlkSize - 1);
+
+ for (int tid=0; tid < numThreads; tid++) {
+
+ fetchStatus[tid] = Running;
+
+ priorityList.push_back(tid);
+
+ memReq[tid] = NULL;
+
+ // Create space to store a cache line.
+ cacheData[tid] = new uint8_t[cacheBlkSize];
+ cacheDataPC[tid] = 0;
+ cacheDataValid[tid] = false;
+
+ delaySlotInfo[tid].branchSeqNum = -1;
+ delaySlotInfo[tid].numInsts = 0;
+ delaySlotInfo[tid].targetAddr = 0;
+ delaySlotInfo[tid].targetReady = false;
+
+ stalls[tid].decode = false;
+ stalls[tid].rename = false;
+ stalls[tid].iew = false;
+ stalls[tid].commit = false;
+ }
+
+ // Get the size of an instruction.
+ instSize = sizeof(TheISA::MachInst);
+}
+
+template <class Impl>
+std::string
+DefaultFetch<Impl>::name() const
+{
+ return cpu->name() + ".fetch";
+}
+
+template <class Impl>
+void
+DefaultFetch<Impl>::regStats()
+{
+ icacheStallCycles
+ .name(name() + ".icacheStallCycles")
+ .desc("Number of cycles fetch is stalled on an Icache miss")
+ .prereq(icacheStallCycles);
+
+ fetchedInsts
+ .name(name() + ".Insts")
+ .desc("Number of instructions fetch has processed")
+ .prereq(fetchedInsts);
+
+ fetchedBranches
+ .name(name() + ".Branches")
+ .desc("Number of branches that fetch encountered")
+ .prereq(fetchedBranches);
+
+ predictedBranches
+ .name(name() + ".predictedBranches")
+ .desc("Number of branches that fetch has predicted taken")
+ .prereq(predictedBranches);
+
+ fetchCycles
+ .name(name() + ".Cycles")
+ .desc("Number of cycles fetch has run and was not squashing or"
+ " blocked")
+ .prereq(fetchCycles);
+
+ fetchSquashCycles
+ .name(name() + ".SquashCycles")
+ .desc("Number of cycles fetch has spent squashing")
+ .prereq(fetchSquashCycles);
+
+ fetchIdleCycles
+ .name(name() + ".IdleCycles")
+ .desc("Number of cycles fetch was idle")
+ .prereq(fetchIdleCycles);
+
+ fetchBlockedCycles
+ .name(name() + ".BlockedCycles")
+ .desc("Number of cycles fetch has spent blocked")
+ .prereq(fetchBlockedCycles);
+
+ fetchedCacheLines
+ .name(name() + ".CacheLines")
+ .desc("Number of cache lines fetched")
+ .prereq(fetchedCacheLines);
+
+ fetchMiscStallCycles
+ .name(name() + ".MiscStallCycles")
+ .desc("Number of cycles fetch has spent waiting on interrupts, or "
+ "bad addresses, or out of MSHRs")
+ .prereq(fetchMiscStallCycles);
+
+ fetchIcacheSquashes
+ .name(name() + ".IcacheSquashes")
+ .desc("Number of outstanding Icache misses that were squashed")
+ .prereq(fetchIcacheSquashes);
+
+ fetchNisnDist
+ .init(/* base value */ 0,
+ /* last value */ fetchWidth,
+ /* bucket size */ 1)
+ .name(name() + ".rateDist")
+ .desc("Number of instructions fetched each cycle (Total)")
+ .flags(Stats::pdf);
+
+ idleRate
+ .name(name() + ".idleRate")
+ .desc("Percent of cycles fetch was idle")
+ .prereq(idleRate);
+ idleRate = fetchIdleCycles * 100 / cpu->numCycles;
+
+ branchRate
+ .name(name() + ".branchRate")
+ .desc("Number of branch fetches per cycle")
+ .flags(Stats::total);
+ branchRate = fetchedBranches / cpu->numCycles;
+
+ fetchRate
+ .name(name() + ".rate")
+ .desc("Number of inst fetches per cycle")
+ .flags(Stats::total);
+ fetchRate = fetchedInsts / cpu->numCycles;
+
+ branchPred.regStats();
+}
+
+template<class Impl>
+void
+DefaultFetch<Impl>::setCPU(O3CPU *cpu_ptr)
+{
+ DPRINTF(Fetch, "Setting the CPU pointer.\n");
+ cpu = cpu_ptr;
+
+ // Name is finally available, so create the port.
+ icachePort = new IcachePort(this);
+
+#if USE_CHECKER
+ if (cpu->checker) {
+ cpu->checker->setIcachePort(icachePort);
+ }
+#endif
+
+ // Schedule fetch to get the correct PC from the CPU
+ // scheduleFetchStartupEvent(1);
+
+ // Fetch needs to start fetching instructions at the very beginning,
+ // so it must start up in active state.
+ switchToActive();
+}
+
+template<class Impl>
+void
+DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
+{
+ DPRINTF(Fetch, "Setting the time buffer pointer.\n");
+ timeBuffer = time_buffer;
+
+ // Create wires to get information from proper places in time buffer.
+ fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
+ fromRename = timeBuffer->getWire(-renameToFetchDelay);
+ fromIEW = timeBuffer->getWire(-iewToFetchDelay);
+ fromCommit = timeBuffer->getWire(-commitToFetchDelay);
+}
+
+template<class Impl>
+void
+DefaultFetch<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
+{
+ DPRINTF(Fetch, "Setting active threads list pointer.\n");
+ activeThreads = at_ptr;
+}
+
+template<class Impl>
+void
+DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
+{
+ DPRINTF(Fetch, "Setting the fetch queue pointer.\n");
+ fetchQueue = fq_ptr;
+
+ // Create wire to write information to proper place in fetch queue.
+ toDecode = fetchQueue->getWire(0);
+}
+
+template<class Impl>
+void
+DefaultFetch<Impl>::initStage()
+{
+ // Setup PC and nextPC with initial state.
+ for (int tid = 0; tid < numThreads; tid++) {
+ PC[tid] = cpu->readPC(tid);
+ nextPC[tid] = cpu->readNextPC(tid);
+#if ISA_HAS_DELAY_SLOT
+ nextNPC[tid] = cpu->readNextNPC(tid);
+#endif
+ }
+}
+
+template<class Impl>
+void
+DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
+{
+ unsigned tid = pkt->req->getThreadNum();
+
+ DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n",tid);
+
+ // Only change the status if it's still waiting on the icache access
+ // to return.
+ if (fetchStatus[tid] != IcacheWaitResponse ||
+ pkt->req != memReq[tid] ||
+ isSwitchedOut()) {
+ ++fetchIcacheSquashes;
+ delete pkt->req;
+ delete pkt;
+ return;
+ }
+
+ memcpy(cacheData[tid], pkt->getPtr<uint8_t>(), cacheBlkSize);
+ cacheDataValid[tid] = true;
+
+ if (!drainPending) {
+ // Wake up the CPU (if it went to sleep and was waiting on
+ // this completion event).
+ cpu->wakeCPU();
+
+ DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",
+ tid);
+
+ switchToActive();
+ }
+
+ // Only switch to IcacheAccessComplete if we're not stalled as well.
+ if (checkStall(tid)) {
+ fetchStatus[tid] = Blocked;
+ } else {
+ fetchStatus[tid] = IcacheAccessComplete;
+ }
+
+ // Reset the mem req to NULL.
+ delete pkt->req;
+ delete pkt;
+ memReq[tid] = NULL;
+}
+
+template <class Impl>
+bool
+DefaultFetch<Impl>::drain()
+{
+ // Fetch is ready to drain at any time.
+ cpu->signalDrained();
+ drainPending = true;
+ return true;
+}
+
+template <class Impl>
+void
+DefaultFetch<Impl>::resume()
+{
+ drainPending = false;
+}
+
+template <class Impl>
+void
+DefaultFetch<Impl>::switchOut()
+{
+ switchedOut = true;
+ // Branch predictor needs to have its state cleared.
+ branchPred.switchOut();
+}
+
+template <class Impl>
+void
+DefaultFetch<Impl>::takeOverFrom()
+{
+ // Reset all state
+ for (int i = 0; i < Impl::MaxThreads; ++i) {
+ stalls[i].decode = 0;
+ stalls[i].rename = 0;
+ stalls[i].iew = 0;
+ stalls[i].commit = 0;
+ PC[i] = cpu->readPC(i);
+ nextPC[i] = cpu->readNextPC(i);
+#if ISA_HAS_DELAY_SLOT
+ nextNPC[i] = cpu->readNextNPC(i);
+ delaySlotInfo[i].branchSeqNum = -1;
+ delaySlotInfo[i].numInsts = 0;
+ delaySlotInfo[i].targetAddr = 0;
+ delaySlotInfo[i].targetReady = false;
+#endif
+ fetchStatus[i] = Running;
+ }
+ numInst = 0;
+ wroteToTimeBuffer = false;
+ _status = Inactive;
+ switchedOut = false;
+ interruptPending = false;
+ branchPred.takeOverFrom();
+}
+
+template <class Impl>
+void
+DefaultFetch<Impl>::wakeFromQuiesce()
+{
+ DPRINTF(Fetch, "Waking up from quiesce\n");
+ // Hopefully this is safe
+ // @todo: Allow other threads to wake from quiesce.
+ fetchStatus[0] = Running;
+}
+
+template <class Impl>
+inline void
+DefaultFetch<Impl>::switchToActive()
+{
+ if (_status == Inactive) {
+ DPRINTF(Activity, "Activating stage.\n");
+
+ cpu->activateStage(O3CPU::FetchIdx);
+
+ _status = Active;
+ }
+}
+
+template <class Impl>
+inline void
+DefaultFetch<Impl>::switchToInactive()
+{
+ if (_status == Active) {
+ DPRINTF(Activity, "Deactivating stage.\n");
+
+ cpu->deactivateStage(O3CPU::FetchIdx);
+
+ _status = Inactive;
+ }
+}
+
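+// Run the branch predictor for this instruction and advance next_PC
+// (and next_NPC on delay-slot ISAs); returns whether the branch was
+// predicted taken.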
+template <class Impl>
+bool
+DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC,
+ Addr &next_NPC)
+{
+ // Do branch prediction check here.
+ // A bit of a misnomer...next_PC is actually the current PC until
+ // this function updates it.
+ bool predict_taken;
+
+ if (!inst->isControl()) {
+#if ISA_HAS_DELAY_SLOT
+ Addr cur_PC = next_PC;
+ next_PC = cur_PC + instSize; //next_NPC;
+ next_NPC = cur_PC + (2 * instSize);//next_NPC + instSize;
+ inst->setPredTarg(next_NPC);
+#else
+ next_PC = next_PC + instSize;
+ inst->setPredTarg(next_PC);
+#endif
+ return false;
+ }
+
+ int tid = inst->threadNumber;
+#if ISA_HAS_DELAY_SLOT
+ Addr pred_PC = next_PC;
+ predict_taken = branchPred.predict(inst, pred_PC, tid);
+
+ if (predict_taken) {
+ DPRINTF(Fetch, "[tid:%i]: Branch predicted to be true.\n", tid);
+ } else {
+ DPRINTF(Fetch, "[tid:%i]: Branch predicted to be false.\n", tid);
+ }
+
+ if (predict_taken) {
+ next_PC = next_NPC;
+ next_NPC = pred_PC;
+
+ // Update delay slot info
+ ++delaySlotInfo[tid].numInsts;
+ delaySlotInfo[tid].targetAddr = pred_PC;
+ DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid,
+ delaySlotInfo[tid].numInsts);
+ } else { // !predict_taken
+ if (inst->isCondDelaySlot()) {
+ next_PC = pred_PC;
+ // The delay slot is skipped (annulled) here because the
+ // branch is predicted not taken.
+ } else {
+ next_PC = next_NPC;
+ // No delay slot bookkeeping is needed here since
+ // there is no predicted target to jump to.
+ }
+
+ next_NPC = next_NPC + instSize;
+ }
+#else
+ predict_taken = branchPred.predict(inst, next_PC, tid);
+#endif
+
+ ++fetchedBranches;
+
+ if (predict_taken) {
+ ++predictedBranches;
+ }
+
+ return predict_taken;
+}
+
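+// Kick off a timing read of the cache block holding fetch_PC. Returns
+// false when fetch must stall (blocked cache, pending interrupt, switch
+// out, or no MSHRs); translation faults are reported through ret_fault.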
+template <class Impl>
+bool
+DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid)
+{
+ Fault fault = NoFault;
+
+#if FULL_SYSTEM
+ // Flag to say whether or not address is physical addr.
+ unsigned flags = cpu->inPalMode(fetch_PC) ? PHYSICAL : 0;
+#else
+ unsigned flags = 0;
+#endif // FULL_SYSTEM
+
- warn("cycle %lli: Quiesce instruction encountered, halting fetch!",
- curTick);
++ if (cacheBlocked || isSwitchedOut() || (interruptPending && flags == 0)) {
+ // Hold off fetch from getting new instructions when:
+ // Cache is blocked, or
+ // while an interrupt is pending and we're not in PAL mode, or
+ // fetch is switched out.
+ return false;
+ }
+
+ // Align the fetch PC so it's at the start of a cache block.
+ fetch_PC = icacheBlockAlignPC(fetch_PC);
+
+ // If we've already got the block, no need to try to fetch it again.
+ if (cacheDataValid[tid] && fetch_PC == cacheDataPC[tid]) {
+ return true;
+ }
+
+ // Setup the memReq to do a read of the first instruction's address.
+ // Set the appropriate read size and flags as well.
+ // Build request here.
+ RequestPtr mem_req = new Request(tid, fetch_PC, cacheBlkSize, flags,
+ fetch_PC, cpu->readCpuId(), tid);
+
+ memReq[tid] = mem_req;
+
+ // Translate the instruction request.
+ fault = cpu->translateInstReq(mem_req, cpu->thread[tid]);
+
+ // In the case of faults, the fetch stage may need to stall and wait
+ // for the ITB miss to be handled.
+
+ // If translation was successful, attempt to read the first
+ // instruction.
+ if (fault == NoFault) {
+#if 0
+ if (cpu->system->memctrl->badaddr(memReq[tid]->paddr) ||
+ memReq[tid]->flags & UNCACHEABLE) {
+ DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a "
+ "misspeculating path)!",
+ memReq[tid]->paddr);
+ ret_fault = TheISA::genMachineCheckFault();
+ return false;
+ }
+#endif
+
+ // Build packet here.
+ PacketPtr data_pkt = new Packet(mem_req,
+ Packet::ReadReq, Packet::Broadcast);
+ data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]);
+
+ cacheDataPC[tid] = fetch_PC;
+ cacheDataValid[tid] = false;
+
+ DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
+
+ fetchedCacheLines++;
+
+ // Now do the timing access to see whether or not the instruction
+ // exists within the cache.
+ if (!icachePort->sendTiming(data_pkt)) {
+ assert(retryPkt == NULL);
+ assert(retryTid == -1);
+ DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
+ fetchStatus[tid] = IcacheWaitRetry;
+ retryPkt = data_pkt;
+ retryTid = tid;
+ cacheBlocked = true;
+ return false;
+ }
+
+ DPRINTF(Fetch, "[tid:%i]: Doing cache access.\n", tid);
+
+ lastIcacheStall[tid] = curTick;
+
+ DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache "
+ "response.\n", tid);
+
+ fetchStatus[tid] = IcacheWaitResponse;
+ } else {
+ delete mem_req;
+ memReq[tid] = NULL;
+ }
+
+ ret_fault = fault;
+ return true;
+}
+
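+// Reset the thread's PC/nextPC state to new_PC and drop any outstanding
+// icache request or retrying packet that belongs to this thread.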
+template <class Impl>
+inline void
+DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid)
+{
+ DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x.\n",
+ tid, new_PC);
+
+ PC[tid] = new_PC;
+ nextPC[tid] = new_PC + instSize;
+ nextNPC[tid] = new_PC + (2 * instSize);
+
+ // Clear the icache miss if it's outstanding.
+ if (fetchStatus[tid] == IcacheWaitResponse) {
+ DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",
+ tid);
+ memReq[tid] = NULL;
+ }
+
+ // Get rid of the retrying packet if it was from this thread.
+ if (retryTid == tid) {
+ assert(cacheBlocked);
+ cacheBlocked = false;
+ retryTid = -1;
+ delete retryPkt->req;
+ delete retryPkt;
+ retryPkt = NULL;
+ }
+
+ fetchStatus[tid] = Squashing;
+
+ ++fetchSquashCycles;
+}
+
+template<class Impl>
+void
+DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC,
+ const InstSeqNum &seq_num,
+ unsigned tid)
+{
+ DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid);
+
+ doSquash(new_PC, tid);
+
+#if ISA_HAS_DELAY_SLOT
+ if (seq_num <= delaySlotInfo[tid].branchSeqNum) {
+ delaySlotInfo[tid].numInsts = 0;
+ delaySlotInfo[tid].targetAddr = 0;
+ delaySlotInfo[tid].targetReady = false;
+ }
+#endif
+
+ // Tell the CPU to remove any instructions that are in flight between
+ // fetch and decode.
+ cpu->removeInstsUntil(seq_num, tid);
+}
+
+template<class Impl>
+bool
+DefaultFetch<Impl>::checkStall(unsigned tid) const
+{
+ bool ret_val = false;
+
+ if (cpu->contextSwitch) {
+ DPRINTF(Fetch,"[tid:%i]: Stalling for a context switch.\n",tid);
+ ret_val = true;
+ } else if (stalls[tid].decode) {
+ DPRINTF(Fetch,"[tid:%i]: Stall from Decode stage detected.\n",tid);
+ ret_val = true;
+ } else if (stalls[tid].rename) {
+ DPRINTF(Fetch,"[tid:%i]: Stall from Rename stage detected.\n",tid);
+ ret_val = true;
+ } else if (stalls[tid].iew) {
+ DPRINTF(Fetch,"[tid:%i]: Stall from IEW stage detected.\n",tid);
+ ret_val = true;
+ } else if (stalls[tid].commit) {
+ DPRINTF(Fetch,"[tid:%i]: Stall from Commit stage detected.\n",tid);
+ ret_val = true;
+ }
+
+ return ret_val;
+}
+
+template<class Impl>
+typename DefaultFetch<Impl>::FetchStatus
+DefaultFetch<Impl>::updateFetchStatus()
+{
+ //Check Running
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+
+ while (threads != (*activeThreads).end()) {
+
+ unsigned tid = *threads++;
+
+ if (fetchStatus[tid] == Running ||
+ fetchStatus[tid] == Squashing ||
+ fetchStatus[tid] == IcacheAccessComplete) {
+
+ if (_status == Inactive) {
+ DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid);
+
+ if (fetchStatus[tid] == IcacheAccessComplete) {
+ DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache"
+ "completion\n",tid);
+ }
+
+ cpu->activateStage(O3CPU::FetchIdx);
+ }
+
+ return Active;
+ }
+ }
+
+ // Stage is switching from active to inactive, notify CPU of it.
+ if (_status == Active) {
+ DPRINTF(Activity, "Deactivating stage.\n");
+
+ cpu->deactivateStage(O3CPU::FetchIdx);
+ }
+
+ return Inactive;
+}
+
+template <class Impl>
+void
+DefaultFetch<Impl>::squash(const Addr &new_PC, const InstSeqNum &seq_num,
+ bool squash_delay_slot, unsigned tid)
+{
+ DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid);
+
+ doSquash(new_PC, tid);
+
+#if ISA_HAS_DELAY_SLOT
+ if (seq_num <= delaySlotInfo[tid].branchSeqNum) {
+ delaySlotInfo[tid].numInsts = 0;
+ delaySlotInfo[tid].targetAddr = 0;
+ delaySlotInfo[tid].targetReady = false;
+ }
+
+ // Tell the CPU to remove any instructions that are not in the ROB.
+ cpu->removeInstsNotInROB(tid, squash_delay_slot, seq_num);
+#else
+ // Tell the CPU to remove any instructions that are not in the ROB.
+ cpu->removeInstsNotInROB(tid, true, 0);
+#endif
+}
+
+template <class Impl>
+void
+DefaultFetch<Impl>::tick()
+{
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+ bool status_change = false;
+
+ wroteToTimeBuffer = false;
+
+ while (threads != (*activeThreads).end()) {
+ unsigned tid = *threads++;
+
+ // Check the signals for each thread to determine the proper status
+ // for each thread.
+ bool updated_status = checkSignalsAndUpdate(tid);
+ status_change = status_change || updated_status;
+ }
+
+ DPRINTF(Fetch, "Running stage.\n");
+
+ // Reset the number of the instruction we're fetching.
+ numInst = 0;
+
+#if FULL_SYSTEM
+ if (fromCommit->commitInfo[0].interruptPending) {
+ interruptPending = true;
+ }
+
+ if (fromCommit->commitInfo[0].clearInterrupt) {
+ interruptPending = false;
+ }
+#endif
+
+ for (threadFetched = 0; threadFetched < numFetchingThreads;
+ threadFetched++) {
+ // Fetch each of the actively fetching threads.
+ fetch(status_change);
+ }
+
+ // Record number of instructions fetched this cycle for distribution.
+ fetchNisnDist.sample(numInst);
+
+ if (status_change) {
+ // Change the fetch stage status if there was a status change.
+ _status = updateFetchStatus();
+ }
+
+ // If there was activity this cycle, inform the CPU of it.
+ if (wroteToTimeBuffer || cpu->contextSwitch) {
+ DPRINTF(Activity, "Activity this cycle.\n");
+
+ cpu->activityThisCycle();
+ }
+}
+
+template <class Impl>
+bool
+DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
+{
+ // Update the per thread stall statuses.
+ if (fromDecode->decodeBlock[tid]) {
+ stalls[tid].decode = true;
+ }
+
+ if (fromDecode->decodeUnblock[tid]) {
+ assert(stalls[tid].decode);
+ assert(!fromDecode->decodeBlock[tid]);
+ stalls[tid].decode = false;
+ }
+
+ if (fromRename->renameBlock[tid]) {
+ stalls[tid].rename = true;
+ }
+
+ if (fromRename->renameUnblock[tid]) {
+ assert(stalls[tid].rename);
+ assert(!fromRename->renameBlock[tid]);
+ stalls[tid].rename = false;
+ }
+
+ if (fromIEW->iewBlock[tid]) {
+ stalls[tid].iew = true;
+ }
+
+ if (fromIEW->iewUnblock[tid]) {
+ assert(stalls[tid].iew);
+ assert(!fromIEW->iewBlock[tid]);
+ stalls[tid].iew = false;
+ }
+
+ if (fromCommit->commitBlock[tid]) {
+ stalls[tid].commit = true;
+ }
+
+ if (fromCommit->commitUnblock[tid]) {
+ assert(stalls[tid].commit);
+ assert(!fromCommit->commitBlock[tid]);
+ stalls[tid].commit = false;
+ }
+
+ // Check squash signals from commit.
+ if (fromCommit->commitInfo[tid].squash) {
+
+ DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
+ "from commit.\n",tid);
+
+#if ISA_HAS_DELAY_SLOT
+ InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
+#else
+ InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].doneSeqNum;
+#endif
+ // In any case, squash.
+ squash(fromCommit->commitInfo[tid].nextPC,
+ doneSeqNum,
+ fromCommit->commitInfo[tid].squashDelaySlot,
+ tid);
+
+ // Also check if there's a mispredict that happened.
+ if (fromCommit->commitInfo[tid].branchMispredict) {
+ branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum,
+ fromCommit->commitInfo[tid].nextPC,
+ fromCommit->commitInfo[tid].branchTaken,
+ tid);
+ } else {
+ branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum,
+ tid);
+ }
+
+ return true;
+ } else if (fromCommit->commitInfo[tid].doneSeqNum) {
+ // Update the branch predictor if it wasn't a squashed instruction
+ // that was broadcasted.
+ branchPred.update(fromCommit->commitInfo[tid].doneSeqNum, tid);
+ }
+
+ // Check ROB squash signals from commit.
+ if (fromCommit->commitInfo[tid].robSquashing) {
+ DPRINTF(Fetch, "[tid:%u]: ROB is still squashing.\n", tid);
+
+ // Continue to squash.
+ fetchStatus[tid] = Squashing;
+
+ return true;
+ }
+
+ // Check squash signals from decode.
+ if (fromDecode->decodeInfo[tid].squash) {
+ DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "
+ "from decode.\n",tid);
+
+ // Update the branch predictor.
+ if (fromDecode->decodeInfo[tid].branchMispredict) {
+ branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum,
+ fromDecode->decodeInfo[tid].nextPC,
+ fromDecode->decodeInfo[tid].branchTaken,
+ tid);
+ } else {
+ branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum,
+ tid);
+ }
+
+ if (fetchStatus[tid] != Squashing) {
+
+#if ISA_HAS_DELAY_SLOT
+ InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].bdelayDoneSeqNum;
+#else
+ InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum;
+#endif
+ // Squash unless we're already squashing
+ squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
+ doneSeqNum,
+ tid);
+
+ return true;
+ }
+ }
+
+ if (checkStall(tid) && fetchStatus[tid] != IcacheWaitResponse) {
+ DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid);
+
+ fetchStatus[tid] = Blocked;
+
+ return true;
+ }
+
+ if (fetchStatus[tid] == Blocked ||
+ fetchStatus[tid] == Squashing) {
+ // Switch status to running if fetch isn't being told to block or
+ // squash this cycle.
+ DPRINTF(Fetch, "[tid:%i]: Done squashing, switching to running.\n",
+ tid);
+
+ fetchStatus[tid] = Running;
+
+ return true;
+ }
+
+ // If we've reached this point, we have not gotten any signals that
+ // cause fetch to change its status. Fetch remains the same as before.
+ return false;
+}
+
+template<class Impl>
+void
+DefaultFetch<Impl>::fetch(bool &status_change)
+{
+ //////////////////////////////////////////
+ // Start actual fetch
+ //////////////////////////////////////////
+ int tid = getFetchingThread(fetchPolicy);
+
+ if (tid == -1 || drainPending) {
+ DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
+
+ // Breaks looping condition in tick()
+ threadFetched = numFetchingThreads;
+ return;
+ }
+
+ DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
+
+ // The current PC.
+ Addr &fetch_PC = PC[tid];
+
+ // Fault code for memory access.
+ Fault fault = NoFault;
+
+ // If returning from the delay of a cache miss, then update the status
+ // to running, otherwise do the cache access. Possibly move this up
+ // to tick() function.
+ if (fetchStatus[tid] == IcacheAccessComplete) {
+ DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n",
+ tid);
+
+ fetchStatus[tid] = Running;
+ status_change = true;
+ } else if (fetchStatus[tid] == Running) {
+ DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read "
+ "instruction, starting at PC %08p.\n",
+ tid, fetch_PC);
+
+ bool fetch_success = fetchCacheLine(fetch_PC, fault, tid);
+ if (!fetch_success) {
+ if (cacheBlocked) {
+ ++icacheStallCycles;
+ } else {
+ ++fetchMiscStallCycles;
+ }
+ return;
+ }
+ } else {
+ if (fetchStatus[tid] == Idle) {
+ ++fetchIdleCycles;
+ } else if (fetchStatus[tid] == Blocked) {
+ ++fetchBlockedCycles;
+ } else if (fetchStatus[tid] == Squashing) {
+ ++fetchSquashCycles;
+ } else if (fetchStatus[tid] == IcacheWaitResponse) {
+ ++icacheStallCycles;
+ }
+
+ // Status is Idle, Squashing, Blocked, or IcacheWaitResponse, so
+ // fetch should do nothing.
+ return;
+ }
+
+ ++fetchCycles;
+
+ // If we had a stall due to an icache miss, then return.
+ if (fetchStatus[tid] == IcacheWaitResponse) {
+ ++icacheStallCycles;
+ status_change = true;
+ return;
+ }
+
+ Addr next_PC = fetch_PC;
+ Addr next_NPC = next_PC + instSize;
+ InstSeqNum inst_seq;
+ MachInst inst;
+ ExtMachInst ext_inst;
+ // @todo: Fix this hack.
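+ // Offset of the fetch PC within the cache block, aligned down to
+ // a 4-byte instruction boundary.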
+ unsigned offset = (fetch_PC & cacheBlkMask) & ~3;
+
+ if (fault == NoFault) {
+ // If the read of the first instruction was successful, then grab the
+ // instructions from the rest of the cache line and put them into the
+ // queue heading to decode.
+
+ DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to "
+ "decode.\n",tid);
+
+ // Need to keep track of whether or not a predicted branch
+ // ended this fetch block.
+ bool predicted_branch = false;
+
+ // Need to keep track of whether or not a delay slot
+ // instruction has been fetched; fetching continues past a
+ // predicted branch until its delay slot has been handled.
+
+ for (;
+ offset < cacheBlkSize &&
+ numInst < fetchWidth &&
+ (!predicted_branch || delaySlotInfo[tid].numInsts > 0);
+ ++numInst) {
+
+ // Get a sequence number.
+ inst_seq = cpu->getAndIncrementInstSeq();
+
+ // Make sure this is a valid index.
+ assert(offset <= cacheBlkSize - instSize);
+
+ // Get the instruction from the array of the cache line.
+ inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
+ (&cacheData[tid][offset]));
+
+ ext_inst = TheISA::makeExtMI(inst, fetch_PC);
+
+ // Create a new DynInst from the instruction fetched.
+ DynInstPtr instruction = new DynInst(ext_inst, fetch_PC,
+ next_PC,
+ inst_seq, cpu);
+ instruction->setTid(tid);
+
+ instruction->setASID(tid);
+
+ instruction->setThreadState(cpu->thread[tid]);
+
+ DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created "
+ "[sn:%lli]\n",
+ tid, instruction->readPC(), inst_seq);
+
+ DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n",
+ tid, instruction->staticInst->disassemble(fetch_PC));
+
+ instruction->traceData =
+ Trace::getInstRecord(curTick, cpu->tcBase(tid),
+ instruction->staticInst,
+ instruction->readPC());
+
+ predicted_branch = lookupAndUpdateNextPC(instruction, next_PC,
+ next_NPC);
+
+ // Add instruction to the CPU's list of instructions.
+ instruction->setInstListIt(cpu->addInst(instruction));
+
+ // Write the instruction to the first slot in the queue
+ // that heads to decode.
+ toDecode->insts[numInst] = instruction;
+
+ toDecode->size++;
+
+ // Increment stat of fetched instructions.
+ ++fetchedInsts;
+
+ // Move to the next instruction, unless we have a branch.
+ fetch_PC = next_PC;
+
+ if (instruction->isQuiesce()) {
- warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]);
++// warn("%lli: Quiesce instruction encountered, halting fetch!",
++// curTick);
+ fetchStatus[tid] = QuiescePending;
+ ++numInst;
+ status_change = true;
+ break;
+ }
+
+ offset += instSize;
+
+#if ISA_HAS_DELAY_SLOT
+ if (predicted_branch) {
+ delaySlotInfo[tid].branchSeqNum = inst_seq;
+
+ DPRINTF(Fetch, "[tid:%i]: Delay slot branch set to [sn:%i]\n",
+ tid, inst_seq);
+ continue;
+ } else if (delaySlotInfo[tid].numInsts > 0) {
+ --delaySlotInfo[tid].numInsts;
+
+ // It's OK to set PC to target of branch
+ if (delaySlotInfo[tid].numInsts == 0) {
+ delaySlotInfo[tid].targetReady = true;
+
+ // Break the looping condition
+ predicted_branch = true;
+ }
+
+ DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) left to"
+ " process.\n", tid, delaySlotInfo[tid].numInsts);
+ }
+#endif
+ }
+
+ if (offset >= cacheBlkSize) {
+ DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache "
+ "block.\n", tid);
+ } else if (numInst >= fetchWidth) {
+ DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "
+ "for this cycle.\n", tid);
+ } else if (predicted_branch && delaySlotInfo[tid].numInsts <= 0) {
+ DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "
+ "instruction encountered.\n", tid);
+ }
+ }
+
+ if (numInst > 0) {
+ wroteToTimeBuffer = true;
+ }
+
+ // Now that fetching is completed, update the PC to signify what the next
+ // cycle will be.
+ if (fault == NoFault) {
+#if ISA_HAS_DELAY_SLOT
+ if (delaySlotInfo[tid].targetReady &&
+ delaySlotInfo[tid].numInsts == 0) {
+ // Set PC to target
+ PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC
+ nextPC[tid] = next_PC + instSize; //next_NPC
+ nextNPC[tid] = next_PC + (2 * instSize);
+
+ delaySlotInfo[tid].targetReady = false;
+ } else {
+ PC[tid] = next_PC;
+ nextPC[tid] = next_NPC;
+ nextNPC[tid] = next_NPC + instSize;
+ }
+
+ DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n", tid, PC[tid]);
+#else
+ DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC);
+ PC[tid] = next_PC;
+ nextPC[tid] = next_PC + instSize;
+#endif
+ } else {
+ // We shouldn't be in an icache miss and also have a fault (an ITB
+ // miss)
+ if (fetchStatus[tid] == IcacheWaitResponse) {
+ panic("Fetch should have exited prior to this!");
+ }
+
+ // Send the fault to commit. This thread will not do anything
+ // until commit handles the fault. The only other way it can
+ // wake up is if a squash comes along and changes the PC.
+#if FULL_SYSTEM
+ assert(numInst != fetchWidth);
+ // Get a sequence number.
+ inst_seq = cpu->getAndIncrementInstSeq();
+ // We will use a nop in order to carry the fault.
+ ext_inst = TheISA::NoopMachInst;
+
+ // Create a new DynInst from the dummy nop.
+ DynInstPtr instruction = new DynInst(ext_inst, fetch_PC,
+ next_PC,
+ inst_seq, cpu);
+ instruction->setPredTarg(next_PC + instSize);
+ instruction->setTid(tid);
+
+ instruction->setASID(tid);
+
+ instruction->setThreadState(cpu->thread[tid]);
+
+ instruction->traceData = NULL;
+
+ instruction->setInstListIt(cpu->addInst(instruction));
+
+ instruction->fault = fault;
+
+ toDecode->insts[numInst] = instruction;
+ toDecode->size++;
+
+ DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n",tid);
+
+ fetchStatus[tid] = TrapPending;
+ status_change = true;
+
++// warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
+#else // !FULL_SYSTEM
+ warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]);
+#endif // FULL_SYSTEM
+ }
+}
+
+template<class Impl>
+void
+DefaultFetch<Impl>::recvRetry()
+{
+ assert(cacheBlocked);
+ if (retryPkt != NULL) {
+ assert(retryTid != -1);
+ assert(fetchStatus[retryTid] == IcacheWaitRetry);
+
+ if (icachePort->sendTiming(retryPkt)) {
+ fetchStatus[retryTid] = IcacheWaitResponse;
+ retryPkt = NULL;
+ retryTid = -1;
+ cacheBlocked = false;
+ }
+ } else {
+ assert(retryTid == -1);
+ // Access has been squashed since it was sent out. Just clear
+ // the cache being blocked.
+ cacheBlocked = false;
+ }
+}
+
+///////////////////////////////////////
+// //
+// SMT FETCH POLICY MAINTAINED HERE //
+// //
+///////////////////////////////////////
+template<class Impl>
+int
+DefaultFetch<Impl>::getFetchingThread(FetchPriority &fetch_priority)
+{
+ if (numThreads > 1) {
+ switch (fetch_priority) {
+
+ case SingleThread:
+ return 0;
+
+ case RoundRobin:
+ return roundRobin();
+
+ case IQ:
+ return iqCount();
+
+ case LSQ:
+ return lsqCount();
+
+ case Branch:
+ return branchCount();
+
+ default:
+ return -1;
+ }
+ } else {
+ int tid = *((*activeThreads).begin());
+
+ if (fetchStatus[tid] == Running ||
+ fetchStatus[tid] == IcacheAccessComplete ||
+ fetchStatus[tid] == Idle) {
+ return tid;
+ } else {
+ return -1;
+ }
+ }
+
+}
+
+
+template<class Impl>
+int
+DefaultFetch<Impl>::roundRobin()
+{
+ std::list<unsigned>::iterator pri_iter = priorityList.begin();
+ std::list<unsigned>::iterator end = priorityList.end();
+
+ int high_pri;
+
+ while (pri_iter != end) {
+ high_pri = *pri_iter;
+
+ assert(high_pri <= numThreads);
+
+ if (fetchStatus[high_pri] == Running ||
+ fetchStatus[high_pri] == IcacheAccessComplete ||
+ fetchStatus[high_pri] == Idle) {
+
+ priorityList.erase(pri_iter);
+ priorityList.push_back(high_pri);
+
+ return high_pri;
+ }
+
+ pri_iter++;
+ }
+
+ return -1;
+}
+
+template<class Impl>
+int
+DefaultFetch<Impl>::iqCount()
+{
+ std::priority_queue<unsigned> PQ;
+
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+
+ while (threads != (*activeThreads).end()) {
+ unsigned tid = *threads++;
+
+ PQ.push(fromIEW->iewInfo[tid].iqCount);
+ }
+
+ while (!PQ.empty()) {
+
+ unsigned high_pri = PQ.top().second;
+
+ if (fetchStatus[high_pri] == Running ||
+ fetchStatus[high_pri] == IcacheAccessComplete ||
+ fetchStatus[high_pri] == Idle)
+ return high_pri;
+ else
+ PQ.pop();
+
+ }
+
+ return -1;
+}
+
+template<class Impl>
+int
+DefaultFetch<Impl>::lsqCount()
+{
+ std::priority_queue<unsigned> PQ;
+
+
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+
+ while (threads != (*activeThreads).end()) {
+ unsigned tid = *threads++;
+
+ PQ.push(fromIEW->iewInfo[tid].ldstqCount);
+ }
+
+ while (!PQ.empty()) {
+
+ unsigned high_pri = PQ.top().second;
+
+ if (fetchStatus[high_pri] == Running ||
+ fetchStatus[high_pri] == IcacheAccessComplete ||
+ fetchStatus[high_pri] == Idle)
+ return high_pri;
+ else
+ PQ.pop();
+
+ }
+
+ return -1;
+}
+
+template<class Impl>
+int
+DefaultFetch<Impl>::branchCount()
+{
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+ panic("Branch Count Fetch policy unimplemented\n");
+ return *threads;
+}
--- /dev/null
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_O3_IEW_HH__
+#define __CPU_O3_IEW_HH__
+
+#include "config/full_system.hh"
+
+#include <queue>
+
+#include "base/statistics.hh"
+#include "base/timebuf.hh"
+#include "cpu/o3/comm.hh"
+#include "cpu/o3/scoreboard.hh"
+#include "cpu/o3/lsq.hh"
+
+class FUPool;
+
+/**
+ * DefaultIEW handles both single threaded and SMT IEW
+ * (issue/execute/writeback). It handles the dispatching of
+ * instructions to the LSQ/IQ as part of the issue stage, and has the
+ * IQ try to issue instructions each cycle. The execute latency is
+ * actually tied into the issue latency to allow the IQ to be able to
+ * do back-to-back scheduling without having to speculatively schedule
+ * instructions. This happens by having the IQ have access to the
+ * functional units, and the IQ gets the execution latencies from the
+ * FUs when it issues instructions. Instructions reach the execute
+ * stage on the last cycle of their execution, which is when the IQ
+ * knows to wake up any dependent instructions, allowing back to back
+ * scheduling. The execute portion of IEW separates memory
+ * instructions from non-memory instructions, either telling the LSQ
+ * to execute the instruction, or executing the instruction directly.
+ * The writeback portion of IEW completes the instructions by waking
+ * up any dependents, and marking the register ready on the
+ * scoreboard.
+ */
+template<class Impl>
+class DefaultIEW
+{
+ private:
+ //Typedefs from Impl
+ typedef typename Impl::CPUPol CPUPol;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::O3CPU O3CPU;
+ typedef typename Impl::Params Params;
+
+ typedef typename CPUPol::IQ IQ;
+ typedef typename CPUPol::RenameMap RenameMap;
+ typedef typename CPUPol::LSQ LSQ;
+
+ typedef typename CPUPol::TimeStruct TimeStruct;
+ typedef typename CPUPol::IEWStruct IEWStruct;
+ typedef typename CPUPol::RenameStruct RenameStruct;
+ typedef typename CPUPol::IssueStruct IssueStruct;
+
+ friend class Impl::O3CPU;
+ friend class CPUPol::IQ;
+
+ public:
+ /** Overall IEW stage status. Used to determine if the CPU can
+ * deschedule itself due to a lack of activity.
+ */
+ enum Status {
+ Active,
+ Inactive
+ };
+
+ /** Status for Issue, Execute, and Writeback stages. */
+ enum StageStatus {
+ Running,
+ Blocked,
+ Idle,
+ StartSquash,
+ Squashing,
+ Unblocking
+ };
+
+ private:
+ /** Overall stage status. */
+ Status _status;
+ /** Dispatch status. */
+ StageStatus dispatchStatus[Impl::MaxThreads];
+ /** Execute status. */
+ StageStatus exeStatus;
+ /** Writeback status. */
+ StageStatus wbStatus;
+
+ public:
+ /** Constructs a DefaultIEW with the given parameters. */
+ DefaultIEW(Params *params);
+
+ /** Returns the name of the DefaultIEW stage. */
+ std::string name() const;
+
+ /** Registers statistics. */
+ void regStats();
+
+ /** Initializes stage; sends back the number of free IQ and LSQ entries. */
+ void initStage();
+
+ /** Returns the dcache port. */
+ Port *getDcachePort() { return ldstQueue.getDcachePort(); }
+
+ /** Sets CPU pointer for IEW, IQ, and LSQ. */
+ void setCPU(O3CPU *cpu_ptr);
+
+ /** Sets main time buffer used for backwards communication. */
+ void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
+
+ /** Sets time buffer for getting instructions coming from rename. */
+ void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr);
+
+ /** Sets time buffer to pass on instructions to commit. */
+ void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr);
+
+ /** Sets pointer to list of active threads. */
+ void setActiveThreads(std::list<unsigned> *at_ptr);
+
+ /** Sets pointer to the scoreboard. */
+ void setScoreboard(Scoreboard *sb_ptr);
+
+ /** Drains IEW stage. */
+ bool drain();
+
+ /** Resumes execution after a drain. */
+ void resume();
+
+ /** Completes switch out of IEW stage. */
+ void switchOut();
+
+ /** Takes over from another CPU's thread. */
+ void takeOverFrom();
+
+ /** Returns if IEW is switched out. */
+ bool isSwitchedOut() { return switchedOut; }
+
+ /** Squashes instructions in IEW for a specific thread. */
+ void squash(unsigned tid);
+
+ /** Wakes all dependents of a completed instruction. */
+ void wakeDependents(DynInstPtr &inst);
+
+ /** Tells memory dependence unit that a memory instruction needs to be
+ * rescheduled. It will re-execute once replayMemInst() is called.
+ */
+ void rescheduleMemInst(DynInstPtr &inst);
+
+ /** Re-executes all rescheduled memory instructions. */
+ void replayMemInst(DynInstPtr &inst);
+
+ /** Sends an instruction to commit through the time buffer. */
+ void instToCommit(DynInstPtr &inst);
+
+ /** Inserts unused instructions of a thread into the skid buffer. */
+ void skidInsert(unsigned tid);
+
+ /** Returns the max of the number of entries in all of the skid buffers. */
+ int skidCount();
+
+ /** Returns if all of the skid buffers are empty. */
+ bool skidsEmpty();
+
+ /** Updates overall IEW status based on all of the stages' statuses. */
+ void updateStatus();
+
+ /** Resets entries of the IQ and the LSQ. */
+ void resetEntries();
+
+ /** Tells the CPU to wakeup if it has descheduled itself due to no
+ * activity. Used mainly by the LdWritebackEvent.
+ */
+ void wakeCPU();
+
+ /** Reports to the CPU that there is activity this cycle. */
+ void activityThisCycle();
+
+ /** Tells CPU that the IEW stage is active and running. */
+ inline void activateStage();
+
+ /** Tells CPU that the IEW stage is inactive and idle. */
+ inline void deactivateStage();
+
+ /** Returns if the LSQ has any stores to writeback. */
+ bool hasStoresToWB() { return ldstQueue.hasStoresToWB(); }
+
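+ /** Records that instruction [sn] will write back; stops issue once
+ * the number of outstanding writebacks reaches wbMax.
+ */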
+ void incrWb(InstSeqNum &sn)
+ {
+ if (++wbOutstanding == wbMax)
+ ableToIssue = false;
+ DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
++ assert(wbOutstanding <= wbMax);
+#ifdef DEBUG
+ wbList.insert(sn);
+#endif
+ }
+
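+ /** Records that instruction [sn] has written back; re-enables issue
+ * when the outstanding count drops below wbMax.
+ */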
+ void decrWb(InstSeqNum &sn)
+ {
+ if (wbOutstanding-- == wbMax)
+ ableToIssue = true;
+ DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
++ assert(wbOutstanding >= 0);
+#ifdef DEBUG
+ assert(wbList.find(sn) != wbList.end());
+ wbList.erase(sn);
+#endif
+ }
+
+#ifdef DEBUG
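+ /** Debug set of the sequence numbers of instructions with
+ * outstanding writebacks.
+ */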
+ std::set<InstSeqNum> wbList;
+
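+ /** Debug function to print all sequence numbers currently in wbList. */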
+ void dumpWb()
+ {
+ std::set<InstSeqNum>::iterator wb_it = wbList.begin();
+ while (wb_it != wbList.end()) {
+ cprintf("[sn:%lli]\n",
+ (*wb_it));
+ wb_it++;
+ }
+ }
+#endif
+
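+ /** Returns if issue is currently allowed, i.e. writeback bandwidth
+ * is still available.
+ */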
+ bool canIssue() { return ableToIssue; }
+
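+ /** Records whether issue is allowed; cleared when all writeback
+ * slots are occupied.
+ */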
+ bool ableToIssue;
+
+ private:
+ /** Sends commit proper information for a squash due to a branch
+ * mispredict.
+ */
+ void squashDueToBranch(DynInstPtr &inst, unsigned thread_id);
+
+ /** Sends commit proper information for a squash due to a memory order
+ * violation.
+ */
+ void squashDueToMemOrder(DynInstPtr &inst, unsigned thread_id);
+
+ /** Sends commit proper information for a squash due to memory becoming
+ * blocked (younger issued instructions must be retried).
+ */
+ void squashDueToMemBlocked(DynInstPtr &inst, unsigned thread_id);
+
+ /** Sets Dispatch to blocked, and signals back to other stages to block. */
+ void block(unsigned thread_id);
+
+ /** Unblocks Dispatch if the skid buffer is empty, and signals back to
+ * other stages to unblock.
+ */
+ void unblock(unsigned thread_id);
+
+ /** Determines proper actions to take given Dispatch's status. */
+ void dispatch(unsigned tid);
+
+ /** Dispatches instructions to IQ and LSQ. */
+ void dispatchInsts(unsigned tid);
+
+ /** Executes instructions. In the case of memory operations, it informs the
+ * LSQ to execute the instructions. Also handles any redirects that occur
+ * due to the executed instructions.
+ */
+ void executeInsts();
+
+ /** Writes back instructions. In our model, the instruction's execute()
+ * function atomically reads registers, executes, and writes registers.
+ * Thus this writeback only wakes up dependent instructions, and informs
+ * the scoreboard of registers becoming ready.
+ */
+ void writebackInsts();
+
+ /** Returns the number of valid, non-squashed instructions coming from
+ * rename to dispatch.
+ */
+ unsigned validInstsFromRename();
+
+ /** Reads the stall signals. */
+ void readStallSignals(unsigned tid);
+
+ /** Checks if any of the stall conditions are currently true. */
+ bool checkStall(unsigned tid);
+
+ /** Processes inputs and changes state accordingly. */
+ void checkSignalsAndUpdate(unsigned tid);
+
+ /** Removes instructions from rename from a thread's instruction list. */
+ void emptyRenameInsts(unsigned tid);
+
+ /** Sorts instructions coming from rename into lists separated by thread. */
+ void sortInsts();
+
+ public:
+ /** Ticks IEW stage, causing Dispatch, the IQ, the LSQ, Execute, and
+ * Writeback to run for one cycle.
+ */
+ void tick();
+
+ private:
+ /** Updates execution stats based on the instruction. */
+ void updateExeInstStats(DynInstPtr &inst);
+
+ /** Pointer to main time buffer used for backwards communication. */
+ TimeBuffer<TimeStruct> *timeBuffer;
+
+ /** Wire to write information heading to previous stages. */
+ typename TimeBuffer<TimeStruct>::wire toFetch;
+
+ /** Wire to get commit's output from backwards time buffer. */
+ typename TimeBuffer<TimeStruct>::wire fromCommit;
+
+ /** Wire to write information heading to previous stages. */
+ typename TimeBuffer<TimeStruct>::wire toRename;
+
+ /** Rename instruction queue interface. */
+ TimeBuffer<RenameStruct> *renameQueue;
+
+ /** Wire to get rename's output from rename queue. */
+ typename TimeBuffer<RenameStruct>::wire fromRename;
+
+ /** Issue stage queue. */
+ TimeBuffer<IssueStruct> issueToExecQueue;
+
+ /** Wire to read information from the issue stage time queue. */
+ typename TimeBuffer<IssueStruct>::wire fromIssue;
+
+ /**
+ * IEW stage time buffer. Holds ROB indices of instructions that
+ * can be marked as completed.
+ */
+ TimeBuffer<IEWStruct> *iewQueue;
+
+ /** Wire to write information heading to commit. */
+ typename TimeBuffer<IEWStruct>::wire toCommit;
+
+ /** Queue of all instructions coming from rename this cycle. */
+ std::queue<DynInstPtr> insts[Impl::MaxThreads];
+
+ /** Skid buffer between rename and IEW. */
+ std::queue<DynInstPtr> skidBuffer[Impl::MaxThreads];
+
+ /** Scoreboard pointer. */
+ Scoreboard* scoreboard;
+
+ public:
+ /** Instruction queue. */
+ IQ instQueue;
+
+ /** Load / store queue. */
+ LSQ ldstQueue;
+
+ /** Pointer to the functional unit pool. */
+ FUPool *fuPool;
+
+ private:
+ /** CPU pointer. */
+ O3CPU *cpu;
+
+ /** Records if IEW has written to the time buffer this cycle, so that the
+ * CPU can deschedule itself if there is no activity.
+ */
+ bool wroteToTimeBuffer;
+
+ /** Source of possible stalls. */
+ struct Stalls {
+ bool commit;
+ };
+
+ /** Stages that are telling IEW to stall. */
+ Stalls stalls[Impl::MaxThreads];
+
+ /** Debug function to print instructions that are issued this cycle. */
+ void printAvailableInsts();
+
+ public:
+ /** Records if the LSQ needs to be updated on the next cycle, so that
+ * IEW knows if there will be activity on the next cycle.
+ */
+ bool updateLSQNextCycle;
+
+ private:
+ /** Records if there is a fetch redirect on this cycle for each thread. */
+ bool fetchRedirect[Impl::MaxThreads];
+
+ /** Keeps track of the last valid branch delay slot instruction's
+ * sequence number for each thread.
+ */
+ InstSeqNum bdelayDoneSeqNum[Impl::MaxThreads];
+
+ /** Used to track if all instructions have been dispatched this cycle.
+ * If they have not, then blocking must have occurred, and the instructions
+ * would already be added to the skid buffer.
+ * @todo: Fix this hack.
+ */
+ bool dispatchedAllInsts;
+
+ /** Records if the queues have been changed (inserted or issued insts),
+ * so that IEW knows to broadcast the updated amount of free entries.
+ */
+ bool updatedQueues;
+
+ /** Commit to IEW delay, in ticks. */
+ unsigned commitToIEWDelay;
+
+ /** Rename to IEW delay, in ticks. */
+ unsigned renameToIEWDelay;
+
+ /**
+ * Issue to execute delay, in ticks. What this actually represents is
+ * the amount of time it takes for an instruction to wake up, be
+ * scheduled, and sent to a FU for execution.
+ */
+ unsigned issueToExecuteDelay;
+
+ /** Width of dispatch, in instructions. */
+ unsigned dispatchWidth;
+
+ /** Width of issue, in instructions. */
+ unsigned issueWidth;
+
+ /** Index into queue of instructions being written back. */
+ unsigned wbNumInst;
+
+ /** Cycle number within the queue of instructions being written back.
+ * Used in case there are too many instructions writing back at the current
+ * cycle and writebacks need to be scheduled for the future. See comments
+ * in instToCommit().
+ */
+ unsigned wbCycle;
+
+ /** Number of instructions in flight that will writeback. */
+ int wbOutstanding;
+
+ /** Writeback width. */
+ unsigned wbWidth;
+
+ /** Writeback width * writeback depth, where writeback depth is
+ * the number of cycles of writing back instructions that can be
+ * buffered. */
+ unsigned wbMax;
+
+ /** Number of active threads. */
+ unsigned numThreads;
+
+ /** Pointer to list of active threads. */
+ std::list<unsigned> *activeThreads;
+
+ /** Maximum size of the skid buffer. */
+ unsigned skidBufferMax;
+
+ /** Is this stage switched out. */
+ bool switchedOut;
+
+ /** Stat for total number of idle cycles. */
+ Stats::Scalar<> iewIdleCycles;
+ /** Stat for total number of squashing cycles. */
+ Stats::Scalar<> iewSquashCycles;
+ /** Stat for total number of blocking cycles. */
+ Stats::Scalar<> iewBlockCycles;
+ /** Stat for total number of unblocking cycles. */
+ Stats::Scalar<> iewUnblockCycles;
+ /** Stat for total number of instructions dispatched. */
+ Stats::Scalar<> iewDispatchedInsts;
+ /** Stat for total number of squashed instructions dispatch skips. */
+ Stats::Scalar<> iewDispSquashedInsts;
+ /** Stat for total number of dispatched load instructions. */
+ Stats::Scalar<> iewDispLoadInsts;
+ /** Stat for total number of dispatched store instructions. */
+ Stats::Scalar<> iewDispStoreInsts;
+ /** Stat for total number of dispatched non speculative instructions. */
+ Stats::Scalar<> iewDispNonSpecInsts;
+ /** Stat for number of times the IQ becomes full. */
+ Stats::Scalar<> iewIQFullEvents;
+ /** Stat for number of times the LSQ becomes full. */
+ Stats::Scalar<> iewLSQFullEvents;
+ /** Stat for total number of memory ordering violation events. */
+ Stats::Scalar<> memOrderViolationEvents;
+ /** Stat for total number of incorrect predicted taken branches. */
+ Stats::Scalar<> predictedTakenIncorrect;
+ /** Stat for total number of incorrect predicted not taken branches. */
+ Stats::Scalar<> predictedNotTakenIncorrect;
+ /** Stat for total number of mispredicted branches detected at execute. */
+ Stats::Formula branchMispredicts;
+
+ /** Stat for total number of executed instructions. */
+ Stats::Scalar<> iewExecutedInsts;
+ /** Stat for total number of executed load instructions. */
+ Stats::Vector<> iewExecLoadInsts;
+ /** Stat for total number of squashed instructions skipped at execute. */
+ Stats::Scalar<> iewExecSquashedInsts;
+ /** Number of executed software prefetches. */
+ Stats::Vector<> iewExecutedSwp;
+ /** Number of executed nops. */
+ Stats::Vector<> iewExecutedNop;
+ /** Number of executed memory references. */
+ Stats::Vector<> iewExecutedRefs;
+ /** Number of executed branches. */
+ Stats::Vector<> iewExecutedBranches;
+ /** Number of executed store instructions. */
+ Stats::Formula iewExecStoreInsts;
+ /** Number of instructions executed per cycle. */
+ Stats::Formula iewExecRate;
+
+ /** Number of instructions sent to commit. */
+ Stats::Vector<> iewInstsToCommit;
+ /** Number of instructions that writeback. */
+ Stats::Vector<> writebackCount;
+ /** Number of instructions that wake consumers. */
+ Stats::Vector<> producerInst;
+ /** Number of instructions that wake up from producers. */
+ Stats::Vector<> consumerInst;
+ /** Number of instructions that were delayed in writing back due
+ * to resource contention.
+ */
+ Stats::Vector<> wbPenalized;
+ /** Number of instructions per cycle written back. */
+ Stats::Formula wbRate;
+ /** Average number of woken instructions per writeback. */
+ Stats::Formula wbFanout;
+ /** Number of instructions per cycle delayed in writing back . */
+ Stats::Formula wbPenalizedRate;
+};
+
+#endif // __CPU_O3_IEW_HH__
--- /dev/null
- .name(name() + ".EXEC:insts")
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+// @todo: Fix the instantaneous communication among all the stages within
+// iew. There's a clear delay between issue and execute, yet backwards
+// communication happens simultaneously.
+
+#include <queue>
+
+#include "base/timebuf.hh"
+#include "cpu/o3/fu_pool.hh"
+#include "cpu/o3/iew.hh"
+
+template<class Impl>
+DefaultIEW<Impl>::DefaultIEW(Params *params)
+ : issueToExecQueue(params->backComSize, params->forwardComSize),
+ instQueue(params),
+ ldstQueue(params),
+ fuPool(params->fuPool),
+ commitToIEWDelay(params->commitToIEWDelay),
+ renameToIEWDelay(params->renameToIEWDelay),
+ issueToExecuteDelay(params->issueToExecuteDelay),
+ dispatchWidth(params->dispatchWidth),
+ issueWidth(params->issueWidth),
+ wbOutstanding(0),
+ wbWidth(params->wbWidth),
+ numThreads(params->numberOfThreads),
+ switchedOut(false)
+{
+ _status = Active;
+ exeStatus = Running;
+ wbStatus = Idle;
+
+ // Setup wire to read instructions coming from issue.
+ fromIssue = issueToExecQueue.getWire(-issueToExecuteDelay);
+
+ // Instruction queue needs the queue between issue and execute.
+ instQueue.setIssueToExecuteQueue(&issueToExecQueue);
+
+ instQueue.setIEW(this);
+ ldstQueue.setIEW(this);
+
+ for (int i=0; i < numThreads; i++) {
+ dispatchStatus[i] = Running;
+ stalls[i].commit = false;
+ fetchRedirect[i] = false;
+ bdelayDoneSeqNum[i] = 0;
+ }
+
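+ // Total writeback capacity: writeback width times the number of
+ // cycles of writeback that can be buffered.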
+ wbMax = wbWidth * params->wbDepth;
+
+ updateLSQNextCycle = false;
+
+ ableToIssue = true;
+
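+ // Size the skid buffer to hold three times the instructions in
+ // flight from rename (renameToIEWDelay cycles at renameWidth per
+ // cycle) plus one cycle of issue width.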
+ skidBufferMax = (3 * (renameToIEWDelay * params->renameWidth)) + issueWidth;
+}
+
+template <class Impl>
+std::string
+DefaultIEW<Impl>::name() const
+{
+ return cpu->name() + ".iew";
+}
+
+template <class Impl>
+void
+DefaultIEW<Impl>::regStats()
+{
+ using namespace Stats;
+
+ instQueue.regStats();
+ ldstQueue.regStats();
+
+ iewIdleCycles
+ .name(name() + ".iewIdleCycles")
+ .desc("Number of cycles IEW is idle");
+
+ iewSquashCycles
+ .name(name() + ".iewSquashCycles")
+ .desc("Number of cycles IEW is squashing");
+
+ iewBlockCycles
+ .name(name() + ".iewBlockCycles")
+ .desc("Number of cycles IEW is blocking");
+
+ iewUnblockCycles
+ .name(name() + ".iewUnblockCycles")
+ .desc("Number of cycles IEW is unblocking");
+
+ iewDispatchedInsts
+ .name(name() + ".iewDispatchedInsts")
+ .desc("Number of instructions dispatched to IQ");
+
+ iewDispSquashedInsts
+ .name(name() + ".iewDispSquashedInsts")
+ .desc("Number of squashed instructions skipped by dispatch");
+
+ iewDispLoadInsts
+ .name(name() + ".iewDispLoadInsts")
+ .desc("Number of dispatched load instructions");
+
+ iewDispStoreInsts
+ .name(name() + ".iewDispStoreInsts")
+ .desc("Number of dispatched store instructions");
+
+ iewDispNonSpecInsts
+ .name(name() + ".iewDispNonSpecInsts")
+ .desc("Number of dispatched non-speculative instructions");
+
+ iewIQFullEvents
+ .name(name() + ".iewIQFullEvents")
+ .desc("Number of times the IQ has become full, causing a stall");
+
+ iewLSQFullEvents
+ .name(name() + ".iewLSQFullEvents")
+ .desc("Number of times the LSQ has become full, causing a stall");
+
+ memOrderViolationEvents
+ .name(name() + ".memOrderViolationEvents")
+ .desc("Number of memory order violations");
+
+ predictedTakenIncorrect
+ .name(name() + ".predictedTakenIncorrect")
+ .desc("Number of branches that were predicted taken incorrectly");
+
+ predictedNotTakenIncorrect
+ .name(name() + ".predictedNotTakenIncorrect")
+ .desc("Number of branches that were predicted not taken incorrectly");
+
+ branchMispredicts
+ .name(name() + ".branchMispredicts")
+ .desc("Number of branch mispredicts detected at execute");
+
+ branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect;
+
+ iewExecutedInsts
- .name(name() + ".EXEC:loads")
++ .name(name() + ".iewExecutedInsts")
+ .desc("Number of executed instructions");
+
+ iewExecLoadInsts
+ .init(cpu->number_of_threads)
- .name(name() + ".EXEC:squashedInsts")
++ .name(name() + ".iewExecLoadInsts")
+ .desc("Number of load instructions executed")
+ .flags(total);
+
+ iewExecSquashedInsts
+ .name(name() + ".iewExecSquashedInsts")
+ .desc("Number of squashed instructions skipped in execute");
+
+ iewExecutedSwp
+ .init(cpu->number_of_threads)
+ .name(name() + ".EXEC:swp")
+ .desc("number of swp insts executed")
+ .flags(total);
+
+ iewExecutedNop
+ .init(cpu->number_of_threads)
+ .name(name() + ".EXEC:nop")
+ .desc("number of nop insts executed")
+ .flags(total);
+
+ iewExecutedRefs
+ .init(cpu->number_of_threads)
+ .name(name() + ".EXEC:refs")
+ .desc("number of memory reference insts executed")
+ .flags(total);
+
+ iewExecutedBranches
+ .init(cpu->number_of_threads)
+ .name(name() + ".EXEC:branches")
+ .desc("Number of branches executed")
+ .flags(total);
+
+ iewExecStoreInsts
+ .name(name() + ".EXEC:stores")
+ .desc("Number of stores executed")
+ .flags(total);
+ iewExecStoreInsts = iewExecutedRefs - iewExecLoadInsts;
+
+ iewExecRate
+ .name(name() + ".EXEC:rate")
+ .desc("Inst execution rate")
+ .flags(total);
+
+ iewExecRate = iewExecutedInsts / cpu->numCycles;
+
+ iewInstsToCommit
+ .init(cpu->number_of_threads)
+ .name(name() + ".WB:sent")
+ .desc("cumulative count of insts sent to commit")
+ .flags(total);
+
+ writebackCount
+ .init(cpu->number_of_threads)
+ .name(name() + ".WB:count")
+ .desc("cumulative count of insts written-back")
+ .flags(total);
+
+ producerInst
+ .init(cpu->number_of_threads)
+ .name(name() + ".WB:producers")
+ .desc("num instructions producing a value")
+ .flags(total);
+
+ consumerInst
+ .init(cpu->number_of_threads)
+ .name(name() + ".WB:consumers")
+ .desc("num instructions consuming a value")
+ .flags(total);
+
+ wbPenalized
+ .init(cpu->number_of_threads)
+ .name(name() + ".WB:penalized")
+ .desc("number of instrctions required to write to 'other' IQ")
+ .flags(total);
+
+ wbPenalizedRate
+ .name(name() + ".WB:penalized_rate")
+ .desc ("fraction of instructions written-back that wrote to 'other' IQ")
+ .flags(total);
+
+ wbPenalizedRate = wbPenalized / writebackCount;
+
+ wbFanout
+ .name(name() + ".WB:fanout")
+ .desc("average fanout of values written-back")
+ .flags(total);
+
+ wbFanout = producerInst / consumerInst;
+
+ wbRate
+ .name(name() + ".WB:rate")
+ .desc("insts written-back per cycle")
+ .flags(total);
+ wbRate = writebackCount / cpu->numCycles;
+}
+
+template<class Impl>
+void
+DefaultIEW<Impl>::initStage()
+{
+ for (int tid=0; tid < numThreads; tid++) {
+ toRename->iewInfo[tid].usedIQ = true;
+ toRename->iewInfo[tid].freeIQEntries =
+ instQueue.numFreeEntries(tid);
+
+ toRename->iewInfo[tid].usedLSQ = true;
+ toRename->iewInfo[tid].freeLSQEntries =
+ ldstQueue.numFreeEntries(tid);
+ }
+}
+
+template<class Impl>
+void
+DefaultIEW<Impl>::setCPU(O3CPU *cpu_ptr)
+{
+ DPRINTF(IEW, "Setting CPU pointer.\n");
+ cpu = cpu_ptr;
+
+ instQueue.setCPU(cpu_ptr);
+ ldstQueue.setCPU(cpu_ptr);
+
+ cpu->activateStage(O3CPU::IEWIdx);
+}
+
+template<class Impl>
+void
+DefaultIEW<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
+{
+ DPRINTF(IEW, "Setting time buffer pointer.\n");
+ timeBuffer = tb_ptr;
+
+ // Setup wire to read information from time buffer, from commit.
+ fromCommit = timeBuffer->getWire(-commitToIEWDelay);
+
+ // Setup wire to write information back to previous stages.
+ toRename = timeBuffer->getWire(0);
+
+ toFetch = timeBuffer->getWire(0);
+
+ // Instruction queue also needs main time buffer.
+ instQueue.setTimeBuffer(tb_ptr);
+}
+
+template<class Impl>
+void
+DefaultIEW<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
+{
+ DPRINTF(IEW, "Setting rename queue pointer.\n");
+ renameQueue = rq_ptr;
+
+ // Setup wire to read information from rename queue.
+ fromRename = renameQueue->getWire(-renameToIEWDelay);
+}
+
+template<class Impl>
+void
+DefaultIEW<Impl>::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
+{
+ DPRINTF(IEW, "Setting IEW queue pointer.\n");
+ iewQueue = iq_ptr;
+
+ // Setup wire to write instructions to commit.
+ toCommit = iewQueue->getWire(0);
+}
+
+template<class Impl>
+void
+DefaultIEW<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
+{
+ DPRINTF(IEW, "Setting active threads list pointer.\n");
+ activeThreads = at_ptr;
+
+ ldstQueue.setActiveThreads(at_ptr);
+ instQueue.setActiveThreads(at_ptr);
+}
+
+template<class Impl>
+void
+DefaultIEW<Impl>::setScoreboard(Scoreboard *sb_ptr)
+{
+ DPRINTF(IEW, "Setting scoreboard pointer.\n");
+ scoreboard = sb_ptr;
+}
+
+template <class Impl>
+bool
+DefaultIEW<Impl>::drain()
+{
+ // IEW is ready to drain at any time.
+ cpu->signalDrained();
+ return true;
+}
+
+template <class Impl>
+void
+DefaultIEW<Impl>::resume()
+{
+}
+
+template <class Impl>
+void
+DefaultIEW<Impl>::switchOut()
+{
+ // Clear any state.
+ switchedOut = true;
+
+ instQueue.switchOut();
+ ldstQueue.switchOut();
+ fuPool->switchOut();
+
+ for (int i = 0; i < numThreads; i++) {
+ while (!insts[i].empty())
+ insts[i].pop();
+ while (!skidBuffer[i].empty())
+ skidBuffer[i].pop();
+ }
+}
+
+template <class Impl>
+void
+DefaultIEW<Impl>::takeOverFrom()
+{
+ // Reset all state.
+ _status = Active;
+ exeStatus = Running;
+ wbStatus = Idle;
+ switchedOut = false;
+
+ instQueue.takeOverFrom();
+ ldstQueue.takeOverFrom();
+ fuPool->takeOverFrom();
+
+ initStage();
+ cpu->activityThisCycle();
+
+ for (int i=0; i < numThreads; i++) {
+ dispatchStatus[i] = Running;
+ stalls[i].commit = false;
+ fetchRedirect[i] = false;
+ }
+
+ updateLSQNextCycle = false;
+
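+ // Advance the issue-to-execute time buffer through all of its
+ // entries to clear out any stale instructions.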
- assert((wbCycle * wbWidth + wbNumInst) < wbMax);
+ for (int i = 0; i < issueToExecQueue.getSize(); ++i) {
+ issueToExecQueue.advance();
+ }
+}
+
+template<class Impl>
+void
+DefaultIEW<Impl>::squash(unsigned tid)
+{
+ DPRINTF(IEW, "[tid:%i]: Squashing all instructions.\n",
+ tid);
+
+ // Tell the IQ to start squashing.
+ instQueue.squash(tid);
+
+ // Tell the LDSTQ to start squashing.
+#if ISA_HAS_DELAY_SLOT
+ ldstQueue.squash(fromCommit->commitInfo[tid].bdelayDoneSeqNum, tid);
+#else
+ ldstQueue.squash(fromCommit->commitInfo[tid].doneSeqNum, tid);
+#endif
+ updatedQueues = true;
+
+ // Clear the skid buffer in case it has any data in it.
+ DPRINTF(IEW, "[tid:%i]: Removing skidbuffer instructions until [sn:%i].\n",
+ tid, fromCommit->commitInfo[tid].bdelayDoneSeqNum);
+
+ while (!skidBuffer[tid].empty()) {
+#if ISA_HAS_DELAY_SLOT
+ if (skidBuffer[tid].front()->seqNum <=
+ fromCommit->commitInfo[tid].bdelayDoneSeqNum) {
+ DPRINTF(IEW, "[tid:%i]: Cannot remove skidbuffer instructions "
+ "that occur before delay slot [sn:%i].\n",
+ fromCommit->commitInfo[tid].bdelayDoneSeqNum,
+ tid);
+ break;
+ } else {
+ DPRINTF(IEW, "[tid:%i]: Removing instruction [sn:%i] from "
+ "skidBuffer.\n", tid, skidBuffer[tid].front()->seqNum);
+ }
+#endif
+ if (skidBuffer[tid].front()->isLoad() ||
+ skidBuffer[tid].front()->isStore() ) {
+ toRename->iewInfo[tid].dispatchedToLSQ++;
+ }
+
+ toRename->iewInfo[tid].dispatched++;
+
+ skidBuffer[tid].pop();
+ }
+
+ bdelayDoneSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
+
+ emptyRenameInsts(tid);
+}
+
+template<class Impl>
+void
+DefaultIEW<Impl>::squashDueToBranch(DynInstPtr &inst, unsigned tid)
+{
+ DPRINTF(IEW, "[tid:%i]: Squashing from a specific instruction, PC: %#x "
+ "[sn:%i].\n", tid, inst->readPC(), inst->seqNum);
+
+ toCommit->squash[tid] = true;
+ toCommit->squashedSeqNum[tid] = inst->seqNum;
+ toCommit->mispredPC[tid] = inst->readPC();
+ toCommit->branchMispredict[tid] = true;
+
+#if ISA_HAS_DELAY_SLOT
+ bool branch_taken = inst->readNextNPC() !=
+ (inst->readNextPC() + sizeof(TheISA::MachInst));
+
+ toCommit->branchTaken[tid] = branch_taken;
+
+ toCommit->condDelaySlotBranch[tid] = inst->isCondDelaySlot();
+
+ if (inst->isCondDelaySlot() && branch_taken) {
+ toCommit->nextPC[tid] = inst->readNextPC();
+ } else {
+ toCommit->nextPC[tid] = inst->readNextNPC();
+ }
+#else
+ toCommit->branchTaken[tid] = inst->readNextPC() !=
+ (inst->readPC() + sizeof(TheISA::MachInst));
+ toCommit->nextPC[tid] = inst->readNextPC();
+#endif
+
+ toCommit->includeSquashInst[tid] = false;
+
+ wroteToTimeBuffer = true;
+}
+
+template<class Impl>
+void
+DefaultIEW<Impl>::squashDueToMemOrder(DynInstPtr &inst, unsigned tid)
+{
+ DPRINTF(IEW, "[tid:%i]: Squashing from a specific instruction, "
+ "PC: %#x [sn:%i].\n", tid, inst->readPC(), inst->seqNum);
+
+ toCommit->squash[tid] = true;
+ toCommit->squashedSeqNum[tid] = inst->seqNum;
+ toCommit->nextPC[tid] = inst->readNextPC();
+
+ toCommit->includeSquashInst[tid] = false;
+
+ wroteToTimeBuffer = true;
+}
+
+template<class Impl>
+void
+DefaultIEW<Impl>::squashDueToMemBlocked(DynInstPtr &inst, unsigned tid)
+{
+ DPRINTF(IEW, "[tid:%i]: Memory blocked, squashing load and younger insts, "
+ "PC: %#x [sn:%i].\n", tid, inst->readPC(), inst->seqNum);
+
+ toCommit->squash[tid] = true;
+ toCommit->squashedSeqNum[tid] = inst->seqNum;
+ toCommit->nextPC[tid] = inst->readPC();
+
+ // Must include the broadcasted SN in the squash.
+ toCommit->includeSquashInst[tid] = true;
+
+ ldstQueue.setLoadBlockedHandled(tid);
+
+ wroteToTimeBuffer = true;
+}
+
+template<class Impl>
+void
+DefaultIEW<Impl>::block(unsigned tid)
+{
+ DPRINTF(IEW, "[tid:%u]: Blocking.\n", tid);
+
+ if (dispatchStatus[tid] != Blocked &&
+ dispatchStatus[tid] != Unblocking) {
+ toRename->iewBlock[tid] = true;
+ wroteToTimeBuffer = true;
+ }
+
+ // Add the current inputs to the skid buffer so they can be
+ // reprocessed when this stage unblocks.
+ skidInsert(tid);
+
+ dispatchStatus[tid] = Blocked;
+}
+
+template<class Impl>
+void
+DefaultIEW<Impl>::unblock(unsigned tid)
+{
+ DPRINTF(IEW, "[tid:%i]: Reading instructions out of the skid "
+ "buffer %u.\n",tid, tid);
+
+ // If the skid buffer is empty, signal back to previous stages to unblock.
+ // Also switch status to running.
+ if (skidBuffer[tid].empty()) {
+ toRename->iewUnblock[tid] = true;
+ wroteToTimeBuffer = true;
+ DPRINTF(IEW, "[tid:%i]: Done unblocking.\n",tid);
+ dispatchStatus[tid] = Running;
+ }
+}
+
+template<class Impl>
+void
+DefaultIEW<Impl>::wakeDependents(DynInstPtr &inst)
+{
+ instQueue.wakeDependents(inst);
+}
+
+template<class Impl>
+void
+DefaultIEW<Impl>::rescheduleMemInst(DynInstPtr &inst)
+{
+ instQueue.rescheduleMemInst(inst);
+}
+
+template<class Impl>
+void
+DefaultIEW<Impl>::replayMemInst(DynInstPtr &inst)
+{
+ instQueue.replayMemInst(inst);
+}
+
+template<class Impl>
+void
+DefaultIEW<Impl>::instToCommit(DynInstPtr &inst)
+{
+ // First check the time slot that this instruction will write
+ // to. If there are free write ports at the time, then go ahead
+ // and write the instruction to that time. If there are not,
+ // keep looking back to see where's the first time there's a
+ // free slot.
+ while ((*iewQueue)[wbCycle].insts[wbNumInst]) {
+ ++wbNumInst;
+ if (wbNumInst == wbWidth) {
+ ++wbCycle;
+ wbNumInst = 0;
+ }
+
+ assert((wbCycle * wbWidth + wbNumInst) <= wbMax);
+ }
+
++ DPRINTF(IEW, "Current wb cycle: %i, width: %i, numInst: %i\nwbActual:%i\n",
++ wbCycle, wbWidth, wbNumInst, wbCycle * wbWidth + wbNumInst);
+ // Add finished instruction to queue to commit.
+ (*iewQueue)[wbCycle].insts[wbNumInst] = inst;
+ (*iewQueue)[wbCycle].size++;
+}
+
+template <class Impl>
+unsigned
+DefaultIEW<Impl>::validInstsFromRename()
+{
+ unsigned inst_count = 0;
+
+ for (int i=0; i<fromRename->size; i++) {
+ if (!fromRename->insts[i]->isSquashed())
+ inst_count++;
+ }
+
+ return inst_count;
+}
+
+template<class Impl>
+void
+DefaultIEW<Impl>::skidInsert(unsigned tid)
+{
+ DynInstPtr inst = NULL;
+
+ while (!insts[tid].empty()) {
+ inst = insts[tid].front();
+
+ insts[tid].pop();
+
+ DPRINTF(Decode,"[tid:%i]: Inserting [sn:%lli] PC:%#x into "
+ "dispatch skidBuffer %i\n",tid, inst->seqNum,
+ inst->readPC(),tid);
+
+ skidBuffer[tid].push(inst);
+ }
+
+ assert(skidBuffer[tid].size() <= skidBufferMax &&
+ "Skidbuffer Exceeded Max Size");
+}
+
+template<class Impl>
+int
+DefaultIEW<Impl>::skidCount()
+{
+ int max=0;
+
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+
+ while (threads != (*activeThreads).end()) {
+ unsigned thread_count = skidBuffer[*threads++].size();
+ if (max < thread_count)
+ max = thread_count;
+ }
+
+ return max;
+}
+
+template<class Impl>
+bool
+DefaultIEW<Impl>::skidsEmpty()
+{
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+
+ while (threads != (*activeThreads).end()) {
+ if (!skidBuffer[*threads++].empty())
+ return false;
+ }
+
+ return true;
+}
+
+template <class Impl>
+void
+DefaultIEW<Impl>::updateStatus()
+{
+ bool any_unblocking = false;
+
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+
+ threads = (*activeThreads).begin();
+
+ while (threads != (*activeThreads).end()) {
+ unsigned tid = *threads++;
+
+ if (dispatchStatus[tid] == Unblocking) {
+ any_unblocking = true;
+ break;
+ }
+ }
+
+ // If there are no ready instructions waiting to be scheduled by the IQ,
+ // and there's no stores waiting to write back, and dispatch is not
+ // unblocking, then there is no internal activity for the IEW stage.
+ if (_status == Active && !instQueue.hasReadyInsts() &&
+ !ldstQueue.willWB() && !any_unblocking) {
+ DPRINTF(IEW, "IEW switching to idle\n");
+
+ deactivateStage();
+
+ _status = Inactive;
+ } else if (_status == Inactive && (instQueue.hasReadyInsts() ||
+ ldstQueue.willWB() ||
+ any_unblocking)) {
+ // Otherwise there is internal activity. Set to active.
+ DPRINTF(IEW, "IEW switching to active\n");
+
+ activateStage();
+
+ _status = Active;
+ }
+}
+
+template <class Impl>
+void
+DefaultIEW<Impl>::resetEntries()
+{
+ instQueue.resetEntries();
+ ldstQueue.resetEntries();
+}
+
+template <class Impl>
+void
+DefaultIEW<Impl>::readStallSignals(unsigned tid)
+{
+ if (fromCommit->commitBlock[tid]) {
+ stalls[tid].commit = true;
+ }
+
+ if (fromCommit->commitUnblock[tid]) {
+ assert(stalls[tid].commit);
+ stalls[tid].commit = false;
+ }
+}
+
+template <class Impl>
+bool
+DefaultIEW<Impl>::checkStall(unsigned tid)
+{
+ bool ret_val(false);
+
+ if (stalls[tid].commit) {
+ DPRINTF(IEW,"[tid:%i]: Stall from Commit stage detected.\n",tid);
+ ret_val = true;
+ } else if (instQueue.isFull(tid)) {
+ DPRINTF(IEW,"[tid:%i]: Stall: IQ is full.\n",tid);
+ ret_val = true;
+ } else if (ldstQueue.isFull(tid)) {
+ DPRINTF(IEW,"[tid:%i]: Stall: LSQ is full\n",tid);
+
+ if (ldstQueue.numLoads(tid) > 0 ) {
+
+ DPRINTF(IEW,"[tid:%i]: LSQ oldest load: [sn:%i] \n",
+ tid,ldstQueue.getLoadHeadSeqNum(tid));
+ }
+
+ if (ldstQueue.numStores(tid) > 0) {
+
+ DPRINTF(IEW,"[tid:%i]: LSQ oldest store: [sn:%i] \n",
+ tid,ldstQueue.getStoreHeadSeqNum(tid));
+ }
+
+ ret_val = true;
+ } else if (ldstQueue.isStalled(tid)) {
+ DPRINTF(IEW,"[tid:%i]: Stall: LSQ stall detected.\n",tid);
+ ret_val = true;
+ }
+
+ return ret_val;
+}
+
+template <class Impl>
+void
+DefaultIEW<Impl>::checkSignalsAndUpdate(unsigned tid)
+{
+ // Check if there's a squash signal, squash if there is
+ // Check stall signals, block if there is.
+ // If status was Blocked
+ // if so then go to unblocking
+ // If status was Squashing
+ // check if squashing is not high. Switch to running this cycle.
+
+ readStallSignals(tid);
+
+ if (fromCommit->commitInfo[tid].squash) {
+ squash(tid);
+
+ if (dispatchStatus[tid] == Blocked ||
+ dispatchStatus[tid] == Unblocking) {
+ toRename->iewUnblock[tid] = true;
+ wroteToTimeBuffer = true;
+ }
+
+ dispatchStatus[tid] = Squashing;
+
+ fetchRedirect[tid] = false;
+ return;
+ }
+
+ if (fromCommit->commitInfo[tid].robSquashing) {
+ DPRINTF(IEW, "[tid:%i]: ROB is still squashing.\n", tid);
+
+ dispatchStatus[tid] = Squashing;
+
+ emptyRenameInsts(tid);
+ wroteToTimeBuffer = true;
+ return;
+ }
+
+ if (checkStall(tid)) {
+ block(tid);
+ dispatchStatus[tid] = Blocked;
+ return;
+ }
+
+ if (dispatchStatus[tid] == Blocked) {
+ // Status from previous cycle was blocked, but there are no more stall
+ // conditions. Switch over to unblocking.
+ DPRINTF(IEW, "[tid:%i]: Done blocking, switching to unblocking.\n",
+ tid);
+
+ dispatchStatus[tid] = Unblocking;
+
+ unblock(tid);
+
+ return;
+ }
+
+ if (dispatchStatus[tid] == Squashing) {
+ // Switch status to running if rename isn't being told to block or
+ // squash this cycle.
+ DPRINTF(IEW, "[tid:%i]: Done squashing, switching to running.\n",
+ tid);
+
+ dispatchStatus[tid] = Running;
+
+ return;
+ }
+}
+
+template <class Impl>
+void
+DefaultIEW<Impl>::sortInsts()
+{
+ int insts_from_rename = fromRename->size;
+#ifdef DEBUG
+#if !ISA_HAS_DELAY_SLOT
+ for (int i = 0; i < numThreads; i++)
+ assert(insts[i].empty());
+#endif
+#endif
+ for (int i = 0; i < insts_from_rename; ++i) {
+ insts[fromRename->insts[i]->threadNumber].push(fromRename->insts[i]);
+ }
+}
+
+template <class Impl>
+void
+DefaultIEW<Impl>::emptyRenameInsts(unsigned tid)
+{
+ DPRINTF(IEW, "[tid:%i]: Removing incoming rename instructions until "
+ "[sn:%i].\n", tid, bdelayDoneSeqNum[tid]);
+
+ while (!insts[tid].empty()) {
+#if ISA_HAS_DELAY_SLOT
+ if (insts[tid].front()->seqNum <= bdelayDoneSeqNum[tid]) {
+ DPRINTF(IEW, "[tid:%i]: Done removing, cannot remove instruction"
+ " that occurs at or before delay slot [sn:%i].\n",
+ tid, bdelayDoneSeqNum[tid]);
+ break;
+ } else {
+ DPRINTF(IEW, "[tid:%i]: Removing incoming rename instruction "
+ "[sn:%i].\n", tid, insts[tid].front()->seqNum);
+ }
+#endif
+
+ if (insts[tid].front()->isLoad() ||
+ insts[tid].front()->isStore() ) {
+ toRename->iewInfo[tid].dispatchedToLSQ++;
+ }
+
+ toRename->iewInfo[tid].dispatched++;
+
+ insts[tid].pop();
+ }
+}
+
++template <class Impl>
++void
++DefaultIEW<Impl>::emptyRenameInsts(unsigned tid)
++{
++ while (!insts[tid].empty()) {
++ if (insts[tid].front()->isLoad() ||
++ insts[tid].front()->isStore() ) {
++ toRename->iewInfo[tid].dispatchedToLSQ++;
++ }
++
++ toRename->iewInfo[tid].dispatched++;
++
++ insts[tid].pop();
++ }
++}
++
+template <class Impl>
+void
+DefaultIEW<Impl>::wakeCPU()
+{
+ cpu->wakeCPU();
+}
+
+template <class Impl>
+void
+DefaultIEW<Impl>::activityThisCycle()
+{
+ DPRINTF(Activity, "Activity this cycle.\n");
+ cpu->activityThisCycle();
+}
+
+template <class Impl>
+inline void
+DefaultIEW<Impl>::activateStage()
+{
+ DPRINTF(Activity, "Activating stage.\n");
+ cpu->activateStage(O3CPU::IEWIdx);
+}
+
+template <class Impl>
+inline void
+DefaultIEW<Impl>::deactivateStage()
+{
+ DPRINTF(Activity, "Deactivating stage.\n");
+ cpu->deactivateStage(O3CPU::IEWIdx);
+}
+
+template<class Impl>
+void
+DefaultIEW<Impl>::dispatch(unsigned tid)
+{
+ // If status is Running or idle,
+ // call dispatchInsts()
+ // If status is Unblocking,
+ // buffer any instructions coming from rename
+ // continue trying to empty skid buffer
+ // check if stall conditions have passed
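+ //
+ // Illustrative walk-through (assumed scenario): if the IQ filled
+ // last cycle, dispatchStatus[tid] is Blocked; once checkStall()
+ // stops reporting a stall, checkSignalsAndUpdate() moves the stage
+ // to Unblocking, and dispatch() then drains the skid buffer via
+ // dispatchInsts() before the stage returns to Running.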
+
+ if (dispatchStatus[tid] == Blocked) {
+ ++iewBlockCycles;
+
+ } else if (dispatchStatus[tid] == Squashing) {
+ ++iewSquashCycles;
+ }
+
+ // Dispatch should try to dispatch as many instructions as its bandwidth
+ // will allow, as long as it is not currently blocked.
+ if (dispatchStatus[tid] == Running ||
+ dispatchStatus[tid] == Idle) {
+ DPRINTF(IEW, "[tid:%i] Not blocked, so attempting to run "
+ "dispatch.\n", tid);
+
+ dispatchInsts(tid);
+ } else if (dispatchStatus[tid] == Unblocking) {
+ // Make sure that the skid buffer has something in it if the
+ // status is unblocking.
+ assert(!skidsEmpty());
+
+ // If the status was unblocking, then instructions from the skid
+ // buffer were used. Remove those instructions and handle
+ // the rest of unblocking.
+ dispatchInsts(tid);
+
+ ++iewUnblockCycles;
+
+ if (validInstsFromRename() && dispatchedAllInsts) {
+ // Add the current inputs to the skid buffer so they can be
+ // reprocessed when this stage unblocks.
+ skidInsert(tid);
+ }
+
+ unblock(tid);
+ }
+}
+
+template <class Impl>
+void
+DefaultIEW<Impl>::dispatchInsts(unsigned tid)
+{
+ dispatchedAllInsts = true;
+
+ // Obtain instructions from skid buffer if unblocking, or queue from rename
+ // otherwise.
+ std::queue<DynInstPtr> &insts_to_dispatch =
+ dispatchStatus[tid] == Unblocking ?
+ skidBuffer[tid] : insts[tid];
+
+ int insts_to_add = insts_to_dispatch.size();
+
+ DynInstPtr inst;
+ bool add_to_iq = false;
+ int dis_num_inst = 0;
+
+ // Loop through the instructions, putting them in the instruction
+ // queue.
+ for ( ; dis_num_inst < insts_to_add &&
+ dis_num_inst < dispatchWidth;
+ ++dis_num_inst)
+ {
+ inst = insts_to_dispatch.front();
+
+ if (dispatchStatus[tid] == Unblocking) {
+ DPRINTF(IEW, "[tid:%i]: Issue: Examining instruction from skid "
+ "buffer\n", tid);
+ }
+
+ // Make sure there's a valid instruction there.
+ assert(inst);
+
+ DPRINTF(IEW, "[tid:%i]: Issue: Adding PC %#x [sn:%lli] [tid:%i] to "
+ "IQ.\n",
+ tid, inst->readPC(), inst->seqNum, inst->threadNumber);
+
+ // Be sure to mark these instructions as ready so that the
+ // commit stage can go ahead and execute them, and mark
+ // them as issued so the IQ doesn't reprocess them.
+
+ // Check for squashed instructions.
+ if (inst->isSquashed()) {
+ DPRINTF(IEW, "[tid:%i]: Issue: Squashed instruction encountered, "
+ "not adding to IQ.\n", tid);
+
+ ++iewDispSquashedInsts;
+
+ insts_to_dispatch.pop();
+
+ //Tell Rename That An Instruction has been processed
+ if (inst->isLoad() || inst->isStore()) {
+ toRename->iewInfo[tid].dispatchedToLSQ++;
+ }
+ toRename->iewInfo[tid].dispatched++;
+
+ continue;
+ }
+
+ // Check for full conditions.
+ if (instQueue.isFull(tid)) {
+ DPRINTF(IEW, "[tid:%i]: Issue: IQ has become full.\n", tid);
+
+ // Call function to start blocking.
+ block(tid);
+
+ // Set unblock to false. Special case: we were draining the
+ // skid buffer (unblocking) but the IQ has filled up again.
+ toRename->iewUnblock[tid] = false;
+
+ dispatchedAllInsts = false;
+
+ ++iewIQFullEvents;
+ break;
+ } else if (ldstQueue.isFull(tid)) {
+ DPRINTF(IEW, "[tid:%i]: Issue: LSQ has become full.\n",tid);
+
+ // Call function to start blocking.
+ block(tid);
+
+ // Set unblock to false. Special case: we were draining the
+ // skid buffer (unblocking) but the LSQ has filled up again.
+ toRename->iewUnblock[tid] = false;
+
+ dispatchedAllInsts = false;
+
+ ++iewLSQFullEvents;
+ break;
+ }
+
+ // Otherwise issue the instruction just fine.
+ if (inst->isLoad()) {
+ DPRINTF(IEW, "[tid:%i]: Issue: Memory instruction "
+ "encountered, adding to LSQ.\n", tid);
+
+ // Reserve a spot in the load store queue for this
+ // memory access.
+ ldstQueue.insertLoad(inst);
+
+ ++iewDispLoadInsts;
+
+ add_to_iq = true;
+
+ toRename->iewInfo[tid].dispatchedToLSQ++;
+ } else if (inst->isStore()) {
+ DPRINTF(IEW, "[tid:%i]: Issue: Memory instruction "
+ "encountered, adding to LSQ.\n", tid);
+
+ ldstQueue.insertStore(inst);
+
+ ++iewDispStoreInsts;
+
+ if (inst->isStoreConditional()) {
+ // Store conditionals need to be set as "canCommit()"
+ // so that commit can process them when they reach the
+ // head of commit.
+ // @todo: This is somewhat specific to Alpha.
+ inst->setCanCommit();
+ instQueue.insertNonSpec(inst);
+ add_to_iq = false;
+
+ ++iewDispNonSpecInsts;
+ } else {
+ add_to_iq = true;
+ }
+
+ toRename->iewInfo[tid].dispatchedToLSQ++;
+#if FULL_SYSTEM
+ } else if (inst->isMemBarrier() || inst->isWriteBarrier()) {
+ // Same as non-speculative stores.
+ inst->setCanCommit();
+ instQueue.insertBarrier(inst);
+ add_to_iq = false;
+#endif
+ } else if (inst->isNonSpeculative()) {
+ DPRINTF(IEW, "[tid:%i]: Issue: Nonspeculative instruction "
+ "encountered, skipping.\n", tid);
+
+ // Same as non-speculative stores.
+ inst->setCanCommit();
+
+ // Specifically insert it as nonspeculative.
+ instQueue.insertNonSpec(inst);
+
+ ++iewDispNonSpecInsts;
+
+ add_to_iq = false;
+ } else if (inst->isNop()) {
+ DPRINTF(IEW, "[tid:%i]: Issue: Nop instruction encountered, "
+ "skipping.\n", tid);
+
+ inst->setIssued();
+ inst->setExecuted();
+ inst->setCanCommit();
+
+ instQueue.recordProducer(inst);
+
+ iewExecutedNop[tid]++;
+
+ add_to_iq = false;
+ } else if (inst->isExecuted()) {
+ assert(0 && "Instruction shouldn't be executed.\n");
+ DPRINTF(IEW, "Issue: Executed branch encountered, "
+ "skipping.\n");
+
+ inst->setIssued();
+ inst->setCanCommit();
+
+ instQueue.recordProducer(inst);
+
+ add_to_iq = false;
+ } else {
+ add_to_iq = true;
+ }
+
+ // If the instruction queue is not full, then add the
+ // instruction.
+ if (add_to_iq) {
+ instQueue.insert(inst);
+ }
+
+ insts_to_dispatch.pop();
+
+ toRename->iewInfo[tid].dispatched++;
+
+ ++iewDispatchedInsts;
+ }
+
+ if (!insts_to_dispatch.empty()) {
+ DPRINTF(IEW,"[tid:%i]: Issue: Bandwidth Full. Blocking.\n", tid);
+ block(tid);
+ toRename->iewUnblock[tid] = false;
+ }
+
+ if (dispatchStatus[tid] == Idle && dis_num_inst) {
+ dispatchStatus[tid] = Running;
+
+ updatedQueues = true;
+ }
+
+ dis_num_inst = 0;
+}
+
+template <class Impl>
+void
+DefaultIEW<Impl>::printAvailableInsts()
+{
+ int inst = 0;
+
+ std::cout << "Available Instructions: ";
+
+ while (fromIssue->insts[inst]) {
+
+ if (inst%3==0) std::cout << "\n\t";
+
+ std::cout << "PC: " << fromIssue->insts[inst]->readPC()
+ << " TN: " << fromIssue->insts[inst]->threadNumber
+ << " SN: " << fromIssue->insts[inst]->seqNum << " | ";
+
+ inst++;
+
+ }
+
+ std::cout << "\n";
+}
+
+template <class Impl>
+void
+DefaultIEW<Impl>::executeInsts()
+{
+ wbNumInst = 0;
+ wbCycle = 0;
+
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+
+ while (threads != (*activeThreads).end()) {
+ unsigned tid = *threads++;
+ fetchRedirect[tid] = false;
+ }
+
+ // Uncomment this if you want to see all available instructions.
+// printAvailableInsts();
+
+ // Execute/writeback any instructions that are available.
+ int insts_to_execute = fromIssue->size;
+ int inst_num = 0;
+ for (; inst_num < insts_to_execute;
+ ++inst_num) {
+
+ DPRINTF(IEW, "Execute: Executing instructions from IQ.\n");
+
+ DynInstPtr inst = instQueue.getInstToExecute();
+
+ DPRINTF(IEW, "Execute: Processing PC %#x, [tid:%i] [sn:%i].\n",
+ inst->readPC(), inst->threadNumber,inst->seqNum);
+
+ // Check if the instruction is squashed; if so then skip it
+ if (inst->isSquashed()) {
+ DPRINTF(IEW, "Execute: Instruction was squashed.\n");
+
+ // Consider this instruction executed so that commit can go
+ // ahead and retire the instruction.
+ inst->setExecuted();
+
+ // Not sure if I should set this here or just let commit try to
+ // commit any squashed instructions. I like the latter a bit more.
+ inst->setCanCommit();
+
+ ++iewExecSquashedInsts;
+
+ decrWb(inst->seqNum);
+ continue;
+ }
+
+ Fault fault = NoFault;
+
+ // Execute instruction.
+ // Note that if the instruction faults, it will be handled
+ // at the commit stage.
+ if (inst->isMemRef() &&
+ (!inst->isDataPrefetch() && !inst->isInstPrefetch())) {
+ DPRINTF(IEW, "Execute: Calculating address for memory "
+ "reference.\n");
+
+ // Tell the LDSTQ to execute this instruction (if it is a load).
+ if (inst->isLoad()) {
+ // Loads will mark themselves as executed, and their writeback
+ // event adds the instruction to the queue to commit
+ fault = ldstQueue.executeLoad(inst);
+ } else if (inst->isStore()) {
+ fault = ldstQueue.executeStore(inst);
+
+ // If the store had a fault then it may not have a mem req.
+ if (!inst->isStoreConditional() && fault == NoFault) {
+ inst->setExecuted();
+
+ instToCommit(inst);
+ } else if (fault != NoFault) {
+ // If the instruction faulted, then we need to send it along
+ // to commit without the instruction completing.
+
+ // Send this instruction to commit, also make sure iew stage
+ // realizes there is activity.
+ inst->setExecuted();
+
+ instToCommit(inst);
+ activityThisCycle();
+ }
+
+ // Store conditionals will mark themselves as
+ // executed, and their writeback event will add the
+ // instruction to the queue to commit.
+ } else {
+ panic("Unexpected memory type!\n");
+ }
+
+ } else {
+ inst->execute();
+
+ inst->setExecuted();
+
+ instToCommit(inst);
+ }
+
+ updateExeInstStats(inst);
+
+ // Check if branch prediction was correct, if not then we need
+ // to tell commit to squash in flight instructions. Only
+ // handle this if there hasn't already been something that
+ // redirects fetch in this group of instructions.
+
+ // This probably needs to prioritize the redirects if a different
+ // scheduler is used. Currently the scheduler schedules the oldest
+ // instruction first, so the branch resolution order will be correct.
+ unsigned tid = inst->threadNumber;
+
+ if (!fetchRedirect[tid]) {
+
+ if (inst->mispredicted()) {
+ fetchRedirect[tid] = true;
+
+ DPRINTF(IEW, "Execute: Branch mispredict detected.\n");
+#if ISA_HAS_DELAY_SLOT
+ DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x.\n",
+ inst->nextNPC);
+#else
+ DPRINTF(IEW, "Execute: Redirecting fetch to PC: %#x.\n",
+ inst->nextPC);
+#endif
+ // If incorrect, then signal the ROB that it must be squashed.
+ squashDueToBranch(inst, tid);
+
+ if (inst->predTaken()) {
+ predictedTakenIncorrect++;
+ } else {
+ predictedNotTakenIncorrect++;
+ }
+ } else if (ldstQueue.violation(tid)) {
+ fetchRedirect[tid] = true;
+
+ // If there was an ordering violation, then get the
+ // DynInst that caused the violation. Note that this
+ // clears the violation signal.
+ DynInstPtr violator;
+ violator = ldstQueue.getMemDepViolator(tid);
+
+ DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: "
+ "%#x, inst PC: %#x. Addr is: %#x.\n",
+ violator->readPC(), inst->readPC(), inst->physEffAddr);
+
+ // Tell the instruction queue that a violation has occurred.
+ instQueue.violation(inst, violator);
+
+ // Squash.
+ squashDueToMemOrder(inst,tid);
+
+ ++memOrderViolationEvents;
+ } else if (ldstQueue.loadBlocked(tid) &&
+ !ldstQueue.isLoadBlockedHandled(tid)) {
+ fetchRedirect[tid] = true;
+
+ DPRINTF(IEW, "Load operation couldn't execute because the "
+ "memory system is blocked. PC: %#x [sn:%lli]\n",
+ inst->readPC(), inst->seqNum);
+
+ squashDueToMemBlocked(inst, tid);
+ }
+ }
+ }
+
+ // Update and record activity if we processed any instructions.
+ if (inst_num) {
+ if (exeStatus == Idle) {
+ exeStatus = Running;
+ }
+
+ updatedQueues = true;
+
+ cpu->activityThisCycle();
+ }
+
+ // Need to reset this in case a writeback event needs to write into the
+ // iew queue. That way the writeback event will write into the correct
+ // spot in the queue.
+ wbNumInst = 0;
+}
+
+template <class Impl>
+void
+DefaultIEW<Impl>::writebackInsts()
+{
+ // Loop through the head of the time buffer and wake any
+ // dependents. These instructions are about to write back. Also
+ // mark scoreboard that this instruction is finally complete.
+ // Either have IEW have direct access to scoreboard, or have this
+ // as part of backwards communication.
+ for (int inst_num = 0; inst_num < issueWidth &&
+ toCommit->insts[inst_num]; inst_num++) {
+ DynInstPtr inst = toCommit->insts[inst_num];
+ int tid = inst->threadNumber;
+
+ DPRINTF(IEW, "Sending instructions to commit, [sn:%lli] PC %#x.\n",
+ inst->seqNum, inst->readPC());
+
+ iewInstsToCommit[tid]++;
+
+ // Some instructions will be sent to commit without having
+ // executed because they need commit to handle them.
+ // E.g. Uncached loads have not actually executed when they
+ // are first sent to commit. Instead commit must tell the LSQ
+ // when it's ready to execute the uncached load.
+ if (!inst->isSquashed() && inst->isExecuted() &&
+ inst->getFault() == NoFault) {
+ int dependents = instQueue.wakeDependents(inst);
+
+ for (int i = 0; i < inst->numDestRegs(); i++) {
+ //mark as Ready
+ DPRINTF(IEW,"Setting Destination Register %i\n",
+ inst->renamedDestRegIdx(i));
+ scoreboard->setReg(inst->renamedDestRegIdx(i));
+ }
+
+ if (dependents) {
+ producerInst[tid]++;
+ consumerInst[tid]+= dependents;
+ }
+ writebackCount[tid]++;
+ }
+
+ decrWb(inst->seqNum);
+ }
+}
+
+template<class Impl>
+void
+DefaultIEW<Impl>::tick()
+{
+ wbNumInst = 0;
+ wbCycle = 0;
+
+ wroteToTimeBuffer = false;
+ updatedQueues = false;
+
+ sortInsts();
+
+ // Free function units marked as being freed this cycle.
+ fuPool->processFreeUnits();
+
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+
+ // Check stall and squash signals, dispatch any instructions.
+ while (threads != (*activeThreads).end()) {
+ unsigned tid = *threads++;
+
+ DPRINTF(IEW,"Issue: Processing [tid:%i]\n",tid);
+
+ checkSignalsAndUpdate(tid);
+ dispatch(tid);
+ }
+
+ if (exeStatus != Squashing) {
+ executeInsts();
+
+ writebackInsts();
+
+ // Have the instruction queue try to schedule any ready instructions.
+ // (In actuality, this scheduling is for instructions that will
+ // be executed next cycle.)
+ instQueue.scheduleReadyInsts();
+
+ // Also should advance its own time buffers if the stage ran.
+ // Not the best place for it, but this works (hopefully).
+ issueToExecQueue.advance();
+ }
+
+ bool broadcast_free_entries = false;
+
+ if (updatedQueues || exeStatus == Running || updateLSQNextCycle) {
+ exeStatus = Idle;
+ updateLSQNextCycle = false;
+
+ broadcast_free_entries = true;
+ }
+
+ // Writeback any stores using any leftover bandwidth.
+ ldstQueue.writebackStores();
+
+ // Check the committed load/store signals to see if there's a load
+ // or store to commit. Also check if it's being told to execute a
+ // nonspeculative instruction.
+ // This is pretty inefficient...
+
+ threads = (*activeThreads).begin();
+ while (threads != (*activeThreads).end()) {
+ unsigned tid = (*threads++);
+
+ DPRINTF(IEW,"Processing [tid:%i]\n",tid);
+
+ // Update structures based on instructions committed.
+ if (fromCommit->commitInfo[tid].doneSeqNum != 0 &&
+ !fromCommit->commitInfo[tid].squash &&
+ !fromCommit->commitInfo[tid].robSquashing) {
+
+ ldstQueue.commitStores(fromCommit->commitInfo[tid].doneSeqNum,tid);
+
+ ldstQueue.commitLoads(fromCommit->commitInfo[tid].doneSeqNum,tid);
+
+ updateLSQNextCycle = true;
+ instQueue.commit(fromCommit->commitInfo[tid].doneSeqNum,tid);
+ }
+
+ if (fromCommit->commitInfo[tid].nonSpecSeqNum != 0) {
+
+ //DPRINTF(IEW,"NonspecInst from thread %i",tid);
+ if (fromCommit->commitInfo[tid].uncached) {
+ instQueue.replayMemInst(fromCommit->commitInfo[tid].uncachedLoad);
+ } else {
+ instQueue.scheduleNonSpec(
+ fromCommit->commitInfo[tid].nonSpecSeqNum);
+ }
+ }
+
+ if (broadcast_free_entries) {
+ toFetch->iewInfo[tid].iqCount =
+ instQueue.getCount(tid);
+ toFetch->iewInfo[tid].ldstqCount =
+ ldstQueue.getCount(tid);
+
+ toRename->iewInfo[tid].usedIQ = true;
+ toRename->iewInfo[tid].freeIQEntries =
+ instQueue.numFreeEntries();
+ toRename->iewInfo[tid].usedLSQ = true;
+ toRename->iewInfo[tid].freeLSQEntries =
+ ldstQueue.numFreeEntries(tid);
+
+ wroteToTimeBuffer = true;
+ }
+
+ DPRINTF(IEW, "[tid:%i], Dispatch dispatched %i instructions.\n",
+ tid, toRename->iewInfo[tid].dispatched);
+ }
+
+ DPRINTF(IEW, "IQ has %i free entries (Can schedule: %i). "
+ "LSQ has %i free entries.\n",
+ instQueue.numFreeEntries(), instQueue.hasReadyInsts(),
+ ldstQueue.numFreeEntries());
+
+ updateStatus();
+
+ if (wroteToTimeBuffer) {
+ DPRINTF(Activity, "Activity this cycle.\n");
+ cpu->activityThisCycle();
+ }
+}
+
+template <class Impl>
+void
+DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst)
+{
+ int thread_number = inst->threadNumber;
+
+ //
+ // Pick off the software prefetches
+ //
+#ifdef TARGET_ALPHA
+ if (inst->isDataPrefetch())
+ iewExecutedSwp[thread_number]++;
+ else
+ iewExecutedInsts++;
+#else
+ iewExecutedInsts++;
+#endif
+
+ //
+ // Control operations
+ //
+ if (inst->isControl())
+ iewExecutedBranches[thread_number]++;
+
+ //
+ // Memory operations
+ //
+ if (inst->isMemRef()) {
+ iewExecutedRefs[thread_number]++;
+
+ if (inst->isLoad()) {
+ iewExecLoadInsts[thread_number]++;
+ }
+ }
+}
--- /dev/null
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_O3_INST_QUEUE_HH__
+#define __CPU_O3_INST_QUEUE_HH__
+
+#include <list>
+#include <map>
+#include <queue>
+#include <vector>
+
+#include "base/statistics.hh"
+#include "base/timebuf.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/o3/dep_graph.hh"
+#include "cpu/op_class.hh"
+#include "sim/host.hh"
+
+class FUPool;
+class MemInterface;
+
+/**
+ * A standard instruction queue class. It holds ready instructions, in
+ * order, in seperate priority queues to facilitate the scheduling of
+ * instructions. The IQ uses a separate linked list to track dependencies.
+ * Similar to the rename map and the free list, it expects that
+ * floating point registers have their indices start after the integer
+ * registers (i.e., with 96 int and 96 fp registers, regs 0-95 are integer
+ * and 96-191 are fp). This holds for both logical and physical
+ * register indices. The IQ depends on the memory dependence unit to
+ * track when memory operations are ready in terms of ordering; register
+ * dependencies are tracked normally. Right now the IQ also handles the
+ * execution timing; this is mainly to allow back-to-back scheduling without
+ * requiring IEW to be able to peek into the IQ. At the end of the execution
+ * latency, the instruction is put into the queue to execute, where it will
+ * have the execute() function called on it.
+ * @todo: Make IQ able to handle multiple FU pools.
+ */
+template <class Impl>
+class InstructionQueue
+{
+ public:
+ //Typedefs from the Impl.
+ typedef typename Impl::O3CPU O3CPU;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::Params Params;
+
+ typedef typename Impl::CPUPol::IEW IEW;
+ typedef typename Impl::CPUPol::MemDepUnit MemDepUnit;
+ typedef typename Impl::CPUPol::IssueStruct IssueStruct;
+ typedef typename Impl::CPUPol::TimeStruct TimeStruct;
+
+ // Typedef of iterator through the list of instructions.
+ typedef typename std::list<DynInstPtr>::iterator ListIt;
+
+ friend class Impl::O3CPU;
+
+ /** FU completion event class. */
+ class FUCompletion : public Event {
+ private:
+ /** Executing instruction. */
+ DynInstPtr inst;
+
+ /** Index of the FU used for executing. */
+ int fuIdx;
+
+ /** Pointer back to the instruction queue. */
+ InstructionQueue<Impl> *iqPtr;
+
+ /** Should the FU be added to the list to be freed upon
+ * completing this event.
+ */
+ bool freeFU;
+
+ public:
+ /** Construct a FU completion event. */
+ FUCompletion(DynInstPtr &_inst, int fu_idx,
+ InstructionQueue<Impl> *iq_ptr);
+
+ virtual void process();
+ virtual const char *description();
+ void setFreeFU() { freeFU = true; }
+ };
+
+ /** Constructs an IQ. */
+ InstructionQueue(Params *params);
+
+ /** Destructs the IQ. */
+ ~InstructionQueue();
+
+ /** Returns the name of the IQ. */
+ std::string name() const;
+
+ /** Registers statistics. */
+ void regStats();
+
+ /** Resets all instruction queue state. */
+ void resetState();
+
+ /** Sets CPU pointer. */
+ void setCPU(O3CPU *_cpu) { cpu = _cpu; }
+
+ /** Sets active threads list. */
+ void setActiveThreads(std::list<unsigned> *at_ptr);
+
+ /** Sets the IEW pointer. */
+ void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; }
+
+ /** Sets the timer buffer between issue and execute. */
+ void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue);
+
+ /** Sets the global time buffer. */
+ void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
+
+ /** Switches out the instruction queue. */
+ void switchOut();
+
+ /** Takes over execution from another CPU's thread. */
+ void takeOverFrom();
+
+ /** Returns if the IQ is switched out. */
+ bool isSwitchedOut() { return switchedOut; }
+
+ /** Number of entries needed for given amount of threads. */
+ int entryAmount(int num_threads);
+
+ /** Resets max entries for all threads. */
+ void resetEntries();
+
+ /** Returns total number of free entries. */
+ unsigned numFreeEntries();
+
+ /** Returns number of free entries for a thread. */
+ unsigned numFreeEntries(unsigned tid);
+
+ /** Returns whether or not the IQ is full. */
+ bool isFull();
+
+ /** Returns whether or not the IQ is full for a specific thread. */
+ bool isFull(unsigned tid);
+
+ /** Returns if there are any ready instructions in the IQ. */
+ bool hasReadyInsts();
+
+ /** Inserts a new instruction into the IQ. */
+ void insert(DynInstPtr &new_inst);
+
+ /** Inserts a new, non-speculative instruction into the IQ. */
+ void insertNonSpec(DynInstPtr &new_inst);
+
+ /** Inserts a memory or write barrier into the IQ to make sure
+ * loads and stores are ordered properly.
+ */
+ void insertBarrier(DynInstPtr &barr_inst);
+
+ /** Returns the oldest scheduled instruction, and removes it from
+ * the list of instructions waiting to execute.
+ */
+ DynInstPtr getInstToExecute();
+
+ /**
+ * Records the instruction as the producer of a register without
+ * adding it to the rest of the IQ.
+ */
+ void recordProducer(DynInstPtr &inst)
+ { addToProducers(inst); }
+
+ /** Process FU completion event. */
+ void processFUCompletion(DynInstPtr &inst, int fu_idx);
+
+ /**
+ * Schedules ready instructions, adding the ready ones (oldest first) to
+ * the queue to execute.
+ */
+ void scheduleReadyInsts();
+
+ /** Schedules a single specific non-speculative instruction. */
+ void scheduleNonSpec(const InstSeqNum &inst);
+
+ /**
+ * Commits all instructions up to and including the given sequence number,
+ * for a specific thread.
+ */
+ void commit(const InstSeqNum &inst, unsigned tid = 0);
+
+ /** Wakes all dependents of a completed instruction. */
+ int wakeDependents(DynInstPtr &completed_inst);
+
+ /** Adds a ready memory instruction to the ready list. */
+ void addReadyMemInst(DynInstPtr &ready_inst);
+
+ /**
+ * Reschedules a memory instruction. It will be ready to issue once
+ * replayMemInst() is called.
+ */
+ void rescheduleMemInst(DynInstPtr &resched_inst);
+
+ /** Replays a memory instruction. It must be rescheduled first. */
+ void replayMemInst(DynInstPtr &replay_inst);
+
+ /** Completes a memory operation. */
+ void completeMemInst(DynInstPtr &completed_inst);
+
+ /** Indicates an ordering violation between a store and a load. */
+ void violation(DynInstPtr &store, DynInstPtr &faulting_load);
+
+ /**
+ * Squashes instructions for a thread. Squashing information is obtained
+ * from the time buffer.
+ */
+ void squash(unsigned tid);
+
+ /** Returns the number of used entries for a thread. */
+ unsigned getCount(unsigned tid) { return count[tid]; };
+
+ /** Debug function to print all instructions. */
+ void printInsts();
+
+ private:
+ /** Does the actual squashing. */
+ void doSquash(unsigned tid);
+
+ /////////////////////////
+ // Various pointers
+ /////////////////////////
+
+ /** Pointer to the CPU. */
+ O3CPU *cpu;
+
+ /** Cache interface. */
+ MemInterface *dcacheInterface;
+
+ /** Pointer to IEW stage. */
+ IEW *iewStage;
+
+ /** The memory dependence unit, which tracks/predicts memory dependences
+ * between instructions.
+ */
+ MemDepUnit memDepUnit[Impl::MaxThreads];
+
+ /** The queue to the execute stage. Issued instructions will be written
+ * into it.
+ */
+ TimeBuffer<IssueStruct> *issueToExecuteQueue;
+
+ /** The backwards time buffer. */
+ TimeBuffer<TimeStruct> *timeBuffer;
+
+ /** Wire to read information from timebuffer. */
+ typename TimeBuffer<TimeStruct>::wire fromCommit;
+
+ /** Function unit pool. */
+ FUPool *fuPool;
+
+ //////////////////////////////////////
+ // Instruction lists, ready queues, and ordering
+ //////////////////////////////////////
+
+ /** List of all the instructions in the IQ (some of which may be issued). */
+ std::list<DynInstPtr> instList[Impl::MaxThreads];
+
+ /** List of instructions that are ready to be executed. */
+ std::list<DynInstPtr> instsToExecute;
+
+ /**
+ * Struct for comparing entries to be added to the priority queue.
+ * This gives reverse ordering to the instructions in terms of
+ * sequence numbers: the instructions with smaller sequence
+ * numbers (and hence are older) will be at the top of the
+ * priority queue.
+ */
+ struct pqCompare {
+ bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
+ {
+ return lhs->seqNum > rhs->seqNum;
+ }
+ };
+
+ typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare>
+ ReadyInstQueue;
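+
+ // A minimal sketch of the resulting order (illustrative only;
+ // makeInst() is a hypothetical factory, not part of this class):
+ //
+ //   ReadyInstQueue q;
+ //   q.push(makeInst(/* seqNum = */ 5));
+ //   q.push(makeInst(/* seqNum = */ 2));
+ //   q.push(makeInst(/* seqNum = */ 9));
+ //   assert(q.top()->seqNum == 2); // smallest seqNum (oldest) on top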
+
+ /** List of ready instructions, per op class. They are separated by op
+ * class to allow for easy mapping to FUs.
+ */
+ ReadyInstQueue readyInsts[Num_OpClasses];
+
+ /** List of non-speculative instructions that will be scheduled
+ * once the IQ gets a signal from commit. While it's redundant to
+ * have the key be a part of the value (the sequence number is stored
+ * inside of DynInst), when these instructions are woken up only
+ * the sequence number will be available. Thus it is most efficient to be
+ * able to search by the sequence number alone.
+ */
+ std::map<InstSeqNum, DynInstPtr> nonSpecInsts;
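+
+ // For example, when commit signals that [sn:42] may execute, only the
+ // sequence number crosses the time buffer, so scheduleNonSpec(42) can
+ // find the DynInst with one map lookup instead of scanning instList.
+ // (The sequence number 42 is illustrative.)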
+
+ typedef typename std::map<InstSeqNum, DynInstPtr>::iterator NonSpecMapIt;
+
+ /** Entry for the list age ordering by op class. */
+ struct ListOrderEntry {
+ OpClass queueType;
+ InstSeqNum oldestInst;
+ };
+
+ /** List that contains the age order of the oldest instruction of each
+ * ready queue. Used to select the oldest instruction available
+ * among op classes.
+ * @todo: Might be better to just move these entries around instead
+ * of creating new ones every time the position changes due to an
+ * instruction issuing; std::list::splice could do this in place.
+ */
+ std::list<ListOrderEntry> listOrder;
+
+ typedef typename std::list<ListOrderEntry>::iterator ListOrderIt;
+
+ /** Tracks if each ready queue is on the age order list. */
+ bool queueOnList[Num_OpClasses];
+
+ /** Iterators of each ready queue. Points to their spot in the age order
+ * list.
+ */
+ ListOrderIt readyIt[Num_OpClasses];
+
+ /** Add an op class to the age order list. */
+ void addToOrderList(OpClass op_class);
+
+ /**
+ * Called when the oldest instruction has been removed from a ready queue;
+ * this places that ready queue into the proper spot in the age order list.
+ */
+ void moveToYoungerInst(ListOrderIt age_order_it);
+
+ DependencyGraph<DynInstPtr> dependGraph;
+
+ //////////////////////////////////////
+ // Various parameters
+ //////////////////////////////////////
+
+ /** IQ Resource Sharing Policy */
+ enum IQPolicy {
+ Dynamic,
+ Partitioned,
+ Threshold
+ };
+
+ /** IQ sharing policy for SMT. */
+ IQPolicy iqPolicy;
+
+ /** Number of total threads. */
+ unsigned numThreads;
+
+ /** Pointer to list of active threads. */
+ std::list<unsigned> *activeThreads;
+
+ /** Per Thread IQ count */
+ unsigned count[Impl::MaxThreads];
+
+ /** Max IQ Entries Per Thread */
+ unsigned maxEntries[Impl::MaxThreads];
+
+ /** Number of free IQ entries left. */
+ unsigned freeEntries;
+
+ /** The number of entries in the instruction queue. */
+ unsigned numEntries;
+
+ /** The total number of instructions that can be issued in one cycle. */
+ unsigned totalWidth;
+
+ /** The number of physical registers in the CPU. */
+ unsigned numPhysRegs;
+
+ /** The number of physical integer registers in the CPU. */
+ unsigned numPhysIntRegs;
+
+ /** The number of floating point registers in the CPU. */
+ unsigned numPhysFloatRegs;
+
+ /** Delay between commit stage and the IQ.
+ * @todo: Make there be a distinction between the delays within IEW.
+ */
+ unsigned commitToIEWDelay;
+
+ /** Is the IQ switched out. */
+ bool switchedOut;
+
+ /** The sequence number of the squashed instruction. */
+ InstSeqNum squashedSeqNum[Impl::MaxThreads];
+
+ /** A cache of the recently woken registers. It is 1 if the register
+ * has been woken up recently, and 0 if the register has been added
+ * to the dependency graph and has not yet received its value. It
+ * is basically a secondary scoreboard, and should pretty much mirror
+ * the scoreboard that exists in the rename map.
+ */
+ std::vector<bool> regScoreboard;
+
+ /** Adds an instruction to the dependency graph, as a consumer. */
+ bool addToDependents(DynInstPtr &new_inst);
+
+ /** Adds an instruction to the dependency graph, as a producer. */
+ void addToProducers(DynInstPtr &new_inst);
+
+ /** Moves an instruction to the ready queue if it is ready. */
+ void addIfReady(DynInstPtr &inst);
+
+ /** Debugging function to count how many entries are in the IQ. It does
+ * a linear walk through the instructions, so do not call this function
+ * during normal execution.
+ */
+ int countInsts();
+
+ /** Debugging function to dump all the list sizes, as well as print
+ * out the list of nonspeculative instructions. Should not be used
+ * in any other capacity, but it has no harmful side effects.
+ */
+ void dumpLists();
+
+ /** Debugging function to dump out all instructions that are in the
+ * IQ.
+ */
+ void dumpInsts();
+
+ /** Stat for number of instructions added. */
+ Stats::Scalar<> iqInstsAdded;
+ /** Stat for number of non-speculative instructions added. */
+ Stats::Scalar<> iqNonSpecInstsAdded;
+
+ Stats::Scalar<> iqInstsIssued;
+ /** Stat for number of integer instructions issued. */
+ Stats::Scalar<> iqIntInstsIssued;
+ /** Stat for number of floating point instructions issued. */
+ Stats::Scalar<> iqFloatInstsIssued;
+ /** Stat for number of branch instructions issued. */
+ Stats::Scalar<> iqBranchInstsIssued;
+ /** Stat for number of memory instructions issued. */
+ Stats::Scalar<> iqMemInstsIssued;
+ /** Stat for number of miscellaneous instructions issued. */
+ Stats::Scalar<> iqMiscInstsIssued;
+ /** Stat for number of squashed instructions that were ready to issue. */
+ Stats::Scalar<> iqSquashedInstsIssued;
+ /** Stat for number of squashed instructions examined when squashing. */
+ Stats::Scalar<> iqSquashedInstsExamined;
+ /** Stat for number of squashed instruction operands examined when
+ * squashing.
+ */
+ Stats::Scalar<> iqSquashedOperandsExamined;
+ /** Stat for number of non-speculative instructions removed due to a squash.
+ */
+ Stats::Scalar<> iqSquashedNonSpecRemoved;
+ // Also include number of instructions rescheduled and replayed.
+
+ /** Distribution of number of instructions in the queue.
+ * @todo: Need to create struct to track the entry time for each
+ * instruction. */
+// Stats::VectorDistribution<> queueResDist;
+ /** Distribution of the number of instructions issued. */
+ Stats::Distribution<> numIssuedDist;
+ /** Distribution of the cycles it takes to issue an instruction.
+ * @todo: Need to create struct to track the ready time for each
+ * instruction. */
+// Stats::VectorDistribution<> issueDelayDist;
+
+ /** Number of times an instruction could not be issued because a
+ * FU was busy.
+ */
+ Stats::Vector<> statFuBusy;
+// Stats::Vector<> dist_unissued;
+ /** Stat for total number issued for each instruction type. */
+ Stats::Vector2d<> statIssuedInstType;
+
+ /** Number of instructions issued per cycle. */
+ Stats::Formula issueRate;
+
+ /** Number of times the FU was busy. */
+ Stats::Vector<> fuBusy;
+ /** Number of times the FU was busy per instruction issued. */
+ Stats::Formula fuBusyRate;
+};
+
+#endif //__CPU_O3_INST_QUEUE_HH__
--- /dev/null
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ * Korey Sewell
+ */
+
+#include <limits>
+#include <vector>
+
+#include "sim/root.hh"
+
+#include "cpu/o3/fu_pool.hh"
+#include "cpu/o3/inst_queue.hh"
+
+template <class Impl>
+InstructionQueue<Impl>::FUCompletion::FUCompletion(DynInstPtr &_inst,
+ int fu_idx,
+ InstructionQueue<Impl> *iq_ptr)
+ : Event(&mainEventQueue, Stat_Event_Pri),
+ inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), freeFU(false)
+{
+ this->setFlags(Event::AutoDelete);
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::FUCompletion::process()
+{
+ iqPtr->processFUCompletion(inst, freeFU ? fuIdx : -1);
+ inst = NULL;
+}
+
+
+template <class Impl>
+const char *
+InstructionQueue<Impl>::FUCompletion::description()
+{
+ return "Functional unit completion event";
+}
+
+template <class Impl>
+InstructionQueue<Impl>::InstructionQueue(Params *params)
+ : fuPool(params->fuPool),
+ numEntries(params->numIQEntries),
+ totalWidth(params->issueWidth),
+ numPhysIntRegs(params->numPhysIntRegs),
+ numPhysFloatRegs(params->numPhysFloatRegs),
+ commitToIEWDelay(params->commitToIEWDelay)
+{
+ assert(fuPool);
+
+ switchedOut = false;
+
+ numThreads = params->numberOfThreads;
+
+ // Set the number of physical registers as the number of int + float
+ numPhysRegs = numPhysIntRegs + numPhysFloatRegs;
+
+ DPRINTF(IQ, "There are %i physical registers.\n", numPhysRegs);
+
+ //Create an entry for each physical register within the
+ //dependency graph.
+ dependGraph.resize(numPhysRegs);
+
+ // Resize the register scoreboard.
+ regScoreboard.resize(numPhysRegs);
+
+ //Initialize Mem Dependence Units
+ for (int i = 0; i < numThreads; i++) {
+ memDepUnit[i].init(params,i);
+ memDepUnit[i].setIQ(this);
+ }
+
+ resetState();
+
+ std::string policy = params->smtIQPolicy;
+
+ //Convert string to lowercase
+ std::transform(policy.begin(), policy.end(), policy.begin(),
+ (int(*)(int)) tolower);
+
+ //Figure out resource sharing policy
+ if (policy == "dynamic") {
+ iqPolicy = Dynamic;
+
+ //Set Max Entries to Total ROB Capacity
+ for (int i = 0; i < numThreads; i++) {
+ maxEntries[i] = numEntries;
+ }
+
+ } else if (policy == "partitioned") {
+ iqPolicy = Partitioned;
+
+ //@todo: make this work when part_amt doesn't divide evenly.
+ int part_amt = numEntries / numThreads;
+
+ //Divide ROB up evenly
+ for (int i = 0; i < numThreads; i++) {
+ maxEntries[i] = part_amt;
+ }
+
+ DPRINTF(IQ, "IQ sharing policy set to Partitioned:"
+ "%i entries per thread.\n",part_amt);
+
+ } else if (policy == "threshold") {
+ iqPolicy = Threshold;
+
+ double threshold = (double)params->smtIQThreshold / 100;
+
+ int thresholdIQ = (int)((double)threshold * numEntries);
+
+ //Divide up by threshold amount
+ for (int i = 0; i < numThreads; i++) {
+ maxEntries[i] = thresholdIQ;
+ }
+
+ DPRINTF(IQ, "IQ sharing policy set to Threshold:"
+ "%i entries per thread.\n",thresholdIQ);
+ } else {
+ assert(0 && "Invalid IQ Sharing Policy.Options Are:{Dynamic,"
+ "Partitioned, Threshold}");
+ }
+}
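+
+// Example of the sharing arithmetic above (assumed parameters): with
+// numIQEntries = 64 and two threads, "partitioned" gives each thread
+// maxEntries = 32; "threshold" with smtIQThreshold = 25 caps each thread
+// at (int)(0.25 * 64) = 16; "dynamic" lets either thread use all 64.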
+
+template <class Impl>
+InstructionQueue<Impl>::~InstructionQueue()
+{
+ dependGraph.reset();
+#ifdef DEBUG
+ cprintf("Nodes traversed: %i, removed: %i\n",
+ dependGraph.nodesTraversed, dependGraph.nodesRemoved);
+#endif
+}
+
+template <class Impl>
+std::string
+InstructionQueue<Impl>::name() const
+{
+ return cpu->name() + ".iq";
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::regStats()
+{
+ using namespace Stats;
+ iqInstsAdded
+ .name(name() + ".iqInstsAdded")
+ .desc("Number of instructions added to the IQ (excludes non-spec)")
+ .prereq(iqInstsAdded);
+
+ iqNonSpecInstsAdded
+ .name(name() + ".iqNonSpecInstsAdded")
+ .desc("Number of non-speculative instructions added to the IQ")
+ .prereq(iqNonSpecInstsAdded);
+
+ iqInstsIssued
+ .name(name() + ".iqInstsIssued")
+ .desc("Number of instructions issued")
+ .prereq(iqInstsIssued);
+
+ iqIntInstsIssued
+ .name(name() + ".iqIntInstsIssued")
+ .desc("Number of integer instructions issued")
+ .prereq(iqIntInstsIssued);
+
+ iqFloatInstsIssued
+ .name(name() + ".iqFloatInstsIssued")
+ .desc("Number of float instructions issued")
+ .prereq(iqFloatInstsIssued);
+
+ iqBranchInstsIssued
+ .name(name() + ".iqBranchInstsIssued")
+ .desc("Number of branch instructions issued")
+ .prereq(iqBranchInstsIssued);
+
+ iqMemInstsIssued
+ .name(name() + ".iqMemInstsIssued")
+ .desc("Number of memory instructions issued")
+ .prereq(iqMemInstsIssued);
+
+ iqMiscInstsIssued
+ .name(name() + ".iqMiscInstsIssued")
+ .desc("Number of miscellaneous instructions issued")
+ .prereq(iqMiscInstsIssued);
+
+ iqSquashedInstsIssued
+ .name(name() + ".iqSquashedInstsIssued")
+ .desc("Number of squashed instructions issued")
+ .prereq(iqSquashedInstsIssued);
+
+ iqSquashedInstsExamined
+ .name(name() + ".iqSquashedInstsExamined")
+ .desc("Number of squashed instructions iterated over during squash;"
+ " mainly for profiling")
+ .prereq(iqSquashedInstsExamined);
+
+ iqSquashedOperandsExamined
+ .name(name() + ".iqSquashedOperandsExamined")
+ .desc("Number of squashed operands that are examined and possibly "
+ "removed from graph")
+ .prereq(iqSquashedOperandsExamined);
+
+ iqSquashedNonSpecRemoved
+ .name(name() + ".iqSquashedNonSpecRemoved")
+ .desc("Number of squashed non-spec instructions that were removed")
+ .prereq(iqSquashedNonSpecRemoved);
+/*
+ queueResDist
+ .init(Num_OpClasses, 0, 99, 2)
+ .name(name() + ".IQ:residence:")
+ .desc("cycles from dispatch to issue")
+ .flags(total | pdf | cdf )
+ ;
+ for (int i = 0; i < Num_OpClasses; ++i) {
+ queueResDist.subname(i, opClassStrings[i]);
+ }
+*/
+ numIssuedDist
+ .init(0,totalWidth,1)
+ .name(name() + ".ISSUE:issued_per_cycle")
+ .desc("Number of insts issued each cycle")
+ .flags(pdf)
+ ;
+/*
+ dist_unissued
+ .init(Num_OpClasses+2)
+ .name(name() + ".ISSUE:unissued_cause")
+ .desc("Reason ready instruction not issued")
+ .flags(pdf | dist)
+ ;
+ for (int i=0; i < (Num_OpClasses + 2); ++i) {
+ dist_unissued.subname(i, unissued_names[i]);
+ }
+*/
+ statIssuedInstType
+ .init(numThreads,Num_OpClasses)
+ .name(name() + ".ISSUE:FU_type")
+ .desc("Type of FU issued")
+ .flags(total | pdf | dist)
+ ;
+ statIssuedInstType.ysubnames(opClassStrings);
+
+ //
+ // How long did instructions for a particular FU type wait prior to issue
+ //
+/*
+ issueDelayDist
+ .init(Num_OpClasses,0,99,2)
+ .name(name() + ".ISSUE:")
+ .desc("cycles from operands ready to issue")
+ .flags(pdf | cdf)
+ ;
+
+ for (int i=0; i<Num_OpClasses; ++i) {
+ std::stringstream subname;
+ subname << opClassStrings[i] << "_delay";
+ issueDelayDist.subname(i, subname.str());
+ }
+*/
+ issueRate
+ .name(name() + ".ISSUE:rate")
+ .desc("Inst issue rate")
+ .flags(total)
+ ;
+ issueRate = iqInstsIssued / cpu->numCycles;
+
+ statFuBusy
+ .init(Num_OpClasses)
+ .name(name() + ".ISSUE:fu_full")
+ .desc("attempts to use FU when none available")
+ .flags(pdf | dist)
+ ;
+ for (int i=0; i < Num_OpClasses; ++i) {
+ statFuBusy.subname(i, opClassStrings[i]);
+ }
+
+ fuBusy
+ .init(numThreads)
+ .name(name() + ".ISSUE:fu_busy_cnt")
+ .desc("FU busy when requested")
+ .flags(total)
+ ;
+
+ fuBusyRate
+ .name(name() + ".ISSUE:fu_busy_rate")
+ .desc("FU busy rate (busy events/executed inst)")
+ .flags(total)
+ ;
+ fuBusyRate = fuBusy / iqInstsIssued;
+
+ for ( int i=0; i < numThreads; i++) {
+ // Tell mem dependence unit to reg stats as well.
+ memDepUnit[i].regStats();
+ }
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::resetState()
+{
+ //Initialize thread IQ counts
+ for (int i = 0; i < numThreads; i++) {
+ count[i] = 0;
+ instList[i].clear();
+ }
+
+ // Initialize the number of free IQ entries.
+ freeEntries = numEntries;
+
+ // Note that in actuality, the registers corresponding to the logical
+ // registers start off as ready. However this doesn't matter for the
+ // IQ as the instruction should have been correctly told if those
+ // registers are ready in rename. Thus it can all be initialized as
+ // unready.
+ for (int i = 0; i < numPhysRegs; ++i) {
+ regScoreboard[i] = false;
+ }
+
+ for (int i = 0; i < numThreads; ++i) {
+ squashedSeqNum[i] = 0;
+ }
+
+ for (int i = 0; i < Num_OpClasses; ++i) {
+ while (!readyInsts[i].empty())
+ readyInsts[i].pop();
+ queueOnList[i] = false;
+ readyIt[i] = listOrder.end();
+ }
+ nonSpecInsts.clear();
+ listOrder.clear();
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
+{
+ DPRINTF(IQ, "Setting active threads list pointer.\n");
+ activeThreads = at_ptr;
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2e_ptr)
+{
+ DPRINTF(IQ, "Set the issue to execute queue.\n");
+ issueToExecuteQueue = i2e_ptr;
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
+{
+ DPRINTF(IQ, "Set the time buffer.\n");
+ timeBuffer = tb_ptr;
+
+ fromCommit = timeBuffer->getWire(-commitToIEWDelay);
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::switchOut()
+{
+/*
+ if (!instList[0].empty() || (numEntries != freeEntries) ||
+ !readyInsts[0].empty() || !nonSpecInsts.empty() || !listOrder.empty()) {
+ dumpInsts();
+// assert(0);
+ }
+*/
+ resetState();
+ dependGraph.reset();
+ instsToExecute.clear();
+ switchedOut = true;
+ for (int i = 0; i < numThreads; ++i) {
+ memDepUnit[i].switchOut();
+ }
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::takeOverFrom()
+{
+ switchedOut = false;
+}
+
+template <class Impl>
+int
+InstructionQueue<Impl>::entryAmount(int num_threads)
+{
+ if (iqPolicy == Partitioned) {
+ return numEntries / num_threads;
+ } else {
+ return 0;
+ }
+}
+
+
+template <class Impl>
+void
+InstructionQueue<Impl>::resetEntries()
+{
+ if (iqPolicy != Dynamic || numThreads > 1) {
+ int active_threads = (*activeThreads).size();
+
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator list_end = (*activeThreads).end();
+
+ while (threads != list_end) {
+ if (iqPolicy == Partitioned) {
+ maxEntries[*threads++] = numEntries / active_threads;
+ } else if(iqPolicy == Threshold && active_threads == 1) {
+ maxEntries[*threads++] = numEntries;
+ }
+ }
+ }
+}
+
+template <class Impl>
+unsigned
+InstructionQueue<Impl>::numFreeEntries()
+{
+ return freeEntries;
+}
+
+template <class Impl>
+unsigned
+InstructionQueue<Impl>::numFreeEntries(unsigned tid)
+{
+ return maxEntries[tid] - count[tid];
+}
+
+// Might want to do something more complex if it knows how many instructions
+// will be issued this cycle.
+template <class Impl>
+bool
+InstructionQueue<Impl>::isFull()
+{
+ return freeEntries == 0;
+}
+
+template <class Impl>
+bool
+InstructionQueue<Impl>::isFull(unsigned tid)
+{
+ return numFreeEntries(tid) == 0;
+}
+
+template <class Impl>
+bool
+InstructionQueue<Impl>::hasReadyInsts()
+{
+ if (!listOrder.empty()) {
+ return true;
+ }
+
+ for (int i = 0; i < Num_OpClasses; ++i) {
+ if (!readyInsts[i].empty()) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::insert(DynInstPtr &new_inst)
+{
+ // Make sure the instruction is valid
+ assert(new_inst);
+
+ DPRINTF(IQ, "Adding instruction [sn:%lli] PC %#x to the IQ.\n",
+ new_inst->seqNum, new_inst->readPC());
+
+ assert(freeEntries != 0);
+
+ instList[new_inst->threadNumber].push_back(new_inst);
+
+ --freeEntries;
+
+ new_inst->setInIQ();
+
+ // Look through its source registers (physical regs), and mark any
+ // dependencies.
+ addToDependents(new_inst);
+
+ // Have this instruction set itself as the producer of its destination
+ // register(s).
+ addToProducers(new_inst);
+
+ if (new_inst->isMemRef()) {
+ memDepUnit[new_inst->threadNumber].insert(new_inst);
+ } else {
+ addIfReady(new_inst);
+ }
+
+ ++iqInstsAdded;
+
+ count[new_inst->threadNumber]++;
+
+ assert(freeEntries == (numEntries - countInsts()));
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::insertNonSpec(DynInstPtr &new_inst)
+{
+ // @todo: Clean up this code; can do it by setting inst as unable
+ // to issue, then calling normal insert on the inst.
+
+ assert(new_inst);
+
+ nonSpecInsts[new_inst->seqNum] = new_inst;
+
+ DPRINTF(IQ, "Adding non-speculative instruction [sn:%lli] PC %#x "
+ "to the IQ.\n",
+ new_inst->seqNum, new_inst->readPC());
+
+ assert(freeEntries != 0);
+
+ instList[new_inst->threadNumber].push_back(new_inst);
+
+ --freeEntries;
+
+ new_inst->setInIQ();
+
+ // Have this instruction set itself as the producer of its destination
+ // register(s).
+ addToProducers(new_inst);
+
+ // If it's a memory instruction, add it to the memory dependency
+ // unit.
+ if (new_inst->isMemRef()) {
+ memDepUnit[new_inst->threadNumber].insertNonSpec(new_inst);
+ }
+
+ ++iqNonSpecInstsAdded;
+
+ count[new_inst->threadNumber]++;
+
+ assert(freeEntries == (numEntries - countInsts()));
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::insertBarrier(DynInstPtr &barr_inst)
+{
+ memDepUnit[barr_inst->threadNumber].insertBarrier(barr_inst);
+
+ insertNonSpec(barr_inst);
+}
+
+template <class Impl>
+typename Impl::DynInstPtr
+InstructionQueue<Impl>::getInstToExecute()
+{
+ assert(!instsToExecute.empty());
+ DynInstPtr inst = instsToExecute.front();
+ instsToExecute.pop_front();
+ return inst;
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::addToOrderList(OpClass op_class)
+{
+ assert(!readyInsts[op_class].empty());
+
+ ListOrderEntry queue_entry;
+
+ queue_entry.queueType = op_class;
+
+ queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
+
+ ListOrderIt list_it = listOrder.begin();
+ ListOrderIt list_end_it = listOrder.end();
+
+ while (list_it != list_end_it) {
+ if ((*list_it).oldestInst > queue_entry.oldestInst) {
+ break;
+ }
+
+ list_it++;
+ }
+
+ readyIt[op_class] = listOrder.insert(list_it, queue_entry);
+ queueOnList[op_class] = true;
+}
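+
+// For example (assumed state): if listOrder holds entries with oldest
+// seqNums {3, 8, 20} and the op class being added has oldest seqNum 10,
+// the new entry is inserted before the 20 entry, keeping the list
+// sorted oldest-first.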
+
+template <class Impl>
+void
+InstructionQueue<Impl>::moveToYoungerInst(ListOrderIt list_order_it)
+{
+ // Get iterator of next item on the list
+ // Delete the original iterator
+ // Determine if the next item is either the end of the list or younger
+ // than the new instruction. If so, then add in a new iterator right here.
+ // If not, then move along.
+ ListOrderEntry queue_entry;
+ OpClass op_class = (*list_order_it).queueType;
+ ListOrderIt next_it = list_order_it;
+
+ ++next_it;
+
+ queue_entry.queueType = op_class;
+ queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
+
+ while (next_it != listOrder.end() &&
+ (*next_it).oldestInst < queue_entry.oldestInst) {
+ ++next_it;
+ }
+
+ readyIt[op_class] = listOrder.insert(next_it, queue_entry);
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
+{
++ DPRINTF(IQ, "Processing FU completion [sn:%lli]\n", inst->seqNum);
+ // The CPU could have been sleeping until this op completed (*extremely*
+ // long latency op). Wake it if it was. This may be overkill.
+ if (isSwitchedOut()) {
++ DPRINTF(IQ, "FU completion not processed, IQ is switched out [sn:%lli]\n",
++ inst->seqNum);
+ return;
+ }
+
+ iewStage->wakeCPU();
+
+ if (fu_idx > -1)
+ fuPool->freeUnitNextCycle(fu_idx);
+
+ // @todo: Ensure that these FU Completions happen at the beginning
+ // of a cycle, otherwise they could add too many instructions to
+ // the queue.
+ issueToExecuteQueue->access(0)->size++;
+ instsToExecute.push_back(inst);
+}
+
+// @todo: Figure out a better way to remove the squashed items from the
+// lists. Checking the top item of each list to see if it's squashed
+// wastes time and forces jumps.
+template <class Impl>
+void
+InstructionQueue<Impl>::scheduleReadyInsts()
+{
+ DPRINTF(IQ, "Attempting to schedule ready instructions from "
+ "the IQ.\n");
+
+ IssueStruct *i2e_info = issueToExecuteQueue->access(0);
+
+ // Have iterator to head of the list
+ // While I haven't exceeded bandwidth or reached the end of the list,
+ // Try to get a FU that can do what this op needs.
+ // If successful, change the oldestInst to the new top of the list, put
+ // the queue in the proper place in the list.
+ // Increment the iterator.
+ // This will avoid trying to schedule a certain op class if there are no
+ // FUs that handle it.
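+ //
+ // Illustrative pass (hypothetical op classes A and B): with
+ // listOrder = {A: sn 10, B: sn 12} and totalWidth = 2, the loop
+ // first issues sn 10 if an FU for A is free, re-sorts A's queue by
+ // its new oldest entry, then considers sn 12; if no FU for B is
+ // free, statFuBusy[B] is bumped and the iterator simply advances.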
+ ListOrderIt order_it = listOrder.begin();
+ ListOrderIt order_end_it = listOrder.end();
+ int total_issued = 0;
+
+ while (total_issued < totalWidth &&
+ iewStage->canIssue() &&
+ order_it != order_end_it) {
+ OpClass op_class = (*order_it).queueType;
+
+ assert(!readyInsts[op_class].empty());
+
+ DynInstPtr issuing_inst = readyInsts[op_class].top();
+
+ assert(issuing_inst->seqNum == (*order_it).oldestInst);
+
+ if (issuing_inst->isSquashed()) {
+ readyInsts[op_class].pop();
+
+ if (!readyInsts[op_class].empty()) {
+ moveToYoungerInst(order_it);
+ } else {
+ readyIt[op_class] = listOrder.end();
+ queueOnList[op_class] = false;
+ }
+
+ listOrder.erase(order_it++);
+
+ ++iqSquashedInstsIssued;
+
+ continue;
+ }
+
+ int idx = -2;
+ int op_latency = 1;
+ int tid = issuing_inst->threadNumber;
+
+ if (op_class != No_OpClass) {
+ idx = fuPool->getUnit(op_class);
+
+ if (idx > -1) {
+ op_latency = fuPool->getOpLatency(op_class);
+ }
+ }
+
+ // If we have an instruction that doesn't require a FU, or a
+ // valid FU, then schedule for execution.
+ if (idx == -2 || idx != -1) {
+ if (op_latency == 1) {
+ i2e_info->size++;
+ instsToExecute.push_back(issuing_inst);
+
+ // Add the FU onto the list of FU's to be freed next
+ // cycle if we used one.
+ if (idx >= 0)
+ fuPool->freeUnitNextCycle(idx);
+ } else {
+ int issue_latency = fuPool->getIssueLatency(op_class);
+ // Generate completion event for the FU
+ FUCompletion *execution = new FUCompletion(issuing_inst,
+ idx, this);
+
+ execution->schedule(curTick + cpu->cycles(issue_latency - 1));
+
+ // @todo: Enforce that issue_latency == 1 or op_latency
+ if (issue_latency > 1) {
+ // If FU isn't pipelined, then it must be freed
+ // upon the execution completing.
+ execution->setFreeFU();
+ } else {
+ // Add the FU onto the list of FU's to be freed next cycle.
+ fuPool->freeUnitNextCycle(idx);
+ }
+ }
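+ // Example with assumed latencies: a non-pipelined FU with
+ // op_latency = issue_latency = 12 schedules its FUCompletion
+ // 11 cycles out and frees the unit only when that event fires
+ // (via setFreeFU()); a pipelined FU (issue_latency == 1) is
+ // freed next cycle and can start a new op while this completes.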
+
+ DPRINTF(IQ, "Thread %i: Issuing instruction PC %#x "
+ "[sn:%lli]\n",
+ tid, issuing_inst->readPC(),
+ issuing_inst->seqNum);
+
+ readyInsts[op_class].pop();
+
+ if (!readyInsts[op_class].empty()) {
+ moveToYoungerInst(order_it);
+ } else {
+ readyIt[op_class] = listOrder.end();
+ queueOnList[op_class] = false;
+ }
+
+ issuing_inst->setIssued();
+ ++total_issued;
+
+ if (!issuing_inst->isMemRef()) {
+ // Memory instructions can not be freed from the IQ until they
+ // complete.
+ ++freeEntries;
+ count[tid]--;
+ issuing_inst->clearInIQ();
+ } else {
+ memDepUnit[tid].issue(issuing_inst);
+ }
+
+ listOrder.erase(order_it++);
+ statIssuedInstType[tid][op_class]++;
+ iewStage->incrWb(issuing_inst->seqNum);
+ } else {
+ statFuBusy[op_class]++;
+ fuBusy[tid]++;
+ ++order_it;
+ }
+ }
+
+ numIssuedDist.sample(total_issued);
+ iqInstsIssued += total_issued;
+
+ // If we issued any instructions, tell the CPU we had activity.
+ if (total_issued) {
+ cpu->activityThisCycle();
+ } else {
+ DPRINTF(IQ, "Not able to schedule any instructions.\n");
+ }
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst)
+{
+ DPRINTF(IQ, "Marking nonspeculative instruction [sn:%lli] as ready "
+ "to execute.\n", inst);
+
+ NonSpecMapIt inst_it = nonSpecInsts.find(inst);
+
+ assert(inst_it != nonSpecInsts.end());
+
+ unsigned tid = (*inst_it).second->threadNumber;
+
+ (*inst_it).second->setCanIssue();
+
+ if (!(*inst_it).second->isMemRef()) {
+ addIfReady((*inst_it).second);
+ } else {
+ memDepUnit[tid].nonSpecInstReady((*inst_it).second);
+ }
+
+ (*inst_it).second = NULL;
+
+ nonSpecInsts.erase(inst_it);
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::commit(const InstSeqNum &inst, unsigned tid)
+{
+ DPRINTF(IQ, "[tid:%i]: Committing instructions older than [sn:%i]\n",
+ tid,inst);
+
+ ListIt iq_it = instList[tid].begin();
+
+ while (iq_it != instList[tid].end() &&
+ (*iq_it)->seqNum <= inst) {
+ ++iq_it;
+ instList[tid].pop_front();
+ }
+
+ assert(freeEntries == (numEntries - countInsts()));
+}
+
+template <class Impl>
+int
+InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
+{
+ int dependents = 0;
+
+ DPRINTF(IQ, "Waking dependents of completed instruction.\n");
+
+ assert(!completed_inst->isSquashed());
+
+ // Tell the memory dependence unit to wake any dependents on this
+ // instruction if it is a memory instruction. Also complete the memory
+ // instruction at this point since we know it executed without issues.
+ // @todo: Might want to rename "completeMemInst" to something that
+ // indicates that it won't need to be replayed, and call this
+ // earlier. Might not be a big deal.
+ if (completed_inst->isMemRef()) {
+ memDepUnit[completed_inst->threadNumber].wakeDependents(completed_inst);
+ completeMemInst(completed_inst);
+ } else if (completed_inst->isMemBarrier() ||
+ completed_inst->isWriteBarrier()) {
+ memDepUnit[completed_inst->threadNumber].completeBarrier(completed_inst);
+ }
+
+ for (int dest_reg_idx = 0;
+ dest_reg_idx < completed_inst->numDestRegs();
+ dest_reg_idx++)
+ {
+ PhysRegIndex dest_reg =
+ completed_inst->renamedDestRegIdx(dest_reg_idx);
+
+ // Special case of uniq or control registers. They are not
+ // handled by the IQ and thus have no dependency graph entry.
+ // @todo Figure out a cleaner way to handle this.
+ if (dest_reg >= numPhysRegs) {
+ continue;
+ }
+
+ DPRINTF(IQ, "Waking any dependents on register %i.\n",
+ (int) dest_reg);
+
+ //Go through the dependency chain, marking the registers as
+ //ready within the waiting instructions.
+ DynInstPtr dep_inst = dependGraph.pop(dest_reg);
+
+ while (dep_inst) {
+ DPRINTF(IQ, "Waking up a dependent instruction, PC%#x.\n",
+ dep_inst->readPC());
+
+ // Might want to give more information to the instruction
+ // so that it knows which of its source registers is
+ // ready. However that would mean that the dependency
+ // graph entries would need to hold the src_reg_idx.
+ dep_inst->markSrcRegReady();
+
+ addIfReady(dep_inst);
+
+ dep_inst = dependGraph.pop(dest_reg);
+
+ ++dependents;
+ }
+
+ // Reset the head node now that all of its dependents have
+ // been woken up.
+ assert(dependGraph.empty(dest_reg));
+ dependGraph.clearInst(dest_reg);
+
+ // Mark the scoreboard as having that register ready.
+ regScoreboard[dest_reg] = true;
+ }
+ return dependents;
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::addReadyMemInst(DynInstPtr &ready_inst)
+{
+ OpClass op_class = ready_inst->opClass();
+
+ readyInsts[op_class].push(ready_inst);
+
+ // Will need to reorder the list if either a queue is not on the list,
+ // or it has an older instruction than last time.
+ if (!queueOnList[op_class]) {
+ addToOrderList(op_class);
+ } else if (readyInsts[op_class].top()->seqNum <
+ (*readyIt[op_class]).oldestInst) {
+ listOrder.erase(readyIt[op_class]);
+ addToOrderList(op_class);
+ }
+
+ DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
+ "the ready list, PC %#x opclass:%i [sn:%lli].\n",
+ ready_inst->readPC(), op_class, ready_inst->seqNum);
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::rescheduleMemInst(DynInstPtr &resched_inst)
+{
+ memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::replayMemInst(DynInstPtr &replay_inst)
+{
+ memDepUnit[replay_inst->threadNumber].replay(replay_inst);
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::completeMemInst(DynInstPtr &completed_inst)
+{
+ int tid = completed_inst->threadNumber;
+
+ DPRINTF(IQ, "Completing mem instruction PC:%#x [sn:%lli]\n",
+ completed_inst->readPC(), completed_inst->seqNum);
+
+ ++freeEntries;
+
+ completed_inst->memOpDone = true;
+
+ memDepUnit[tid].completed(completed_inst);
+
+ count[tid]--;
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::violation(DynInstPtr &store,
+ DynInstPtr &faulting_load)
+{
+ memDepUnit[store->threadNumber].violation(store, faulting_load);
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::squash(unsigned tid)
+{
+ DPRINTF(IQ, "[tid:%i]: Starting to squash instructions in "
+ "the IQ.\n", tid);
+
+ // Read instruction sequence number of last instruction out of the
+ // time buffer.
+#if ISA_HAS_DELAY_SLOT
+ squashedSeqNum[tid] = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
+#else
+ squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum;
+#endif
+
+ // Call doSquash if there are insts in the IQ
+ if (count[tid] > 0) {
+ doSquash(tid);
+ }
+
+ // Also tell the memory dependence unit to squash.
+ memDepUnit[tid].squash(squashedSeqNum[tid], tid);
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::doSquash(unsigned tid)
+{
+ // Start at the tail.
+ ListIt squash_it = instList[tid].end();
+ --squash_it;
+
+ DPRINTF(IQ, "[tid:%i]: Squashing until sequence number %i!\n",
+ tid, squashedSeqNum[tid]);
+
+ // Squash any instructions younger than the squashed sequence number
+ // given.
+ while (squash_it != instList[tid].end() &&
+ (*squash_it)->seqNum > squashedSeqNum[tid]) {
+
+ DynInstPtr squashed_inst = (*squash_it);
+
+ // Only handle the instruction if it actually is in the IQ and
+ // hasn't already been squashed in the IQ.
+ if (squashed_inst->threadNumber != tid ||
+ squashed_inst->isSquashedInIQ()) {
+ --squash_it;
+ continue;
+ }
+
+ if (!squashed_inst->isIssued() ||
+ (squashed_inst->isMemRef() &&
+ !squashed_inst->memOpDone)) {
+
+            DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x "
+                    "squashed.\n",
+                    tid, squashed_inst->seqNum, squashed_inst->readPC());
+
+ // Remove the instruction from the dependency list.
+ if (!squashed_inst->isNonSpeculative() &&
+ !squashed_inst->isStoreConditional() &&
+ !squashed_inst->isMemBarrier() &&
+ !squashed_inst->isWriteBarrier()) {
+
+ for (int src_reg_idx = 0;
+ src_reg_idx < squashed_inst->numSrcRegs();
+ src_reg_idx++)
+ {
+ PhysRegIndex src_reg =
+ squashed_inst->renamedSrcRegIdx(src_reg_idx);
+
+ // Only remove it from the dependency graph if it
+ // was placed there in the first place.
+
+ // Instead of doing a linked list traversal, we
+ // can just remove these squashed instructions
+ // either at issue time, or when the register is
+ // overwritten. The only downside to this is it
+ // leaves more room for error.
+
+ if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) &&
+ src_reg < numPhysRegs) {
+ dependGraph.remove(src_reg, squashed_inst);
+ }
+
+
+ ++iqSquashedOperandsExamined;
+ }
+            } else if (!squashed_inst->isStoreConditional() ||
+                       !squashed_inst->isCompleted()) {
+ NonSpecMapIt ns_inst_it =
+ nonSpecInsts.find(squashed_inst->seqNum);
+ assert(ns_inst_it != nonSpecInsts.end());
+
+ (*ns_inst_it).second = NULL;
+
+ nonSpecInsts.erase(ns_inst_it);
+
+ ++iqSquashedNonSpecRemoved;
+ }
+
+ // Might want to also clear out the head of the dependency graph.
+
+ // Mark it as squashed within the IQ.
+ squashed_inst->setSquashedInIQ();
+
+ // @todo: Remove this hack where several statuses are set so the
+ // inst will flow through the rest of the pipeline.
+ squashed_inst->setIssued();
+ squashed_inst->setCanCommit();
+ squashed_inst->clearInIQ();
+
+ //Update Thread IQ Count
+ count[squashed_inst->threadNumber]--;
+
+ ++freeEntries;
+ }
+
+ instList[tid].erase(squash_it--);
+ ++iqSquashedInstsExamined;
+ }
+}
+
+template <class Impl>
+bool
+InstructionQueue<Impl>::addToDependents(DynInstPtr &new_inst)
+{
+ // Loop through the instruction's source registers, adding
+ // them to the dependency list if they are not ready.
+ int8_t total_src_regs = new_inst->numSrcRegs();
+ bool return_val = false;
+
+ for (int src_reg_idx = 0;
+ src_reg_idx < total_src_regs;
+ src_reg_idx++)
+ {
+ // Only add it to the dependency graph if it's not ready.
+ if (!new_inst->isReadySrcRegIdx(src_reg_idx)) {
+ PhysRegIndex src_reg = new_inst->renamedSrcRegIdx(src_reg_idx);
+
+ // Check the IQ's scoreboard to make sure the register
+ // hasn't become ready while the instruction was in flight
+ // between stages. Only if it really isn't ready should
+ // it be added to the dependency graph.
+ if (src_reg >= numPhysRegs) {
+ continue;
+ } else if (regScoreboard[src_reg] == false) {
+ DPRINTF(IQ, "Instruction PC %#x has src reg %i that "
+ "is being added to the dependency chain.\n",
+ new_inst->readPC(), src_reg);
+
+ dependGraph.insert(src_reg, new_inst);
+
+ // Change the return value to indicate that something
+ // was added to the dependency graph.
+ return_val = true;
+ } else {
+ DPRINTF(IQ, "Instruction PC %#x has src reg %i that "
+ "became ready before it reached the IQ.\n",
+ new_inst->readPC(), src_reg);
+ // Mark a register ready within the instruction.
+ new_inst->markSrcRegReady(src_reg_idx);
+ }
+ }
+ }
+
+ return return_val;
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::addToProducers(DynInstPtr &new_inst)
+{
+ // Nothing really needs to be marked when an instruction becomes
+ // the producer of a register's value, but for convenience a ptr
+ // to the producing instruction will be placed in the head node of
+ // the dependency links.
+ int8_t total_dest_regs = new_inst->numDestRegs();
+
+ for (int dest_reg_idx = 0;
+ dest_reg_idx < total_dest_regs;
+ dest_reg_idx++)
+ {
+ PhysRegIndex dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx);
+
+ // Instructions that use the misc regs will have a reg number
+ // higher than the normal physical registers. In this case these
+ // registers are not renamed, and there is no need to track
+ // dependencies as these instructions must be executed at commit.
+ if (dest_reg >= numPhysRegs) {
+ continue;
+ }
+
+ if (!dependGraph.empty(dest_reg)) {
+ dependGraph.dump();
+ panic("Dependency graph %i not empty!", dest_reg);
+ }
+
+ dependGraph.setInst(dest_reg, new_inst);
+
+ // Mark the scoreboard to say it's not yet ready.
+ regScoreboard[dest_reg] = false;
+ }
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::addIfReady(DynInstPtr &inst)
+{
+ // If the instruction now has all of its source registers
+ // available, then add it to the list of ready instructions.
+ if (inst->readyToIssue()) {
+
+ //Add the instruction to the proper ready list.
+ if (inst->isMemRef()) {
+
+ DPRINTF(IQ, "Checking if memory instruction can issue.\n");
+
+ // Message to the mem dependence unit that this instruction has
+ // its registers ready.
+ memDepUnit[inst->threadNumber].regsReady(inst);
+
+ return;
+ }
+
+ OpClass op_class = inst->opClass();
+
+ DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
+ "the ready list, PC %#x opclass:%i [sn:%lli].\n",
+ inst->readPC(), op_class, inst->seqNum);
+
+ readyInsts[op_class].push(inst);
+
+ // Will need to reorder the list if either a queue is not on the list,
+ // or it has an older instruction than last time.
+ if (!queueOnList[op_class]) {
+ addToOrderList(op_class);
+ } else if (readyInsts[op_class].top()->seqNum <
+ (*readyIt[op_class]).oldestInst) {
+ listOrder.erase(readyIt[op_class]);
+ addToOrderList(op_class);
+ }
+ }
+}
+
+template <class Impl>
+int
+InstructionQueue<Impl>::countInsts()
+{
+#if 0
+ //ksewell:This works but definitely could use a cleaner write
+ //with a more intuitive way of counting. Right now it's
+ //just brute force ....
+ // Change the #if if you want to use this method.
+ int total_insts = 0;
+
+ for (int i = 0; i < numThreads; ++i) {
+ ListIt count_it = instList[i].begin();
+
+ while (count_it != instList[i].end()) {
+ if (!(*count_it)->isSquashed() && !(*count_it)->isSquashedInIQ()) {
+ if (!(*count_it)->isIssued()) {
+ ++total_insts;
+ } else if ((*count_it)->isMemRef() &&
+ !(*count_it)->memOpDone) {
+ // Loads that have not been marked as executed still count
+ // towards the total instructions.
+ ++total_insts;
+ }
+ }
+
+ ++count_it;
+ }
+ }
+
+ return total_insts;
+#else
+ return numEntries - freeEntries;
+#endif
+}
+
+template <class Impl>
+void
+InstructionQueue<Impl>::dumpLists()
+{
+ for (int i = 0; i < Num_OpClasses; ++i) {
+ cprintf("Ready list %i size: %i\n", i, readyInsts[i].size());
+
+ cprintf("\n");
+ }
+
+ cprintf("Non speculative list size: %i\n", nonSpecInsts.size());
+
+ NonSpecMapIt non_spec_it = nonSpecInsts.begin();
+ NonSpecMapIt non_spec_end_it = nonSpecInsts.end();
+
+ cprintf("Non speculative list: ");
+
+ while (non_spec_it != non_spec_end_it) {
+ cprintf("%#x [sn:%lli]", (*non_spec_it).second->readPC(),
+ (*non_spec_it).second->seqNum);
+ ++non_spec_it;
+ }
+
+ cprintf("\n");
+
+ ListOrderIt list_order_it = listOrder.begin();
+ ListOrderIt list_order_end_it = listOrder.end();
+ int i = 1;
+
+ cprintf("List order: ");
+
+ while (list_order_it != list_order_end_it) {
+ cprintf("%i OpClass:%i [sn:%lli] ", i, (*list_order_it).queueType,
+ (*list_order_it).oldestInst);
+
+ ++list_order_it;
+ ++i;
+ }
+
+ cprintf("\n");
+}
+
+
+template <class Impl>
+void
+InstructionQueue<Impl>::dumpInsts()
+{
+ for (int i = 0; i < numThreads; ++i) {
+ int num = 0;
+ int valid_num = 0;
+ ListIt inst_list_it = instList[i].begin();
+
+ while (inst_list_it != instList[i].end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+ if (!(*inst_list_it)->isSquashed()) {
+ if (!(*inst_list_it)->isIssued()) {
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ } else if ((*inst_list_it)->isMemRef() &&
+ !(*inst_list_it)->memOpDone) {
+ // Loads that have not been marked as executed
+ // still count towards the total instructions.
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ }
+ }
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Issued:%i\nSquashed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+
+ if ((*inst_list_it)->isMemRef()) {
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ }
+
+ cprintf("\n");
+
+ inst_list_it++;
+ ++num;
+ }
+ }
+
+ cprintf("Insts to Execute list:\n");
+
+ int num = 0;
+ int valid_num = 0;
+ ListIt inst_list_it = instsToExecute.begin();
+
+ while (inst_list_it != instsToExecute.end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+ if (!(*inst_list_it)->isSquashed()) {
+ if (!(*inst_list_it)->isIssued()) {
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ } else if ((*inst_list_it)->isMemRef() &&
+ !(*inst_list_it)->memOpDone) {
+ // Loads that have not been marked as executed
+ // still count towards the total instructions.
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ }
+ }
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Issued:%i\nSquashed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+
+ if ((*inst_list_it)->isMemRef()) {
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ }
+
+ cprintf("\n");
+
+ inst_list_it++;
+ ++num;
+ }
+}
--- /dev/null
+/*
+ * Copyright (c) 2005-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Korey Sewell
+ */
+
+#include <algorithm>
+#include <list>
+#include <string>
+
+#include "cpu/o3/lsq.hh"
+
+template <class Impl>
+Tick
+LSQ<Impl>::DcachePort::recvAtomic(PacketPtr pkt)
+{
+ panic("O3CPU model does not work with atomic mode!");
+ return curTick;
+}
+
+template <class Impl>
+void
+LSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt)
+{
+ panic("O3CPU doesn't expect recvFunctional callback!");
+}
+
+template <class Impl>
+void
+LSQ<Impl>::DcachePort::recvStatusChange(Status status)
+{
+ if (status == RangeChange)
+ return;
+
+ panic("O3CPU doesn't expect recvStatusChange callback!");
+}
+
+template <class Impl>
+bool
+LSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt)
+{
+ lsq->thread[pkt->req->getThreadNum()].completeDataAccess(pkt);
+ return true;
+}
+
+template <class Impl>
+void
+LSQ<Impl>::DcachePort::recvRetry()
+{
+    if (lsq->retryTid == -1) {
+        //Squashed, so drop it
+        return;
+    }
+ lsq->thread[lsq->retryTid].recvRetry();
+ // Speculatively clear the retry Tid. This will get set again if
+ // the LSQUnit was unable to complete its access.
+ lsq->retryTid = -1;
+}
+
+template <class Impl>
+LSQ<Impl>::LSQ(Params *params)
+ : dcachePort(this), LQEntries(params->LQEntries),
+ SQEntries(params->SQEntries), numThreads(params->numberOfThreads),
+ retryTid(-1)
+{
+ DPRINTF(LSQ, "Creating LSQ object.\n");
+
+ //**********************************************/
+ //************ Handle SMT Parameters ***********/
+ //**********************************************/
+ std::string policy = params->smtLSQPolicy;
+
+ //Convert string to lowercase
+ std::transform(policy.begin(), policy.end(), policy.begin(),
+ (int(*)(int)) tolower);
+
+ //Figure out fetch policy
+ if (policy == "dynamic") {
+ lsqPolicy = Dynamic;
+
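+        // Dynamic: all threads may use any of the LQ/SQ entries.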
+ maxLQEntries = LQEntries;
+ maxSQEntries = SQEntries;
+
+ DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
+
+ } else if (policy == "partitioned") {
+ lsqPolicy = Partitioned;
+
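+        // e.g., 32 LQ entries split across 2 threads gives each
+        // thread 16 entries.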
+        //@todo: make work if part_amt doesn't divide evenly.
+        maxLQEntries = LQEntries / numThreads;
+        maxSQEntries = SQEntries / numThreads;
+
+        DPRINTF(LSQ, "LSQ sharing policy set to Partitioned: "
+                "%i entries per LQ | %i entries per SQ\n",
+                maxLQEntries, maxSQEntries);
+
+ } else if (policy == "threshold") {
+ lsqPolicy = Threshold;
+
+ assert(params->smtLSQThreshold > LQEntries);
+ assert(params->smtLSQThreshold > SQEntries);
+
+ //Divide up by threshold amount
+ //@todo: Should threads check the max and the total
+ //amount of the LSQ
+ maxLQEntries = params->smtLSQThreshold;
+ maxSQEntries = params->smtLSQThreshold;
+
+ DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
+ "%i entries per LQ | %i entries per SQ",
+ maxLQEntries,maxSQEntries);
+
+ } else {
+        assert(0 && "Invalid LSQ sharing policy. Options are: {Dynamic, "
+               "Partitioned, Threshold}");
+ }
+
+ //Initialize LSQs
+ for (int tid=0; tid < numThreads; tid++) {
+ thread[tid].init(params, this, maxLQEntries, maxSQEntries, tid);
+ thread[tid].setDcachePort(&dcachePort);
+ }
+}
+
+
+template<class Impl>
+std::string
+LSQ<Impl>::name() const
+{
+ return iewStage->name() + ".lsq";
+}
+
+template<class Impl>
+void
+LSQ<Impl>::regStats()
+{
+    //Register stats for each thread's LSQ unit
+ for (int tid=0; tid < numThreads; tid++) {
+ thread[tid].regStats();
+ }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
+{
+ activeThreads = at_ptr;
+ assert(activeThreads != 0);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::setCPU(O3CPU *cpu_ptr)
+{
+ cpu = cpu_ptr;
+
+ dcachePort.setName(name());
+
+ for (int tid=0; tid < numThreads; tid++) {
+ thread[tid].setCPU(cpu_ptr);
+ }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::setIEW(IEW *iew_ptr)
+{
+ iewStage = iew_ptr;
+
+ for (int tid=0; tid < numThreads; tid++) {
+ thread[tid].setIEW(iew_ptr);
+ }
+}
+
+template <class Impl>
+void
+LSQ<Impl>::switchOut()
+{
+ for (int tid = 0; tid < numThreads; tid++) {
+ thread[tid].switchOut();
+ }
+}
+
+template <class Impl>
+void
+LSQ<Impl>::takeOverFrom()
+{
+ for (int tid = 0; tid < numThreads; tid++) {
+ thread[tid].takeOverFrom();
+ }
+}
+
+template <class Impl>
+int
+LSQ<Impl>::entryAmount(int num_threads)
+{
+ if (lsqPolicy == Partitioned) {
+ return LQEntries / num_threads;
+ } else {
+ return 0;
+ }
+}
+
+template <class Impl>
+void
+LSQ<Impl>::resetEntries()
+{
+ if (lsqPolicy != Dynamic || numThreads > 1) {
+ int active_threads = (*activeThreads).size();
+
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+ std::list<unsigned>::iterator list_end = (*activeThreads).end();
+
+ int maxEntries;
+
+ if (lsqPolicy == Partitioned) {
+ maxEntries = LQEntries / active_threads;
+ } else if (lsqPolicy == Threshold && active_threads == 1) {
+ maxEntries = LQEntries;
+ } else {
+ maxEntries = LQEntries;
+ }
+
+ while (threads != list_end) {
+ resizeEntries(maxEntries,*threads++);
+ }
+ }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::removeEntries(unsigned tid)
+{
+ thread[tid].clearLQ();
+ thread[tid].clearSQ();
+}
+
+template<class Impl>
+void
+LSQ<Impl>::resizeEntries(unsigned size,unsigned tid)
+{
+ thread[tid].resizeLQ(size);
+ thread[tid].resizeSQ(size);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::tick()
+{
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+
+ thread[tid].tick();
+ }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::insertLoad(DynInstPtr &load_inst)
+{
+ unsigned tid = load_inst->threadNumber;
+
+ thread[tid].insertLoad(load_inst);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::insertStore(DynInstPtr &store_inst)
+{
+ unsigned tid = store_inst->threadNumber;
+
+ thread[tid].insertStore(store_inst);
+}
+
+template<class Impl>
+Fault
+LSQ<Impl>::executeLoad(DynInstPtr &inst)
+{
+ unsigned tid = inst->threadNumber;
+
+ return thread[tid].executeLoad(inst);
+}
+
+template<class Impl>
+Fault
+LSQ<Impl>::executeStore(DynInstPtr &inst)
+{
+ unsigned tid = inst->threadNumber;
+
+ return thread[tid].executeStore(inst);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::writebackStores()
+{
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+
+ if (numStoresToWB(tid) > 0) {
+ DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
+ "available for Writeback.\n", tid, numStoresToWB(tid));
+ }
+
+ thread[tid].writebackStores();
+ }
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::violation()
+{
+ /* Answers: Does Anybody Have a Violation?*/
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ if (thread[tid].violation())
+ return true;
+ }
+
+ return false;
+}
+
+template<class Impl>
+int
+LSQ<Impl>::getCount()
+{
+ unsigned total = 0;
+
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ total += getCount(tid);
+ }
+
+ return total;
+}
+
+template<class Impl>
+int
+LSQ<Impl>::numLoads()
+{
+ unsigned total = 0;
+
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ total += numLoads(tid);
+ }
+
+ return total;
+}
+
+template<class Impl>
+int
+LSQ<Impl>::numStores()
+{
+ unsigned total = 0;
+
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ total += thread[tid].numStores();
+ }
+
+ return total;
+}
+
+template<class Impl>
+int
+LSQ<Impl>::numLoadsReady()
+{
+ unsigned total = 0;
+
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ total += thread[tid].numLoadsReady();
+ }
+
+ return total;
+}
+
+template<class Impl>
+unsigned
+LSQ<Impl>::numFreeEntries()
+{
+ unsigned total = 0;
+
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ total += thread[tid].numFreeEntries();
+ }
+
+ return total;
+}
+
+template<class Impl>
+unsigned
+LSQ<Impl>::numFreeEntries(unsigned tid)
+{
+ //if( lsqPolicy == Dynamic )
+ //return numFreeEntries();
+ //else
+ return thread[tid].numFreeEntries();
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::isFull()
+{
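+    // The LSQ counts as full only when every active thread's
+    // LQ or SQ is full.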
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+        if (!(thread[tid].lqFull() || thread[tid].sqFull()))
+ return false;
+ }
+
+ return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::isFull(unsigned tid)
+{
+ //@todo: Change to Calculate All Entries for
+ //Dynamic Policy
+    if (lsqPolicy == Dynamic)
+ return isFull();
+ else
+ return thread[tid].lqFull() || thread[tid].sqFull();
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::lqFull()
+{
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ if (!thread[tid].lqFull())
+ return false;
+ }
+
+ return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::lqFull(unsigned tid)
+{
+ //@todo: Change to Calculate All Entries for
+ //Dynamic Policy
+    if (lsqPolicy == Dynamic)
+ return lqFull();
+ else
+ return thread[tid].lqFull();
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::sqFull()
+{
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ if (!sqFull(tid))
+ return false;
+ }
+
+ return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::sqFull(unsigned tid)
+{
+ //@todo: Change to Calculate All Entries for
+ //Dynamic Policy
+    if (lsqPolicy == Dynamic)
+ return sqFull();
+ else
+ return thread[tid].sqFull();
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::isStalled()
+{
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ if (!thread[tid].isStalled())
+ return false;
+ }
+
+ return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::isStalled(unsigned tid)
+{
+    if (lsqPolicy == Dynamic)
+ return isStalled();
+ else
+ return thread[tid].isStalled();
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::hasStoresToWB()
+{
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ if ((*activeThreads).empty())
+ return false;
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ if (!hasStoresToWB(tid))
+ return false;
+ }
+
+ return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::willWB()
+{
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ if (!willWB(tid))
+ return false;
+ }
+
+ return true;
+}
+
+template<class Impl>
+void
+LSQ<Impl>::dumpInsts()
+{
+ std::list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ thread[tid].dumpInsts();
+ }
+}
--- /dev/null
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ * Korey Sewell
+ */
+
+#ifndef __CPU_O3_LSQ_UNIT_HH__
+#define __CPU_O3_LSQ_UNIT_HH__
+
+#include <algorithm>
+#include <map>
+#include <queue>
+
+#include "arch/faults.hh"
+#include "config/full_system.hh"
+#include "base/hashmap.hh"
+#include "cpu/inst_seq.hh"
+#include "mem/packet_impl.hh"
+#include "mem/port.hh"
+
+/**
+ * Class that implements the actual LQ and SQ for each specific
+ * thread. Both are circular queues; load entries are freed upon
+ * committing, while store entries are freed once they writeback. The
+ * LSQUnit tracks if there are memory ordering violations, and also
+ * detects partial load to store forwarding cases (a store only has
+ * part of a load's data) that requires the load to wait until the
+ * store writes back. In the former case it holds onto the instruction
+ * until the dependence unit looks at it, and in the latter it stalls
+ * the LSQ until the store writes back. At that point the load is
+ * replayed.
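+ *
+ * Both queues reserve one slot as a sentinel: e.g., a queue built
+ * for 32 entries is sized to 33 and is considered full once it
+ * holds 32 instructions.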
+ */
+template <class Impl>
+class LSQUnit {
+ protected:
+ typedef TheISA::IntReg IntReg;
+ public:
+ typedef typename Impl::Params Params;
+ typedef typename Impl::O3CPU O3CPU;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::CPUPol::IEW IEW;
+ typedef typename Impl::CPUPol::LSQ LSQ;
+ typedef typename Impl::CPUPol::IssueStruct IssueStruct;
+
+ public:
+ /** Constructs an LSQ unit. init() must be called prior to use. */
+ LSQUnit();
+
+ /** Initializes the LSQ unit with the specified number of entries. */
+ void init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries,
+ unsigned maxSQEntries, unsigned id);
+
+ /** Returns the name of the LSQ unit. */
+ std::string name() const;
+
+ /** Registers statistics. */
+ void regStats();
+
+ /** Sets the CPU pointer. */
+ void setCPU(O3CPU *cpu_ptr);
+
+ /** Sets the IEW stage pointer. */
+ void setIEW(IEW *iew_ptr)
+ { iewStage = iew_ptr; }
+
+ /** Sets the pointer to the dcache port. */
+ void setDcachePort(Port *dcache_port)
+ { dcachePort = dcache_port; }
+
+ /** Switches out LSQ unit. */
+ void switchOut();
+
+ /** Takes over from another CPU's thread. */
+ void takeOverFrom();
+
+ /** Returns if the LSQ is switched out. */
+ bool isSwitchedOut() { return switchedOut; }
+
+ /** Ticks the LSQ unit, which in this case only resets the number of
+ * used cache ports.
+ * @todo: Move the number of used ports up to the LSQ level so it can
+ * be shared by all LSQ units.
+ */
+ void tick() { usedPorts = 0; }
+
+ /** Inserts an instruction. */
+ void insert(DynInstPtr &inst);
+ /** Inserts a load instruction. */
+ void insertLoad(DynInstPtr &load_inst);
+ /** Inserts a store instruction. */
+ void insertStore(DynInstPtr &store_inst);
+
+ /** Executes a load instruction. */
+ Fault executeLoad(DynInstPtr &inst);
+
+ Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
+ /** Executes a store instruction. */
+ Fault executeStore(DynInstPtr &inst);
+
+ /** Commits the head load. */
+ void commitLoad();
+ /** Commits loads older than a specific sequence number. */
+ void commitLoads(InstSeqNum &youngest_inst);
+
+ /** Commits stores older than a specific sequence number. */
+ void commitStores(InstSeqNum &youngest_inst);
+
+ /** Writes back stores. */
+ void writebackStores();
+
+ /** Completes the data access that has been returned from the
+ * memory system. */
+ void completeDataAccess(PacketPtr pkt);
+
+ /** Clears all the entries in the LQ. */
+ void clearLQ();
+
+ /** Clears all the entries in the SQ. */
+ void clearSQ();
+
+ /** Resizes the LQ to a given size. */
+ void resizeLQ(unsigned size);
+
+ /** Resizes the SQ to a given size. */
+ void resizeSQ(unsigned size);
+
+ /** Squashes all instructions younger than a specific sequence number. */
+ void squash(const InstSeqNum &squashed_num);
+
+ /** Returns if there is a memory ordering violation. Value is reset upon
+ * call to getMemDepViolator().
+ */
+ bool violation() { return memDepViolator; }
+
+ /** Returns the memory ordering violator. */
+ DynInstPtr getMemDepViolator();
+
+ /** Returns if a load became blocked due to the memory system. */
+ bool loadBlocked()
+ { return isLoadBlocked; }
+
+ /** Clears the signal that a load became blocked. */
+ void clearLoadBlocked()
+ { isLoadBlocked = false; }
+
+ /** Returns if the blocked load was handled. */
+ bool isLoadBlockedHandled()
+ { return loadBlockedHandled; }
+
+ /** Records the blocked load as being handled. */
+ void setLoadBlockedHandled()
+ { loadBlockedHandled = true; }
+
+ /** Returns the number of free entries (min of free LQ and SQ entries). */
+ unsigned numFreeEntries();
+
+ /** Returns the number of loads ready to execute. */
+ int numLoadsReady();
+
+ /** Returns the number of loads in the LQ. */
+ int numLoads() { return loads; }
+
+ /** Returns the number of stores in the SQ. */
+ int numStores() { return stores; }
+
+ /** Returns if either the LQ or SQ is full. */
+ bool isFull() { return lqFull() || sqFull(); }
+
+ /** Returns if the LQ is full. */
+ bool lqFull() { return loads >= (LQEntries - 1); }
+
+ /** Returns if the SQ is full. */
+ bool sqFull() { return stores >= (SQEntries - 1); }
+
+ /** Returns the number of instructions in the LSQ. */
+ unsigned getCount() { return loads + stores; }
+
+ /** Returns if there are any stores to writeback. */
+ bool hasStoresToWB() { return storesToWB; }
+
+ /** Returns the number of stores to writeback. */
+ int numStoresToWB() { return storesToWB; }
+
+ /** Returns if the LSQ unit will writeback on this cycle. */
+ bool willWB() { return storeQueue[storeWBIdx].canWB &&
+ !storeQueue[storeWBIdx].completed &&
+ !isStoreBlocked; }
+
+ /** Handles doing the retry. */
+ void recvRetry();
+
+ private:
+ /** Writes back the instruction, sending it to IEW. */
+ void writeback(DynInstPtr &inst, PacketPtr pkt);
+
+ /** Handles completing the send of a store to memory. */
+ void storePostSend(Packet *pkt);
+
+ /** Completes the store at the specified index. */
+ void completeStore(int store_idx);
+
+ /** Increments the given store index (circular queue). */
+ inline void incrStIdx(int &store_idx);
+ /** Decrements the given store index (circular queue). */
+ inline void decrStIdx(int &store_idx);
+ /** Increments the given load index (circular queue). */
+ inline void incrLdIdx(int &load_idx);
+ /** Decrements the given load index (circular queue). */
+ inline void decrLdIdx(int &load_idx);
+
+ public:
+ /** Debugging function to dump instructions in the LSQ. */
+ void dumpInsts();
+
+ private:
+ /** Pointer to the CPU. */
+ O3CPU *cpu;
+
+ /** Pointer to the IEW stage. */
+ IEW *iewStage;
+
+ /** Pointer to the LSQ. */
+ LSQ *lsq;
+
+ /** Pointer to the dcache port. Used only for sending. */
+ Port *dcachePort;
+
+ /** Derived class to hold any sender state the LSQ needs. */
+ class LSQSenderState : public Packet::SenderState
+ {
+ public:
+ /** Default constructor. */
+ LSQSenderState()
+ : noWB(false)
+ { }
+
+ /** Instruction who initiated the access to memory. */
+ DynInstPtr inst;
+ /** Whether or not it is a load. */
+ bool isLoad;
+ /** The LQ/SQ index of the instruction. */
+ int idx;
+ /** Whether or not the instruction will need to writeback. */
+ bool noWB;
+ };
+
+ /** Writeback event, specifically for when stores forward data to loads. */
+ class WritebackEvent : public Event {
+ public:
+ /** Constructs a writeback event. */
+ WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr);
+
+ /** Processes the writeback event. */
+ void process();
+
+ /** Returns the description of this event. */
+ const char *description();
+
+ private:
+ /** Instruction whose results are being written back. */
+ DynInstPtr inst;
+
+ /** The packet that would have been sent to memory. */
+ PacketPtr pkt;
+
+ /** The pointer to the LSQ unit that issued the store. */
+ LSQUnit<Impl> *lsqPtr;
+ };
+
+ public:
+ struct SQEntry {
+ /** Constructs an empty store queue entry. */
+ SQEntry()
+ : inst(NULL), req(NULL), size(0), data(0),
+ canWB(0), committed(0), completed(0)
+ { }
+
+ /** Constructs a store queue entry for a given instruction. */
+ SQEntry(DynInstPtr &_inst)
+ : inst(_inst), req(NULL), size(0), data(0),
+ canWB(0), committed(0), completed(0)
+ { }
+
+ /** The store instruction. */
+ DynInstPtr inst;
+ /** The request for the store. */
+ RequestPtr req;
+ /** The size of the store. */
+ int size;
+ /** The store data. */
+ IntReg data;
+ /** Whether or not the store can writeback. */
+ bool canWB;
+ /** Whether or not the store is committed. */
+ bool committed;
+ /** Whether or not the store is completed. */
+ bool completed;
+ };
+
+ private:
+ /** The LSQUnit thread id. */
+ unsigned lsqID;
+
+ /** The store queue. */
+ std::vector<SQEntry> storeQueue;
+
+ /** The load queue. */
+ std::vector<DynInstPtr> loadQueue;
+
+ /** The number of LQ entries, plus a sentinel entry (circular queue).
+ * @todo: Consider having var that records the true number of LQ entries.
+ */
+ unsigned LQEntries;
+ /** The number of SQ entries, plus a sentinel entry (circular queue).
+ * @todo: Consider having var that records the true number of SQ entries.
+ */
+ unsigned SQEntries;
+
+ /** The number of load instructions in the LQ. */
+ int loads;
+ /** The number of store instructions in the SQ. */
+ int stores;
+ /** The number of store instructions in the SQ waiting to writeback. */
+ int storesToWB;
+
+ /** The index of the head instruction in the LQ. */
+ int loadHead;
+ /** The index of the tail instruction in the LQ. */
+ int loadTail;
+
+ /** The index of the head instruction in the SQ. */
+ int storeHead;
+ /** The index of the first instruction that may be ready to be
+ * written back, and has not yet been written back.
+ */
+ int storeWBIdx;
+ /** The index of the tail instruction in the SQ. */
+ int storeTail;
+
+ /// @todo Consider moving to a more advanced model with write vs read ports
+ /** The number of cache ports available each cycle. */
+ int cachePorts;
+
+ /** The number of used cache ports in this cycle. */
+ int usedPorts;
+
+ /** Is the LSQ switched out. */
+ bool switchedOut;
+
+ //list<InstSeqNum> mshrSeqNums;
+
+ /** Wire to read information from the issue stage time queue. */
+ typename TimeBuffer<IssueStruct>::wire fromIssue;
+
+ /** Whether or not the LSQ is stalled. */
+ bool stalled;
+ /** The store that causes the stall due to partial store to load
+ * forwarding.
+ */
+ InstSeqNum stallingStoreIsn;
+ /** The index of the above store. */
+ int stallingLoadIdx;
+
+ /** The packet that needs to be retried. */
+ PacketPtr retryPkt;
+
+    /** Whether or not a store is blocked due to the memory system. */
+ bool isStoreBlocked;
+
+ /** Whether or not a load is blocked due to the memory system. */
+ bool isLoadBlocked;
+
+ /** Has the blocked load been handled. */
+ bool loadBlockedHandled;
+
+ /** The sequence number of the blocked load. */
+ InstSeqNum blockedLoadSeqNum;
+
+ /** The oldest load that caused a memory ordering violation. */
+ DynInstPtr memDepViolator;
+
+ // Will also need how many read/write ports the Dcache has. Or keep track
+ // of that in stage that is one level up, and only call executeLoad/Store
+ // the appropriate number of times.
+    /** Total number of loads ignored due to invalid addresses. */
+    Stats::Scalar<> invAddrLoads;
+
+    /** Total number of squashed loads. */
+    Stats::Scalar<> lsqSquashedLoads;
+
+    /** Total number of responses from the memory system that are
+     * ignored due to the instruction already being squashed. */
+    Stats::Scalar<> lsqIgnoredResponses;
+
+    /** Total number of memory ordering violations. */
+    Stats::Scalar<> lsqMemOrderViolation;
+
+    /** Total number of loads forwarded from LSQ stores. */
+ Stats::Scalar<> lsqForwLoads;
+
+ /** Total number of squashed stores. */
+ Stats::Scalar<> lsqSquashedStores;
+
+ /** Total number of software prefetches ignored due to invalid addresses. */
+ Stats::Scalar<> invAddrSwpfs;
+
+ /** Ready loads blocked due to partial store-forwarding. */
+ Stats::Scalar<> lsqBlockedLoads;
+
+ /** Number of loads that were rescheduled. */
+ Stats::Scalar<> lsqRescheduledLoads;
+
+ /** Number of times the LSQ is blocked due to the cache. */
+ Stats::Scalar<> lsqCacheBlocked;
+
+ public:
+ /** Executes the load at the given index. */
+ template <class T>
+ Fault read(Request *req, T &data, int load_idx);
+
+ /** Executes the store at the given index. */
+ template <class T>
+ Fault write(Request *req, T &data, int store_idx);
+
+ /** Returns the index of the head load instruction. */
+ int getLoadHead() { return loadHead; }
+ /** Returns the sequence number of the head load instruction. */
+ InstSeqNum getLoadHeadSeqNum()
+ {
+ if (loadQueue[loadHead]) {
+ return loadQueue[loadHead]->seqNum;
+ } else {
+ return 0;
+ }
+
+ }
+
+ /** Returns the index of the head store instruction. */
+ int getStoreHead() { return storeHead; }
+ /** Returns the sequence number of the head store instruction. */
+ InstSeqNum getStoreHeadSeqNum()
+ {
+ if (storeQueue[storeHead].inst) {
+ return storeQueue[storeHead].inst->seqNum;
+ } else {
+ return 0;
+ }
+
+ }
+
+ /** Returns whether or not the LSQ unit is stalled. */
+ bool isStalled() { return stalled; }
+};
+
+template <class Impl>
+template <class T>
+Fault
+LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
+{
+ DynInstPtr load_inst = loadQueue[load_idx];
+
+ assert(load_inst);
+
+ assert(!load_inst->isExecuted());
+
+ // Make sure this isn't an uncacheable access
+ // A bit of a hackish way to get uncached accesses to work only if they're
+ // at the head of the LSQ and are ready to commit (at the head of the ROB
+ // too).
+ if (req->getFlags() & UNCACHEABLE &&
+ (load_idx != loadHead || !load_inst->isAtCommit())) {
+ iewStage->rescheduleMemInst(load_inst);
+ ++lsqRescheduledLoads;
+ return TheISA::genMachineCheckFault();
+ }
+
+ // Check the SQ for any previous stores that might lead to forwarding
+ int store_idx = load_inst->sqIdx;
+
+ int store_size = 0;
+
+ DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
+ "storeHead: %i addr: %#x\n",
+ load_idx, store_idx, storeHead, req->getPaddr());
+
+#if FULL_SYSTEM
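+    // Record the locked address/flag so that a later store
+    // conditional can verify them.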
+ if (req->getFlags() & LOCKED) {
+ cpu->lockAddr = req->getPaddr();
+ cpu->lockFlag = true;
+ }
+#endif
+
+ while (store_idx != -1) {
+ // End once we've reached the top of the LSQ
+ if (store_idx == storeWBIdx) {
+ break;
+ }
+
+        // Move the index to the next older store (wrapping around
+        // the circular SQ)
+ if (--store_idx < 0)
+ store_idx += SQEntries;
+
+ assert(storeQueue[store_idx].inst);
+
+ store_size = storeQueue[store_idx].size;
+
+ if (store_size == 0)
+ continue;
+
+ // Check if the store data is within the lower and upper bounds of
+ // addresses that the request needs.
+ bool store_has_lower_limit =
+ req->getVaddr() >= storeQueue[store_idx].inst->effAddr;
+ bool store_has_upper_limit =
+ (req->getVaddr() + req->getSize()) <=
+ (storeQueue[store_idx].inst->effAddr + store_size);
+ bool lower_load_has_store_part =
+ req->getVaddr() < (storeQueue[store_idx].inst->effAddr +
+ store_size);
+ bool upper_load_has_store_part =
+ (req->getVaddr() + req->getSize()) >
+ storeQueue[store_idx].inst->effAddr;
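+        // Example: an 8-byte store to effAddr 0x100 fully covers a
+        // 4-byte load at vaddr 0x104, but only partially covers an
+        // 8-byte load at vaddr 0x104 (its upper limit fails).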
+
+ // If the store's data has all of the data needed, we can forward.
+ if (store_has_lower_limit && store_has_upper_limit) {
+ // Get shift amount for offset into the store's data.
+ int shift_amt = req->getVaddr() & (store_size - 1);
+ // @todo: Magic number, assumes byte addressing
+ shift_amt = shift_amt << 3;
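+            // e.g., an 8-byte store and a load at vaddr 0x104 give
+            // shift_amt = (0x104 & 7) << 3 = 32 bits.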
+
+ // Cast this to type T?
+ data = storeQueue[store_idx].data >> shift_amt;
+
+ assert(!load_inst->memData);
+ load_inst->memData = new uint8_t[64];
+
+ memcpy(load_inst->memData, &data, req->getSize());
+
+ DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
+ "addr %#x, data %#x\n",
+ store_idx, req->getVaddr(), data);
+
+ PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
+ data_pkt->dataStatic(load_inst->memData);
+
+ WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
+
+ // We'll say this has a 1 cycle load-store forwarding latency
+ // for now.
+ // @todo: Need to make this a parameter.
+ wb->schedule(curTick);
+
+ ++lsqForwLoads;
+ return NoFault;
+ } else if ((store_has_lower_limit && lower_load_has_store_part) ||
+ (store_has_upper_limit && upper_load_has_store_part) ||
+ (lower_load_has_store_part && upper_load_has_store_part)) {
+ // This is the partial store-load forwarding case where a store
+ // has only part of the load's data.
+
+ // If it's already been written back, then don't worry about
+ // stalling on it.
+ if (storeQueue[store_idx].completed) {
+ continue;
+ }
+
+ // Must stall load and force it to retry, so long as it's the oldest
+ // load that needs to do so.
+ if (!stalled ||
+ (stalled &&
+ load_inst->seqNum <
+ loadQueue[stallingLoadIdx]->seqNum)) {
+ stalled = true;
+ stallingStoreIsn = storeQueue[store_idx].inst->seqNum;
+ stallingLoadIdx = load_idx;
+ }
+
+ // Tell IQ/mem dep unit that this instruction will need to be
+ // rescheduled eventually
+ iewStage->rescheduleMemInst(load_inst);
+ iewStage->decrWb(load_inst->seqNum);
+ ++lsqRescheduledLoads;
+
+ // Do not generate a writeback event as this instruction is not
+ // complete.
+ DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
+ "Store idx %i to load addr %#x\n",
+ store_idx, req->getVaddr());
+
+ ++lsqBlockedLoads;
+ return NoFault;
+ }
+ }
+
+ // If there's no forwarding case, then go access memory
+ DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %#x\n",
+ load_inst->seqNum, load_inst->readPC());
+
+ assert(!load_inst->memData);
+ load_inst->memData = new uint8_t[64];
+
+ ++usedPorts;
+
+ PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
+ data_pkt->dataStatic(load_inst->memData);
+
+ LSQSenderState *state = new LSQSenderState;
+ state->isLoad = true;
+ state->idx = load_idx;
+ state->inst = load_inst;
+ data_pkt->senderState = state;
+
+    // If the cache is not blocked, do the cache access
+ if (!lsq->cacheBlocked()) {
+ if (!dcachePort->sendTiming(data_pkt)) {
+ // If the access didn't succeed, tell the LSQ by setting
+ // the retry thread id.
+ lsq->setRetryTid(lsqID);
+ }
+ }
+
+ // If the cache was blocked, or has become blocked due to the access,
+ // handle it.
+ if (lsq->cacheBlocked()) {
+ ++lsqCacheBlocked;
+
+ iewStage->decrWb(load_inst->seqNum);
+ // There's an older load that's already going to squash.
+ if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
+ return NoFault;
+
+ // Record that the load was blocked due to memory. This
+ // load will squash all instructions after it, be
+ // refetched, and re-executed.
+ isLoadBlocked = true;
+ loadBlockedHandled = false;
+ blockedLoadSeqNum = load_inst->seqNum;
+ // No fault occurred, even though the interface is blocked.
+ return NoFault;
+ }
+
+ if (data_pkt->result != Packet::Success) {
+ DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n");
+ DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
+ load_inst->seqNum);
+ } else {
+ DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n");
+ DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
+ load_inst->seqNum);
+ }
+
+ return NoFault;
+}
+
+template <class Impl>
+template <class T>
+Fault
+LSQUnit<Impl>::write(Request *req, T &data, int store_idx)
+{
+ assert(storeQueue[store_idx].inst);
+
+ DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x"
+ " | storeHead:%i [sn:%i]\n",
+ store_idx, req->getPaddr(), data, storeHead,
+ storeQueue[store_idx].inst->seqNum);
+
+ storeQueue[store_idx].req = req;
+ storeQueue[store_idx].size = sizeof(T);
+ storeQueue[store_idx].data = data;
+
+ // This function only writes the data to the store queue, so no fault
+ // can happen here.
+ return NoFault;
+}
+
+#endif // __CPU_O3_LSQ_UNIT_HH__
--- /dev/null
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ * Korey Sewell
+ */
+
+#include "config/use_checker.hh"
+
+#include "cpu/o3/lsq.hh"
+#include "cpu/o3/lsq_unit.hh"
+#include "base/str.hh"
+#include "mem/packet.hh"
+#include "mem/request.hh"
+
+#if USE_CHECKER
+#include "cpu/checker/cpu.hh"
+#endif
+
+template<class Impl>
+LSQUnit<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt,
+ LSQUnit *lsq_ptr)
+ : Event(&mainEventQueue), inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
+{
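+    // AutoDelete lets the event queue free this event once it
+    // has been processed.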
+ this->setFlags(Event::AutoDelete);
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::WritebackEvent::process()
+{
+ if (!lsqPtr->isSwitchedOut()) {
+ lsqPtr->writeback(inst, pkt);
+ }
+ delete pkt;
+}
+
+template<class Impl>
+const char *
+LSQUnit<Impl>::WritebackEvent::description()
+{
+ return "Store writeback event";
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
+{
+ LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
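+    // The sender state carries the instruction and its LQ/SQ index,
+    // attached when the access was first sent to memory.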
+ DynInstPtr inst = state->inst;
+ DPRINTF(IEW, "Writeback event [sn:%lli]\n", inst->seqNum);
+ DPRINTF(Activity, "Activity: Writeback event [sn:%lli]\n", inst->seqNum);
+
+ //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
+
+ if (isSwitchedOut() || inst->isSquashed()) {
+ iewStage->decrWb(inst->seqNum);
+ delete state;
+ delete pkt;
+ return;
+ } else {
+ if (!state->noWB) {
+ writeback(inst, pkt);
+ }
+
+ if (inst->isStore()) {
+ completeStore(state->idx);
+ }
+ }
+
+ delete state;
+ delete pkt;
+}
+
+template <class Impl>
+LSQUnit<Impl>::LSQUnit()
+ : loads(0), stores(0), storesToWB(0), stalled(false),
+ isStoreBlocked(false), isLoadBlocked(false),
+ loadBlockedHandled(false)
+{
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::init(Params *params, LSQ *lsq_ptr, unsigned maxLQEntries,
+ unsigned maxSQEntries, unsigned id)
+{
+ DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id);
+
+ switchedOut = false;
+
+ lsq = lsq_ptr;
+
+ lsqID = id;
+
+ // Add 1 for the sentinel entry (they are circular queues).
+ LQEntries = maxLQEntries + 1;
+ SQEntries = maxSQEntries + 1;
+
+ loadQueue.resize(LQEntries);
+ storeQueue.resize(SQEntries);
+
+ loadHead = loadTail = 0;
+
+ storeHead = storeWBIdx = storeTail = 0;
+
+ usedPorts = 0;
+ cachePorts = params->cachePorts;
+
+ memDepViolator = NULL;
+
+ blockedLoadSeqNum = 0;
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::setCPU(O3CPU *cpu_ptr)
+{
+ cpu = cpu_ptr;
+
+#if USE_CHECKER
+ if (cpu->checker) {
+ cpu->checker->setDcachePort(dcachePort);
+ }
+#endif
+}
+
+template<class Impl>
+std::string
+LSQUnit<Impl>::name() const
+{
+ if (Impl::MaxThreads == 1) {
+ return iewStage->name() + ".lsq";
+ } else {
+ return iewStage->name() + ".lsq.thread." + to_string(lsqID);
+ }
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::regStats()
+{
+ lsqForwLoads
+ .name(name() + ".forwLoads")
+ .desc("Number of loads that had data forwarded from stores");
+
+ invAddrLoads
+ .name(name() + ".invAddrLoads")
+ .desc("Number of loads ignored due to an invalid address");
+
+ lsqSquashedLoads
+ .name(name() + ".squashedLoads")
+ .desc("Number of loads squashed");
+
+ lsqIgnoredResponses
+ .name(name() + ".ignoredResponses")
+ .desc("Number of memory responses ignored because the instruction is squashed");
+
+    lsqMemOrderViolation
+        .name(name() + ".memOrderViolation")
+        .desc("Number of memory ordering violations");
+
+ lsqSquashedStores
+ .name(name() + ".squashedStores")
+ .desc("Number of stores squashed");
+
+ invAddrSwpfs
+ .name(name() + ".invAddrSwpfs")
+ .desc("Number of software prefetches ignored due to an invalid address");
+
+ lsqBlockedLoads
+ .name(name() + ".blockedLoads")
+ .desc("Number of blocked loads due to partial load-store forwarding");
+
+ lsqRescheduledLoads
+ .name(name() + ".rescheduledLoads")
+ .desc("Number of loads that were rescheduled");
+
+ lsqCacheBlocked
+ .name(name() + ".cacheBlocked")
+ .desc("Number of times an access to memory failed due to the cache being blocked");
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::clearLQ()
+{
+ loadQueue.clear();
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::clearSQ()
+{
+ storeQueue.clear();
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::switchOut()
+{
+ switchedOut = true;
+    for (int i = 0; i < loadQueue.size(); ++i) {
+        assert(!loadQueue[i]);
+        loadQueue[i] = NULL;
+    }
+
+ assert(storesToWB == 0);
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::takeOverFrom()
+{
+ switchedOut = false;
+ loads = stores = storesToWB = 0;
+
+ loadHead = loadTail = 0;
+
+ storeHead = storeWBIdx = storeTail = 0;
+
+ usedPorts = 0;
+
+ memDepViolator = NULL;
+
+ blockedLoadSeqNum = 0;
+
+ stalled = false;
+ isLoadBlocked = false;
+ loadBlockedHandled = false;
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::resizeLQ(unsigned size)
+{
+ unsigned size_plus_sentinel = size + 1;
+ assert(size_plus_sentinel >= LQEntries);
+
+ if (size_plus_sentinel > LQEntries) {
+ while (size_plus_sentinel > loadQueue.size()) {
+ DynInstPtr dummy;
+ loadQueue.push_back(dummy);
+ LQEntries++;
+ }
+ } else {
+ LQEntries = size_plus_sentinel;
+ }
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::resizeSQ(unsigned size)
+{
+ unsigned size_plus_sentinel = size + 1;
+ if (size_plus_sentinel > SQEntries) {
+ while (size_plus_sentinel > storeQueue.size()) {
+ SQEntry dummy;
+ storeQueue.push_back(dummy);
+ SQEntries++;
+ }
+ } else {
+ SQEntries = size_plus_sentinel;
+ }
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::insert(DynInstPtr &inst)
+{
+ assert(inst->isMemRef());
+
+ assert(inst->isLoad() || inst->isStore());
+
+ if (inst->isLoad()) {
+ insertLoad(inst);
+ } else {
+ insertStore(inst);
+ }
+
+ inst->setInLSQ();
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::insertLoad(DynInstPtr &load_inst)
+{
+ assert((loadTail + 1) % LQEntries != loadHead);
+ assert(loads < LQEntries);
+
+ DPRINTF(LSQUnit, "Inserting load PC %#x, idx:%i [sn:%lli]\n",
+ load_inst->readPC(), loadTail, load_inst->seqNum);
+
+ load_inst->lqIdx = loadTail;
+
+ if (stores == 0) {
+ load_inst->sqIdx = -1;
+ } else {
+ load_inst->sqIdx = storeTail;
+ }
+
+ loadQueue[loadTail] = load_inst;
+
+ incrLdIdx(loadTail);
+
+ ++loads;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::insertStore(DynInstPtr &store_inst)
+{
+ // Make sure it is not full before inserting an instruction.
+ assert((storeTail + 1) % SQEntries != storeHead);
+ assert(stores < SQEntries);
+
+ DPRINTF(LSQUnit, "Inserting store PC %#x, idx:%i [sn:%lli]\n",
+ store_inst->readPC(), storeTail, store_inst->seqNum);
+
+ store_inst->sqIdx = storeTail;
+ store_inst->lqIdx = loadTail;
+
+ storeQueue[storeTail] = SQEntry(store_inst);
+
+ incrStIdx(storeTail);
+
+ ++stores;
+}
+
+template <class Impl>
+typename Impl::DynInstPtr
+LSQUnit<Impl>::getMemDepViolator()
+{
+ DynInstPtr temp = memDepViolator;
+
+ memDepViolator = NULL;
+
+ return temp;
+}
+
+template <class Impl>
+unsigned
+LSQUnit<Impl>::numFreeEntries()
+{
+ unsigned free_lq_entries = LQEntries - loads;
+ unsigned free_sq_entries = SQEntries - stores;
+
+ // Both the LQ and SQ have an extra dummy (sentinel) entry to
+ // differentiate the empty/full conditions, so subtract 1 from the
+ // free entry count.
+ if (free_lq_entries < free_sq_entries) {
+ return free_lq_entries - 1;
+ } else {
+ return free_sq_entries - 1;
+ }
+}
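For readers unfamiliar with the sentinel convention that numFreeEntries() subtracts for, the following standalone sketch (illustrative only, not simulator code) shows why one slot of each circular queue must stay unused: with head == tail meaning empty, a completely full buffer would otherwise be indistinguishable from an empty one.

#include <cassert>

// Standalone illustration of the sentinel-slot convention used by the
// load and store queues: head == tail means empty, and one slot is
// kept permanently unused so "full" is distinguishable from "empty".
struct RingQueue {
    enum { Entries = 8 };           // includes the sentinel slot
    int head;                       // next entry to pop
    int tail;                       // next slot to fill

    RingQueue() : head(0), tail(0) { }

    bool empty() const { return head == tail; }
    bool full() const { return (tail + 1) % Entries == head; }

    void push() { assert(!full()); tail = (tail + 1) % Entries; }
    void pop() { assert(!empty()); head = (head + 1) % Entries; }
};

int main()
{
    RingQueue q;
    for (int i = 0; i < RingQueue::Entries - 1; ++i)
        q.push();                   // only Entries - 1 usable slots
    assert(q.full());
    return 0;
}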
+
+template <class Impl>
+int
+LSQUnit<Impl>::numLoadsReady()
+{
+ int load_idx = loadHead;
+ int retval = 0;
+
+ while (load_idx != loadTail) {
+ assert(loadQueue[load_idx]);
+
+ if (loadQueue[load_idx]->readyToIssue()) {
+ ++retval;
+ }
+
+ // Advance the index; without this the scan would never terminate.
+ incrLdIdx(load_idx);
+ }
+
+ return retval;
+}
+
+template <class Impl>
+Fault
+LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
+{
+ // Execute a specific load.
+ Fault load_fault = NoFault;
+
+ DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n",
+ inst->readPC(), inst->seqNum);
+
+ load_fault = inst->initiateAcc();
+
+ // If the instruction faulted, then we need to send it along to commit
+ // without the instruction completing.
+ if (load_fault != NoFault) {
+ // Send this instruction to commit, also make sure iew stage
+ // realizes there is activity.
++ // Mark it as executed unless it is an uncached load that
++ // needs to hit the head of commit.
++ if (!(inst->req->flags & UNCACHEABLE) || inst->isAtCommit()) {
++ inst->setExecuted();
++ }
+ iewStage->instToCommit(inst);
+ iewStage->activityThisCycle();
+ }
+
+ return load_fault;
+}
+
+template <class Impl>
+Fault
+LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
+{
+ using namespace TheISA;
+ // Make sure that a store exists.
+ assert(stores != 0);
+
+ int store_idx = store_inst->sqIdx;
+
+ DPRINTF(LSQUnit, "Executing store PC %#x [sn:%lli]\n",
+ store_inst->readPC(), store_inst->seqNum);
+
+ // Check the recently completed loads to see if any match this store's
+ // address. If so, then we have a memory ordering violation.
+ int load_idx = store_inst->lqIdx;
+
+ Fault store_fault = store_inst->initiateAcc();
+
+ if (storeQueue[store_idx].size == 0) {
+ DPRINTF(LSQUnit,"Fault on Store PC %#x, [sn:%lli],Size = 0\n",
+ store_inst->readPC(),store_inst->seqNum);
+
+ return store_fault;
+ }
+
+ assert(store_fault == NoFault);
+
+ if (store_inst->isStoreConditional()) {
+ // Store conditionals need to set themselves as able to
+ // writeback if we haven't had a fault by here.
+ storeQueue[store_idx].canWB = true;
+
+ ++storesToWB;
+ }
+
+ if (!memDepViolator) {
+ while (load_idx != loadTail) {
+ // Really we only need to check loads that have actually executed,
+ // but it's safe to check all loads because effAddr is set to
+ // InvalAddr when the dyn inst is created.
+
+ // @todo: For now this is extra conservative, detecting a
+ // violation if the addresses match, assuming all accesses
+ // are quad-word accesses.
+
+ // @todo: Fix this, magic number being used here
+ if ((loadQueue[load_idx]->effAddr >> 8) ==
+ (store_inst->effAddr >> 8)) {
+ // A load incorrectly passed this store. Squash and refetch.
+ // For now return a fault to show that it was unsuccessful.
+ memDepViolator = loadQueue[load_idx];
++ ++lsqMemOrderViolation;
+
+ return genMachineCheckFault();
+ }
+
+ incrLdIdx(load_idx);
+ }
+
+ // If we've reached this point, there was no violation.
+ memDepViolator = NULL;
+ }
+
+ return store_fault;
+}
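The violation scan in executeStore() compares effective addresses shifted right by 8, i.e. at 256-byte granularity, which the @todo above flags as a conservative magic number. A minimal sketch of the same test with the granularity named explicitly (kGranularityBits and mayConflict are hypothetical names, not simulator identifiers):

#include <stdint.h>

// Hypothetical standalone form of the ordering-violation test: two
// accesses are treated as conflicting when they fall in the same
// 2^kGranularityBits-byte block, mirroring the (effAddr >> 8) compare.
static const unsigned kGranularityBits = 8;   // 256-byte blocks

inline bool
mayConflict(uint64_t load_addr, uint64_t store_addr)
{
    return (load_addr >> kGranularityBits) ==
           (store_addr >> kGranularityBits);
}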
+
+template <class Impl>
+void
+LSQUnit<Impl>::commitLoad()
+{
+ assert(loadQueue[loadHead]);
+
+ DPRINTF(LSQUnit, "Committing head load instruction, PC %#x\n",
+ loadQueue[loadHead]->readPC());
+
+ loadQueue[loadHead] = NULL;
+
+ incrLdIdx(loadHead);
+
+ --loads;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::commitLoads(InstSeqNum &youngest_inst)
+{
+ assert(loads == 0 || loadQueue[loadHead]);
+
+ while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) {
+ commitLoad();
+ }
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst)
+{
+ assert(stores == 0 || storeQueue[storeHead].inst);
+
+ int store_idx = storeHead;
+
+ while (store_idx != storeTail) {
+ assert(storeQueue[store_idx].inst);
+ // Mark any stores that are now committed and have not yet
+ // been marked as able to write back.
+ if (!storeQueue[store_idx].canWB) {
+ if (storeQueue[store_idx].inst->seqNum > youngest_inst) {
+ break;
+ }
+ DPRINTF(LSQUnit, "Marking store as able to write back, PC "
+ "%#x [sn:%lli]\n",
+ storeQueue[store_idx].inst->readPC(),
+ storeQueue[store_idx].inst->seqNum);
+
+ storeQueue[store_idx].canWB = true;
+
+ ++storesToWB;
+ }
+
+ incrStIdx(store_idx);
+ }
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::writebackStores()
+{
+ while (storesToWB > 0 &&
+ storeWBIdx != storeTail &&
+ storeQueue[storeWBIdx].inst &&
+ storeQueue[storeWBIdx].canWB &&
+ usedPorts < cachePorts) {
+
+ if (isStoreBlocked || lsq->cacheBlocked()) {
+ DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
+ " is blocked!\n");
+ break;
+ }
+
+ // Store didn't write any data so no need to write it back to
+ // memory.
+ if (storeQueue[storeWBIdx].size == 0) {
+ completeStore(storeWBIdx);
+
+ incrStIdx(storeWBIdx);
+
+ continue;
+ }
+
+ ++usedPorts;
+
+ if (storeQueue[storeWBIdx].inst->isDataPrefetch()) {
+ incrStIdx(storeWBIdx);
+
+ continue;
+ }
+
+ assert(storeQueue[storeWBIdx].req);
+ assert(!storeQueue[storeWBIdx].committed);
+
+ DynInstPtr inst = storeQueue[storeWBIdx].inst;
+
+ Request *req = storeQueue[storeWBIdx].req;
+ storeQueue[storeWBIdx].committed = true;
+
+ assert(!inst->memData);
+ inst->memData = new uint8_t[64];
+ memcpy(inst->memData, (uint8_t *)&storeQueue[storeWBIdx].data,
+ req->getSize());
+
+ PacketPtr data_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast);
+ data_pkt->dataStatic(inst->memData);
+
+ LSQSenderState *state = new LSQSenderState;
+ state->isLoad = false;
+ state->idx = storeWBIdx;
+ state->inst = inst;
+ data_pkt->senderState = state;
+
+ DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x "
+ "to Addr:%#x, data:%#x [sn:%lli]\n",
+ storeWBIdx, storeQueue[storeWBIdx].inst->readPC(),
+ req->getPaddr(), *(inst->memData),
+ storeQueue[storeWBIdx].inst->seqNum);
+
+ // @todo: Remove this SC hack once the memory system handles it.
+ if (req->getFlags() & LOCKED) {
+ if (req->getFlags() & UNCACHEABLE) {
+ req->setScResult(2);
+ } else {
+ if (cpu->lockFlag) {
+ req->setScResult(1);
+ } else {
+ req->setScResult(0);
+ // Hack: Instantly complete this store.
+ completeDataAccess(data_pkt);
+ incrStIdx(storeWBIdx);
+ continue;
+ }
+ }
+ } else {
+ // Non-store conditionals do not need a writeback.
+ state->noWB = true;
+ }
+
+ if (!dcachePort->sendTiming(data_pkt)) {
+ // Need to handle becoming blocked on a store.
+ isStoreBlocked = true;
+ ++lsqCacheBlocked;
+ assert(retryPkt == NULL);
+ retryPkt = data_pkt;
+ lsq->setRetryTid(lsqID);
+ } else {
+ storePostSend(data_pkt);
+ }
+ }
+
+ // @todo: It is not clear that usedPorts should be reset to 0 here.
+ usedPorts = 0;
+
+ assert(stores >= 0 && storesToWB >= 0);
+}
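To summarize the SC hack above: setScResult() is given 2 for an uncacheable store-conditional, 1 when the CPU's lockFlag is still held (success), and 0 on failure, in which case the store is completed locally via completeDataAccess() without ever reaching memory. A hedged sketch of that three-way decision, with hypothetical enum and function names:

// Illustrative-only sketch of the store-conditional outcome selection
// in writebackStores(); these names are not the simulator's API.
enum ScResult {
    ScFailed = 0,        // lock lost: complete locally, skip memory
    ScSucceeded = 1,     // lock still held when the SC writes back
    ScUncacheable = 2    // uncacheable SC: no lock tracking applies
};

inline ScResult
scOutcome(bool uncacheable, bool lockFlagStillHeld)
{
    if (uncacheable)
        return ScUncacheable;
    return lockFlagStillHeld ? ScSucceeded : ScFailed;
}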
+
+/*template <class Impl>
+void
+LSQUnit<Impl>::removeMSHR(InstSeqNum seqNum)
+{
+ list<InstSeqNum>::iterator mshr_it = find(mshrSeqNums.begin(),
+ mshrSeqNums.end(),
+ seqNum);
+
+ if (mshr_it != mshrSeqNums.end()) {
+ mshrSeqNums.erase(mshr_it);
+ DPRINTF(LSQUnit, "Removing MSHR. count = %i\n",mshrSeqNums.size());
+ }
+}*/
+
+template <class Impl>
+void
+LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
+{
+ DPRINTF(LSQUnit, "Squashing until [sn:%lli]!"
+ "(Loads:%i Stores:%i)\n", squashed_num, loads, stores);
+
+ int load_idx = loadTail;
+ decrLdIdx(load_idx);
+
+ while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) {
+ DPRINTF(LSQUnit,"Load Instruction PC %#x squashed, "
+ "[sn:%lli]\n",
+ loadQueue[load_idx]->readPC(),
+ loadQueue[load_idx]->seqNum);
+
+ if (isStalled() && load_idx == stallingLoadIdx) {
+ stalled = false;
+ stallingStoreIsn = 0;
+ stallingLoadIdx = 0;
+ }
+
+ // Clear the smart pointer to make sure it is decremented.
+ loadQueue[load_idx]->setSquashed();
+ loadQueue[load_idx] = NULL;
+ --loads;
+
+ // Inefficient: the tail pointer is rewritten on every iteration.
+ loadTail = load_idx;
+
+ decrLdIdx(load_idx);
+ ++lsqSquashedLoads;
+ }
+
+ if (isLoadBlocked) {
+ if (squashed_num < blockedLoadSeqNum) {
+ isLoadBlocked = false;
+ loadBlockedHandled = false;
+ blockedLoadSeqNum = 0;
+ }
+ }
+
+ int store_idx = storeTail;
+ decrStIdx(store_idx);
+
+ while (stores != 0 &&
+ storeQueue[store_idx].inst->seqNum > squashed_num) {
+ // Instructions marked as can WB are already committed.
+ if (storeQueue[store_idx].canWB) {
+ break;
+ }
+
+ DPRINTF(LSQUnit,"Store Instruction PC %#x squashed, "
+ "idx:%i [sn:%lli]\n",
+ storeQueue[store_idx].inst->readPC(),
+ store_idx, storeQueue[store_idx].inst->seqNum);
+
+ // I don't think this can happen. It should have been cleared
+ // by the stalling load.
+ if (isStalled() &&
+ storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
+ panic("Is stalled should have been cleared by stalling load!\n");
+ stalled = false;
+ stallingStoreIsn = 0;
+ }
+
+ // Clear the smart pointer to make sure it is decremented.
+ storeQueue[store_idx].inst->setSquashed();
+ storeQueue[store_idx].inst = NULL;
+ storeQueue[store_idx].canWB = false;
+
+ storeQueue[store_idx].req = NULL;
+ --stores;
+
+ // Inefficient: the tail pointer is rewritten on every iteration.
+ storeTail = store_idx;
+
+ decrStIdx(store_idx);
+ ++lsqSquashedStores;
+ }
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::storePostSend(Packet *pkt)
+{
+ if (isStalled() &&
+ storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) {
+ DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
+ "load idx:%i\n",
+ stallingStoreIsn, stallingLoadIdx);
+ stalled = false;
+ stallingStoreIsn = 0;
+ iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
+ }
+
+ if (!storeQueue[storeWBIdx].inst->isStoreConditional()) {
+ // The store is basically completed at this time. This
+ // only works so long as the checker doesn't try to
+ // verify the value in memory for stores.
+ storeQueue[storeWBIdx].inst->setCompleted();
+#if USE_CHECKER
+ if (cpu->checker) {
+ cpu->checker->verify(storeQueue[storeWBIdx].inst);
+ }
+#endif
+ }
+
+ if (pkt->result != Packet::Success) {
+ DPRINTF(LSQUnit,"D-Cache Write Miss on idx:%i!\n",
+ storeWBIdx);
+
+ DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
+ storeQueue[storeWBIdx].inst->seqNum);
+
+ //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum);
+
+ //DPRINTF(LSQUnit, "Added MSHR. count = %i\n",mshrSeqNums.size());
+
+ // @todo: Increment stat here.
+ } else {
+ DPRINTF(LSQUnit,"D-Cache: Write Hit on idx:%i !\n",
+ storeWBIdx);
+
+ DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
+ storeQueue[storeWBIdx].inst->seqNum);
+ }
+
+ incrStIdx(storeWBIdx);
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
+{
+ iewStage->wakeCPU();
+
+ // Squashed instructions do not need to complete their access.
+ if (inst->isSquashed()) {
+ iewStage->decrWb(inst->seqNum);
+ assert(!inst->isStore());
+ ++lsqIgnoredResponses;
+ return;
+ }
+
+ if (!inst->isExecuted()) {
+ inst->setExecuted();
+
+ // Complete access to copy data to proper place.
+ inst->completeAcc(pkt);
+ }
+
+ // Need to insert instruction into queue to commit
+ iewStage->instToCommit(inst);
+
+ iewStage->activityThisCycle();
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::completeStore(int store_idx)
+{
+ assert(storeQueue[store_idx].inst);
+ storeQueue[store_idx].completed = true;
+ --storesToWB;
+ // A bit conservative because a store completion may not free up entries,
+ // but hopefully avoids two store completions in one cycle from making
+ // the CPU tick twice.
+ cpu->activityThisCycle();
+
+ if (store_idx == storeHead) {
+ do {
+ incrStIdx(storeHead);
+
+ --stores;
+ } while (storeQueue[storeHead].completed &&
+ storeHead != storeTail);
+
+ iewStage->updateLSQNextCycle = true;
+ }
+
+ DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head "
+ "idx:%i\n",
+ storeQueue[store_idx].inst->seqNum, store_idx, storeHead);
+
+ if (isStalled() &&
+ storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
+ DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
+ "load idx:%i\n",
+ stallingStoreIsn, stallingLoadIdx);
+ stalled = false;
+ stallingStoreIsn = 0;
+ iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
+ }
+
+ storeQueue[store_idx].inst->setCompleted();
+
+ // Tell the checker we've completed this instruction. Some stores
+ // may get reported twice to the checker, but the checker can
+ // handle that case.
+#if USE_CHECKER
+ if (cpu->checker) {
+ cpu->checker->verify(storeQueue[store_idx].inst);
+ }
+#endif
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::recvRetry()
+{
+ if (isStoreBlocked) {
+ assert(retryPkt != NULL);
+
+ if (dcachePort->sendTiming(retryPkt)) {
+ storePostSend(retryPkt);
+ retryPkt = NULL;
+ isStoreBlocked = false;
+ lsq->setRetryTid(-1);
+ } else {
+ // Still blocked!
+ ++lsqCacheBlocked;
+ lsq->setRetryTid(lsqID);
+ }
+ } else if (isLoadBlocked) {
+ DPRINTF(LSQUnit, "Loads squash themselves and all younger insts, "
+ "no need to resend packet.\n");
+ } else {
+ DPRINTF(LSQUnit, "Retry received but LSQ is no longer blocked.\n");
+ }
+}
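recvRetry() is one half of a simple handshake: a refused sendTiming() in writebackStores() parks the packet in retryPkt and marks the unit blocked, and the port's later retry callback re-sends that exact packet. A toy model of the protocol (all names hypothetical, independent of the simulator's port API):

// Toy model of the send/retry handshake between the LSQ unit and its
// cache port; names are illustrative only.
struct ToyPort {
    bool busy;
    bool sendTiming(int /* pkt */) { return !busy; }
};

struct ToyLSQ {
    ToyPort *port;
    int retryPkt;        // -1 means no packet is waiting
    bool blocked;

    ToyLSQ(ToyPort *p) : port(p), retryPkt(-1), blocked(false) { }

    void trySend(int pkt)
    {
        if (!port->sendTiming(pkt)) {
            retryPkt = pkt;          // park the refused packet
            blocked = true;          // stop issuing further stores
        }
    }

    void recvRetry()
    {
        if (blocked && port->sendTiming(retryPkt)) {
            retryPkt = -1;           // the same packet finally went out
            blocked = false;
        }
    }
};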
+
+template <class Impl>
+inline void
+LSQUnit<Impl>::incrStIdx(int &store_idx)
+{
+ if (++store_idx >= SQEntries)
+ store_idx = 0;
+}
+
+template <class Impl>
+inline void
+LSQUnit<Impl>::decrStIdx(int &store_idx)
+{
+ if (--store_idx < 0)
+ store_idx += SQEntries;
+}
+
+template <class Impl>
+inline void
+LSQUnit<Impl>::incrLdIdx(int &load_idx)
+{
+ if (++load_idx >= LQEntries)
+ load_idx = 0;
+}
+
+template <class Impl>
+inline void
+LSQUnit<Impl>::decrLdIdx(int &load_idx)
+{
+ if (--load_idx < 0)
+ load_idx += LQEntries;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::dumpInsts()
+{
+ cprintf("Load store queue: Dumping instructions.\n");
+ cprintf("Load queue size: %i\n", loads);
+ cprintf("Load queue: ");
+
+ int load_idx = loadHead;
+
+ while (load_idx != loadTail && loadQueue[load_idx]) {
+ cprintf("%#x ", loadQueue[load_idx]->readPC());
+
+ incrLdIdx(load_idx);
+ }
+
+ cprintf("Store queue size: %i\n", stores);
+ cprintf("Store queue: ");
+
+ int store_idx = storeHead;
+
+ while (store_idx != storeTail && storeQueue[store_idx].inst) {
+ cprintf("%#x ", storeQueue[store_idx].inst->readPC());
+
+ incrStIdx(store_idx);
+ }
+
+ cprintf("\n");
+}
--- /dev/null
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include <map>
+
+#include "cpu/o3/inst_queue.hh"
+#include "cpu/o3/mem_dep_unit.hh"
+
+template <class MemDepPred, class Impl>
+MemDepUnit<MemDepPred, Impl>::MemDepUnit(Params *params)
+ : depPred(params->SSITSize, params->LFSTSize), loadBarrier(false),
+ loadBarrierSN(0), storeBarrier(false), storeBarrierSN(0), iqPtr(NULL)
+{
+ DPRINTF(MemDepUnit, "Creating MemDepUnit object.\n");
+}
+
+template <class MemDepPred, class Impl>
+MemDepUnit<MemDepPred, Impl>::~MemDepUnit()
+{
+ for (int tid=0; tid < Impl::MaxThreads; tid++) {
+
+ ListIt inst_list_it = instList[tid].begin();
+
+ MemDepHashIt hash_it;
+
+ while (!instList[tid].empty()) {
+ hash_it = memDepHash.find((*inst_list_it)->seqNum);
+
+ assert(hash_it != memDepHash.end());
+
+ memDepHash.erase(hash_it);
+
+ instList[tid].erase(inst_list_it++);
+ }
+ }
+
+#ifdef DEBUG
+ assert(MemDepEntry::memdep_count == 0);
+#endif
+}
+
+template <class MemDepPred, class Impl>
+std::string
+MemDepUnit<MemDepPred, Impl>::name() const
+{
+ return "memdepunit";
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::init(Params *params, int tid)
+{
+ DPRINTF(MemDepUnit, "Creating MemDepUnit %i object.\n",tid);
+
+ id = tid;
+
+ depPred.init(params->SSITSize, params->LFSTSize);
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::regStats()
+{
+ insertedLoads
+ .name(name() + ".memDep.insertedLoads")
+ .desc("Number of loads inserted to the mem dependence unit.");
+
+ insertedStores
+ .name(name() + ".memDep.insertedStores")
+ .desc("Number of stores inserted to the mem dependence unit.");
+
+ conflictingLoads
+ .name(name() + ".memDep.conflictingLoads")
+ .desc("Number of conflicting loads.");
+
+ conflictingStores
+ .name(name() + ".memDep.conflictingStores")
+ .desc("Number of conflicting stores.");
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::switchOut()
+{
++ assert(instList[0].empty());
++ assert(instsToReplay.empty());
++ assert(memDepHash.empty());
+ // Clear any state.
+ for (int i = 0; i < Impl::MaxThreads; ++i) {
+ instList[i].clear();
+ }
+ instsToReplay.clear();
+ memDepHash.clear();
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::takeOverFrom()
+{
+ // Be sure to reset all state.
+ loadBarrier = storeBarrier = false;
+ loadBarrierSN = storeBarrierSN = 0;
+ depPred.clear();
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::setIQ(InstructionQueue<Impl> *iq_ptr)
+{
+ iqPtr = iq_ptr;
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
+{
+ unsigned tid = inst->threadNumber;
+
+ MemDepEntryPtr inst_entry = new MemDepEntry(inst);
+
+ // Add the MemDepEntry to the hash.
+ memDepHash.insert(
+ std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
+#ifdef DEBUG
+ MemDepEntry::memdep_insert++;
+#endif
+
+ instList[tid].push_back(inst);
+
+ inst_entry->listIt = --(instList[tid].end());
+
+ // Check any barriers and the dependence predictor for any
+ // producing memrefs/stores.
+ InstSeqNum producing_store;
+ if (inst->isLoad() && loadBarrier) {
+ producing_store = loadBarrierSN;
+ } else if (inst->isStore() && storeBarrier) {
+ producing_store = storeBarrierSN;
+ } else {
+ producing_store = depPred.checkInst(inst->readPC());
+ }
+
+ MemDepEntryPtr store_entry = NULL;
+
+ // If there is a producing store, try to find the entry.
+ if (producing_store != 0) {
+ MemDepHashIt hash_it = memDepHash.find(producing_store);
+
+ if (hash_it != memDepHash.end()) {
+ store_entry = (*hash_it).second;
+ }
+ }
+
+ // If no store entry, then instruction can issue as soon as the registers
+ // are ready.
+ if (!store_entry) {
+ DPRINTF(MemDepUnit, "No dependency for inst PC "
+ "%#x [sn:%lli].\n", inst->readPC(), inst->seqNum);
+
+ inst_entry->memDepReady = true;
+
+ if (inst->readyToIssue()) {
+ inst_entry->regsReady = true;
+
+ moveToReady(inst_entry);
+ }
+ } else {
+ // Otherwise make the instruction dependent on the store/barrier.
+ DPRINTF(MemDepUnit, "Adding to dependency list; "
+ "inst PC %#x is dependent on [sn:%lli].\n",
+ inst->readPC(), producing_store);
+
+ if (inst->readyToIssue()) {
+ inst_entry->regsReady = true;
+ }
+
+ // Add this instruction to the list of dependents.
+ store_entry->dependInsts.push_back(inst_entry);
+
+ if (inst->isLoad()) {
+ ++conflictingLoads;
+ } else {
+ ++conflictingStores;
+ }
+ }
+
+ if (inst->isStore()) {
+ DPRINTF(MemDepUnit, "Inserting store PC %#x [sn:%lli].\n",
+ inst->readPC(), inst->seqNum);
+
+ depPred.insertStore(inst->readPC(), inst->seqNum, inst->threadNumber);
+
+ ++insertedStores;
+ } else if (inst->isLoad()) {
+ ++insertedLoads;
+ } else {
+ panic("Unknown type! (most likely a barrier).");
+ }
+}
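The dependency lookup in insert() resolves a producing store in strict priority order: an outstanding barrier wins over the dependence predictor, and a zero sequence number means the instruction has no memory dependence at all. A condensed sketch of just that selection (pickProducer is a hypothetical name):

#include <stdint.h>
typedef uint64_t SeqNum;

// Condensed, hypothetical form of the producing-store selection in
// insert(): a pending barrier outranks the PC-indexed dependence
// predictor, and 0 means "no producer; issue once registers are ready".
SeqNum
pickProducer(bool is_load, bool is_store,
             bool load_barrier, SeqNum load_barrier_sn,
             bool store_barrier, SeqNum store_barrier_sn,
             SeqNum predicted_sn)
{
    if (is_load && load_barrier)
        return load_barrier_sn;
    if (is_store && store_barrier)
        return store_barrier_sn;
    return predicted_sn;    // depPred.checkInst(pc) in the code above
}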
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::insertNonSpec(DynInstPtr &inst)
+{
+ unsigned tid = inst->threadNumber;
+
+ MemDepEntryPtr inst_entry = new MemDepEntry(inst);
+
+ // Insert the MemDepEntry into the hash.
+ memDepHash.insert(
+ std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
+#ifdef DEBUG
+ MemDepEntry::memdep_insert++;
+#endif
+
+ // Add the instruction to the list.
+ instList[tid].push_back(inst);
+
+ inst_entry->listIt = --(instList[tid].end());
+
+ // Might want to turn this part into an inline function or something.
+ // It's shared between both insert functions.
+ if (inst->isStore()) {
+ DPRINTF(MemDepUnit, "Inserting store PC %#x [sn:%lli].\n",
+ inst->readPC(), inst->seqNum);
+
+ depPred.insertStore(inst->readPC(), inst->seqNum, inst->threadNumber);
+
+ ++insertedStores;
+ } else if (inst->isLoad()) {
+ ++insertedLoads;
+ } else {
+ panic("Unknown type! (most likely a barrier).");
+ }
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::insertBarrier(DynInstPtr &barr_inst)
+{
+ InstSeqNum barr_sn = barr_inst->seqNum;
+ // Memory barriers block loads and stores, write barriers only stores.
+ if (barr_inst->isMemBarrier()) {
+ loadBarrier = true;
+ loadBarrierSN = barr_sn;
+ storeBarrier = true;
+ storeBarrierSN = barr_sn;
+ DPRINTF(MemDepUnit, "Inserted a memory barrier\n");
+ } else if (barr_inst->isWriteBarrier()) {
+ storeBarrier = true;
+ storeBarrierSN = barr_sn;
+ DPRINTF(MemDepUnit, "Inserted a write barrier\n");
+ }
+
+ unsigned tid = barr_inst->threadNumber;
+
+ MemDepEntryPtr inst_entry = new MemDepEntry(barr_inst);
+
+ // Add the MemDepEntry to the hash.
+ memDepHash.insert(
+ std::pair<InstSeqNum, MemDepEntryPtr>(barr_sn, inst_entry));
+#ifdef DEBUG
+ MemDepEntry::memdep_insert++;
+#endif
+
+ // Add the instruction to the instruction list.
+ instList[tid].push_back(barr_inst);
+
+ inst_entry->listIt = --(instList[tid].end());
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::regsReady(DynInstPtr &inst)
+{
+ DPRINTF(MemDepUnit, "Marking registers as ready for "
+ "instruction PC %#x [sn:%lli].\n",
+ inst->readPC(), inst->seqNum);
+
+ MemDepEntryPtr inst_entry = findInHash(inst);
+
+ inst_entry->regsReady = true;
+
+ if (inst_entry->memDepReady) {
+ DPRINTF(MemDepUnit, "Instruction has its memory "
+ "dependencies resolved, adding it to the ready list.\n");
+
+ moveToReady(inst_entry);
+ } else {
+ DPRINTF(MemDepUnit, "Instruction still waiting on "
+ "memory dependency.\n");
+ }
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::nonSpecInstReady(DynInstPtr &inst)
+{
+ DPRINTF(MemDepUnit, "Marking non speculative "
+ "instruction PC %#x as ready [sn:%lli].\n",
+ inst->readPC(), inst->seqNum);
+
+ MemDepEntryPtr inst_entry = findInHash(inst);
+
+ moveToReady(inst_entry);
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::reschedule(DynInstPtr &inst)
+{
+ instsToReplay.push_back(inst);
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst)
+{
+ DynInstPtr temp_inst;
+ bool found_inst = false;
+
+ // For now this replay function replays all waiting memory ops.
+ while (!instsToReplay.empty()) {
+ temp_inst = instsToReplay.front();
+
+ MemDepEntryPtr inst_entry = findInHash(temp_inst);
+
+ DPRINTF(MemDepUnit, "Replaying mem instruction PC %#x "
+ "[sn:%lli].\n",
+ temp_inst->readPC(), temp_inst->seqNum);
+
+ moveToReady(inst_entry);
+
+ if (temp_inst == inst) {
+ found_inst = true;
+ }
+
+ instsToReplay.pop_front();
+ }
+
+ assert(found_inst);
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::completed(DynInstPtr &inst)
+{
+ DPRINTF(MemDepUnit, "Completed mem instruction PC %#x "
+ "[sn:%lli].\n",
+ inst->readPC(), inst->seqNum);
+
+ unsigned tid = inst->threadNumber;
+
+ // Remove the instruction from the hash and the list.
+ MemDepHashIt hash_it = memDepHash.find(inst->seqNum);
+
+ assert(hash_it != memDepHash.end());
+
+ instList[tid].erase((*hash_it).second->listIt);
+
+ (*hash_it).second = NULL;
+
+ memDepHash.erase(hash_it);
+#ifdef DEBUG
+ MemDepEntry::memdep_erase++;
+#endif
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::completeBarrier(DynInstPtr &inst)
+{
+ wakeDependents(inst);
+ completed(inst);
+
+ InstSeqNum barr_sn = inst->seqNum;
+
+ if (inst->isMemBarrier()) {
+ assert(loadBarrier && storeBarrier);
+ if (loadBarrierSN == barr_sn)
+ loadBarrier = false;
+ if (storeBarrierSN == barr_sn)
+ storeBarrier = false;
+ } else if (inst->isWriteBarrier()) {
+ assert(storeBarrier);
+ if (storeBarrierSN == barr_sn)
+ storeBarrier = false;
+ }
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::wakeDependents(DynInstPtr &inst)
+{
+ // Only stores and barriers have dependents.
+ if (!inst->isStore() && !inst->isMemBarrier() && !inst->isWriteBarrier()) {
+ return;
+ }
+
+ MemDepEntryPtr inst_entry = findInHash(inst);
+
+ for (int i = 0; i < inst_entry->dependInsts.size(); ++i ) {
+ MemDepEntryPtr woken_inst = inst_entry->dependInsts[i];
+
+ if (!woken_inst->inst) {
+ // Mem dep entries that were already removed may still be on this list.
+ continue;
+ }
+
+ DPRINTF(MemDepUnit, "Waking up a dependent inst, "
+ "[sn:%lli].\n",
+ woken_inst->inst->seqNum);
+
+ if (woken_inst->regsReady && !woken_inst->squashed) {
+ moveToReady(woken_inst);
+ } else {
+ woken_inst->memDepReady = true;
+ }
+ }
+
+ inst_entry->dependInsts.clear();
+}
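wakeDependents() encodes a two-condition readiness rule: an instruction issues only when both its register operands and its memory dependence are resolved, and the two can resolve in either order. Distilled into a standalone sketch (hypothetical names):

// Hypothetical distilled form of the wake-up rule in wakeDependents().
struct ToyEntry {
    bool regsReady;
    bool memDepReady;
    bool squashed;
};

// Returns true when the entry should move to the ready list now;
// otherwise the memory side is simply marked resolved for later.
inline bool
wake(ToyEntry &e)
{
    if (e.regsReady && !e.squashed)
        return true;       // both halves resolved: ready to issue
    e.memDepReady = true;  // registers still pending; remember mem side
    return false;
}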
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num,
+ unsigned tid)
+{
+ if (!instsToReplay.empty()) {
+ ListIt replay_it = instsToReplay.begin();
+ while (replay_it != instsToReplay.end()) {
+ if ((*replay_it)->threadNumber == tid &&
+ (*replay_it)->seqNum > squashed_num) {
+ instsToReplay.erase(replay_it++);
+ } else {
+ ++replay_it;
+ }
+ }
+ }
+
+ ListIt squash_it = instList[tid].end();
+ --squash_it;
+
+ MemDepHashIt hash_it;
+
+ while (!instList[tid].empty() &&
+ (*squash_it)->seqNum > squashed_num) {
+
+ DPRINTF(MemDepUnit, "Squashing inst [sn:%lli]\n",
+ (*squash_it)->seqNum);
+
+ hash_it = memDepHash.find((*squash_it)->seqNum);
+
+ assert(hash_it != memDepHash.end());
+
+ (*hash_it).second->squashed = true;
+
+ (*hash_it).second = NULL;
+
+ memDepHash.erase(hash_it);
+#ifdef DEBUG
+ MemDepEntry::memdep_erase++;
+#endif
+
+ instList[tid].erase(squash_it--);
+ }
+
+ // Tell the dependency predictor to squash as well.
+ depPred.squash(squashed_num, tid);
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::violation(DynInstPtr &store_inst,
+ DynInstPtr &violating_load)
+{
+ DPRINTF(MemDepUnit, "Passing violating PCs to store sets,"
+ " load: %#x, store: %#x\n", violating_load->readPC(),
+ store_inst->readPC());
+ // Tell the memory dependence unit of the violation.
+ depPred.violation(violating_load->readPC(), store_inst->readPC());
+}
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::issue(DynInstPtr &inst)
+{
+ DPRINTF(MemDepUnit, "Issuing instruction PC %#x [sn:%lli].\n",
+ inst->readPC(), inst->seqNum);
+
+ depPred.issued(inst->readPC(), inst->seqNum, inst->isStore());
+}
+
+template <class MemDepPred, class Impl>
+inline typename MemDepUnit<MemDepPred,Impl>::MemDepEntryPtr &
+MemDepUnit<MemDepPred, Impl>::findInHash(const DynInstPtr &inst)
+{
+ MemDepHashIt hash_it = memDepHash.find(inst->seqNum);
+
+ assert(hash_it != memDepHash.end());
+
+ return (*hash_it).second;
+}
+
+template <class MemDepPred, class Impl>
+inline void
+MemDepUnit<MemDepPred, Impl>::moveToReady(MemDepEntryPtr &woken_inst_entry)
+{
+ DPRINTF(MemDepUnit, "Adding instruction [sn:%lli] "
+ "to the ready list.\n", woken_inst_entry->inst->seqNum);
+
+ assert(!woken_inst_entry->squashed);
+
+ iqPtr->addReadyMemInst(woken_inst_entry->inst);
+}
+
+
+template <class MemDepPred, class Impl>
+void
+MemDepUnit<MemDepPred, Impl>::dumpLists()
+{
+ for (unsigned tid=0; tid < Impl::MaxThreads; tid++) {
+ cprintf("Instruction list %i size: %i\n",
+ tid, instList[tid].size());
+
+ ListIt inst_list_it = instList[tid].begin();
+ int num = 0;
+
+ while (inst_list_it != instList[tid].end()) {
+ cprintf("Instruction:%i\nPC:%#x\n[sn:%i]\n[tid:%i]\nIssued:%i\n"
+ "Squashed:%i\n\n",
+ num, (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+ inst_list_it++;
+ ++num;
+ }
+ }
+
+ cprintf("Memory dependence hash size: %i\n", memDepHash.size());
+
+#ifdef DEBUG
+ cprintf("Memory dependence entries: %i\n", MemDepEntry::memdep_count);
+#endif
+}
--- /dev/null
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_O3_RENAME_HH__
+#define __CPU_O3_RENAME_HH__
+
+#include <list>
+
+#include "base/statistics.hh"
+#include "base/timebuf.hh"
+
+/**
+ * DefaultRename handles both single threaded and SMT rename. Its
+ * width is specified by the parameters; each cycle it tries to rename
+ * that many instructions. It holds onto the rename history of all
+ * instructions with destination registers, storing the
+ * arch. register, the new physical register, and the old physical
+ * register, to allow for undoing of mappings if squashing happens, or
+ * freeing up registers upon commit. Rename handles blocking if the
+ * ROB, IQ, or LSQ is going to be full. Rename also handles barriers,
+ * and does so by stalling on the instruction until the ROB is empty
+ * and there are no instructions in flight to the ROB.
+ */
+template<class Impl>
+class DefaultRename
+{
+ public:
+ // Typedefs from the Impl.
+ typedef typename Impl::CPUPol CPUPol;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::O3CPU O3CPU;
+ typedef typename Impl::Params Params;
+
+ // Typedefs from the CPUPol
+ typedef typename CPUPol::DecodeStruct DecodeStruct;
+ typedef typename CPUPol::RenameStruct RenameStruct;
+ typedef typename CPUPol::TimeStruct TimeStruct;
+ typedef typename CPUPol::FreeList FreeList;
+ typedef typename CPUPol::RenameMap RenameMap;
+ // These are used only for initialization.
+ typedef typename CPUPol::IEW IEW;
+ typedef typename CPUPol::Commit Commit;
+
+ // Typedefs from the ISA.
+ typedef TheISA::RegIndex RegIndex;
+
+ // A list is used to queue the instructions. Barrier insts must
+ // be added to the front of the list, which is the only reason for
+ // using a list instead of a queue. (Most other stages use a
+ // queue.)
+ typedef std::list<DynInstPtr> InstQueue;
+ typedef typename std::list<DynInstPtr>::iterator ListIt;
+
+ public:
+ /** Overall rename status. Used to determine if the CPU can
+ * deschedule itself due to a lack of activity.
+ */
+ enum RenameStatus {
+ Active,
+ Inactive
+ };
+
+ /** Individual thread status. */
+ enum ThreadStatus {
+ Running,
+ Idle,
+ StartSquash,
+ Squashing,
+ Blocked,
+ Unblocking,
+ SerializeStall
+ };
+
+ private:
+ /** Rename status. */
+ RenameStatus _status;
+
+ /** Per-thread status. */
+ ThreadStatus renameStatus[Impl::MaxThreads];
+
+ public:
+ /** DefaultRename constructor. */
+ DefaultRename(Params *params);
+
+ /** Returns the name of rename. */
+ std::string name() const;
+
+ /** Registers statistics. */
+ void regStats();
+
+ /** Sets CPU pointer. */
+ void setCPU(O3CPU *cpu_ptr);
+
+ /** Sets the main backwards communication time buffer pointer. */
+ void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
+
+ /** Sets pointer to time buffer used to communicate to the next stage. */
+ void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr);
+
+ /** Sets pointer to time buffer coming from decode. */
+ void setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr);
+
+ /** Sets pointer to IEW stage. Used only for initialization. */
+ void setIEWStage(IEW *iew_stage)
+ { iew_ptr = iew_stage; }
+
+ /** Sets pointer to commit stage. Used only for initialization. */
+ void setCommitStage(Commit *commit_stage)
+ { commit_ptr = commit_stage; }
+
+ private:
+ /** Pointer to IEW stage. Used only for initialization. */
+ IEW *iew_ptr;
+
+ /** Pointer to commit stage. Used only for initialization. */
+ Commit *commit_ptr;
+
+ public:
+ /** Initializes variables for the stage. */
+ void initStage();
+
+ /** Sets pointer to list of active threads. */
+ void setActiveThreads(std::list<unsigned> *at_ptr);
+
+ /** Sets pointer to rename maps (per-thread structures). */
+ void setRenameMap(RenameMap rm_ptr[Impl::MaxThreads]);
+
+ /** Sets pointer to the free list. */
+ void setFreeList(FreeList *fl_ptr);
+
+ /** Sets pointer to the scoreboard. */
+ void setScoreboard(Scoreboard *_scoreboard);
+
+ /** Drains the rename stage. */
+ bool drain();
+
+ /** Resumes execution after a drain. */
+ void resume() { }
+
+ /** Switches out the rename stage. */
+ void switchOut();
+
+ /** Takes over from another CPU's thread. */
+ void takeOverFrom();
+
+ /** Squashes all instructions in a thread. */
+ void squash(const InstSeqNum &squash_seq_num, unsigned tid);
+
+ /** Ticks rename, which processes all input signals and attempts to rename
+ * as many instructions as possible.
+ */
+ void tick();
+
+ /** Debugging function used to dump history buffer of renamings. */
+ void dumpHistory();
+
+ private:
+ /** Determines what to do based on rename's current status.
+ * @param status_change rename() sets this variable if there was a status
+ * change (i.e. switching from blocking to unblocking).
+ * @param tid Thread id to rename instructions from.
+ */
+ void rename(bool &status_change, unsigned tid);
+
+ /** Renames instructions for the given thread. Also handles serializing
+ * instructions.
+ */
+ void renameInsts(unsigned tid);
+
+ /** Inserts unused instructions from a given thread into the skid buffer,
+ * to be renamed once rename unblocks.
+ */
+ void skidInsert(unsigned tid);
+
+ /** Separates instructions from decode into individual lists of instructions
+ * sorted by thread.
+ */
+ void sortInsts();
+
+ /** Returns whether all of the skid buffers are empty. */
+ bool skidsEmpty();
+
+ /** Updates overall rename status based on all of the threads' statuses. */
+ void updateStatus();
+
+ /** Switches rename to blocking, and signals back that rename has become
+ * blocked.
+ * @return Returns true if there is a status change.
+ */
+ bool block(unsigned tid);
+
+ /** Switches rename to unblocking if the skid buffer is empty, and signals
+ * back that rename has unblocked.
+ * @return Returns true if there is a status change.
+ */
+ bool unblock(unsigned tid);
+
+ /** Executes actual squash, removing squashed instructions. */
+ void doSquash(const InstSeqNum &squash_seq_num, unsigned tid);
+
+ /** Removes a committed instruction's rename history. */
+ void removeFromHistory(InstSeqNum inst_seq_num, unsigned tid);
+
+ /** Renames the source registers of an instruction. */
+ inline void renameSrcRegs(DynInstPtr &inst, unsigned tid);
+
+ /** Renames the destination registers of an instruction. */
+ inline void renameDestRegs(DynInstPtr &inst, unsigned tid);
+
+ /** Calculates the number of free ROB entries for a specific thread. */
+ inline int calcFreeROBEntries(unsigned tid);
+
+ /** Calculates the number of free IQ entries for a specific thread. */
+ inline int calcFreeIQEntries(unsigned tid);
+
+ /** Calculates the number of free LSQ entries for a specific thread. */
+ inline int calcFreeLSQEntries(unsigned tid);
+
+ /** Returns the number of valid instructions coming from decode. */
+ unsigned validInsts();
+
+ /** Reads signals telling rename to block/unblock. */
+ void readStallSignals(unsigned tid);
+
+ /** Checks if any stages are telling rename to block. */
+ bool checkStall(unsigned tid);
+
+ /** Gets the number of free entries for a specific thread. */
+ void readFreeEntries(unsigned tid);
+
+ /** Checks the signals and updates the status. */
+ bool checkSignalsAndUpdate(unsigned tid);
+
+ /** Either serializes on the next instruction available in the InstQueue,
+ * or records that it must serialize on the next instruction to enter
+ * rename.
+ * @param inst_list The list of younger, unprocessed instructions for the
+ * thread that has the serializeAfter instruction.
+ * @param tid The thread id.
+ */
+ void serializeAfter(InstQueue &inst_list, unsigned tid);
+
+ /** Holds the information for each destination register rename. It holds
+ * the instruction's sequence number, the arch register, the old physical
+ * register for that arch. register, and the new physical register.
+ */
+ struct RenameHistory {
+ RenameHistory(InstSeqNum _instSeqNum, RegIndex _archReg,
+ PhysRegIndex _newPhysReg, PhysRegIndex _prevPhysReg)
+ : instSeqNum(_instSeqNum), archReg(_archReg),
+ newPhysReg(_newPhysReg), prevPhysReg(_prevPhysReg)
+ {
+ }
+
+ /** The sequence number of the instruction that renamed. */
+ InstSeqNum instSeqNum;
+ /** The architectural register index that was renamed. */
+ RegIndex archReg;
+ /** The new physical register that the arch. register is renamed to. */
+ PhysRegIndex newPhysReg;
+ /** The old physical register that the arch. register was renamed to. */
+ PhysRegIndex prevPhysReg;
+ };
+
+ /** A per-thread list of all destination register renames, used to either
+ * undo rename mappings or free old physical registers.
+ */
+ std::list<RenameHistory> historyBuffer[Impl::MaxThreads];
+
+ /** Pointer to CPU. */
+ O3CPU *cpu;
+
+ /** Pointer to main time buffer used for backwards communication. */
+ TimeBuffer<TimeStruct> *timeBuffer;
+
+ /** Wire to get IEW's output from backwards time buffer. */
+ typename TimeBuffer<TimeStruct>::wire fromIEW;
+
+ /** Wire to get commit's output from backwards time buffer. */
+ typename TimeBuffer<TimeStruct>::wire fromCommit;
+
+ /** Wire to write information heading to previous stages. */
+ typename TimeBuffer<TimeStruct>::wire toDecode;
+
+ /** Rename instruction queue. */
+ TimeBuffer<RenameStruct> *renameQueue;
+
+ /** Wire to write any information heading to IEW. */
+ typename TimeBuffer<RenameStruct>::wire toIEW;
+
+ /** Decode instruction queue interface. */
+ TimeBuffer<DecodeStruct> *decodeQueue;
+
+ /** Wire to get decode's output from decode queue. */
+ typename TimeBuffer<DecodeStruct>::wire fromDecode;
+
+ /** Queue of all instructions coming from decode this cycle. */
+ InstQueue insts[Impl::MaxThreads];
+
+ /** Skid buffer between rename and decode. */
+ InstQueue skidBuffer[Impl::MaxThreads];
+
+ /** Rename map interface. */
+ RenameMap *renameMap[Impl::MaxThreads];
+
+ /** Free list interface. */
+ FreeList *freeList;
+
+ /** Pointer to the list of active threads. */
+ std::list<unsigned> *activeThreads;
+
+ /** Pointer to the scoreboard. */
+ Scoreboard *scoreboard;
+
+ /** Count of instructions in progress that have been sent off to the IQ
+ * and ROB, but are not yet included in their occupancy counts.
+ */
+ int instsInProgress[Impl::MaxThreads];
+
+ /** Variable that tracks if decode has written to the time buffer this
+ * cycle. Used to tell CPU if there is activity this cycle.
+ */
+ bool wroteToTimeBuffer;
+
+ /** Structures whose free entries impact the amount of instructions that
+ * can be renamed.
+ */
+ struct FreeEntries {
+ unsigned iqEntries;
+ unsigned lsqEntries;
+ unsigned robEntries;
+ };
+
+ /** Per-thread tracking of the number of free entries of back-end
+ * structures.
+ */
+ FreeEntries freeEntries[Impl::MaxThreads];
+
+ /** Records if the ROB is empty. In SMT mode the ROB may be dynamically
+ * partitioned between threads, so the ROB must tell rename when it is
+ * empty.
+ */
+ bool emptyROB[Impl::MaxThreads];
+
+ /** Source of possible stalls. */
+ struct Stalls {
+ bool iew;
+ bool commit;
+ };
+
+ /** Tracks which stages are telling decode to stall. */
+ Stalls stalls[Impl::MaxThreads];
+
+ /** The serialize instruction that rename has stalled on. */
+ DynInstPtr serializeInst[Impl::MaxThreads];
+
+ /** Records if rename needs to serialize on the next instruction for any
+ * thread.
+ */
+ bool serializeOnNextInst[Impl::MaxThreads];
+
+ /** Delay between iew and rename, in ticks. */
+ int iewToRenameDelay;
+
+ /** Delay between decode and rename, in ticks. */
+ int decodeToRenameDelay;
+
+ /** Delay between commit and rename, in ticks. */
+ unsigned commitToRenameDelay;
+
+ /** Rename width, in instructions. */
+ unsigned renameWidth;
+
+ /** Commit width, in instructions. Used so rename knows how many
+ * instructions might have freed registers in the previous cycle.
+ */
+ unsigned commitWidth;
+
+ /** The index of the instruction in the time buffer to IEW that rename is
+ * currently using.
+ */
+ unsigned toIEWIndex;
+
+ /** Whether or not rename needs to block this cycle. */
+ bool blockThisCycle;
+
+ /** The number of threads active in rename. */
+ unsigned numThreads;
+
+ /** The maximum skid buffer size. */
+ unsigned skidBufferMax;
+
++ /** The total number of physical integer and floating point registers;
++ * indices at or above this value refer to misc registers. */
++ PhysRegIndex maxPhysicalRegs;
++
+ /** Enum to record the source of a structure-full stall. The source can
+ * be the ROB, IQ, or LSQ, prioritized in that order.
+ */
+ enum FullSource {
+ ROB,
+ IQ,
+ LSQ,
+ NONE
+ };
+
+ /** Function used to increment the stat that corresponds to the source of
+ * the stall.
+ */
+ inline void incrFullStat(const FullSource &source);
+
+ /** Stat for total number of cycles spent squashing. */
+ Stats::Scalar<> renameSquashCycles;
+ /** Stat for total number of cycles spent idle. */
+ Stats::Scalar<> renameIdleCycles;
+ /** Stat for total number of cycles spent blocking. */
+ Stats::Scalar<> renameBlockCycles;
+ /** Stat for total number of cycles spent stalling for a serializing inst. */
+ Stats::Scalar<> renameSerializeStallCycles;
+ /** Stat for total number of cycles spent running normally. */
+ Stats::Scalar<> renameRunCycles;
+ /** Stat for total number of cycles spent unblocking. */
+ Stats::Scalar<> renameUnblockCycles;
+ /** Stat for total number of renamed instructions. */
+ Stats::Scalar<> renameRenamedInsts;
+ /** Stat for total number of squashed instructions that rename discards. */
+ Stats::Scalar<> renameSquashedInsts;
+ /** Stat for total number of times that the ROB starts a stall in rename. */
+ Stats::Scalar<> renameROBFullEvents;
+ /** Stat for total number of times that the IQ starts a stall in rename. */
+ Stats::Scalar<> renameIQFullEvents;
+ /** Stat for total number of times that the LSQ starts a stall in rename. */
+ Stats::Scalar<> renameLSQFullEvents;
+ /** Stat for total number of times that rename runs out of free registers
+ * to use to rename. */
+ Stats::Scalar<> renameFullRegistersEvents;
+ /** Stat for total number of renamed destination registers. */
+ Stats::Scalar<> renameRenamedOperands;
+ /** Stat for total number of source register rename lookups. */
+ Stats::Scalar<> renameRenameLookups;
+ /** Stat for total number of committed renaming mappings. */
+ Stats::Scalar<> renameCommittedMaps;
+ /** Stat for total number of mappings that were undone due to a squash. */
+ Stats::Scalar<> renameUndoneMaps;
+ /** Number of serialize instructions handled. */
+ Stats::Scalar<> renamedSerializing;
+ /** Number of instructions marked as temporarily serializing. */
+ Stats::Scalar<> renamedTempSerializing;
+ /** Number of instructions inserted into skid buffers. */
+ Stats::Scalar<> renameSkidInsts;
+};
+
+#endif // __CPU_O3_RENAME_HH__
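The RenameHistory struct above records exactly the three register indices needed for the two walks rename performs: undoing mappings newest-first on a squash, and freeing previous physical registers oldest-first on commit. A simplified standalone sketch of those walks (all types and names hypothetical; the real buffer's ordering and its map/free-list interfaces differ):

#include <list>

// Simplified, hypothetical model of the two history-buffer walks.
// Entries are kept oldest-first here, which is a simplification.
struct HistEntry {
    unsigned long sn;    // sequence number of the renaming instruction
    int arch;            // architectural register index
    int newPhys;         // physical register assigned by this rename
    int prevPhys;        // physical register it replaced
};

void
undoYoungerThan(std::list<HistEntry> &hb, unsigned long squash_sn,
                int rename_map[], std::list<int> &free_list)
{
    while (!hb.empty() && hb.back().sn > squash_sn) {
        rename_map[hb.back().arch] = hb.back().prevPhys; // restore map
        free_list.push_back(hb.back().newPhys);          // reclaim reg
        hb.pop_back();
    }
}

void
retireUpTo(std::list<HistEntry> &hb, unsigned long done_sn,
           std::list<int> &free_list)
{
    while (!hb.empty() && hb.front().sn <= done_sn) {
        free_list.push_back(hb.front().prevPhys);        // old reg dead
        hb.pop_front();
    }
}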
--- /dev/null
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ * Korey Sewell
+ */
+
+#include <list>
+
+#include "config/full_system.hh"
+#include "cpu/o3/rename.hh"
+
+template <class Impl>
+DefaultRename<Impl>::DefaultRename(Params *params)
+ : iewToRenameDelay(params->iewToRenameDelay),
+ decodeToRenameDelay(params->decodeToRenameDelay),
+ commitToRenameDelay(params->commitToRenameDelay),
+ renameWidth(params->renameWidth),
+ commitWidth(params->commitWidth),
++ numThreads(params->numberOfThreads),
++ maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs)
+{
+ _status = Inactive;
+
+ for (int i=0; i< numThreads; i++) {
+ renameStatus[i] = Idle;
+
+ freeEntries[i].iqEntries = 0;
+ freeEntries[i].lsqEntries = 0;
+ freeEntries[i].robEntries = 0;
+
+ stalls[i].iew = false;
+ stalls[i].commit = false;
+ serializeInst[i] = NULL;
+
+ instsInProgress[i] = 0;
+
+ emptyROB[i] = true;
+
+ serializeOnNextInst[i] = false;
+ }
+
+ // @todo: Make into a parameter.
+ skidBufferMax = (2 * (iewToRenameDelay * params->decodeWidth)) + renameWidth;
+}
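As a worked example of the skid buffer sizing formula (with illustrative values, not simulator defaults): if iewToRenameDelay = 2 and decodeWidth = 4, decode can have 2 * 4 = 8 instructions in flight toward rename, and the factor of two plausibly covers the round trip of the stall signal, so skidBufferMax = 2 * 8 + renameWidth; with renameWidth = 4 that gives 20 entries.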
+
+template <class Impl>
+std::string
+DefaultRename<Impl>::name() const
+{
+ return cpu->name() + ".rename";
+}
+
+template <class Impl>
+void
+DefaultRename<Impl>::regStats()
+{
+ renameSquashCycles
+ .name(name() + ".RENAME:SquashCycles")
+ .desc("Number of cycles rename is squashing")
+ .prereq(renameSquashCycles);
+ renameIdleCycles
+ .name(name() + ".RENAME:IdleCycles")
+ .desc("Number of cycles rename is idle")
+ .prereq(renameIdleCycles);
+ renameBlockCycles
+ .name(name() + ".RENAME:BlockCycles")
+ .desc("Number of cycles rename is blocking")
+ .prereq(renameBlockCycles);
+ renameSerializeStallCycles
+ .name(name() + ".RENAME:serializeStallCycles")
+ .desc("count of cycles rename stalled for serializing inst")
+ .flags(Stats::total);
+ renameRunCycles
+ .name(name() + ".RENAME:RunCycles")
+ .desc("Number of cycles rename is running")
+ .prereq(renameRunCycles);
+ renameUnblockCycles
+ .name(name() + ".RENAME:UnblockCycles")
+ .desc("Number of cycles rename is unblocking")
+ .prereq(renameUnblockCycles);
+ renameRenamedInsts
+ .name(name() + ".RENAME:RenamedInsts")
+ .desc("Number of instructions processed by rename")
+ .prereq(renameRenamedInsts);
+ renameSquashedInsts
+ .name(name() + ".RENAME:SquashedInsts")
+ .desc("Number of squashed instructions processed by rename")
+ .prereq(renameSquashedInsts);
+ renameROBFullEvents
+ .name(name() + ".RENAME:ROBFullEvents")
+ .desc("Number of times rename has blocked due to ROB full")
+ .prereq(renameROBFullEvents);
+ renameIQFullEvents
+ .name(name() + ".RENAME:IQFullEvents")
+ .desc("Number of times rename has blocked due to IQ full")
+ .prereq(renameIQFullEvents);
+ renameLSQFullEvents
+ .name(name() + ".RENAME:LSQFullEvents")
+ .desc("Number of times rename has blocked due to LSQ full")
+ .prereq(renameLSQFullEvents);
+ renameFullRegistersEvents
+ .name(name() + ".RENAME:FullRegisterEvents")
+ .desc("Number of times there has been no free registers")
+ .prereq(renameFullRegistersEvents);
+ renameRenamedOperands
+ .name(name() + ".RENAME:RenamedOperands")
+ .desc("Number of destination operands rename has renamed")
+ .prereq(renameRenamedOperands);
+ renameRenameLookups
+ .name(name() + ".RENAME:RenameLookups")
+ .desc("Number of register rename lookups that rename has made")
+ .prereq(renameRenameLookups);
+ renameCommittedMaps
+ .name(name() + ".RENAME:CommittedMaps")
+ .desc("Number of HB maps that are committed")
+ .prereq(renameCommittedMaps);
+ renameUndoneMaps
+ .name(name() + ".RENAME:UndoneMaps")
+ .desc("Number of HB maps that are undone due to squashing")
+ .prereq(renameUndoneMaps);
+ renamedSerializing
+ .name(name() + ".RENAME:serializingInsts")
+ .desc("count of serializing insts renamed")
+ .flags(Stats::total)
+ ;
+ renamedTempSerializing
+ .name(name() + ".RENAME:tempSerializingInsts")
+ .desc("count of temporary serializing insts renamed")
+ .flags(Stats::total)
+ ;
+ renameSkidInsts
+ .name(name() + ".RENAME:skidInsts")
+ .desc("count of insts added to the skid buffer")
+ .flags(Stats::total)
+ ;
+}
+
+template <class Impl>
+void
+DefaultRename<Impl>::setCPU(O3CPU *cpu_ptr)
+{
+ DPRINTF(Rename, "Setting CPU pointer.\n");
+ cpu = cpu_ptr;
+}
+
+template <class Impl>
+void
+DefaultRename<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
+{
+ DPRINTF(Rename, "Setting time buffer pointer.\n");
+ timeBuffer = tb_ptr;
+
+ // Setup wire to read information from time buffer, from IEW stage.
+ fromIEW = timeBuffer->getWire(-iewToRenameDelay);
+
+ // Setup wire to read information from time buffer, from commit stage.
+ fromCommit = timeBuffer->getWire(-commitToRenameDelay);
+
+ // Setup wire to write information to previous stages.
+ toDecode = timeBuffer->getWire(0);
+}
+
+template <class Impl>
+void
+DefaultRename<Impl>::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
+{
+ DPRINTF(Rename, "Setting rename queue pointer.\n");
+ renameQueue = rq_ptr;
+
+ // Setup wire to write information to future stages.
+ toIEW = renameQueue->getWire(0);
+}
+
+template <class Impl>
+void
+DefaultRename<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
+{
+ DPRINTF(Rename, "Setting decode queue pointer.\n");
+ decodeQueue = dq_ptr;
+
+ // Setup wire to get information from decode.
+ fromDecode = decodeQueue->getWire(-decodeToRenameDelay);
+}
+
+template <class Impl>
+void
+DefaultRename<Impl>::initStage()
+{
+ // Grab the number of free entries directly from the stages.
+ for (int tid=0; tid < numThreads; tid++) {
+ freeEntries[tid].iqEntries = iew_ptr->instQueue.numFreeEntries(tid);
+ freeEntries[tid].lsqEntries = iew_ptr->ldstQueue.numFreeEntries(tid);
+ freeEntries[tid].robEntries = commit_ptr->numROBFreeEntries(tid);
+ emptyROB[tid] = true;
+ }
+}
+
+template<class Impl>
+void
+DefaultRename<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
+{
+ DPRINTF(Rename, "Setting active threads list pointer.\n");
+ activeThreads = at_ptr;
+}
+
+
+template <class Impl>
+void
+DefaultRename<Impl>::setRenameMap(RenameMap rm_ptr[])
+{
+ DPRINTF(Rename, "Setting rename map pointers.\n");
+
+ for (int i=0; i<numThreads; i++) {
+ renameMap[i] = &rm_ptr[i];
+ }
+}
+
+template <class Impl>
+void
+DefaultRename<Impl>::setFreeList(FreeList *fl_ptr)
+{
+ DPRINTF(Rename, "Setting free list pointer.\n");
+ freeList = fl_ptr;
+}
+
+template<class Impl>
+void
+DefaultRename<Impl>::setScoreboard(Scoreboard *_scoreboard)
+{
+ DPRINTF(Rename, "Setting scoreboard pointer.\n");
+ scoreboard = _scoreboard;
+}
+
+template <class Impl>
+bool
+DefaultRename<Impl>::drain()
+{
+ // Rename is ready to switch out at any time.
+ cpu->signalDrained();
+ return true;
+}
+
+template <class Impl>
+void
+DefaultRename<Impl>::switchOut()
+{
+ // Clear any state, fix up the rename map.
+ for (int i = 0; i < numThreads; i++) {
+ typename std::list<RenameHistory>::iterator hb_it =
+ historyBuffer[i].begin();
+
+ while (!historyBuffer[i].empty()) {
+ assert(hb_it != historyBuffer[i].end());
+
+ DPRINTF(Rename, "[tid:%u]: Removing history entry with sequence "
+ "number %i.\n", i, (*hb_it).instSeqNum);
+
+ // Tell the rename map to set the architected register to the
+ // previous physical register that it was renamed to.
+ renameMap[i]->setEntry(hb_it->archReg, hb_it->prevPhysReg);
+
+ // Put the renamed physical register back on the free list.
+ freeList->addReg(hb_it->newPhysReg);
+
++ // Be sure to mark its register as ready if it's a misc register.
++ if (hb_it->newPhysReg >= maxPhysicalRegs) {
++ scoreboard->setReg(hb_it->newPhysReg);
++ }
++
+ historyBuffer[i].erase(hb_it++);
+ }
+ insts[i].clear();
+ skidBuffer[i].clear();
+ }
+}
+
+template <class Impl>
+void
+DefaultRename<Impl>::takeOverFrom()
+{
+ _status = Inactive;
+ initStage();
+
+ // Reset all state prior to taking over from the other CPU.
+ for (int i=0; i< numThreads; i++) {
+ renameStatus[i] = Idle;
+
+ stalls[i].iew = false;
+ stalls[i].commit = false;
+ serializeInst[i] = NULL;
+
+ instsInProgress[i] = 0;
+
+ emptyROB[i] = true;
+
+ serializeOnNextInst[i] = false;
+ }
+}
+
+template <class Impl>
+void
+DefaultRename<Impl>::squash(const InstSeqNum &squash_seq_num, unsigned tid)
+{
+ DPRINTF(Rename, "[tid:%u]: Squashing instructions.\n",tid);
+
+ // Clear the stall signal if rename was blocked or unblocking before.
+ // If it still needs to block, the blocking should happen the next
+ // cycle and there should be space to hold everything due to the squash.
+ if (renameStatus[tid] == Blocked ||
+ renameStatus[tid] == Unblocking ||
+ renameStatus[tid] == SerializeStall) {
+
+ toDecode->renameUnblock[tid] = 1;
+
+ serializeInst[tid] = NULL;
+ }
+
+ // Set the status to Squashing.
+ renameStatus[tid] = Squashing;
+
+ // Squash any instructions from decode.
+ unsigned squashCount = 0;
+
+ for (int i=0; i<fromDecode->size; i++) {
+ if (fromDecode->insts[i]->threadNumber == tid &&
+ fromDecode->insts[i]->seqNum > squash_seq_num) {
+ fromDecode->insts[i]->setSquashed();
+ wroteToTimeBuffer = true;
+ squashCount++;
+ }
+
+ }
+
+ // Clear the instruction list and skid buffer in case they have any
+ // insts in them. Since we support multiple ISAs, we can't simply call
+ // "insts[tid].clear()" or "skidBuffer[tid].clear()": an architecture
+ // with delay slots may have an inst here that must survive the squash.
+#if ISA_HAS_DELAY_SLOT
+ DPRINTF(Rename, "[tid:%i] Squashing incoming decode instructions until "
+ "[sn:%i].\n",tid, squash_seq_num);
+ ListIt ilist_it = insts[tid].begin();
+ while (ilist_it != insts[tid].end()) {
+ if ((*ilist_it)->seqNum > squash_seq_num) {
+ (*ilist_it)->setSquashed();
+ DPRINTF(Rename, "Squashing incoming decode instruction, "
+ "[tid:%i] [sn:%i] PC %08p.\n", tid, (*ilist_it)->seqNum, (*ilist_it)->PC);
+ }
+ ilist_it++;
+ }
+#else
+ insts[tid].clear();
+#endif
+
+ // Clear the skid buffer in case it has any data in it; see the
+ // comment above for why this must be ISA-aware.
+#if ISA_HAS_DELAY_SLOT
+ DPRINTF(Rename, "[tid:%i] Squashing incoming skidbuffer instructions "
+ "until [sn:%i].\n", tid, squash_seq_num);
+ ListIt slist_it = skidBuffer[tid].begin();
+ while (slist_it != skidBuffer[tid].end()) {
+ if ((*slist_it)->seqNum > squash_seq_num) {
+ (*slist_it)->setSquashed();
+ DPRINTF(Rename, "Squashing skidbuffer instruction, [tid:%i] [sn:%i]"
+ "PC %08p.\n", tid, (*slist_it)->seqNum, (*slist_it)->PC);
+ }
+ slist_it++;
+ }
+#else
+ skidBuffer[tid].clear();
+#endif
+ doSquash(squash_seq_num, tid);
+}
+
+template <class Impl>
+void
+DefaultRename<Impl>::tick()
+{
+ wroteToTimeBuffer = false;
+
+ blockThisCycle = false;
+
+ bool status_change = false;
+
+ toIEWIndex = 0;
+
+ sortInsts();
+
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+
+ // Check stall and squash signals.
+ while (threads != (*activeThreads).end()) {
+ unsigned tid = *threads++;
+
+ DPRINTF(Rename, "Processing [tid:%i]\n", tid);
+
+ status_change = checkSignalsAndUpdate(tid) || status_change;
+
+ rename(status_change, tid);
+ }
+
+ if (status_change) {
+ updateStatus();
+ }
+
+ if (wroteToTimeBuffer) {
+ DPRINTF(Activity, "Activity this cycle.\n");
+ cpu->activityThisCycle();
+ }
+
+ threads = (*activeThreads).begin();
+
+ while (threads != (*activeThreads).end()) {
+ unsigned tid = *threads++;
+
+ // If we committed this cycle then doneSeqNum will be > 0
+ if (fromCommit->commitInfo[tid].doneSeqNum != 0 &&
+ !fromCommit->commitInfo[tid].squash &&
+ renameStatus[tid] != Squashing) {
+
+ removeFromHistory(fromCommit->commitInfo[tid].doneSeqNum,
+ tid);
+ }
+ }
+
+ // @todo: make into updateProgress function
+ for (int tid=0; tid < numThreads; tid++) {
+ instsInProgress[tid] -= fromIEW->iewInfo[tid].dispatched;
+
+ assert(instsInProgress[tid] >= 0);
+ }
+
+}
+
+template<class Impl>
+void
+DefaultRename<Impl>::rename(bool &status_change, unsigned tid)
+{
+ // If status is Running or idle,
+ // call renameInsts()
+ // If status is Unblocking,
+ // buffer any instructions coming from decode
+ // continue trying to empty skid buffer
+ // check if stall conditions have passed
+
+ if (renameStatus[tid] == Blocked) {
+ ++renameBlockCycles;
+ } else if (renameStatus[tid] == Squashing) {
+ ++renameSquashCycles;
+ } else if (renameStatus[tid] == SerializeStall) {
+ ++renameSerializeStallCycles;
+ }
+
+ if (renameStatus[tid] == Running ||
+ renameStatus[tid] == Idle) {
+ DPRINTF(Rename, "[tid:%u]: Not blocked, so attempting to run "
+ "stage.\n", tid);
+
+ renameInsts(tid);
+ } else if (renameStatus[tid] == Unblocking) {
+ renameInsts(tid);
+
+ if (validInsts()) {
+ // Add the current inputs to the skid buffer so they can be
+ // reprocessed when this stage unblocks.
+ skidInsert(tid);
+ }
+
+ // If we switched over to blocking, then there's a potential for
+ // an overall status change.
+ status_change = unblock(tid) || status_change || blockThisCycle;
+ }
+}
+
+template <class Impl>
+void
+DefaultRename<Impl>::renameInsts(unsigned tid)
+{
+ // Instructions can be either in the skid buffer or the queue of
+ // instructions coming from decode, depending on the status.
+ int insts_available = renameStatus[tid] == Unblocking ?
+ skidBuffer[tid].size() : insts[tid].size();
+
+ // Check the decode queue to see if instructions are available.
+ // If there are no available instructions to rename, then do nothing.
+ if (insts_available == 0) {
+ DPRINTF(Rename, "[tid:%u]: Nothing to do, breaking out early.\n",
+ tid);
+ // Should I change status to idle?
+ ++renameIdleCycles;
+ return;
+ } else if (renameStatus[tid] == Unblocking) {
+ ++renameUnblockCycles;
+ } else if (renameStatus[tid] == Running) {
+ ++renameRunCycles;
+ }
+
+ DynInstPtr inst;
+
+ // Will have to do a different calculation for the number of free
+ // entries.
+ int free_rob_entries = calcFreeROBEntries(tid);
+ int free_iq_entries = calcFreeIQEntries(tid);
+ int free_lsq_entries = calcFreeLSQEntries(tid);
+ int min_free_entries = free_rob_entries;
+
+ FullSource source = ROB;
+
+ if (free_iq_entries < min_free_entries) {
+ min_free_entries = free_iq_entries;
+ source = IQ;
+ }
+
+ if (free_lsq_entries < min_free_entries) {
+ min_free_entries = free_lsq_entries;
+ source = LSQ;
+ }
+
+ // Check if there's any space left.
+ if (min_free_entries <= 0) {
+ DPRINTF(Rename, "[tid:%u]: Blocking due to no free ROB/IQ/LSQ "
+ "entries.\n"
+ "ROB has %i free entries.\n"
+ "IQ has %i free entries.\n"
+ "LSQ has %i free entries.\n",
+ tid,
+ free_rob_entries,
+ free_iq_entries,
+ free_lsq_entries);
+
+ blockThisCycle = true;
+
+ block(tid);
+
+ incrFullStat(source);
+
+ return;
+ } else if (min_free_entries < insts_available) {
+ DPRINTF(Rename, "[tid:%u]: Will have to block this cycle."
+ "%i insts available, but only %i insts can be "
+ "renamed due to ROB/IQ/LSQ limits.\n",
+ tid, insts_available, min_free_entries);
+
+ insts_available = min_free_entries;
+
+ blockThisCycle = true;
+
+ incrFullStat(source);
+ }
+
+ InstQueue &insts_to_rename = renameStatus[tid] == Unblocking ?
+ skidBuffer[tid] : insts[tid];
+
+ DPRINTF(Rename, "[tid:%u]: %i available instructions to "
+ "send iew.\n", tid, insts_available);
+
+ DPRINTF(Rename, "[tid:%u]: %i insts pipelining from Rename | %i insts "
+ "dispatched to IQ last cycle.\n",
+ tid, instsInProgress[tid], fromIEW->iewInfo[tid].dispatched);
+
+ // Handle serializing the next instruction if necessary.
+ if (serializeOnNextInst[tid]) {
+ if (emptyROB[tid] && instsInProgress[tid] == 0) {
+ // ROB already empty; no need to serialize.
+ serializeOnNextInst[tid] = false;
+ } else if (!insts_to_rename.empty()) {
+ insts_to_rename.front()->setSerializeBefore();
+ }
+ }
+
+ int renamed_insts = 0;
+
+ while (insts_available > 0 && toIEWIndex < renameWidth) {
+ DPRINTF(Rename, "[tid:%u]: Sending instructions to IEW.\n", tid);
+
+ assert(!insts_to_rename.empty());
+
+ inst = insts_to_rename.front();
+
+ insts_to_rename.pop_front();
+
+ if (renameStatus[tid] == Unblocking) {
+ DPRINTF(Rename,"[tid:%u]: Removing [sn:%lli] PC:%#x from rename "
+ "skidBuffer\n",
+ tid, inst->seqNum, inst->readPC());
+ }
+
+ if (inst->isSquashed()) {
+ DPRINTF(Rename, "[tid:%u]: instruction %i with PC %#x is "
+ "squashed, skipping.\n",
+ tid, inst->seqNum, inst->readPC());
+
+ ++renameSquashedInsts;
+
+ // Decrement how many instructions are available.
+ --insts_available;
+
+ continue;
+ }
+
+ DPRINTF(Rename, "[tid:%u]: Processing instruction [sn:%lli] with "
+ "PC %#x.\n",
+ tid, inst->seqNum, inst->readPC());
+
+ // Handle serializeAfter/serializeBefore instructions.
+ // serializeAfter marks the next instruction as serializeBefore.
+ // serializeBefore makes the instruction wait in rename until the ROB
+ // is empty.
+
+ // In this model, IPR accesses are serialize before
+ // instructions, and store conditionals are serialize after
+ // instructions. This is mainly due to lack of support for
+ // out-of-order operations of either of those classes of
+ // instructions.
+ if ((inst->isIprAccess() || inst->isSerializeBefore()) &&
+ !inst->isSerializeHandled()) {
+ DPRINTF(Rename, "Serialize before instruction encountered.\n");
+
+ if (!inst->isTempSerializeBefore()) {
+ renamedSerializing++;
+ inst->setSerializeHandled();
+ } else {
+ renamedTempSerializing++;
+ }
+
+ // Change status over to SerializeStall so that other stages know
+ // what this is blocked on.
+ renameStatus[tid] = SerializeStall;
+
+ serializeInst[tid] = inst;
+
+ blockThisCycle = true;
+
+ break;
+ } else if ((inst->isStoreConditional() || inst->isSerializeAfter()) &&
+ !inst->isSerializeHandled()) {
+ DPRINTF(Rename, "Serialize after instruction encountered.\n");
+
+ renamedSerializing++;
+
+ inst->setSerializeHandled();
+
+ serializeAfter(insts_to_rename, tid);
+ }
+
+ // Check here to make sure there are enough destination registers
+ // to rename to. Otherwise block.
+ if (renameMap[tid]->numFreeEntries() < inst->numDestRegs()) {
+ DPRINTF(Rename, "Blocking due to lack of free "
+ "physical registers to rename to.\n");
+ blockThisCycle = true;
+
+ ++renameFullRegistersEvents;
+
+ break;
+ }
+
+ renameSrcRegs(inst, inst->threadNumber);
+
+ renameDestRegs(inst, inst->threadNumber);
+
+ ++renamed_insts;
+
+ // Put instruction in rename queue.
+ toIEW->insts[toIEWIndex] = inst;
+ ++(toIEW->size);
+
+ // Increment which instruction we're on.
+ ++toIEWIndex;
+
+ // Decrement how many instructions are available.
+ --insts_available;
+ }
+
+ instsInProgress[tid] += renamed_insts;
+ renameRenamedInsts += renamed_insts;
+
+ // If we wrote to the time buffer, record this.
+ if (toIEWIndex) {
+ wroteToTimeBuffer = true;
+ }
+
+ // Check if there's any instructions left that haven't yet been renamed.
+ // If so then block.
+ if (insts_available) {
+ blockThisCycle = true;
+ }
+
+ if (blockThisCycle) {
+ block(tid);
+ toDecode->renameUnblock[tid] = false;
+ }
+}
+
+template<class Impl>
+void
+DefaultRename<Impl>::skidInsert(unsigned tid)
+{
+ DynInstPtr inst = NULL;
+
+ while (!insts[tid].empty()) {
+ inst = insts[tid].front();
+
+ insts[tid].pop_front();
+
+ assert(tid == inst->threadNumber);
+
+ DPRINTF(Rename, "[tid:%u]: Inserting [sn:%lli] PC:%#x into Rename "
+ "skidBuffer\n", tid, inst->seqNum, inst->readPC());
+
+ ++renameSkidInsts;
+
+ skidBuffer[tid].push_back(inst);
+ }
+
+ if (skidBuffer[tid].size() > skidBufferMax)
+ panic("Skidbuffer Exceeded Max Size");
+}
+
+template <class Impl>
+void
+DefaultRename<Impl>::sortInsts()
+{
+ int insts_from_decode = fromDecode->size;
+#ifdef DEBUG
+#if !ISA_HAS_DELAY_SLOT
+ for (int i=0; i < numThreads; i++)
+ assert(insts[i].empty());
+#endif
+#endif
+ for (int i = 0; i < insts_from_decode; ++i) {
+ DynInstPtr inst = fromDecode->insts[i];
+ insts[inst->threadNumber].push_back(inst);
+ }
+}
+
+template<class Impl>
+bool
+DefaultRename<Impl>::skidsEmpty()
+{
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+
+ while (threads != (*activeThreads).end()) {
+ if (!skidBuffer[*threads++].empty())
+ return false;
+ }
+
+ return true;
+}
+
+template<class Impl>
+void
+DefaultRename<Impl>::updateStatus()
+{
+ bool any_unblocking = false;
+
+ std::list<unsigned>::iterator threads = (*activeThreads).begin();
+
+ while (threads != (*activeThreads).end()) {
+ unsigned tid = *threads++;
+
+ if (renameStatus[tid] == Unblocking) {
+ any_unblocking = true;
+ break;
+ }
+ }
+
+ // Rename will have activity if it's unblocking.
+ if (any_unblocking) {
+ if (_status == Inactive) {
+ _status = Active;
+
+ DPRINTF(Activity, "Activating stage.\n");
+
+ cpu->activateStage(O3CPU::RenameIdx);
+ }
+ } else {
+ // If it's not unblocking, then rename will not have any internal
+ // activity. Switch it to inactive.
+ if (_status == Active) {
+ _status = Inactive;
+ DPRINTF(Activity, "Deactivating stage.\n");
+
+ cpu->deactivateStage(O3CPU::RenameIdx);
+ }
+ }
+}
+
+template <class Impl>
+bool
+DefaultRename<Impl>::block(unsigned tid)
+{
+ DPRINTF(Rename, "[tid:%u]: Blocking.\n", tid);
+
+ // Add the current inputs onto the skid buffer, so they can be
+ // reprocessed when this stage unblocks.
+ skidInsert(tid);
+
+ // Only signal backwards to block if the previous stages do not think
+ // rename is already blocked.
+ if (renameStatus[tid] != Blocked) {
+ if (renameStatus[tid] != Unblocking) {
+ toDecode->renameBlock[tid] = true;
+ toDecode->renameUnblock[tid] = false;
+ wroteToTimeBuffer = true;
+ }
+
+ // Rename can not go from SerializeStall to Blocked, otherwise
+ // it would not know to complete the serialize stall.
+ if (renameStatus[tid] != SerializeStall) {
+ // Set status to Blocked.
+ renameStatus[tid] = Blocked;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+template <class Impl>
+bool
+DefaultRename<Impl>::unblock(unsigned tid)
+{
+ DPRINTF(Rename, "[tid:%u]: Trying to unblock.\n", tid);
+
+ // Rename is done unblocking if the skid buffer is empty.
+ if (skidBuffer[tid].empty() && renameStatus[tid] != SerializeStall) {
+
+ DPRINTF(Rename, "[tid:%u]: Done unblocking.\n", tid);
+
+ toDecode->renameUnblock[tid] = true;
+ wroteToTimeBuffer = true;
+
+ renameStatus[tid] = Running;
+ return true;
+ }
+
+ return false;
+}
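+
+// Illustrative sketch of the backward handshake (not part of the build;
+// assumes a single thread, tid 0). block() raises renameBlock at most
+// once per stall, and unblock() raises renameUnblock only after the
+// skid buffer drains, so decode never sees conflicting signals:
+//
+//   if (block(0)) {
+//       // toDecode->renameBlock[0] was set; decode stops sending insts.
+//   }
+//   // ...later, once the stall clears and skidBuffer[0] is empty:
+//   if (unblock(0)) {
+//       // toDecode->renameUnblock[0] was set; renameStatus[0] == Running.
+//   }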
+
+template <class Impl>
+void
+DefaultRename<Impl>::doSquash(const InstSeqNum &squashed_seq_num, unsigned tid)
+{
+ typename std::list<RenameHistory>::iterator hb_it =
+ historyBuffer[tid].begin();
+
+ // After a syscall squashes everything, the history buffer may be empty
+ // but the ROB may still be squashing instructions.
+ if (historyBuffer[tid].empty()) {
+ return;
+ }
+
+ // Go through the most recent instructions, undoing the mappings
+ // they did and freeing up the registers.
+ while (!historyBuffer[tid].empty() &&
+ (*hb_it).instSeqNum > squashed_seq_num) {
+ assert(hb_it != historyBuffer[tid].end());
+
+ DPRINTF(Rename, "[tid:%u]: Removing history entry with sequence "
+ "number %i.\n", tid, (*hb_it).instSeqNum);
+
+ // Tell the rename map to set the architected register to the
+ // previous physical register that it was renamed to.
+ renameMap[tid]->setEntry(hb_it->archReg, hb_it->prevPhysReg);
+
+ // Put the renamed physical register back on the free list.
+ freeList->addReg(hb_it->newPhysReg);
+
++ // Be sure to mark its register as ready if it's a misc register.
++ if (hb_it->newPhysReg >= maxPhysicalRegs) {
++ scoreboard->setReg(hb_it->newPhysReg);
++ }
++
+ historyBuffer[tid].erase(hb_it++);
+
+ ++renameUndoneMaps;
+ }
+}
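+
+// Worked example of the misc-register check above, with hypothetical
+// numbering: if maxPhysicalRegs is 256, misc registers live at indices
+// 256 and up and bypass normal writeback, so a squashed mapping to,
+// say, physical register 260 must be marked ready by hand:
+//
+//   // hb_it->newPhysReg == 260, maxPhysicalRegs == 256
+//   if (hb_it->newPhysReg >= maxPhysicalRegs)   // true: misc register
+//       scoreboard->setReg(hb_it->newPhysReg);  // ready again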
+
+template<class Impl>
+void
+DefaultRename<Impl>::removeFromHistory(InstSeqNum inst_seq_num, unsigned tid)
+{
+ DPRINTF(Rename, "[tid:%u]: Removing a committed instruction from the "
+ "history buffer %u (size=%i), until [sn:%lli].\n",
+ tid, tid, historyBuffer[tid].size(), inst_seq_num);
+
+ typename std::list<RenameHistory>::iterator hb_it =
+ historyBuffer[tid].end();
+
+ if (historyBuffer[tid].empty()) {
+ DPRINTF(Rename, "[tid:%u]: History buffer is empty.\n", tid);
+ return;
+ }
+
+ // Only step back from end() once the buffer is known to be non-empty.
+ --hb_it;
+
+ if (hb_it->instSeqNum > inst_seq_num) {
+ DPRINTF(Rename, "[tid:%u]: Old sequence number encountered. Ensure "
+ "that a syscall happened recently.\n", tid);
+ return;
+ }
+
+ // Commit all the renames up until (and including) the committed sequence
+ // number. Some or even all of the committed instructions may not have
+ // rename histories if they did not have destination registers that were
+ // renamed.
+ while (!historyBuffer[tid].empty() &&
+ hb_it != historyBuffer[tid].end() &&
+ (*hb_it).instSeqNum <= inst_seq_num) {
+
+ DPRINTF(Rename, "[tid:%u]: Freeing up older rename of reg %i, "
+ "[sn:%lli].\n",
+ tid, (*hb_it).prevPhysReg, (*hb_it).instSeqNum);
+
+ freeList->addReg((*hb_it).prevPhysReg);
+ ++renameCommittedMaps;
+
+ historyBuffer[tid].erase(hb_it--);
+ }
+}
+
+template <class Impl>
+inline void
+DefaultRename<Impl>::renameSrcRegs(DynInstPtr &inst, unsigned tid)
+{
+ assert(renameMap[tid] != 0);
+
+ unsigned num_src_regs = inst->numSrcRegs();
+
+ // Get the architectural register numbers from the source operands
+ // and redirect them to the physical registers they have been renamed
+ // to. Readiness is then checked through the scoreboard.
+ for (int src_idx = 0; src_idx < num_src_regs; src_idx++) {
+ RegIndex src_reg = inst->srcRegIdx(src_idx);
+
+ // Look up the source registers to get the phys. register they've
+ // been renamed to, and set the sources to those registers.
+ PhysRegIndex renamed_reg = renameMap[tid]->lookup(src_reg);
+
+ DPRINTF(Rename, "[tid:%u]: Looking up arch reg %i, got "
+ "physical reg %i.\n", tid, (int)src_reg,
+ (int)renamed_reg);
+
+ inst->renameSrcReg(src_idx, renamed_reg);
+
+ // See if the register is ready or not.
+ if (scoreboard->getReg(renamed_reg)) {
+ DPRINTF(Rename, "[tid:%u]: Register is ready.\n", tid);
+
+ inst->markSrcRegReady(src_idx);
+ }
+
+ ++renameRenameLookups;
+ }
+}
+
+template <class Impl>
+inline void
+DefaultRename<Impl>::renameDestRegs(DynInstPtr &inst, unsigned tid)
+{
+ typename RenameMap::RenameInfo rename_result;
+
+ unsigned num_dest_regs = inst->numDestRegs();
+
+ // Rename the destination registers.
+ for (int dest_idx = 0; dest_idx < num_dest_regs; dest_idx++) {
+ RegIndex dest_reg = inst->destRegIdx(dest_idx);
+
+ // Get the physical register that the destination will be
+ // renamed to.
+ rename_result = renameMap[tid]->rename(dest_reg);
+
+ // Mark the scoreboard entry as not ready.
+ scoreboard->unsetReg(rename_result.first);
+
+ DPRINTF(Rename, "[tid:%u]: Renaming arch reg %i to physical "
+ "reg %i.\n", tid, (int)dest_reg,
+ (int)rename_result.first);
+
+ // Record the rename information so that a history can be kept.
+ RenameHistory hb_entry(inst->seqNum, dest_reg,
+ rename_result.first,
+ rename_result.second);
+
+ historyBuffer[tid].push_front(hb_entry);
+
+ DPRINTF(Rename, "[tid:%u]: Adding instruction to history buffer "
+ "(size=%i), [sn:%lli].\n",tid,
+ historyBuffer[tid].size(),
+ (*historyBuffer[tid].begin()).instSeqNum);
+
+ // Tell the instruction to rename the appropriate destination
+ // register (dest_idx) to the new physical register
+ // (rename_result.first), and record the previous physical
+ // register that the same logical register was renamed to
+ // (rename_result.second).
+ inst->renameDestReg(dest_idx,
+ rename_result.first,
+ rename_result.second);
+
+ ++renameRenamedOperands;
+ }
+}
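+
+// Lifecycle of a history buffer entry, as a sketch (register numbers
+// are made up): renaming architectural reg 5, currently mapped to
+// physical reg 12, to a newly allocated physical reg 31 records
+//
+//   RenameHistory hb_entry(inst->seqNum, /*archReg=*/5,
+//                          /*newPhysReg=*/31, /*prevPhysReg=*/12);
+//
+// On commit, removeFromHistory() frees reg 12, since the old mapping
+// can no longer be needed; on a squash, doSquash() instead restores
+// 5 -> 12 in the rename map and frees reg 31.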
+
+template <class Impl>
+inline int
+DefaultRename<Impl>::calcFreeROBEntries(unsigned tid)
+{
+ int num_free = freeEntries[tid].robEntries -
+ (instsInProgress[tid] - fromIEW->iewInfo[tid].dispatched);
+
+ //DPRINTF(Rename,"[tid:%i]: %i rob free\n",tid,num_free);
+
+ return num_free;
+}
+
+template <class Impl>
+inline int
+DefaultRename<Impl>::calcFreeIQEntries(unsigned tid)
+{
+ int num_free = freeEntries[tid].iqEntries -
+ (instsInProgress[tid] - fromIEW->iewInfo[tid].dispatched);
+
+ //DPRINTF(Rename,"[tid:%i]: %i iq free\n",tid,num_free);
+
+ return num_free;
+}
+
+template <class Impl>
+inline int
+DefaultRename<Impl>::calcFreeLSQEntries(unsigned tid)
+{
+ int num_free = freeEntries[tid].lsqEntries -
+ (instsInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToLSQ);
+
+ //DPRINTF(Rename,"[tid:%i]: %i lsq free\n",tid,num_free);
+
+ return num_free;
+}
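+
+// All three calculations share one idea: the free count reported last
+// cycle is reduced by instructions still in flight between rename and
+// dispatch. A quick numeric check with made-up values:
+//
+//   freeEntries[tid].iqEntries       == 16  // IQ's last report
+//   instsInProgress[tid]             ==  6  // renamed, not yet dispatched
+//   fromIEW->iewInfo[tid].dispatched ==  2  // just left that pool
+//   // calcFreeIQEntries(tid) == 16 - (6 - 2) == 12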
+
+template <class Impl>
+unsigned
+DefaultRename<Impl>::validInsts()
+{
+ unsigned inst_count = 0;
+
+ for (int i=0; i<fromDecode->size; i++) {
+ if (!fromDecode->insts[i]->isSquashed())
+ inst_count++;
+ }
+
+ return inst_count;
+}
+
+template <class Impl>
+void
+DefaultRename<Impl>::readStallSignals(unsigned tid)
+{
+ if (fromIEW->iewBlock[tid]) {
+ stalls[tid].iew = true;
+ }
+
+ if (fromIEW->iewUnblock[tid]) {
+ assert(stalls[tid].iew);
+ stalls[tid].iew = false;
+ }
+
+ if (fromCommit->commitBlock[tid]) {
+ stalls[tid].commit = true;
+ }
+
+ if (fromCommit->commitUnblock[tid]) {
+ assert(stalls[tid].commit);
+ stalls[tid].commit = false;
+ }
+}
+
+template <class Impl>
+bool
+DefaultRename<Impl>::checkStall(unsigned tid)
+{
+ bool ret_val = false;
+
+ if (stalls[tid].iew) {
+ DPRINTF(Rename,"[tid:%i]: Stall from IEW stage detected.\n", tid);
+ ret_val = true;
+ } else if (stalls[tid].commit) {
+ DPRINTF(Rename,"[tid:%i]: Stall from Commit stage detected.\n", tid);
+ ret_val = true;
+ } else if (calcFreeROBEntries(tid) <= 0) {
+ DPRINTF(Rename,"[tid:%i]: Stall: ROB has 0 free entries.\n", tid);
+ ret_val = true;
+ } else if (calcFreeIQEntries(tid) <= 0) {
+ DPRINTF(Rename,"[tid:%i]: Stall: IQ has 0 free entries.\n", tid);
+ ret_val = true;
+ } else if (calcFreeLSQEntries(tid) <= 0) {
+ DPRINTF(Rename,"[tid:%i]: Stall: LSQ has 0 free entries.\n", tid);
+ ret_val = true;
+ } else if (renameMap[tid]->numFreeEntries() <= 0) {
+ DPRINTF(Rename,"[tid:%i]: Stall: RenameMap has 0 free entries.\n", tid);
+ ret_val = true;
+ } else if (renameStatus[tid] == SerializeStall &&
+ (!emptyROB[tid] || instsInProgress[tid])) {
+ DPRINTF(Rename,"[tid:%i]: Stall: Serialize stall and ROB is not "
+ "empty.\n",
+ tid);
+ ret_val = true;
+ }
+
+ return ret_val;
+}
+
+template <class Impl>
+void
+DefaultRename<Impl>::readFreeEntries(unsigned tid)
+{
+ bool updated = false;
+ if (fromIEW->iewInfo[tid].usedIQ) {
+ freeEntries[tid].iqEntries =
+ fromIEW->iewInfo[tid].freeIQEntries;
+ updated = true;
+ }
+
+ if (fromIEW->iewInfo[tid].usedLSQ) {
+ freeEntries[tid].lsqEntries =
+ fromIEW->iewInfo[tid].freeLSQEntries;
+ updated = true;
+ }
+
+ if (fromCommit->commitInfo[tid].usedROB) {
+ freeEntries[tid].robEntries =
+ fromCommit->commitInfo[tid].freeROBEntries;
+ emptyROB[tid] = fromCommit->commitInfo[tid].emptyROB;
+ updated = true;
+ }
+
+ DPRINTF(Rename, "[tid:%i]: Free IQ: %i, Free ROB: %i, Free LSQ: %i\n",
+ tid,
+ freeEntries[tid].iqEntries,
+ freeEntries[tid].robEntries,
+ freeEntries[tid].lsqEntries);
+
+ DPRINTF(Rename, "[tid:%i]: %i instructions not yet in ROB\n",
+ tid, instsInProgress[tid]);
+}
+
+template <class Impl>
+bool
+DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid)
+{
+ // Check if there's a squash signal, squash if there is
+ // Check stall signals, block if necessary.
+ // If status was blocked
+ // check if stall conditions have passed
+ // if so then go to unblocking
+ // If status was Squashing
+ // check if the ROB is done squashing; if so, switch to Running.
+ // If status was serialize stall
+ // check if ROB is empty and no insts are in flight to the ROB
+
+ readFreeEntries(tid);
+ readStallSignals(tid);
+
+ if (fromCommit->commitInfo[tid].squash) {
+ DPRINTF(Rename, "[tid:%u]: Squashing instructions due to squash from "
+ "commit.\n", tid);
+
+#if ISA_HAS_DELAY_SLOT
+ InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].bdelayDoneSeqNum;
+#else
+ InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum;
+#endif
+
+ squash(squashed_seq_num, tid);
+
+ return true;
+ }
+
+ if (fromCommit->commitInfo[tid].robSquashing) {
+ DPRINTF(Rename, "[tid:%u]: ROB is still squashing.\n", tid);
+
+ renameStatus[tid] = Squashing;
+
+ return true;
+ }
+
+ if (checkStall(tid)) {
+ return block(tid);
+ }
+
+ if (renameStatus[tid] == Blocked) {
+ DPRINTF(Rename, "[tid:%u]: Done blocking, switching to unblocking.\n",
+ tid);
+
+ renameStatus[tid] = Unblocking;
+
+ unblock(tid);
+
+ return true;
+ }
+
+ if (renameStatus[tid] == Squashing) {
+ // Switch status to running if rename isn't being told to block or
+ // squash this cycle.
+ DPRINTF(Rename, "[tid:%u]: Done squashing, switching to running.\n",
+ tid);
+
+ renameStatus[tid] = Running;
+
+ return false;
+ }
+
+ if (renameStatus[tid] == SerializeStall) {
+ // Stall ends once the ROB is free.
+ DPRINTF(Rename, "[tid:%u]: Done with serialize stall, switching to "
+ "unblocking.\n", tid);
+
+ DynInstPtr serial_inst = serializeInst[tid];
+
+ renameStatus[tid] = Unblocking;
+
+ unblock(tid);
+
+ DPRINTF(Rename, "[tid:%u]: Processing instruction [%lli] with "
+ "PC %#x.\n",
+ tid, serial_inst->seqNum, serial_inst->readPC());
+
+ // Put instruction into queue here.
+ serial_inst->clearSerializeBefore();
+
+ if (!skidBuffer[tid].empty()) {
+ skidBuffer[tid].push_front(serial_inst);
+ } else {
+ insts[tid].push_front(serial_inst);
+ }
+
+ DPRINTF(Rename, "[tid:%u]: Instruction must be processed by rename."
+ " Adding to front of list.\n", tid);
+
+ serializeInst[tid] = NULL;
+
+ return true;
+ }
+
+ // If we've reached this point, we have not gotten any signals that
+ // cause rename to change its status. Rename remains the same as before.
+ return false;
+}
+
+template<class Impl>
+void
+DefaultRename<Impl>::serializeAfter(InstQueue &inst_list,
+ unsigned tid)
+{
+ if (inst_list.empty()) {
+ // Mark a bit to say that I must serialize on the next instruction.
+ serializeOnNextInst[tid] = true;
+ return;
+ }
+
+ // Set the next instruction as serializing.
+ inst_list.front()->setSerializeBefore();
+}
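+
+// Sketch of the two cases handled above (single thread assumed):
+//
+//   serializeAfter(insts_to_rename, 0);
+//   // list non-empty: the next-younger inst is marked serializeBefore
+//   //                 now and will stall in rename until the ROB drains.
+//   // list empty:     serializeOnNextInst[0] is set, and renameInsts()
+//   //                 marks the first inst to arrive next cycle instead.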
+
+template <class Impl>
+inline void
+DefaultRename<Impl>::incrFullStat(const FullSource &source)
+{
+ switch (source) {
+ case ROB:
+ ++renameROBFullEvents;
+ break;
+ case IQ:
+ ++renameIQFullEvents;
+ break;
+ case LSQ:
+ ++renameLSQFullEvents;
+ break;
+ default:
+ panic("Rename full stall stat should be incremented for a reason!");
+ break;
+ }
+}
+
+template <class Impl>
+void
+DefaultRename<Impl>::dumpHistory()
+{
+ typename std::list<RenameHistory>::iterator buf_it;
+
+ for (int i = 0; i < numThreads; i++) {
+
+ buf_it = historyBuffer[i].begin();
+
+ while (buf_it != historyBuffer[i].end()) {
+ cprintf("Seq num: %i\nArch reg: %i New phys reg: %i Old phys "
+ "reg: %i\n", (*buf_it).instSeqNum, (int)(*buf_it).archReg,
+ (int)(*buf_it).newPhysReg, (int)(*buf_it).prevPhysReg);
+
+ buf_it++;
+ }
+ }
+}
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_O3_THREAD_STATE_HH__
+#define __CPU_O3_THREAD_STATE_HH__
+
++#include "base/callback.hh"
++#include "base/output.hh"
+#include "cpu/thread_context.hh"
+#include "cpu/thread_state.hh"
++#include "sim/sim_exit.hh"
+
+class Event;
+class Process;
+
+#if FULL_SYSTEM
+class EndQuiesceEvent;
+class FunctionProfile;
+class ProfileNode;
+#else
+class FunctionalMemory;
+class Process;
+#endif
+
+/**
+ * Class that has various thread state, such as the status, the
+ * current instruction being processed, whether or not the thread has
+ * a trap pending or is being externally updated, the ThreadContext
+ * pointer, etc. It also handles anything related to a specific
+ * thread's process, such as syscalls and checking valid addresses.
+ */
+template <class Impl>
+struct O3ThreadState : public ThreadState {
+ typedef ThreadContext::Status Status;
+ typedef typename Impl::O3CPU O3CPU;
+
+ private:
+ /** Pointer to the CPU. */
+ O3CPU *cpu;
+ public:
+ /** Whether or not the thread is currently in syscall mode, and
+ * thus able to be externally updated without squashing.
+ */
+ bool inSyscall;
+
+ /** Whether or not the thread is currently waiting on a trap, and
+ * thus able to be externally updated without squashing.
+ */
+ bool trapPending;
+
+#if FULL_SYSTEM
+ O3ThreadState(O3CPU *_cpu, int _thread_num)
+ : ThreadState(-1, _thread_num),
++ cpu(_cpu), inSyscall(0), trapPending(0)
++ {
++ if (cpu->params->profile) {
++ profile = new FunctionProfile(cpu->params->system->kernelSymtab);
++ Callback *cb =
++ new MakeCallback<O3ThreadState,
++ &O3ThreadState::dumpFuncProfile>(this);
++ registerExitCallback(cb);
++ }
++
++ // let's fill with a dummy node for now so we don't get a segfault
++ // on the first cycle when there's no node available.
++ static ProfileNode dummyNode;
++ profileNode = &dummyNode;
++ profilePC = 3;
++ }
+#else
+ O3ThreadState(O3CPU *_cpu, int _thread_num, Process *_process, int _asid,
+ MemObject *mem)
+ : ThreadState(-1, _thread_num, _process, _asid, mem),
+ cpu(_cpu), inSyscall(0), trapPending(0)
+ { }
+#endif
+
+ /** Pointer to the ThreadContext of this thread. */
+ ThreadContext *tc;
+
+ /** Returns a pointer to the TC of this thread. */
+ ThreadContext *getTC() { return tc; }
+
+#if !FULL_SYSTEM
+ /** Handles the syscall. */
+ void syscall(int64_t callnum) { process->syscall(callnum, tc); }
+#endif
++
++#if FULL_SYSTEM
++ void dumpFuncProfile()
++ {
++ std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name()));
++ profile->dump(tc, *os);
++ }
++#endif
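++
++ // The constructor above registers dumpFuncProfile() to run at
++ // simulator exit. The same pattern works for any member function;
++ // a minimal sketch using the names from this file:
++ //
++ //   Callback *cb = new MakeCallback<O3ThreadState,
++ //       &O3ThreadState::dumpFuncProfile>(this);
++ //   registerExitCallback(cb);  // invoked once at exit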
+};
+
+#endif // __CPU_O3_THREAD_STATE_HH__
--- /dev/null
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include "base/intmath.hh"
+#include "cpu/o3/tournament_pred.hh"
+
+TournamentBP::TournamentBP(unsigned _localPredictorSize,
+ unsigned _localCtrBits,
+ unsigned _localHistoryTableSize,
+ unsigned _localHistoryBits,
+ unsigned _globalPredictorSize,
+ unsigned _globalCtrBits,
+ unsigned _globalHistoryBits,
+ unsigned _choicePredictorSize,
+ unsigned _choiceCtrBits,
+ unsigned _instShiftAmt)
+ : localPredictorSize(_localPredictorSize),
+ localCtrBits(_localCtrBits),
+ localHistoryTableSize(_localHistoryTableSize),
+ localHistoryBits(_localHistoryBits),
+ globalPredictorSize(_globalPredictorSize),
+ globalCtrBits(_globalCtrBits),
+ globalHistoryBits(_globalHistoryBits),
+ choicePredictorSize(_globalPredictorSize), // same size as global; indexed by globalHistory
+ choiceCtrBits(_choiceCtrBits),
+ instShiftAmt(_instShiftAmt)
+{
+ if (!isPowerOf2(localPredictorSize)) {
+ fatal("Invalid local predictor size!\n");
+ }
+
+ //Setup the array of counters for the local predictor
+ localCtrs.resize(localPredictorSize);
+
+ for (int i = 0; i < localPredictorSize; ++i)
+ localCtrs[i].setBits(localCtrBits);
+
++ localPredictorMask = floorPow2(localPredictorSize) - 1;
++
+ if (!isPowerOf2(localHistoryTableSize)) {
+ fatal("Invalid local history table size!\n");
+ }
+
+ //Setup the history table for the local table
+ localHistoryTable.resize(localHistoryTableSize);
+
+ for (int i = 0; i < localHistoryTableSize; ++i)
+ localHistoryTable[i] = 0;
+
+ // Setup the local history mask
+ localHistoryMask = (1 << localHistoryBits) - 1;
+
+ if (!isPowerOf2(globalPredictorSize)) {
+ fatal("Invalid global predictor size!\n");
+ }
+
+ //Setup the array of counters for the global predictor
+ globalCtrs.resize(globalPredictorSize);
+
+ for (int i = 0; i < globalPredictorSize; ++i)
+ globalCtrs[i].setBits(globalCtrBits);
+
+ //Clear the global history
+ globalHistory = 0;
+ // Setup the global history mask
+ globalHistoryMask = (1 << globalHistoryBits) - 1;
+
+ if (!isPowerOf2(choicePredictorSize)) {
+ fatal("Invalid choice predictor size!\n");
+ }
+
+ //Setup the array of counters for the choice predictor
+ choiceCtrs.resize(choicePredictorSize);
+
+ for (int i = 0; i < choicePredictorSize; ++i)
+ choiceCtrs[i].setBits(choiceCtrBits);
+
+ // @todo: Allow for different thresholds between the predictors.
+ threshold = (1 << (localCtrBits - 1)) - 1;
+ threshold = threshold / 2;
+}
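+
+// Index arithmetic sketch with hypothetical parameters: a 2048-entry
+// local predictor and 11 bits of local history give
+//
+//   localPredictorMask = 2048 - 1       = 0x7ff
+//   localHistoryMask   = (1 << 11) - 1  = 0x7ff
+//
+// so a counter is selected by
+//   localCtrs[localHistoryTable[idx] & localPredictorMask]
+// and the index stays in bounds even if the configured history width
+// exceeds log2 of the predictor size.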
+
+inline
+unsigned
+TournamentBP::calcLocHistIdx(Addr &branch_addr)
+{
+ // Get low order bits after removing instruction offset.
+ return (branch_addr >> instShiftAmt) & (localHistoryTableSize - 1);
+}
+
+inline
+void
+TournamentBP::updateGlobalHistTaken()
+{
+ globalHistory = (globalHistory << 1) | 1;
+ globalHistory = globalHistory & globalHistoryMask;
+}
+
+inline
+void
+TournamentBP::updateGlobalHistNotTaken()
+{
+ globalHistory = (globalHistory << 1);
+ globalHistory = globalHistory & globalHistoryMask;
+}
+
+inline
+void
+TournamentBP::updateLocalHistTaken(unsigned local_history_idx)
+{
+ localHistoryTable[local_history_idx] =
+ (localHistoryTable[local_history_idx] << 1) | 1;
+}
+
+inline
+void
+TournamentBP::updateLocalHistNotTaken(unsigned local_history_idx)
+{
+ localHistoryTable[local_history_idx] =
+ (localHistoryTable[local_history_idx] << 1);
+}
+
+bool
+TournamentBP::lookup(Addr &branch_addr, void * &bp_history)
+{
+ bool local_prediction;
+ unsigned local_history_idx;
+ unsigned local_predictor_idx;
+
+ bool global_prediction;
+ bool choice_prediction;
+
+ //Lookup in the local predictor to get its branch prediction
+ local_history_idx = calcLocHistIdx(branch_addr);
+ local_predictor_idx = localHistoryTable[local_history_idx]
++ & localPredictorMask;
+ local_prediction = localCtrs[local_predictor_idx].read() > threshold;
+
+ //Lookup in the global predictor to get its branch prediction
+ global_prediction = globalCtrs[globalHistory].read() > threshold;
+
+ //Lookup in the choice predictor to see which one to use
+ choice_prediction = choiceCtrs[globalHistory].read() > threshold;
+
+ // Create BPHistory and pass it back to be recorded.
+ BPHistory *history = new BPHistory;
+ history->globalHistory = globalHistory;
+ history->localPredTaken = local_prediction;
+ history->globalPredTaken = global_prediction;
+ history->globalUsed = choice_prediction;
+ bp_history = (void *)history;
+
+ assert(globalHistory < globalPredictorSize &&
++ local_history_idx < localHistoryTableSize &&
++ local_predictor_idx < localPredictorSize);
+
+ // Commented code is for doing speculative update of counters and
+ // all histories.
+ if (choice_prediction) {
+ if (global_prediction) {
+// updateHistoriesTaken(local_history_idx);
+// globalCtrs[globalHistory].increment();
+// localCtrs[local_history_idx].increment();
+ updateGlobalHistTaken();
+ return true;
+ } else {
+// updateHistoriesNotTaken(local_history_idx);
+// globalCtrs[globalHistory].decrement();
+// localCtrs[local_history_idx].decrement();
+ updateGlobalHistNotTaken();
+ return false;
+ }
+ } else {
+ if (local_prediction) {
+// updateHistoriesTaken(local_history_idx);
+// globalCtrs[globalHistory].increment();
+// localCtrs[local_history_idx].increment();
+ updateGlobalHistTaken();
+ return true;
+ } else {
+// updateHistoriesNotTaken(local_history_idx);
+// globalCtrs[globalHistory].decrement();
+// localCtrs[local_history_idx].decrement();
+ updateGlobalHistNotTaken();
+ return false;
+ }
+ }
+}
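+
+// Typical call sequence from the predictor's caller, as a sketch (the
+// wrapper that drives these calls is not part of this patch):
+//
+//   void *bp_history = NULL;
+//   bool pred_taken = bp.lookup(branch_pc, bp_history);
+//   // ...branch resolves...
+//   if (mispredicted)
+//       bp.squash(bp_history);                   // restore globalHistory
+//   else
+//       bp.update(branch_pc, taken, bp_history); // train counters
+//
+// Each BPHistory allocated by lookup() or uncondBr() is deleted by
+// exactly one later call to squash() or update().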
+
+void
+TournamentBP::uncondBr(void * &bp_history)
+{
+ // Create BPHistory and pass it back to be recorded.
+ BPHistory *history = new BPHistory;
+ history->globalHistory = globalHistory;
+ history->localPredTaken = true;
+ history->globalPredTaken = true;
+ bp_history = static_cast<void *>(history);
+
+ updateGlobalHistTaken();
+}
+
+void
+TournamentBP::update(Addr &branch_addr, bool taken, void *bp_history)
+{
+ unsigned local_history_idx;
+ unsigned local_predictor_idx;
+ unsigned local_predictor_hist;
+
+ // Get the local predictor's current prediction
+ local_history_idx = calcLocHistIdx(branch_addr);
+ local_predictor_hist = localHistoryTable[local_history_idx];
++ local_predictor_idx = local_predictor_hist & localPredictorMask;
+
+ // Update the choice predictor to tell it which one was correct if
+ // there was a prediction.
+ if (bp_history) {
+ BPHistory *history = static_cast<BPHistory *>(bp_history);
+ if (history->localPredTaken != history->globalPredTaken) {
+ // If the local prediction matches the actual outcome,
+ // decrement the counter; otherwise increment it.
+ if (history->localPredTaken == taken) {
+ choiceCtrs[globalHistory].decrement();
+ } else if (history->globalPredTaken == taken){
+ choiceCtrs[globalHistory].increment();
+ }
+ }
+
+ // We're done with this history, now delete it.
+ delete history;
+ }
+
+ assert(globalHistory < globalPredictorSize &&
++ local_history_idx < localHistoryTableSize &&
+ local_predictor_idx < localPredictorSize);
+
+ // Update the counters and local history with the proper
+ // resolution of the branch. Global history is updated
+ // speculatively and restored upon squash() calls, so it does not
+ // need to be updated.
+ if (taken) {
+ localCtrs[local_predictor_idx].increment();
+ globalCtrs[globalHistory].increment();
+
+ updateLocalHistTaken(local_history_idx);
+ } else {
+ localCtrs[local_predictor_idx].decrement();
+ globalCtrs[globalHistory].decrement();
+
+ updateLocalHistNotTaken(local_history_idx);
+ }
+}
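+
+// Choice-predictor training at a glance (it only moves when the two
+// component predictions disagreed for this branch):
+//
+//   local right, global wrong -> choice counter decrements (favor local)
+//   global right, local wrong -> choice counter increments (favor global)
+//   predictions agreed        -> choice counter is left untouched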
+
+void
+TournamentBP::squash(void *bp_history)
+{
+ BPHistory *history = static_cast<BPHistory *>(bp_history);
+
+ // Restore global history to state prior to this branch.
+ globalHistory = history->globalHistory;
+
+ // Delete this BPHistory now that we're done with it.
+ delete history;
+}
+
+#ifdef DEBUG
+int
+TournamentBP::BPHistory::newCount = 0;
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_O3_TOURNAMENT_PRED_HH__
+#define __CPU_O3_TOURNAMENT_PRED_HH__
+
+#include "cpu/o3/sat_counter.hh"
+#include "sim/host.hh"
+#include <vector>
+
+/**
+ * Implements a tournament branch predictor, hopefully identical to the one
+ * used in the 21264. It has a local predictor, which uses a local history
+ * table to index into a table of counters, and a global predictor, which
+ * uses a global history to index into a table of counters. A choice
+ * predictor chooses between the two. Only the global history register
+ * is speculatively updated; the rest are updated once branches commit
+ * or are found to be misspeculated.
+ */
+class TournamentBP
+{
+ public:
+ /**
+ * Default branch predictor constructor.
+ */
+ TournamentBP(unsigned localPredictorSize,
+ unsigned localCtrBits,
+ unsigned localHistoryTableSize,
+ unsigned localHistoryBits,
+ unsigned globalPredictorSize,
+ unsigned globalCtrBits,
+ unsigned globalHistoryBits,
+ unsigned choicePredictorSize,
+ unsigned choiceCtrBits,
+ unsigned instShiftAmt);
+
+ /**
+ * Looks up the given address in the branch predictor and returns
+ * a true/false value as to whether it is taken. Also creates a
+ * BPHistory object to store any state it will need on squash/update.
+ * @param branch_addr The address of the branch to look up.
+ * @param bp_history Pointer that will be set to the BPHistory object.
+ * @return Whether or not the branch is taken.
+ */
+ bool lookup(Addr &branch_addr, void * &bp_history);
+
+ /**
+ * Records that there was an unconditional branch, and modifies
+ * the bp history to point to an object that has the previous
+ * global history stored in it.
+ * @param bp_history Pointer that will be set to the BPHistory object.
+ */
+ void uncondBr(void * &bp_history);
+
+ /**
+ * Updates the branch predictor with the actual result of a branch.
+ * @param branch_addr The address of the branch to update.
+ * @param taken Whether or not the branch was taken.
+ * @param bp_history Pointer to the BPHistory object that was created
+ * when the branch was predicted.
+ */
+ void update(Addr &branch_addr, bool taken, void *bp_history);
+
+ /**
+ * Restores the global branch history on a squash.
+ * @param bp_history Pointer to the BPHistory object that has the
+ * previous global branch history in it.
+ */
+ void squash(void *bp_history);
+
+ /** Returns the global history. */
+ inline unsigned readGlobalHist() { return globalHistory; }
+
+ private:
+ /**
+ * Returns if the branch should be taken or not, given a counter
+ * value.
+ * @param count The counter value.
+ */
+ inline bool getPrediction(uint8_t &count);
+
+ /**
+ * Returns the local history index, given a branch address.
+ * @param branch_addr The branch's PC address.
+ */
+ inline unsigned calcLocHistIdx(Addr &branch_addr);
+
+ /** Updates global history as taken. */
+ inline void updateGlobalHistTaken();
+
+ /** Updates global history as not taken. */
+ inline void updateGlobalHistNotTaken();
+
+ /**
+ * Updates local histories as taken.
+ * @param local_history_idx The local history table entry that
+ * will be updated.
+ */
+ inline void updateLocalHistTaken(unsigned local_history_idx);
+
+ /**
+ * Updates local histories as not taken.
+ * @param local_history_idx The local history table entry that
+ * will be updated.
+ */
+ inline void updateLocalHistNotTaken(unsigned local_history_idx);
+
+ /**
+ * The branch history information that is created upon predicting
+ * a branch. It will be passed back upon updating and squashing,
+ * when the BP can use this information to update/restore its
+ * state properly.
+ */
+ struct BPHistory {
+#ifdef DEBUG
+ BPHistory()
+ { newCount++; }
+ ~BPHistory()
+ { newCount--; }
+
+ static int newCount;
+#endif
+ unsigned globalHistory;
+ bool localPredTaken;
+ bool globalPredTaken;
+ bool globalUsed;
+ };
+
+ /** Local counters. */
+ std::vector<SatCounter> localCtrs;
+
+ /** Size of the local predictor. */
+ unsigned localPredictorSize;
+
++ /** Mask to get the proper index bits into the predictor. */
++ unsigned localPredictorMask;
++
+ /** Number of bits of the local predictor's counters. */
+ unsigned localCtrBits;
+
+ /** Array of local history table entries. */
+ std::vector<unsigned> localHistoryTable;
+
+ /** Size of the local history table. */
+ unsigned localHistoryTableSize;
+
+ /** Number of bits for each entry of the local history table.
+ * @todo Doesn't this come from the size of the local predictor?
+ */
+ unsigned localHistoryBits;
+
+ /** Mask to get the proper local history. */
+ unsigned localHistoryMask;
+
+ /** Array of counters that make up the global predictor. */
+ std::vector<SatCounter> globalCtrs;
+
+ /** Size of the global predictor. */
+ unsigned globalPredictorSize;
+
+ /** Number of bits of the global predictor's counters. */
+ unsigned globalCtrBits;
+
+ /** Global history register. */
+ unsigned globalHistory;
+
+ /** Number of bits for the global history. */
+ unsigned globalHistoryBits;
+
+ /** Mask to get the proper global history. */
+ unsigned globalHistoryMask;
+
+ /** Array of counters that make up the choice predictor. */
+ std::vector<SatCounter> choiceCtrs;
+
+ /** Size of the choice predictor (identical to the global predictor). */
+ unsigned choicePredictorSize;
+
+ /** Number of bits of the choice predictor's counters. */
+ unsigned choiceCtrBits;
+
+ /** Number of bits to shift the instruction over to get rid of the word
+ * offset.
+ */
+ unsigned instShiftAmt;
+
+ /** Threshold for the counter value; above the threshold is taken,
+ * equal to or below the threshold is not taken.
+ */
+ unsigned threshold;
+};
+
+#endif // __CPU_O3_TOURNAMENT_PRED_HH__
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include <string>
+
+#include "cpu/checker/cpu_impl.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/ozone/dyn_inst.hh"
+#include "cpu/ozone/ozone_impl.hh"
+#include "sim/builder.hh"
+#include "sim/process.hh"
+#include "sim/sim_object.hh"
+
+class MemObject;
+
+template
+class Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > >;
+
+/**
+ * Specific non-templated derived class used for SimObject configuration.
+ */
+class OzoneChecker :
+ public Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > >
+{
+ public:
+ OzoneChecker(Params *p)
+ : Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > >(p)
+ { }
+};
+
+////////////////////////////////////////////////////////////////////////
+//
+// CheckerCPU Simulation Object
+//
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker)
+
+ Param<Counter> max_insts_any_thread;
+ Param<Counter> max_insts_all_threads;
+ Param<Counter> max_loads_any_thread;
+ Param<Counter> max_loads_all_threads;
++ Param<Counter> stats_reset_inst;
++ Param<Tick> progress_interval;
+
+#if FULL_SYSTEM
+ SimObjectParam<AlphaITB *> itb;
+ SimObjectParam<AlphaDTB *> dtb;
+ SimObjectParam<System *> system;
+ Param<int> cpu_id;
+ Param<Tick> profile;
+#else
+ SimObjectParam<Process *> workload;
+#endif // FULL_SYSTEM
+ Param<int> clock;
+
+ Param<bool> defer_registration;
+ Param<bool> exitOnError;
++ Param<bool> updateOnError;
+ Param<bool> warnOnlyOnLoadError;
+ Param<bool> function_trace;
+ Param<Tick> function_trace_start;
+
+END_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker)
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(OzoneChecker)
+
+ INIT_PARAM(max_insts_any_thread,
+ "terminate when any thread reaches this inst count"),
+ INIT_PARAM(max_insts_all_threads,
+ "terminate when all threads have reached this inst count"),
+ INIT_PARAM(max_loads_any_thread,
+ "terminate when any thread reaches this load count"),
+ INIT_PARAM(max_loads_all_threads,
+ "terminate when all threads have reached this load count"),
++ INIT_PARAM(stats_reset_inst,
++ "reset all statistics after this many instructions"),
++ INIT_PARAM_DFLT(progress_interval, "CPU Progress Interval", 0),
+
+#if FULL_SYSTEM
+ INIT_PARAM(itb, "Instruction TLB"),
+ INIT_PARAM(dtb, "Data TLB"),
+ INIT_PARAM(system, "system object"),
+ INIT_PARAM(cpu_id, "processor ID"),
+ INIT_PARAM(profile, ""),
+#else
+ INIT_PARAM(workload, "processes to run"),
+#endif // FULL_SYSTEM
+
+ INIT_PARAM(clock, "clock speed"),
+
+ INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
+ INIT_PARAM(exitOnError, "exit on error"),
++ INIT_PARAM(updateOnError, "Update the checker with the main CPU's state on error"),
+ INIT_PARAM_DFLT(warnOnlyOnLoadError, "warn, but don't exit, if a load "
+ "result errors", false),
+ INIT_PARAM(function_trace, "Enable function trace"),
+ INIT_PARAM(function_trace_start, "Cycle to start function trace")
+
+END_INIT_SIM_OBJECT_PARAMS(OzoneChecker)
+
+
+CREATE_SIM_OBJECT(OzoneChecker)
+{
+ OzoneChecker::Params *params = new OzoneChecker::Params();
+ params->name = getInstanceName();
+ params->numberOfThreads = 1;
+ params->max_insts_any_thread = 0;
+ params->max_insts_all_threads = 0;
+ params->max_loads_any_thread = 0;
+ params->max_loads_all_threads = 0;
++ params->stats_reset_inst = 0;
+ params->exitOnError = exitOnError;
++ params->updateOnError = updateOnError;
+ params->warnOnlyOnLoadError = warnOnlyOnLoadError;
+ params->deferRegistration = defer_registration;
+ params->functionTrace = function_trace;
+ params->functionTraceStart = function_trace_start;
+ params->clock = clock;
+ // Hack to touch all parameters. Consider not deriving Checker
+ // from BaseCPU... it's not really a CPU in the end.
+ Counter temp;
+ temp = max_insts_any_thread;
+ temp = max_insts_all_threads;
+ temp = max_loads_any_thread;
+ temp = max_loads_all_threads;
++ temp = stats_reset_inst;
++ Tick temp2 = progress_interval;
++ temp2++;
++ params->progress_interval = 0;
+
+#if FULL_SYSTEM
+ params->itb = itb;
+ params->dtb = dtb;
+ params->system = system;
+ params->cpu_id = cpu_id;
+ params->profile = profile;
+#else
+ params->process = workload;
+#endif
+
+ OzoneChecker *cpu = new OzoneChecker(params);
+ return cpu;
+}
+
+REGISTER_SIM_OBJECT("OzoneChecker", OzoneChecker)
--- /dev/null
+/*
++ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_OZONE_CPU_HH__
+#define __CPU_OZONE_CPU_HH__
+
+#include <set>
+
+#include "base/statistics.hh"
+#include "base/timebuf.hh"
+#include "config/full_system.hh"
+#include "cpu/base.hh"
+#include "cpu/thread_context.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/ozone/rename_table.hh"
+#include "cpu/ozone/thread_state.hh"
+#include "cpu/pc_event.hh"
+#include "cpu/static_inst.hh"
+#include "mem/page_table.hh"
+#include "sim/eventq.hh"
+
+// forward declarations
+#if FULL_SYSTEM
+#include "arch/alpha/tlb.hh"
+
+class AlphaITB;
+class AlphaDTB;
+class PhysicalMemory;
+class MemoryController;
+
+class RemoteGDB;
+class GDBListener;
+
+namespace Kernel {
+ class Statistics;
+};
+
+#else
+
+class Process;
+
+#endif // FULL_SYSTEM
+
+class Checkpoint;
+class EndQuiesceEvent;
+class MemObject;
+class Request;
+
+namespace Trace {
+ class InstRecord;
+}
+
+template <class>
+class Checker;
+
+/**
++ * Lightweight out-of-order CPU model. It is separated into a front
++ * end and a back end, with the template parameter Impl specifying the
++ * classes used for each; swapping in different front end or back end
++ * classes models different levels of detail.
+ */
+template <class Impl>
+class OzoneCPU : public BaseCPU
+{
+ private:
+ typedef typename Impl::FrontEnd FrontEnd;
+ typedef typename Impl::BackEnd BackEnd;
+ typedef typename Impl::DynInst DynInst;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
+ typedef TheISA::FloatReg FloatReg;
+ typedef TheISA::FloatRegBits FloatRegBits;
+ typedef TheISA::MiscReg MiscReg;
+
+ public:
+ class OzoneTC : public ThreadContext {
+ public:
+ OzoneCPU<Impl> *cpu;
+
+ OzoneThreadState<Impl> *thread;
+
+ BaseCPU *getCpuPtr();
+
+ void setCpuId(int id);
+
+ int readCpuId() { return thread->readCpuId(); }
+
+#if FULL_SYSTEM
+ System *getSystemPtr() { return cpu->system; }
+
+ PhysicalMemory *getPhysMemPtr() { return cpu->physmem; }
+
+ AlphaITB *getITBPtr() { return cpu->itb; }
+
+ AlphaDTB * getDTBPtr() { return cpu->dtb; }
+
+ Kernel::Statistics *getKernelStats()
+ { return thread->getKernelStats(); }
+
+ FunctionalPort *getPhysPort() { return thread->getPhysPort(); }
+
+ VirtualPort *getVirtPort(ThreadContext *tc = NULL)
+ { return thread->getVirtPort(tc); }
+
+ void delVirtPort(VirtualPort *vp);
+#else
+ TranslatingPort *getMemPort() { return thread->getMemPort(); }
+
+ Process *getProcessPtr() { return thread->getProcessPtr(); }
+#endif
+
+ Status status() const { return thread->status(); }
+
+ void setStatus(Status new_status);
+
+ /// Set the status to Active. Optional delay indicates number of
+ /// cycles to wait before beginning execution.
+ void activate(int delay = 1);
+
+ /// Set the status to Suspended.
+ void suspend();
+
+ /// Set the status to Unallocated.
+ void deallocate(int delay = 0);
+
+ /// Set the status to Halted.
+ void halt();
+
+#if FULL_SYSTEM
+ void dumpFuncProfile();
+#endif
+
+ void takeOverFrom(ThreadContext *old_context);
+
+ void regStats(const std::string &name);
+
+ void serialize(std::ostream &os);
+ void unserialize(Checkpoint *cp, const std::string &section);
+
+#if FULL_SYSTEM
+ EndQuiesceEvent *getQuiesceEvent();
+
+ Tick readLastActivate();
+ Tick readLastSuspend();
+
+ void profileClear();
+ void profileSample();
+#endif
+
+ int getThreadNum();
+
+ // Somewhat obnoxious; really only used for the TLB fault.
+ TheISA::MachInst getInst();
+
+ void copyArchRegs(ThreadContext *tc);
+
+ void clearArchRegs();
+
+ uint64_t readIntReg(int reg_idx);
+
+ FloatReg readFloatReg(int reg_idx, int width);
+
+ FloatReg readFloatReg(int reg_idx);
+
+ FloatRegBits readFloatRegBits(int reg_idx, int width);
+
+ FloatRegBits readFloatRegBits(int reg_idx);
+
+ void setIntReg(int reg_idx, uint64_t val);
+
+ void setFloatReg(int reg_idx, FloatReg val, int width);
+
+ void setFloatReg(int reg_idx, FloatReg val);
+
+ void setFloatRegBits(int reg_idx, FloatRegBits val, int width);
+
+ void setFloatRegBits(int reg_idx, FloatRegBits val);
+
+ uint64_t readPC() { return thread->PC; }
+ void setPC(Addr val);
+
+ uint64_t readNextPC() { return thread->nextPC; }
+ void setNextPC(Addr val);
+
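+ // Alpha has no architected next-next PC; these are stubs.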
+ uint64_t readNextNPC()
+ {
+ return 0;
+ }
+
+ void setNextNPC(uint64_t val)
+ { }
+
+ public:
+ // ISA stuff:
+ MiscReg readMiscReg(int misc_reg);
+
+ MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault);
+
+ Fault setMiscReg(int misc_reg, const MiscReg &val);
+
+ Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val);
+
+ unsigned readStCondFailures()
+ { return thread->storeCondFailures; }
+
+ void setStCondFailures(unsigned sc_failures)
+ { thread->storeCondFailures = sc_failures; }
+
+#if FULL_SYSTEM
+ bool inPalMode() { return cpu->inPalMode(); }
+#endif
+
+ bool misspeculating() { return false; }
+
+#if !FULL_SYSTEM
+ TheISA::IntReg getSyscallArg(int i)
+ { return thread->renameTable[TheISA::ArgumentReg0 + i]->readIntResult(); }
+
+ // used to shift args for indirect syscall
+ void setSyscallArg(int i, TheISA::IntReg val)
+ { thread->renameTable[TheISA::ArgumentReg0 + i]->setIntResult(val); }
+
+ void setSyscallReturn(SyscallReturn return_value)
+ { cpu->setSyscallReturn(return_value, thread->readTid()); }
+
+ Counter readFuncExeInst() { return thread->funcExeInst; }
+
+ void setFuncExeInst(Counter new_val)
+ { thread->funcExeInst = new_val; }
+#endif
+ void changeRegFileContext(TheISA::RegFile::ContextParam param,
+ TheISA::RegFile::ContextVal val)
+ { panic("Not supported on Alpha!"); }
+ };
+
+ // Ozone specific thread context
+ OzoneTC ozoneTC;
+ // Thread context to be used
+ ThreadContext *tc;
+ // Checker thread context; will wrap the OzoneTC if a checker is
+ // being used.
+ ThreadContext *checkerTC;
+
+ typedef OzoneThreadState<Impl> ImplState;
+
+ private:
+ // Committed thread state for the OzoneCPU.
+ OzoneThreadState<Impl> thread;
+
+ public:
+ // main simulation loop (one cycle)
+ void tick();
+
+ std::set<InstSeqNum> snList;
+ std::set<Addr> lockAddrList;
+ private:
+ struct TickEvent : public Event
+ {
+ OzoneCPU *cpu;
+ int width;
+
+ TickEvent(OzoneCPU *c, int w);
+ void process();
+ const char *description();
+ };
+
+ TickEvent tickEvent;
+
+ /// Schedule tick event, regardless of its current state.
+ void scheduleTickEvent(int delay)
+ {
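+ // A squashed event is still in the event queue, so it must be
+ // rescheduled rather than scheduled fresh.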
+ if (tickEvent.squashed())
+ tickEvent.reschedule(curTick + cycles(delay));
+ else if (!tickEvent.scheduled())
+ tickEvent.schedule(curTick + cycles(delay));
+ }
+
+ /// Unschedule tick event, regardless of its current state.
+ void unscheduleTickEvent()
+ {
+ if (tickEvent.scheduled())
+ tickEvent.squash();
+ }
+
+ public:
+ enum Status {
+ Running,
+ Idle,
+ SwitchedOut
+ };
+
+ Status _status;
+
+ public:
+ void post_interrupt(int int_num, int index);
+
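+ // WH64 leaves the block's contents unpredictable, so ignoring it
+ // is architecturally safe; just warn once.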
+ void zero_fill_64(Addr addr) {
+ static int warned = 0;
+ if (!warned) {
+ warn ("WH64 is not implemented");
+ warned = 1;
+ }
+ };
+
+ typedef typename Impl::Params Params;
+
+ OzoneCPU(Params *params);
+
+ virtual ~OzoneCPU();
+
+ void init();
+
+ public:
+ BaseCPU *getCpuPtr() { return this; }
+
+ void setCpuId(int id) { cpuId = id; }
+
+ int readCpuId() { return cpuId; }
+
+ int cpuId;
+
+ void switchOut();
+ void signalSwitched();
+ void takeOverFrom(BaseCPU *oldCPU);
+
+ int switchCount;
+
+#if FULL_SYSTEM
+ Addr dbg_vtophys(Addr addr);
+
+ bool interval_stats;
+
+ AlphaITB *itb;
+ AlphaDTB *dtb;
+ System *system;
+ PhysicalMemory *physmem;
+#endif
+
+ virtual Port *getPort(const std::string &name, int idx);
+
+ MemObject *mem;
+
+ FrontEnd *frontEnd;
+
+ BackEnd *backEnd;
+
+ private:
+ Status status() const { return _status; }
+ void setStatus(Status new_status) { _status = new_status; }
+
+ virtual void activateContext(int thread_num, int delay);
+ virtual void suspendContext(int thread_num);
+ virtual void deallocateContext(int thread_num, int delay);
+ virtual void haltContext(int thread_num);
+
+ // statistics
+ virtual void regStats();
+ virtual void resetStats();
+
+ // number of simulated instructions
+ public:
+ Counter numInst;
+ Counter startNumInst;
+
+ virtual Counter totalInstructions() const
+ {
+ return numInst - startNumInst;
+ }
+
+ private:
+ // number of simulated loads
+ Counter numLoad;
+ Counter startNumLoad;
+
+ // number of idle cycles
+ Stats::Average<> notIdleFraction;
+ Stats::Formula idleFraction;
+
+ public:
+ virtual void serialize(std::ostream &os);
+ virtual void unserialize(Checkpoint *cp, const std::string &section);
+
+#if FULL_SYSTEM
+ /** Translates instruction request. */
+ Fault translateInstReq(RequestPtr &req, OzoneThreadState<Impl> *thread)
+ {
+ return itb->translate(req, thread->getTC());
+ }
+
+ /** Translates data read request. */
+ Fault translateDataReadReq(RequestPtr &req, OzoneThreadState<Impl> *thread)
+ {
+ return dtb->translate(req, thread->getTC(), false);
+ }
+
+ /** Translates data write request. */
+ Fault translateDataWriteReq(RequestPtr &req, OzoneThreadState<Impl> *thread)
+ {
+ return dtb->translate(req, thread->getTC(), true);
+ }
+
+#else
+ /** Translates instruction request in syscall emulation mode. */
+ Fault translateInstReq(RequestPtr &req, OzoneThreadState<Impl> *thread)
+ {
+ return thread->getProcessPtr()->pTable->translate(req);
+ }
+
+ /** Translates data read request in syscall emulation mode. */
+ Fault translateDataReadReq(RequestPtr &req, OzoneThreadState<Impl> *thread)
+ {
+ return thread->getProcessPtr()->pTable->translate(req);
+ }
+
+ /** Translates data write request in syscall emulation mode. */
+ Fault translateDataWriteReq(RequestPtr &req, OzoneThreadState<Impl> *thread)
+ {
+ return thread->getProcessPtr()->pTable->translate(req);
+ }
+#endif
+
+ /** Old CPU read from memory function. No longer used. */
+ template <class T>
+ Fault read(Request *req, T &data)
+ {
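+ // The disabled code below tracked load-locked addresses for
+ // Alpha LL/SC and is kept only for reference.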
+#if 0
+#if FULL_SYSTEM && defined(TARGET_ALPHA)
+ if (req->flags & LOCKED) {
+ req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr);
+ req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true);
+ }
+#endif
+ if (req->flags & LOCKED) {
+ lockAddrList.insert(req->paddr);
+ lockFlag = true;
+ }
+#endif
+ Fault error;
+
+ error = this->mem->read(req, data);
+ data = gtoh(data);
+ return error;
+ }
+
+
+ /** CPU read function, forwards read to LSQ. */
+ template <class T>
+ Fault read(Request *req, T &data, int load_idx)
+ {
+ return backEnd->read(req, data, load_idx);
+ }
+
+ /** Old CPU write to memory function. No longer used. */
+ template <class T>
+ Fault write(Request *req, T &data)
+ {
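+ // The disabled code below implemented Alpha store-conditional
+ // results: 2 = uncacheable (undefined), 1 = success, 0 = failure.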
+#if 0
+#if FULL_SYSTEM && defined(TARGET_ALPHA)
+ ExecContext *xc;
+
+ // If this is a store conditional, act appropriately
+ if (req->flags & LOCKED) {
+ xc = req->xc;
+
+ if (req->flags & UNCACHEABLE) {
+ // Don't update result register (see stq_c in isa_desc)
+ req->result = 2;
+ xc->setStCondFailures(0);//Needed? [RGD]
+ } else {
+ bool lock_flag = xc->readMiscReg(TheISA::Lock_Flag_DepTag);
+ Addr lock_addr = xc->readMiscReg(TheISA::Lock_Addr_DepTag);
+ req->result = lock_flag;
+ if (!lock_flag ||
+ ((lock_addr & ~0xf) != (req->paddr & ~0xf))) {
+ xc->setMiscReg(TheISA::Lock_Flag_DepTag, false);
+ xc->setStCondFailures(xc->readStCondFailures() + 1);
+ if (((xc->readStCondFailures()) % 100000) == 0) {
+ std::cerr << "Warning: "
+ << xc->readStCondFailures()
+ << " consecutive store conditional failures "
+ << "on cpu " << req->xc->readCpuId()
+ << std::endl;
+ }
+ return NoFault;
+ }
+ else xc->setStCondFailures(0);
+ }
+ }
+
+ // Need to clear any locked flags on other processors for
+ // this address. Only do this for successful Store Conditionals
+ // and all other stores (WH64?). Unsuccessful Store
+ // Conditionals would have returned above, and wouldn't fall
+ // through.
+ for (int i = 0; i < this->system->threadContexts.size(); i++){
+ xc = this->system->threadContexts[i];
+ if ((xc->readMiscReg(TheISA::Lock_Addr_DepTag) & ~0xf) ==
+ (req->paddr & ~0xf)) {
+ xc->setMiscReg(TheISA::Lock_Flag_DepTag, false);
+ }
+ }
+
+#endif
+
+ if (req->flags & LOCKED) {
+ if (req->flags & UNCACHEABLE) {
+ req->result = 2;
+ } else {
+ if (this->lockFlag) {
+ if (lockAddrList.find(req->paddr) !=
+ lockAddrList.end()) {
+ req->result = 1;
+ } else {
+ req->result = 0;
+ return NoFault;
+ }
+ } else {
+ req->result = 0;
+ return NoFault;
+ }
+ }
+ }
+#endif
+
+ return this->mem->write(req, (T)htog(data));
+ }
+
+ /** CPU write function, forwards write to LSQ. */
+ template <class T>
+ Fault write(Request *req, T &data, int store_idx)
+ {
+ return backEnd->write(req, data, store_idx);
+ }
+
+ void prefetch(Addr addr, unsigned flags)
+ {
+ // @todo: Implement prefetch.
+ }
+
+ void writeHint(Addr addr, int size, unsigned flags)
+ {
+ // @todo: Implement write hint.
+ }
+
+ Fault copySrcTranslate(Addr src);
+
+ Fault copy(Addr dest);
+
+ public:
+ void squashFromTC();
+
+ void dumpInsts() { frontEnd->dumpInsts(); }
+
+#if FULL_SYSTEM
+ Fault hwrei();
+ int readIntrFlag() { return thread.intrflag; }
+ void setIntrFlag(int val) { thread.intrflag = val; }
+ bool inPalMode() { return AlphaISA::PcPAL(thread.PC); }
+ bool inPalMode(Addr pc) { return AlphaISA::PcPAL(pc); }
+ bool simPalCheck(int palFunc);
+ void processInterrupts();
+#else
+ void syscall(uint64_t &callnum);
+ void setSyscallReturn(SyscallReturn return_value, int tid);
+#endif
+
+ ThreadContext *tcBase() { return tc; }
+
+ struct CommStruct {
+ InstSeqNum doneSeqNum;
+ InstSeqNum nonSpecSeqNum;
+ bool uncached;
+ unsigned lqIdx;
+
+ bool stall;
+ };
+
+ InstSeqNum globalSeqNum;
+
+ TimeBuffer<CommStruct> comm;
+
+ bool decoupledFrontEnd;
+
+ bool lockFlag;
+
+ Stats::Scalar<> quiesceCycles;
+
+ Checker<DynInstPtr> *checker;
+};
+
+#endif // __CPU_OZONE_CPU_HH__
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include <string>
+
+#include "cpu/checker/cpu.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/ozone/cpu.hh"
+#include "cpu/ozone/ozone_impl.hh"
+#include "cpu/ozone/simple_params.hh"
+#include "sim/builder.hh"
+#include "sim/process.hh"
+#include "sim/sim_object.hh"
+
+class DerivOzoneCPU : public OzoneCPU<OzoneImpl>
+{
+ public:
+ DerivOzoneCPU(SimpleParams *p)
+ : OzoneCPU<OzoneImpl>(p)
+ { }
+};
+
+
+////////////////////////////////////////////////////////////////////////
+//
+// OzoneCPU Simulation Object
+//
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivOzoneCPU)
+
+ Param<int> clock;
+ Param<int> numThreads;
+
+#if FULL_SYSTEM
+SimObjectParam<System *> system;
+Param<int> cpu_id;
+SimObjectParam<AlphaITB *> itb;
+SimObjectParam<AlphaDTB *> dtb;
+Param<Tick> profile;
+#else
+SimObjectVectorParam<Process *> workload;
+//SimObjectParam<PageTable *> page_table;
+#endif // FULL_SYSTEM
+
+SimObjectParam<MemObject *> mem;
+
+SimObjectParam<BaseCPU *> checker;
+
+Param<Counter> max_insts_any_thread;
+Param<Counter> max_insts_all_threads;
+Param<Counter> max_loads_any_thread;
+Param<Counter> max_loads_all_threads;
+Param<Counter> stats_reset_inst;
+Param<Tick> progress_interval;
+
+//SimObjectParam<BaseCache *> icache;
+//SimObjectParam<BaseCache *> dcache;
+
+Param<unsigned> cachePorts;
+Param<unsigned> width;
+Param<unsigned> frontEndLatency;
+Param<unsigned> frontEndWidth;
+Param<unsigned> backEndLatency;
+Param<unsigned> backEndWidth;
+Param<unsigned> backEndSquashLatency;
+Param<unsigned> maxInstBufferSize;
+Param<unsigned> numPhysicalRegs;
+Param<unsigned> maxOutstandingMemOps;
+
+Param<unsigned> decodeToFetchDelay;
+Param<unsigned> renameToFetchDelay;
+Param<unsigned> iewToFetchDelay;
+Param<unsigned> commitToFetchDelay;
+Param<unsigned> fetchWidth;
+
+Param<unsigned> renameToDecodeDelay;
+Param<unsigned> iewToDecodeDelay;
+Param<unsigned> commitToDecodeDelay;
+Param<unsigned> fetchToDecodeDelay;
+Param<unsigned> decodeWidth;
+
+Param<unsigned> iewToRenameDelay;
+Param<unsigned> commitToRenameDelay;
+Param<unsigned> decodeToRenameDelay;
+Param<unsigned> renameWidth;
+
+Param<unsigned> commitToIEWDelay;
+Param<unsigned> renameToIEWDelay;
+Param<unsigned> issueToExecuteDelay;
+Param<unsigned> issueWidth;
+Param<unsigned> executeWidth;
+Param<unsigned> executeIntWidth;
+Param<unsigned> executeFloatWidth;
+Param<unsigned> executeBranchWidth;
+Param<unsigned> executeMemoryWidth;
+
+Param<unsigned> iewToCommitDelay;
+Param<unsigned> renameToROBDelay;
+Param<unsigned> commitWidth;
+Param<unsigned> squashWidth;
+
+Param<std::string> predType;
+Param<unsigned> localPredictorSize;
+Param<unsigned> localCtrBits;
+Param<unsigned> localHistoryTableSize;
+Param<unsigned> localHistoryBits;
+Param<unsigned> globalPredictorSize;
+Param<unsigned> globalCtrBits;
+Param<unsigned> globalHistoryBits;
+Param<unsigned> choicePredictorSize;
+Param<unsigned> choiceCtrBits;
+
+Param<unsigned> BTBEntries;
+Param<unsigned> BTBTagSize;
+
+Param<unsigned> RASSize;
+
+Param<unsigned> LQEntries;
+Param<unsigned> SQEntries;
+Param<bool> lsqLimits;
+Param<unsigned> LFSTSize;
+Param<unsigned> SSITSize;
+
+Param<unsigned> numPhysIntRegs;
+Param<unsigned> numPhysFloatRegs;
+Param<unsigned> numIQEntries;
+Param<unsigned> numROBEntries;
+
+Param<bool> decoupledFrontEnd;
+Param<int> dispatchWidth;
+Param<int> wbWidth;
+
+Param<unsigned> smtNumFetchingThreads;
+Param<std::string> smtFetchPolicy;
+Param<std::string> smtLSQPolicy;
+Param<unsigned> smtLSQThreshold;
+Param<std::string> smtIQPolicy;
+Param<unsigned> smtIQThreshold;
+Param<std::string> smtROBPolicy;
+Param<unsigned> smtROBThreshold;
+Param<std::string> smtCommitPolicy;
+
+Param<unsigned> instShiftAmt;
+
+Param<bool> defer_registration;
+
+Param<bool> function_trace;
+Param<Tick> function_trace_start;
+
+END_DECLARE_SIM_OBJECT_PARAMS(DerivOzoneCPU)
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
+
+ INIT_PARAM(clock, "clock speed"),
+ INIT_PARAM(numThreads, "number of HW thread contexts"),
+
+#if FULL_SYSTEM
+ INIT_PARAM(system, "System object"),
+ INIT_PARAM(cpu_id, "processor ID"),
+ INIT_PARAM(itb, "Instruction translation buffer"),
+ INIT_PARAM(dtb, "Data translation buffer"),
+ INIT_PARAM(profile, "Function profiling interval"),
+#else
+ INIT_PARAM(workload, "Processes to run"),
+// INIT_PARAM(page_table, "Page table"),
+#endif // FULL_SYSTEM
+
+ INIT_PARAM_DFLT(mem, "Memory", NULL),
+
+ INIT_PARAM_DFLT(checker, "Checker CPU", NULL),
+
+ INIT_PARAM_DFLT(max_insts_any_thread,
+ "Terminate when any thread reaches this inst count",
+ 0),
+ INIT_PARAM_DFLT(max_insts_all_threads,
+ "Terminate when all threads have reached"
+ "this inst count",
+ 0),
+ INIT_PARAM_DFLT(max_loads_any_thread,
+ "Terminate when any thread reaches this load count",
+ 0),
+ INIT_PARAM_DFLT(max_loads_all_threads,
+ "Terminate when all threads have reached this load"
+ "count",
+ 0),
+ INIT_PARAM_DFLT(stats_reset_inst,
+ "Instruction count at which to reset stats",
+ 0),
+ INIT_PARAM_DFLT(progress_interval, "Progress interval", 0),
+
+// INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL),
+// INIT_PARAM_DFLT(dcache, "L1 data cache", NULL),
+
+ INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
+ INIT_PARAM_DFLT(width, "Width", 1),
+ INIT_PARAM_DFLT(frontEndLatency, "Front end latency", 1),
+ INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1),
+ INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1),
+ INIT_PARAM_DFLT(backEndWidth, "Back end width", 1),
+ INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1),
+ INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16),
+ INIT_PARAM(numPhysicalRegs, "Number of physical registers"),
+ INIT_PARAM_DFLT(maxOutstandingMemOps, "Maximum outstanding memory operations", 4),
+
+ INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"),
+ INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"),
+ INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch "
+ "delay"),
+ INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"),
+ INIT_PARAM(fetchWidth, "Fetch width"),
+ INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"),
+ INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode "
+ "delay"),
+ INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"),
+ INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"),
+ INIT_PARAM(decodeWidth, "Decode width"),
+
+ INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename "
+ "delay"),
+ INIT_PARAM(commitToRenameDelay, "Commit to rename delay"),
+ INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"),
+ INIT_PARAM(renameWidth, "Rename width"),
+
+ INIT_PARAM(commitToIEWDelay, "Commit to "
+ "Issue/Execute/Writeback delay"),
+ INIT_PARAM(renameToIEWDelay, "Rename to "
+ "Issue/Execute/Writeback delay"),
+ INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal "
+ "to the IEW stage)"),
+ INIT_PARAM(issueWidth, "Issue width"),
+ INIT_PARAM(executeWidth, "Execute width"),
+ INIT_PARAM(executeIntWidth, "Integer execute width"),
+ INIT_PARAM(executeFloatWidth, "Floating point execute width"),
+ INIT_PARAM(executeBranchWidth, "Branch execute width"),
+ INIT_PARAM(executeMemoryWidth, "Memory execute width"),
+
+ INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit "
+ "delay"),
+ INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"),
+ INIT_PARAM(commitWidth, "Commit width"),
+ INIT_PARAM(squashWidth, "Squash width"),
+
+ INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"),
+ INIT_PARAM(localPredictorSize, "Size of local predictor"),
+ INIT_PARAM(localCtrBits, "Bits per counter"),
+ INIT_PARAM(localHistoryTableSize, "Size of local history table"),
+ INIT_PARAM(localHistoryBits, "Bits for the local history"),
+ INIT_PARAM(globalPredictorSize, "Size of global predictor"),
+ INIT_PARAM(globalCtrBits, "Bits per counter"),
+ INIT_PARAM(globalHistoryBits, "Bits of history"),
+ INIT_PARAM(choicePredictorSize, "Size of choice predictor"),
+ INIT_PARAM(choiceCtrBits, "Bits of choice counters"),
+
+ INIT_PARAM(BTBEntries, "Number of BTB entries"),
+ INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"),
+
+ INIT_PARAM(RASSize, "RAS size"),
+
+ INIT_PARAM(LQEntries, "Number of load queue entries"),
+ INIT_PARAM(SQEntries, "Number of store queue entries"),
+ INIT_PARAM_DFLT(lsqLimits, "LSQ size limits dispatch", true),
+ INIT_PARAM(LFSTSize, "Last fetched store table size"),
+ INIT_PARAM(SSITSize, "Store set ID table size"),
+
+ INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"),
+ INIT_PARAM(numPhysFloatRegs, "Number of physical floating point "
+ "registers"),
+ INIT_PARAM(numIQEntries, "Number of instruction queue entries"),
+ INIT_PARAM(numROBEntries, "Number of reorder buffer entries"),
+
+ INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true),
+ INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0),
+ INIT_PARAM_DFLT(wbWidth, "Writeback width", 0),
+
+ INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1),
+ INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"),
+ INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"),
+ INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100),
+ INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"),
+ INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100),
+ INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"),
+ INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100),
+ INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"),
+
+ INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"),
+ INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
+
+ INIT_PARAM(function_trace, "Enable function trace"),
+ INIT_PARAM(function_trace_start, "Cycle to start function trace")
+
+END_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
+
+CREATE_SIM_OBJECT(DerivOzoneCPU)
+{
+ DerivOzoneCPU *cpu;
+
+#if FULL_SYSTEM
+ // Full-system only supports a single thread for the moment.
+ int actual_num_threads = 1;
+#else
+ // In non-full-system mode, we infer the number of threads from
+ // the workload if it's not explicitly specified.
+ int actual_num_threads =
+ numThreads.isValid() ? numThreads : workload.size();
+
+ if (workload.size() == 0) {
+ fatal("Must specify at least one workload!");
+ }
+
+#endif
+
+ SimpleParams *params = new SimpleParams;
+
+ params->clock = clock;
+
+ params->name = getInstanceName();
+ params->numberOfThreads = actual_num_threads;
+
+#if FULL_SYSTEM
+ params->system = system;
+ params->cpu_id = cpu_id;
+ params->itb = itb;
+ params->dtb = dtb;
+ params->profile = profile;
+#else
+ params->workload = workload;
+// params->pTable = page_table;
+#endif // FULL_SYSTEM
+
+ params->mem = mem;
+ params->checker = checker;
+ params->max_insts_any_thread = max_insts_any_thread;
+ params->max_insts_all_threads = max_insts_all_threads;
+ params->max_loads_any_thread = max_loads_any_thread;
+ params->max_loads_all_threads = max_loads_all_threads;
+ params->stats_reset_inst = stats_reset_inst;
+ params->progress_interval = progress_interval;
+
+ //
+ // Caches
+ //
+// params->icacheInterface = icache ? icache->getInterface() : NULL;
+// params->dcacheInterface = dcache ? dcache->getInterface() : NULL;
+ params->cachePorts = cachePorts;
+
+ params->width = width;
+ params->frontEndWidth = frontEndWidth;
+ params->frontEndLatency = frontEndLatency;
+ params->backEndWidth = backEndWidth;
+ params->backEndSquashLatency = backEndSquashLatency;
+ params->backEndLatency = backEndLatency;
+ params->maxInstBufferSize = maxInstBufferSize;
+ params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs;
+ params->maxOutstandingMemOps = maxOutstandingMemOps;
+
+ params->decodeToFetchDelay = decodeToFetchDelay;
+ params->renameToFetchDelay = renameToFetchDelay;
+ params->iewToFetchDelay = iewToFetchDelay;
+ params->commitToFetchDelay = commitToFetchDelay;
+ params->fetchWidth = fetchWidth;
+
+ params->renameToDecodeDelay = renameToDecodeDelay;
+ params->iewToDecodeDelay = iewToDecodeDelay;
+ params->commitToDecodeDelay = commitToDecodeDelay;
+ params->fetchToDecodeDelay = fetchToDecodeDelay;
+ params->decodeWidth = decodeWidth;
+
+ params->iewToRenameDelay = iewToRenameDelay;
+ params->commitToRenameDelay = commitToRenameDelay;
+ params->decodeToRenameDelay = decodeToRenameDelay;
+ params->renameWidth = renameWidth;
+
+ params->commitToIEWDelay = commitToIEWDelay;
+ params->renameToIEWDelay = renameToIEWDelay;
+ params->issueToExecuteDelay = issueToExecuteDelay;
+ params->issueWidth = issueWidth;
+ params->executeWidth = executeWidth;
+ params->executeIntWidth = executeIntWidth;
+ params->executeFloatWidth = executeFloatWidth;
+ params->executeBranchWidth = executeBranchWidth;
+ params->executeMemoryWidth = executeMemoryWidth;
+
+ params->iewToCommitDelay = iewToCommitDelay;
+ params->renameToROBDelay = renameToROBDelay;
+ params->commitWidth = commitWidth;
+ params->squashWidth = squashWidth;
+
+ params->predType = predType;
+ params->localPredictorSize = localPredictorSize;
+ params->localCtrBits = localCtrBits;
+ params->localHistoryTableSize = localHistoryTableSize;
+ params->localHistoryBits = localHistoryBits;
+ params->globalPredictorSize = globalPredictorSize;
+ params->globalCtrBits = globalCtrBits;
+ params->globalHistoryBits = globalHistoryBits;
+ params->choicePredictorSize = choicePredictorSize;
+ params->choiceCtrBits = choiceCtrBits;
+
+ params->BTBEntries = BTBEntries;
+ params->BTBTagSize = BTBTagSize;
+
+ params->RASSize = RASSize;
+
+ params->LQEntries = LQEntries;
+ params->SQEntries = SQEntries;
+ params->lsqLimits = lsqLimits;
+
+ params->SSITSize = SSITSize;
+ params->LFSTSize = LFSTSize;
+
+ params->numPhysIntRegs = numPhysIntRegs;
+ params->numPhysFloatRegs = numPhysFloatRegs;
+ params->numIQEntries = numIQEntries;
+ params->numROBEntries = numROBEntries;
+
+ params->decoupledFrontEnd = decoupledFrontEnd;
+ params->dispatchWidth = dispatchWidth;
+ params->wbWidth = wbWidth;
+
+ params->smtNumFetchingThreads = smtNumFetchingThreads;
+ params->smtFetchPolicy = smtFetchPolicy;
+ params->smtIQPolicy = smtIQPolicy;
+ params->smtLSQPolicy = smtLSQPolicy;
+ params->smtLSQThreshold = smtLSQThreshold;
+ params->smtROBPolicy = smtROBPolicy;
+ params->smtROBThreshold = smtROBThreshold;
+ params->smtCommitPolicy = smtCommitPolicy;
+
+ params->instShiftAmt = 2;
+
+ params->deferRegistration = defer_registration;
+
+ params->functionTrace = function_trace;
+ params->functionTraceStart = function_trace_start;
+
+ cpu = new DerivOzoneCPU(params);
+
+ return cpu;
+}
+
+REGISTER_SIM_OBJECT("DerivOzoneCPU", DerivOzoneCPU)
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ * Nathan Binkert
+ */
+
+#include "config/full_system.hh"
+#include "config/use_checker.hh"
+
+#include "arch/isa_traits.hh" // For MachInst
+#include "base/trace.hh"
+#include "cpu/base.hh"
+#include "cpu/thread_context.hh"
+#include "cpu/exetrace.hh"
+#include "cpu/ozone/cpu.hh"
+#include "cpu/quiesce_event.hh"
+#include "cpu/static_inst.hh"
+#include "sim/sim_object.hh"
+#include "sim/stats.hh"
+
+#if FULL_SYSTEM
+#include "arch/faults.hh"
+#include "arch/alpha/osfpal.hh"
+#include "arch/alpha/tlb.hh"
+#include "arch/alpha/types.hh"
+#include "arch/vtophys.hh"
+#include "base/callback.hh"
+#include "cpu/profile.hh"
+#include "kern/kernel_stats.hh"
+#include "sim/faults.hh"
+#include "sim/sim_events.hh"
+#include "sim/sim_exit.hh"
+#include "sim/system.hh"
+#else // !FULL_SYSTEM
+#include "sim/process.hh"
+#endif // FULL_SYSTEM
+
+#if USE_CHECKER
+#include "cpu/checker/thread_context.hh"
+#endif
+
+using namespace TheISA;
+
+template <class Impl>
+OzoneCPU<Impl>::TickEvent::TickEvent(OzoneCPU *c, int w)
+ : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c), width(w)
+{
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::TickEvent::process()
+{
+ cpu->tick();
+}
+
+template <class Impl>
+const char *
+OzoneCPU<Impl>::TickEvent::description()
+{
+ return "OzoneCPU tick event";
+}
+
+template <class Impl>
+OzoneCPU<Impl>::OzoneCPU(Params *p)
+#if FULL_SYSTEM
+ : BaseCPU(p), thread(this, 0), tickEvent(this, p->width),
+#else
+ : BaseCPU(p), thread(this, 0, p->workload[0], 0, p->mem),
+ tickEvent(this, p->width),
+#endif
+ mem(p->mem), comm(5, 5)
+{
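+ // comm passes sequence numbers and stall signals between the
+ // front end and back end (see CommStruct), buffered five stages
+ // in each direction.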
+ frontEnd = new FrontEnd(p);
+ backEnd = new BackEnd(p);
+
+ _status = Idle;
+
+ if (p->checker) {
+#if USE_CHECKER
+ BaseCPU *temp_checker = p->checker;
+ checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker);
+ checker->setMemory(mem);
+#if FULL_SYSTEM
+ checker->setSystem(p->system);
+#endif
+ checkerTC = new CheckerThreadContext<OzoneTC>(&ozoneTC, checker);
+ thread.tc = checkerTC;
+ tc = checkerTC;
+#else
+ panic("Checker enabled but not compiled in!");
+#endif
+ } else {
+ // If the checker is not being used, then the thread context
+ // points directly to the CPU's OzoneTC.
+ checker = NULL;
+ thread.tc = &ozoneTC;
+ tc = &ozoneTC;
+ }
+
+ ozoneTC.cpu = this;
+ ozoneTC.thread = &thread;
+
+ thread.inSyscall = false;
+
+ thread.setStatus(ThreadContext::Suspended);
+#if FULL_SYSTEM
+ // Setup thread state stuff.
+ thread.cpu = this;
+ thread.setTid(0);
+
+ thread.quiesceEvent = new EndQuiesceEvent(tc);
+
+ system = p->system;
+ itb = p->itb;
+ dtb = p->dtb;
+ physmem = p->system->physmem;
+
+ if (p->profile) {
+ thread.profile = new FunctionProfile(p->system->kernelSymtab);
+ // @todo: This might be better as an ThreadContext instead of OzoneTC
+ Callback *cb =
+ new MakeCallback<OzoneTC,
+ &OzoneTC::dumpFuncProfile>(&ozoneTC);
+ registerExitCallback(cb);
+ }
+
+ // let's fill with a dummy node for now so we don't get a segfault
+ // on the first cycle when there's no node available.
+ static ProfileNode dummyNode;
+ thread.profileNode = &dummyNode;
+ thread.profilePC = 3;
+#else
+ thread.cpu = this;
+#endif // !FULL_SYSTEM
+
+ numInst = 0;
+ startNumInst = 0;
+
+ threadContexts.push_back(tc);
+
+ frontEnd->setCPU(this);
+ backEnd->setCPU(this);
+
+ frontEnd->setTC(tc);
+ backEnd->setTC(tc);
+
+ frontEnd->setThreadState(&thread);
+ backEnd->setThreadState(&thread);
+
+ frontEnd->setCommBuffer(&comm);
+ backEnd->setCommBuffer(&comm);
+
+ frontEnd->setBackEnd(backEnd);
+ backEnd->setFrontEnd(frontEnd);
+
+ globalSeqNum = 1;
+
+#if FULL_SYSTEM
+ checkInterrupts = false;
+#endif
+
+ lockFlag = 0;
+
+ // Setup rename table, initializing all values to ready.
+ for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
+ thread.renameTable[i] = new DynInst(this);
+ thread.renameTable[i]->setResultReady();
+ }
+
+ frontEnd->renameTable.copyFrom(thread.renameTable);
+ backEnd->renameTable.copyFrom(thread.renameTable);
+
+#if !FULL_SYSTEM
+ /* Use this port for syscall emulation writes to memory. */
+ Port *mem_port;
+ TranslatingPort *trans_port;
+ trans_port = new TranslatingPort(csprintf("%s-%d-funcport",
+ name(), 0),
+ p->workload[0]->pTable,
+ false);
+ mem_port = p->mem->getPort("functional");
+ mem_port->setPeer(trans_port);
+ trans_port->setPeer(mem_port);
+ thread.setMemPort(trans_port);
+#else
+ Port *mem_port;
+ FunctionalPort *phys_port;
+ VirtualPort *virt_port;
+ phys_port = new FunctionalPort(csprintf("%s-%d-funcport",
+ name(), 0));
+ mem_port = system->physmem->getPort("functional");
+ mem_port->setPeer(phys_port);
+ phys_port->setPeer(mem_port);
+
+ virt_port = new VirtualPort(csprintf("%s-%d-vport",
+ name(), 0));
+ mem_port = system->physmem->getPort("functional");
+ mem_port->setPeer(virt_port);
+ virt_port->setPeer(mem_port);
+
+ thread.setPhysPort(phys_port);
+ thread.setVirtPort(virt_port);
+#endif
+
+ DPRINTF(OzoneCPU, "OzoneCPU: Created Ozone cpu object.\n");
+}
+
+template <class Impl>
+OzoneCPU<Impl>::~OzoneCPU()
+{
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::switchOut()
+{
+ BaseCPU::switchOut(_sampler);
+ switchCount = 0;
+ // Front end needs state from back end, so switch out the back end first.
+ backEnd->switchOut();
+ frontEnd->switchOut();
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::signalSwitched()
+{
+ // Only complete the switchout when both the front end and back
+ // end have signalled they are ready to switch.
+ if (++switchCount == 2) {
+ backEnd->doSwitchOut();
+ frontEnd->doSwitchOut();
+#if USE_CHECKER
+ if (checker)
+ checker->switchOut();
+#endif
+
+ _status = SwitchedOut;
+#ifndef NDEBUG
+ // Sanity check that the committed, front end, and back end
+ // rename tables all agree.
+ for (int i = 0; i < AlphaISA::TotalNumRegs; ++i) {
+ assert(thread.renameTable[i] == frontEnd->renameTable[i]);
+
+ assert(thread.renameTable[i] == backEnd->renameTable[i]);
+
+ DPRINTF(OzoneCPU, "Checking if register %i matches.\n", i);
+ }
+#endif
+
+ if (tickEvent.scheduled())
+ tickEvent.squash();
+ }
+ assert(switchCount <= 2);
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
+{
+ BaseCPU::takeOverFrom(oldCPU);
+
+ thread.trapPending = false;
+ thread.inSyscall = false;
+
+ backEnd->takeOverFrom();
+ frontEnd->takeOverFrom();
+ frontEnd->renameTable.copyFrom(thread.renameTable);
+ backEnd->renameTable.copyFrom(thread.renameTable);
+ assert(!tickEvent.scheduled());
+
+#ifndef NDEBUG
+ // Check rename table.
+ for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
+ assert(thread.renameTable[i]->isResultReady());
+ }
+#endif
+
+ // @todo: Fix hardcoded number
+ // Clear out any old information in time buffer.
+ for (int i = 0; i < 15; ++i) {
+ comm.advance();
+ }
+
+ // if any of this CPU's ThreadContexts are active, mark the CPU as
+ // running and schedule its tick event.
+ for (int i = 0; i < threadContexts.size(); ++i) {
+ ThreadContext *tc = threadContexts[i];
+ if (tc->status() == ThreadContext::Active &&
+ _status != Running) {
+ _status = Running;
+ tickEvent.schedule(curTick);
+ }
+ }
+ // Nothing running, change status to reflect that we're no longer
+ // switched out.
+ if (_status == SwitchedOut) {
+ _status = Idle;
+ }
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::activateContext(int thread_num, int delay)
+{
+ // Eventually change this in SMT.
+ assert(thread_num == 0);
+
+ assert(_status == Idle);
+ notIdleFraction++;
+ scheduleTickEvent(delay);
+ _status = Running;
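+ // Cancel any pending quiesce timeout now that the thread is
+ // waking up.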
+#if FULL_SYSTEM
+ if (thread.quiesceEvent && thread.quiesceEvent->scheduled())
+ thread.quiesceEvent->deschedule();
+#endif
+ thread.setStatus(ThreadContext::Active);
+ frontEnd->wakeFromQuiesce();
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::suspendContext(int thread_num)
+{
+ // Eventually change this in SMT.
+ assert(thread_num == 0);
+ // @todo: Figure out how to initially set the status properly so
+ // this is running.
+// assert(_status == Running);
+ notIdleFraction--;
+ unscheduleTickEvent();
+ _status = Idle;
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::deallocateContext(int thread_num, int delay)
+{
+ // for now, these are equivalent
+ suspendContext(thread_num);
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::haltContext(int thread_num)
+{
+ // for now, these are equivalent
+ suspendContext(thread_num);
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::regStats()
+{
+ using namespace Stats;
+
+ BaseCPU::regStats();
+
+ thread.numInsts
+ .name(name() + ".num_insts")
+ .desc("Number of instructions executed")
+ ;
+
+ thread.numMemRefs
+ .name(name() + ".num_refs")
+ .desc("Number of memory references")
+ ;
+
+ notIdleFraction
+ .name(name() + ".not_idle_fraction")
+ .desc("Percentage of non-idle cycles")
+ ;
+
+ idleFraction
+ .name(name() + ".idle_fraction")
+ .desc("Percentage of idle cycles")
+ ;
+
+ quiesceCycles
+ .name(name() + ".quiesce_cycles")
+ .desc("Number of cycles spent in quiesce")
+ ;
+
+ idleFraction = constant(1.0) - notIdleFraction;
+
+ frontEnd->regStats();
+ backEnd->regStats();
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::resetStats()
+{
+// startNumInst = numInst;
+ notIdleFraction = (_status != Idle);
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::init()
+{
+ BaseCPU::init();
+
+ // Mark this as in syscall so it won't need to squash
+ thread.inSyscall = true;
+#if FULL_SYSTEM
+ for (int i = 0; i < threadContexts.size(); ++i) {
+ ThreadContext *tc = threadContexts[i];
+
+ // initialize CPU, including PC
+ TheISA::initCPU(tc, tc->readCpuId());
+ }
+#endif
+ frontEnd->renameTable.copyFrom(thread.renameTable);
+ backEnd->renameTable.copyFrom(thread.renameTable);
+
+ thread.inSyscall = false;
+}
+
+template <class Impl>
+Port *
+OzoneCPU<Impl>::getPort(const std::string &if_name, int idx)
+{
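+ // The back end owns the data cache port; the front end owns the
+ // instruction cache port.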
+ if (if_name == "dcache_port")
+ return backEnd->getDcachePort();
+ else if (if_name == "icache_port")
+ return frontEnd->getIcachePort();
+ else
+ panic("No Such Port\n");
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::serialize(std::ostream &os)
+{
+ BaseCPU::serialize(os);
+ SERIALIZE_ENUM(_status);
+ nameOut(os, csprintf("%s.tc", name()));
+ ozoneTC.serialize(os);
+ nameOut(os, csprintf("%s.tickEvent", name()));
+ tickEvent.serialize(os);
+
+ // Use CPUExecContext's ability to checkpoint to make it easier to
+ // write out the registers. Also make this static so it doesn't
+ // get instantiated multiple times (causes a panic in statistics).
+ static CPUExecContext temp;
+
+ nameOut(os, csprintf("%s.xc.0", name()));
+ temp.copyXC(thread.getXCProxy());
+ temp.serialize(os);
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::unserialize(Checkpoint *cp, const std::string &section)
+{
+ BaseCPU::unserialize(cp, section);
+ UNSERIALIZE_ENUM(_status);
+ ozoneTC.unserialize(cp, csprintf("%s.tc", section));
+ tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
+
+ // Use CPUExecContext's ability to checkpoint to make it easier to
+ // read in the registers. Also make this static so it doesn't
+ // get instantiated multiple times (causes a panic in statistics).
+ static CPUExecContext temp;
+
+ temp.copyXC(thread.getXCProxy());
+ temp.unserialize(cp, csprintf("%s.xc.0", section));
+ thread.getXCProxy()->copyArchRegs(temp.getProxy());
+}
+
+template <class Impl>
+Fault
+OzoneCPU<Impl>::copySrcTranslate(Addr src)
+{
+ panic("Copy not implemented!\n");
+ return NoFault;
+#if 0
+ static bool no_warn = true;
+ int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64;
+ // Only support block sizes of 64 atm.
+ assert(blk_size == 64);
+ int offset = src & (blk_size - 1);
+
+ // Make sure block doesn't span page
+ if (no_warn &&
+ (src & TheISA::PageMask) != ((src + blk_size) & TheISA::PageMask) &&
+ (src >> 40) != 0xfffffc) {
+ warn("Copied block source spans pages %x.", src);
+ no_warn = false;
+ }
+
+ memReq->reset(src & ~(blk_size - 1), blk_size);
+
+ // translate to physical address
+ Fault fault = tc->translateDataReadReq(memReq);
+
+ assert(fault != Alignment_Fault);
+
+ if (fault == NoFault) {
+ tc->copySrcAddr = src;
+ tc->copySrcPhysAddr = memReq->paddr + offset;
+ } else {
+ tc->copySrcAddr = 0;
+ tc->copySrcPhysAddr = 0;
+ }
+ return fault;
+#endif
+}
+
+template <class Impl>
+Fault
+OzoneCPU<Impl>::copy(Addr dest)
+{
+ panic("Copy not implemented!\n");
+ return NoFault;
+#if 0
+ static bool no_warn = true;
+ int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64;
+ // Only support block sizes of 64 atm.
+ assert(blk_size == 64);
+ uint8_t data[blk_size];
+ //assert(tc->copySrcAddr);
+ int offset = dest & (blk_size - 1);
+
+ // Make sure block doesn't span page
+ if (no_warn &&
+ (dest & TheISA::PageMask) != ((dest + blk_size) & TheISA::PageMask) &&
+ (dest >> 40) != 0xfffffc) {
+ no_warn = false;
+ warn("Copied block destination spans pages %x. ", dest);
+ }
+
+ memReq->reset(dest & ~(blk_size -1), blk_size);
+ // translate to physical address
+ Fault fault = tc->translateDataWriteReq(memReq);
+
+ assert(fault != Alignment_Fault);
+
+ if (fault == NoFault) {
+ Addr dest_addr = memReq->paddr + offset;
+ // Need to read straight from memory since we have more than 8 bytes.
+ memReq->paddr = tc->copySrcPhysAddr;
+ tc->mem->read(memReq, data);
+ memReq->paddr = dest_addr;
+ tc->mem->write(memReq, data);
+ if (dcacheInterface) {
+ memReq->cmd = Copy;
+ memReq->completionEvent = NULL;
+ memReq->paddr = tc->copySrcPhysAddr;
+ memReq->dest = dest_addr;
+ memReq->size = 64;
+ memReq->time = curTick;
+ dcacheInterface->access(memReq);
+ }
+ }
+ return fault;
+#endif
+}
+
+#if FULL_SYSTEM
+template <class Impl>
+Addr
+OzoneCPU<Impl>::dbg_vtophys(Addr addr)
+{
+ return vtophys(tc, addr);
+}
+#endif // FULL_SYSTEM
+
+#if FULL_SYSTEM
+template <class Impl>
+void
+OzoneCPU<Impl>::post_interrupt(int int_num, int index)
+{
+ BaseCPU::post_interrupt(int_num, index);
+
+ if (_status == Idle) {
+ DPRINTF(IPI,"Suspended Processor awoke\n");
+// thread.activate();
+ // Hack for now. Otherwise might have to go through the tc, or
+ // I need to figure out what's the right thing to call.
+ activateContext(thread.readTid(), 1);
+ }
+}
+#endif // FULL_SYSTEM
+
+/* start simulation, program loaded, processor precise state initialized */
+template <class Impl>
+void
+OzoneCPU<Impl>::tick()
+{
+ DPRINTF(OzoneCPU, "\n\nOzoneCPU: Ticking cpu.\n");
+
+ _status = Running;
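+ // Pin the architectural zero registers (integer and FP) to zero
+ // in the rename table each cycle.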
+ thread.renameTable[ZeroReg]->setIntResult(0);
+ thread.renameTable[ZeroReg+TheISA::FP_Base_DepTag]->
+ setDoubleResult(0.0);
+
+ comm.advance();
+ frontEnd->tick();
+ backEnd->tick();
+
+ // check for instruction-count-based events
+ comInstEventQueue[0]->serviceEvents(numInst);
+
+ if (!tickEvent.scheduled() && _status == Running)
+ tickEvent.schedule(curTick + cycles(1));
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::squashFromTC()
+{
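+ // An external write to the thread context invalidates in-flight
+ // state; the back end event performs the actual squash.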
+ thread.inSyscall = true;
+ backEnd->generateTCEvent();
+}
+
+#if !FULL_SYSTEM
+template <class Impl>
+void
+OzoneCPU<Impl>::syscall(uint64_t &callnum)
+{
+ // Not sure this copy is needed, depending on how the TC proxy is made.
+ thread.renameTable.copyFrom(backEnd->renameTable);
+
+ thread.inSyscall = true;
+
+ thread.funcExeInst++;
+
+ DPRINTF(OzoneCPU, "FuncExeInst: %i\n", thread.funcExeInst);
+
+ thread.process->syscall(callnum, tc);
+
+ thread.funcExeInst--;
+
+ thread.inSyscall = false;
+
+ frontEnd->renameTable.copyFrom(thread.renameTable);
+ backEnd->renameTable.copyFrom(thread.renameTable);
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::setSyscallReturn(SyscallReturn return_value, int tid)
+{
+ // check for error condition. Alpha syscall convention is to
+ // indicate success/failure in reg a3 (r19) and put the
+ // return value itself in the standard return value reg (v0).
+ if (return_value.successful()) {
+ // no error
+ thread.renameTable[SyscallSuccessReg]->setIntResult(0);
+ thread.renameTable[ReturnValueReg]->setIntResult(
+ return_value.value());
+ } else {
+ // got an error, return details
+ thread.renameTable[SyscallSuccessReg]->setIntResult((IntReg) -1);
+ thread.renameTable[ReturnValueReg]->setIntResult(
+ -return_value.value());
+ }
+}
+#else
+template <class Impl>
+Fault
+OzoneCPU<Impl>::hwrei()
+{
+ // Need to move this to ISA code
+ // May also need to make this per thread
+
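+ // Returning from PAL mode clears any outstanding load-locked
+ // reservation.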
+ lockFlag = false;
+ lockAddrList.clear();
+ thread.kernelStats->hwrei();
+
+ checkInterrupts = true;
+
+ // FIXME: XXX check for interrupts? XXX
+ return NoFault;
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::processInterrupts()
+{
+ // Check for interrupts here. For now can copy the code that
+ // exists within isa_fullsys_traits.hh. Also assume that thread 0
+ // is the one that handles the interrupts.
+
+ // Check if there are any outstanding interrupts
+ //Handle the interrupts
+ int ipl = 0;
+ int summary = 0;
+
+ checkInterrupts = false;
+
+ if (thread.readMiscReg(IPR_ASTRR))
+ panic("asynchronous traps not implemented\n");
+
+ if (thread.readMiscReg(IPR_SIRR)) {
+ for (int i = INTLEVEL_SOFTWARE_MIN;
+ i < INTLEVEL_SOFTWARE_MAX; i++) {
+ if (thread.readMiscReg(IPR_SIRR) & (ULL(1) << i)) {
+ // See table 4-19 of the 21164 hardware reference
+ ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1;
+ summary |= (ULL(1) << i);
+ }
+ }
+ }
+
+ uint64_t interrupts = intr_status();
+
+ if (interrupts) {
+ for (int i = INTLEVEL_EXTERNAL_MIN;
+ i < INTLEVEL_EXTERNAL_MAX; i++) {
+ if (interrupts & (ULL(1) << i)) {
+ // See table 4-19 of the 21164 hardware reference
+ ipl = i;
+ summary |= (ULL(1) << i);
+ }
+ }
+ }
+
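+ // Only take the interrupt if its level is above the current IPL.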
+ if (ipl && ipl > thread.readMiscReg(IPR_IPLR)) {
+ thread.setMiscReg(IPR_ISR, summary);
+ thread.setMiscReg(IPR_INTID, ipl);
+ // @todo: Make this more transparent
+ if (checker) {
+ checker->threadBase()->setMiscReg(IPR_ISR, summary);
+ checker->threadBase()->setMiscReg(IPR_INTID, ipl);
+ }
+ Fault fault = new InterruptFault;
+ fault->invoke(thread.getTC());
+ DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
+ thread.readMiscReg(IPR_IPLR), ipl, summary);
+ }
+}
+
+template <class Impl>
+bool
+OzoneCPU<Impl>::simPalCheck(int palFunc)
+{
+ // Need to move this to ISA code
+ // May also need to make this per thread
+ thread.kernelStats->callpal(palFunc, tc);
+
+ switch (palFunc) {
+ case PAL::halt:
+ haltContext(thread.readTid());
+ if (--System::numSystemsRunning == 0)
+ exitSimLoop("all cpus halted");
+ break;
+
+ case PAL::bpt:
+ case PAL::bugchk:
+ if (system->breakpoint())
+ return false;
+ break;
+ }
+
+ return true;
+}
+#endif
+
+template <class Impl>
+BaseCPU *
+OzoneCPU<Impl>::OzoneTC::getCpuPtr()
+{
+ return cpu;
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::setCpuId(int id)
+{
+ cpu->cpuId = id;
+ thread->setCpuId(id);
+}
+
+#if FULL_SYSTEM
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::delVirtPort(VirtualPort *vp)
+{
+ delete vp->getPeer();
+ delete vp;
+}
+#endif
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::setStatus(Status new_status)
+{
+ thread->setStatus(new_status);
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::activate(int delay)
+{
+ cpu->activateContext(thread->readTid(), delay);
+}
+
+/// Set the status to Suspended.
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::suspend()
+{
+ cpu->suspendContext(thread->readTid());
+}
+
+/// Set the status to Unallocated.
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::deallocate(int delay)
+{
+ cpu->deallocateContext(thread->readTid(), delay);
+}
+
+/// Set the status to Halted.
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::halt()
+{
+ cpu->haltContext(thread->readTid());
+}
+
+#if FULL_SYSTEM
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::dumpFuncProfile()
+{
+ thread->dumpFuncProfile();
+}
+#endif
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::takeOverFrom(ThreadContext *old_context)
+{
+ // some things should already be set up
+#if FULL_SYSTEM
+ assert(getSystemPtr() == old_context->getSystemPtr());
+#else
+ assert(getProcessPtr() == old_context->getProcessPtr());
+#endif
+
+ // copy over functional state
+ setStatus(old_context->status());
+ copyArchRegs(old_context);
+ setCpuId(old_context->readCpuId());
+
+ thread->inst = old_context->getInst();
+#if !FULL_SYSTEM
+ setFuncExeInst(old_context->readFuncExeInst());
+#else
+ EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent();
+ if (other_quiesce) {
+ // Point the quiesce event's TC at this TC so that it wakes up
+ // the proper CPU.
+ other_quiesce->tc = this;
+ }
+ if (thread->quiesceEvent) {
+ thread->quiesceEvent->tc = this;
+ }
+
+ // Copy kernel stats pointer from old context.
+ thread->kernelStats = old_context->getKernelStats();
+// storeCondFailures = 0;
+ cpu->lockFlag = false;
+#endif
+
+ old_context->setStatus(ThreadContext::Unallocated);
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::regStats(const std::string &name)
+{
+#if FULL_SYSTEM
+ thread->kernelStats = new Kernel::Statistics(cpu->system);
+ thread->kernelStats->regStats(name + ".kern");
+#endif
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::serialize(std::ostream &os)
+{
+ // Once serialization is added, serialize the quiesce event and
+ // kernel stats. Will need to make sure there aren't multiple
+ // things that serialize them.
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::unserialize(Checkpoint *cp, const std::string &section)
+{ }
+
+#if FULL_SYSTEM
+template <class Impl>
+EndQuiesceEvent *
+OzoneCPU<Impl>::OzoneTC::getQuiesceEvent()
+{
+ return thread->quiesceEvent;
+}
+
+template <class Impl>
+Tick
+OzoneCPU<Impl>::OzoneTC::readLastActivate()
+{
+ return thread->lastActivate;
+}
+
+template <class Impl>
+Tick
+OzoneCPU<Impl>::OzoneTC::readLastSuspend()
+{
+ return thread->lastSuspend;
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::profileClear()
+{
+ thread->profileClear();
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::profileSample()
+{
+ thread->profileSample();
+}
+#endif
+
+template <class Impl>
+int
+OzoneCPU<Impl>::OzoneTC::getThreadNum()
+{
+ return thread->readTid();
+}
+
+template <class Impl>
+TheISA::MachInst
+OzoneCPU<Impl>::OzoneTC::getInst()
+{
+ return thread->getInst();
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::copyArchRegs(ThreadContext *tc)
+{
+ thread->PC = tc->readPC();
+ thread->nextPC = tc->readNextPC();
+
+ cpu->frontEnd->setPC(thread->PC);
+ cpu->frontEnd->setNextPC(thread->nextPC);
+
+ // First loop through the integer registers.
+ for (int i = 0; i < TheISA::NumIntRegs; ++i) {
+/* DPRINTF(OzoneCPU, "Copying over register %i, had data %lli, "
+ "now has data %lli.\n",
+ i, thread->renameTable[i]->readIntResult(),
+ tc->readIntReg(i));
+*/
+ thread->renameTable[i]->setIntResult(tc->readIntReg(i));
+ }
+
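+ // FP values are transferred as raw bit patterns through the
+ // integer result field.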
+ // Then loop through the floating point registers.
+ for (int i = 0; i < TheISA::NumFloatRegs; ++i) {
+ int fp_idx = i + TheISA::FP_Base_DepTag;
+ thread->renameTable[fp_idx]->setIntResult(tc->readFloatRegBits(i));
+ }
+ }
+
+#if !FULL_SYSTEM
+ thread->funcExeInst = tc->readFuncExeInst();
+#endif
+
+ // Need to copy the TC values into the current rename table,
+ // copy the misc regs.
+ copyMiscRegs(tc, this);
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::clearArchRegs()
+{
+ panic("Unimplemented!");
+}
+
+template <class Impl>
+uint64_t
+OzoneCPU<Impl>::OzoneTC::readIntReg(int reg_idx)
+{
+ return thread->renameTable[reg_idx]->readIntResult();
+}
+
+template <class Impl>
+TheISA::FloatReg
+OzoneCPU<Impl>::OzoneTC::readFloatReg(int reg_idx, int width)
+{
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ switch(width) {
+ case 32:
+ return thread->renameTable[idx]->readFloatResult();
+ case 64:
+ return thread->renameTable[idx]->readDoubleResult();
+ default:
+ panic("Unsupported width!");
+ return 0;
+ }
+}
+
+template <class Impl>
+double
+OzoneCPU<Impl>::OzoneTC::readFloatReg(int reg_idx)
+{
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ return thread->renameTable[idx]->readFloatResult();
+}
+
+template <class Impl>
+uint64_t
+OzoneCPU<Impl>::OzoneTC::readFloatRegBits(int reg_idx, int width)
+{
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ return thread->renameTable[idx]->readIntResult();
+}
+
+template <class Impl>
+uint64_t
+OzoneCPU<Impl>::OzoneTC::readFloatRegBits(int reg_idx)
+{
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ return thread->renameTable[idx]->readIntResult();
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::setIntReg(int reg_idx, uint64_t val)
+{
+ thread->renameTable[reg_idx]->setIntResult(val);
+
+ if (!thread->inSyscall) {
+ cpu->squashFromTC();
+ }
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::setFloatReg(int reg_idx, FloatReg val, int width)
+{
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ switch(width) {
+ case 32:
+ panic("Unimplemented!");
+ break;
+ case 64:
+ thread->renameTable[idx]->setDoubleResult(val);
+ break;
+ default:
+ panic("Unsupported width!");
+ }
+
+ if (!thread->inSyscall) {
+ cpu->squashFromTC();
+ }
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::setFloatReg(int reg_idx, FloatReg val)
+{
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+
+ thread->renameTable[idx]->setDoubleResult(val);
+
+ if (!thread->inSyscall) {
+ cpu->squashFromTC();
+ }
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::setFloatRegBits(int reg_idx, FloatRegBits val,
+ int width)
+{
+ panic("Unimplemented!");
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::setFloatRegBits(int reg_idx, FloatRegBits val)
+{
+ panic("Unimplemented!");
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::setPC(Addr val)
+{
+ thread->PC = val;
+ cpu->frontEnd->setPC(val);
+
+ if (!thread->inSyscall) {
+ cpu->squashFromTC();
+ }
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneTC::setNextPC(Addr val)
+{
+ thread->nextPC = val;
+ cpu->frontEnd->setNextPC(val);
+
+ if (!thread->inSyscall) {
+ cpu->squashFromTC();
+ }
+}
+
+template <class Impl>
+TheISA::MiscReg
+OzoneCPU<Impl>::OzoneTC::readMiscReg(int misc_reg)
+{
+ return thread->miscRegFile.readReg(misc_reg);
+}
+
+template <class Impl>
+TheISA::MiscReg
+OzoneCPU<Impl>::OzoneTC::readMiscRegWithEffect(int misc_reg, Fault &fault)
+{
+ return thread->miscRegFile.readRegWithEffect(misc_reg,
+ fault, this);
+}
+
+template <class Impl>
+Fault
+OzoneCPU<Impl>::OzoneTC::setMiscReg(int misc_reg, const MiscReg &val)
+{
+ // Needs to setup a squash event unless we're in syscall mode
+ Fault ret_fault = thread->miscRegFile.setReg(misc_reg, val);
+
+ if (!thread->inSyscall) {
+ cpu->squashFromTC();
+ }
+
+ return ret_fault;
+}
+
+template <class Impl>
+Fault
+OzoneCPU<Impl>::OzoneTC::setMiscRegWithEffect(int misc_reg, const MiscReg &val)
+{
+ // Needs to setup a squash event unless we're in syscall mode
+ Fault ret_fault = thread->miscRegFile.setRegWithEffect(misc_reg, val,
+ this);
+
+ if (!thread->inSyscall) {
+ cpu->squashFromTC();
+ }
+
+ return ret_fault;
+}
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_OZONE_FRONT_END_HH__
+#define __CPU_OZONE_FRONT_END_HH__
+
+#include <deque>
+
+#include "arch/utility.hh"
+#include "base/timebuf.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/o3/bpred_unit.hh"
+#include "cpu/ozone/rename_table.hh"
+#include "mem/port.hh"
+#include "mem/request.hh"
+#include "sim/eventq.hh"
+#include "sim/stats.hh"
+
+class ThreadContext;
+class MemObject;
+template <class>
+class OzoneThreadState;
+class PageTable;
+template <class>
+class TimeBuffer;
+
+template <class Impl>
+class FrontEnd
+{
+ public:
+ typedef typename Impl::Params Params;
+ typedef typename Impl::DynInst DynInst;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::CPUType CPUType;
+ typedef typename Impl::BackEnd BackEnd;
+
+ typedef typename Impl::CPUType::OzoneTC OzoneTC;
+ typedef typename Impl::CPUType::CommStruct CommStruct;
+
+ /** IcachePort class. Handles doing the communication with the
+ * cache/memory.
+ */
+ class IcachePort : public Port
+ {
+ protected:
+ /** Pointer to FE. */
+ FrontEnd<Impl> *fe;
+
+ public:
+ /** Default constructor. */
+ IcachePort(FrontEnd<Impl> *_fe)
+ : fe(_fe)
+ { }
+
+ protected:
+ /** Atomic version of receive. Panics. */
+ virtual Tick recvAtomic(PacketPtr pkt);
+
+ /** Functional version of receive. Panics. */
+ virtual void recvFunctional(PacketPtr pkt);
+
+ /** Receives status change. Other than range changing, panics. */
+ virtual void recvStatusChange(Status status);
+
+ /** Returns the address ranges of this device. */
+ virtual void getDeviceAddressRanges(AddrRangeList &resp,
+ AddrRangeList &snoop)
+ { resp.clear(); snoop.clear(); }
+
+ /** Timing version of receive. Handles setting fetch to the
+ * proper status to start fetching. */
+ virtual bool recvTiming(PacketPtr pkt);
+
+ /** Handles doing a retry of a failed fetch. */
+ virtual void recvRetry();
+ };
+
+ FrontEnd(Params *params);
+
+ std::string name() const;
+
+ void setCPU(CPUType *cpu_ptr);
+
+ void setBackEnd(BackEnd *back_end_ptr)
+ { backEnd = back_end_ptr; }
+
+ void setCommBuffer(TimeBuffer<CommStruct> *_comm);
+
+ void setTC(ThreadContext *tc_ptr);
+
+ void setThreadState(OzoneThreadState<Impl> *thread_ptr)
+ { thread = thread_ptr; }
+
+ void regStats();
+
+ Port *getIcachePort() { return &icachePort; }
+
+ void tick();
+ Fault fetchCacheLine();
+ void processInst(DynInstPtr &inst);
+ void squash(const InstSeqNum &squash_num, const Addr &next_PC,
+ const bool is_branch = false, const bool branch_taken = false);
+ DynInstPtr getInst();
+
+ void processCacheCompletion(PacketPtr pkt);
+
+ void addFreeRegs(int num_freed);
+
+ bool isEmpty() { return instBuffer.empty(); }
+
+ void switchOut();
+
+ void doSwitchOut();
+
+ void takeOverFrom(ThreadContext *old_tc = NULL);
+
+ bool isSwitchedOut() { return switchedOut; }
+
+ bool switchedOut;
+
+ private:
+ void recvRetry();
+
+ bool updateStatus();
+
+ void checkBE();
+ DynInstPtr getInstFromCacheline();
+ void renameInst(DynInstPtr &inst);
+ // Returns true if we need to stop the front end this cycle
+ bool processBarriers(DynInstPtr &inst);
+
+ void handleFault(Fault &fault);
+ public:
+ Fault getFault() { return fetchFault; }
+ private:
+ Fault fetchFault;
+
+ // Align an address (typically a PC) to the start of an I-cache block.
+ // We fold in the PISA 64- to 32-bit conversion here as well.
+ Addr icacheBlockAlignPC(Addr addr)
+ {
+ addr = TheISA::realPCToFetchPC(addr);
+ return (addr & ~(cacheBlkMask));
+ }
+
+ InstSeqNum getAndIncrementInstSeq()
+ { return cpu->globalSeqNum++; }
+
+ public:
+ CPUType *cpu;
+
+ BackEnd *backEnd;
+
+ ThreadContext *tc;
+
+ OzoneThreadState<Impl> *thread;
+
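+ /** Overall front-end status. The Icache* states track outstanding
+ * cache activity, while the *Blocked states mark stalls caused by
+ * rename, serialization, quiesce, traps, or the back end. */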
+ enum Status {
+ Running,
+ Idle,
+ IcacheWaitResponse,
+ IcacheWaitRetry,
+ IcacheAccessComplete,
+ SerializeBlocked,
+ SerializeComplete,
+ RenameBlocked,
+ QuiescePending,
+ TrapPending,
+ BEBlocked
+ };
+
+ Status status;
+
+ private:
+ TimeBuffer<CommStruct> *comm;
+ typename TimeBuffer<CommStruct>::wire fromCommit;
+
+ typedef typename Impl::BranchPred BranchPred;
+
+ BranchPred branchPred;
+
+ IcachePort icachePort;
+
+ MemObject *mem;
+
+ RequestPtr memReq;
+
+ /** Mask to get a cache block's address. */
+ Addr cacheBlkMask;
+
+ unsigned cacheBlkSize;
+
+ Addr cacheBlkPC;
+
+ /** The cache line being fetched. */
+ uint8_t *cacheData;
+
+ bool fetchCacheLineNextCycle;
+
+ bool cacheBlkValid;
+
+ bool cacheBlocked;
+
+ /** The packet that is waiting to be retried. */
+ PacketPtr retryPkt;
+
+ public:
+ RenameTable<Impl> renameTable;
+
+ private:
+ Addr PC;
+ Addr nextPC;
+
+ public:
+ void setPC(Addr val) { PC = val; }
+ void setNextPC(Addr val) { nextPC = val; }
+
+ void wakeFromQuiesce();
+
+ void dumpInsts();
+
+ private:
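+ /** Delay line modeling the front end's latency: insts counted in
+ * slot 0 when fetched are read back 'latency' cycles later in
+ * tick() and transferred to the feBuffer. */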
+ TimeBuffer<int> numInstsReady;
+
+ typedef typename std::deque<DynInstPtr> InstBuff;
+ typedef typename InstBuff::iterator InstBuffIt;
+
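+ /** Buffer of insts that have cleared the modeled front-end latency
+ * and are ready to be consumed by the back end. */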
+ InstBuff feBuffer;
+
+ InstBuff instBuffer;
+
+ int instBufferSize;
+
+ int maxInstBufferSize;
+
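+ /** Modeled front-end pipeline latency, in cycles. */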
+ int latency;
+
+ int width;
+
+ int freeRegs;
+
+ int numPhysRegs;
+
+ bool serializeNext;
+
+ DynInstPtr barrierInst;
+
+ public:
+ bool interruptPending;
+ private:
+ // number of idle cycles
+/*
+ Stats::Average<> notIdleFraction;
+ Stats::Formula idleFraction;
+*/
+ // @todo: Consider making these vectors and tracking on a per thread basis.
+ /** Stat for total number of cycles stalled due to an icache miss. */
+ Stats::Scalar<> icacheStallCycles;
+ /** Stat for total number of fetched instructions. */
+ Stats::Scalar<> fetchedInsts;
+ Stats::Scalar<> fetchedBranches;
+ /** Stat for total number of predicted branches. */
+ Stats::Scalar<> predictedBranches;
+ /** Stat for total number of cycles spent fetching. */
+ Stats::Scalar<> fetchCycles;
+
+ Stats::Scalar<> fetchIdleCycles;
+ /** Stat for total number of cycles spent squashing. */
+ Stats::Scalar<> fetchSquashCycles;
+ /** Stat for total number of cycles spent blocked due to other stages in
+ * the pipeline.
+ */
+ Stats::Scalar<> fetchBlockedCycles;
+ /** Stat for total number of fetched cache lines. */
+ Stats::Scalar<> fetchedCacheLines;
+
+ Stats::Scalar<> fetchIcacheSquashes;
+ /** Distribution of number of instructions fetched each cycle. */
+ Stats::Distribution<> fetchNisnDist;
+// Stats::Vector<> qfull_iq_occupancy;
+// Stats::VectorDistribution<> qfull_iq_occ_dist_;
+ Stats::Formula idleRate;
+ Stats::Formula branchRate;
+ Stats::Formula fetchRate;
+ Stats::Scalar<> IFQCount; // cumulative IFQ occupancy
+ Stats::Formula IFQOccupancy;
+ Stats::Formula IFQLatency;
+ Stats::Scalar<> IFQFcount; // cumulative IFQ full count
+ Stats::Formula IFQFullRate;
+
+ Stats::Scalar<> dispatchCountStat;
+ Stats::Scalar<> dispatchedSerializing;
+ Stats::Scalar<> dispatchedTempSerializing;
+ Stats::Scalar<> dispatchSerializeStallCycles;
+ Stats::Formula dispatchRate;
+ Stats::Formula regIntFull;
+ Stats::Formula regFpFull;
+};
+
+#endif // __CPU_OZONE_FRONT_END_HH__
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include "config/use_checker.hh"
+
+#include "arch/faults.hh"
+#include "arch/isa_traits.hh"
+#include "arch/utility.hh"
+#include "base/statistics.hh"
+#include "cpu/thread_context.hh"
+#include "cpu/exetrace.hh"
+#include "cpu/ozone/front_end.hh"
+#include "mem/mem_object.hh"
+#include "mem/packet.hh"
+#include "mem/request.hh"
+
+#if USE_CHECKER
+#include "cpu/checker/cpu.hh"
+#endif
+
+using namespace TheISA;
+
+template<class Impl>
+Tick
+FrontEnd<Impl>::IcachePort::recvAtomic(PacketPtr pkt)
+{
+ panic("FrontEnd doesn't expect recvAtomic callback!");
+ return curTick;
+}
+
+template<class Impl>
+void
+FrontEnd<Impl>::IcachePort::recvFunctional(PacketPtr pkt)
+{
+ panic("FrontEnd doesn't expect recvFunctional callback!");
+}
+
+template<class Impl>
+void
+FrontEnd<Impl>::IcachePort::recvStatusChange(Status status)
+{
+ if (status == RangeChange)
+ return;
+
+ panic("FrontEnd doesn't expect recvStatusChange callback!");
+}
+
+template<class Impl>
+bool
+FrontEnd<Impl>::IcachePort::recvTiming(PacketPtr pkt)
+{
+ fe->processCacheCompletion(pkt);
+ return true;
+}
+
+template<class Impl>
+void
+FrontEnd<Impl>::IcachePort::recvRetry()
+{
+ fe->recvRetry();
+}
+
+template <class Impl>
+FrontEnd<Impl>::FrontEnd(Params *params)
+ : branchPred(params),
+ icachePort(this),
+ mem(params->mem),
+ numInstsReady(params->frontEndLatency, 0),
+ instBufferSize(0),
+ maxInstBufferSize(params->maxInstBufferSize),
+ latency(params->frontEndLatency),
+ width(params->frontEndWidth),
+ freeRegs(params->numPhysicalRegs),
+ numPhysRegs(params->numPhysicalRegs),
+ serializeNext(false),
+ interruptPending(false)
+{
+ switchedOut = false;
+
+ status = Idle;
+
+ memReq = NULL;
+ // Size of cache block.
+ cacheBlkSize = 64;
+
+ assert(isPowerOf2(cacheBlkSize));
+
+ // Create mask to get rid of offset bits.
+ cacheBlkMask = (cacheBlkSize - 1);
+
+ // Create space to store a cache line.
+ cacheData = new uint8_t[cacheBlkSize];
+
+ fetchCacheLineNextCycle = true;
+
+ cacheBlkValid = cacheBlocked = false;
+
+ retryPkt = NULL;
+
+ fetchFault = NoFault;
+}
+
+template <class Impl>
+std::string
+FrontEnd<Impl>::name() const
+{
+ return cpu->name() + ".frontend";
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::setCPU(CPUType *cpu_ptr)
+{
+ cpu = cpu_ptr;
+
+ icachePort.setName(this->name() + "-iport");
+
+#if USE_CHECKER
+ if (cpu->checker) {
+ cpu->checker->setIcachePort(&icachePort);
+ }
+#endif
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
+{
+ comm = _comm;
+ // @todo: Hardcoded for now. Allow this to be set by a latency.
+ fromCommit = comm->getWire(-1);
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::setTC(ThreadContext *tc_ptr)
+{
+ tc = tc_ptr;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::regStats()
+{
+ icacheStallCycles
+ .name(name() + ".icacheStallCycles")
+ .desc("Number of cycles fetch is stalled on an Icache miss")
+ .prereq(icacheStallCycles);
+
+ fetchedInsts
+ .name(name() + ".fetchedInsts")
+ .desc("Number of instructions fetch has processed")
+ .prereq(fetchedInsts);
+
+ fetchedBranches
+ .name(name() + ".fetchedBranches")
+ .desc("Number of fetched branches")
+ .prereq(fetchedBranches);
+
+ predictedBranches
+ .name(name() + ".predictedBranches")
+ .desc("Number of branches that fetch has predicted taken")
+ .prereq(predictedBranches);
+
+ fetchCycles
+ .name(name() + ".fetchCycles")
+ .desc("Number of cycles fetch has run and was not squashing or"
+ " blocked")
+ .prereq(fetchCycles);
+
+ fetchIdleCycles
+ .name(name() + ".fetchIdleCycles")
+ .desc("Number of cycles fetch was idle")
+ .prereq(fetchIdleCycles);
+
+ fetchSquashCycles
+ .name(name() + ".fetchSquashCycles")
+ .desc("Number of cycles fetch has spent squashing")
+ .prereq(fetchSquashCycles);
+
+ fetchBlockedCycles
+ .name(name() + ".fetchBlockedCycles")
+ .desc("Number of cycles fetch has spent blocked")
+ .prereq(fetchBlockedCycles);
+
+ fetchedCacheLines
+ .name(name() + ".fetchedCacheLines")
+ .desc("Number of cache lines fetched")
+ .prereq(fetchedCacheLines);
+
+ fetchIcacheSquashes
+ .name(name() + ".fetchIcacheSquashes")
+ .desc("Number of outstanding Icache misses that were squashed")
+ .prereq(fetchIcacheSquashes);
+
+ fetchNisnDist
+ .init(/* base value */ 0,
+ /* last value */ width,
+ /* bucket size */ 1)
+ .name(name() + ".rateDist")
+ .desc("Number of instructions fetched each cycle (Total)")
+ .flags(Stats::pdf);
+
+ idleRate
+ .name(name() + ".idleRate")
+ .desc("Percent of cycles fetch was idle")
+ .prereq(idleRate);
+ idleRate = fetchIdleCycles * 100 / cpu->numCycles;
+
+ branchRate
+ .name(name() + ".branchRate")
+ .desc("Number of branch fetches per cycle")
+ .flags(Stats::total);
+ branchRate = fetchedBranches / cpu->numCycles;
+
+ fetchRate
+ .name(name() + ".rate")
+ .desc("Number of inst fetches per cycle")
+ .flags(Stats::total);
+ fetchRate = fetchedInsts / cpu->numCycles;
+
+ IFQCount
+ .name(name() + ".IFQ:count")
+ .desc("cumulative IFQ occupancy")
+ ;
+
+ IFQFcount
+ .name(name() + ".IFQ:fullCount")
+ .desc("cumulative IFQ full count")
+ .flags(Stats::total)
+ ;
+
+ IFQOccupancy
+ .name(name() + ".IFQ:occupancy")
+ .desc("avg IFQ occupancy (inst's)")
+ ;
+ IFQOccupancy = IFQCount / cpu->numCycles;
+
+ IFQLatency
+ .name(name() + ".IFQ:latency")
+ .desc("avg IFQ occupant latency (cycle's)")
+ .flags(Stats::total)
+ ;
+
+ IFQFullRate
+ .name(name() + ".IFQ:fullRate")
+ .desc("fraction of time (cycles) IFQ was full")
+ .flags(Stats::total)
+ ;
+ IFQFullRate = IFQFcount * Stats::constant(100) / cpu->numCycles;
+
+ dispatchCountStat
+ .name(name() + ".DIS:count")
+ .desc("cumulative count of dispatched insts")
+ .flags(Stats::total)
+ ;
+
+ dispatchedSerializing
+ .name(name() + ".DIS:serializingInsts")
+ .desc("count of serializing insts dispatched")
+ .flags(Stats::total)
+ ;
+
+ dispatchedTempSerializing
+ .name(name() + ".DIS:tempSerializingInsts")
+ .desc("count of temporary serializing insts dispatched")
+ .flags(Stats::total)
+ ;
+
+ dispatchSerializeStallCycles
+ .name(name() + ".DIS:serializeStallCycles")
+ .desc("count of cycles dispatch stalled for serializing inst")
+ .flags(Stats::total)
+ ;
+
+ dispatchRate
+ .name(name() + ".DIS:rate")
+ .desc("dispatched insts per cycle")
+ .flags(Stats::total)
+ ;
+ dispatchRate = dispatchCountStat / cpu->numCycles;
+
+ regIntFull
+ .name(name() + ".REG:int:full")
+ .desc("number of cycles where there were no INT registers")
+ ;
+
+ regFpFull
+ .name(name() + ".REG:fp:full")
+ .desc("number of cycles where there were no FP registers")
+ ;
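+ // Little's law: average occupancy divided by dispatch rate gives
+ // the average number of cycles an entry waits in the IFQ.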
+ IFQLatency = IFQOccupancy / dispatchRate;
+
+ branchPred.regStats();
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::tick()
+{
+ if (switchedOut)
+ return;
+
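+ // Model the front-end pipeline depth: insts counted in
+ // numInstsReady[0] when fetched become visible here 'latency'
+ // cycles later (read via numInstsReady[-latency]) and are then
+ // moved from the instBuffer to the feBuffer for the back end.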
+ for (int insts_to_queue = numInstsReady[-latency];
+ !instBuffer.empty() && insts_to_queue;
+ --insts_to_queue)
+ {
+ DPRINTF(FE, "Transferring instruction [sn:%lli] to the feBuffer\n",
+ instBuffer.front()->seqNum);
+ feBuffer.push_back(instBuffer.front());
+ instBuffer.pop_front();
+ }
+
+ numInstsReady.advance();
+
+ // @todo: Maybe I want to just have direct communication...
+ if (fromCommit->doneSeqNum) {
+ branchPred.update(fromCommit->doneSeqNum, 0);
+ }
+
+ IFQCount += instBufferSize;
+ IFQFcount += instBufferSize == maxInstBufferSize;
+
+ // Fetch cache line
+ if (status == IcacheAccessComplete) {
+ cacheBlkValid = true;
+
+ status = Running;
+// if (barrierInst)
+// status = SerializeBlocked;
+ if (freeRegs <= 0)
+ status = RenameBlocked;
+ checkBE();
+ } else if (status == IcacheWaitResponse || status == IcacheWaitRetry) {
+ DPRINTF(FE, "Still in Icache wait.\n");
+ icacheStallCycles++;
+ return;
+ }
+
+ if (status == RenameBlocked || status == SerializeBlocked ||
+ status == TrapPending || status == BEBlocked) {
+ // Will cause a one cycle bubble between changing state and
+ // restarting.
+ DPRINTF(FE, "In blocked status.\n");
+
+ fetchBlockedCycles++;
+
+ if (status == SerializeBlocked) {
+ dispatchSerializeStallCycles++;
+ }
+ updateStatus();
+ return;
+ } else if (status == QuiescePending) {
+ DPRINTF(FE, "Waiting for quiesce to execute or get squashed.\n");
+ return;
+ } else if (status != IcacheAccessComplete) {
+ if (fetchCacheLineNextCycle) {
+ Fault fault = fetchCacheLine();
+ if (fault != NoFault) {
+ handleFault(fault);
+ fetchFault = fault;
+ return;
+ }
+ fetchCacheLineNextCycle = false;
+ }
+ // If miss, stall until it returns.
+ if (status == IcacheWaitResponse || status == IcacheWaitRetry) {
+ // Tell CPU to not tick me for now.
+ return;
+ }
+ }
+
+ fetchCycles++;
+
+ int num_inst = 0;
+
+ // Otherwise loop and process instructions.
+ // One way to hack infinite width is to set width and maxInstBufferSize
+ // both really high. Inelegant, but probably will work.
+ while (num_inst < width &&
+ instBufferSize < maxInstBufferSize) {
+ // Get instruction from cache line.
+ DynInstPtr inst = getInstFromCacheline();
+
+ if (!inst) {
+ // PC is no longer in the cache line, end fetch.
+ // Might want to check this at the end of the cycle so that
+ // there's no cycle lost to checking for a new cache line.
+ DPRINTF(FE, "Need to get new cache line\n");
+ fetchCacheLineNextCycle = true;
+ break;
+ }
+
+ processInst(inst);
+
+ if (status == SerializeBlocked) {
+ break;
+ }
+
+ // Possibly push into a time buffer that estimates the front end
+ // latency
+ instBuffer.push_back(inst);
+ ++instBufferSize;
+ numInstsReady[0]++;
+ ++num_inst;
+
+#if FULL_SYSTEM
+ if (inst->isQuiesce()) {
+// warn("%lli: Quiesce instruction encountered, halting fetch!", curTick);
+ status = QuiescePending;
+ break;
+ }
+#endif
+
+ if (inst->predTaken()) {
+ // Start over with tick?
+ break;
+ } else if (freeRegs <= 0) {
+ DPRINTF(FE, "Ran out of free registers to rename to!\n");
+ status = RenameBlocked;
+ break;
+ } else if (serializeNext) {
+ break;
+ }
+ }
+
+ fetchNisnDist.sample(num_inst);
+ checkBE();
+
+ DPRINTF(FE, "Num insts processed: %i, Inst Buffer size: %i, Free "
+ "Regs %i\n", num_inst, instBufferSize, freeRegs);
+}
+
+template <class Impl>
+Fault
+FrontEnd<Impl>::fetchCacheLine()
+{
+ // Read a cache line, based on the current PC.
+#if FULL_SYSTEM
+ // Flag to say whether or not address is physical addr.
+ unsigned flags = cpu->inPalMode(PC) ? PHYSICAL : 0;
+#else
+ unsigned flags = 0;
+#endif // FULL_SYSTEM
+ Fault fault = NoFault;
+
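+ // If an interrupt is pending, don't start a new fetch unless this
+ // is a PAL-code fetch (physical access, flags != 0).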
+ if (interruptPending && flags == 0) {
+ return fault;
+ }
+
+ // Align the fetch PC so it's at the start of a cache block.
+ Addr fetch_PC = icacheBlockAlignPC(PC);
+
+ DPRINTF(FE, "Fetching cache line starting at %#x.\n", fetch_PC);
+
+ // Set up the memReq to do a read of the first instruction's address.
+ // Set the appropriate read size and flags as well.
+ memReq = new Request(0, fetch_PC, cacheBlkSize, flags,
+ fetch_PC, cpu->readCpuId(), 0);
+
+ // Translate the instruction request.
+ fault = cpu->translateInstReq(memReq, thread);
+
+ // Now do the timing access to see whether or not the instruction
+ // exists within the cache.
+ if (fault == NoFault) {
+#if 0
+ if (cpu->system->memctrl->badaddr(memReq->paddr) ||
+ memReq->flags & UNCACHEABLE) {
+ DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a "
+ "misspeculating path!",
+ memReq->paddr);
+ return TheISA::genMachineCheckFault();
+ }
+#endif
+
+ // Build packet here.
+ PacketPtr data_pkt = new Packet(memReq,
+ Packet::ReadReq, Packet::Broadcast);
+ data_pkt->dataStatic(cacheData);
+
+ if (!icachePort.sendTiming(data_pkt)) {
+ assert(retryPkt == NULL);
+ DPRINTF(Fetch, "Out of MSHRs!\n");
+ status = IcacheWaitRetry;
+ retryPkt = data_pkt;
+ cacheBlocked = true;
+ return NoFault;
+ }
+
+ status = IcacheWaitResponse;
+ }
+
+ // Note that this will set the cache block PC a bit earlier than it should
+ // be set.
+ cacheBlkPC = fetch_PC;
+
+ ++fetchedCacheLines;
+
+ DPRINTF(FE, "Done fetching cache line.\n");
+
+ return fault;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::processInst(DynInstPtr &inst)
+{
+ if (processBarriers(inst)) {
+ return;
+ }
+
+ Addr inst_PC = inst->readPC();
+
+ if (!inst->isControl()) {
+ inst->setPredTarg(inst->readNextPC());
+ } else {
+ fetchedBranches++;
+ if (branchPred.predict(inst, inst_PC, inst->threadNumber)) {
+ predictedBranches++;
+ }
+ }
+
+ Addr next_PC = inst->readPredTarg();
+
+ DPRINTF(FE, "[sn:%lli] Predicted and processed inst PC %#x, next PC "
+ "%#x\n", inst->seqNum, inst_PC, next_PC);
+
+// inst->setNextPC(next_PC);
+
+ // Not sure where I should set this
+ PC = next_PC;
+
+ renameInst(inst);
+}
+
+template <class Impl>
+bool
+FrontEnd<Impl>::processBarriers(DynInstPtr &inst)
+{
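+ // serializeNext is set when a serialize-after inst (e.g. a store
+ // conditional) is seen; the following inst is then marked
+ // serialize-before so the back end drains before it issues.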
+ if (serializeNext) {
+ inst->setSerializeBefore();
+ serializeNext = false;
+ } else if (!inst->isSerializing() &&
+ !inst->isIprAccess() &&
+ !inst->isStoreConditional()) {
+ return false;
+ }
+
+ if ((inst->isIprAccess() || inst->isSerializeBefore()) &&
+ !inst->isSerializeHandled()) {
+ DPRINTF(FE, "Serialize before instruction encountered.\n");
+
+ if (!inst->isTempSerializeBefore()) {
+ dispatchedSerializing++;
+ inst->setSerializeHandled();
+ } else {
+ dispatchedTempSerializing++;
+ }
+
+ // Change status over to SerializeBlocked so that other stages know
+ // what this is blocked on.
+// status = SerializeBlocked;
+
+// barrierInst = inst;
+// return true;
+ } else if ((inst->isStoreConditional() || inst->isSerializeAfter())
+ && !inst->isSerializeHandled()) {
+ DPRINTF(FE, "Serialize after instruction encountered.\n");
+
+ inst->setSerializeHandled();
+
+ dispatchedSerializing++;
+
+ serializeNext = true;
+ return false;
+ }
+ return false;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::handleFault(Fault &fault)
+{
+ DPRINTF(FE, "Fault at fetch, telling commit\n");
+
+ // We're blocked on the back end until it handles this fault.
+ status = TrapPending;
+
+ // Get a sequence number.
+ InstSeqNum inst_seq = getAndIncrementInstSeq();
+ // We will use a nop in order to carry the fault.
+ ExtMachInst ext_inst = TheISA::NoopMachInst;
+
+ // Create a new DynInst from the dummy nop.
+ DynInstPtr instruction = new DynInst(ext_inst, PC,
+ PC+sizeof(MachInst),
+ inst_seq, cpu);
+ instruction->setPredTarg(instruction->readNextPC());
+// instruction->setThread(tid);
+
+// instruction->setASID(tid);
+
+ instruction->setThreadState(thread);
+
+ instruction->traceData = NULL;
+
+ instruction->fault = fault;
+ instruction->setCanIssue();
+ instBuffer.push_back(instruction);
+ numInstsReady[0]++;
+ ++instBufferSize;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
+ const bool is_branch, const bool branch_taken)
+{
+ DPRINTF(FE, "Squashing from [sn:%lli], setting PC to %#x\n",
+ squash_num, next_PC);
+
+ if (fetchFault != NoFault)
+ fetchFault = NoFault;
+
+ while (!instBuffer.empty() &&
+ instBuffer.back()->seqNum > squash_num) {
+ DynInstPtr inst = instBuffer.back();
+
+ DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n",
+ inst->seqNum, inst->readPC());
+
+ inst->clearDependents();
+
+ instBuffer.pop_back();
+ --instBufferSize;
+
+ freeRegs += inst->numDestRegs();
+ }
+
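+ // Instructions that have already cleared the modeled front-end
+ // latency sit in the feBuffer; they must be squashed as well.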
+ while (!feBuffer.empty() &&
+ feBuffer.back()->seqNum > squash_num) {
+ DynInstPtr inst = feBuffer.back();
+
+ DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n",
+ inst->seqNum, inst->readPC());
+
+ inst->clearDependents();
+
+ feBuffer.pop_back();
+ --instBufferSize;
+
+ freeRegs += inst->numDestRegs();
+ }
+
+ // Copy over rename table from the back end.
+ renameTable.copyFrom(backEnd->renameTable);
+
+ PC = next_PC;
+
+ // Update BP with proper information.
+ if (is_branch) {
+ branchPred.squash(squash_num, next_PC, branch_taken, 0);
+ } else {
+ branchPred.squash(squash_num, 0);
+ }
+
+ // Clear the icache miss if it's outstanding.
+ if (status == IcacheWaitResponse) {
+ DPRINTF(FE, "Squashing outstanding Icache access.\n");
+ memReq = NULL;
+ }
+/*
+ if (status == SerializeBlocked) {
+ assert(barrierInst->seqNum > squash_num);
+ barrierInst = NULL;
+ }
+*/
+ // Unless this squash originated from the front end, we're probably
+ // in running mode now.
+ // Actually might want to make this latency dependent.
+ status = Running;
+ fetchCacheLineNextCycle = true;
+}
+
+template <class Impl>
+typename Impl::DynInstPtr
+FrontEnd<Impl>::getInst()
+{
+ if (feBuffer.empty()) {
+ return NULL;
+ }
+
+ DynInstPtr inst = feBuffer.front();
+
+ if (inst->isSerializeBefore() || inst->isIprAccess()) {
+ DPRINTF(FE, "Back end is getting a serialize before inst\n");
+ if (!backEnd->robEmpty()) {
+ DPRINTF(FE, "Rob is not empty yet, not returning inst\n");
+ return NULL;
+ }
+ inst->clearSerializeBefore();
+ }
+
+ feBuffer.pop_front();
+
+ --instBufferSize;
+
+ dispatchCountStat++;
+
+ return inst;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::processCacheCompletion(PacketPtr pkt)
+{
+ DPRINTF(FE, "Processing cache completion\n");
+
+ // Do something here.
+ if (status != IcacheWaitResponse ||
+ pkt->req != memReq ||
+ switchedOut) {
+ DPRINTF(FE, "Previous fetch was squashed.\n");
+ fetchIcacheSquashes++;
+ delete pkt->req;
+ delete pkt;
+ return;
+ }
+
+ status = IcacheAccessComplete;
+
+/* if (checkStall(tid)) {
+ fetchStatus[tid] = Blocked;
+ } else {
+ fetchStatus[tid] = IcacheMissComplete;
+ }
+*/
+// memcpy(cacheData, memReq->data, memReq->size);
+
+ // Reset the completion event to NULL.
+// memReq->completionEvent = NULL;
+ delete pkt->req;
+ delete pkt;
+ memReq = NULL;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::addFreeRegs(int num_freed)
+{
+ if (status == RenameBlocked && freeRegs + num_freed > 0) {
+ status = Running;
+ }
+
+ DPRINTF(FE, "Adding %i freed registers\n", num_freed);
+
+ freeRegs += num_freed;
+
+// assert(freeRegs <= numPhysRegs);
+ if (freeRegs > numPhysRegs)
+ freeRegs = numPhysRegs;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::recvRetry()
+{
+ assert(cacheBlocked);
+ if (retryPkt != NULL) {
+ assert(status == IcacheWaitRetry);
+
+ if (icachePort.sendTiming(retryPkt)) {
+ status = IcacheWaitResponse;
+ retryPkt = NULL;
+ cacheBlocked = false;
+ }
+ } else {
+ // Access has been squashed since it was sent out. Just clear
+ // the cache being blocked.
+ cacheBlocked = false;
+ }
+
+}
+
+template <class Impl>
+bool
+FrontEnd<Impl>::updateStatus()
+{
+ bool serialize_block = !backEnd->robEmpty() || instBufferSize;
+ bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked();
+ bool ret_val = false;
+
+ if (status == SerializeBlocked && !serialize_block) {
+ status = SerializeComplete;
+ ret_val = true;
+ }
+
+ if (status == BEBlocked && !be_block) {
+// if (barrierInst) {
+// status = SerializeBlocked;
+// } else {
+ status = Running;
+// }
+ ret_val = true;
+ }
+ return ret_val;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::checkBE()
+{
+ bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked();
+ if (be_block) {
+ if (status == Running || status == Idle) {
+ status = BEBlocked;
+ }
+ }
+}
+
+template <class Impl>
+typename Impl::DynInstPtr
+FrontEnd<Impl>::getInstFromCacheline()
+{
+/*
+ if (status == SerializeComplete) {
+ DynInstPtr inst = barrierInst;
+ status = Running;
+ barrierInst = NULL;
+ inst->clearSerializeBefore();
+ return inst;
+ }
+*/
+ InstSeqNum inst_seq;
+ MachInst inst;
+ // @todo: Fix this magic number used here to handle word offset (and
+ // getting rid of PAL bit)
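+ // Instructions are 4 bytes and word-aligned; the low two bits of
+ // the PC can carry the Alpha PAL-mode flag, so mask them off the
+ // block offset.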
+ unsigned offset = (PC & cacheBlkMask) & ~3;
+
+ // PC of inst is not in this cache block
+ if (PC >= (cacheBlkPC + cacheBlkSize) || PC < cacheBlkPC || !cacheBlkValid) {
+ return NULL;
+ }
+
+ //////////////////////////
+ // Fetch one instruction
+ //////////////////////////
+
+ // Get a sequence number.
+ inst_seq = getAndIncrementInstSeq();
+
+ // Make sure this is a valid index.
+ assert(offset <= cacheBlkSize - sizeof(MachInst));
+
+ // Get the instruction from the array of the cache line.
+ inst = htog(*reinterpret_cast<MachInst *>(&cacheData[offset]));
+
+ ExtMachInst decode_inst = TheISA::makeExtMI(inst, PC);
+
+ // Create a new DynInst from the instruction fetched.
+ DynInstPtr instruction = new DynInst(decode_inst, PC, PC+sizeof(MachInst),
+ inst_seq, cpu);
+
+ instruction->setThreadState(thread);
+
+ DPRINTF(FE, "Instruction [sn:%lli] created, with PC %#x\n%s\n",
+ inst_seq, instruction->readPC(),
+ instruction->staticInst->disassemble(PC));
+
+ instruction->traceData =
+ Trace::getInstRecord(curTick, tc,
+ instruction->staticInst,
+ instruction->readPC());
+
+ // Increment stat of fetched instructions.
+ ++fetchedInsts;
+
+ return instruction;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::renameInst(DynInstPtr &inst)
+{
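+ // Renaming here maps each architectural register directly to the
+ // DynInst that produces it: sources link to (and possibly wait on)
+ // their producers, and each destination overwrites its table entry.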
+ DynInstPtr src_inst = NULL;
+ int num_src_regs = inst->numSrcRegs();
+ if (num_src_regs == 0) {
+ inst->setCanIssue();
+ } else {
+ for (int i = 0; i < num_src_regs; ++i) {
+ src_inst = renameTable[inst->srcRegIdx(i)];
+
+ inst->setSrcInst(src_inst, i);
+
+ DPRINTF(FE, "[sn:%lli]: Src reg %i is inst [sn:%lli]\n",
+ inst->seqNum, (int)inst->srcRegIdx(i), src_inst->seqNum);
+
+ if (src_inst->isResultReady()) {
+ DPRINTF(FE, "Reg ready.\n");
+ inst->markSrcRegReady(i);
+ } else {
+ DPRINTF(FE, "Adding to dependent list.\n");
+ src_inst->addDependent(inst);
+ }
+ }
+ }
+
+ for (int i = 0; i < inst->numDestRegs(); ++i) {
+ RegIndex idx = inst->destRegIdx(i);
+
+ DPRINTF(FE, "Dest reg %i is now inst [sn:%lli], was previously "
+ "[sn:%lli]\n",
+ (int)inst->destRegIdx(i), inst->seqNum,
+ renameTable[idx]->seqNum);
+
+ inst->setPrevDestInst(renameTable[idx], i);
+
+ renameTable[idx] = inst;
+ --freeRegs;
+ }
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::wakeFromQuiesce()
+{
+ DPRINTF(FE, "Waking up from quiesce\n");
+ // Hopefully this is safe
+ status = Running;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::switchOut()
+{
+ switchedOut = true;
+ cpu->signalSwitched();
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::doSwitchOut()
+{
+ memReq = NULL;
+ squash(0, 0);
+ instBuffer.clear();
+ instBufferSize = 0;
+ feBuffer.clear();
+ status = Idle;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::takeOverFrom(ThreadContext *old_tc)
+{
+ assert(freeRegs == numPhysRegs);
+ fetchCacheLineNextCycle = true;
+
+ cacheBlkValid = false;
+
+#if !FULL_SYSTEM
+// pTable = params->pTable;
+#endif
+ fetchFault = NoFault;
+ serializeNext = false;
+ barrierInst = NULL;
+ status = Running;
+ switchedOut = false;
+ interruptPending = false;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::dumpInsts()
+{
+ cprintf("instBuffer size: %i\n", instBuffer.size());
+
+ InstBuffIt buff_it = instBuffer.begin();
+
+ for (int num = 0; buff_it != instBuffer.end(); num++) {
+ cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n"
+ "Squashed:%i\n\n",
+ num, (*buff_it)->readPC(), (*buff_it)->threadNumber,
+ (*buff_it)->seqNum, (*buff_it)->isIssued(),
+ (*buff_it)->isSquashed());
+ buff_it++;
+ }
+}
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include "arch/faults.hh"
+#include "arch/types.hh"
+#include "cpu/ozone/inorder_back_end.hh"
+#include "cpu/ozone/thread_state.hh"
+
+template <class Impl>
+InorderBackEnd<Impl>::InorderBackEnd(Params *params)
+ : squashPending(false),
+ squashSeqNum(0),
+ squashNextPC(0),
+ faultFromFetch(NoFault),
+ interruptBlocked(false),
+ cacheCompletionEvent(this),
+ dcacheInterface(params->dcacheInterface),
+ width(params->backEndWidth),
+ latency(params->backEndLatency),
+ squashLatency(params->backEndSquashLatency),
+ numInstsToWB(0, latency + 1)
+{
+ instsAdded = numInstsToWB.getWire(latency);
+ instsToExecute = numInstsToWB.getWire(0);
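+
+ // numInstsToWB acts as a delay line: counts written on the
+ // instsAdded wire (offset 'latency') become visible on the
+ // instsToExecute wire (offset 0) after 'latency' advances,
+ // modeling the back end's execution latency.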
+
+ memReq = new MemReq;
+ memReq->data = new uint8_t[64];
+ status = Running;
+}
+
+template <class Impl>
+std::string
+InorderBackEnd<Impl>::name() const
+{
+ return cpu->name() + ".inorderbackend";
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::setXC(ExecContext *xc_ptr)
+{
+ xc = xc_ptr;
+ memReq->xc = xc;
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::setThreadState(OzoneThreadState<Impl> *thread_ptr)
+{
+ thread = thread_ptr;
+ thread->setFuncExeInst(0);
+}
+
+#if FULL_SYSTEM
+template <class Impl>
+void
+InorderBackEnd<Impl>::checkInterrupts()
+{
+ //Check if there are any outstanding interrupts
+ //Handle the interrupts
+ int ipl = 0;
+ int summary = 0;
+
+ cpu->checkInterrupts = false;
+
+ if (thread->readMiscReg(IPR_ASTRR))
+ panic("asynchronous traps not implemented\n");
+
+ if (thread->readMiscReg(IPR_SIRR)) {
+ for (int i = INTLEVEL_SOFTWARE_MIN;
+ i < INTLEVEL_SOFTWARE_MAX; i++) {
+ if (thread->readMiscReg(IPR_SIRR) & (ULL(1) << i)) {
+ // See table 4-19 of the 21164 hardware reference
+ ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1;
+ summary |= (ULL(1) << i);
+ }
+ }
+ }
+
+ uint64_t interrupts = cpu->intr_status();
+
+ if (interrupts) {
+ for (int i = INTLEVEL_EXTERNAL_MIN;
+ i < INTLEVEL_EXTERNAL_MAX; i++) {
+ if (interrupts & (ULL(1) << i)) {
+ // See table 4-19 of the 21164 hardware reference
+ ipl = i;
+ summary |= (ULL(1) << i);
+ }
+ }
+ }
+
+ if (ipl && ipl > thread->readMiscReg(IPR_IPLR)) {
+ thread->inSyscall = true;
+
+ thread->setMiscReg(IPR_ISR, summary);
+ thread->setMiscReg(IPR_INTID, ipl);
+ Fault(new InterruptFault)->invoke(xc);
+ DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
+ thread->readMiscReg(IPR_IPLR), ipl, summary);
+
+ // May need to go 1 inst prior
+ squashPending = true;
+
+ thread->inSyscall = false;
+
+ setSquashInfoFromXC();
+ }
+}
+#endif
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::tick()
+{
+ // Squash due to an external source
+ // Not sure if this or an interrupt has higher priority
+ if (squashPending) {
+ squash(squashSeqNum, squashNextPC);
+ return;
+ }
+
+ // if (interrupt) then set thread PC, stall front end, record that
+ // I'm waiting for it to drain. (for now just squash)
+#if FULL_SYSTEM
+ if (interruptBlocked ||
+ (cpu->checkInterrupts &&
+ cpu->check_interrupts() &&
+ !cpu->inPalMode())) {
+ if (!robEmpty()) {
+ interruptBlocked = true;
+ } else if (robEmpty() && cpu->inPalMode()) {
+ // Will need to let the front end continue a bit until
+ // we're out of pal mode. Hopefully we never get into an
+ // infinite loop...
+ interruptBlocked = false;
+ } else {
+ interruptBlocked = false;
+ checkInterrupts();
+ return;
+ }
+ }
+#endif
+
+ if (status != DcacheMissLoadStall &&
+ status != DcacheMissStoreStall) {
+ for (int i = 0; i < width && (*instsAdded) < width; ++i) {
+ DynInstPtr inst = frontEnd->getInst();
+
+ if (!inst)
+ break;
+
+ instList.push_back(inst);
+
+ (*instsAdded)++;
+ }
+
+#if FULL_SYSTEM
+ if (faultFromFetch && robEmpty() && frontEnd->isEmpty()) {
+ handleFault();
+ } else {
+ executeInsts();
+ }
+#else
+ executeInsts();
+#endif
+ }
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::executeInsts()
+{
+ bool completed_last_inst = true;
+ int insts_to_execute = *instsToExecute;
+ int freed_regs = 0;
+
+ while (insts_to_execute > 0) {
+ assert(!instList.empty());
+ DynInstPtr inst = instList.front();
+
+ commitPC = inst->readPC();
+
+ thread->setPC(commitPC);
+ thread->setNextPC(inst->readNextPC());
+
+#if FULL_SYSTEM
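+ // Service any PC events registered at this PC (e.g. kernel
+ // function hooks); if one redirects execution, stop committing
+ // and schedule a squash for next cycle.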
+ int count = 0;
+ Addr oldpc;
+ do {
+ if (count == 0)
+ assert(!thread->inSyscall && !thread->trapPending);
+ oldpc = thread->readPC();
+ cpu->system->pcEventQueue.service(
+ thread->getXCProxy());
+ count++;
+ } while (oldpc != thread->readPC());
+ if (count > 1) {
+ DPRINTF(IBE, "PC skip function event, stopping commit\n");
+ completed_last_inst = false;
+ squashPending = true;
+ break;
+ }
+#endif
+
+ Fault inst_fault = NoFault;
+
+ if (status == DcacheMissComplete) {
+ DPRINTF(IBE, "Completing inst [sn:%lli]\n", inst->seqNum);
+ status = Running;
+ } else if (inst->isMemRef() && status != DcacheMissComplete &&
+ (!inst->isDataPrefetch() && !inst->isInstPrefetch())) {
+ DPRINTF(IBE, "Initiating mem op inst [sn:%lli] PC: %#x\n",
+ inst->seqNum, inst->readPC());
+
+ cacheCompletionEvent.inst = inst;
+ inst_fault = inst->initiateAcc();
+ if (inst_fault == NoFault &&
+ status != DcacheMissLoadStall &&
+ status != DcacheMissStoreStall) {
+ inst_fault = inst->completeAcc();
+ }
+ ++thread->funcExeInst;
+ } else {
+ DPRINTF(IBE, "Executing inst [sn:%lli] PC: %#x\n",
+ inst->seqNum, inst->readPC());
+ inst_fault = inst->execute();
+ ++thread->funcExeInst;
+ }
+
+ // Will need to be able to break this loop in case the load
+ // misses. Split access/complete ops would be useful here
+ // with writeback events.
+ if (status == DcacheMissLoadStall) {
+ *instsToExecute = insts_to_execute;
+
+ completed_last_inst = false;
+ break;
+ } else if (status == DcacheMissStoreStall) {
+ // Figure out how to fix this hack. Probably have DcacheMissLoad
+ // vs DcacheMissStore.
+ *instsToExecute = insts_to_execute;
+ completed_last_inst = false;
+/*
+ instList.pop_front();
+ --insts_to_execute;
+ if (inst->traceData) {
+ inst->traceData->finalize();
+ }
+*/
+
+ // Don't really need to stop for a store stall as long as
+ // the memory system is able to handle store forwarding
+ // and such. Breaking out might help avoid the cache
+ // interface becoming blocked.
+ break;
+ }
+
+ inst->setExecuted();
+ inst->setResultReady();
+ inst->setCanCommit();
+
+ instList.pop_front();
+
+ --insts_to_execute;
+ --(*instsToExecute);
+
+ if (inst->traceData) {
+ inst->traceData->finalize();
+ inst->traceData = NULL;
+ }
+
+ if (inst_fault != NoFault) {
+#if FULL_SYSTEM
+ DPRINTF(IBE, "Inst [sn:%lli] PC %#x has a fault\n",
+ inst->seqNum, inst->readPC());
+
+ assert(!thread->inSyscall);
+
+ thread->inSyscall = true;
+
+ // Hack for now; DTB will sometimes need the machine instruction
+ // for when faults happen. So we will set it here, prior to the
+ // DTB possibly needing it for this translation.
+ thread->setInst(
+ static_cast<TheISA::MachInst>(inst->staticInst->machInst));
+
+ // Consider holding onto the trap and waiting until the trap event
+ // happens for this to be executed.
+ inst_fault->invoke(xc);
+
+ // Exit state update mode to avoid accidental updating.
+ thread->inSyscall = false;
+
+ squashPending = true;
+
+ // Generate trap squash event.
+// generateTrapEvent(tid);
+ completed_last_inst = false;
+ break;
+#else // !FULL_SYSTEM
+ panic("fault (%d) detected @ PC %08p", inst_fault,
+ inst->PC);
+#endif // FULL_SYSTEM
+ }
+
+ for (int i = 0; i < inst->numDestRegs(); ++i) {
+ renameTable[inst->destRegIdx(i)] = inst;
+ thread->renameTable[inst->destRegIdx(i)] = inst;
+ ++freed_regs;
+ }
+
+ inst->clearDependents();
+
+ comm->access(0)->doneSeqNum = inst->seqNum;
+
+ if (inst->mispredicted()) {
+ squash(inst->seqNum, inst->readNextPC());
+
+ thread->setNextPC(inst->readNextPC());
+
+ break;
+ } else if (squashPending) {
+ // Something external happened that caused the CPU to squash.
+ // Break out of commit and handle the squash next cycle.
+ break;
+ }
+ // If it didn't mispredict, then it executed fine. Send back its
+ // registers and BP info? What about insts that may still have
+ // latency, like loads? Probably can send back the information after
+ // it is completed.
+
+ // keep an instruction count
+ cpu->numInst++;
+ thread->numInsts++;
+ }
+
+ frontEnd->addFreeRegs(freed_regs);
+
+ assert(insts_to_execute >= 0);
+
+ // Should only advance this if I have executed all instructions.
+ if (insts_to_execute == 0) {
+ numInstsToWB.advance();
+ }
+
+ // Should I set the PC to the next PC here? What do I set next PC to?
+ if (completed_last_inst) {
+ thread->setPC(thread->readNextPC());
+ thread->setNextPC(thread->readPC() + sizeof(MachInst));
+ }
+
+ if (squashPending) {
+ setSquashInfoFromXC();
+ }
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::handleFault()
+{
+ DPRINTF(Commit, "Handling fault from fetch\n");
+
+ assert(!thread->inSyscall);
+
+ thread->inSyscall = true;
+
+ // Consider holding onto the trap and waiting until the trap event
+ // happens for this to be executed.
+ faultFromFetch->invoke(xc);
+
+ // Exit state update mode to avoid accidental updating.
+ thread->inSyscall = false;
+
+ squashPending = true;
+
+ setSquashInfoFromXC();
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC)
+{
+ DPRINTF(IBE, "Squashing from [sn:%lli], setting PC to %#x\n",
+ squash_num, next_PC);
+
+ InstListIt squash_it = --(instList.end());
+
+ int freed_regs = 0;
+
+ while (!instList.empty() && (*squash_it)->seqNum > squash_num) {
+ DynInstPtr inst = *squash_it;
+
+ DPRINTF(IBE, "Squashing instruction PC %#x, [sn:%lli].\n",
+ inst->readPC(),
+ inst->seqNum);
+
+ // May cause problems with misc regs
+ freed_regs += inst->numDestRegs();
+ inst->clearDependents();
+ squash_it--;
+ instList.pop_back();
+ }
+
+ frontEnd->addFreeRegs(freed_regs);
+
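+ // Cycle the writeback delay line through its full depth so that
+ // any in-flight instruction counts are discarded by the squash.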
+ for (int i = 0; i < latency+1; ++i) {
+ numInstsToWB.advance();
+ }
+
+ squashPending = false;
+
+ // Probably want to make sure that this squash is the one that set the
+ // thread into inSyscall mode.
+ thread->inSyscall = false;
+
+ // Tell front end to squash, reset PC to new one.
+ frontEnd->squash(squash_num, next_PC);
+
+ faultFromFetch = NULL;
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::squashFromXC()
+{
+ // Record that I need to squash
+ squashPending = true;
+
+ thread->inSyscall = true;
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::setSquashInfoFromXC()
+{
+ // Need to handle the case of the instList being empty. In that case
+ // probably any number works, except maybe with stores in the store buffer.
+ squashSeqNum = instList.empty() ? 0 : instList.front()->seqNum - 1;
+
+ squashNextPC = thread->PC;
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::fetchFault(Fault &fault)
+{
+ faultFromFetch = fault;
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::dumpInsts()
+{
+ int num = 0;
+ int valid_num = 0;
+
+ InstListIt inst_list_it = instList.begin();
+
+ cprintf("Inst list size: %i\n", instList.size());
+
+ while (inst_list_it != instList.end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+ if (!(*inst_list_it)->isSquashed()) {
+ if (!(*inst_list_it)->isIssued()) {
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ } else if ((*inst_list_it)->isMemRef() &&
+ !(*inst_list_it)->memOpDone) {
+ // Loads that have not been marked as executed still count
+ // towards the total instructions.
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ }
+ }
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Issued:%i\nSquashed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+
+ if ((*inst_list_it)->isMemRef()) {
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ }
+
+ cprintf("\n");
+
+ inst_list_it++;
+ ++num;
+ }
+}
+
+template <class Impl>
+InorderBackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent(
+ InorderBackEnd *_be)
+ : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
+{
+// this->setFlags(Event::AutoDelete);
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::DCacheCompletionEvent::process()
+{
+ inst->completeAcc();
+ be->status = DcacheMissComplete;
+}
+
+template <class Impl>
+const char *
+InorderBackEnd<Impl>::DCacheCompletionEvent::description()
+{
+ return "DCache completion event";
+}
--- /dev/null
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+// Todo:
+// Current ordering allows for 0 cycle added-to-scheduled. Could maybe fake
+// it; either do in reverse order, or have added instructions put into a
+// different ready queue that, in scheduleReadyInsts(), gets put onto the
+// normal ready queue. This would however give only a one cycle delay,
+// but probably is more flexible to actually add in a delay parameter than
+// just running it backwards.
+
+#include <vector>
+
+#include "sim/root.hh"
+
+#include "cpu/ozone/inst_queue.hh"
+#if 0
+template <class Impl>
+InstQueue<Impl>::FUCompletion::FUCompletion(DynInstPtr &_inst,
+ int fu_idx,
+ InstQueue<Impl> *iq_ptr)
+ : Event(&mainEventQueue, Stat_Event_Pri),
+ inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr)
+{
+ this->setFlags(Event::AutoDelete);
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::FUCompletion::process()
+{
+ iqPtr->processFUCompletion(inst, fuIdx);
+}
+
+
+template <class Impl>
+const char *
+InstQueue<Impl>::FUCompletion::description()
+{
+ return "Functional unit completion event";
+}
+#endif
+template <class Impl>
+InstQueue<Impl>::InstQueue(Params *params)
+ : dcacheInterface(params->dcacheInterface),
+// fuPool(params->fuPool),
+ numEntries(params->numIQEntries),
+ totalWidth(params->issueWidth),
+// numPhysIntRegs(params->numPhysIntRegs),
+// numPhysFloatRegs(params->numPhysFloatRegs),
+ commitToIEWDelay(params->commitToIEWDelay)
+{
+// assert(fuPool);
+
+// numThreads = params->numberOfThreads;
+ numThreads = 1;
+
+ //Initialize thread IQ counts
+ for (int i = 0; i < numThreads; i++) {
+ count[i] = 0;
+ }
+
+ // Initialize the number of free IQ entries.
+ freeEntries = numEntries;
+
+ // Set the number of physical registers as the number of int + float
+// numPhysRegs = numPhysIntRegs + numPhysFloatRegs;
+
+// DPRINTF(IQ, "There are %i physical registers.\n", numPhysRegs);
+
+ //Create an entry for each physical register within the
+ //dependency graph.
+// dependGraph = new DependencyEntry[numPhysRegs];
+
+ // Resize the register scoreboard.
+// regScoreboard.resize(numPhysRegs);
+/*
+ //Initialize Mem Dependence Units
+ for (int i = 0; i < numThreads; i++) {
+ memDepUnit[i].init(params,i);
+ memDepUnit[i].setIQ(this);
+ }
+
+ // Initialize all the head pointers to point to NULL, and all the
+ // entries as unready.
+ // Note that in actuality, the registers corresponding to the logical
+ // registers start off as ready. However this doesn't matter for the
+ // IQ as the instruction should have been correctly told if those
+ // registers are ready in rename. Thus it can all be initialized as
+ // unready.
+ for (int i = 0; i < numPhysRegs; ++i) {
+ dependGraph[i].next = NULL;
+ dependGraph[i].inst = NULL;
+ regScoreboard[i] = false;
+ }
+*/
+ for (int i = 0; i < numThreads; ++i) {
+ squashedSeqNum[i] = 0;
+ }
+/*
+ for (int i = 0; i < Num_OpClasses; ++i) {
+ queueOnList[i] = false;
+ readyIt[i] = listOrder.end();
+ }
+
+ string policy = params->smtIQPolicy;
+
+ //Convert string to lowercase
+ std::transform(policy.begin(), policy.end(), policy.begin(),
+ (int(*)(int)) tolower);
+
+ //Figure out resource sharing policy
+ if (policy == "dynamic") {
+ iqPolicy = Dynamic;
+
+ //Set Max Entries to Total ROB Capacity
+ for (int i = 0; i < numThreads; i++) {
+ maxEntries[i] = numEntries;
+ }
+
+ } else if (policy == "partitioned") {
+ iqPolicy = Partitioned;
+
+ //@todo:make work if part_amt doesnt divide evenly.
+ int part_amt = numEntries / numThreads;
+
+ //Divide ROB up evenly
+ for (int i = 0; i < numThreads; i++) {
+ maxEntries[i] = part_amt;
+ }
+
+ DPRINTF(Fetch, "IQ sharing policy set to Partitioned:"
+ "%i entries per thread.\n",part_amt);
+
+ } else if (policy == "threshold") {
+ iqPolicy = Threshold;
+
+ double threshold = (double)params->smtIQThreshold / 100;
+
+ int thresholdIQ = (int)((double)threshold * numEntries);
+
+ //Divide up by threshold amount
+ for (int i = 0; i < numThreads; i++) {
+ maxEntries[i] = thresholdIQ;
+ }
+
+ DPRINTF(Fetch, "IQ sharing policy set to Threshold:"
+ "%i entries per thread.\n",thresholdIQ);
+ } else {
+ assert(0 && "Invalid IQ Sharing Policy.Options Are:{Dynamic,"
+ "Partitioned, Threshold}");
+ }
+*/
+}
+
+template <class Impl>
+InstQueue<Impl>::~InstQueue()
+{
+ // Clear the dependency graph
+/*
+ DependencyEntry *curr;
+ DependencyEntry *prev;
+
+ for (int i = 0; i < numPhysRegs; ++i) {
+ curr = dependGraph[i].next;
+
+ while (curr) {
+ DependencyEntry::mem_alloc_counter--;
+
+ prev = curr;
+ curr = prev->next;
+ prev->inst = NULL;
+
+ delete prev;
+ }
+
+ if (dependGraph[i].inst) {
+ dependGraph[i].inst = NULL;
+ }
+
+ dependGraph[i].next = NULL;
+ }
+
+ assert(DependencyEntry::mem_alloc_counter == 0);
+
+ delete [] dependGraph;
+*/
+}
+
+template <class Impl>
+std::string
+InstQueue<Impl>::name() const
+{
+ return cpu->name() + ".iq";
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::regStats()
+{
+ iqInstsAdded
+ .name(name() + ".iqInstsAdded")
+ .desc("Number of instructions added to the IQ (excludes non-spec)")
+ .prereq(iqInstsAdded);
+
+ iqNonSpecInstsAdded
+ .name(name() + ".iqNonSpecInstsAdded")
+ .desc("Number of non-speculative instructions added to the IQ")
+ .prereq(iqNonSpecInstsAdded);
+
+// iqIntInstsAdded;
+
+ iqIntInstsIssued
+ .name(name() + ".iqIntInstsIssued")
+ .desc("Number of integer instructions issued")
+ .prereq(iqIntInstsIssued);
+
+// iqFloatInstsAdded;
+
+ iqFloatInstsIssued
+ .name(name() + ".iqFloatInstsIssued")
+ .desc("Number of float instructions issued")
+ .prereq(iqFloatInstsIssued);
+
+// iqBranchInstsAdded;
+
+ iqBranchInstsIssued
+ .name(name() + ".iqBranchInstsIssued")
+ .desc("Number of branch instructions issued")
+ .prereq(iqBranchInstsIssued);
+
+// iqMemInstsAdded;
+
+ iqMemInstsIssued
+ .name(name() + ".iqMemInstsIssued")
+ .desc("Number of memory instructions issued")
+ .prereq(iqMemInstsIssued);
+
+// iqMiscInstsAdded;
+
+ iqMiscInstsIssued
+ .name(name() + ".iqMiscInstsIssued")
+ .desc("Number of miscellaneous instructions issued")
+ .prereq(iqMiscInstsIssued);
+
+ iqSquashedInstsIssued
+ .name(name() + ".iqSquashedInstsIssued")
+ .desc("Number of squashed instructions issued")
+ .prereq(iqSquashedInstsIssued);
+
+ iqSquashedInstsExamined
+ .name(name() + ".iqSquashedInstsExamined")
+ .desc("Number of squashed instructions iterated over during squash;"
+ " mainly for profiling")
+ .prereq(iqSquashedInstsExamined);
+
+ iqSquashedOperandsExamined
+ .name(name() + ".iqSquashedOperandsExamined")
+ .desc("Number of squashed operands that are examined and possibly "
+ "removed from graph")
+ .prereq(iqSquashedOperandsExamined);
+
+ iqSquashedNonSpecRemoved
+ .name(name() + ".iqSquashedNonSpecRemoved")
+ .desc("Number of squashed non-spec instructions that were removed")
+ .prereq(iqSquashedNonSpecRemoved);
+/*
+ for ( int i=0; i < numThreads; i++) {
+ // Tell mem dependence unit to reg stats as well.
+ memDepUnit[i].regStats();
+ }
+*/
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::setActiveThreads(list<unsigned> *at_ptr)
+{
+ DPRINTF(IQ, "Setting active threads list pointer.\n");
+ activeThreads = at_ptr;
+}
+*/
+template <class Impl>
+void
+InstQueue<Impl>::setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2e_ptr)
+{
+ DPRINTF(IQ, "Set the issue to execute queue.\n");
+ issueToExecuteQueue = i2e_ptr;
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
+{
+ DPRINTF(IQ, "Set the time buffer.\n");
+ timeBuffer = tb_ptr;
+
+ fromCommit = timeBuffer->getWire(-commitToIEWDelay);
+}
+
+template <class Impl>
+int
+InstQueue<Impl>::entryAmount(int num_threads)
+{
+ if (iqPolicy == Partitioned) {
+ return numEntries / num_threads;
+ } else {
+ return 0;
+ }
+}
+
+
+template <class Impl>
+void
+InstQueue<Impl>::resetEntries()
+{
+ if (iqPolicy != Dynamic || numThreads > 1) {
+ int active_threads = (*activeThreads).size();
+
+ list<unsigned>::iterator threads = (*activeThreads).begin();
+ list<unsigned>::iterator list_end = (*activeThreads).end();
+
+ while (threads != list_end) {
+ if (iqPolicy == Partitioned) {
+ maxEntries[*threads++] = numEntries / active_threads;
+ } else if(iqPolicy == Threshold && active_threads == 1) {
+ maxEntries[*threads++] = numEntries;
+ }
+ }
+ }
+}
+*/
+template <class Impl>
+unsigned
+InstQueue<Impl>::numFreeEntries()
+{
+ return freeEntries;
+}
+
+template <class Impl>
+unsigned
+InstQueue<Impl>::numFreeEntries(unsigned tid)
+{
+ return maxEntries[tid] - count[tid];
+}
+
+// Might want to do something more complex if it knows how many instructions
+// will be issued this cycle.
+template <class Impl>
+bool
+InstQueue<Impl>::isFull()
+{
+ return freeEntries == 0;
+}
+
+template <class Impl>
+bool
+InstQueue<Impl>::isFull(unsigned tid)
+{
+ return numFreeEntries(tid) == 0;
+}
+
+template <class Impl>
+bool
+InstQueue<Impl>::hasReadyInsts()
+{
+/*
+ if (!listOrder.empty()) {
+ return true;
+ }
+
+ for (int i = 0; i < Num_OpClasses; ++i) {
+ if (!readyInsts[i].empty()) {
+ return true;
+ }
+ }
+
+ return false;
+*/
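+ // With the per-op-class lists above commented out, readiness reduces
+ // to checking the single unified readyInsts queue.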
+ return !readyInsts.empty();
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::insert(DynInstPtr &new_inst)
+{
+ // Make sure the instruction is valid
+ assert(new_inst);
+
+ DPRINTF(IQ, "Adding instruction PC %#x to the IQ.\n",
+ new_inst->readPC());
+
+ // Check if there are any free entries. Panic if there are none.
+ // Might want to have this return a fault in the future instead of
+ // panicking.
+ assert(freeEntries != 0);
+
+ instList[new_inst->threadNumber].push_back(new_inst);
+
+ // Decrease the number of free entries.
+ --freeEntries;
+
+ //Mark Instruction as in IQ
+// new_inst->setInIQ();
+/*
+ // Look through its source registers (physical regs), and mark any
+ // dependencies.
+ addToDependents(new_inst);
+
+ // Have this instruction set itself as the producer of its destination
+ // register(s).
+ createDependency(new_inst);
+*/
+ // If it's a memory instruction, add it to the memory dependency
+ // unit.
+// if (new_inst->isMemRef()) {
+// memDepUnit[new_inst->threadNumber].insert(new_inst);
+// } else {
+ // If the instruction is ready then add it to the ready list.
+ addIfReady(new_inst);
+// }
+
+ ++iqInstsAdded;
+
+
+ //Update Thread IQ Count
+ count[new_inst->threadNumber]++;
+
+ assert(freeEntries == (numEntries - countInsts()));
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::insertNonSpec(DynInstPtr &new_inst)
+{
+ nonSpecInsts[new_inst->seqNum] = new_inst;
+
+ // @todo: Clean up this code; can do it by setting inst as unable
+ // to issue, then calling normal insert on the inst.
+
+ // Make sure the instruction is valid
+ assert(new_inst);
+
+ DPRINTF(IQ, "Adding instruction PC %#x to the IQ.\n",
+ new_inst->readPC());
+
+ // Check if there are any free entries. Panic if there are none.
+ // Might want to have this return a fault in the future instead of
+ // panicking.
+ assert(freeEntries != 0);
+
+ instList[new_inst->threadNumber].push_back(new_inst);
+
+ // Decrease the number of free entries.
+ --freeEntries;
+
+ //Mark Instruction as in IQ
+// new_inst->setInIQ();
+/*
+ // Have this instruction set itself as the producer of its destination
+ // register(s).
+ createDependency(new_inst);
+
+ // If it's a memory instruction, add it to the memory dependency
+ // unit.
+ if (new_inst->isMemRef()) {
+ memDepUnit[new_inst->threadNumber].insertNonSpec(new_inst);
+ }
+*/
+ ++iqNonSpecInstsAdded;
+
+ //Update Thread IQ Count
+ count[new_inst->threadNumber]++;
+
+ assert(freeEntries == (numEntries - countInsts()));
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::advanceTail(DynInstPtr &inst)
+{
+ // Have this instruction set itself as the producer of its destination
+ // register(s).
+ createDependency(inst);
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::addToOrderList(OpClass op_class)
+{
+ assert(!readyInsts[op_class].empty());
+
+ ListOrderEntry queue_entry;
+
+ queue_entry.queueType = op_class;
+
+ queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
+
+ ListOrderIt list_it = listOrder.begin();
+ ListOrderIt list_end_it = listOrder.end();
+
+ while (list_it != list_end_it) {
+ if ((*list_it).oldestInst > queue_entry.oldestInst) {
+ break;
+ }
+
+ list_it++;
+ }
+
+ readyIt[op_class] = listOrder.insert(list_it, queue_entry);
+ queueOnList[op_class] = true;
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::moveToYoungerInst(ListOrderIt list_order_it)
+{
+ // Get iterator of next item on the list
+ // Delete the original iterator
+ // Determine if the next item is either the end of the list or younger
+ // than the new instruction. If so, then add in a new iterator right here.
+ // If not, then move along.
+ ListOrderEntry queue_entry;
+ OpClass op_class = (*list_order_it).queueType;
+ ListOrderIt next_it = list_order_it;
+
+ ++next_it;
+
+ queue_entry.queueType = op_class;
+ queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
+
+ while (next_it != listOrder.end() &&
+ (*next_it).oldestInst < queue_entry.oldestInst) {
+ ++next_it;
+ }
+
+ readyIt[op_class] = listOrder.insert(next_it, queue_entry);
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
+{
+ // The CPU could have been sleeping until this op completed (*extremely*
+ // long latency op). Wake it if it was. This may be overkill.
+ iewStage->wakeCPU();
+
+ fuPool->freeUnit(fu_idx);
+
+ int &size = issueToExecuteQueue->access(0)->size;
+
+ issueToExecuteQueue->access(0)->insts[size++] = inst;
+}
+*/
+// @todo: Figure out a better way to remove the squashed items from the
+// lists. Checking the top item of each list to see if it's squashed
+// wastes time and forces jumps.
+template <class Impl>
+void
+InstQueue<Impl>::scheduleReadyInsts()
+{
+ DPRINTF(IQ, "Attempting to schedule ready instructions from "
+ "the IQ.\n");
+
+// IssueStruct *i2e_info = issueToExecuteQueue->access(0);
+/*
+ // Will need to reorder the list if either a queue is not on the list,
+ // or it has an older instruction than last time.
+ for (int i = 0; i < Num_OpClasses; ++i) {
+ if (!readyInsts[i].empty()) {
+ if (!queueOnList[i]) {
+ addToOrderList(OpClass(i));
+ } else if (readyInsts[i].top()->seqNum <
+ (*readyIt[i]).oldestInst) {
+ listOrder.erase(readyIt[i]);
+ addToOrderList(OpClass(i));
+ }
+ }
+ }
+
+ // Have iterator to head of the list
+ // While I haven't exceeded bandwidth or reached the end of the list,
+ // Try to get a FU that can do what this op needs.
+ // If successful, change the oldestInst to the new top of the list, put
+ // the queue in the proper place in the list.
+ // Increment the iterator.
+ // This will avoid trying to schedule a certain op class if there are no
+ // FUs that handle it.
+ ListOrderIt order_it = listOrder.begin();
+ ListOrderIt order_end_it = listOrder.end();
+ int total_issued = 0;
+ int exec_queue_slot = i2e_info->size;
+
+ while (exec_queue_slot < totalWidth && order_it != order_end_it) {
+ OpClass op_class = (*order_it).queueType;
+
+ assert(!readyInsts[op_class].empty());
+
+ DynInstPtr issuing_inst = readyInsts[op_class].top();
+
+ assert(issuing_inst->seqNum == (*order_it).oldestInst);
+
+ if (issuing_inst->isSquashed()) {
+ readyInsts[op_class].pop();
+
+ if (!readyInsts[op_class].empty()) {
+ moveToYoungerInst(order_it);
+ } else {
+ readyIt[op_class] = listOrder.end();
+ queueOnList[op_class] = false;
+ }
+
+ listOrder.erase(order_it++);
+
+ ++iqSquashedInstsIssued;
+
+ continue;
+ }
+
+ int idx = fuPool->getUnit(op_class);
+
+ if (idx != -1) {
+ int op_latency = fuPool->getOpLatency(op_class);
+
+ if (op_latency == 1) {
+ i2e_info->insts[exec_queue_slot++] = issuing_inst;
+ i2e_info->size++;
+
+ // Add the FU onto the list of FU's to be freed next cycle.
+ fuPool->freeUnit(idx);
+ } else {
+ int issue_latency = fuPool->getIssueLatency(op_class);
+
+ if (issue_latency > 1) {
+ // Generate completion event for the FU
+ FUCompletion *execution = new FUCompletion(issuing_inst,
+ idx, this);
+
+ execution->schedule(curTick + issue_latency - 1);
+ } else {
+ i2e_info->insts[exec_queue_slot++] = issuing_inst;
+ i2e_info->size++;
+
+ // Add the FU onto the list of FU's to be freed next cycle.
+ fuPool->freeUnit(idx);
+ }
+ }
+
+ DPRINTF(IQ, "Thread %i: Issuing instruction PC %#x "
+ "[sn:%lli]\n",
+ issuing_inst->threadNumber, issuing_inst->readPC(),
+ issuing_inst->seqNum);
+
+ readyInsts[op_class].pop();
+
+ if (!readyInsts[op_class].empty()) {
+ moveToYoungerInst(order_it);
+ } else {
+ readyIt[op_class] = listOrder.end();
+ queueOnList[op_class] = false;
+ }
+
+ issuing_inst->setIssued();
+ ++total_issued;
+
+ if (!issuing_inst->isMemRef()) {
+ // Memory instructions can not be freed from the IQ until they
+ // complete.
+ ++freeEntries;
+ count[issuing_inst->threadNumber]--;
+ issuing_inst->removeInIQ();
+ } else {
+ memDepUnit[issuing_inst->threadNumber].issue(issuing_inst);
+ }
+
+ listOrder.erase(order_it++);
+ } else {
+ ++order_it;
+ }
+ }
+
+ if (total_issued) {
+ cpu->activityThisCycle();
+ } else {
+ DPRINTF(IQ, "Not able to schedule any instructions.\n");
+ }
+*/
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst)
+{
+ DPRINTF(IQ, "Marking nonspeculative instruction with sequence "
+ "number %i as ready to execute.\n", inst);
+
+ NonSpecMapIt inst_it = nonSpecInsts.find(inst);
+
+ assert(inst_it != nonSpecInsts.end());
+
+// unsigned tid = (*inst_it).second->threadNumber;
+
+ // Mark this instruction as ready to issue.
+ (*inst_it).second->setCanIssue();
+
+ // Now schedule the instruction.
+// if (!(*inst_it).second->isMemRef()) {
+ addIfReady((*inst_it).second);
+// } else {
+// memDepUnit[tid].nonSpecInstReady((*inst_it).second);
+// }
+
+ nonSpecInsts.erase(inst_it);
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::commit(const InstSeqNum &inst, unsigned tid)
+{
+ // @todo: Does this need to iterate over all threads?
+ DPRINTF(IQ, "[tid:%i]: Committing instructions older than [sn:%i]\n",
+ tid, inst);
+
+ ListIt iq_it = instList[tid].begin();
+
+ while (iq_it != instList[tid].end() &&
+ (*iq_it)->seqNum <= inst) {
+ ++iq_it;
+ instList[tid].pop_front();
+ }
+
+ assert(freeEntries == (numEntries - countInsts()));
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
+{
+ DPRINTF(IQ, "Waking dependents of completed instruction.\n");
+ // Look at the physical destination register of the DynInst
+ // and look it up on the dependency graph. Then mark as ready
+ // any instructions within the instruction queue.
+/*
+ DependencyEntry *curr;
+ DependencyEntry *prev;
+*/
+ // Tell the memory dependence unit to wake any dependents on this
+ // instruction if it is a memory instruction. Also complete the memory
+ // instruction at this point since we know it executed fine.
+ // @todo: Might want to rename "completeMemInst" to
+ // something that indicates that it won't need to be replayed, and call
+ // this earlier. Might not be a big deal.
+ if (completed_inst->isMemRef()) {
+// memDepUnit[completed_inst->threadNumber].wakeDependents(completed_inst);
+ completeMemInst(completed_inst);
+ }
+ completed_inst->wakeDependents();
+/*
+ for (int dest_reg_idx = 0;
+ dest_reg_idx < completed_inst->numDestRegs();
+ dest_reg_idx++)
+ {
+ PhysRegIndex dest_reg =
+ completed_inst->renamedDestRegIdx(dest_reg_idx);
+
+ // Special case of uniq or control registers. They are not
+ // handled by the IQ and thus have no dependency graph entry.
+ // @todo Figure out a cleaner way to handle this.
+ if (dest_reg >= numPhysRegs) {
+ continue;
+ }
+
+ DPRINTF(IQ, "Waking any dependents on register %i.\n",
+ (int) dest_reg);
+
+ //Maybe abstract this part into a function.
+ //Go through the dependency chain, marking the registers as ready
+ //within the waiting instructions.
+
+ curr = dependGraph[dest_reg].next;
+
+ while (curr) {
+ DPRINTF(IQ, "Waking up a dependent instruction, PC%#x.\n",
+ curr->inst->readPC());
+
+ // Might want to give more information to the instruction
+ // so that it knows which of its source registers is ready.
+ // However that would mean that the dependency graph entries
+ // would need to hold the src_reg_idx.
+ curr->inst->markSrcRegReady();
+
+ addIfReady(curr->inst);
+
+ DependencyEntry::mem_alloc_counter--;
+
+ prev = curr;
+ curr = prev->next;
+ prev->inst = NULL;
+
+ delete prev;
+ }
+
+ // Reset the head node now that all of its dependents have been woken
+ // up.
+ dependGraph[dest_reg].next = NULL;
+ dependGraph[dest_reg].inst = NULL;
+
+ // Mark the scoreboard as having that register ready.
+ regScoreboard[dest_reg] = true;
+ }
+*/
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::addReadyMemInst(DynInstPtr &ready_inst)
+{
- ready_inst->readPC(), op_class, ready_inst->seqNum);
++// OpClass op_class = ready_inst->opClass();
+
+ readyInsts.push(ready_inst);
+
+ DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
+ "the ready list, PC %#x opclass:%i [sn:%lli].\n",
- OpClass op_class = inst->opClass();
++ ready_inst->readPC(), ready_inst->opClass(), ready_inst->seqNum);
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::rescheduleMemInst(DynInstPtr &resched_inst)
+{
+ memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::replayMemInst(DynInstPtr &replay_inst)
+{
+ memDepUnit[replay_inst->threadNumber].replay(replay_inst);
+}
+*/
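+// Memory instructions keep their IQ entry until they complete, so the
+// free-entry and per-thread counts are only released here.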
+template <class Impl>
+void
+InstQueue<Impl>::completeMemInst(DynInstPtr &completed_inst)
+{
+ int tid = completed_inst->threadNumber;
+
+ DPRINTF(IQ, "Completing mem instruction PC:%#x [sn:%lli]\n",
+ completed_inst->readPC(), completed_inst->seqNum);
+
+ ++freeEntries;
+
+// completed_inst->memOpDone = true;
+
+// memDepUnit[tid].completed(completed_inst);
+
+ count[tid]--;
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::violation(DynInstPtr &store,
+ DynInstPtr &faulting_load)
+{
+ memDepUnit[store->threadNumber].violation(store, faulting_load);
+}
+*/
+template <class Impl>
+void
+InstQueue<Impl>::squash(unsigned tid)
+{
+ DPRINTF(IQ, "[tid:%i]: Starting to squash instructions in "
+ "the IQ.\n", tid);
+
+ // Read instruction sequence number of last instruction out of the
+ // time buffer.
+// squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum;
+
+ // Setup the squash iterator to point to the tail.
+ squashIt[tid] = instList[tid].end();
+ --squashIt[tid];
+
+ // Call doSquash if there are insts in the IQ
+ if (count[tid] > 0) {
+ doSquash(tid);
+ }
+
+ // Also tell the memory dependence unit to squash.
+// memDepUnit[tid].squash(squashedSeqNum[tid], tid);
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::doSquash(unsigned tid)
+{
+ // Make sure the squashed sequence number is valid.
+ assert(squashedSeqNum[tid] != 0);
+
+ DPRINTF(IQ, "[tid:%i]: Squashing until sequence number %i!\n",
+ tid, squashedSeqNum[tid]);
+
+ // Squash any instructions younger than the squashed sequence number
+ // given.
+ while (squashIt[tid] != instList[tid].end() &&
+ (*squashIt[tid])->seqNum > squashedSeqNum[tid]) {
+
+ DynInstPtr squashed_inst = (*squashIt[tid]);
+
+ // Only handle the instruction if it actually is in the IQ and
+ // hasn't already been squashed in the IQ.
+ if (squashed_inst->threadNumber != tid ||
+ squashed_inst->isSquashedInIQ()) {
+ --squashIt[tid];
+ continue;
+ }
+
+ if (!squashed_inst->isIssued() ||
+ (squashed_inst->isMemRef()/* &&
+ !squashed_inst->memOpDone*/)) {
+
+ // Remove the instruction from the dependency list.
+ if (!squashed_inst->isNonSpeculative()) {
+/*
+ for (int src_reg_idx = 0;
+ src_reg_idx < squashed_inst->numSrcRegs();
+ src_reg_idx++)
+ {
+ PhysRegIndex src_reg =
+ squashed_inst->renamedSrcRegIdx(src_reg_idx);
+
+ // Only remove it from the dependency graph if it was
+ // placed there in the first place.
+ // HACK: This assumes that instructions woken up from the
+ // dependency chain aren't informed that a specific src
+ // register has become ready. This may not always be true
+ // in the future.
+ // Instead of doing a linked list traversal, we can just
+ // remove these squashed instructions either at issue time,
+ // or when the register is overwritten. The only downside
+ // to this is it leaves more room for error.
+
+ if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) &&
+ src_reg < numPhysRegs) {
+ dependGraph[src_reg].remove(squashed_inst);
+ }
+
+
+ ++iqSquashedOperandsExamined;
+ }
+*/
+ // Might want to remove producers as well.
+ } else {
+ nonSpecInsts.erase(squashed_inst->seqNum);
+
+ ++iqSquashedNonSpecRemoved;
+ }
+
+ // Might want to also clear out the head of the dependency graph.
+
+ // Mark it as squashed within the IQ.
+ squashed_inst->setSquashedInIQ();
+
+ // @todo: Remove this hack where several statuses are set so the
+ // inst will flow through the rest of the pipeline.
+ squashed_inst->setIssued();
+ squashed_inst->setCanCommit();
+// squashed_inst->removeInIQ();
+
+ //Update Thread IQ Count
+ count[squashed_inst->threadNumber]--;
+
+ ++freeEntries;
+
+ if (numThreads > 1) {
+ DPRINTF(IQ, "[tid:%i]: Instruction PC %#x squashed.\n",
+ tid, squashed_inst->readPC());
+ } else {
+ DPRINTF(IQ, "Instruction PC %#x squashed.\n",
+ squashed_inst->readPC());
+ }
+ }
+
+ --squashIt[tid];
+ ++iqSquashedInstsExamined;
+ }
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::DependencyEntry::insert(DynInstPtr &new_inst)
+{
+ //Add this new, dependent instruction at the head of the dependency
+ //chain.
+
+ // First create the entry that will be added to the head of the
+ // dependency chain.
+ DependencyEntry *new_entry = new DependencyEntry;
+ new_entry->next = this->next;
+ new_entry->inst = new_inst;
+
+ // Then actually add it to the chain.
+ this->next = new_entry;
+
+ ++mem_alloc_counter;
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove)
+{
+ DependencyEntry *prev = this;
+ DependencyEntry *curr = this->next;
+
+ // Make sure curr isn't NULL. Because this instruction is being
+ // removed from a dependency list, it must have been placed there at
+ // an earlier time. The dependency chain should not be empty,
+ // unless the instruction dependent upon it is already ready.
+ if (curr == NULL) {
+ return;
+ }
+
+ // Find the instruction to remove within the dependency linked list.
+ while (curr->inst != inst_to_remove) {
+ prev = curr;
+ curr = curr->next;
+
+ assert(curr != NULL);
+ }
+
+ // Now remove this instruction from the list.
+ prev->next = curr->next;
+
+ --mem_alloc_counter;
+
+ // Could push this off to the destructor of DependencyEntry
+ curr->inst = NULL;
+
+ delete curr;
+}
+
+template <class Impl>
+bool
+InstQueue<Impl>::addToDependents(DynInstPtr &new_inst)
+{
+ // Loop through the instruction's source registers, adding
+ // them to the dependency list if they are not ready.
+ int8_t total_src_regs = new_inst->numSrcRegs();
+ bool return_val = false;
+
+ for (int src_reg_idx = 0;
+ src_reg_idx < total_src_regs;
+ src_reg_idx++)
+ {
+ // Only add it to the dependency graph if it's not ready.
+ if (!new_inst->isReadySrcRegIdx(src_reg_idx)) {
+ PhysRegIndex src_reg = new_inst->renamedSrcRegIdx(src_reg_idx);
+
+ // Check the IQ's scoreboard to make sure the register
+ // hasn't become ready while the instruction was in flight
+ // between stages. Only if it really isn't ready should
+ // it be added to the dependency graph.
+ if (src_reg >= numPhysRegs) {
+ continue;
+ } else if (regScoreboard[src_reg] == false) {
+ DPRINTF(IQ, "Instruction PC %#x has src reg %i that "
+ "is being added to the dependency chain.\n",
+ new_inst->readPC(), src_reg);
+
+ dependGraph[src_reg].insert(new_inst);
+
+ // Change the return value to indicate that something
+ // was added to the dependency graph.
+ return_val = true;
+ } else {
+ DPRINTF(IQ, "Instruction PC %#x has src reg %i that "
+ "became ready before it reached the IQ.\n",
+ new_inst->readPC(), src_reg);
+ // Mark a register ready within the instruction.
+ new_inst->markSrcRegReady();
+ }
+ }
+ }
+
+ return return_val;
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::createDependency(DynInstPtr &new_inst)
+{
+ //Actually nothing really needs to be marked when an
+ //instruction becomes the producer of a register's value,
+ //but for convenience a ptr to the producing instruction will
+ //be placed in the head node of the dependency links.
+ int8_t total_dest_regs = new_inst->numDestRegs();
+
+ for (int dest_reg_idx = 0;
+ dest_reg_idx < total_dest_regs;
+ dest_reg_idx++)
+ {
+ PhysRegIndex dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx);
+
+ // Instructions that use the misc regs will have a reg number
+ // higher than the normal physical registers. In this case these
+ // registers are not renamed, and there is no need to track
+ // dependencies as these instructions must be executed at commit.
+ if (dest_reg >= numPhysRegs) {
+ continue;
+ }
+
+ if (dependGraph[dest_reg].next) {
+ dumpDependGraph();
+ panic("Dependency graph %i not empty!", dest_reg);
+ }
+
+ dependGraph[dest_reg].inst = new_inst;
+
+ // Mark the scoreboard to say it's not yet ready.
+ regScoreboard[dest_reg] = false;
+ }
+}
+*/
+template <class Impl>
+void
+InstQueue<Impl>::addIfReady(DynInstPtr &inst)
+{
+ //If the instruction now has all of its source registers
+ // available, then add it to the list of ready instructions.
+ if (inst->readyToIssue()) {
+
+ //Add the instruction to the proper ready list.
+ if (inst->isMemRef()) {
+
+ DPRINTF(IQ, "Checking if memory instruction can issue.\n");
+
+ // Message to the mem dependence unit that this instruction has
+ // its registers ready.
+
+// memDepUnit[inst->threadNumber].regsReady(inst);
+
+ return;
+ }
+
- inst->readPC(), op_class, inst->seqNum);
++// OpClass op_class = inst->opClass();
+
+ DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
+ "the ready list, PC %#x opclass:%i [sn:%lli].\n",
++ inst->readPC(), inst->opClass(), inst->seqNum);
+
+ readyInsts.push(inst);
+ }
+}
+
+template <class Impl>
+int
+InstQueue<Impl>::countInsts()
+{
+ // ksewell: This works, but could use a cleaner rewrite with a more
+ // intuitive way of counting. Right now it's just brute force.
+
+#if 0
+ int total_insts = 0;
+
+ for (int i = 0; i < numThreads; ++i) {
+ ListIt count_it = instList[i].begin();
+
+ while (count_it != instList[i].end()) {
+ if (!(*count_it)->isSquashed() && !(*count_it)->isSquashedInIQ()) {
+ if (!(*count_it)->isIssued()) {
+ ++total_insts;
+ } else if ((*count_it)->isMemRef() &&
+ !(*count_it)->memOpDone) {
+ // Loads that have not been marked as executed still count
+ // towards the total instructions.
+ ++total_insts;
+ }
+ }
+
+ ++count_it;
+ }
+ }
+
+ return total_insts;
+#else
+ return numEntries - freeEntries;
+#endif
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::dumpDependGraph()
+{
+ DependencyEntry *curr;
+
+ for (int i = 0; i < numPhysRegs; ++i)
+ {
+ curr = &dependGraph[i];
+
+ if (curr->inst) {
+ cprintf("dependGraph[%i]: producer: %#x [sn:%lli] consumer: ",
+ i, curr->inst->readPC(), curr->inst->seqNum);
+ } else {
+ cprintf("dependGraph[%i]: No producer. consumer: ", i);
+ }
+
+ while (curr->next != NULL) {
+ curr = curr->next;
+
+ cprintf("%#x [sn:%lli] ",
+ curr->inst->readPC(), curr->inst->seqNum);
+ }
+
+ cprintf("\n");
+ }
+}
+*/
+template <class Impl>
+void
+InstQueue<Impl>::dumpLists()
+{
+ cprintf("Ready list size: %i\n", readyInsts.size());
+
+ cprintf("\n");
+
+ cprintf("Non speculative list size: %i\n", nonSpecInsts.size());
+
+ NonSpecMapIt non_spec_it = nonSpecInsts.begin();
+ NonSpecMapIt non_spec_end_it = nonSpecInsts.end();
+
+ cprintf("Non speculative list: ");
+
+ while (non_spec_it != non_spec_end_it) {
+ cprintf("%#x [sn:%lli]", (*non_spec_it).second->readPC(),
+ (*non_spec_it).second->seqNum);
+ ++non_spec_it;
+ }
+
+ cprintf("\n");
+/*
+ ListOrderIt list_order_it = listOrder.begin();
+ ListOrderIt list_order_end_it = listOrder.end();
+ int i = 1;
+
+ cprintf("List order: ");
+
+ while (list_order_it != list_order_end_it) {
+ cprintf("%i OpClass:%i [sn:%lli] ", i, (*list_order_it).queueType,
+ (*list_order_it).oldestInst);
+
+ ++list_order_it;
+ ++i;
+ }
+*/
+ cprintf("\n");
+}
+
+
+template <class Impl>
+void
+InstQueue<Impl>::dumpInsts()
+{
+ for (int i = 0; i < numThreads; ++i) {
+// int num = 0;
+// int valid_num = 0;
+/*
+ ListIt inst_list_it = instList[i].begin();
+
+ while (inst_list_it != instList[i].end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+ if (!(*inst_list_it)->isSquashed()) {
+ if (!(*inst_list_it)->isIssued()) {
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ } else if ((*inst_list_it)->isMemRef() &&
+ !(*inst_list_it)->memOpDone) {
+ // Loads that have not been marked as executed still count
+ // towards the total instructions.
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ }
+ }
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Issued:%i\nSquashed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+
+ if ((*inst_list_it)->isMemRef()) {
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ }
+
+ cprintf("\n");
+
+ inst_list_it++;
+ ++num;
+ }
+*/
+ }
+}
--- /dev/null
- TimeBuffer<Writeback> numInstsToWB;
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_OZONE_LW_BACK_END_HH__
+#define __CPU_OZONE_LW_BACK_END_HH__
+
+#include <list>
+#include <queue>
+#include <set>
+#include <string>
+
+#include "arch/faults.hh"
+#include "base/timebuf.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/ozone/rename_table.hh"
+#include "cpu/ozone/thread_state.hh"
+#include "mem/request.hh"
+#include "sim/eventq.hh"
+
+template <class>
+class Checker;
+class ThreadContext;
+
+template <class Impl>
+class OzoneThreadState;
+
+class Port;
+
+template <class Impl>
+class LWBackEnd
+{
+ public:
+ typedef OzoneThreadState<Impl> Thread;
+
+ typedef typename Impl::Params Params;
+ typedef typename Impl::DynInst DynInst;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::OzoneCPU OzoneCPU;
+ typedef typename Impl::FrontEnd FrontEnd;
+ typedef typename Impl::OzoneCPU::CommStruct CommStruct;
+
+ struct SizeStruct {
+ int size;
+ };
+
+ typedef SizeStruct DispatchToIssue;
+ typedef SizeStruct IssueToExec;
+ typedef SizeStruct ExecToCommit;
+ typedef SizeStruct Writeback;
+
+ TimeBuffer<DispatchToIssue> d2i;
+ typename TimeBuffer<DispatchToIssue>::wire instsToDispatch;
+ TimeBuffer<IssueToExec> i2e;
+ typename TimeBuffer<IssueToExec>::wire instsToExecute;
+ TimeBuffer<ExecToCommit> e2c;
- bool robEmpty() { return instList.empty(); }
++ TimeBuffer<int> numInstsToWB;
+
+ TimeBuffer<CommStruct> *comm;
+ typename TimeBuffer<CommStruct>::wire toIEW;
+ typename TimeBuffer<CommStruct>::wire fromCommit;
+
+ class TrapEvent : public Event {
+ private:
+ LWBackEnd<Impl> *be;
+
+ public:
+ TrapEvent(LWBackEnd<Impl> *_be);
+
+ void process();
+ const char *description();
+ };
+
+ LWBackEnd(Params *params);
+
+ std::string name() const;
+
+ void regStats();
+
+ void setCPU(OzoneCPU *cpu_ptr);
+
+ void setFrontEnd(FrontEnd *front_end_ptr)
+ { frontEnd = front_end_ptr; }
+
+ void setTC(ThreadContext *tc_ptr)
+ { tc = tc_ptr; }
+
+ void setThreadState(Thread *thread_ptr)
+ { thread = thread_ptr; }
+
+ void setCommBuffer(TimeBuffer<CommStruct> *_comm);
+
+ Port *getDcachePort() { return LSQ.getDcachePort(); }
+
+ void tick();
+ void squash();
+ void generateTCEvent() { tcSquash = true; }
+ void squashFromTC();
+ void squashFromTrap();
+ void checkInterrupts();
+ bool trapSquash;
+ bool tcSquash;
+
+ template <class T>
+ Fault read(RequestPtr req, T &data, int load_idx);
+
+ template <class T>
+ Fault write(RequestPtr req, T &data, int store_idx);
+
+ Addr readCommitPC() { return commitPC; }
+
+ Addr commitPC;
+
+ Tick lastCommitCycle;
+
- int numDispatchEntries;
++ bool robEmpty() { return numInsts == 0; }
+
+ bool isFull() { return numInsts >= numROBEntries; }
+ bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }
+
+ void fetchFault(Fault &fault);
+
+ int wakeDependents(DynInstPtr &inst, bool memory_deps = false);
+
+ /** Tells memory dependence unit that a memory instruction needs to be
+ * rescheduled. It will re-execute once replayMemInst() is called.
+ */
+ void rescheduleMemInst(DynInstPtr &inst);
+
+ /** Re-executes all rescheduled memory instructions. */
+ void replayMemInst(DynInstPtr &inst);
+
+ /** Completes memory instruction. */
+ void completeMemInst(DynInstPtr &inst) { }
+
+ void addDcacheMiss(DynInstPtr &inst)
+ {
+ waitingMemOps.insert(inst->seqNum);
+ numWaitingMemOps++;
+ DPRINTF(BE, "Adding a Dcache miss mem op [sn:%lli], total %i\n",
+ inst->seqNum, numWaitingMemOps);
+ }
+
+ void removeDcacheMiss(DynInstPtr &inst)
+ {
+ assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end());
+ waitingMemOps.erase(inst->seqNum);
+ numWaitingMemOps--;
+ DPRINTF(BE, "Removing a Dcache miss mem op [sn:%lli], total %i\n",
+ inst->seqNum, numWaitingMemOps);
+ }
+
+ void addWaitingMemOp(DynInstPtr &inst)
+ {
+ waitingMemOps.insert(inst->seqNum);
+ numWaitingMemOps++;
+ DPRINTF(BE, "Adding a waiting mem op [sn:%lli], total %i\n",
+ inst->seqNum, numWaitingMemOps);
+ }
+
+ void removeWaitingMemOp(DynInstPtr &inst)
+ {
+ assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end());
+ waitingMemOps.erase(inst->seqNum);
+ numWaitingMemOps--;
+ DPRINTF(BE, "Removing a waiting mem op [sn:%lli], total %i\n",
+ inst->seqNum, numWaitingMemOps);
+ }
+
+ void instToCommit(DynInstPtr &inst);
++ void readyInstsForCommit();
+
+ void switchOut();
+ void doSwitchOut();
+ void takeOverFrom(ThreadContext *old_tc = NULL);
+
+ bool isSwitchedOut() { return switchedOut; }
+
+ private:
+ void generateTrapEvent(Tick latency = 0);
+ void handleFault(Fault &fault, Tick latency = 0);
+ void updateStructures();
+ void dispatchInsts();
+ void dispatchStall();
+ void checkDispatchStatus();
+ void executeInsts();
+ void commitInsts();
+ void addToLSQ(DynInstPtr &inst);
+ void writebackInsts();
+ bool commitInst(int inst_num);
+ void squash(const InstSeqNum &sn);
+ void squashDueToBranch(DynInstPtr &inst);
+ void squashDueToMemViolation(DynInstPtr &inst);
+ void squashDueToMemBlocked(DynInstPtr &inst);
+ void updateExeInstStats(DynInstPtr &inst);
+ void updateComInstStats(DynInstPtr &inst);
+
+ public:
+ OzoneCPU *cpu;
+
+ FrontEnd *frontEnd;
+
+ ThreadContext *tc;
+
+ Thread *thread;
+
+ enum Status {
+ Running,
+ Idle,
+ DcacheMissStall,
+ DcacheMissComplete,
+ Blocked,
+ TrapPending
+ };
+
+ Status status;
+
+ Status dispatchStatus;
+
+ Status commitStatus;
+
+ Counter funcExeInst;
+
+ private:
+ typedef typename Impl::LdstQueue LdstQueue;
+
+ LdstQueue LSQ;
+ public:
+ RenameTable<Impl> commitRenameTable;
+
+ RenameTable<Impl> renameTable;
+ private:
++ int latency;
++
+ // General back end width. Used if the more specific isn't given.
+ int width;
+
+ // Dispatch width.
+ int dispatchWidth;
- Fault faultFromFetch;
- bool fetchHasFault;
-
+ int dispatchSize;
+
+ int waitingInsts;
+
+ int issueWidth;
+
+ // Writeback width
+ int wbWidth;
+
+ // Commit width
+ int commitWidth;
+
+ /** Index into queue of instructions being written back. */
+ unsigned wbNumInst;
+
+ /** Cycle number within the queue of instructions being written
+ * back. Used in case there are too many instructions writing
+ * back at the current cycle and writebacks need to be scheduled
+ * for the future. See comments in instToCommit().
+ */
+ unsigned wbCycle;
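+ // (Illustrative assumption, per the comment above: a writeback
+ // would land in slot wbCycle * wbWidth + wbNumInst, deferring to
+ // wbCycle + 1 once wbNumInst reaches wbWidth.)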
+
+ int numROBEntries;
+ int numInsts;
++ bool lsqLimits;
+
+ std::set<InstSeqNum> waitingMemOps;
+ typedef std::set<InstSeqNum>::iterator MemIt;
+ int numWaitingMemOps;
+ unsigned maxOutstandingMemOps;
+
+ bool squashPending;
+ InstSeqNum squashSeqNum;
+ Addr squashNextPC;
+
- int latency;
-
+ bool switchedOut;
+ bool switchPending;
+
+ DynInstPtr memBarrier;
+
+ private:
+ struct pqCompare {
+ bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
+ {
+ return lhs->seqNum > rhs->seqNum;
+ }
+ };
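+
+ // std::priority_queue is a max-heap, so comparing seqNum with '>'
+ // makes exeList pop the oldest ready instruction first.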
+
+ typedef typename std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> ReadyInstQueue;
+ ReadyInstQueue exeList;
+
+ typedef typename std::list<DynInstPtr>::iterator InstListIt;
+
+ std::list<DynInstPtr> instList;
+ std::list<DynInstPtr> waitingList;
+ std::list<DynInstPtr> replayList;
+ std::list<DynInstPtr> writeback;
+
- Stats::Vector<> rob_cap_events;
- Stats::Vector<> rob_cap_inst_count;
- Stats::Vector<> iq_cap_events;
- Stats::Vector<> iq_cap_inst_count;
+ int squashLatency;
+
+ bool exactFullStall;
+
+ // number of cycles stalled for D-cache misses
+/* Stats::Scalar<> dcacheStallCycles;
+ Counter lastDcacheStall;
+*/
- Stats::Vector<> exe_inst;
- Stats::Vector<> exe_swp;
- Stats::Vector<> exe_nop;
- Stats::Vector<> exe_refs;
- Stats::Vector<> exe_loads;
- Stats::Vector<> exe_branches;
++ Stats::Vector<> robCapEvents;
++ Stats::Vector<> robCapInstCount;
++ Stats::Vector<> iqCapEvents;
++ Stats::Vector<> iqCapInstCount;
+ // total number of instructions executed
- Stats::Vector<> issued_ops;
++ Stats::Vector<> exeInst;
++ Stats::Vector<> exeSwp;
++ Stats::Vector<> exeNop;
++ Stats::Vector<> exeRefs;
++ Stats::Vector<> exeLoads;
++ Stats::Vector<> exeBranches;
+
- Stats::Vector<> lsq_forw_loads;
++ Stats::Vector<> issuedOps;
+
+ // total number of loads forwarded from LSQ stores
- Stats::Vector<> inv_addr_loads;
++ Stats::Vector<> lsqForwLoads;
+
+ // total number of loads ignored due to invalid addresses
- Stats::Vector<> inv_addr_swpfs;
++ Stats::Vector<> invAddrLoads;
+
+ // total number of software prefetches ignored due to invalid addresses
- Stats::Vector<> lsq_blocked_loads;
++ Stats::Vector<> invAddrSwpfs;
+ // ready loads blocked due to memory disambiguation
- Stats::Vector<> n_issued_dist;
- Stats::VectorDistribution<> issue_delay_dist;
++ Stats::Vector<> lsqBlockedLoads;
+
+ Stats::Scalar<> lsqInversion;
+
- Stats::VectorDistribution<> queue_res_dist;
++ Stats::Vector<> nIssuedDist;
++/*
++ Stats::VectorDistribution<> issueDelayDist;
+
- Stats::Vector<> writeback_count;
- Stats::Vector<> producer_inst;
- Stats::Vector<> consumer_inst;
- Stats::Vector<> wb_penalized;
++ Stats::VectorDistribution<> queueResDist;
++*/
+/*
+ Stats::Vector<> stat_fu_busy;
+ Stats::Vector2d<> stat_fuBusy;
+ Stats::Vector<> dist_unissued;
+ Stats::Vector2d<> stat_issued_inst_type;
+
+ Stats::Formula misspec_cnt;
+ Stats::Formula misspec_ipc;
+ Stats::Formula issue_rate;
+ Stats::Formula issue_stores;
+ Stats::Formula issue_op_rate;
+ Stats::Formula fu_busy_rate;
+ Stats::Formula commit_stores;
+ Stats::Formula commit_ipc;
+ Stats::Formula commit_ipb;
+ Stats::Formula lsq_inv_rate;
+*/
- Stats::Formula wb_rate;
- Stats::Formula wb_fanout;
- Stats::Formula wb_penalized_rate;
++ Stats::Vector<> writebackCount;
++ Stats::Vector<> producerInst;
++ Stats::Vector<> consumerInst;
++ Stats::Vector<> wbPenalized;
+
- Stats::Vector<> stat_com_inst;
- Stats::Vector<> stat_com_swp;
- Stats::Vector<> stat_com_refs;
- Stats::Vector<> stat_com_loads;
- Stats::Vector<> stat_com_membars;
- Stats::Vector<> stat_com_branches;
++ Stats::Formula wbRate;
++ Stats::Formula wbFanout;
++ Stats::Formula wbPenalizedRate;
+
+ // total number of instructions committed
- Stats::Distribution<> n_committed_dist;
++ Stats::Vector<> statComInst;
++ Stats::Vector<> statComSwp;
++ Stats::Vector<> statComRefs;
++ Stats::Vector<> statComLoads;
++ Stats::Vector<> statComMembars;
++ Stats::Vector<> statComBranches;
+
- Stats::Scalar<> commit_eligible_samples;
- Stats::Vector<> commit_eligible;
++ Stats::Distribution<> nCommittedDist;
+
- Stats::Scalar<> ROB_fcount;
- Stats::Formula ROB_full_rate;
++ Stats::Scalar<> commitEligibleSamples;
++ Stats::Vector<> commitEligible;
+
+ Stats::Vector<> squashedInsts;
+ Stats::Vector<> ROBSquashedInsts;
+
- Stats::Vector<> ROB_count; // cumulative ROB occupancy
- Stats::Formula ROB_occ_rate;
- Stats::VectorDistribution<> ROB_occ_dist;
++ Stats::Scalar<> ROBFcount;
++ Stats::Formula ROBFullRate;
+
++ Stats::Vector<> ROBCount; // cumulative ROB occupancy
++ Stats::Formula ROBOccRate;
++// Stats::VectorDistribution<> ROBOccDist;
+ public:
+ void dumpInsts();
+
+ Checker<DynInstPtr> *checker;
+};
+
+template <class Impl>
+template <class T>
+Fault
+LWBackEnd<Impl>::read(RequestPtr req, T &data, int load_idx)
+{
+ return LSQ.read(req, data, load_idx);
+}
+
+template <class Impl>
+template <class T>
+Fault
+LWBackEnd<Impl>::write(RequestPtr req, T &data, int store_idx)
+{
+ return LSQ.write(req, data, store_idx);
+}
+
+#endif // __CPU_OZONE_LW_BACK_END_HH__
--- /dev/null
- : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include "config/use_checker.hh"
+
+#include "cpu/ozone/lw_back_end.hh"
+#include "cpu/op_class.hh"
+
+#if USE_CHECKER
+#include "cpu/checker/cpu.hh"
+#endif
+
+template <class Impl>
+void
+LWBackEnd<Impl>::generateTrapEvent(Tick latency)
+{
+ DPRINTF(BE, "Generating trap event\n");
+
+ TrapEvent *trap = new TrapEvent(this);
+
+ trap->schedule(curTick + cpu->cycles(latency));
+
+ thread->trapPending = true;
+}
+
+template <class Impl>
+int
+LWBackEnd<Impl>::wakeDependents(DynInstPtr &inst, bool memory_deps)
+{
+ assert(!inst->isSquashed());
+ std::vector<DynInstPtr> &dependents = memory_deps ? inst->getMemDeps() :
+ inst->getDependents();
+ int num_outputs = dependents.size();
+
+ DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum);
+
+ for (int i = 0; i < num_outputs; i++) {
+ DynInstPtr dep_inst = dependents[i];
+ if (!memory_deps) {
+ dep_inst->markSrcRegReady();
+ } else {
+ if (!dep_inst->isSquashed())
+ dep_inst->markMemInstReady(inst.get());
+ }
+
+ DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum);
+
+ if (dep_inst->readyToIssue() && dep_inst->isInROB() &&
+ !dep_inst->isNonSpeculative() && !dep_inst->isStoreConditional() &&
+ dep_inst->memDepReady() && !dep_inst->isMemBarrier() &&
+ !dep_inst->isWriteBarrier()) {
+ DPRINTF(BE, "Adding instruction to exeList [sn:%lli]\n",
+ dep_inst->seqNum);
+ exeList.push(dep_inst);
+ if (dep_inst->iqItValid) {
+ DPRINTF(BE, "Removing instruction from waiting list\n");
+ waitingList.erase(dep_inst->iqIt);
+ waitingInsts--;
+ dep_inst->iqItValid = false;
+ assert(waitingInsts >= 0);
+ }
+ if (dep_inst->isMemRef()) {
+ removeWaitingMemOp(dep_inst);
+ DPRINTF(BE, "Issued a waiting mem op [sn:%lli]\n",
+ dep_inst->seqNum);
+ }
+ }
+ }
+ return num_outputs;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::rescheduleMemInst(DynInstPtr &inst)
+{
+ replayList.push_front(inst);
+}
+
+template <class Impl>
+LWBackEnd<Impl>::TrapEvent::TrapEvent(LWBackEnd<Impl> *_be)
+ : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
+{
+ this->setFlags(Event::AutoDelete);
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::TrapEvent::process()
+{
+ be->trapSquash = true;
+}
+
+template <class Impl>
+const char *
+LWBackEnd<Impl>::TrapEvent::description()
+{
+ return "Trap event";
+}
+
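+// Note that replaying any one instruction drains the entire replayList
+// onto exeList; the assert below checks that the requested instruction
+// was among those rescheduled.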
+template <class Impl>
+void
+LWBackEnd<Impl>::replayMemInst(DynInstPtr &inst)
+{
+ bool found_inst = false;
+ while (!replayList.empty()) {
+ exeList.push(replayList.front());
+ if (replayList.front() == inst) {
+ found_inst = true;
+ }
+ replayList.pop_front();
+ }
+ assert(found_inst);
+}
+
+template <class Impl>
+LWBackEnd<Impl>::LWBackEnd(Params *params)
- width(params->backEndWidth), exactFullStall(true)
++ : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(params->backEndLatency, 0),
+ trapSquash(false), tcSquash(false),
- numDispatchEntries = 32;
++ latency(params->backEndLatency),
++ width(params->backEndWidth), lsqLimits(params->lsqLimits),
++ exactFullStall(true)
+{
+ numROBEntries = params->numROBEntries;
+ numInsts = 0;
- rob_cap_events
+ maxOutstandingMemOps = params->maxOutstandingMemOps;
+ numWaitingMemOps = 0;
+ waitingInsts = 0;
+ switchedOut = false;
+ switchPending = false;
+
+ LSQ.setBE(this);
+
+ // Setup IQ and LSQ with their parameters here.
+ instsToDispatch = d2i.getWire(-1);
+
+ instsToExecute = i2e.getWire(-1);
+
+ dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width;
+ issueWidth = params->issueWidth ? params->issueWidth : width;
+ wbWidth = params->wbWidth ? params->wbWidth : width;
+ commitWidth = params->commitWidth ? params->commitWidth : width;
+
+ LSQ.init(params, params->LQEntries, params->SQEntries, 0);
+
+ dispatchStatus = Running;
+ commitStatus = Running;
+}
+
+template <class Impl>
+std::string
+LWBackEnd<Impl>::name() const
+{
+ return cpu->name() + ".backend";
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::regStats()
+{
+ using namespace Stats;
- rob_cap_inst_count
++ LSQ.regStats();
++
++ robCapEvents
+ .init(cpu->number_of_threads)
+ .name(name() + ".ROB:cap_events")
+ .desc("number of cycles where ROB cap was active")
+ .flags(total)
+ ;
+
- iq_cap_events
++ robCapInstCount
+ .init(cpu->number_of_threads)
+ .name(name() + ".ROB:cap_inst")
+ .desc("number of instructions held up by ROB cap")
+ .flags(total)
+ ;
+
- iq_cap_inst_count
++ iqCapEvents
+ .init(cpu->number_of_threads)
+ .name(name() +".IQ:cap_events" )
+ .desc("number of cycles where IQ cap was active")
+ .flags(total)
+ ;
+
-
- exe_inst
++ iqCapInstCount
+ .init(cpu->number_of_threads)
+ .name(name() + ".IQ:cap_inst")
+ .desc("number of instructions held up by IQ cap")
+ .flags(total)
+ ;
+
- exe_swp
++ exeInst
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:count")
+ .desc("number of insts issued")
+ .flags(total)
+ ;
+
- exe_nop
++ exeSwp
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:swp")
+ .desc("number of swp insts issued")
+ .flags(total)
+ ;
+
- exe_refs
++ exeNop
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:nop")
+ .desc("number of nop insts issued")
+ .flags(total)
+ ;
+
- exe_loads
++ exeRefs
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:refs")
+ .desc("number of memory reference insts issued")
+ .flags(total)
+ ;
+
- exe_branches
++ exeLoads
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:loads")
+ .desc("number of load insts issued")
+ .flags(total)
+ ;
+
- issued_ops
++ exeBranches
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:branches")
+ .desc("Number of branches issued")
+ .flags(total)
+ ;
+
- lsq_forw_loads
++ issuedOps
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:op_count")
+ .desc("number of insts issued")
+ .flags(total)
+ ;
+
+/*
+ for (int i=0; i<Num_OpClasses; ++i) {
+ stringstream subname;
+ subname << opClassStrings[i] << "_delay";
+ issue_delay_dist.subname(i, subname.str());
+ }
+*/
+ //
+ // Other stats
+ //
- inv_addr_loads
++ lsqForwLoads
+ .init(cpu->number_of_threads)
+ .name(name() + ".LSQ:forw_loads")
+ .desc("number of loads forwarded via LSQ")
+ .flags(total)
+ ;
+
- inv_addr_swpfs
++ invAddrLoads
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:addr_loads")
+ .desc("number of invalid-address loads")
+ .flags(total)
+ ;
+
- lsq_blocked_loads
++ invAddrSwpfs
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:addr_swpfs")
+ .desc("number of invalid-address SW prefetches")
+ .flags(total)
+ ;
+
- n_issued_dist
++ lsqBlockedLoads
+ .init(cpu->number_of_threads)
+ .name(name() + ".LSQ:blocked_loads")
+ .desc("number of ready loads not issued due to memory disambiguation")
+ .flags(total)
+ ;
+
+ lsqInversion
+ .name(name() + ".ISSUE:lsq_invert")
+ .desc("Number of times LSQ instruction issued early")
+ ;
+
- issue_delay_dist
++ nIssuedDist
+ .init(issueWidth + 1)
+ .name(name() + ".ISSUE:issued_per_cycle")
+ .desc("Number of insts issued each cycle")
+ .flags(total | pdf | dist)
+ ;
- queue_res_dist
++/*
++ issueDelayDist
+ .init(Num_OpClasses,0,99,2)
+ .name(name() + ".ISSUE:")
+ .desc("cycles from operands ready to issue")
+ .flags(pdf | cdf)
+ ;
+
- queue_res_dist.subname(i, opClassStrings[i]);
++ queueResDist
+ .init(Num_OpClasses, 0, 99, 2)
+ .name(name() + ".IQ:residence:")
+ .desc("cycles from dispatch to issue")
+ .flags(total | pdf | cdf )
+ ;
+ for (int i = 0; i < Num_OpClasses; ++i) {
-
- writeback_count
++ queueResDist.subname(i, opClassStrings[i]);
+ }
- producer_inst
++*/
++ writebackCount
+ .init(cpu->number_of_threads)
+ .name(name() + ".WB:count")
+ .desc("cumulative count of insts written-back")
+ .flags(total)
+ ;
+
- consumer_inst
++ producerInst
+ .init(cpu->number_of_threads)
+ .name(name() + ".WB:producers")
+ .desc("num instructions producing a value")
+ .flags(total)
+ ;
+
- wb_penalized
++ consumerInst
+ .init(cpu->number_of_threads)
+ .name(name() + ".WB:consumers")
+ .desc("num instructions consuming a value")
+ .flags(total)
+ ;
+
- wb_penalized_rate
++ wbPenalized
+ .init(cpu->number_of_threads)
+ .name(name() + ".WB:penalized")
+ .desc("number of instrctions required to write to 'other' IQ")
+ .flags(total)
+ ;
+
+
- wb_penalized_rate = wb_penalized / writeback_count;
++ wbPenalizedRate
+ .name(name() + ".WB:penalized_rate")
+ .desc ("fraction of instructions written-back that wrote to 'other' IQ")
+ .flags(total)
+ ;
+
- wb_fanout
++ wbPenalizedRate = wbPenalized / writebackCount;
+
- wb_fanout = producer_inst / consumer_inst;
++ wbFanout
+ .name(name() + ".WB:fanout")
+ .desc("average fanout of values written-back")
+ .flags(total)
+ ;
+
- wb_rate
++ wbFanout = producerInst / consumerInst;
+
- wb_rate = writeback_count / cpu->numCycles;
++ wbRate
+ .name(name() + ".WB:rate")
+ .desc("insts written-back per cycle")
+ .flags(total)
+ ;
- stat_com_inst
++ wbRate = writebackCount / cpu->numCycles;
+
- stat_com_swp
++ statComInst
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:count")
+ .desc("Number of instructions committed")
+ .flags(total)
+ ;
+
- stat_com_refs
++ statComSwp
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:swp_count")
+ .desc("Number of s/w prefetches committed")
+ .flags(total)
+ ;
+
- stat_com_loads
++ statComRefs
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:refs")
+ .desc("Number of memory references committed")
+ .flags(total)
+ ;
+
- stat_com_membars
++ statComLoads
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:loads")
+ .desc("Number of loads committed")
+ .flags(total)
+ ;
+
- stat_com_branches
++ statComMembars
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:membars")
+ .desc("Number of memory barriers committed")
+ .flags(total)
+ ;
+
- n_committed_dist
++ statComBranches
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:branches")
+ .desc("Number of branches committed")
+ .flags(total)
+ ;
- commit_eligible
++ nCommittedDist
+ .init(0, commitWidth, 1)
+ .name(name() + ".COM:committed_per_cycle")
+ .desc("Number of insts committed each cycle")
+ .flags(pdf)
+ ;
+
+ //
+ // Commit-Eligible instructions...
+ //
+ // -> The number of instructions eligible to commit in those
+ // cycles where we reached our commit BW limit (less the number
+ // actually committed)
+ //
+ // -> The average value is computed over ALL CYCLES... not just
+ // the BW limited cycles
+ //
+ // -> The standard deviation is computed only over cycles where
+ // we reached the BW limit
+ //
- commit_eligible_samples
++ commitEligible
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:bw_limited")
+ .desc("number of insts not committed due to BW limits")
+ .flags(total)
+ ;
+
- ROB_fcount
++ commitEligibleSamples
+ .name(name() + ".COM:bw_lim_events")
+ .desc("number cycles where commit BW limit reached")
+ ;
+
+ squashedInsts
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:squashed_insts")
+ .desc("Number of instructions removed from inst list")
+ ;
+
+ ROBSquashedInsts
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:rob_squashed_insts")
+ .desc("Number of instructions removed from inst list when they reached the head of the ROB")
+ ;
+
- ROB_count
++ ROBFcount
+ .name(name() + ".ROB:full_count")
+ .desc("number of cycles where ROB was full")
+ ;
+
- ROB_full_rate
++ ROBCount
+ .init(cpu->number_of_threads)
+ .name(name() + ".ROB:occupancy")
+ .desc(name() + ".ROB occupancy (cumulative)")
+ .flags(total)
+ ;
+
- ROB_full_rate = ROB_fcount / cpu->numCycles;
++ ROBFullRate
+ .name(name() + ".ROB:full_rate")
+ .desc("ROB full per cycle")
+ ;
- ROB_occ_rate
++ ROBFullRate = ROBFcount / cpu->numCycles;
+
- ROB_occ_rate = ROB_count / cpu->numCycles;
-
- ROB_occ_dist
++ ROBOccRate
+ .name(name() + ".ROB:occ_rate")
+ .desc("ROB occupancy rate")
+ .flags(total)
+ ;
- ROB_count[0]+= numInsts;
++ ROBOccRate = ROBCount / cpu->numCycles;
++/*
++ ROBOccDist
+ .init(cpu->number_of_threads,0,numROBEntries,2)
+ .name(name() + ".ROB:occ_dist")
+ .desc("ROB Occupancy per cycle")
+ .flags(total | cdf)
+ ;
++*/
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::setCPU(OzoneCPU *cpu_ptr)
+{
+ cpu = cpu_ptr;
+ LSQ.setCPU(cpu_ptr);
+ checker = cpu->checker;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
+{
+ comm = _comm;
+ toIEW = comm->getWire(0);
+ fromCommit = comm->getWire(-1);
+}
+
+#if FULL_SYSTEM
+template <class Impl>
+void
+LWBackEnd<Impl>::checkInterrupts()
+{
+ if (cpu->checkInterrupts &&
+ cpu->check_interrupts() &&
+ !cpu->inPalMode(thread->readPC()) &&
+ !trapSquash &&
+ !tcSquash) {
+ frontEnd->interruptPending = true;
+ if (robEmpty() && !LSQ.hasStoresToWB()) {
+ // Will need to squash all instructions currently in flight and have
+ // the interrupt handler restart at the last non-committed inst.
+ // Most of that can be handled through the trap() function. The
+ // processInterrupts() function really just checks for interrupts
+ // and then calls trap() if there is an interrupt present.
+
+ // Not sure which thread should be the one to interrupt. For now
+ // always do thread 0.
+ assert(!thread->inSyscall);
+ thread->inSyscall = true;
+
+ // CPU will handle implementation of the interrupt.
+ cpu->processInterrupts();
+
+ // Now squash or record that I need to squash this cycle.
+ commitStatus = TrapPending;
+
+ // Exit state update mode to avoid accidental updating.
+ thread->inSyscall = false;
+
+ // Generate trap squash event.
+ generateTrapEvent();
+
+ DPRINTF(BE, "Interrupt detected.\n");
+ } else {
+ DPRINTF(BE, "Interrupt must wait for ROB to drain.\n");
+ }
+ }
+}
+#endif
+
+template <class Impl>
+void
+LWBackEnd<Impl>::handleFault(Fault &fault, Tick latency)
+{
+ DPRINTF(BE, "Handling fault!\n");
+
+ assert(!thread->inSyscall);
+
+ thread->inSyscall = true;
+
+ // Consider holding onto the trap and waiting until the trap event
+ // happens for this to be executed.
+ fault->invoke(thread->getTC());
+
+ // Exit state update mode to avoid accidental updating.
+ thread->inSyscall = false;
+
+ commitStatus = TrapPending;
+
+ // Generate trap squash event.
+ generateTrapEvent(latency);
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::tick()
+{
+ DPRINTF(BE, "Ticking back end\n");
+
++ // Read in any done instruction information and update the IQ or LSQ.
++ updateStructures();
++
+ if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) {
+ cpu->signalSwitched();
+ return;
+ }
+
- wbCycle = 0;
++ readyInstsForCommit();
+
- // Read in any done instruction information and update the IQ or LSQ.
- updateStructures();
++ numInstsToWB.advance();
+
- LSQ.executeStore(inst);
- if (inst->req && !(inst->req->getFlags() & LOCKED)) {
++ ROBCount[0] += numInsts;
++
++ wbCycle = 0;
+
+#if FULL_SYSTEM
+ checkInterrupts();
+#endif
+
+ if (trapSquash) {
+ assert(!tcSquash);
+ squashFromTrap();
+ } else if (tcSquash) {
+ squashFromTC();
+ }
+
+ if (dispatchStatus != Blocked) {
+ dispatchInsts();
+ } else {
+ checkDispatchStatus();
+ }
+
+ if (commitStatus != TrapPending) {
+ executeInsts();
+
+ commitInsts();
+ }
+
+ LSQ.writebackStores();
+
+ DPRINTF(BE, "Waiting insts: %i, mem ops: %i, ROB entries in use: %i, "
+ "LSQ loads: %i, LSQ stores: %i\n",
+ waitingInsts, numWaitingMemOps, numInsts,
+ LSQ.numLoads(), LSQ.numStores());
+
+#ifdef DEBUG
+ assert(numInsts == instList.size());
+ assert(waitingInsts == waitingList.size());
+ assert(numWaitingMemOps == waitingMemOps.size());
+ assert(!switchedOut);
+#endif
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::updateStructures()
+{
+ if (fromCommit->doneSeqNum) {
+ LSQ.commitLoads(fromCommit->doneSeqNum);
+ LSQ.commitStores(fromCommit->doneSeqNum);
+ }
+
+ if (fromCommit->nonSpecSeqNum) {
+ if (fromCommit->uncached) {
+// LSQ.executeLoad(fromCommit->lqIdx);
+ } else {
+// IQ.scheduleNonSpec(
+// fromCommit->nonSpecSeqNum);
+ }
+ }
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::addToLSQ(DynInstPtr &inst)
+{
+ // Do anything LSQ specific here?
+ LSQ.insert(inst);
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::dispatchInsts()
+{
+ DPRINTF(BE, "Trying to dispatch instructions.\n");
+
+ while (numInsts < numROBEntries &&
+ numWaitingMemOps < maxOutstandingMemOps) {
+ // Get an instruction from the front end.
++ if (lsqLimits && LSQ.isFull()) {
++ break;
++ }
++
+ DynInstPtr inst = frontEnd->getInst();
+ if (!inst) {
+ break;
+ } else if (inst->isSquashed()) {
+ continue;
+ }
+
+ ++numInsts;
+ instList.push_front(inst);
+
+ inst->setInROB();
+
+ DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n",
+ inst->seqNum, inst->readPC());
+
+ for (int i = 0; i < inst->numDestRegs(); ++i)
+ renameTable[inst->destRegIdx(i)] = inst;
+
+ if (inst->isMemBarrier() || inst->isWriteBarrier()) {
+ if (memBarrier) {
+ DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
+ "barrier [sn:%lli].\n",
+ inst->seqNum, memBarrier->seqNum);
+ memBarrier->addMemDependent(inst);
+ inst->addSrcMemInst(memBarrier);
+ }
+ memBarrier = inst;
+ inst->setCanCommit();
+ } else if (inst->readyToIssue() &&
+ !inst->isNonSpeculative() &&
+ !inst->isStoreConditional()) {
+ if (inst->isMemRef()) {
+
+ LSQ.insert(inst);
+ if (memBarrier) {
+ DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
+ "barrier [sn:%lli].\n",
+ inst->seqNum, memBarrier->seqNum);
+ memBarrier->addMemDependent(inst);
+ inst->addSrcMemInst(memBarrier);
+ addWaitingMemOp(inst);
+
+ waitingList.push_front(inst);
+ inst->iqIt = waitingList.begin();
+ inst->iqItValid = true;
+ waitingInsts++;
+ } else {
+ DPRINTF(BE, "Instruction [sn:%lli] ready, addding to "
+ "exeList.\n",
+ inst->seqNum);
+ exeList.push(inst);
+ }
+ } else if (inst->isNop()) {
+ DPRINTF(BE, "Nop encountered [sn:%lli], skipping exeList.\n",
+ inst->seqNum);
+ inst->setIssued();
+ inst->setExecuted();
+ inst->setCanCommit();
++ numInstsToWB[0]++;
+ } else {
+ DPRINTF(BE, "Instruction [sn:%lli] ready, addding to "
+ "exeList.\n",
+ inst->seqNum);
+ exeList.push(inst);
+ }
+ } else {
+ if (inst->isNonSpeculative() || inst->isStoreConditional()) {
+ inst->setCanCommit();
+ DPRINTF(BE, "Adding non speculative instruction\n");
+ }
+
+ if (inst->isMemRef()) {
+ addWaitingMemOp(inst);
+ LSQ.insert(inst);
+ if (memBarrier) {
+ memBarrier->addMemDependent(inst);
+ inst->addSrcMemInst(memBarrier);
+
+ DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
+ "barrier [sn:%lli].\n",
+ inst->seqNum, memBarrier->seqNum);
+ }
+ }
+
+ DPRINTF(BE, "Instruction [sn:%lli] not ready, addding to "
+ "waitingList.\n",
+ inst->seqNum);
+ waitingList.push_front(inst);
+ inst->iqIt = waitingList.begin();
+ inst->iqItValid = true;
+ waitingInsts++;
+ }
+ }
+
+ // Check if IQ or LSQ is full. If so we'll need to break and stop
+ // removing instructions. Also update the number of insts to remove
+ // from the queue. Check here if we don't care about exact stall
+ // conditions.
+/*
+ bool stall = false;
+ if (IQ.isFull()) {
+ DPRINTF(BE, "IQ is full!\n");
+ stall = true;
+ } else if (LSQ.isFull()) {
+ DPRINTF(BE, "LSQ is full!\n");
+ stall = true;
+ } else if (isFull()) {
+ DPRINTF(BE, "ROB is full!\n");
+ stall = true;
+ ROB_fcount++;
+ }
+ if (stall) {
+ d2i.advance();
+ dispatchStall();
+ return;
+ }
+*/
+}
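// ---------------------------------------------------------------------------
// [Editor's sketch -- not part of this changeset] The renameTable updates in
// dispatchInsts() implement a value-less rename map: each destination
// register points at the youngest in-flight producer, so later consumers can
// chain their source dependence off it. A hypothetical minimal version (the
// real table is an array indexed by register, holding DynInstPtrs):

#include <unordered_map>
#include <memory>

struct SketchInst { unsigned long long seqNum; };

class RenameMap
{
  public:
    // Dispatch: the new instruction becomes the register's producer.
    void rename(int reg, std::shared_ptr<SketchInst> producer)
    { table[reg] = producer; }

    // Lookup: current producer, or null if the value is architectural.
    std::shared_ptr<SketchInst> producer(int reg) const
    {
        std::unordered_map<int, std::shared_ptr<SketchInst> >::const_iterator
            it = table.find(reg);
        return it == table.end() ? std::shared_ptr<SketchInst>() : it->second;
    }

  private:
    std::unordered_map<int, std::shared_ptr<SketchInst> > table;
};
// ---------------------------------------------------------------------------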
+
+template <class Impl>
+void
+LWBackEnd<Impl>::dispatchStall()
+{
+ dispatchStatus = Blocked;
+ if (!cpu->decoupledFrontEnd) {
+ // Tell front end to stall here through a timebuffer, or just tell
+ // it directly.
+ }
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::checkDispatchStatus()
+{
+ DPRINTF(BE, "Checking dispatch status\n");
+ assert(dispatchStatus == Blocked);
+ if (!LSQ.isFull() && !isFull()) {
+ DPRINTF(BE, "Dispatch no longer blocked\n");
+ dispatchStatus = Running;
+ dispatchInsts();
+ }
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::executeInsts()
+{
+ DPRINTF(BE, "Trying to execute instructions\n");
+
+ int num_executed = 0;
+ while (!exeList.empty() && num_executed < issueWidth) {
+ DynInstPtr inst = exeList.top();
+
+ DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n",
+ inst->seqNum, inst->readPC());
+
+ // Check if the instruction is squashed; if so then skip it
+ // and don't count it towards the FU usage.
+ if (inst->isSquashed()) {
+ DPRINTF(BE, "Execute: Instruction was squashed.\n");
+
+ // Not sure how to handle this plus the method of sending # of
+ // instructions to use. Probably will just have to count it
+ // towards the bandwidth usage, but not the FU usage.
+ ++num_executed;
+
+ // Consider this instruction executed so that commit can go
+ // ahead and retire the instruction.
+ inst->setExecuted();
+
+ // Not sure if I should set this here or just let commit try to
+ // commit any squashed instructions. I like the latter a bit more.
+ inst->setCanCommit();
+
+// ++iewExecSquashedInsts;
+ exeList.pop();
+
+ continue;
+ }
+
+ Fault fault = NoFault;
+
+ // Execute instruction.
+ // Note that if the instruction faults, it will be handled
+ // at the commit stage.
+ if (inst->isMemRef() &&
+ (!inst->isDataPrefetch() && !inst->isInstPrefetch())) {
+ DPRINTF(BE, "Execute: Initiating access for memory "
+ "reference.\n");
+
+ if (inst->isLoad()) {
+ LSQ.executeLoad(inst);
+ } else if (inst->isStore()) {
- LSQ.executeStore(inst);
- if (inst->req && !(inst->req->getFlags() & LOCKED)) {
++ Fault fault = LSQ.executeStore(inst);
++
++ if (!inst->isStoreConditional() && fault == NoFault) {
++ inst->setExecuted();
++
++ instToCommit(inst);
++ } else if (fault != NoFault) {
++ // If the instruction faulted, then we need to send it along to commit
++ // without the instruction completing.
++ // Send this instruction to commit, also make sure iew stage
++ // realizes there is activity.
+ inst->setExecuted();
+
+ instToCommit(inst);
+ }
+ } else {
+ panic("Unknown mem type!");
+ }
+ } else {
+ inst->execute();
+
+ inst->setExecuted();
+
+ instToCommit(inst);
+ }
+
+ updateExeInstStats(inst);
+
+ ++funcExeInst;
+ ++num_executed;
+
+ exeList.pop();
+
+ if (inst->mispredicted()) {
+ squashDueToBranch(inst);
+ break;
+ } else if (LSQ.violation()) {
+ // Get the DynInst that caused the violation. Note that this
+ // clears the violation signal.
+ DynInstPtr violator;
+ violator = LSQ.getMemDepViolator();
+
+ DPRINTF(BE, "LDSTQ detected a violation. Violator PC: "
+ "%#x, inst PC: %#x. Addr is: %#x.\n",
+ violator->readPC(), inst->readPC(), inst->physEffAddr);
+
+ // Squash.
+ squashDueToMemViolation(inst);
+ }
+ }
+
- issued_ops[0]+= num_executed;
- n_issued_dist[num_executed]++;
++ issuedOps[0]+= num_executed;
++ nIssuedDist[num_executed]++;
+}
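// ---------------------------------------------------------------------------
// [Editor's sketch -- not part of this changeset] exeList behaves as an
// oldest-first ready queue: top() must yield the lowest sequence number so
// that execution and the mispredict/violation squashes above proceed in age
// order. A sketch of such a queue (the real comparator type is defined in
// the backend header, which this excerpt does not show):

#include <queue>
#include <vector>

struct ReadyInst { unsigned long long seqNum; };

struct OlderFirst
{
    bool operator()(const ReadyInst &a, const ReadyInst &b) const
    { return a.seqNum > b.seqNum; }   // makes priority_queue a min-heap
};

typedef std::priority_queue<ReadyInst, std::vector<ReadyInst>, OlderFirst>
        ReadyQueue;
// ---------------------------------------------------------------------------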
+
+template<class Impl>
+void
+LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
+{
- DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
- inst->seqNum, inst->readPC());
-
- inst->setCanCommit();
-
+ DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
+ inst->seqNum, inst->readPC());
+
+ if (!inst->isSquashed()) {
- DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
- inst->seqNum, inst->readPC());
-
- inst->setCanCommit();
-
+ if (inst->isExecuted()) {
+ inst->setResultReady();
+ int dependents = wakeDependents(inst);
+ if (dependents) {
- producer_inst[0]++;
- consumer_inst[0]+= dependents;
++ producerInst[0]++;
++ consumerInst[0]+= dependents;
+ }
+ }
+ }
+
- writeback_count[0]++;
++ writeback.push_back(inst);
++
++ numInstsToWB[0]++;
++
++ writebackCount[0]++;
+}
++
++template <class Impl>
++void
++LWBackEnd<Impl>::readyInstsForCommit()
++{
++ for (int i = numInstsToWB[-latency];
++ !writeback.empty() && i;
++ --i)
++ {
++ DynInstPtr inst = writeback.front();
++ writeback.pop_front();
++ if (!inst->isSquashed()) {
++ DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
++ inst->seqNum, inst->readPC());
++
++ inst->setCanCommit();
++ }
++ }
++}
++
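// ---------------------------------------------------------------------------
// [Editor's sketch -- not part of this changeset] numInstsToWB is a counter
// viewed through a delay: instToCommit() bumps the current cycle's slot, and
// readyInstsForCommit() drains numInstsToWB[-latency], the count recorded
// `latency` cycles earlier, so setCanCommit() lags writeback by the
// configured latency. A self-contained model of that counter:

#include <vector>
#include <cstddef>

struct DelayedCount
{
    explicit DelayedCount(int lat) : ring(lat + 1, 0), head(0), latency(lat) { }

    void bump() { ++ring[head]; }            // record in this cycle's slot

    int delayed() const                      // count from `latency` cycles ago
    { return ring[(head + latency) % ring.size()]; }

    void advance()                           // once per cycle; recycle a slot
    {
        head = (head + ring.size() - 1) % ring.size();
        ring[head] = 0;
    }

    std::vector<int> ring;
    std::size_t head;
    int latency;
};
// ---------------------------------------------------------------------------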
+#if 0
+template <class Impl>
+void
+LWBackEnd<Impl>::writebackInsts()
+{
+ int wb_width = wbWidth;
+ // Using this method I'm not quite sure how to prevent an
+ // instruction from waking its own dependents multiple times,
+ // without the guarantee that commit always has enough bandwidth
+ // to accept all instructions being written back. This guarantee
+ // might not be too unrealistic.
+ InstListIt wb_inst_it = writeback.begin();
+ InstListIt wb_end_it = writeback.end();
+ int inst_num = 0;
+ int consumer_insts = 0;
+
+ for (; inst_num < wb_width &&
+ wb_inst_it != wb_end_it; inst_num++) {
+ DynInstPtr inst = (*wb_inst_it);
+
+ // Some instructions will be sent to commit without having
+ // executed because they need commit to handle them.
+ // E.g. Uncached loads have not actually executed when they
+ // are first sent to commit. Instead commit must tell the LSQ
+ // when it's ready to execute the uncached load.
+ if (!inst->isSquashed()) {
+ DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
+ inst->seqNum, inst->readPC());
+
+ inst->setCanCommit();
+ inst->setResultReady();
+
+ if (inst->isExecuted()) {
+ int dependents = wakeDependents(inst);
+ if (dependents) {
+ producer_inst[0]++;
+ consumer_insts+= dependents;
+ }
+ }
+ }
+
+ writeback.erase(wb_inst_it++);
+ }
+ LSQ.writebackStores();
+ consumer_inst[0]+= consumer_insts;
+ writeback_count[0]+= inst_num;
+}
+#endif
+template <class Impl>
+bool
+LWBackEnd<Impl>::commitInst(int inst_num)
+{
+ // Read instruction from the head of the ROB
+ DynInstPtr inst = instList.back();
+
+ // Make sure instruction is valid
+ assert(inst);
+
+ if (!inst->readyToCommit())
+ return false;
+
+ DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n",
+ inst->seqNum, inst->readPC());
+
+ thread->setPC(inst->readPC());
+ thread->setNextPC(inst->readNextPC());
+ inst->setAtCommit();
+
+ // If the instruction is not executed yet, then it is a non-speculative
+ // or store inst. Signal backwards that it should be executed.
+ if (!inst->isExecuted()) {
+ if (inst->isNonSpeculative() ||
- inst->isStoreConditional() ||
++ (inst->isStoreConditional() && inst->getFault() == NoFault) ||
+ inst->isMemBarrier() ||
+ inst->isWriteBarrier()) {
+#if !FULL_SYSTEM
+ // Hack to make sure syscalls aren't executed until all stores
+ // write back their data. This direct communication shouldn't
+ // be used for anything other than this.
+ if (inst_num > 0 || LSQ.hasStoresToWB())
+#else
+ if ((inst->isMemBarrier() || inst->isWriteBarrier() ||
+ inst->isQuiesce()) &&
+ LSQ.hasStoresToWB())
+#endif
+ {
+ DPRINTF(BE, "Waiting for all stores to writeback.\n");
+ return false;
+ }
+
+ DPRINTF(BE, "Encountered a store or non-speculative "
+ "instruction at the head of the ROB, PC %#x.\n",
+ inst->readPC());
+
+ if (inst->isMemBarrier() || inst->isWriteBarrier()) {
+ DPRINTF(BE, "Waking dependents on barrier [sn:%lli]\n",
+ inst->seqNum);
+ assert(memBarrier);
+ wakeDependents(inst, true);
+ if (memBarrier == inst)
+ memBarrier = NULL;
+ inst->clearMemDependents();
+ }
+
+ // Send back the non-speculative instruction's sequence number.
+ if (inst->iqItValid) {
+ DPRINTF(BE, "Removing instruction from waiting list\n");
+ waitingList.erase(inst->iqIt);
+ inst->iqItValid = false;
+ waitingInsts--;
+ assert(waitingInsts >= 0);
+ if (inst->isStore())
+ removeWaitingMemOp(inst);
+ }
+
+ exeList.push(inst);
+
+ // Change the instruction so it won't try to commit again until
+ // it is executed.
+ inst->clearCanCommit();
+
+// ++commitNonSpecStalls;
+
+ return false;
+ } else if (inst->isLoad()) {
+ DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n",
+ inst->seqNum, inst->readPC());
+
+ // Send back the non-speculative instruction's sequence
+ // number. Maybe just tell the lsq to re-execute the load.
+
+ if (inst->iqItValid) {
+ DPRINTF(BE, "Removing instruction from waiting list\n");
+ waitingList.erase(inst->iqIt);
+ inst->iqItValid = false;
+ waitingInsts--;
+ assert(waitingInsts >= 0);
+ removeWaitingMemOp(inst);
+ }
+ replayMemInst(inst);
+
+ inst->clearCanCommit();
+
+ return false;
+ } else {
+ panic("Trying to commit un-executed instruction "
+ "of unknown type!\n");
+ }
+ }
+
+ // Not handled for now.
+ assert(!inst->isThreadSync());
+ assert(inst->memDepReady());
+ // Stores will mark themselves as totally completed as they need
+ // to wait to writeback to memory. @todo: Hack...attempt to fix
+ // having the checker be forced to wait until a store completes in
+ // order to check all of the instructions. If the store at the
+ // head of the check list misses, but a later store hits, then
+ // loads in the checker may see the younger store values instead
+ // of the store they should see. Either the checker needs its own
+ // memory (annoying to update), its own store buffer (how to tell
+ // which value is correct?), or something else...
+ if (!inst->isStore()) {
+ inst->setCompleted();
+ }
+ // Check if the instruction caused a fault. If so, trap.
+ Fault inst_fault = inst->getFault();
+
+ // Use checker prior to updating anything due to traps or PC
+ // based events.
+#if USE_CHECKER
+ if (checker) {
+ checker->verify(inst);
+ }
+#endif
+
+ if (inst_fault != NoFault) {
+ DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n",
+ inst->seqNum, inst->readPC());
+
+ // Instruction is completed as it has a fault.
+ inst->setCompleted();
+
+ if (LSQ.hasStoresToWB()) {
+ DPRINTF(BE, "Stores still in flight, will wait until drained.\n");
+ return false;
+ } else if (inst_num != 0) {
+ DPRINTF(BE, "Will wait until instruction is head of commit group.\n");
+ return false;
+ }
+#if USE_CHECKER
+ else if (checker && inst->isStore()) {
+ checker->verify(inst);
+ }
+#endif
+
+ thread->setInst(
+ static_cast<TheISA::MachInst>(inst->staticInst->machInst));
+
+ handleFault(inst_fault);
+ return false;
+ }
+
+ int freed_regs = 0;
+
+ for (int i = 0; i < inst->numDestRegs(); ++i) {
+ DPRINTF(BE, "Commit rename map setting reg %i to [sn:%lli]\n",
+ (int)inst->destRegIdx(i), inst->seqNum);
+ thread->renameTable[inst->destRegIdx(i)] = inst;
+ ++freed_regs;
+ }
+
++#if FULL_SYSTEM
++ if (thread->profile) {
++// bool usermode =
++// (xc->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
++// thread->profilePC = usermode ? 1 : inst->readPC();
++ thread->profilePC = inst->readPC();
++ ProfileNode *node = thread->profile->consume(thread->getXCProxy(),
++ inst->staticInst);
++
++ if (node)
++ thread->profileNode = node;
++ }
++#endif
++
+ if (inst->traceData) {
+ inst->traceData->setFetchSeq(inst->seqNum);
+ inst->traceData->setCPSeq(thread->numInst);
+ inst->traceData->finalize();
+ inst->traceData = NULL;
+ }
+
++ if (inst->isCopy())
++ panic("Should not commit any copy instructions!");
++
+ inst->clearDependents();
+
+ frontEnd->addFreeRegs(freed_regs);
+
+ instList.pop_back();
+
+ --numInsts;
+ ++thread->funcExeInst;
+ // Maybe move this to where the fault is handled; if the fault is
+ // handled, don't try to set this myself as the fault will set it.
+ // If not, then I set thread->PC = thread->nextPC and
+ // thread->nextPC = thread->nextPC + 4.
+ thread->setPC(thread->readNextPC());
+ thread->setNextPC(thread->readNextPC() + sizeof(TheISA::MachInst));
+ updateComInstStats(inst);
+
+ // Write the done sequence number here.
+ toIEW->doneSeqNum = inst->seqNum;
+ lastCommitCycle = curTick;
+
+#if FULL_SYSTEM
+ int count = 0;
+ Addr oldpc;
+ do {
+ if (count == 0)
+ assert(!thread->inSyscall && !thread->trapPending);
+ oldpc = thread->readPC();
+ cpu->system->pcEventQueue.service(
+ thread->getTC());
+ count++;
+ } while (oldpc != thread->readPC());
+ if (count > 1) {
+ DPRINTF(BE, "PC skip function event, stopping commit\n");
+ tcSquash = true;
+ return false;
+ }
+#endif
+ return true;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::commitInsts()
+{
+ // Not sure this should be a loop or not.
+ int inst_num = 0;
+ while (!instList.empty() && inst_num < commitWidth) {
+ if (instList.back()->isSquashed()) {
+ instList.back()->clearDependents();
++ ROBSquashedInsts[instList.back()->threadNumber]++;
+ instList.pop_back();
+ --numInsts;
- ROBSquashedInsts[instList.back()->threadNumber]++;
+ continue;
+ }
+
+ if (!commitInst(inst_num++)) {
+ DPRINTF(BE, "Can't commit, Instruction [sn:%lli] PC "
+ "%#x is head of ROB and not ready\n",
+ instList.back()->seqNum, instList.back()->readPC());
+ --inst_num;
+ break;
+ }
+ }
- n_committed_dist.sample(inst_num);
++ nCommittedDist.sample(inst_num);
+}
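// ---------------------------------------------------------------------------
// [Editor's sketch -- not part of this changeset] commitInsts() above is
// plain in-order retirement: squashed entries drain from the ROB tail without
// consuming commit bandwidth, real instructions retire up to commitWidth, and
// the first not-ready head stalls the cycle. The control flow, schematically:

#include <deque>

struct RobSlot { bool squashed; bool readyToCommit; };

int commitCycle(std::deque<RobSlot> &rob, int commitWidth)
{
    int committed = 0;
    while (!rob.empty() && committed < commitWidth) {
        if (rob.back().squashed) {        // free: doesn't count against width
            rob.pop_back();
            continue;
        }
        if (!rob.back().readyToCommit)    // head not ready: stop this cycle
            break;
        rob.pop_back();                   // retire in program order
        ++committed;
    }
    return committed;                     // the value sampled by nCommittedDist
}
// ---------------------------------------------------------------------------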
+
+template <class Impl>
+void
+LWBackEnd<Impl>::squash(const InstSeqNum &sn)
+{
+ LSQ.squash(sn);
+
+ int freed_regs = 0;
- InstListIt waiting_list_end = waitingList.end();
++ InstListIt insts_end_it = waitingList.end();
+ InstListIt insts_it = waitingList.begin();
+
- while (insts_it != waiting_list_end && (*insts_it)->seqNum > sn)
++ while (insts_it != insts_end_it && (*insts_it)->seqNum > sn)
+ {
+ if ((*insts_it)->isSquashed()) {
+ ++insts_it;
+ continue;
+ }
+ DPRINTF(BE, "Squashing instruction on waitingList PC %#x, [sn:%lli].\n",
+ (*insts_it)->readPC(),
+ (*insts_it)->seqNum);
+
+ if ((*insts_it)->isMemRef()) {
+ DPRINTF(BE, "Squashing a waiting mem op [sn:%lli]\n",
+ (*insts_it)->seqNum);
+ removeWaitingMemOp((*insts_it));
+ }
+
+ waitingList.erase(insts_it++);
+ waitingInsts--;
+ }
+ assert(waitingInsts >= 0);
+
+ insts_it = instList.begin();
+
+ while (!instList.empty() && (*insts_it)->seqNum > sn)
+ {
+ if ((*insts_it)->isSquashed()) {
++ panic("Instruction should not be already squashed and on list!");
+ ++insts_it;
+ continue;
+ }
+ DPRINTF(BE, "Squashing instruction on inst list PC %#x, [sn:%lli].\n",
+ (*insts_it)->readPC(),
+ (*insts_it)->seqNum);
+
+ // Mark the instruction as squashed, and ready to commit so that
+ // it can drain out of the pipeline.
+ (*insts_it)->setSquashed();
+
+ (*insts_it)->setCanCommit();
+
+ (*insts_it)->clearInROB();
+
+ for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
+ DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
+ DPRINTF(BE, "Commit rename map setting reg %i to [sn:%lli]\n",
+ (int)(*insts_it)->destRegIdx(i), prev_dest->seqNum);
+ renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
+ ++freed_regs;
+ }
+
+ (*insts_it)->clearDependents();
+
+ squashedInsts[(*insts_it)->threadNumber]++;
+
+ instList.erase(insts_it++);
+ --numInsts;
+ }
+
- template <class Impl>
- void
- LWBackEnd<Impl>::fetchFault(Fault &fault)
- {
- faultFromFetch = fault;
- fetchHasFault = true;
- }
-
+ while (memBarrier && memBarrier->seqNum > sn) {
+ DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously "
+ "squashed)\n", memBarrier->seqNum);
+ memBarrier->clearMemDependents();
+ if (memBarrier->memDepReady()) {
+ DPRINTF(BE, "No previous barrier\n");
+ memBarrier = NULL;
+ } else {
+ std::list<DynInstPtr> &srcs = memBarrier->getMemSrcs();
+ memBarrier = srcs.front();
+ srcs.pop_front();
+ assert(srcs.empty());
+ DPRINTF(BE, "Previous barrier: [sn:%lli]\n",
+ memBarrier->seqNum);
+ }
+ }
+
++ insts_it = replayList.begin();
++ insts_end_it = replayList.end();
++ while (!replayList.empty() && insts_it != insts_end_it) {
++ if ((*insts_it)->seqNum < sn) {
++ ++insts_it;
++ continue;
++ }
++ assert((*insts_it)->isSquashed());
++
++ replayList.erase(insts_it++);
++ }
++
+ frontEnd->addFreeRegs(freed_regs);
+}
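// ---------------------------------------------------------------------------
// [Editor's sketch -- not part of this changeset] The instList walk above
// restores the rename map by replaying, youngest-first, each squashed
// instruction's saved previous producer -- exactly undoing what dispatch did
// in program order. Reduced to its essentials (types hypothetical):

#include <unordered_map>

struct SquashedInst
{
    int destReg;
    const SquashedInst *prevProducer;   // producer recorded at rename time
};

// Call once per squashed instruction, youngest first.
inline void rollbackRename(
    std::unordered_map<int, const SquashedInst *> &renameTable,
    const SquashedInst &inst)
{
    renameTable[inst.destReg] = inst.prevProducer;
}
// ---------------------------------------------------------------------------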
+
+template <class Impl>
+void
+LWBackEnd<Impl>::squashFromTC()
+{
+ InstSeqNum squashed_inst = robEmpty() ? 0 : instList.back()->seqNum - 1;
+ squash(squashed_inst);
+ frontEnd->squash(squashed_inst, thread->readPC(),
+ false, false);
+ frontEnd->interruptPending = false;
+
+ thread->trapPending = false;
+ thread->inSyscall = false;
+ tcSquash = false;
+ commitStatus = Running;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::squashFromTrap()
+{
+ InstSeqNum squashed_inst = robEmpty() ? 0 : instList.back()->seqNum - 1;
+ squash(squashed_inst);
+ frontEnd->squash(squashed_inst, thread->readPC(),
+ false, false);
+ frontEnd->interruptPending = false;
+
+ thread->trapPending = false;
+ thread->inSyscall = false;
+ trapSquash = false;
+ commitStatus = Running;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::squashDueToBranch(DynInstPtr &inst)
+{
+ // Update the branch predictor state I guess
+ DPRINTF(BE, "Squashing due to branch [sn:%lli], will restart at PC %#x\n",
+ inst->seqNum, inst->readNextPC());
+ squash(inst->seqNum);
+ frontEnd->squash(inst->seqNum, inst->readNextPC(),
+ true, inst->mispredicted());
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::squashDueToMemViolation(DynInstPtr &inst)
+{
+ // Update the branch predictor state I guess
+ DPRINTF(BE, "Squashing due to violation [sn:%lli], will restart at PC %#x\n",
+ inst->seqNum, inst->readNextPC());
+ squash(inst->seqNum);
+ frontEnd->squash(inst->seqNum, inst->readNextPC(),
+ false, inst->mispredicted());
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
+{
+ DPRINTF(IEW, "Memory blocked, squashing load and younger insts, "
+ "PC: %#x [sn:%i].\n", inst->readPC(), inst->seqNum);
+
+ squash(inst->seqNum - 1);
+ frontEnd->squash(inst->seqNum - 1, inst->readPC());
+}
+
- template <class Impl>
- void
- LWBackEnd<Impl>::fetchFault(Fault &fault)
- {
- faultFromFetch = fault;
- fetchHasFault = true;
- }
-
+template <class Impl>
+void
+LWBackEnd<Impl>::switchOut()
+{
+ switchPending = true;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::doSwitchOut()
+{
+ switchedOut = true;
+ switchPending = false;
+ // Need to get rid of all committed, non-speculative state and write it
+ // to memory/TC. In this case this is stores that have committed and not
+ // yet written back.
+ assert(robEmpty());
+ assert(!LSQ.hasStoresToWB());
-
- squash(0);
++ writeback.clear();
++ for (int i = 0; i < numInstsToWB.getSize() + 1; ++i)
++ numInstsToWB.advance();
++
++// squash(0);
++ assert(waitingList.empty());
++ assert(instList.empty());
++ assert(replayList.empty());
++ assert(writeback.empty());
+ LSQ.switchOut();
- switchedOut = false;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::takeOverFrom(ThreadContext *old_tc)
+{
++ assert(!squashPending);
++ squashSeqNum = 0;
++ squashNextPC = 0;
+ tcSquash = false;
+ trapSquash = false;
+
+ numInsts = 0;
+ numWaitingMemOps = 0;
+ waitingMemOps.clear();
+ waitingInsts = 0;
+ switchedOut = false;
+ dispatchStatus = Running;
+ commitStatus = Running;
+ LSQ.takeOverFrom(old_tc);
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
+{
+ int thread_number = inst->threadNumber;
+
+ //
+ // Pick off the software prefetches
+ //
+#ifdef TARGET_ALPHA
+ if (inst->isDataPrefetch())
- exe_swp[thread_number]++;
++ exeSwp[thread_number]++;
+ else
- exe_inst[thread_number]++;
++ exeInst[thread_number]++;
+#else
- exe_inst[thread_number]++;
++ exeInst[thread_number]++;
+#endif
+
+ //
+ // Control operations
+ //
+ if (inst->isControl())
- exe_branches[thread_number]++;
++ exeBranches[thread_number]++;
+
+ //
+ // Memory operations
+ //
+ if (inst->isMemRef()) {
- exe_refs[thread_number]++;
++ exeRefs[thread_number]++;
+
+ if (inst->isLoad())
- exe_loads[thread_number]++;
++ exeLoads[thread_number]++;
+ }
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
+{
+ unsigned tid = inst->threadNumber;
+
+ // keep an instruction count
+ thread->numInst++;
+ thread->numInsts++;
+
+ cpu->numInst++;
+ //
+ // Pick off the software prefetches
+ //
+#ifdef TARGET_ALPHA
+ if (inst->isDataPrefetch()) {
- stat_com_swp[tid]++;
++ statComSwp[tid]++;
+ } else {
- stat_com_inst[tid]++;
++ statComInst[tid]++;
+ }
+#else
- stat_com_inst[tid]++;
++ statComInst[tid]++;
+#endif
+
+ //
+ // Control Instructions
+ //
+ if (inst->isControl())
- stat_com_branches[tid]++;
++ statComBranches[tid]++;
+
+ //
+ // Memory references
+ //
+ if (inst->isMemRef()) {
- stat_com_refs[tid]++;
++ statComRefs[tid]++;
+
+ if (inst->isLoad()) {
- stat_com_loads[tid]++;
++ statComLoads[tid]++;
+ }
+ }
+
+ if (inst->isMemBarrier()) {
- stat_com_membars[tid]++;
++ statComMembars[tid]++;
+ }
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::dumpInsts()
+{
+ int num = 0;
+ int valid_num = 0;
+
+ InstListIt inst_list_it = --(instList.end());
+
+ cprintf("ExeList size: %i\n", exeList.size());
+
+ cprintf("Inst list size: %i\n", instList.size());
+
+ while (inst_list_it != instList.end())
++ {
++ cprintf("Instruction:%i\n",
++ num);
++ if (!(*inst_list_it)->isSquashed()) {
++ if (!(*inst_list_it)->isIssued()) {
++ ++valid_num;
++ cprintf("Count:%i\n", valid_num);
++ } else if ((*inst_list_it)->isMemRef() &&
++ !(*inst_list_it)->memOpDone) {
++ // Loads that have not been marked as executed still count
++ // towards the total instructions.
++ ++valid_num;
++ cprintf("Count:%i\n", valid_num);
++ }
++ }
++
++ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
++ "Issued:%i\nSquashed:%i\n",
++ (*inst_list_it)->readPC(),
++ (*inst_list_it)->seqNum,
++ (*inst_list_it)->threadNumber,
++ (*inst_list_it)->isIssued(),
++ (*inst_list_it)->isSquashed());
++
++ if ((*inst_list_it)->isMemRef()) {
++ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
++ }
++
++ cprintf("\n");
++
++ inst_list_it--;
++ ++num;
++ }
++
++ inst_list_it = --(writeback.end());
++
++ cprintf("Writeback list size: %i\n", writeback.size());
++
++ while (inst_list_it != writeback.end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+ if (!(*inst_list_it)->isSquashed()) {
+ if (!(*inst_list_it)->isIssued()) {
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ } else if ((*inst_list_it)->isMemRef() &&
+ !(*inst_list_it)->memOpDone) {
+ // Loads that have not been marked as executed still count
+ // towards the total instructions.
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ }
+ }
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Issued:%i\nSquashed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+
+ if ((*inst_list_it)->isMemRef()) {
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ }
+
+ cprintf("\n");
+
+ inst_list_it--;
+ ++num;
+ }
+
+ cprintf("Waiting list size: %i\n", waitingList.size());
+
+ inst_list_it = --(waitingList.end());
+
+ while (inst_list_it != waitingList.end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+ if (!(*inst_list_it)->isSquashed()) {
+ if (!(*inst_list_it)->isIssued()) {
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ } else if ((*inst_list_it)->isMemRef() &&
+ !(*inst_list_it)->memOpDone) {
+ // Loads that have not been marked as executed still count
+ // towards the total instructions.
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ }
+ }
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Issued:%i\nSquashed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+
+ if ((*inst_list_it)->isMemRef()) {
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ }
+
+ cprintf("\n");
+
+ inst_list_it--;
+ ++num;
+ }
+
+ cprintf("waitingMemOps list size: %i\n", waitingMemOps.size());
+
+ MemIt waiting_it = waitingMemOps.begin();
+
+ while (waiting_it != waitingMemOps.end())
+ {
+ cprintf("[sn:%lli] ", (*waiting_it));
+ waiting_it++;
+ ++num;
+ }
+ cprintf("\n");
+}
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_OZONE_LW_LSQ_HH__
+#define __CPU_OZONE_LW_LSQ_HH__
+
+#include <list>
+#include <map>
+#include <queue>
+#include <algorithm>
+
+#include "arch/faults.hh"
+#include "arch/types.hh"
+#include "config/full_system.hh"
+#include "base/hashmap.hh"
+#include "cpu/inst_seq.hh"
+#include "mem/packet.hh"
+#include "mem/port.hh"
+//#include "mem/page_table.hh"
+#include "sim/debug.hh"
+#include "sim/sim_object.hh"
+
+class MemObject;
+
+/**
+ * Class that implements the actual LQ and SQ for each specific thread.
+ * Both are circular queues; load entries are freed upon committing, while
+ * store entries are freed once they writeback. The LSQUnit tracks if there
+ * are memory ordering violations, and also detects partial load to store
+ * forwarding cases (a store only has part of a load's data) that requires
+ * the load to wait until the store writes back. In the former case it
+ * holds onto the instruction until the dependence unit looks at it, and
+ * in the latter it stalls the LSQ until the store writes back. At that
+ * point the load is replayed.
+ */
+template <class Impl>
+class OzoneLWLSQ {
+ public:
+ typedef typename Impl::Params Params;
+ typedef typename Impl::OzoneCPU OzoneCPU;
+ typedef typename Impl::BackEnd BackEnd;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::IssueStruct IssueStruct;
+
+ typedef TheISA::IntReg IntReg;
+
+ typedef typename std::map<InstSeqNum, DynInstPtr>::iterator LdMapIt;
+
+ public:
+ /** Constructs an LSQ unit. init() must be called prior to use. */
+ OzoneLWLSQ();
+
+ /** Initializes the LSQ unit with the specified number of entries. */
+ void init(Params *params, unsigned maxLQEntries,
+ unsigned maxSQEntries, unsigned id);
+
+ /** Returns the name of the LSQ unit. */
+ std::string name() const;
+
++ void regStats();
++
+ /** Sets the CPU pointer. */
+ void setCPU(OzoneCPU *cpu_ptr);
+
+ /** Sets the back-end stage pointer. */
+ void setBE(BackEnd *be_ptr)
+ { be = be_ptr; }
+
+ Port *getDcachePort() { return &dcachePort; }
+
+ /** Ticks the LSQ unit, which in this case only resets the number of
+ * used cache ports.
+ * @todo: Move the number of used ports up to the LSQ level so it can
+ * be shared by all LSQ units.
+ */
+ void tick() { usedPorts = 0; }
+
+ /** Inserts an instruction. */
+ void insert(DynInstPtr &inst);
+ /** Inserts a load instruction. */
+ void insertLoad(DynInstPtr &load_inst);
+ /** Inserts a store instruction. */
+ void insertStore(DynInstPtr &store_inst);
+
+ /** Executes a load instruction. */
+ Fault executeLoad(DynInstPtr &inst);
+
+ /** Executes a store instruction. */
+ Fault executeStore(DynInstPtr &inst);
+
+ /** Commits the head load. */
+ void commitLoad();
+ /** Commits loads older than a specific sequence number. */
+ void commitLoads(InstSeqNum &youngest_inst);
+
+ /** Commits stores older than a specific sequence number. */
+ void commitStores(InstSeqNum &youngest_inst);
+
+ /** Writes back stores. */
+ void writebackStores();
+
+ /** Completes the data access that has been returned from the
+ * memory system. */
+ void completeDataAccess(PacketPtr pkt);
+
+ // @todo: Include stats in the LSQ unit.
+ //void regStats();
+
+ /** Clears all the entries in the LQ. */
+ void clearLQ();
+
+ /** Clears all the entries in the SQ. */
+ void clearSQ();
+
+ /** Resizes the LQ to a given size. */
+ void resizeLQ(unsigned size);
+
+ /** Resizes the SQ to a given size. */
+ void resizeSQ(unsigned size);
+
+ /** Squashes all instructions younger than a specific sequence number. */
+ void squash(const InstSeqNum &squashed_num);
+
+ /** Returns if there is a memory ordering violation. Value is reset upon
+ * call to getMemDepViolator().
+ */
+ bool violation() { return memDepViolator; }
+
+ /** Returns the memory ordering violator. */
+ DynInstPtr getMemDepViolator();
+
+ /** Returns if a load became blocked due to the memory system. The
+ * flag stays set until cleared via clearLoadBlocked().
+ */
+ bool loadBlocked()
+ { return isLoadBlocked; }
+
+ void clearLoadBlocked()
+ { isLoadBlocked = false; }
+
+ bool isLoadBlockedHandled()
+ { return loadBlockedHandled; }
+
+ void setLoadBlockedHandled()
+ { loadBlockedHandled = true; }
+
+ /** Returns the number of free entries (min of free LQ and SQ entries). */
+ unsigned numFreeEntries();
+
+ /** Returns the number of loads ready to execute. */
+ int numLoadsReady();
+
+ /** Returns the number of loads in the LQ. */
+ int numLoads() { return loads; }
+
+ /** Returns the number of stores in the SQ. */
- bool sqFull() { return stores >= (SQEntries - 1); }
++ int numStores() { return stores + storesInFlight; }
+
+ /** Returns if either the LQ or SQ is full. */
+ bool isFull() { return lqFull() || sqFull(); }
+
+ /** Returns if the LQ is full. */
+ bool lqFull() { return loads >= (LQEntries - 1); }
+
+ /** Returns if the SQ is full. */
- bool sqFull() { return stores >= (SQEntries - 1); }
++ bool sqFull() { return (stores + storesInFlight) >= (SQEntries - 1); }
+
+ /** Debugging function to dump instructions in the LSQ. */
+ void dumpInsts();
+
+ /** Returns the number of instructions in the LSQ. */
+ unsigned getCount() { return loads + stores; }
+
+ /** Returns if there are any stores to writeback. */
+ bool hasStoresToWB() { return storesToWB; }
+
+ /** Returns the number of stores to writeback. */
+ int numStoresToWB() { return storesToWB; }
+
+ /** Returns if the LSQ unit will writeback on this cycle. */
+ bool willWB() { return storeQueue.back().canWB &&
+ !storeQueue.back().completed &&
+ !isStoreBlocked; }
+
+ void switchOut();
+
+ void takeOverFrom(ThreadContext *old_tc = NULL);
+
+ bool isSwitchedOut() { return switchedOut; }
+
+ bool switchedOut;
+
+ private:
+ /** Writes back the instruction, sending it to IEW. */
+ void writeback(DynInstPtr &inst, PacketPtr pkt);
+
+ /** Handles completing the send of a store to memory. */
+ void storePostSend(Packet *pkt, DynInstPtr &inst);
+
+ /** Completes the store at the specified index. */
- void completeStore(int store_idx);
++ void completeStore(DynInstPtr &inst);
++
++ void removeStore(int store_idx);
+
+ /** Handles doing the retry. */
+ void recvRetry();
+
+ private:
+ /** Pointer to the CPU. */
+ OzoneCPU *cpu;
+
+ /** Pointer to the back-end stage. */
+ BackEnd *be;
+
+ MemObject *mem;
+
+ class DcachePort : public Port
+ {
+ protected:
+ OzoneLWLSQ *lsq;
+
+ public:
+ DcachePort(OzoneLWLSQ *_lsq)
+ : lsq(_lsq)
+ { }
+
+ protected:
+ virtual Tick recvAtomic(PacketPtr pkt);
+
+ virtual void recvFunctional(PacketPtr pkt);
+
+ virtual void recvStatusChange(Status status);
+
+ virtual void getDeviceAddressRanges(AddrRangeList &resp,
+ AddrRangeList &snoop)
+ { resp.clear(); snoop.clear(); }
+
+ virtual bool recvTiming(PacketPtr pkt);
+
+ virtual void recvRetry();
+ };
+
+ /** D-cache port. */
+ DcachePort dcachePort;
+
+ public:
+ struct SQEntry {
+ /** Constructs an empty store queue entry. */
+ SQEntry()
+ : inst(NULL), req(NULL), size(0), data(0),
+ canWB(0), committed(0), completed(0), lqIt(NULL)
+ { }
+
+ /** Constructs a store queue entry for a given instruction. */
+ SQEntry(DynInstPtr &_inst)
+ : inst(_inst), req(NULL), size(0), data(0),
+ canWB(0), committed(0), completed(0), lqIt(NULL)
+ { }
+
+ /** The store instruction. */
+ DynInstPtr inst;
+ /** The memory request for the store. */
+ RequestPtr req;
+ /** The size of the store. */
+ int size;
+ /** The store data. */
+ IntReg data;
+ /** Whether or not the store can writeback. */
+ bool canWB;
+ /** Whether or not the store is committed. */
+ bool committed;
+ /** Whether or not the store is completed. */
+ bool completed;
+
+ typename std::list<DynInstPtr>::iterator lqIt;
+ };
+
+ /** Derived class to hold any sender state the LSQ needs. */
+ class LSQSenderState : public Packet::SenderState
+ {
+ public:
+ /** Default constructor. */
+ LSQSenderState()
+ : noWB(false)
+ { }
+
+ /** Instruction who initiated the access to memory. */
+ DynInstPtr inst;
+ /** Whether or not it is a load. */
+ bool isLoad;
+ /** The LQ/SQ index of the instruction. */
+ int idx;
+ /** Whether or not the instruction will need to writeback. */
+ bool noWB;
+ };
+
+ /** Writeback event, specifically for when stores forward data to loads. */
+ class WritebackEvent : public Event {
+ public:
+ /** Constructs a writeback event. */
+ WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, OzoneLWLSQ *lsq_ptr);
+
+ /** Processes the writeback event. */
+ void process();
+
+ /** Returns the description of this event. */
+ const char *description();
+
+ private:
+ /** Instruction whose results are being written back. */
+ DynInstPtr inst;
+
+ /** The packet that would have been sent to memory. */
+ PacketPtr pkt;
+
+ /** The pointer to the LSQ unit that issued the store. */
+ OzoneLWLSQ<Impl> *lsqPtr;
+ };
+
+ enum Status {
+ Running,
+ Idle,
+ DcacheMissStall,
+ DcacheMissSwitch
+ };
+
+ private:
+ /** The OzoneLWLSQ thread id. */
+ unsigned lsqID;
+
+ /** The status of the LSQ unit. */
+ Status _status;
+
+ /** The store queue. */
+ std::list<SQEntry> storeQueue;
+ /** The load queue. */
+ std::list<DynInstPtr> loadQueue;
+
+ typedef typename std::list<SQEntry>::iterator SQIt;
+ typedef typename std::list<DynInstPtr>::iterator LQIt;
+
+
+ struct HashFn {
+ size_t operator() (const int a) const
+ {
+ unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF;
+
+ return hash;
+ }
+ };
+
+ m5::hash_map<int, SQIt, HashFn> SQItHash;
+ std::queue<int> SQIndices;
+ m5::hash_map<int, LQIt, HashFn> LQItHash;
+ std::queue<int> LQIndices;
+
+ typedef typename m5::hash_map<int, LQIt, HashFn>::iterator LQHashIt;
+ typedef typename m5::hash_map<int, SQIt, HashFn>::iterator SQHashIt;
+ // Consider making these 16 bits
+ /** The number of LQ entries. */
+ unsigned LQEntries;
+ /** The number of SQ entries. */
+ unsigned SQEntries;
+
+ /** The number of load instructions in the LQ. */
+ int loads;
+ /** The number of store instructions in the SQ (excludes those waiting to
+ * writeback).
+ */
+ int stores;
+
+ int storesToWB;
+
++ public:
++ int storesInFlight;
++
++ private:
+ /// @todo Consider moving to a more advanced model with write vs read ports
+ /** The number of cache ports available each cycle. */
+ int cachePorts;
+
+ /** The number of used cache ports in this cycle. */
+ int usedPorts;
+
+ //list<InstSeqNum> mshrSeqNums;
+
++ /** Total number of memory ordering violations. */
++ Stats::Scalar<> lsqMemOrderViolation;
++
+ //Stats::Scalar<> dcacheStallCycles;
+ Counter lastDcacheStall;
+
+ // Make these per thread?
+ /** Whether or not the LSQ is stalled. */
+ bool stalled;
+ /** The store that causes the stall due to partial store to load
+ * forwarding.
+ */
+ InstSeqNum stallingStoreIsn;
+ /** The index of the above store. */
+ LQIt stallingLoad;
+
+ /** The packet that needs to be retried. */
+ PacketPtr retryPkt;
+
+ /** Whether or not a store is blocked due to the memory system. */
+ bool isStoreBlocked;
+
+ /** Whether or not a load is blocked due to the memory system. It is
+ * cleared via clearLoadBlocked() once the blocked load is handled.
+ */
+ bool isLoadBlocked;
+
+ bool loadBlockedHandled;
+
+ InstSeqNum blockedLoadSeqNum;
+
+ /** The oldest faulting load instruction. */
+ DynInstPtr loadFaultInst;
+ /** The oldest faulting store instruction. */
+ DynInstPtr storeFaultInst;
+
+ /** The oldest load that caused a memory ordering violation. */
+ DynInstPtr memDepViolator;
+
+ // Will also need how many read/write ports the Dcache has. Or keep track
+ // of that in stage that is one level up, and only call executeLoad/Store
+ // the appropriate number of times.
+
+ public:
+ /** Executes the load at the given index. */
+ template <class T>
+ Fault read(RequestPtr req, T &data, int load_idx);
+
+ /** Executes the store at the given index. */
+ template <class T>
+ Fault write(RequestPtr req, T &data, int store_idx);
+
+ /** Returns the sequence number of the head load instruction. */
+ InstSeqNum getLoadHeadSeqNum()
+ {
+ if (!loadQueue.empty()) {
+ return loadQueue.back()->seqNum;
+ } else {
+ return 0;
+ }
+
+ }
+
+ /** Returns the sequence number of the head store instruction. */
+ InstSeqNum getStoreHeadSeqNum()
+ {
+ if (!storeQueue.empty()) {
+ return storeQueue.back().inst->seqNum;
+ } else {
+ return 0;
+ }
+
+ }
+
+ /** Returns whether or not the LSQ unit is stalled. */
+ bool isStalled() { return stalled; }
+};
+
+template <class Impl>
+template <class T>
+Fault
+OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
+{
+ //Depending on issue2execute delay a squashed load could
+ //execute if it is found to be squashed in the same
+ //cycle it is scheduled to execute
+ typename m5::hash_map<int, LQIt, HashFn>::iterator
+ lq_hash_it = LQItHash.find(load_idx);
+ assert(lq_hash_it != LQItHash.end());
+ DynInstPtr inst = (*(*lq_hash_it).second);
+
+ // Make sure this isn't an uncacheable access
+ // A bit of a hackish way to get uncached accesses to work only if they're
+ // at the head of the LSQ and are ready to commit (at the head of the ROB
+ // too).
+ // @todo: Fix uncached accesses.
+ if (req->getFlags() & UNCACHEABLE &&
+ (inst != loadQueue.back() || !inst->isAtCommit())) {
+ DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of "
+ "commit/LSQ!\n",
+ inst->seqNum);
+ be->rescheduleMemInst(inst);
+ return TheISA::genMachineCheckFault();
+ }
+
+ // Check the SQ for any previous stores that might lead to forwarding
+ SQIt sq_it = storeQueue.begin();
+ int store_size = 0;
+
+ DPRINTF(OzoneLSQ, "Read called, load idx: %i addr: %#x\n",
+ load_idx, req->getPaddr());
+
+ while (sq_it != storeQueue.end() && (*sq_it).inst->seqNum > inst->seqNum)
+ ++sq_it;
+
+ while (1) {
+ // End once we've reached the top of the LSQ
+ if (sq_it == storeQueue.end()) {
+ break;
+ }
+
+ assert((*sq_it).inst);
+
+ store_size = (*sq_it).size;
+
- if (store_size == 0) {
++ if (store_size == 0 || (*sq_it).committed) {
+ sq_it++;
+ continue;
+ }
+
+ // Check if the store data is within the lower and upper bounds of
+ // addresses that the request needs.
+ bool store_has_lower_limit =
+ req->getVaddr() >= (*sq_it).inst->effAddr;
+ bool store_has_upper_limit =
+ (req->getVaddr() + req->getSize()) <= ((*sq_it).inst->effAddr +
+ store_size);
+ bool lower_load_has_store_part =
+ req->getVaddr() < ((*sq_it).inst->effAddr +
+ store_size);
+ bool upper_load_has_store_part =
+ (req->getVaddr() + req->getSize()) > (*sq_it).inst->effAddr;
+
+ // If the store's data has all of the data needed, we can forward.
+ if (store_has_lower_limit && store_has_upper_limit) {
+ int shift_amt = req->getVaddr() & (store_size - 1);
+ // Assumes byte addressing
+ shift_amt = shift_amt << 3;
+
+ // Cast this to type T?
+ data = (*sq_it).data >> shift_amt;
+
+ assert(!inst->memData);
+ inst->memData = new uint8_t[64];
+
+ memcpy(inst->memData, &data, req->getSize());
+
+ DPRINTF(OzoneLSQ, "Forwarding from store [sn:%lli] to load to "
+ "[sn:%lli] addr %#x, data %#x\n",
+ (*sq_it).inst->seqNum, inst->seqNum, req->getVaddr(),
+ *(inst->memData));
+
+ PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
+ data_pkt->dataStatic(inst->memData);
+
+ WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this);
+
+ // We'll say this has a 1 cycle load-store forwarding latency
+ // for now.
+ // @todo: Need to make this a parameter.
+ wb->schedule(curTick);
+
+ // Should keep track of stat for forwarded data
+ return NoFault;
+ } else if ((store_has_lower_limit && lower_load_has_store_part) ||
+ (store_has_upper_limit && upper_load_has_store_part) ||
+ (lower_load_has_store_part && upper_load_has_store_part)) {
+ // This is the partial store-load forwarding case where a store
+ // has only part of the load's data.
+
+ // If it's already been written back, then don't worry about
+ // stalling on it.
+ if ((*sq_it).completed) {
+ sq_it++;
+ break;
+ }
+
+ // Must stall load and force it to retry, so long as it's the oldest
+ // load that needs to do so.
+ if (!stalled ||
+ (stalled &&
+ inst->seqNum <
+ (*stallingLoad)->seqNum)) {
+ stalled = true;
+ stallingStoreIsn = (*sq_it).inst->seqNum;
+ stallingLoad = (*lq_hash_it).second;
+ }
+
+ // Tell IQ/mem dep unit that this instruction will need to be
+ // rescheduled eventually
+ be->rescheduleMemInst(inst);
+
+ DPRINTF(OzoneLSQ, "Load-store forwarding mis-match. "
+ "Store [sn:%lli] to load addr %#x\n",
+ (*sq_it).inst->seqNum, req->getVaddr());
+
+ return NoFault;
+ }
+ sq_it++;
+ }
+
+ // If there's no forwarding case, then go access memory
+ DPRINTF(OzoneLSQ, "Doing functional access for inst PC %#x\n",
+ inst->readPC());
+
+ assert(!inst->memData);
+ inst->memData = new uint8_t[64];
+
+ ++usedPorts;
+
+ DPRINTF(OzoneLSQ, "Doing timing access for inst PC %#x\n",
+ inst->readPC());
+
+ PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
+ data_pkt->dataStatic(inst->memData);
+
+ LSQSenderState *state = new LSQSenderState;
+ state->isLoad = true;
+ state->idx = load_idx;
+ state->inst = inst;
+ data_pkt->senderState = state;
+
+ // if we have a cache, do cache access too
+ if (!dcachePort.sendTiming(data_pkt)) {
+ // There's an older load that's already going to squash.
+ if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum)
+ return NoFault;
+
+ // Record that the load was blocked due to memory. This
+ // load will squash all instructions after it, be
+ // refetched, and re-executed.
+ isLoadBlocked = true;
+ loadBlockedHandled = false;
+ blockedLoadSeqNum = inst->seqNum;
+ // No fault occurred, even though the interface is blocked.
+ return NoFault;
+ }
+
+ if (req->getFlags() & LOCKED) {
+ cpu->lockFlag = true;
+ }
+
+ if (data_pkt->result != Packet::Success) {
+ DPRINTF(OzoneLSQ, "OzoneLSQ: D-cache miss!\n");
+ DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
+ inst->seqNum);
+ } else {
+ DPRINTF(OzoneLSQ, "OzoneLSQ: D-cache hit!\n");
+ DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
+ inst->seqNum);
+ }
+
+ return NoFault;
+}
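// ---------------------------------------------------------------------------
// [Editor's sketch -- not part of this changeset] The forwarding tests in
// read() are interval checks on byte ranges: forward fully when
// [load, load+size) sits inside [store, store+size); stall on any other
// intersection. The same predicate, pulled out stand-alone (the real code
// additionally shifts the store data right by (vaddr & (size-1)) * 8 bits to
// extract the loaded bytes):

#include <stdint.h>

enum ForwardResult { NoOverlap, FullForward, PartialOverlap };

inline ForwardResult
classifyForward(uint64_t ld_addr, unsigned ld_size,
                uint64_t st_addr, unsigned st_size)
{
    bool lower = ld_addr >= st_addr;                      // store covers bottom
    bool upper = ld_addr + ld_size <= st_addr + st_size;  // store covers top
    bool intersects = ld_addr < st_addr + st_size &&
                      st_addr < ld_addr + ld_size;

    if (lower && upper)
        return FullForward;
    return intersects ? PartialOverlap : NoOverlap;
}
// ---------------------------------------------------------------------------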
+
+template <class Impl>
+template <class T>
+Fault
+OzoneLWLSQ<Impl>::write(RequestPtr req, T &data, int store_idx)
+{
+ SQHashIt sq_hash_it = SQItHash.find(store_idx);
+ assert(sq_hash_it != SQItHash.end());
+
+ SQIt sq_it = (*sq_hash_it).second;
+ assert((*sq_it).inst);
+
+ DPRINTF(OzoneLSQ, "Doing write to store idx %i, addr %#x data %#x"
+ " | [sn:%lli]\n",
+ store_idx, req->getPaddr(), data, (*sq_it).inst->seqNum);
+
+ (*sq_it).req = req;
+ (*sq_it).size = sizeof(T);
+ (*sq_it).data = data;
+/*
+ assert(!req->data);
+ req->data = new uint8_t[64];
+ memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size);
+*/
+
+ // This function only writes the data to the store queue, so no fault
+ // can happen here.
+ return NoFault;
+}
+
+#endif // __CPU_OZONE_LW_LSQ_HH__
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include "config/use_checker.hh"
+
+#include "arch/faults.hh"
+#include "base/str.hh"
+#include "cpu/ozone/lw_lsq.hh"
+#include "cpu/checker/cpu.hh"
+
+template<class Impl>
+OzoneLWLSQ<Impl>::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt,
+ OzoneLWLSQ *lsq_ptr)
+ : Event(&mainEventQueue), inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
+{
+ this->setFlags(Event::AutoDelete);
+}
+
+template<class Impl>
+void
+OzoneLWLSQ<Impl>::WritebackEvent::process()
+{
+ if (!lsqPtr->isSwitchedOut()) {
+ lsqPtr->writeback(inst, pkt);
+ }
+ delete pkt;
+}
+
+template<class Impl>
+const char *
+OzoneLWLSQ<Impl>::WritebackEvent::description()
+{
+ return "Store writeback event";
+}
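// ---------------------------------------------------------------------------
// [Editor's note -- not part of this changeset] WritebackEvent is the usual
// M5 delayed-callback idiom: heap-allocate the event, mark it AutoDelete so
// the event queue frees it after process() runs, and schedule() it at an
// absolute tick. As used by read() when a store forwards data to a load:
//
//     WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this);
//     wb->schedule(curTick);   // fires "now"; add cycles for a real latency
//
// process() then hands the packet to lsqPtr->writeback(inst, pkt) unless the
// LSQ was switched out in the meantime.
// ---------------------------------------------------------------------------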
+
+template <class Impl>
+Tick
+OzoneLWLSQ<Impl>::DcachePort::recvAtomic(PacketPtr pkt)
+{
+ panic("O3CPU model does not work with atomic mode!");
+ return curTick;
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt)
+{
+ panic("O3CPU doesn't expect recvFunctional callback!");
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::DcachePort::recvStatusChange(Status status)
+{
+ if (status == RangeChange)
+ return;
+
+ panic("O3CPU doesn't expect recvStatusChange callback!");
+}
+
+template <class Impl>
+bool
+OzoneLWLSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt)
+{
+ lsq->completeDataAccess(pkt);
+ return true;
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::DcachePort::recvRetry()
+{
+ lsq->recvRetry();
+}
+
+template<class Impl>
+void
+OzoneLWLSQ<Impl>::completeDataAccess(PacketPtr pkt)
+{
+ LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
+ DynInstPtr inst = state->inst;
+ DPRINTF(IEW, "Writeback event [sn:%lli]\n", inst->seqNum);
+ DPRINTF(Activity, "Activity: Writeback event [sn:%lli]\n", inst->seqNum);
+
+ //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
+
+ if (isSwitchedOut() || inst->isSquashed()) {
+ delete state;
+ delete pkt;
+ return;
+ } else {
+ if (!state->noWB) {
+ writeback(inst, pkt);
+ }
+
+ if (inst->isStore()) {
+ completeStore(state->idx);
+ }
+ }
+
+ delete state;
+ delete pkt;
+}
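// ---------------------------------------------------------------------------
// [Editor's note -- not part of this changeset] completeDataAccess() closes
// the loop on state that travels with the packet itself: the issue paths
// allocate an LSQSenderState (instruction, isLoad, queue index), hang it on
// pkt->senderState, and the response handler recovers it with a dynamic_cast.
// Schematically, on the request side:
//
//     LSQSenderState *state = new LSQSenderState;
//     state->isLoad = true;
//     state->idx = load_idx;
//     state->inst = inst;
//     data_pkt->senderState = state;    // rides along with the request
//
// and on the response side, exactly as above:
//
//     LSQSenderState *state =
//         dynamic_cast<LSQSenderState *>(pkt->senderState);
// ---------------------------------------------------------------------------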
+
+template <class Impl>
+OzoneLWLSQ<Impl>::OzoneLWLSQ()
+ : switchedOut(false), dcachePort(this), loads(0), stores(0),
- storesToWB(0), stalled(false), isStoreBlocked(false),
++ storesToWB(0), storesInFlight(0), stalled(false), isStoreBlocked(false),
+ isLoadBlocked(false), loadBlockedHandled(false)
+{
+}
+
+template<class Impl>
+void
+OzoneLWLSQ<Impl>::init(Params *params, unsigned maxLQEntries,
+ unsigned maxSQEntries, unsigned id)
+{
+ DPRINTF(OzoneLSQ, "Creating OzoneLWLSQ%i object.\n",id);
+
+ lsqID = id;
+
+ LQEntries = maxLQEntries;
+ SQEntries = maxSQEntries;
+
+ for (int i = 0; i < LQEntries * 2; i++) {
+ LQIndices.push(i);
+ SQIndices.push(i);
+ }
+
+ mem = params->mem;
+
+ usedPorts = 0;
+ cachePorts = params->cachePorts;
+
+ loadFaultInst = storeFaultInst = memDepViolator = NULL;
+
+ blockedLoadSeqNum = 0;
+}
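// ---------------------------------------------------------------------------
// [Editor's sketch -- not part of this changeset] init() pre-loads LQIndices
// and SQIndices with twice the queue depth: queue entries live in std::lists,
// so each instruction is handed a stable small integer (lqIdx/sqIdx) and the
// hash maps translate it back to a list iterator. The recycling scheme is a
// plain free-index pool:

#include <queue>

class IndexPool
{
  public:
    explicit IndexPool(int n) { for (int i = 0; i < n; ++i) freeList.push(i); }

    int alloc() { int i = freeList.front(); freeList.pop(); return i; }
    void release(int i) { freeList.push(i); }
    bool empty() const { return freeList.empty(); }

  private:
    std::queue<int> freeList;
};
// ---------------------------------------------------------------------------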
+
+template<class Impl>
+std::string
+OzoneLWLSQ<Impl>::name() const
+{
+ return "lsqunit";
+}
+
+template<class Impl>
+void
++OzoneLWLSQ<Impl>::regStats()
++{
++ lsqMemOrderViolation
++ .name(name() + ".memOrderViolation")
++ .desc("Number of memory ordering violations");
+OzoneLWLSQ<Impl>::setCPU(OzoneCPU *cpu_ptr)
+{
+ cpu = cpu_ptr;
+ dcachePort.setName(this->name() + "-dport");
+
+#if USE_CHECKER
+ if (cpu->checker) {
+ cpu->checker->setDcachePort(&dcachePort);
+ }
+#endif
+}
+
+template<class Impl>
+void
+OzoneLWLSQ<Impl>::clearLQ()
+{
+ loadQueue.clear();
+}
+
+template<class Impl>
+void
+OzoneLWLSQ<Impl>::clearSQ()
+{
+ storeQueue.clear();
+}
+/*
+template<class Impl>
+void
+OzoneLWLSQ<Impl>::setPageTable(PageTable *pt_ptr)
+{
+ DPRINTF(OzoneLSQ, "Setting the page table pointer.\n");
+ pTable = pt_ptr;
+}
+*/
+template<class Impl>
+void
+OzoneLWLSQ<Impl>::resizeLQ(unsigned size)
+{
+ assert( size >= LQEntries);
+
+ if (size > LQEntries) {
+ while (size > loadQueue.size()) {
+ DynInstPtr dummy;
+ loadQueue.push_back(dummy);
+ LQEntries++;
+ }
+ } else {
+ LQEntries = size;
+ }
+
+}
+
+template<class Impl>
+void
+OzoneLWLSQ<Impl>::resizeSQ(unsigned size)
+{
+ if (size > SQEntries) {
+ while (size > storeQueue.size()) {
+ SQEntry dummy;
+ storeQueue.push_back(dummy);
+ SQEntries++;
+ }
+ } else {
+ SQEntries = size;
+ }
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::insert(DynInstPtr &inst)
+{
+ // Make sure we really have a memory reference.
+ assert(inst->isMemRef());
+
+ // Make sure it's one of the two classes of memory references.
+ assert(inst->isLoad() || inst->isStore());
+
+ if (inst->isLoad()) {
+ insertLoad(inst);
+ } else {
+ insertStore(inst);
+ }
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::insertLoad(DynInstPtr &load_inst)
+{
+ assert(loads < LQEntries * 2);
+ assert(!LQIndices.empty());
+ int load_index = LQIndices.front();
+ LQIndices.pop();
+
+ DPRINTF(OzoneLSQ, "Inserting load PC %#x, idx:%i [sn:%lli]\n",
+ load_inst->readPC(), load_index, load_inst->seqNum);
+
+ load_inst->lqIdx = load_index;
+
+ loadQueue.push_front(load_inst);
+ LQItHash[load_index] = loadQueue.begin();
+
+ ++loads;
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::insertStore(DynInstPtr &store_inst)
+{
+ // Make sure it is not full before inserting an instruction.
+ assert(stores - storesToWB < SQEntries);
+
+ assert(!SQIndices.empty());
+ int store_index = SQIndices.front();
+ SQIndices.pop();
+
+ DPRINTF(OzoneLSQ, "Inserting store PC %#x, idx:%i [sn:%lli]\n",
+ store_inst->readPC(), store_index, store_inst->seqNum);
+
+ store_inst->sqIdx = store_index;
+ SQEntry entry(store_inst);
+ if (loadQueue.empty()) {
+ entry.lqIt = loadQueue.end();
+ } else {
+ entry.lqIt = loadQueue.begin();
+ }
+ storeQueue.push_front(entry);
+
+ SQItHash[store_index] = storeQueue.begin();
+
+ ++stores;
+}
+
+template <class Impl>
+typename Impl::DynInstPtr
+OzoneLWLSQ<Impl>::getMemDepViolator()
+{
+ DynInstPtr temp = memDepViolator;
+
+ memDepViolator = NULL;
+
+ return temp;
+}
+
+template <class Impl>
+unsigned
+OzoneLWLSQ<Impl>::numFreeEntries()
+{
+ unsigned free_lq_entries = LQEntries - loads;
- completeStore(inst->sqIdx);
-
++ unsigned free_sq_entries = SQEntries - (stores + storesInFlight);
+
+ // Both the LQ and SQ entries have an extra dummy entry to differentiate
+ // empty/full conditions. Subtract 1 from the free entries.
+ if (free_lq_entries < free_sq_entries) {
+ return free_lq_entries - 1;
+ } else {
+ return free_sq_entries - 1;
+ }
+}
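// ---------------------------------------------------------------------------
// [Editor's note -- not part of this changeset] The "- 1" in numFreeEntries()
// is the usual circular-queue convention: one slot is sacrificed so that
// head == tail unambiguously means empty rather than full, so a queue sized N
// holds at most N - 1 instructions. For example, with LQEntries = 32, 10
// loads in flight, and the SQ less constrained, numFreeEntries() returns
// (32 - 10) - 1 = 21.
// ---------------------------------------------------------------------------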
+
+template <class Impl>
+int
+OzoneLWLSQ<Impl>::numLoadsReady()
+{
+ int retval = 0;
+ LQIt lq_it = loadQueue.begin();
+ LQIt end_it = loadQueue.end();
+
+ while (lq_it != end_it) {
+ if ((*lq_it)->readyToIssue()) {
+ ++retval;
+ }
+ ++lq_it;
+ }
+
+ return retval;
+}
+
+template <class Impl>
+Fault
+OzoneLWLSQ<Impl>::executeLoad(DynInstPtr &inst)
+{
+ // Execute a specific load.
+ Fault load_fault = NoFault;
+
+ DPRINTF(OzoneLSQ, "Executing load PC %#x, [sn:%lli]\n",
+ inst->readPC(),inst->seqNum);
+
+ // Make sure it's really in the list.
+ // Normally it should always be in the list. However,
+ /* due to a syscall it may not be in the list.
+#ifdef DEBUG
+ int i = loadHead;
+ while (1) {
+ if (i == loadTail && !find(inst)) {
+ assert(0 && "Load not in the queue!");
+ } else if (loadQueue[i] == inst) {
+ break;
+ }
+
+ i = i + 1;
+ if (i >= LQEntries) {
+ i = 0;
+ }
+ }
+#endif // DEBUG*/
+
+ load_fault = inst->initiateAcc();
+
+ // Might want to make sure that I'm not overwriting a previously faulting
+ // instruction that hasn't been checked yet.
+ // Actually probably want the oldest faulting load
+ if (load_fault != NoFault) {
+ DPRINTF(OzoneLSQ, "Load [sn:%lli] has a fault\n", inst->seqNum);
++ if (!(inst->req->flags & UNCACHEABLE && !inst->isAtCommit())) {
++ inst->setExecuted();
++ }
+ // Maybe just set it as can commit here, although that might cause
+ // some other problems with sending traps to the ROB too quickly.
+ be->instToCommit(inst);
+// iewStage->activityThisCycle();
+ }
+
+ return load_fault;
+}
+
+template <class Impl>
+Fault
+OzoneLWLSQ<Impl>::executeStore(DynInstPtr &store_inst)
+{
+ // Make sure that a store exists.
+ assert(stores != 0);
+
+ int store_idx = store_inst->sqIdx;
+ SQHashIt sq_hash_it = SQItHash.find(store_idx);
+ assert(sq_hash_it != SQItHash.end());
+ DPRINTF(OzoneLSQ, "Executing store PC %#x [sn:%lli]\n",
+ store_inst->readPC(), store_inst->seqNum);
+
+ SQIt sq_it = (*sq_hash_it).second;
+
+ Fault store_fault = store_inst->initiateAcc();
+
+ // Store size should now be available. Use it to get proper offset for
+ // addr comparisons.
+ int size = (*sq_it).size;
+
+ if (size == 0) {
+ DPRINTF(OzoneLSQ,"Fault on Store PC %#x, [sn:%lli],Size = 0\n",
+ store_inst->readPC(),store_inst->seqNum);
+
+ return store_fault;
+ }
+
+ assert(store_fault == NoFault);
+
+ if (!storeFaultInst) {
+ if (store_fault != NoFault) {
+ storeFaultInst = store_inst;
+ panic("Fault in a store instruction!");
+ } else if (store_inst->isStoreConditional()) {
+ // Store conditionals need to set themselves as able to
+ // writeback if we haven't had a fault by here.
+ (*sq_it).canWB = true;
+
+ ++storesToWB;
+ DPRINTF(OzoneLSQ, "Nonspeculative store! storesToWB:%i\n",
+ storesToWB);
+ }
+ }
+
+ LQIt lq_it = --(loadQueue.end());
+
+ if (!memDepViolator) {
+ while (lq_it != loadQueue.end()) {
+ if ((*lq_it)->seqNum < store_inst->seqNum) {
+ lq_it--;
+ continue;
+ }
+ // Actually should only check loads that have actually executed
+ // Might be safe because effAddr is set to InvalAddr when the
+ // dyn inst is created.
+
+ // Must actually check all addrs in the proper size range
+ // Which is more correct than needs to be. What if for now we just
+ // assume all loads are quad-word loads, and do the addr based
+ // on that.
+ // @todo: Fix this, magic number being used here
+ if (((*lq_it)->effAddr >> 8) ==
+ (store_inst->effAddr >> 8)) {
+ // A load incorrectly passed this store. Squash and refetch.
+ // For now return a fault to show that it was unsuccessful.
+ memDepViolator = (*lq_it);
+ ++lsqMemOrderViolation;
+
+ return TheISA::genMachineCheckFault();
+ }
+
+ lq_it--;
+ }
+
+ // If we've reached this point, there was no violation.
+ memDepViolator = NULL;
+ }
+
+ return store_fault;
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::commitLoad()
+{
+ assert(!loadQueue.empty());
+
+ DPRINTF(OzoneLSQ, "[sn:%lli] Committing head load instruction, PC %#x\n",
+ loadQueue.back()->seqNum, loadQueue.back()->readPC());
+
+ LQIndices.push(loadQueue.back()->lqIdx);
+ LQItHash.erase(loadQueue.back()->lqIdx);
+
+ loadQueue.pop_back();
+
+ --loads;
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::commitLoads(InstSeqNum &youngest_inst)
+{
+ assert(loads == 0 || !loadQueue.empty());
+
+ while (loads != 0 &&
+ loadQueue.back()->seqNum <= youngest_inst) {
+ commitLoad();
+ }
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::commitStores(InstSeqNum &youngest_inst)
+{
+ assert(stores == 0 || !storeQueue.empty());
+
+ SQIt sq_it = --(storeQueue.end());
+ while (!storeQueue.empty() && sq_it != storeQueue.end()) {
+ assert((*sq_it).inst);
+ if (!(*sq_it).canWB) {
+ if ((*sq_it).inst->seqNum > youngest_inst) {
+ break;
+ }
+ ++storesToWB;
+
+ DPRINTF(OzoneLSQ, "Marking store as able to write back, PC "
+ "%#x [sn:%lli], storesToWB:%i\n",
+ (*sq_it).inst->readPC(),
+ (*sq_it).inst->seqNum,
+ storesToWB);
+
+ (*sq_it).canWB = true;
+ }
+
+ sq_it--;
+ }
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::writebackStores()
+{
+ SQIt sq_it = --(storeQueue.end());
+ while (storesToWB > 0 &&
+ sq_it != storeQueue.end() &&
+ (*sq_it).inst &&
+ (*sq_it).canWB &&
+ usedPorts < cachePorts) {
+
+ if (isStoreBlocked) {
+ DPRINTF(OzoneLSQ, "Unable to write back any more stores, cache"
+ " is blocked!\n");
+ break;
+ }
+
+ DynInstPtr inst = (*sq_it).inst;
+
+ if ((*sq_it).size == 0 && !(*sq_it).completed) {
+ sq_it--;
- "(Loads:%i Stores:%i)\n",squashed_num,loads,stores);
++ removeStore(inst->sqIdx);
++ completeStore(inst);
+ continue;
+ }
+
+ if (inst->isDataPrefetch() || (*sq_it).committed) {
+ sq_it--;
+ continue;
+ }
+
+ ++usedPorts;
+
+ assert((*sq_it).req);
+ assert(!(*sq_it).committed);
+
+ Request *req = (*sq_it).req;
+ (*sq_it).committed = true;
+
+ assert(!inst->memData);
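+ // Allocate a fixed 64-byte buffer; only the store's getSize()
+ // bytes of it are copied in and meaningful.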
+ inst->memData = new uint8_t[64];
+ memcpy(inst->memData, (uint8_t *)&(*sq_it).data,
+ req->getSize());
+
+ PacketPtr data_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast);
+ data_pkt->dataStatic(inst->memData);
+
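+ // Attach sender state so the response handler can locate this
+ // store's SQ entry when the access completes.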
+ LSQSenderState *state = new LSQSenderState;
+ state->isLoad = false;
+ state->idx = inst->sqIdx;
+ state->inst = inst;
+ data_pkt->senderState = state;
+
+ DPRINTF(OzoneLSQ, "D-Cache: Writing back store PC:%#x "
+ "to Addr:%#x, data:%#x [sn:%lli]\n",
+ (*sq_it).inst->readPC(),
+ req->getPaddr(), *(inst->memData),
+ inst->seqNum);
+
+ // @todo: Remove this SC hack once the memory system handles it.
+ if (req->getFlags() & LOCKED) {
+ if (req->getFlags() & UNCACHEABLE) {
+ req->setScResult(2);
+ } else {
+ if (cpu->lockFlag) {
+ req->setScResult(1);
+ } else {
+ req->setScResult(0);
+ // Hack: Instantly complete this store.
+ completeDataAccess(data_pkt);
+ --sq_it;
+ continue;
+ }
+ }
+ } else {
+ // Non-store conditionals do not need a writeback.
+ state->noWB = true;
+ }
+
+ if (!dcachePort.sendTiming(data_pkt)) {
+ // Need to handle becoming blocked on a store.
+ isStoreBlocked = true;
+ assert(retryPkt == NULL);
+ retryPkt = data_pkt;
+ } else {
+ storePostSend(data_pkt, inst);
+ --sq_it;
+ }
+/*
+ DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x "
+ "to Addr:%#x, data:%#x [sn:%lli]\n",
+ inst->sqIdx,inst->readPC(),
+ req->paddr, *(req->data),
+ inst->seqNum);
+ DPRINTF(OzoneLSQ, "StoresInFlight: %i\n",
+ storesInFlight + 1);
+
+ if (dcacheInterface) {
+ assert(!req->completionEvent);
+ StoreCompletionEvent *store_event = new
+ StoreCompletionEvent(inst, be, NULL, this);
+ req->completionEvent = store_event;
+
+ MemAccessResult result = dcacheInterface->access(req);
+
+ if (isStalled() &&
+ inst->seqNum == stallingStoreIsn) {
+ DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
+ "load [sn:%lli]\n",
+ stallingStoreIsn, (*stallingLoad)->seqNum);
+ stalled = false;
+ stallingStoreIsn = 0;
+ be->replayMemInst((*stallingLoad));
+ }
+
+ if (result != MA_HIT && dcacheInterface->doEvents()) {
+ store_event->miss = true;
+ typename BackEnd::LdWritebackEvent *wb = NULL;
+ if (req->flags & LOCKED) {
+ wb = new typename BackEnd::LdWritebackEvent(inst,
+ be);
+ store_event->wbEvent = wb;
+ }
+
+ DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n");
+
+// DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
+// inst->seqNum);
+
+ be->addDcacheMiss(inst);
+
+ lastDcacheStall = curTick;
+
+ _status = DcacheMissStall;
+
+ // Increment stat here or something
+
+ sq_it--;
+ } else {
+ DPRINTF(OzoneLSQ,"D-Cache: Write Hit on idx:%i !\n",
+ inst->sqIdx);
+
+// DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
+// inst->seqNum);
+
+ if (req->flags & LOCKED) {
+ // Stx_C does not generate a system port
+ // transaction in the 21264, but that might be
+ // hard to accomplish in this model.
+
+ typename BackEnd::LdWritebackEvent *wb =
+ new typename BackEnd::LdWritebackEvent(inst,
+ be);
+ store_event->wbEvent = wb;
+ }
+ sq_it--;
+ }
+ ++storesInFlight;
+// removeStore(inst->sqIdx);
+ } else {
+ panic("Must HAVE DCACHE!!!!!\n");
+ }
+*/
+ }
+
+ // Not sure this should set it to 0.
+ usedPorts = 0;
+
+ assert(stores >= 0 && storesToWB >= 0);
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::squash(const InstSeqNum &squashed_num)
+{
+ DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!"
+ "(Loads:%i Stores:%i)\n", squashed_num, loads, stores + storesInFlight);
+
+ LQIt lq_it = loadQueue.begin();
+
+ while (loads != 0 && (*lq_it)->seqNum > squashed_num) {
+ assert(!loadQueue.empty());
+ // Clear the smart pointer to make sure it is decremented.
+ DPRINTF(OzoneLSQ,"Load Instruction PC %#x squashed, "
+ "[sn:%lli]\n",
+ (*lq_it)->readPC(),
+ (*lq_it)->seqNum);
+
+ if (isStalled() && lq_it == stallingLoad) {
+ stalled = false;
+ stallingStoreIsn = 0;
+ stallingLoad = NULL;
+ }
+
+ --loads;
+
+ // Inefficient!
+ LQHashIt lq_hash_it = LQItHash.find((*lq_it)->lqIdx);
+ assert(lq_hash_it != LQItHash.end());
+ LQItHash.erase(lq_hash_it);
+ LQIndices.push((*lq_it)->lqIdx);
+ loadQueue.erase(lq_it++);
+ }
+
+ if (isLoadBlocked) {
+ if (squashed_num < blockedLoadSeqNum) {
+ isLoadBlocked = false;
+ loadBlockedHandled = false;
+ blockedLoadSeqNum = 0;
+ }
+ }
+
+ SQIt sq_it = storeQueue.begin();
+
+ while (stores != 0 && (*sq_it).inst->seqNum > squashed_num) {
+ assert(!storeQueue.empty());
+
+ if ((*sq_it).canWB) {
+ break;
+ }
+
+ // Clear the smart pointer to make sure it is decremented.
+ DPRINTF(OzoneLSQ,"Store Instruction PC %#x idx:%i squashed [sn:%lli]\n",
+ (*sq_it).inst->readPC(), (*sq_it).inst->sqIdx,
+ (*sq_it).inst->seqNum);
+
+ // I don't think this can happen. It should have been cleared by the
+ // stalling load.
+ if (isStalled() &&
+ (*sq_it).inst->seqNum == stallingStoreIsn) {
+ panic("Is stalled should have been cleared by stalling load!\n");
+ stalled = false;
+ stallingStoreIsn = 0;
+ }
+
+ SQHashIt sq_hash_it = SQItHash.find((*sq_it).inst->sqIdx);
+ assert(sq_hash_it != SQItHash.end());
+ SQItHash.erase(sq_hash_it);
+ SQIndices.push((*sq_it).inst->sqIdx);
+ (*sq_it).inst = NULL;
+ (*sq_it).canWB = 0;
+ (*sq_it).req = NULL;
+ --stores;
+ storeQueue.erase(sq_it++);
+ }
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::dumpInsts()
+{
+ cprintf("Load store queue: Dumping instructions.\n");
+ cprintf("Load queue size: %i\n", loads);
+ cprintf("Load queue: ");
+
+ LQIt lq_it = --(loadQueue.end());
+
+ while (lq_it != loadQueue.end() && (*lq_it)) {
+ cprintf("[sn:%lli] %#x ", (*lq_it)->seqNum,
+ (*lq_it)->readPC());
+
+ lq_it--;
+ }
+
+ cprintf("\nStore queue size: %i\n", stores);
+ cprintf("Store queue: ");
+
+ SQIt sq_it = --(storeQueue.end());
+
+ while (sq_it != storeQueue.end() && (*sq_it).inst) {
+ cprintf("[sn:%lli]\nPC:%#x\nSize:%i\nCommitted:%i\nCompleted:%i\ncanWB:%i\n",
+ (*sq_it).inst->seqNum,
+ (*sq_it).inst->readPC(),
+ (*sq_it).size,
+ (*sq_it).committed,
+ (*sq_it).completed,
+ (*sq_it).canWB);
+
+ sq_it--;
+ }
+
+ cprintf("\n");
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::storePostSend(Packet *pkt, DynInstPtr &inst)
+{
+ if (isStalled() &&
+ inst->seqNum == stallingStoreIsn) {
+ DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
+ "load [sn:%lli]\n",
+ stallingStoreIsn, (*stallingLoad)->seqNum);
+ stalled = false;
+ stallingStoreIsn = 0;
+ be->replayMemInst((*stallingLoad));
+ }
+
+ if (!inst->isStoreConditional()) {
+ // The store is basically completed at this time. This
+ // only works so long as the checker doesn't try to
+ // verify the value in memory for stores.
+ inst->setCompleted();
+#if USE_CHECKER
+ if (cpu->checker) {
+ cpu->checker->verify(inst);
+ }
+#endif
+ }
+
+ if (pkt->result != Packet::Success) {
+ DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n");
+
+ DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
+ inst->seqNum);
+
+ //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum);
+
+ //DPRINTF(OzoneLWLSQ, "Added MSHR. count = %i\n",mshrSeqNums.size());
+
+ // @todo: Increment stat here.
+ } else {
+ DPRINTF(OzoneLSQ,"D-Cache: Write Hit!\n");
+
+ DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
+ inst->seqNum);
+ }
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
+{
+ // Squashed instructions do not need to complete their access.
+ if (inst->isSquashed()) {
+ assert(!inst->isStore());
+ return;
+ }
+
+ if (!inst->isExecuted()) {
+ inst->setExecuted();
+
+ // Complete access to copy data to proper place.
+ inst->completeAcc(pkt);
+ }
+
+ // Need to insert instruction into queue to commit
+ be->instToCommit(inst);
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::removeStore(int store_idx)
+{
+ SQHashIt sq_hash_it = SQItHash.find(store_idx);
+ assert(sq_hash_it != SQItHash.end());
+ SQIt sq_it = (*sq_hash_it).second;
+
+ assert((*sq_it).inst);
+ (*sq_it).completed = true;
+ DynInstPtr inst = (*sq_it).inst;
+
+ if (isStalled() &&
+ inst->seqNum == stallingStoreIsn) {
+ DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
+ "load [sn:%lli]\n",
+ stallingStoreIsn, (*stallingLoad)->seqNum);
+ stalled = false;
+ stallingStoreIsn = 0;
+ be->replayMemInst((*stallingLoad));
+ }
+
+ DPRINTF(OzoneLSQ, "Completing store idx:%i [sn:%lli], storesToWB:%i\n",
+ inst->sqIdx, inst->seqNum, storesToWB);
+
+ assert(!storeQueue.empty());
+ SQItHash.erase(sq_hash_it);
+ SQIndices.push(inst->sqIdx);
+ storeQueue.erase(sq_it);
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::completeStore(DynInstPtr &inst)
+{
+ --storesToWB;
+ --stores;
+
+ inst->setCompleted();
+#if USE_CHECKER
+ if (cpu->checker) {
+ cpu->checker->verify(inst);
+ }
+#endif
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::recvRetry()
+{
+ panic("Unimplemented!");
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::switchOut()
+{
+ assert(storesToWB == 0);
+ switchedOut = true;
+
+ // Clear the queue to free up resources
+ assert(stores == 0);
+ assert(storeQueue.empty());
+ assert(loads == 0);
+ assert(loadQueue.empty());
+ assert(storesInFlight == 0);
+ storeQueue.clear();
+ loadQueue.clear();
+ loads = stores = storesToWB = storesInFlight = 0;
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::takeOverFrom(ThreadContext *old_tc)
+{
+ // Clear out any old state. May be redundant if this is the first time
+ // the CPU is being used.
+ stalled = false;
+ isLoadBlocked = false;
+ loadBlockedHandled = false;
+ switchedOut = false;
+
+ // Could do simple checks here to see if indices are on twice
+ while (!LQIndices.empty())
+ LQIndices.pop();
+ while (!SQIndices.empty())
+ SQIndices.pop();
+
+ for (int i = 0; i < LQEntries * 2; i++) {
+ LQIndices.push(i);
+ SQIndices.push(i);
+ }
+
+ usedPorts = 0;
+
+ loadFaultInst = storeFaultInst = memDepViolator = NULL;
+
+ blockedLoadSeqNum = 0;
+}
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_OZONE_SIMPLE_PARAMS_HH__
+#define __CPU_OZONE_SIMPLE_PARAMS_HH__
+
+#include "cpu/ozone/cpu.hh"
+
+//Forward declarations
+class AlphaDTB;
+class AlphaITB;
+class FUPool;
+class MemObject;
+class PageTable;
+class Process;
+class System;
+
+/**
+ * This file defines the parameters that will be used for the OzoneCPU.
+ * This must be defined externally so that the Impl can have a params class
+ * defined that it can pass to all of the individual stages.
+ */
+
+class SimpleParams : public BaseCPU::Params
+{
+ public:
+
+#if FULL_SYSTEM
+ AlphaITB *itb;
+ AlphaDTB *dtb;
+#else
+ std::vector<Process *> workload;
+#endif // FULL_SYSTEM
+
+ //Page Table
+ PageTable *pTable;
+
+ MemObject *mem;
+
+ //
+ // Caches
+ //
+// MemInterface *icacheInterface;
+// MemInterface *dcacheInterface;
+
+ unsigned cachePorts;
+ unsigned width;
+ unsigned frontEndLatency;
+ unsigned frontEndWidth;
+ unsigned backEndLatency;
+ unsigned backEndWidth;
+ unsigned backEndSquashLatency;
+ unsigned maxInstBufferSize;
+ unsigned numPhysicalRegs;
+ unsigned maxOutstandingMemOps;
+ //
+ // Fetch
+ //
+ unsigned decodeToFetchDelay;
+ unsigned renameToFetchDelay;
+ unsigned iewToFetchDelay;
+ unsigned commitToFetchDelay;
+ unsigned fetchWidth;
+
+ //
+ // Decode
+ //
+ unsigned renameToDecodeDelay;
+ unsigned iewToDecodeDelay;
+ unsigned commitToDecodeDelay;
+ unsigned fetchToDecodeDelay;
+ unsigned decodeWidth;
+
+ //
+ // Rename
+ //
+ unsigned iewToRenameDelay;
+ unsigned commitToRenameDelay;
+ unsigned decodeToRenameDelay;
+ unsigned renameWidth;
+
+ //
+ // IEW
+ //
+ unsigned commitToIEWDelay;
+ unsigned renameToIEWDelay;
+ unsigned issueToExecuteDelay;
+ unsigned issueWidth;
+ unsigned executeWidth;
+ unsigned executeIntWidth;
+ unsigned executeFloatWidth;
+ unsigned executeBranchWidth;
+ unsigned executeMemoryWidth;
+ FUPool *fuPool;
+
+ //
+ // Commit
+ //
+ unsigned iewToCommitDelay;
+ unsigned renameToROBDelay;
+ unsigned commitWidth;
+ unsigned squashWidth;
+
+ //
+ // Branch predictor (BP & BTB)
+ //
+ std::string predType;
+ unsigned localPredictorSize;
+ unsigned localCtrBits;
+ unsigned localHistoryTableSize;
+ unsigned localHistoryBits;
+ unsigned globalPredictorSize;
+ unsigned globalCtrBits;
+ unsigned globalHistoryBits;
+ unsigned choicePredictorSize;
+ unsigned choiceCtrBits;
+
+ unsigned BTBEntries;
+ unsigned BTBTagSize;
+
+ unsigned RASSize;
+
+ //
+ // Load store queue
+ //
+ unsigned LQEntries;
+ unsigned SQEntries;
+ bool lsqLimits;
+
+ //
+ // Memory dependence
+ //
+ unsigned SSITSize;
+ unsigned LFSTSize;
+
+ //
+ // Miscellaneous
+ //
+ unsigned numPhysIntRegs;
+ unsigned numPhysFloatRegs;
+ unsigned numIQEntries;
+ unsigned numROBEntries;
+
+ bool decoupledFrontEnd;
+ int dispatchWidth;
+ int wbWidth;
+
+ //SMT Parameters
+ unsigned smtNumFetchingThreads;
+
+ std::string smtFetchPolicy;
+
+ std::string smtIQPolicy;
+ unsigned smtIQThreshold;
+
+ std::string smtLSQPolicy;
+ unsigned smtLSQThreshold;
+
+ std::string smtCommitPolicy;
+
+ std::string smtROBPolicy;
+ unsigned smtROBThreshold;
+
+ // Probably can get this from somewhere.
+ unsigned instShiftAmt;
+};
+
+#endif // __CPU_OZONE_SIMPLE_PARAMS_HH__
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_OZONE_THREAD_STATE_HH__
+#define __CPU_OZONE_THREAD_STATE_HH__
+
+#include "arch/faults.hh"
+#include "arch/types.hh"
+#include "arch/regfile.hh"
++#include "base/callback.hh"
++#include "base/output.hh"
+#include "cpu/thread_context.hh"
+#include "cpu/thread_state.hh"
+#include "sim/process.hh"
++#include "sim/sim_exit.hh"
+
+class Event;
+//class Process;
+
+#if FULL_SYSTEM
+class EndQuiesceEvent;
+class FunctionProfile;
+class ProfileNode;
+#else
+class Process;
+class FunctionalMemory;
+#endif
+
+// Maybe this ozone thread state should only really have committed state?
+// I need to think about why I'm using this and what it's useful for. Clearly
+// has benefits for SMT; basically serves same use as SimpleThread.
+// Makes the ExecContext proxy easier. Gives organization/central access point
+// to state of a thread that can be accessed normally (i.e. not in-flight
+// stuff within an OoO processor). Does this need a TC proxy within it?
+template <class Impl>
+struct OzoneThreadState : public ThreadState {
+ typedef typename ThreadContext::Status Status;
+ typedef typename Impl::CPUType CPUType;
+ typedef TheISA::MiscReg MiscReg;
+
+#if FULL_SYSTEM
+ OzoneThreadState(CPUType *_cpu, int _thread_num)
+ : ThreadState(-1, _thread_num),
+ cpu(_cpu), intrflag(0), inSyscall(0), trapPending(0)
+ {
+ if (cpu->params->profile) {
+ profile = new FunctionProfile(cpu->params->system->kernelSymtab);
+ Callback *cb =
+ new MakeCallback<OzoneThreadState,
+ &OzoneThreadState::dumpFuncProfile>(this);
+ registerExitCallback(cb);
+ }
+
+ // let's fill with a dummy node for now so we don't get a segfault
+ // on the first cycle when there's no node available.
+ static ProfileNode dummyNode;
+ profileNode = &dummyNode;
+ profilePC = 3;
+ miscRegFile.clear();
+ }
+#else
+ OzoneThreadState(CPUType *_cpu, int _thread_num, Process *_process,
+ int _asid, MemObject *mem)
+ : ThreadState(-1, _thread_num, _process, _asid, mem),
+ cpu(_cpu), inSyscall(0), trapPending(0)
+ {
+ miscRegFile.clear();
+ }
+#endif
+
+ RenameTable<Impl> renameTable;
+
+ Addr PC;
+
+ Addr nextPC;
+
+ TheISA::MiscRegFile miscRegFile;
+
+ int intrflag;
+
+ typename Impl::CPUType *cpu;
+
+ bool inSyscall;
+
+ bool trapPending;
+
+ ThreadContext *tc;
+
+ ThreadContext *getTC() { return tc; }
+
+ MiscReg readMiscReg(int misc_reg)
+ {
+ return miscRegFile.readReg(misc_reg);
+ }
+
+ MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
+ {
+ return miscRegFile.readRegWithEffect(misc_reg, fault, tc);
+ }
+
+ Fault setMiscReg(int misc_reg, const MiscReg &val)
+ {
+ return miscRegFile.setReg(misc_reg, val);
+ }
+
+ Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val)
+ {
+ return miscRegFile.setRegWithEffect(misc_reg, val, tc);
+ }
+
+ uint64_t readPC()
+ { return PC; }
+
+ void setPC(uint64_t val)
+ { PC = val; }
+
+ uint64_t readNextPC()
+ { return nextPC; }
+
+ void setNextPC(uint64_t val)
+ { nextPC = val; }
+
+#if FULL_SYSTEM
+ void dumpFuncProfile()
+ {
+ std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name()));
+ profile->dump(tc, *os);
+ }
+#endif
+};
+
+#endif // __CPU_OZONE_THREAD_STATE_HH__
--- /dev/null
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Steve Reinhardt
+ */
+
+#include "arch/utility.hh"
+#include "arch/faults.hh"
+#include "base/cprintf.hh"
+#include "base/inifile.hh"
+#include "base/loader/symtab.hh"
+#include "base/misc.hh"
+#include "base/pollevent.hh"
+#include "base/range.hh"
+#include "base/stats/events.hh"
+#include "base/trace.hh"
+#include "cpu/base.hh"
+#include "cpu/exetrace.hh"
+#include "cpu/profile.hh"
+#include "cpu/simple/base.hh"
+#include "cpu/simple_thread.hh"
+#include "cpu/smt.hh"
+#include "cpu/static_inst.hh"
+#include "cpu/thread_context.hh"
+#include "kern/kernel_stats.hh"
+#include "mem/packet_impl.hh"
+#include "sim/builder.hh"
+#include "sim/byteswap.hh"
+#include "sim/debug.hh"
+#include "sim/host.hh"
+#include "sim/sim_events.hh"
+#include "sim/sim_object.hh"
+#include "sim/stats.hh"
+#include "sim/system.hh"
+
+#if FULL_SYSTEM
+#include "base/remote_gdb.hh"
+#include "arch/tlb.hh"
+#include "arch/stacktrace.hh"
+#include "arch/vtophys.hh"
+#else // !FULL_SYSTEM
+#include "mem/mem_object.hh"
+#endif // FULL_SYSTEM
+
+using namespace std;
+using namespace TheISA;
+
+BaseSimpleCPU::BaseSimpleCPU(Params *p)
+ : BaseCPU(p), mem(p->mem), thread(NULL)
+{
+#if FULL_SYSTEM
+ thread = new SimpleThread(this, 0, p->system, p->itb, p->dtb);
+#else
+ thread = new SimpleThread(this, /* thread_num */ 0, p->process,
+ /* asid */ 0, mem);
+#endif // !FULL_SYSTEM
+
+ thread->setStatus(ThreadContext::Suspended);
+
+ tc = thread->getTC();
+
+ numInst = 0;
+ startNumInst = 0;
+ numLoad = 0;
+ startNumLoad = 0;
+ lastIcacheStall = 0;
+ lastDcacheStall = 0;
+
+ threadContexts.push_back(tc);
+}
+
+BaseSimpleCPU::~BaseSimpleCPU()
+{
+}
+
+void
+BaseSimpleCPU::deallocateContext(int thread_num)
+{
+ // for now, these are equivalent
+ suspendContext(thread_num);
+}
+
+
+void
+BaseSimpleCPU::haltContext(int thread_num)
+{
+ // for now, these are equivalent
+ suspendContext(thread_num);
+}
+
+
+void
+BaseSimpleCPU::regStats()
+{
+ using namespace Stats;
+
+ BaseCPU::regStats();
+
+ numInsts
+ .name(name() + ".num_insts")
+ .desc("Number of instructions executed")
+ ;
+
+ numMemRefs
+ .name(name() + ".num_refs")
+ .desc("Number of memory references")
+ ;
+
+ notIdleFraction
+ .name(name() + ".not_idle_fraction")
+ .desc("Percentage of non-idle cycles")
+ ;
+
+ idleFraction
+ .name(name() + ".idle_fraction")
+ .desc("Percentage of idle cycles")
+ ;
+
+ icacheStallCycles
+ .name(name() + ".icache_stall_cycles")
+ .desc("ICache total stall cycles")
+ .prereq(icacheStallCycles)
+ ;
+
+ dcacheStallCycles
+ .name(name() + ".dcache_stall_cycles")
+ .desc("DCache total stall cycles")
+ .prereq(dcacheStallCycles)
+ ;
+
+ icacheRetryCycles
+ .name(name() + ".icache_retry_cycles")
+ .desc("ICache total retry cycles")
+ .prereq(icacheRetryCycles)
+ ;
+
+ dcacheRetryCycles
+ .name(name() + ".dcache_retry_cycles")
+ .desc("DCache total retry cycles")
+ .prereq(dcacheRetryCycles)
+ ;
+
+ idleFraction = constant(1.0) - notIdleFraction;
+}
+
+void
+BaseSimpleCPU::resetStats()
+{
+// startNumInst = numInst;
+ // notIdleFraction = (_status != Idle);
+}
+
+void
+BaseSimpleCPU::serialize(ostream &os)
+{
+ BaseCPU::serialize(os);
+// SERIALIZE_SCALAR(inst);
+ nameOut(os, csprintf("%s.xc.0", name()));
+ thread->serialize(os);
+}
+
+void
+BaseSimpleCPU::unserialize(Checkpoint *cp, const string &section)
+{
+ BaseCPU::unserialize(cp, section);
+// UNSERIALIZE_SCALAR(inst);
+ thread->unserialize(cp, csprintf("%s.xc.0", section));
+}
+
+void
+change_thread_state(int thread_number, int activate, int priority)
+{
+}
+
+Fault
+BaseSimpleCPU::copySrcTranslate(Addr src)
+{
+#if 0
+ static bool no_warn = true;
+ int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64;
+ // Only support block sizes of 64 atm.
+ assert(blk_size == 64);
+ int offset = src & (blk_size - 1);
+
+ // Make sure block doesn't span page
+ if (no_warn &&
+ (src & PageMask) != ((src + blk_size) & PageMask) &&
+ (src >> 40) != 0xfffffc) {
+ warn("Copied block source spans pages %x.", src);
+ no_warn = false;
+ }
+
+ memReq->reset(src & ~(blk_size - 1), blk_size);
+
+ // translate to physical address
+ Fault fault = thread->translateDataReadReq(req);
+
+ if (fault == NoFault) {
+ thread->copySrcAddr = src;
+ thread->copySrcPhysAddr = memReq->paddr + offset;
+ } else {
+ assert(!fault->isAlignmentFault());
+
+ thread->copySrcAddr = 0;
+ thread->copySrcPhysAddr = 0;
+ }
+ return fault;
+#else
+ return NoFault;
+#endif
+}
+
+Fault
+BaseSimpleCPU::copy(Addr dest)
+{
+#if 0
+ static bool no_warn = true;
+ int blk_size = (dcacheInterface) ? dcacheInterface->getBlockSize() : 64;
+ // Only support block sizes of 64 atm.
+ assert(blk_size == 64);
+ uint8_t data[blk_size];
+ //assert(thread->copySrcAddr);
+ int offset = dest & (blk_size - 1);
+
+ // Make sure block doesn't span page
+ if (no_warn &&
+ (dest & PageMask) != ((dest + blk_size) & PageMask) &&
+ (dest >> 40) != 0xfffffc) {
+ no_warn = false;
+ warn("Copied block destination spans pages %x. ", dest);
+ }
+
+ memReq->reset(dest & ~(blk_size -1), blk_size);
+ // translate to physical address
+ Fault fault = thread->translateDataWriteReq(req);
+
+ if (fault == NoFault) {
+ Addr dest_addr = memReq->paddr + offset;
+ // Need to read straight from memory since we have more than 8 bytes.
+ memReq->paddr = thread->copySrcPhysAddr;
+ thread->mem->read(memReq, data);
+ memReq->paddr = dest_addr;
+ thread->mem->write(memReq, data);
+ if (dcacheInterface) {
+ memReq->cmd = Copy;
+ memReq->completionEvent = NULL;
+ memReq->paddr = thread->copySrcPhysAddr;
+ memReq->dest = dest_addr;
+ memReq->size = 64;
+ memReq->time = curTick;
+ memReq->flags &= ~INST_READ;
+ dcacheInterface->access(memReq);
+ }
+ }
+ else
+ assert(!fault->isAlignmentFault());
+
+ return fault;
+#else
+ panic("copy not implemented");
+ return NoFault;
+#endif
+}
+
+#if FULL_SYSTEM
+Addr
+BaseSimpleCPU::dbg_vtophys(Addr addr)
+{
+ return vtophys(tc, addr);
+}
+#endif // FULL_SYSTEM
+
+#if FULL_SYSTEM
+void
+BaseSimpleCPU::post_interrupt(int int_num, int index)
+{
+ BaseCPU::post_interrupt(int_num, index);
+
+ if (thread->status() == ThreadContext::Suspended) {
+ DPRINTF(IPI,"Suspended Processor awoke\n");
+ thread->activate();
+ }
+}
+#endif // FULL_SYSTEM
+
+void
+BaseSimpleCPU::checkForInterrupts()
+{
+#if FULL_SYSTEM
+ if (checkInterrupts && check_interrupts() && !thread->inPalMode()) {
+ int ipl = 0;
+ int summary = 0;
+ checkInterrupts = false;
+
+ if (thread->readMiscReg(IPR_SIRR)) {
+ for (int i = INTLEVEL_SOFTWARE_MIN;
+ i < INTLEVEL_SOFTWARE_MAX; i++) {
+ if (thread->readMiscReg(IPR_SIRR) & (ULL(1) << i)) {
+ // See table 4-19 of 21164 hardware reference
+ ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1;
+ summary |= (ULL(1) << i);
+ }
+ }
+ }
+
+ uint64_t interrupts = thread->cpu->intr_status();
+ for (int i = INTLEVEL_EXTERNAL_MIN;
+ i < INTLEVEL_EXTERNAL_MAX; i++) {
+ if (interrupts & (ULL(1) << i)) {
+ // See table 4-19 of 21164 hardware reference
+ ipl = i;
+ summary |= (ULL(1) << i);
+ }
+ }
+
+ if (thread->readMiscReg(IPR_ASTRR))
+ panic("asynchronous traps not implemented\n");
+
+ if (ipl && ipl > thread->readMiscReg(IPR_IPLR)) {
+ thread->setMiscReg(IPR_ISR, summary);
+ thread->setMiscReg(IPR_INTID, ipl);
+
+ Fault(new InterruptFault)->invoke(tc);
+
+ DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
+ thread->readMiscReg(IPR_IPLR), ipl, summary);
+ }
+ }
+#endif
+}
+
+
+Fault
+BaseSimpleCPU::setupFetchRequest(Request *req)
+{
+ // set up memory request for instruction fetch
+#if ISA_HAS_DELAY_SLOT
+ DPRINTF(Fetch,"Fetch: PC:%08p NPC:%08p NNPC:%08p\n",thread->readPC(),
+ thread->readNextPC(),thread->readNextNPC());
+#else
+ DPRINTF(Fetch,"Fetch: PC:%08p NPC:%08p\n",thread->readPC(),
+ thread->readNextPC());
+#endif
+
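+ // A set low PC bit indicates PAL mode on Alpha: PAL code is fetched
+ // with a physical address, and the low bits are masked off to form
+ // the aligned fetch address.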
+ req->setVirt(0, thread->readPC() & ~3, sizeof(MachInst),
+ (FULL_SYSTEM && (thread->readPC() & 1)) ? PHYSICAL : 0,
+ thread->readPC());
+
+ Fault fault = thread->translateInstReq(req);
+
+ return fault;
+}
+
+
+void
+BaseSimpleCPU::preExecute()
+{
+ // maintain $r0 semantics
+ thread->setIntReg(ZeroReg, 0);
+#if THE_ISA == ALPHA_ISA
+ thread->setFloatReg(ZeroReg, 0.0);
+#endif // ALPHA_ISA
+
+ // keep an instruction count
+ numInst++;
+ numInsts++;
+
+ thread->funcExeInst++;
+
+ // check for instruction-count-based events
+ comInstEventQueue[0]->serviceEvents(numInst);
+
+ // decode the instruction
+ inst = gtoh(inst);
+ curStaticInst = StaticInst::decode(makeExtMI(inst, thread->readPC()));
+
+ traceData = Trace::getInstRecord(curTick, tc, curStaticInst,
+ thread->readPC());
+
+ DPRINTF(Decode,"Decode: Decoded %s instruction (opcode: 0x%x): 0x%x\n",
+ curStaticInst->getName(), curStaticInst->getOpcode(),
+ curStaticInst->machInst);
+
+#if FULL_SYSTEM
+ thread->setInst(inst);
+#endif // FULL_SYSTEM
+}
+
+void
+BaseSimpleCPU::postExecute()
+{
+#if FULL_SYSTEM
+ if (thread->profile) {
+ bool usermode =
+ (thread->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
+ thread->profilePC = usermode ? 1 : thread->readPC();
+ ProfileNode *node = thread->profile->consume(tc, inst);
+ if (node)
+ thread->profileNode = node;
+ }
+#endif
+
+ if (curStaticInst->isMemRef()) {
+ numMemRefs++;
+ }
+
+ if (curStaticInst->isLoad()) {
+ ++numLoad;
+ comLoadEventQueue[0]->serviceEvents(numLoad);
+ }
+
+ traceFunctions(thread->readPC());
+
+ if (traceData) {
+ traceData->finalize();
+ }
+}
+
+
+void
+BaseSimpleCPU::advancePC(Fault fault)
+{
+ if (fault != NoFault) {
+ fault->invoke(tc);
+ }
+ else {
+ // go to the next instruction
+ thread->setPC(thread->readNextPC());
+#if ISA_HAS_DELAY_SLOT
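+ // With a delay slot, the committed next PC comes from NNPC, and a
+ // new NNPC is computed one instruction past it.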
+ thread->setNextPC(thread->readNextNPC());
+ thread->setNextNPC(thread->readNextNPC() + sizeof(MachInst));
+ assert(thread->readNextPC() != thread->readNextNPC());
+#else
+ thread->setNextPC(thread->readNextPC() + sizeof(MachInst));
+#endif
+
+ }
+
+#if FULL_SYSTEM
+ Addr oldpc;
+ do {
+ oldpc = thread->readPC();
+ system->pcEventQueue.service(tc);
+ } while (oldpc != thread->readPC());
+#endif
+}
+
--- /dev/null
+/*
+ * Copyright (c) 2001-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Steve Reinhardt
+ * Nathan Binkert
+ * Lisa Hsu
+ * Kevin Lim
+ */
+
+#include <string>
+
+#include "arch/isa_traits.hh"
+#include "cpu/base.hh"
+#include "cpu/simple_thread.hh"
+#include "cpu/thread_context.hh"
+
+#if FULL_SYSTEM
+#include "base/callback.hh"
+#include "base/cprintf.hh"
+#include "base/output.hh"
+#include "base/trace.hh"
+#include "cpu/profile.hh"
+#include "cpu/quiesce_event.hh"
+#include "kern/kernel_stats.hh"
+#include "sim/serialize.hh"
+#include "sim/sim_exit.hh"
+#include "arch/stacktrace.hh"
+#else
+#include "sim/process.hh"
+#include "sim/system.hh"
+#include "mem/translating_port.hh"
+#endif
+
+using namespace std;
+
+// constructor
+#if FULL_SYSTEM
+SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, System *_sys,
+ AlphaITB *_itb, AlphaDTB *_dtb,
+ bool use_kernel_stats)
+ : ThreadState(-1, _thread_num), cpu(_cpu), system(_sys), itb(_itb),
+ dtb(_dtb)
+{
+ tc = new ProxyThreadContext<SimpleThread>(this);
+
+ quiesceEvent = new EndQuiesceEvent(tc);
+
+ regs.clear();
+
+ if (cpu->params->profile) {
+ profile = new FunctionProfile(system->kernelSymtab);
+ Callback *cb =
+ new MakeCallback<SimpleThread,
+ &SimpleThread::dumpFuncProfile>(this);
+ registerExitCallback(cb);
+ }
+
+ // let's fill with a dummy node for now so we don't get a segfault
+ // on the first cycle when there's no node available.
+ static ProfileNode dummyNode;
+ profileNode = &dummyNode;
+ profilePC = 3;
+
+ if (use_kernel_stats) {
+ kernelStats = new Kernel::Statistics(system);
+ } else {
+ kernelStats = NULL;
+ }
+ Port *mem_port;
+ physPort = new FunctionalPort(csprintf("%s-%d-funcport",
+ cpu->name(), tid));
+ mem_port = system->physmem->getPort("functional");
+ mem_port->setPeer(physPort);
+ physPort->setPeer(mem_port);
+
+ virtPort = new VirtualPort(csprintf("%s-%d-vport",
+ cpu->name(), tid));
+ mem_port = system->physmem->getPort("functional");
+ mem_port->setPeer(virtPort);
+ virtPort->setPeer(mem_port);
+}
+#else
+SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num,
+ Process *_process, int _asid, MemObject* memobj)
+ : ThreadState(-1, _thread_num, _process, _asid, memobj),
+ cpu(_cpu)
+{
+ /* Use this port for syscall emulation writes to memory. */
+ Port *mem_port;
+ port = new TranslatingPort(csprintf("%s-%d-funcport",
+ cpu->name(), tid),
+ process->pTable, false);
+ mem_port = memobj->getPort("functional");
+ mem_port->setPeer(port);
+ port->setPeer(mem_port);
+
+ regs.clear();
+ tc = new ProxyThreadContext<SimpleThread>(this);
+}
+
+#endif
+
+SimpleThread::SimpleThread()
+#if FULL_SYSTEM
+ : ThreadState(-1, -1)
+#else
+ : ThreadState(-1, -1, NULL, -1, NULL)
+#endif
+{
+ tc = new ProxyThreadContext<SimpleThread>(this);
+ regs.clear();
+}
+
+SimpleThread::~SimpleThread()
+{
+ delete tc;
+}
+
+void
+SimpleThread::takeOverFrom(ThreadContext *oldContext)
+{
+ // some things should already be set up
+#if FULL_SYSTEM
+ assert(system == oldContext->getSystemPtr());
+#else
+ assert(process == oldContext->getProcessPtr());
+#endif
+
+ copyState(oldContext);
+#if FULL_SYSTEM
+ EndQuiesceEvent *quiesce = oldContext->getQuiesceEvent();
+ if (quiesce) {
+ // Point the quiesce event's TC at this TC so that it wakes up
+ // the proper CPU.
+ quiesce->tc = tc;
+ }
+ if (quiesceEvent) {
+ quiesceEvent->tc = tc;
+ }
+
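+ // Carry the kernel stats object over from the old context so the
+ // counters persist across the CPU switch.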
+ Kernel::Statistics *stats = oldContext->getKernelStats();
+ if (stats) {
+ kernelStats = stats;
+ }
+#endif
+
+ storeCondFailures = 0;
+
+ oldContext->setStatus(ThreadContext::Unallocated);
+}
+
+void
+SimpleThread::copyTC(ThreadContext *context)
+{
+ copyState(context);
+
+#if FULL_SYSTEM
+ EndQuiesceEvent *quiesce = context->getQuiesceEvent();
+ if (quiesce) {
+ quiesceEvent = quiesce;
+ }
+ Kernel::Statistics *stats = context->getKernelStats();
+ if (stats) {
+ kernelStats = stats;
+ }
+#endif
+}
+
+void
+SimpleThread::copyState(ThreadContext *oldContext)
+{
+ // copy over functional state
+ _status = oldContext->status();
+ copyArchRegs(oldContext);
+ cpuId = oldContext->readCpuId();
+#if !FULL_SYSTEM
+ funcExeInst = oldContext->readFuncExeInst();
+#endif
+ inst = oldContext->getInst();
+}
+
+void
+SimpleThread::serialize(ostream &os)
+{
+ ThreadState::serialize(os);
+ regs.serialize(os);
+ // thread_num and cpu_id are deterministic from the config
+}
+
+
+void
+SimpleThread::unserialize(Checkpoint *cp, const std::string &section)
+{
+ ThreadState::unserialize(cp, section);
+ regs.unserialize(cp, section);
+ // thread_num and cpu_id are deterministic from the config
+}
+
+#if FULL_SYSTEM
+void
+SimpleThread::dumpFuncProfile()
+{
+ std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name()));
+ profile->dump(tc, *os);
+}
+#endif
+
+void
+SimpleThread::activate(int delay)
+{
+ if (status() == ThreadContext::Active)
+ return;
+
+ lastActivate = curTick;
+
+ if (status() == ThreadContext::Unallocated) {
+ cpu->activateWhenReady(tid);
+ return;
+ }
+
+ _status = ThreadContext::Active;
+
+ // status() == Suspended
+ cpu->activateContext(tid, delay);
+}
+
+void
+SimpleThread::suspend()
+{
+ if (status() == ThreadContext::Suspended)
+ return;
+
+ lastActivate = curTick;
+ lastSuspend = curTick;
+/*
+#if FULL_SYSTEM
+ // Don't change the status from active if there are pending interrupts
+ if (cpu->check_interrupts()) {
+ assert(status() == ThreadContext::Active);
+ return;
+ }
+#endif
+*/
+ _status = ThreadContext::Suspended;
+ cpu->suspendContext(tid);
+}
+
+void
+SimpleThread::deallocate()
+{
+ if (status() == ThreadContext::Unallocated)
+ return;
+
+ _status = ThreadContext::Unallocated;
+ cpu->deallocateContext(tid);
+}
+
+void
+SimpleThread::halt()
+{
+ if (status() == ThreadContext::Halted)
+ return;
+
+ _status = ThreadContext::Halted;
+ cpu->haltContext(tid);
+}
+
+
+void
+SimpleThread::regStats(const string &name)
+{
+#if FULL_SYSTEM
+ if (kernelStats)
+ kernelStats->regStats(name + ".kern");
+#endif
+}
+
+void
+SimpleThread::copyArchRegs(ThreadContext *src_tc)
+{
+ TheISA::copyRegs(src_tc, tc);
+}
+
+#if FULL_SYSTEM
+VirtualPort*
+SimpleThread::getVirtPort(ThreadContext *src_tc)
+{
+ if (!src_tc)
+ return virtPort;
+
+ VirtualPort *vp;
+ Port *mem_port;
+
+ vp = new VirtualPort("tc-vport", src_tc);
+ mem_port = system->physmem->getPort("functional");
+ mem_port->setPeer(vp);
+ vp->setPeer(mem_port);
+ return vp;
+}
+
+void
+SimpleThread::delVirtPort(VirtualPort *vp)
+{
+ if (vp != virtPort) {
+ delete vp->getPeer();
+ delete vp;
+ }
+}
+
+
+#endif
+
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_THREAD_STATE_HH__
+#define __CPU_THREAD_STATE_HH__
+
+#include "arch/types.hh"
++#include "cpu/profile.hh"
+#include "cpu/thread_context.hh"
+
+#if !FULL_SYSTEM
+#include "mem/mem_object.hh"
+#include "mem/translating_port.hh"
+#include "sim/process.hh"
+#endif
+
+#if FULL_SYSTEM
+class EndQuiesceEvent;
+class FunctionProfile;
+class ProfileNode;
+namespace Kernel {
+ class Statistics;
+};
+#endif
+
+class Checkpoint;
+
+/**
+ * Struct for holding general thread state that is needed across CPU
+ * models. This includes things such as pointers to the process,
+ * memory, quiesce events, and certain stats. This can be expanded
+ * to hold more thread-specific stats within it.
+ */
+struct ThreadState {
+ typedef ThreadContext::Status Status;
+
+#if FULL_SYSTEM
+ ThreadState(int _cpuId, int _tid);
+#else
+ ThreadState(int _cpuId, int _tid, Process *_process,
+ short _asid, MemObject *mem);
+#endif
+
+ void serialize(std::ostream &os);
+
+ void unserialize(Checkpoint *cp, const std::string &section);
+
+ void setCpuId(int id) { cpuId = id; }
+
+ int readCpuId() { return cpuId; }
+
+ void setTid(int id) { tid = id; }
+
+ int readTid() { return tid; }
+
+ Tick readLastActivate() { return lastActivate; }
+
+ Tick readLastSuspend() { return lastSuspend; }
+
+#if FULL_SYSTEM
+ void dumpFuncProfile();
+
+ EndQuiesceEvent *getQuiesceEvent() { return quiesceEvent; }
+
+ Kernel::Statistics *getKernelStats() { return kernelStats; }
+
+ FunctionalPort *getPhysPort() { return physPort; }
+
+ void setPhysPort(FunctionalPort *port) { physPort = port; }
+
+ VirtualPort *getVirtPort(ThreadContext *tc = NULL) { return virtPort; }
+
+ void setVirtPort(VirtualPort *port) { virtPort = port; }
+#else
+ Process *getProcessPtr() { return process; }
+
+ TranslatingPort *getMemPort() { return port; }
+
+ void setMemPort(TranslatingPort *_port) { port = _port; }
+
+ int getInstAsid() { return asid; }
+ int getDataAsid() { return asid; }
+#endif
+
+ /** Sets the current instruction being committed. */
+ void setInst(TheISA::MachInst _inst) { inst = _inst; }
+
+ /** Returns the current instruction being committed. */
+ TheISA::MachInst getInst() { return inst; }
+
+ /** Reads the number of instructions functionally executed and
+ * committed.
+ */
+ Counter readFuncExeInst() { return funcExeInst; }
+
+ /** Sets the total number of instructions functionally executed
+ * and committed.
+ */
+ void setFuncExeInst(Counter new_val) { funcExeInst = new_val; }
+
+ /** Returns the status of this thread. */
+ Status status() const { return _status; }
+
+ /** Sets the status of this thread. */
+ void setStatus(Status new_status) { _status = new_status; }
+
+ /** Number of instructions committed. */
+ Counter numInst;
+ /** Stat for number instructions committed. */
+ Stats::Scalar<> numInsts;
+ /** Stat for number of memory references. */
+ Stats::Scalar<> numMemRefs;
+
+ /** Number of simulated loads, used for tracking events based on
+ * the number of loads committed.
+ */
+ Counter numLoad;
+
+ /** The number of simulated loads committed prior to this run. */
+ Counter startNumLoad;
+
+ protected:
+ ThreadContext::Status _status;
+
+ // ID of this context w.r.t. the System or Process object to which
+ // it belongs. For full-system mode, this is the system CPU ID.
+ int cpuId;
+
+ // Index of hardware thread context on the CPU that this represents.
+ int tid;
+
+ public:
+ /** Last time activate was called on this thread. */
+ Tick lastActivate;
+
+ /** Last time suspend was called on this thread. */
+ Tick lastSuspend;
+
+#if FULL_SYSTEM
+ public:
+ FunctionProfile *profile;
+ ProfileNode *profileNode;
+ Addr profilePC;
+ EndQuiesceEvent *quiesceEvent;
+
+ Kernel::Statistics *kernelStats;
+ protected:
+ /** A functional port outgoing only for functional accesses to physical
+ * addresses.*/
+ FunctionalPort *physPort;
+
+ /** A functional port, outgoing only, for functional accesses to
+ * virtual addresses. These do not require execution context
+ * information. */
+ VirtualPort *virtPort;
+#else
+ TranslatingPort *port;
+
+ Process *process;
+
+ // Address space ID. Note that this is used for TIMING cache
+ // simulation only; all functional memory accesses should use
+ // one of the FunctionalMemory pointers above.
+ short asid;
+
+#endif
+
+ public:
+#if FULL_SYSTEM
+ void profileClear()
+ {
+ if (profile)
+ profile->clear();
+ }
+
+ void profileSample()
+ {
+ if (profile)
+ profile->sample(profileNode, profilePC);
+ }
+#endif
+
+ /** Current instruction the thread is committing. Only set and
+ * used for DTB faults currently.
+ */
+ TheISA::MachInst inst;
+
+ public:
+ /**
+ * Temporary storage to pass the source address from copy_load to
+ * copy_store.
+ * @todo Remove this temporary when we have a better way to do it.
+ */
+ Addr copySrcAddr;
+ /**
+ * Temp storage for the physical source address of a copy.
+ * @todo Remove this temporary when we have a better way to do it.
+ */
+ Addr copySrcPhysAddr;
+
+ /*
+ * number of executed instructions, for matching with syscall trace
+ * points in EIO files.
+ */
+ Counter funcExeInst;
+
+ //
+ // Count failed store conditionals so we can warn of apparent
+ // application deadlock situations.
+ unsigned storeCondFailures;
+};
+
+#endif // __CPU_THREAD_STATE_HH__
--- /dev/null
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Andrew Schultz
+ */
+
+/** @file
+ * Device model for an IDE disk
+ */
+
+#ifndef __IDE_DISK_HH__
+#define __IDE_DISK_HH__
+
+#include "base/statistics.hh"
+#include "dev/disk_image.hh"
+#include "dev/ide_atareg.h"
+#include "dev/ide_ctrl.hh"
+#include "dev/ide_wdcreg.h"
+#include "dev/io_device.hh"
+#include "sim/eventq.hh"
+
+class ChunkGenerator;
+
+#define DMA_BACKOFF_PERIOD 200
+
+#define MAX_DMA_SIZE (131072) // 128K
+#define MAX_MULTSECT (128)
+
+#define PRD_BASE_MASK 0xfffffffe
+#define PRD_COUNT_MASK 0xfffe
+#define PRD_EOT_MASK 0x8000
+
+typedef struct PrdEntry {
+ uint32_t baseAddr;
+ uint16_t byteCount;
+ uint16_t endOfTable;
+} PrdEntry_t;
+
+class PrdTableEntry {
+ public:
+ PrdEntry_t entry;
+
+ uint32_t getBaseAddr()
+ {
+ return (entry.baseAddr & PRD_BASE_MASK);
+ }
+
+ uint32_t getByteCount()
+ {
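+ // A byte count of zero encodes the maximum DMA transfer size.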
+ return ((entry.byteCount == 0) ? MAX_DMA_SIZE :
+ (entry.byteCount & PRD_COUNT_MASK));
+ }
+
+ uint16_t getEOT()
+ {
+ return (entry.endOfTable & PRD_EOT_MASK);
+ }
+};
+
+#define DATA_OFFSET (0)
+#define ERROR_OFFSET (1)
+#define FEATURES_OFFSET (1)
+#define NSECTOR_OFFSET (2)
+#define SECTOR_OFFSET (3)
+#define LCYL_OFFSET (4)
+#define HCYL_OFFSET (5)
+#define SELECT_OFFSET (6)
+#define DRIVE_OFFSET (6)
+#define STATUS_OFFSET (7)
+#define COMMAND_OFFSET (7)
+
+#define CONTROL_OFFSET (2)
+#define ALTSTAT_OFFSET (2)
+
+#define SELECT_DEV_BIT 0x10
+#define CONTROL_RST_BIT 0x04
+#define CONTROL_IEN_BIT 0x02
+#define STATUS_BSY_BIT 0x80
+#define STATUS_DRDY_BIT 0x40
+#define STATUS_DRQ_BIT 0x08
+#define STATUS_SEEK_BIT 0x10
+#define STATUS_DF_BIT 0x20
+#define DRIVE_LBA_BIT 0x40
+
+#define DEV0 (0)
+#define DEV1 (1)
+
+typedef struct CommandReg {
+ uint16_t data;
+ uint8_t error;
+ uint8_t sec_count;
+ uint8_t sec_num;
+ uint8_t cyl_low;
+ uint8_t cyl_high;
+ union {
+ uint8_t drive;
+ uint8_t head;
+ };
+ uint8_t command;
+} CommandReg_t;
+
+typedef enum Events {
+ None = 0,
+ Transfer,
+ ReadWait,
+ WriteWait,
+ PrdRead,
+ DmaRead,
+ DmaWrite
+} Events_t;
+
+typedef enum DevAction {
+ ACT_NONE = 0,
+ ACT_CMD_WRITE,
+ ACT_CMD_COMPLETE,
+ ACT_CMD_ERROR,
+ ACT_SELECT_WRITE,
+ ACT_STAT_READ,
+ ACT_DATA_READY,
+ ACT_DATA_READ_BYTE,
+ ACT_DATA_READ_SHORT,
+ ACT_DATA_WRITE_BYTE,
+ ACT_DATA_WRITE_SHORT,
+ ACT_DMA_READY,
+ ACT_DMA_DONE,
+ ACT_SRST_SET,
+ ACT_SRST_CLEAR
+} DevAction_t;
+
+typedef enum DevState {
+ // Device idle
+ Device_Idle_S = 0,
+ Device_Idle_SI,
+ Device_Idle_NS,
+
+ // Software reset
+ Device_Srst,
+
+ // Non-data commands
+ Command_Execution,
+
+ // PIO data-in (data to host)
+ Prepare_Data_In,
+ Data_Ready_INTRQ_In,
+ Transfer_Data_In,
+
+ // PIO data-out (data from host)
+ Prepare_Data_Out,
+ Data_Ready_INTRQ_Out,
+ Transfer_Data_Out,
+
+ // DMA protocol
+ Prepare_Data_Dma,
+ Transfer_Data_Dma
+} DevState_t;
+
+typedef enum DmaState {
+ Dma_Idle = 0,
+ Dma_Start,
+ Dma_Transfer
+} DmaState_t;
+
+class PhysicalMemory;
+class IdeController;
+
+/**
+ * IDE Disk device model
+ */
+class IdeDisk : public SimObject
+{
+ protected:
+ /** The IDE controller for this disk. */
+ IdeController *ctrl;
+ /** The image that contains the data of this disk. */
+ DiskImage *image;
+
+ protected:
+ /** The disk delay in microseconds. */
+ int diskDelay;
+
+ private:
+ /** Drive identification structure for this disk */
+ struct ataparams driveID;
+ /** Data buffer for transfers */
+ uint8_t *dataBuffer;
+ /** Number of bytes in command data transfer */
+ uint32_t cmdBytes;
+ /** Number of bytes left in command data transfer */
+ uint32_t cmdBytesLeft;
+ /** Number of bytes left in DRQ block */
+ uint32_t drqBytesLeft;
+ /** Current sector in access */
+ uint32_t curSector;
+ /** Command block registers */
+ CommandReg_t cmdReg;
+ /** Status register */
+ uint8_t status;
+ /** Interrupt enable bit */
+ bool nIENBit;
+ /** Device state */
+ DevState_t devState;
+ /** Dma state */
+ DmaState_t dmaState;
+ /** Dma transaction is a read */
+ bool dmaRead;
+ /** PRD table base address */
+ uint32_t curPrdAddr;
+ /** PRD entry */
+ PrdTableEntry curPrd;
+ /** Device ID (master=0/slave=1) */
+ int devID;
+ /** Interrupt pending */
+ bool intrPending;
+
+ Stats::Scalar<> dmaReadFullPages;
+ Stats::Scalar<> dmaReadBytes;
+ Stats::Scalar<> dmaReadTxs;
+ Stats::Scalar<> dmaWriteFullPages;
+ Stats::Scalar<> dmaWriteBytes;
+ Stats::Scalar<> dmaWriteTxs;
+ Stats::Formula rdBandwidth;
+ Stats::Formula wrBandwidth;
+ Stats::Formula totBandwidth;
+ Stats::Formula totBytes;
+
+ public:
+ /**
+ * Create and initialize this Disk.
+ * @param name The name of this disk.
+ * @param img The disk image of this disk.
+ * @param id The disk ID (master=0/slave=1)
+ * @param disk_delay The disk delay in microseconds
+ */
+ IdeDisk(const std::string &name, DiskImage *img, int id, Tick disk_delay);
+
+ /**
+ * Delete the data buffer.
+ */
+ ~IdeDisk();
+
+ /**
+ * Reset the device state
+ */
+ void reset(int id);
+
+ /**
+ * Register Statistics
+ */
+ void regStats();
+
+ /**
+ * Set the controller for this device
+ * @param c The IDE controller
+ */
+ void setController(IdeController *c) {
+ if (ctrl) panic("Cannot change the controller once set!\n");
+ ctrl = c;
+ }
+
+ // Device register read/write
+ void read(const Addr &offset, IdeRegType regtype, uint8_t *data);
+ void write(const Addr &offset, IdeRegType regtype, const uint8_t *data);
+
+ // Start/abort functions
+ void startDma(const uint32_t &prdTableBase);
+ void abortDma();
+
+ private:
+ void startCommand();
+
+ // Interrupt management
+ void intrPost();
+ void intrClear();
+
+ // DMA stuff
+ void doDmaTransfer();
+ friend class EventWrapper<IdeDisk, &IdeDisk::doDmaTransfer>;
+ EventWrapper<IdeDisk, &IdeDisk::doDmaTransfer> dmaTransferEvent;
+
+ void doDmaDataRead();
+
+ void doDmaRead();
+ ChunkGenerator *dmaReadCG;
+ friend class EventWrapper<IdeDisk, &IdeDisk::doDmaRead>;
+ EventWrapper<IdeDisk, &IdeDisk::doDmaRead> dmaReadWaitEvent;
+
+ void doDmaDataWrite();
+
+ void doDmaWrite();
+ ChunkGenerator *dmaWriteCG;
+ friend class EventWrapper<IdeDisk, &IdeDisk::doDmaWrite>;
+ EventWrapper<IdeDisk, &IdeDisk::doDmaWrite> dmaWriteWaitEvent;
+
+ void dmaPrdReadDone();
+ friend class EventWrapper<IdeDisk, &IdeDisk::dmaPrdReadDone>;
+ EventWrapper<IdeDisk, &IdeDisk::dmaPrdReadDone> dmaPrdReadEvent;
+
+ void dmaReadDone();
+ friend class EventWrapper<IdeDisk, &IdeDisk::dmaReadDone>;
+ EventWrapper<IdeDisk, &IdeDisk::dmaReadDone> dmaReadEvent;
+
+ void dmaWriteDone();
+ friend class EventWrapper<IdeDisk, &IdeDisk::dmaWriteDone>;
+ EventWrapper<IdeDisk, &IdeDisk::dmaWriteDone> dmaWriteEvent;
+
+ // Disk image read/write
+ void readDisk(uint32_t sector, uint8_t *data);
+ void writeDisk(uint32_t sector, uint8_t *data);
+
+ // State machine management
+ void updateState(DevAction_t action);
+
+ // Utility functions
+ bool isBSYSet() { return (status & STATUS_BSY_BIT); }
+ bool isIENSet() { return nIENBit; }
+ bool isDEVSelect();
+
+ void setComplete()
+ {
+ // clear out the status byte
+ status = 0;
+ // set the DRDY bit
+ status |= STATUS_DRDY_BIT;
+ // set the SEEK bit
+ status |= STATUS_SEEK_BIT;
+ }
+
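+ // A sketch of the LBA28 layout this helper reassembles (values are
+ // illustrative): for LBA 0x0ABCDEF, head[3:0] = 0x0 (bits 27:24),
+ // cyl_high = 0xAB (bits 23:16), cyl_low = 0xCD (bits 15:8),
+ // sec_num = 0xEF (bits 7:0).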
+ uint32_t getLBABase()
+ {
+ return (uint32_t)(((cmdReg.head & 0xf) << 24) | (cmdReg.cyl_high << 16) |
+ (cmdReg.cyl_low << 8) | (cmdReg.sec_num));
+ }
+
+ inline Addr pciToDma(Addr pciAddr);
+
+ /**
+ * Serialize this object to the given output stream.
+ * @param os The stream to serialize to.
+ */
+ void serialize(std::ostream &os);
+
+ /**
+ * Reconstruct the state of this object from a checkpoint.
+ * @param cp The checkpoint to use.
+ * @param section The section name describing this object.
+ */
+ void unserialize(Checkpoint *cp, const std::string &section);
+};
+
+
+#endif // __IDE_DISK_HH__
--- /dev/null
+from m5.SimObject import SimObject
+from m5.params import *
+from m5.proxy import *
+from m5 import build_env
+from AlphaTLB import AlphaDTB, AlphaITB
+from Bus import Bus
+
+class BaseCPU(SimObject):
+ type = 'BaseCPU'
+ abstract = True
+ mem = Param.MemObject("memory")
+
+ system = Param.System(Parent.any, "system object")
+ if build_env['FULL_SYSTEM']:
+ dtb = Param.AlphaDTB(AlphaDTB(), "Data TLB")
+ itb = Param.AlphaITB(AlphaITB(), "Instruction TLB")
+ cpu_id = Param.Int(-1, "CPU identifier")
+ else:
+ workload = VectorParam.Process("processes to run")
+
+ max_insts_all_threads = Param.Counter(0,
+ "terminate when all threads have reached this inst count")
+ max_insts_any_thread = Param.Counter(0,
+ "terminate when any thread reaches this inst count")
+ max_loads_all_threads = Param.Counter(0,
+ "terminate when all threads have reached this load count")
+ max_loads_any_thread = Param.Counter(0,
+ "terminate when any thread reaches this load count")
++ stats_reset_inst = Param.Counter(0,
++ "reset stats once this many instructions are committed")
++ progress_interval = Param.Tick(0, "print a progress message every n ticks (0 = never)")
+
+ defer_registration = Param.Bool(False,
+ "defer registration with system (for sampling)")
+
+ clock = Param.Clock(Parent.clock, "clock speed")
+
+ _mem_ports = []
+
+ def connectMemPorts(self, bus):
+ for p in self._mem_ports:
+ exec('self.%s = bus.port' % p)
+
+ def addPrivateSplitL1Caches(self, ic, dc):
+ assert(len(self._mem_ports) == 2)
+ self.icache = ic
+ self.dcache = dc
+ self.icache_port = ic.cpu_side
+ self.dcache_port = dc.cpu_side
+ self._mem_ports = ['icache.mem_side', 'dcache.mem_side']
+# self.mem = dc
+
+ def addTwoLevelCacheHierarchy(self, ic, dc, l2c):
+ self.addPrivateSplitL1Caches(ic, dc)
+ self.toL2Bus = Bus()
+ self.connectMemPorts(self.toL2Bus)
+ self.l2cache = l2c
+ self.l2cache.cpu_side = self.toL2Bus.port
+ self._mem_ports = ['l2cache.mem_side']
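+# Hypothetical configuration sketch (the cache and bus objects are
+# illustrative, not defined in this file):
+#
+# cpu = DerivO3CPU()
+# cpu.addTwoLevelCacheHierarchy(icache, dcache, l2cache)
+# cpu.connectMemPorts(system.membus) # now wires l2cache.mem_side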
--- /dev/null
+from m5.params import *
+from m5.proxy import *
+from m5 import build_env
+from BaseCPU import BaseCPU
+from Checker import O3Checker
+
+class DerivO3CPU(BaseCPU):
+ type = 'DerivO3CPU'
+ activity = Param.Unsigned(0, "Initial count")
+ numThreads = Param.Unsigned(1, "number of HW thread contexts")
+
++ if build_env['FULL_SYSTEM']:
++ profile = Param.Latency('0ns', "trace the kernel stack")
+ if build_env['USE_CHECKER']:
+ if not build_env['FULL_SYSTEM']:
+ checker = Param.BaseCPU(O3Checker(workload=Parent.workload,
+ exitOnError=True,
+ warnOnlyOnLoadError=False),
+ "checker")
+ else:
+ checker = Param.BaseCPU(O3Checker(exitOnError=True, warnOnlyOnLoadError=False), "checker")
+ checker.itb = Parent.itb
+ checker.dtb = Parent.dtb
+
+ cachePorts = Param.Unsigned("Cache Ports")
+ icache_port = Port("Instruction Port")
+ dcache_port = Port("Data Port")
+ _mem_ports = ['icache_port', 'dcache_port']
+
+ decodeToFetchDelay = Param.Unsigned(1, "Decode to fetch delay")
+ renameToFetchDelay = Param.Unsigned(1, "Rename to fetch delay")
+ iewToFetchDelay = Param.Unsigned(1, "Issue/Execute/Writeback to fetch "
+ "delay")
+ commitToFetchDelay = Param.Unsigned(1, "Commit to fetch delay")
+ fetchWidth = Param.Unsigned(8, "Fetch width")
+
+ renameToDecodeDelay = Param.Unsigned(1, "Rename to decode delay")
+ iewToDecodeDelay = Param.Unsigned(1, "Issue/Execute/Writeback to decode "
+ "delay")
+ commitToDecodeDelay = Param.Unsigned(1, "Commit to decode delay")
+ fetchToDecodeDelay = Param.Unsigned(1, "Fetch to decode delay")
+ decodeWidth = Param.Unsigned(8, "Decode width")
+
+ iewToRenameDelay = Param.Unsigned(1, "Issue/Execute/Writeback to rename "
+ "delay")
+ commitToRenameDelay = Param.Unsigned(1, "Commit to rename delay")
+ decodeToRenameDelay = Param.Unsigned(1, "Decode to rename delay")
+ renameWidth = Param.Unsigned(8, "Rename width")
+
+ commitToIEWDelay = Param.Unsigned(1, "Commit to "
+ "Issue/Execute/Writeback delay")
+ renameToIEWDelay = Param.Unsigned(2, "Rename to "
+ "Issue/Execute/Writeback delay")
+ issueToExecuteDelay = Param.Unsigned(1, "Issue to execute delay (internal "
+ "to the IEW stage)")
+ dispatchWidth = Param.Unsigned(8, "Dispatch width")
+ issueWidth = Param.Unsigned(8, "Issue width")
+ wbWidth = Param.Unsigned(8, "Writeback width")
+ wbDepth = Param.Unsigned(1, "Writeback depth")
+ fuPool = Param.FUPool("Functional Unit pool")
+
+ iewToCommitDelay = Param.Unsigned(1, "Issue/Execute/Writeback to commit "
+ "delay")
+ renameToROBDelay = Param.Unsigned(1, "Rename to reorder buffer delay")
+ commitWidth = Param.Unsigned(8, "Commit width")
+ squashWidth = Param.Unsigned(8, "Squash width")
+ trapLatency = Param.Tick(13, "Trap latency")
+ fetchTrapLatency = Param.Tick(1, "Fetch trap latency")
+
+ backComSize = Param.Unsigned(5, "Time buffer size for backwards communication")
+ forwardComSize = Param.Unsigned(5, "Time buffer size for forward communication")
+
+ predType = Param.String("tournament", "Branch predictor type ('local', 'tournament')")
+ localPredictorSize = Param.Unsigned(2048, "Size of local predictor")
+ localCtrBits = Param.Unsigned(2, "Bits per counter")
+ localHistoryTableSize = Param.Unsigned(2048, "Size of local history table")
+ localHistoryBits = Param.Unsigned(11, "Bits for the local history")
+ globalPredictorSize = Param.Unsigned(8192, "Size of global predictor")
+ globalCtrBits = Param.Unsigned(2, "Bits per counter")
+ globalHistoryBits = Param.Unsigned(13, "Bits of history")
+ choicePredictorSize = Param.Unsigned(8192, "Size of choice predictor")
+ choiceCtrBits = Param.Unsigned(2, "Bits of choice counters")
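+ # Note: the history-bit parameters index the tables above, so they are
+ # expected to be log2 of the corresponding sizes (11 bits for the
+ # 2048-entry local history table, 13 bits for the 8192-entry tables).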
+
+ BTBEntries = Param.Unsigned(4096, "Number of BTB entries")
+ BTBTagSize = Param.Unsigned(16, "Size of the BTB tags, in bits")
+
+ RASSize = Param.Unsigned(16, "RAS size")
+
+ LQEntries = Param.Unsigned(32, "Number of load queue entries")
+ SQEntries = Param.Unsigned(32, "Number of store queue entries")
+ LFSTSize = Param.Unsigned(1024, "Last fetched store table size")
+ SSITSize = Param.Unsigned(1024, "Store set ID table size")
+
+ numRobs = Param.Unsigned(1, "Number of Reorder Buffers")
+
+ numPhysIntRegs = Param.Unsigned(256, "Number of physical integer registers")
+ numPhysFloatRegs = Param.Unsigned(256, "Number of physical floating point "
+ "registers")
+ numIQEntries = Param.Unsigned(64, "Number of instruction queue entries")
+ numROBEntries = Param.Unsigned(192, "Number of reorder buffer entries")
+
+ instShiftAmt = Param.Unsigned(2, "Number of bits to shift instructions by")
+
+ function_trace = Param.Bool(False, "Enable function trace")
+ function_trace_start = Param.Tick(0, "Cycle to start function trace")
+
+ smtNumFetchingThreads = Param.Unsigned("SMT Number of Fetching Threads")
+ smtFetchPolicy = Param.String("SMT Fetch policy")
+ smtLSQPolicy = Param.String("SMT LSQ Sharing Policy")
+ smtLSQThreshold = Param.String("SMT LSQ Threshold Sharing Parameter")
+ smtIQPolicy = Param.String("SMT IQ Sharing Policy")
+ smtIQThreshold = Param.String("SMT IQ Threshold Sharing Parameter")
+ smtROBPolicy = Param.String("SMT ROB Sharing Policy")
+ smtROBThreshold = Param.String("SMT ROB Threshold Sharing Parameter")
+ smtCommitPolicy = Param.String("SMT Commit Policy")
--- /dev/null
+from m5.params import *
+from m5 import build_env
+from BaseCPU import BaseCPU
+
+class DerivOzoneCPU(BaseCPU):
+ type = 'DerivOzoneCPU'
+
+ numThreads = Param.Unsigned("number of HW thread contexts")
+
+ checker = Param.BaseCPU("Checker CPU")
++ if build_env['FULL_SYSTEM']:
++ profile = Param.Latency('0ns', "trace the kernel stack")
+
+ icache_port = Port("Instruction Port")
+ dcache_port = Port("Data Port")
+
+ width = Param.Unsigned("Width")
+ frontEndWidth = Param.Unsigned("Front end width")
++ frontEndLatency = Param.Unsigned("Front end latency")
+ backEndWidth = Param.Unsigned("Back end width")
+ backEndSquashLatency = Param.Unsigned("Back end squash latency")
+ backEndLatency = Param.Unsigned("Back end latency")
+ maxInstBufferSize = Param.Unsigned("Maximum instruction buffer size")
+ maxOutstandingMemOps = Param.Unsigned("Maximum number of outstanding memory operations")
+ decodeToFetchDelay = Param.Unsigned("Decode to fetch delay")
+ renameToFetchDelay = Param.Unsigned("Rename to fetch delay")
+ iewToFetchDelay = Param.Unsigned("Issue/Execute/Writeback to fetch "
+ "delay")
+ commitToFetchDelay = Param.Unsigned("Commit to fetch delay")
+ fetchWidth = Param.Unsigned("Fetch width")
+
+ renameToDecodeDelay = Param.Unsigned("Rename to decode delay")
+ iewToDecodeDelay = Param.Unsigned("Issue/Execute/Writeback to decode "
+ "delay")
+ commitToDecodeDelay = Param.Unsigned("Commit to decode delay")
+ fetchToDecodeDelay = Param.Unsigned("Fetch to decode delay")
+ decodeWidth = Param.Unsigned("Decode width")
+
+ iewToRenameDelay = Param.Unsigned("Issue/Execute/Writeback to rename "
+ "delay")
+ commitToRenameDelay = Param.Unsigned("Commit to rename delay")
+ decodeToRenameDelay = Param.Unsigned("Decode to rename delay")
+ renameWidth = Param.Unsigned("Rename width")
+
+ commitToIEWDelay = Param.Unsigned("Commit to "
+ "Issue/Execute/Writeback delay")
+ renameToIEWDelay = Param.Unsigned("Rename to "
+ "Issue/Execute/Writeback delay")
+ issueToExecuteDelay = Param.Unsigned("Issue to execute delay (internal "
+ "to the IEW stage)")
+ issueWidth = Param.Unsigned("Issue width")
+ executeWidth = Param.Unsigned("Execute width")
+ executeIntWidth = Param.Unsigned("Integer execute width")
+ executeFloatWidth = Param.Unsigned("Floating point execute width")
+ executeBranchWidth = Param.Unsigned("Branch execute width")
+ executeMemoryWidth = Param.Unsigned("Memory execute width")
+
+ iewToCommitDelay = Param.Unsigned("Issue/Execute/Writeback to commit "
+ "delay")
+ renameToROBDelay = Param.Unsigned("Rename to reorder buffer delay")
+ commitWidth = Param.Unsigned("Commit width")
+ squashWidth = Param.Unsigned("Squash width")
+
+ predType = Param.String("Type of branch predictor ('local', 'tournament')")
+ localPredictorSize = Param.Unsigned("Size of local predictor")
+ localCtrBits = Param.Unsigned("Bits per counter")
+ localHistoryTableSize = Param.Unsigned("Size of local history table")
+ localHistoryBits = Param.Unsigned("Bits for the local history")
+ globalPredictorSize = Param.Unsigned("Size of global predictor")
+ globalCtrBits = Param.Unsigned("Bits per counter")
+ globalHistoryBits = Param.Unsigned("Bits of history")
+ choicePredictorSize = Param.Unsigned("Size of choice predictor")
+ choiceCtrBits = Param.Unsigned("Bits of choice counters")
+
+ BTBEntries = Param.Unsigned("Number of BTB entries")
+ BTBTagSize = Param.Unsigned("Size of the BTB tags, in bits")
+
+ RASSize = Param.Unsigned("RAS size")
+
+ LQEntries = Param.Unsigned("Number of load queue entries")
+ SQEntries = Param.Unsigned("Number of store queue entries")
++ lsqLimits = Param.Bool(True, "LSQ size limits dispatch")
+ LFSTSize = Param.Unsigned("Last fetched store table size")
+ SSITSize = Param.Unsigned("Store set ID table size")
+
+ numPhysIntRegs = Param.Unsigned("Number of physical integer registers")
+ numPhysFloatRegs = Param.Unsigned("Number of physical floating point "
+ "registers")
+ numIQEntries = Param.Unsigned("Number of instruction queue entries")
+ numROBEntries = Param.Unsigned("Number of reorder buffer entries")
+
+ instShiftAmt = Param.Unsigned("Number of bits to shift instructions by")
+
+ function_trace = Param.Bool(False, "Enable function trace")
+ function_trace_start = Param.Tick(0, "Cycle to start function trace")
--- /dev/null
+from m5.SimObject import SimObject
+from m5.params import *
+from Serialize import Serialize
++from Serialize import Statreset
+from Statistics import Statistics
+from Trace import Trace
+from ExeTrace import ExecutionTrace
+from Debug import Debug
+
+class Root(SimObject):
+ type = 'Root'
+ clock = Param.RootClock('1THz', "tick frequency")
+ max_tick = Param.Tick('0', "maximum simulation ticks (0 = infinite)")
+ progress_interval = Param.Tick('0',
+ "print a progress message every n ticks (0 = never)")
+ output_file = Param.String('cout', "file to dump simulator output to")
+ checkpoint = Param.String('', "checkpoint file to load")
+# stats = Param.Statistics(Statistics(), "statistics object")
+# trace = Param.Trace(Trace(), "trace object")
+# serialize = Param.Serialize(Serialize(), "checkpoint generation options")
+ stats = Statistics()
+ trace = Trace()
+ exetrace = ExecutionTrace()
+ serialize = Serialize()
+ debug = Debug()
--- /dev/null
+from m5.SimObject import SimObject
+from m5.params import *
+from m5.proxy import *
+from m5 import build_env
+
+class MemoryMode(Enum): vals = ['invalid', 'atomic', 'timing']
+
+class System(SimObject):
+ type = 'System'
+ physmem = Param.PhysicalMemory(Parent.any, "phsyical memory")
+ mem_mode = Param.MemoryMode('atomic', "The mode the memory system is in")
+ if build_env['FULL_SYSTEM']:
+ boot_cpu_frequency = Param.Frequency(Self.cpu[0].clock.frequency,
+ "boot processor frequency")
+ init_param = Param.UInt64(0, "numerical value to pass into simulator")
+ boot_osflags = Param.String("a", "boot flags to pass to the kernel")
+ kernel = Param.String("file that contains the kernel code")
+ readfile = Param.String("", "file to read startup script from")
++ symbolfile = Param.String("", "file to get the symbols from")
+
+class AlphaSystem(System):
+ type = 'AlphaSystem'
+ console = Param.String("file that contains the console code")
+ pal = Param.String("file that contains palcode")
+ system_type = Param.UInt64("Type of system we are emulating")
+ system_rev = Param.UInt64("Revision of system we are emulating")
--- /dev/null
+/*
+ * Copyright (c) 2000-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Steve Reinhardt
+ * Nathan Binkert
+ */
+
+/** @file
+ * EventQueue interfaces
+ */
+
+#ifndef __SIM_EVENTQ_HH__
+#define __SIM_EVENTQ_HH__
+
+#include <assert.h>
+
+#include <algorithm>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "sim/host.hh" // for Tick
+
+#include "base/fast_alloc.hh"
++#include "base/misc.hh"
+#include "base/trace.hh"
+#include "sim/serialize.hh"
+
+class EventQueue; // forward declaration
+
+//////////////////////
+//
+// Main Event Queue
+//
+// Events on this queue are processed at the *beginning* of each
+// cycle, before the pipeline simulation is performed.
+//
+// defined in eventq.cc
+//
+//////////////////////
+extern EventQueue mainEventQueue;
+
+
+/*
+ * An item on an event queue. The action caused by a given
+ * event is specified by deriving a subclass and overriding the
+ * process() member function.
+ */
+class Event : public Serializable, public FastAlloc
+{
+ friend class EventQueue;
+
+ private:
+ /// queue to which this event belongs (though it may or may not be
+ /// scheduled on this queue yet)
+ EventQueue *queue;
+
+ Event *next;
+
+ Tick _when; //!< timestamp when event should be processed
+ int _priority; //!< event priority
+ char _flags;
+
+ protected:
+ enum Flags {
+ None = 0x0,
+ Squashed = 0x1,
+ Scheduled = 0x2,
+ AutoDelete = 0x4,
+ AutoSerialize = 0x8,
+ IsExitEvent = 0x10
+ };
+
+ bool getFlags(Flags f) const { return (_flags & f) == f; }
+ void setFlags(Flags f) { _flags |= f; }
+ void clearFlags(Flags f) { _flags &= ~f; }
+
+ protected:
+ EventQueue *theQueue() const { return queue; }
+
+#if TRACING_ON
+ Tick when_created; //!< time of creation, for debugging
+ Tick when_scheduled; //!< time of most recent scheduling, for debugging
+
+ virtual void trace(const char *action); //!< trace event activity
+#else
+ void trace(const char *) {}
+#endif
+
+ unsigned annotated_value;
+
+ public:
+
+ /// Event priorities, to provide tie-breakers for events scheduled
+ /// at the same cycle. Most events are scheduled at the default
+ /// priority; these values are used to control events that need to
+ /// be ordered within a cycle.
+ enum Priority {
+ /// Breakpoints should happen before anything else, so we
+ /// don't miss any action when debugging.
+ Debug_Break_Pri = -100,
+
+ /// For some reason "delayed" inter-cluster writebacks are
+ /// scheduled before regular writebacks (which have default
+ /// priority). Steve?
+ Delayed_Writeback_Pri = -1,
+
+ /// Default is zero for historical reasons.
+ Default_Pri = 0,
+
+ /// CPU switches schedule the new CPU's tick event for the
+ /// same cycle (after unscheduling the old CPU's tick event).
+ /// The switch needs to come before any tick events to make
+ /// sure we don't tick both CPUs in the same cycle.
- CPU_Switch_Pri = 31,
++ CPU_Switch_Pri = -31,
+
+ /// Serialization needs to occur before tick events as well, so
+ /// that a serialize/unserialize is identical to an on-line
+ /// CPU switch.
+ Serialize_Pri = 32,
+
+ /// CPU ticks must come after other associated CPU events
+ /// (such as writebacks).
+ CPU_Tick_Pri = 50,
+
+ /// Statistics events (dump, reset, etc.) come after
+ /// everything else, but before exit.
+ Stat_Event_Pri = 90,
+
+ /// If we want to exit on this cycle, it's the very last thing
+ /// we do.
+ Sim_Exit_Pri = 100
+ };
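+ // Example: if a CPU-switch event and a CPU tick event are both
+ // scheduled for tick T, CPU_Switch_Pri (-31) sorts before
+ // CPU_Tick_Pri (50), so the switch is serviced first and the old
+ // CPU never ticks at T.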
+
+ /**
+ * Event constructor.
+ * @param q the queue on which this event will be scheduled
+ * @param p the priority used to break ties within a tick
+ */
+ Event(EventQueue *q, Priority p = Default_Pri)
+ : queue(q), next(NULL), _priority(p), _flags(None),
+#if TRACING_ON
+ when_created(curTick), when_scheduled(0),
+#endif
+ annotated_value(0)
+ {
+ }
+
+ ~Event() {}
+
+ virtual const std::string name() const {
+ return csprintf("Event_%x", (uintptr_t)this);
+ }
+
+ /// Determine if the current event is scheduled
+ bool scheduled() const { return getFlags(Scheduled); }
+
+ /// Schedule the event for time t (the priority is fixed at construction)
+ void schedule(Tick t);
+
+ /// Reschedule the already-scheduled event for time t
+ void reschedule(Tick t);
+
+ /// Remove the event from the current schedule
+ void deschedule();
+
+ /// Return a C string describing the event. This string should
+ /// *not* be dynamically allocated; just a const char array
+ /// describing the event class.
+ virtual const char *description();
+
+ /// Dump the current event data
+ void dump();
+
+ /*
+ * This member function is invoked when the event is processed
+ * (occurs). There is no default implementation; each subclass
+ * must provide its own implementation. The event is not
+ * automatically deleted after it is processed (to allow for
+ * statically allocated event objects).
+ *
+ * If the AutoDelete flag is set, the object is deleted once it
+ * is processed.
+ */
+ virtual void process() = 0;
+
+ void annotate(unsigned value) { annotated_value = value; }
+ unsigned annotation() { return annotated_value; }
+
+ /// Squash the current event
+ void squash() { setFlags(Squashed); }
+
+ /// Check whether the event is squashed
+ bool squashed() { return getFlags(Squashed); }
+
+ /// See if this is a SimExitEvent (without resorting to RTTI)
+ bool isExitEvent() { return getFlags(IsExitEvent); }
+
+ /// Get the time that the event is scheduled
+ Tick when() const { return _when; }
+
+ /// Get the event priority
+ int priority() const { return _priority; }
+
+ struct priority_compare :
+ public std::binary_function<Event *, Event *, bool>
+ {
+ bool operator()(const Event *l, const Event *r) const {
+ // order by time first; break ties by priority within the same tick
+ return l->when() > r->when() ||
+ (l->when() == r->when() && l->priority() > r->priority());
+ }
+ };
+
+ virtual void serialize(std::ostream &os);
+ virtual void unserialize(Checkpoint *cp, const std::string &section);
+};
+
+template <class T, void (T::* F)()>
+void
+DelayFunction(Tick when, T *object)
+{
+ class DelayEvent : public Event
+ {
+ private:
+ T *object;
+
+ public:
+ DelayEvent(Tick when, T *o)
+ : Event(&mainEventQueue), object(o)
+ { setFlags(this->AutoDelete); schedule(when); }
+ void process() { (object->*F)(); }
+ const char *description() { return "delay"; }
+ };
+
+ new DelayEvent(when, object);
+}
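+// Usage sketch (MyObject and wakeup() are hypothetical): schedule a
+// one-shot, self-deleting call of obj->wakeup() 100 ticks from now:
+// DelayFunction<MyObject, &MyObject::wakeup>(curTick + 100, obj);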
+
+template <class T, void (T::* F)()>
+class EventWrapper : public Event
+{
+ private:
+ T *object;
+
+ public:
+ EventWrapper(T *obj, bool del = false, EventQueue *q = &mainEventQueue,
+ Priority p = Default_Pri)
+ : Event(q, p), object(obj)
+ {
+ if (del)
+ setFlags(AutoDelete);
+ }
+ void process() { (object->*F)(); }
+};
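+// Usage sketch, mirroring the IdeDisk members earlier in this patch:
+// EventWrapper<IdeDisk, &IdeDisk::doDmaTransfer> dmaTransferEvent(this);
+// dmaTransferEvent.schedule(curTick + delay);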
+
+/*
+ * Queue of events sorted in time order
+ */
+class EventQueue : public Serializable
+{
+ protected:
+ std::string objName;
+
+ private:
+ Event *head;
+
+ void insert(Event *event);
+ void remove(Event *event);
+
+ public:
+
+ // constructor
+ EventQueue(const std::string &n)
+ : objName(n), head(NULL)
+ {}
+
+ virtual const std::string name() const { return objName; }
+
+ // schedule the given event on this queue
+ void schedule(Event *ev);
+ void deschedule(Event *ev);
+ void reschedule(Event *ev);
+
+ Tick nextTick() { return head->when(); }
+ Event *serviceOne();
+
+ // process all events up to the given timestamp. we inline a
+ // quick test to see if there are any events to process; if so,
+ // call the internal out-of-line version to process them all.
+ void serviceEvents(Tick when) {
+ while (!empty()) {
+ if (nextTick() > when)
+ break;
+
+ /**
+ * @todo this assert is a good bug catcher. I need to
+ * make it true again.
+ */
+ //assert(head->when() >= when && "event scheduled in the past");
+ serviceOne();
+ }
+ }
+
+ // default: process all events up to 'now' (curTick)
+ void serviceEvents() { serviceEvents(curTick); }
+
+ // return true if no events are queued
+ bool empty() { return head == NULL; }
+
+ void dump();
+
+ Tick nextEventTime() { return empty() ? curTick : head->when(); }
+
+ virtual void serialize(std::ostream &os);
+ virtual void unserialize(Checkpoint *cp, const std::string &section);
+};
+
+
+//////////////////////
+//
+// inline functions
+//
+// can't put these inside declaration due to circular dependence
+// between Event and EventQueue classes.
+//
+//////////////////////
+
+// schedule at specified time (place on event queue specified via
+// constructor)
+inline void
+Event::schedule(Tick t)
+{
+ assert(!scheduled());
- assert(t >= curTick);
++// if (t < curTick)
++// warn("t is less than curTick, ensure you don't want cycles");
+
+ setFlags(Scheduled);
+#if TRACING_ON
+ when_scheduled = curTick;
+#endif
+ _when = t;
+ queue->schedule(this);
+}
+
+inline void
+Event::deschedule()
+{
+ assert(scheduled());
+
+ clearFlags(Squashed);
+ clearFlags(Scheduled);
+ queue->deschedule(this);
+}
+
+inline void
+Event::reschedule(Tick t)
+{
+ assert(scheduled());
+ clearFlags(Squashed);
+
+#if TRACING_ON
+ when_scheduled = curTick;
+#endif
+ _when = t;
+ queue->reschedule(this);
+}
+
+inline void
+EventQueue::schedule(Event *event)
+{
+ insert(event);
+ if (DTRACE(Event))
+ event->trace("scheduled");
+}
+
+inline void
+EventQueue::deschedule(Event *event)
+{
+ remove(event);
+ if (DTRACE(Event))
+ event->trace("descheduled");
+}
+
+inline void
+EventQueue::reschedule(Event *event)
+{
+ remove(event);
+ insert(event);
+ if (DTRACE(Event))
+ event->trace("rescheduled");
+}
+
+
+
+#endif // __SIM_EVENTQ_HH__
--- /dev/null
+/*
+ * Copyright (c) 2003-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Nathan Binkert
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "sim/pseudo_inst.hh"
+#include "arch/vtophys.hh"
+#include "base/annotate.hh"
+#include "cpu/base.hh"
+#include "cpu/thread_context.hh"
+#include "cpu/quiesce_event.hh"
+#include "kern/kernel_stats.hh"
+#include "sim/param.hh"
+#include "sim/serialize.hh"
+#include "sim/sim_exit.hh"
+#include "sim/stat_control.hh"
+#include "sim/stats.hh"
+#include "sim/system.hh"
+#include "sim/debug.hh"
+#include "sim/vptr.hh"
+
+using namespace std;
+
+using namespace Stats;
+using namespace TheISA;
+
+namespace AlphaPseudo
+{
+ bool doStatisticsInsts;
+ bool doCheckpointInsts;
+ bool doQuiesce;
+
+ void
+ arm(ThreadContext *tc)
+ {
+ if (tc->getKernelStats())
+ tc->getKernelStats()->arm();
+ }
+
+ void
+ quiesce(ThreadContext *tc)
+ {
+ if (!doQuiesce)
+ return;
+
+ tc->suspend();
+ if (tc->getKernelStats())
+ tc->getKernelStats()->quiesce();
+ }
+
+ void
+ quiesceNs(ThreadContext *tc, uint64_t ns)
+ {
+ if (!doQuiesce || ns == 0)
+ return;
+
+ EndQuiesceEvent *quiesceEvent = tc->getQuiesceEvent();
+
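+ // Event::schedule() asserts that the event is not already pending,
+ // so an armed wakeup must be rescheduled rather than scheduled again.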
+ if (quiesceEvent->scheduled())
+ quiesceEvent->reschedule(curTick + Clock::Int::ns * ns);
+ else
+ quiesceEvent->schedule(curTick + Clock::Int::ns * ns);
+
+ tc->suspend();
+ if (tc->getKernelStats())
+ tc->getKernelStats()->quiesce();
+ }
+
+ void
+ quiesceCycles(ThreadContext *tc, uint64_t cycles)
+ {
+ if (!doQuiesce || cycles == 0)
+ return;
+
+ EndQuiesceEvent *quiesceEvent = tc->getQuiesceEvent();
+
+ if (quiesceEvent->scheduled())
+ quiesceEvent->reschedule(curTick +
+ tc->getCpuPtr()->cycles(cycles));
+ else
+ quiesceEvent->schedule(curTick +
+ tc->getCpuPtr()->cycles(cycles));
+
+ tc->suspend();
+ if (tc->getKernelStats())
+ tc->getKernelStats()->quiesce();
+ }
+
+ uint64_t
+ quiesceTime(ThreadContext *tc)
+ {
+ return (tc->readLastActivate() - tc->readLastSuspend()) / Clock::Int::ns;
+ }
+
+ void
+ ivlb(ThreadContext *tc)
+ {
+ if (tc->getKernelStats())
+ tc->getKernelStats()->ivlb();
+ }
+
+ void
+ ivle(ThreadContext *tc)
+ {
+ }
+
+ void
+ m5exit_old(ThreadContext *tc)
+ {
+ exitSimLoop(curTick, "m5_exit_old instruction encountered");
+ }
+
+ void
+ m5exit(ThreadContext *tc, Tick delay)
+ {
+ Tick when = curTick + delay * Clock::Int::ns;
+ exitSimLoop(when, "m5_exit instruction encountered");
+ }
+
++ void
++ loadsymbol(ThreadContext *xc)
++ {
++ const string &filename = xc->getCpuPtr()->system->params()->symbolfile;
++ if (filename.empty()) {
++ return;
++ }
++
++ std::string buffer;
++ ifstream file(filename.c_str());
++
++ if (!file)
++ fatal("file error: Can't open symbol table file %s\n", filename);
++
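++ // Each line is expected in nm(1)-style form "<hex-addr> <type> <name>",
++ // e.g. "fffffc0000310000 T do_entInt" (address shown is illustrative).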
++ while (getline(file, buffer)) {
++
++ if (buffer.empty())
++ continue;
++
++ string::size_type idx = buffer.find(' ');
++ if (idx == string::npos)
++ continue;
++
++ string address = "0x" + buffer.substr(0, idx);
++ eat_white(address);
++ if (address.empty())
++ continue;
++
++ // Skip over letter and space
++ string symbol = buffer.substr(idx + 3);
++ eat_white(symbol);
++ if (symbol.empty())
++ continue;
++
++ Addr addr;
++ if (!to_number(address, addr))
++ continue;
++
++ if (!xc->getSystemPtr()->kernelSymtab->insert(addr, symbol))
++ continue;
++
++ DPRINTF(Loader, "Loaded symbol: %s @ %#llx\n", symbol, addr);
++ }
++ file.close();
++ }
++
+ void
+ resetstats(ThreadContext *tc, Tick delay, Tick period)
+ {
+ if (!doStatisticsInsts)
+ return;
+
+ Tick when = curTick + delay * Clock::Int::ns;
+ Tick repeat = period * Clock::Int::ns;
+
+ using namespace Stats;
+ SetupEvent(Reset, when, repeat);
+ }
+
+ void
+ dumpstats(ThreadContext *tc, Tick delay, Tick period)
+ {
+ if (!doStatisticsInsts)
+ return;
+
+ Tick when = curTick + delay * Clock::Int::ns;
+ Tick repeat = period * Clock::Int::ns;
+
+ using namespace Stats;
+ SetupEvent(Dump, when, repeat);
+ }
+
+ void
+ addsymbol(ThreadContext *tc, Addr addr, Addr symbolAddr)
+ {
+ char symb[100];
+ CopyStringOut(tc, symb, symbolAddr, 100);
+ std::string symbol(symb);
+
+ DPRINTF(Loader, "Loaded symbol: %s @ %#llx\n", symbol, addr);
+
+ tc->getSystemPtr()->kernelSymtab->insert(addr,symbol);
+ }
+
+ void
+ anBegin(ThreadContext *tc, uint64_t cur)
+ {
+ Annotate::annotations.add(tc->getSystemPtr(), 0, cur >> 32,
+ cur & 0xFFFFFFFF, 0, 0);
+ }
+
+ void
+ anWait(ThreadContext *tc, uint64_t cur, uint64_t wait)
+ {
+ Annotate::annotations.add(tc->getSystemPtr(), 0, cur >> 32,
+ cur & 0xFFFFFFFF, wait >> 32, wait & 0xFFFFFFFF);
+ }
+
+
+ void
+ dumpresetstats(ThreadContext *tc, Tick delay, Tick period)
+ {
+ if (!doStatisticsInsts)
+ return;
+
+ Tick when = curTick + delay * Clock::Int::ns;
+ Tick repeat = period * Clock::Int::ns;
+
+ using namespace Stats;
+ SetupEvent(Dump|Reset, when, repeat);
+ }
+
+ void
+ m5checkpoint(ThreadContext *tc, Tick delay, Tick period)
+ {
+ if (!doCheckpointInsts)
+ return;
+ exitSimLoop("checkpoint");
+ }
+
+ uint64_t
+ readfile(ThreadContext *tc, Addr vaddr, uint64_t len, uint64_t offset)
+ {
+ const string &file = tc->getCpuPtr()->system->params()->readfile;
+ if (file.empty()) {
+ return ULL(0);
+ }
+
+ uint64_t result = 0;
+
+ int fd = ::open(file.c_str(), O_RDONLY, 0);
+ if (fd < 0)
+ panic("could not open file %s\n", file);
+
+ if (::lseek(fd, offset, SEEK_SET) < 0)
+ panic("could not seek: %s", strerror(errno));
+
+ char *buf = new char[len];
+ char *p = buf;
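+ // ::read() may return fewer bytes than requested, so accumulate
+ // until the request is satisfied or read() reports EOF/error (<= 0).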
+ while (len > 0) {
+ int bytes = ::read(fd, p, len);
+ if (bytes <= 0)
+ break;
+
+ p += bytes;
+ result += bytes;
+ len -= bytes;
+ }
+
+ close(fd);
+ CopyIn(tc, vaddr, buf, result);
+ delete [] buf;
+ return result;
+ }
+
+ class Context : public ParamContext
+ {
+ public:
+ Context(const string &section) : ParamContext(section) {}
+ void checkParams();
+ };
+
+ Context context("pseudo_inst");
+
+ Param<bool> __quiesce(&context, "quiesce",
+ "enable quiesce instructions",
+ true);
+ Param<bool> __statistics(&context, "statistics",
+ "enable statistics pseudo instructions",
+ true);
+ Param<bool> __checkpoint(&context, "checkpoint",
+ "enable checkpoint pseudo instructions",
+ true);
+
+ void
+ Context::checkParams()
+ {
+ doQuiesce = __quiesce;
+ doStatisticsInsts = __statistics;
+ doCheckpointInsts = __checkpoint;
+ }
+
+ void debugbreak(ThreadContext *tc)
+ {
+ debug_break();
+ }
+
+ void switchcpu(ThreadContext *tc)
+ {
+ exitSimLoop("switchcpu");
+ }
+}
--- /dev/null
+/*
+ * Copyright (c) 2003-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Nathan Binkert
+ */
+
+#ifndef __SIM_PSEUDO_INST_HH__
+#define __SIM_PSEUDO_INST_HH__
+
+class ThreadContext;
+
+// We need the "Tick" and "Addr" data types from here
+#include "sim/host.hh"
+
+namespace AlphaPseudo
+{
+ /**
+ * @todo these externs are only here for a hack in fullCPU::takeOver...
+ */
+ extern bool doStatisticsInsts;
+ extern bool doCheckpointInsts;
+ extern bool doQuiesce;
+
+ void arm(ThreadContext *tc);
+ void quiesce(ThreadContext *tc);
+ void quiesceNs(ThreadContext *tc, uint64_t ns);
+ void quiesceCycles(ThreadContext *tc, uint64_t cycles);
+ uint64_t quiesceTime(ThreadContext *tc);
+ void ivlb(ThreadContext *tc);
+ void ivle(ThreadContext *tc);
+ void m5exit(ThreadContext *tc, Tick delay);
+ void m5exit_old(ThreadContext *tc);
++ void loadsymbol(ThreadContext *xc);
+ void resetstats(ThreadContext *tc, Tick delay, Tick period);
+ void dumpstats(ThreadContext *tc, Tick delay, Tick period);
+ void dumpresetstats(ThreadContext *tc, Tick delay, Tick period);
+ void m5checkpoint(ThreadContext *tc, Tick delay, Tick period);
+ uint64_t readfile(ThreadContext *tc, Addr vaddr, uint64_t len, uint64_t offset);
+ void debugbreak(ThreadContext *tc);
+ void switchcpu(ThreadContext *tc);
+ void addsymbol(ThreadContext *tc, Addr addr, Addr symbolAddr);
+ void anBegin(ThreadContext *tc, uint64_t cur);
+ void anWait(ThreadContext *tc, uint64_t cur, uint64_t wait);
+}
+
+#endif // __SIM_PSEUDO_INST_HH__
--- /dev/null
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Nathan Binkert
+ * Erik Hallnor
+ * Steve Reinhardt
+ */
+
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+
+#include <fstream>
+#include <list>
+#include <string>
+#include <vector>
+
+#include "base/inifile.hh"
+#include "base/misc.hh"
+#include "base/output.hh"
+#include "base/str.hh"
+#include "base/trace.hh"
+#include "sim/eventq.hh"
+#include "sim/param.hh"
+#include "sim/serialize.hh"
+#include "sim/sim_events.hh"
+#include "sim/sim_exit.hh"
+#include "sim/sim_object.hh"
+
++// For stat reset hack
++#include "sim/stat_control.hh"
++
+using namespace std;
+
+int Serializable::ckptMaxCount = 0;
+int Serializable::ckptCount = 0;
+int Serializable::ckptPrevCount = -1;
+
+void
+Serializable::nameOut(ostream &os)
+{
+ os << "\n[" << name() << "]\n";
+}
+
+void
+Serializable::nameOut(ostream &os, const string &_name)
+{
+ os << "\n[" << _name << "]\n";
+}
+
+template <class T>
+void
+paramOut(ostream &os, const std::string &name, const T ¶m)
+{
+ os << name << "=";
+ showParam(os, param);
+ os << "\n";
+}
+
+
+template <class T>
+void
+paramIn(Checkpoint *cp, const std::string &section,
+ const std::string &name, T ¶m)
+{
+ std::string str;
+ if (!cp->find(section, name, str) || !parseParam(str, param)) {
+ fatal("Can't unserialize '%s:%s'\n", section, name);
+ }
+}
+
+
+template <class T>
+void
+arrayParamOut(ostream &os, const std::string &name,
+ const T *param, int size)
+{
+ os << name << "=";
+ if (size > 0)
+ showParam(os, param[0]);
+ for (int i = 1; i < size; ++i) {
+ os << " ";
+ showParam(os, param[i]);
+ }
+ os << "\n";
+}
+
+
+template <class T>
+void
+arrayParamIn(Checkpoint *cp, const std::string &section,
+ const std::string &name, T *param, int size)
+{
+ std::string str;
+ if (!cp->find(section, name, str)) {
+ fatal("Can't unserialize '%s:%s'\n", section, name);
+ }
+
+ // code below stolen from VectorParam<T>::parse().
+ // it would be nice to unify these somehow...
+
+ vector<string> tokens;
+
+ tokenize(tokens, str, ' ');
+
+ // Need this if we were doing a vector
+ // value.resize(tokens.size());
+
+ if (tokens.size() != size) {
+ fatal("Array size mismatch on %s:%s'\n", section, name);
+ }
+
+ for (int i = 0; i < tokens.size(); i++) {
+ // need to parse into local variable to handle vector<bool>,
+ // for which operator[] returns a special reference class
+ // that's not the same as 'bool&', (since it's a packed
+ // vector)
+ T scalar_value;
+ if (!parseParam(tokens[i], scalar_value)) {
+ string err("could not parse \"");
+
+ err += str;
+ err += "\"";
+
+ fatal(err);
+ }
+
+ // assign parsed value to vector
+ param[i] = scalar_value;
+ }
+}
+
+
+void
+objParamIn(Checkpoint *cp, const std::string &section,
+ const std::string &name, Serializable * ¶m)
+{
+ if (!cp->findObj(section, name, param)) {
+ fatal("Can't unserialize '%s:%s'\n", section, name);
+ }
+}
+
+
+#define INSTANTIATE_PARAM_TEMPLATES(type) \
+template void \
+paramOut(ostream &os, const std::string &name, type const ¶m); \
+template void \
+paramIn(Checkpoint *cp, const std::string &section, \
+ const std::string &name, type & param); \
+template void \
+arrayParamOut(ostream &os, const std::string &name, \
+ type const *param, int size); \
+template void \
+arrayParamIn(Checkpoint *cp, const std::string &section, \
+ const std::string &name, type *param, int size);
+
+INSTANTIATE_PARAM_TEMPLATES(signed char)
+INSTANTIATE_PARAM_TEMPLATES(unsigned char)
+INSTANTIATE_PARAM_TEMPLATES(signed short)
+INSTANTIATE_PARAM_TEMPLATES(unsigned short)
+INSTANTIATE_PARAM_TEMPLATES(signed int)
+INSTANTIATE_PARAM_TEMPLATES(unsigned int)
+INSTANTIATE_PARAM_TEMPLATES(signed long)
+INSTANTIATE_PARAM_TEMPLATES(unsigned long)
+INSTANTIATE_PARAM_TEMPLATES(signed long long)
+INSTANTIATE_PARAM_TEMPLATES(unsigned long long)
+INSTANTIATE_PARAM_TEMPLATES(bool)
+INSTANTIATE_PARAM_TEMPLATES(string)
+
+
+/////////////////////////////
+
+/// Container for serializing global variables (not associated with
+/// any serialized object).
+class Globals : public Serializable
+{
+ public:
+ const string name() const;
+ void serialize(ostream &os);
+ void unserialize(Checkpoint *cp);
+};
+
+/// The one and only instance of the Globals class.
+Globals globals;
+
+const string
+Globals::name() const
+{
+ return "Globals";
+}
+
+void
+Globals::serialize(ostream &os)
+{
+ nameOut(os);
+ SERIALIZE_SCALAR(curTick);
+
+ nameOut(os, "MainEventQueue");
+ mainEventQueue.serialize(os);
+}
+
+void
+Globals::unserialize(Checkpoint *cp)
+{
+ const string &section = name();
+ UNSERIALIZE_SCALAR(curTick);
+
+ mainEventQueue.unserialize(cp, "MainEventQueue");
+}
+
+void
+Serializable::serializeAll(const std::string &cpt_dir)
+{
+ setCheckpointDir(cpt_dir);
+ string dir = Checkpoint::dir();
+ if (mkdir(dir.c_str(), 0775) == -1 && errno != EEXIST)
+ fatal("couldn't mkdir %s\n", dir);
+
+ string cpt_file = dir + Checkpoint::baseFilename;
+ ofstream outstream(cpt_file.c_str());
+ time_t t = time(NULL);
+ outstream << "// checkpoint generated: " << ctime(&t);
+
+ globals.serialize(outstream);
+ SimObject::serializeAll(outstream);
+}
+
+void
+Serializable::unserializeAll(const std::string &cpt_dir)
+{
+ setCheckpointDir(cpt_dir);
+ string dir = Checkpoint::dir();
+ string cpt_file = dir + Checkpoint::baseFilename;
+ string section = "";
+
+ DPRINTFR(Config, "Loading checkpoint dir '%s'\n", dir);
+ Checkpoint *cp = new Checkpoint(dir, section);
+ unserializeGlobals(cp);
+
+ SimObject::unserializeAll(cp);
+}
+
+void
+Serializable::unserializeGlobals(Checkpoint *cp)
+{
+ globals.unserialize(cp);
+}
+
+const char *Checkpoint::baseFilename = "m5.cpt";
+
+static string checkpointDirBase;
+
+void
+setCheckpointDir(const std::string &name)
+{
+ checkpointDirBase = name;
+ // guard against an empty name; indexing an empty string is undefined
+ if (checkpointDirBase.empty() ||
+ checkpointDirBase[checkpointDirBase.size() - 1] != '/')
+ checkpointDirBase += "/";
+}
+
+string
+Checkpoint::dir()
+{
+ // use csprintf to insert curTick into directory name if it
+ // appears to have a format placeholder in it.
+ return (checkpointDirBase.find("%") != string::npos) ?
+ csprintf(checkpointDirBase, curTick) : checkpointDirBase;
+}
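+// For example (illustrative): after setCheckpointDir("cpt.%012d/"),
+// serializing at tick 5000 writes into directory "cpt.000000005000/".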
+
+void
+debug_serialize(const std::string &cpt_dir)
+{
+ Serializable::serializeAll(cpt_dir);
+}
+
+
+////////////////////////////////////////////////////////////////////////
+//
+// SerializableClass member definitions
+//
+////////////////////////////////////////////////////////////////////////
+
+// Map of class names to SerializableBuilder creation functions.
+// Need to make this a pointer so we can force initialization on the
+// first reference; otherwise, some SerializableClass constructors
+// may be invoked before the classMap constructor.
+map<string,SerializableClass::CreateFunc> *SerializableClass::classMap = 0;
+
+// SerializableClass constructor: add mapping to classMap
+SerializableClass::SerializableClass(const string &className,
+ CreateFunc createFunc)
+{
+ if (classMap == NULL)
+ classMap = new map<string,SerializableClass::CreateFunc>();
+
+ if ((*classMap)[className])
+ {
+ cerr << "Error: simulation object class " << className << " redefined"
+ << endl;
+ fatal("");
+ }
+
+ // add className --> createFunc to class map
+ (*classMap)[className] = createFunc;
+}
+
+
+//
+//
+Serializable *
+SerializableClass::createObject(Checkpoint *cp,
+ const std::string &section)
+{
+ string className;
+
+ if (!cp->find(section, "type", className)) {
+ fatal("Serializable::create: no 'type' entry in section '%s'.\n",
+ section);
+ }
+
+ CreateFunc createFunc = (*classMap)[className];
+
+ if (createFunc == NULL) {
+ fatal("Serializable::create: no create function for class '%s'.\n",
+ className);
+ }
+
+ Serializable *object = createFunc(cp, section);
+
+ assert(object != NULL);
+
+ return object;
+}
+
+
+Serializable *
+Serializable::create(Checkpoint *cp, const std::string &section)
+{
+ Serializable *object = SerializableClass::createObject(cp, section);
+ object->unserialize(cp, section);
+ return object;
+}
+
+
+Checkpoint::Checkpoint(const std::string &cpt_dir, const std::string &path)
+ : db(new IniFile), basePath(path), cptDir(cpt_dir)
+{
+ string filename = cpt_dir + "/" + Checkpoint::baseFilename;
+ if (!db->load(filename)) {
+ fatal("Can't load checkpoint file '%s'\n", filename);
+ }
+}
+
+
+bool
+Checkpoint::find(const std::string &section, const std::string &entry,
+ std::string &value)
+{
+ return db->find(section, entry, value);
+}
+
+
+bool
+Checkpoint::findObj(const std::string &section, const std::string &entry,
+ Serializable *&value)
+{
+ string path;
+
+ if (!db->find(section, entry, path))
+ return false;
+
+ if ((value = objMap[path]) != NULL)
+ return true;
+
+ return false;
+}
+
+
+bool
+Checkpoint::sectionExists(const std::string &section)
+{
+ return db->sectionExists(section);
+}
++
++/** Hacked stat reset event */
++
++class StatresetParamContext : public ParamContext
++{
++ public:
++ StatresetParamContext(const string &section);
++ ~StatresetParamContext();
++ void startup();
++};
++
++StatresetParamContext statParams("statsreset");
++
++Param<Tick> reset_cycle(&statParams, "reset_cycle",
++ "Cycle to reset stats on", 0);
++
++StatresetParamContext::StatresetParamContext(const string &section)
++ : ParamContext(section)
++{ }
++
++StatresetParamContext::~StatresetParamContext()
++{
++}
++
++void
++StatresetParamContext::startup()
++{
++ if (reset_cycle > 0) {
++ Stats::SetupEvent(Stats::Reset, curTick + reset_cycle, 0);
++ cprintf("Stats reset event scheduled for %lli\n",
++ curTick + reset_cycle);
++ }
++}
--- /dev/null
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Nathan Binkert
+ */
+
+// This file will contain default statistics for the simulator that
+// don't really belong to a specific simulator object
+
+#include <fstream>
+#include <iostream>
+#include <list>
+
+#include "base/callback.hh"
+#include "base/hostinfo.hh"
+#include "base/statistics.hh"
+#include "base/str.hh"
+#include "base/time.hh"
+#include "base/stats/output.hh"
+#include "cpu/base.hh"
+#include "sim/eventq.hh"
+#include "sim/sim_object.hh"
+#include "sim/stat_control.hh"
+#include "sim/root.hh"
+
+using namespace std;
+
+Stats::Formula hostInstRate;
+Stats::Formula hostTickRate;
+Stats::Value hostMemory;
+Stats::Value hostSeconds;
+
+Stats::Value simTicks;
+Stats::Value simInsts;
+Stats::Value simFreq;
+Stats::Formula simSeconds;
+
+namespace Stats {
+
+Time statTime(true);
+Tick startTick;
+Tick lastDump(0);
+
+class SimTicksReset : public Callback
+{
+ public:
+ void process()
+ {
+ statTime.set();
+ startTick = curTick;
+ }
+};
+
+double
+statElapsedTime()
+{
+ Time now(true);
+ Time elapsed = now - statTime;
+ return elapsed();
+}
+
+Tick
+statElapsedTicks()
+{
+ return curTick - startTick;
+}
+
+SimTicksReset simTicksReset;
+
+void
+InitSimStats()
+{
+ simInsts
+ .functor(BaseCPU::numSimulatedInstructions)
+ .name("sim_insts")
+ .desc("Number of instructions simulated")
+ .precision(0)
+ .prereq(simInsts)
+ ;
+
+ simSeconds
+ .name("sim_seconds")
+ .desc("Number of seconds simulated")
+ ;
+
+ simFreq
+ .scalar(Clock::Frequency)
+ .name("sim_freq")
+ .desc("Frequency of simulated ticks")
+ ;
+
+ simTicks
+ .functor(statElapsedTicks)
+ .name("sim_ticks")
+ .desc("Number of ticks simulated")
+ ;
+
+ hostInstRate
+ .name("host_inst_rate")
+ .desc("Simulator instruction rate (inst/s)")
+ .precision(0)
+ .prereq(simInsts)
+ ;
+
+ hostMemory
+ .functor(memUsage)
+ .name("host_mem_usage")
+ .desc("Number of bytes of host memory used")
+ .prereq(hostMemory)
+ ;
+
+ hostSeconds
+ .functor(statElapsedTime)
+ .name("host_seconds")
+ .desc("Real time elapsed on the host")
+ .precision(2)
+ ;
+
+ hostTickRate
+ .name("host_tick_rate")
+ .desc("Simulator tick rate (ticks/s)")
+ .precision(0)
+ ;
+
+ simSeconds = simTicks / simFreq;
+ hostInstRate = simInsts / hostSeconds;
+ hostTickRate = simTicks / hostSeconds;
+
+ registerResetCallback(&simTicksReset);
+}
+
+class StatEvent : public Event
+{
+ protected:
+ int flags;
+ Tick repeat;
+
+ public:
- StatEvent(int _flags, Tick _when, Tick _repeat);
++ StatEvent(EventQueue *queue, int _flags, Tick _when, Tick _repeat);
+ virtual void process();
+ virtual const char *description();
+};
+
-StatEvent::StatEvent(int _flags, Tick _when, Tick _repeat)
- : Event(&mainEventQueue, Stat_Event_Pri),
++StatEvent::StatEvent(EventQueue *queue, int _flags, Tick _when, Tick _repeat)
++ : Event(queue, Stat_Event_Pri),
+ flags(_flags), repeat(_repeat)
+{
+ setFlags(AutoDelete);
+ schedule(_when);
+}
+
+const char *
+StatEvent::description()
+{
+ return "Statistics dump and/or reset";
+}
+
+void
+StatEvent::process()
+{
+ if (flags & Stats::Dump)
+ DumpNow();
+
- if (flags & Stats::Reset)
++ if (flags & Stats::Reset) {
++ cprintf("Resetting stats!\n");
+ reset();
++ }
+
+ if (repeat)
+ schedule(curTick + repeat);
+}
+
+list<Output *> OutputList;
+
+void
+DumpNow()
+{
+ assert(lastDump <= curTick);
+ if (lastDump == curTick)
+ return;
+ lastDump = curTick;
+
+ list<Output *>::iterator i = OutputList.begin();
+ list<Output *>::iterator end = OutputList.end();
+ for (; i != end; ++i) {
+ Output *output = *i;
+ if (!output->valid())
+ continue;
+
+ output->output();
+ }
+}
+
+void
-SetupEvent(int flags, Tick when, Tick repeat)
++SetupEvent(int flags, Tick when, Tick repeat, EventQueue *queue)
+{
- new StatEvent(flags, when, repeat);
++ if (queue == NULL)
++ queue = &mainEventQueue;
++
++ new StatEvent(queue, flags, when, repeat);
+}
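+// Usage sketch: dump and reset all statistics every million ticks,
+// starting one million ticks from now, on the main event queue:
+// Stats::SetupEvent(Stats::Dump | Stats::Reset, curTick + 1000000, 1000000);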
+
+/* namespace Stats */ }
+
+void debugDumpStats()
+{
+ Stats::DumpNow();
+}
+
--- /dev/null
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Nathan Binkert
+ */
+
+#ifndef __SIM_STAT_CONTROL_HH__
+#define __SIM_STAT_CONTROL_HH__
+
+#include <fstream>
+#include <list>
+
++class EventQueue;
++
+namespace Stats {
+
+enum {
+ Reset = 0x1,
+ Dump = 0x2
+};
+
+class Output;
+extern std::list<Output *> OutputList;
+
+void DumpNow();
-void SetupEvent(int flags, Tick when, Tick repeat = 0);
++void SetupEvent(int flags, Tick when, Tick repeat = 0, EventQueue *queue = NULL);
+
+void InitSimStats();
+
+/* namespace Stats */ }
+
+#endif // __SIM_STAT_CONTROL_HH__
--- /dev/null
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Steve Reinhardt
+ * Lisa Hsu
+ * Nathan Binkert
+ */
+
+#ifndef __SYSTEM_HH__
+#define __SYSTEM_HH__
+
+#include <string>
+#include <vector>
+
+#include "base/loader/symtab.hh"
+#include "base/misc.hh"
+#include "base/statistics.hh"
+#include "config/full_system.hh"
+#include "cpu/pc_event.hh"
+#include "mem/port.hh"
+#include "sim/sim_object.hh"
+#if FULL_SYSTEM
+#include "kern/system_events.hh"
+#include "mem/vport.hh"
+#endif
+
+class BaseCPU;
+class ThreadContext;
+class ObjectFile;
+class PhysicalMemory;
+
+#if FULL_SYSTEM
+class Platform;
+class GDBListener;
+class RemoteGDB;
+#endif
+
+class System : public SimObject
+{
+ public:
+ enum MemoryMode {
+ Invalid=0,
+ Atomic,
+ Timing
+ };
+
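+ /** Printable names for the MemoryMode values above, indexed by
+ * mode (one entry per enum value). */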
+ static const char *MemoryModeStrings[3];
+
+
+ MemoryMode getMemoryMode() { assert(memoryMode); return memoryMode; }
+
+ /** Change the memory mode of the system. This should only be
+ * called from the Python configuration scripts.
+ * @param mode mode to change to (atomic/timing)
+ */
+ void setMemoryMode(MemoryMode mode);
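+ // Illustrative C++-side call (in practice this is driven from the
+ // Python configuration): sys->setMemoryMode(System::Timing);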
+
+ PhysicalMemory *physmem;
+ PCEventQueue pcEventQueue;
+
+ std::vector<ThreadContext *> threadContexts;
+ int numcpus;
+
+ int getNumCPUs()
+ {
+ if (numcpus != threadContexts.size())
+ panic("cpu array not fully populated!");
+
+ return numcpus;
+ }
+
+#if FULL_SYSTEM
+ Platform *platform;
+ uint64_t init_param;
+
+ /** Port to physical memory used for writing object files into RAM at
+ * boot. */
+ FunctionalPort functionalPort;
+ VirtualPort virtPort;
+
+ /** kernel symbol table */
+ SymbolTable *kernelSymtab;
+
+ /** Object pointer for the kernel code */
+ ObjectFile *kernel;
+
+ /** Beginning of kernel code */
+ Addr kernelStart;
+
+ /** End of kernel code */
+ Addr kernelEnd;
+
+ /** Entry point in the kernel to start at */
+ Addr kernelEntry;
+
+#else
+
+ int page_ptr;
+
+
+#endif // FULL_SYSTEM
+
+ protected:
+
+ MemoryMode memoryMode;
+
+#if FULL_SYSTEM
+ /**
+ * Fix up an address used to match PCs for hooking simulator
+ * events on to target function executions. See comment in
+ * system.cc for details.
+ */
+ virtual Addr fixFuncEventAddr(Addr addr) = 0;
+
+ /**
+ * Add a function-based event to the given function, to be looked
+ * up in the specified symbol table.
+ */
+ template <class T>
+ T *addFuncEvent(SymbolTable *symtab, const char *lbl)
+ {
+ Addr addr = 0; // initialize only to avoid compiler warning
+
+ if (symtab->findAddress(lbl, addr)) {
+ T *ev = new T(&pcEventQueue, lbl, fixFuncEventAddr(addr));
+ return ev;
+ }
+
+ return NULL;
+ }
+
+ /** Add a function-based event to kernel code. */
+ template <class T>
+ T *addKernelFuncEvent(const char *lbl)
+ {
+ return addFuncEvent<T>(kernelSymtab, lbl);
+ }
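+
+ // Illustrative use from a derived system's constructor, assuming a
+ // "panic" symbol exists in the kernel image (hypothetical label);
+ // BreakPCEvent comes from cpu/pc_event.hh, included above:
+ //
+ // BreakPCEvent *event = addKernelFuncEvent<BreakPCEvent>("panic");
+ // if (!event)
+ // panic("could not find kernel symbol 'panic'\n");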
+
+#endif
+ public:
+#if FULL_SYSTEM
+ std::vector<RemoteGDB *> remoteGDB;
+ std::vector<GDBListener *> gdbListen;
+ virtual bool breakpoint() = 0;
+#endif // FULL_SYSTEM
+
+ public:
+ struct Params
+ {
+ std::string name;
+ PhysicalMemory *physmem;
+ MemoryMode mem_mode;
+
+#if FULL_SYSTEM
+ Tick boot_cpu_frequency;
+ std::string boot_osflags;
+ uint64_t init_param;
+
+ std::string kernel_path;
+ std::string readfile;
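+ /** Additional symbol file to load into the kernel symbol table
+ * (inferred from the field name; not documented in this patch). */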
+ std::string symbolfile;
+#endif
+ };
+
+ protected:
+ Params *_params;
+
+ public:
+ System(Params *p);
+ ~System();
+
+ void startup();
+
+ const Params *params() const { return (const Params *)_params; }
+
+ public:
+
+#if FULL_SYSTEM
+ /**
+ * Returns the address the kernel starts at.
+ * @return address the kernel starts at
+ */
+ Addr getKernelStart() const { return kernelStart; }
+
+ /**
+ * Returns the address the kernel ends at.
+ * @return address the kernel ends at
+ */
+ Addr getKernelEnd() const { return kernelEnd; }
+
+ /**
+ * Returns the address of the entry point to the kernel code.
+ * @return entry point of the kernel code
+ */
+ Addr getKernelEntry() const { return kernelEntry; }
+
+#else
+
+ Addr new_page();
+
+#endif // FULL_SYSTEM
+
+ int registerThreadContext(ThreadContext *tc, int tcIndex);
+ void replaceThreadContext(ThreadContext *tc, int tcIndex);
+
+ void serialize(std::ostream &os);
+ void unserialize(Checkpoint *cp, const std::string &section);
+
+ public:
+ ////////////////////////////////////////////
+ //
+ // STATIC GLOBAL SYSTEM LIST
+ //
+ ////////////////////////////////////////////
+
+ static std::vector<System *> systemList;
+ static int numSystemsRunning;
+
+ static void printSystems();
+
+
+};
+
+#endif // __SYSTEM_HH__
return 0;
}
+ if (COMPARE("loadsymbol")) {
+ m5_loadsymbol(arg1);
+ return 0;
+ if (COMPARE("readfile")) {
+ char buf[256*1024];
+ int offset = 0;
+ int len;
+
+ if (argc != 2)
+ usage();
+
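+ /*
+ * Drain the file the simulator exposes through the readfile
+ * pseudo-op: each m5_readfile call is assumed to copy the next
+ * chunk (up to sizeof(buf) bytes, starting at offset) into buf
+ * and return the number of bytes copied, 0 at end-of-file.
+ */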
+ while ((len = m5_readfile(buf, sizeof(buf), offset)) > 0) {
+ write(STDOUT_FILENO, buf, len);
+ offset += len;
+ }
+
+ return 0;
+ }
usage();