+++ /dev/null
-
-#include "base/timebuf.hh"
-#include "cpu/activity.hh"
-
-ActivityRecorder::ActivityRecorder(int num_stages, int longest_latency,
- int activity)
- : activityBuffer(longest_latency, 0), longestLatency(longest_latency),
- activityCount(activity), numStages(num_stages)
-{
- stageActive = new bool[numStages];
- memset(stageActive, 0, numStages);
-}
-
-void
-ActivityRecorder::activity()
-{
- if (activityBuffer[0]) {
- return;
- }
-
- activityBuffer[0] = true;
-
- ++activityCount;
-
- DPRINTF(Activity, "Activity: %i\n", activityCount);
-}
-
-void
-ActivityRecorder::advance()
-{
- if (activityBuffer[-longestLatency]) {
- --activityCount;
-
- assert(activityCount >= 0);
-
- DPRINTF(Activity, "Activity: %i\n", activityCount);
-
- if (activityCount == 0) {
- DPRINTF(Activity, "No activity left!\n");
- }
- }
-
- activityBuffer.advance();
-}
-
-void
-ActivityRecorder::activateStage(const int idx)
-{
- if (!stageActive[idx]) {
- ++activityCount;
-
- stageActive[idx] = true;
-
- DPRINTF(Activity, "Activity: %i\n", activityCount);
- } else {
- DPRINTF(Activity, "Stage %i already active.\n", idx);
- }
-
-// assert(activityCount < longestLatency + numStages + 1);
-}
-
-void
-ActivityRecorder::deactivateStage(const int idx)
-{
- if (stageActive[idx]) {
- --activityCount;
-
- stageActive[idx] = false;
-
- DPRINTF(Activity, "Activity: %i\n", activityCount);
- } else {
- DPRINTF(Activity, "Stage %i already inactive.\n", idx);
- }
-
- assert(activityCount >= 0);
-}
-
-void
-ActivityRecorder::reset()
-{
- activityCount = 0;
- memset(stageActive, 0, numStages);
- for (int i = 0; i < longestLatency + 1; ++i)
- activityBuffer.advance();
-}
-
-void
-ActivityRecorder::dump()
-{
- for (int i = 0; i <= longestLatency; ++i) {
- cprintf("[Idx:%i %i] ", i, activityBuffer[-i]);
- }
-
- cprintf("\n");
-
- for (int i = 0; i < numStages; ++i) {
- cprintf("[Stage:%i %i]\n", i, stageActive[i]);
- }
-
- cprintf("\n");
-
- cprintf("Activity count: %i\n", activityCount);
-}
-
-void
-ActivityRecorder::validate()
-{
- int count = 0;
- for (int i = 0; i <= longestLatency; ++i) {
- if (activityBuffer[-i]) {
- count++;
- }
- }
-
- for (int i = 0; i < numStages; ++i) {
- if (stageActive[i]) {
- count++;
- }
- }
-
- assert(count == activityCount);
-}
+++ /dev/null
-
-#ifndef __CPU_ACTIVITY_HH__
-#define __CPU_ACTIVITY_HH__
-
-#include "base/timebuf.hh"
-#include "base/trace.hh"
-
-class ActivityRecorder {
- public:
- ActivityRecorder(int num_stages, int longest_latency, int count);
-
- /** Records that there is activity this cycle. */
- void activity();
- /** Advances the activity buffer, decrementing the activityCount if active
- * communication just left the time buffer, and descheduling the CPU if
- * there is no activity.
- */
- void advance();
- /** Marks a stage as active. */
- void activateStage(const int idx);
- /** Deactivates a stage. */
- void deactivateStage(const int idx);
-
- int getActivityCount() { return activityCount; }
-
- void setActivityCount(int count)
- { activityCount = count; }
-
- bool active() { return activityCount; }
-
- void reset();
-
- void dump();
-
- void validate();
-
- private:
- /** Time buffer that tracks if any cycles has active communication
- * in them. It should be as long as the longest communication
- * latency in the system. Each time any time buffer is written,
- * the activity buffer should also be written to. The
- * activityBuffer is advanced along with all the other time
- * buffers, so it should have a 1 somewhere in it only if there
- * is active communication in a time buffer.
- */
- TimeBuffer<bool> activityBuffer;
-
- int longestLatency;
-
- /** Tracks how many stages and cycles of time buffer have
- * activity. Stages increment this count when they switch to
- * active, and decrement it when they switch to
- * inactive. Whenever a cycle that previously had no information
- * is written in the time buffer, this is incremented. When a
- * cycle that had information exits the time buffer due to age,
- * this count is decremented. When the count is 0, there is no
- * activity in the CPU, and it can be descheduled.
- */
- int activityCount;
-
- int numStages;
-
- /** Records which stages are active/inactive. */
- bool *stageActive;
-};
-
-#endif // __CPU_ACTIVITY_HH__
+++ /dev/null
-/*
- * Copyright (c) 2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <list>
-#include <string>
-
-#include "base/refcnt.hh"
-#include "cpu/base.hh"
-#include "cpu/base_dyn_inst.hh"
-#include "cpu/checker/cpu.hh"
-#include "cpu/cpu_exec_context.hh"
-#include "cpu/exec_context.hh"
-#include "cpu/static_inst.hh"
-#include "sim/byteswap.hh"
-#include "sim/sim_object.hh"
-#include "sim/stats.hh"
-
-#include "cpu/o3/alpha_dyn_inst.hh"
-#include "cpu/o3/alpha_impl.hh"
-
-#include "cpu/ozone/dyn_inst.hh"
-#include "cpu/ozone/ozone_impl.hh"
-#include "cpu/ozone/simple_impl.hh"
-
-#if FULL_SYSTEM
-#include "sim/system.hh"
-#include "arch/vtophys.hh"
-#endif // FULL_SYSTEM
-
-using namespace std;
-//The CheckerCPU does alpha only
-using namespace AlphaISA;
-
-void
-CheckerCPU::init()
-{
-}
-
-CheckerCPU::CheckerCPU(Params *p)
- : BaseCPU(p), cpuXC(NULL), xcProxy(NULL)
-{
- memReq = new MemReq();
- memReq->xc = xcProxy;
- memReq->asid = 0;
- memReq->data = new uint8_t[64];
-
- numInst = 0;
- startNumInst = 0;
- numLoad = 0;
- startNumLoad = 0;
- youngestSN = 0;
-
- changedPC = willChangePC = changedNextPC = false;
-
- exitOnError = p->exitOnError;
-#if FULL_SYSTEM
- itb = p->itb;
- dtb = p->dtb;
- systemPtr = NULL;
- memPtr = NULL;
-#endif
-}
-
-CheckerCPU::~CheckerCPU()
-{
-}
-
-void
-CheckerCPU::setMemory(FunctionalMemory *mem)
-{
- memPtr = mem;
-#if !FULL_SYSTEM
- cpuXC = new CPUExecContext(this, /* thread_num */ 0, mem,
- /* asid */ 0);
-
- cpuXC->setStatus(ExecContext::Suspended);
- xcProxy = cpuXC->getProxy();
- execContexts.push_back(xcProxy);
-#else
- if (systemPtr) {
- cpuXC = new CPUExecContext(this, 0, systemPtr, itb, dtb, memPtr, false);
-
- cpuXC->setStatus(ExecContext::Suspended);
- xcProxy = cpuXC->getProxy();
- execContexts.push_back(xcProxy);
- memReq->xc = xcProxy;
- delete cpuXC->kernelStats;
- cpuXC->kernelStats = NULL;
- }
-#endif
-}
-
-#if FULL_SYSTEM
-void
-CheckerCPU::setSystem(System *system)
-{
- systemPtr = system;
-
- if (memPtr) {
- cpuXC = new CPUExecContext(this, 0, systemPtr, itb, dtb, memPtr, false);
-
- cpuXC->setStatus(ExecContext::Suspended);
- xcProxy = cpuXC->getProxy();
- execContexts.push_back(xcProxy);
- memReq->xc = xcProxy;
- delete cpuXC->kernelStats;
- cpuXC->kernelStats = NULL;
- }
-}
-#endif
-
-void
-CheckerCPU::serialize(ostream &os)
-{
-/*
- BaseCPU::serialize(os);
- SERIALIZE_SCALAR(inst);
- nameOut(os, csprintf("%s.xc", name()));
- cpuXC->serialize(os);
- cacheCompletionEvent.serialize(os);
-*/
-}
-
-void
-CheckerCPU::unserialize(Checkpoint *cp, const string §ion)
-{
-/*
- BaseCPU::unserialize(cp, section);
- UNSERIALIZE_SCALAR(inst);
- cpuXC->unserialize(cp, csprintf("%s.xc", section));
-*/
-}
-
-Fault
-CheckerCPU::copySrcTranslate(Addr src)
-{
- panic("Unimplemented!");
-}
-
-Fault
-CheckerCPU::copy(Addr dest)
-{
- panic("Unimplemented!");
-}
-
-template <class T>
-Fault
-CheckerCPU::read(Addr addr, T &data, unsigned flags)
-{
- memReq->reset(addr, sizeof(T), flags);
-
- // translate to physical address
- translateDataReadReq(memReq);
-
- memReq->cmd = Read;
- memReq->completionEvent = NULL;
- memReq->time = curTick;
- memReq->flags &= ~INST_READ;
-
- if (!(memReq->flags & UNCACHEABLE)) {
- // Access memory to see if we have the same data
- cpuXC->read(memReq, data);
- } else {
- // Assume the data is correct if it's an uncached access
- memcpy(&data, &unverifiedResult.integer, sizeof(T));
- }
-
- return NoFault;
-}
-
-#ifndef DOXYGEN_SHOULD_SKIP_THIS
-
-template
-Fault
-CheckerCPU::read(Addr addr, uint64_t &data, unsigned flags);
-
-template
-Fault
-CheckerCPU::read(Addr addr, uint32_t &data, unsigned flags);
-
-template
-Fault
-CheckerCPU::read(Addr addr, uint16_t &data, unsigned flags);
-
-template
-Fault
-CheckerCPU::read(Addr addr, uint8_t &data, unsigned flags);
-
-#endif //DOXYGEN_SHOULD_SKIP_THIS
-
-template<>
-Fault
-CheckerCPU::read(Addr addr, double &data, unsigned flags)
-{
- return read(addr, *(uint64_t*)&data, flags);
-}
-
-template<>
-Fault
-CheckerCPU::read(Addr addr, float &data, unsigned flags)
-{
- return read(addr, *(uint32_t*)&data, flags);
-}
-
-template<>
-Fault
-CheckerCPU::read(Addr addr, int32_t &data, unsigned flags)
-{
- return read(addr, (uint32_t&)data, flags);
-}
-
-template <class T>
-Fault
-CheckerCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
-{
- memReq->reset(addr, sizeof(T), flags);
-
- // translate to physical address
- cpuXC->translateDataWriteReq(memReq);
-
- // Can compare the write data and result only if it's cacheable,
- // not a store conditional, or is a store conditional that
- // succeeded.
- // @todo: Verify that actual memory matches up with these values.
- // Right now it only verifies that the instruction data is the
- // same as what was in the request that got sent to memory; there
- // is no verification that it is the same as what is in memory.
- // This is because the LSQ would have to be snooped in the CPU to
- // verify this data.
- if (unverifiedReq &&
- !(unverifiedReq->flags & UNCACHEABLE) &&
- (!(unverifiedReq->flags & LOCKED) ||
- ((unverifiedReq->flags & LOCKED) &&
- unverifiedReq->result == 1))) {
-#if 0
- memReq->cmd = Read;
- memReq->completionEvent = NULL;
- memReq->time = curTick;
- memReq->flags &= ~INST_READ;
- cpuXC->read(memReq, inst_data);
-#endif
- T inst_data;
- memcpy(&inst_data, unverifiedReq->data, sizeof(T));
-
- if (data != inst_data) {
- warn("%lli: Store value does not match value in memory! "
- "Instruction: %#x, memory: %#x",
- curTick, inst_data, data);
- handleError();
- }
- }
-
- // Assume the result was the same as the one passed in. This checker
- // doesn't check if the SC should succeed or fail, it just checks the
- // value.
- if (res)
- *res = unverifiedReq->result;
-
- return NoFault;
-}
-
-
-#ifndef DOXYGEN_SHOULD_SKIP_THIS
-template
-Fault
-CheckerCPU::write(uint64_t data, Addr addr, unsigned flags, uint64_t *res);
-
-template
-Fault
-CheckerCPU::write(uint32_t data, Addr addr, unsigned flags, uint64_t *res);
-
-template
-Fault
-CheckerCPU::write(uint16_t data, Addr addr, unsigned flags, uint64_t *res);
-
-template
-Fault
-CheckerCPU::write(uint8_t data, Addr addr, unsigned flags, uint64_t *res);
-
-#endif //DOXYGEN_SHOULD_SKIP_THIS
-
-template<>
-Fault
-CheckerCPU::write(double data, Addr addr, unsigned flags, uint64_t *res)
-{
- return write(*(uint64_t*)&data, addr, flags, res);
-}
-
-template<>
-Fault
-CheckerCPU::write(float data, Addr addr, unsigned flags, uint64_t *res)
-{
- return write(*(uint32_t*)&data, addr, flags, res);
-}
-
-template<>
-Fault
-CheckerCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res)
-{
- return write((uint32_t)data, addr, flags, res);
-}
-
-
-#if FULL_SYSTEM
-Addr
-CheckerCPU::dbg_vtophys(Addr addr)
-{
- return vtophys(xcProxy, addr);
-}
-#endif // FULL_SYSTEM
-
-bool
-CheckerCPU::translateInstReq(MemReqPtr &req)
-{
-#if FULL_SYSTEM
- return (cpuXC->translateInstReq(req) == NoFault);
-#else
- cpuXC->translateInstReq(req);
- return true;
-#endif
-}
-
-void
-CheckerCPU::translateDataReadReq(MemReqPtr &req)
-{
- cpuXC->translateDataReadReq(req);
-
- if (req->vaddr != unverifiedReq->vaddr) {
- warn("%lli: Request virtual addresses do not match! Inst: %#x, "
- "checker: %#x",
- curTick, unverifiedReq->vaddr, req->vaddr);
- handleError();
- }
- req->paddr = unverifiedReq->paddr;
-
- if (checkFlags(req)) {
- warn("%lli: Request flags do not match! Inst: %#x, checker: %#x",
- curTick, unverifiedReq->flags, req->flags);
- handleError();
- }
-}
-
-void
-CheckerCPU::translateDataWriteReq(MemReqPtr &req)
-{
- cpuXC->translateDataWriteReq(req);
-
- if (req->vaddr != unverifiedReq->vaddr) {
- warn("%lli: Request virtual addresses do not match! Inst: %#x, "
- "checker: %#x",
- curTick, unverifiedReq->vaddr, req->vaddr);
- handleError();
- }
- req->paddr = unverifiedReq->paddr;
-
- if (checkFlags(req)) {
- warn("%lli: Request flags do not match! Inst: %#x, checker: %#x",
- curTick, unverifiedReq->flags, req->flags);
- handleError();
- }
-}
-
-bool
-CheckerCPU::checkFlags(MemReqPtr &req)
-{
- // Remove any dynamic flags that don't have to do with the request itself.
- unsigned flags = unverifiedReq->flags;
- unsigned mask = LOCKED | PHYSICAL | VPTE | ALTMODE | UNCACHEABLE | NO_FAULT;
- flags = flags & (mask);
- if (flags == req->flags) {
- return false;
- } else {
- return true;
- }
-}
-
-template <class DynInstPtr>
-void
-Checker<DynInstPtr>::tick(DynInstPtr &completed_inst)
-{
- DynInstPtr inst;
-
- // Either check this instruction, or add it to a list of
- // instructions waiting to be checked. Instructions must be
- // checked in program order, so if a store has committed yet not
- // completed, there may be some instructions that are waiting
- // behind it that have completed and must be checked.
- if (!instList.empty()) {
- if (youngestSN < completed_inst->seqNum) {
- DPRINTF(Checker, "Adding instruction [sn:%lli] PC:%#x to list.\n",
- completed_inst->seqNum, completed_inst->readPC());
- instList.push_back(completed_inst);
- youngestSN = completed_inst->seqNum;
- }
-
- if (!instList.front()->isCompleted()) {
- return;
- } else {
- inst = instList.front();
- instList.pop_front();
- }
- } else {
- if (!completed_inst->isCompleted()) {
- if (youngestSN < completed_inst->seqNum) {
- DPRINTF(Checker, "Adding instruction [sn:%lli] PC:%#x to list.\n",
- completed_inst->seqNum, completed_inst->readPC());
- instList.push_back(completed_inst);
- youngestSN = completed_inst->seqNum;
- }
- return;
- } else {
- if (youngestSN < completed_inst->seqNum) {
- inst = completed_inst;
- youngestSN = completed_inst->seqNum;
- } else {
- return;
- }
- }
- }
-
- // Try to check all instructions that are completed, ending if we
- // run out of instructions to check or if an instruction is not
- // yet completed.
- while (1) {
- DPRINTF(Checker, "Processing instruction [sn:%lli] PC:%#x.\n",
- inst->seqNum, inst->readPC());
- unverifiedResult.integer = inst->readIntResult();
- unverifiedReq = inst->req;
- numCycles++;
-
- Fault fault = NoFault;
-
- // maintain $r0 semantics
- cpuXC->setIntReg(ZeroReg, 0);
-#ifdef TARGET_ALPHA
- cpuXC->setFloatRegDouble(ZeroReg, 0.0);
-#endif // TARGET_ALPHA
-
- // Check if any recent PC changes match up with anything we
- // expect to happen. This is mostly to check if traps or
- // PC-based events have occurred in both the checker and CPU.
- if (changedPC) {
- DPRINTF(Checker, "Changed PC recently to %#x\n",
- cpuXC->readPC());
- if (willChangePC) {
- if (newPC == cpuXC->readPC()) {
- DPRINTF(Checker, "Changed PC matches expected PC\n");
- } else {
- warn("%lli: Changed PC does not match expected PC, "
- "changed: %#x, expected: %#x",
- curTick, cpuXC->readPC(), newPC);
- handleError();
- }
- willChangePC = false;
- }
- changedPC = false;
- }
- if (changedNextPC) {
- DPRINTF(Checker, "Changed NextPC recently to %#x\n",
- cpuXC->readNextPC());
- changedNextPC = false;
- }
-
- // Try to fetch the instruction
-
-#if FULL_SYSTEM
-#define IFETCH_FLAGS(pc) ((pc) & 1) ? PHYSICAL : 0
-#else
-#define IFETCH_FLAGS(pc) 0
-#endif
-
- // set up memory request for instruction fetch
- memReq->cmd = Read;
- memReq->reset(cpuXC->readPC() & ~3, sizeof(uint32_t),
- IFETCH_FLAGS(cpuXC->readPC()));
-
- bool succeeded = translateInstReq(memReq);
-
- if (!succeeded) {
- if (inst->getFault() == NoFault) {
- // In this case the instruction was not a dummy
- // instruction carrying an ITB fault. In the single
- // threaded case the ITB should still be able to
- // translate this instruction; in the SMT case it's
- // possible that its ITB entry was kicked out.
- warn("%lli: Instruction PC %#x was not found in the ITB!",
- curTick, cpuXC->readPC());
- handleError();
-
- // go to the next instruction
- cpuXC->setPC(cpuXC->readNextPC());
- cpuXC->setNextPC(cpuXC->readNextPC() + sizeof(MachInst));
-
- return;
- } else {
- // The instruction is carrying an ITB fault. Handle
- // the fault and see if our results match the CPU on
- // the next tick().
- fault = inst->getFault();
- }
- }
-
- if (fault == NoFault) {
- cpuXC->mem->read(memReq, machInst);
-
- // keep an instruction count
- numInst++;
-
- // decode the instruction
- machInst = gtoh(machInst);
- // Checks that the instruction matches what we expected it to be.
- // Checks both the machine instruction and the PC.
- validateInst(inst);
-
- curStaticInst = StaticInst::decode(makeExtMI(machInst,
- cpuXC->readPC()));
-
-#if FULL_SYSTEM
- cpuXC->setInst(machInst);
-#endif // FULL_SYSTEM
-
- fault = inst->getFault();
- }
-
- // Either the instruction was a fault and we should process the fault,
- // or we should just go ahead execute the instruction. This assumes
- // that the instruction is properly marked as a fault.
- if (fault == NoFault) {
-
- cpuXC->func_exe_inst++;
-
- fault = curStaticInst->execute(this, NULL);
-
- // Checks to make sure instrution results are correct.
- validateExecution(inst);
-
- if (curStaticInst->isLoad()) {
- ++numLoad;
- }
- }
-
- if (fault != NoFault) {
-#if FULL_SYSTEM
- fault->invoke(xcProxy);
- willChangePC = true;
- newPC = cpuXC->readPC();
- DPRINTF(Checker, "Fault, PC is now %#x\n", newPC);
-#else // !FULL_SYSTEM
- fatal("fault (%d) detected @ PC 0x%08p", fault, cpuXC->readPC());
-#endif // FULL_SYSTEM
- } else {
-#if THE_ISA != MIPS_ISA
- // go to the next instruction
- cpuXC->setPC(cpuXC->readNextPC());
- cpuXC->setNextPC(cpuXC->readNextPC() + sizeof(MachInst));
-#else
- // go to the next instruction
- cpuXC->setPC(cpuXC->readNextPC());
- cpuXC->setNextPC(cpuXC->readNextNPC());
- cpuXC->setNextNPC(cpuXC->readNextNPC() + sizeof(MachInst));
-#endif
-
- }
-
-#if FULL_SYSTEM
- // @todo: Determine if these should happen only if the
- // instruction hasn't faulted. In the SimpleCPU case this may
- // not be true, but in the O3 or Ozone case this may be true.
- Addr oldpc;
- int count = 0;
- do {
- oldpc = cpuXC->readPC();
- system->pcEventQueue.service(xcProxy);
- count++;
- } while (oldpc != cpuXC->readPC());
- if (count > 1) {
- willChangePC = true;
- newPC = cpuXC->readPC();
- DPRINTF(Checker, "PC Event, PC is now %#x\n", newPC);
- }
-#endif
-
- // @todo: Optionally can check all registers. (Or just those
- // that have been modified).
- validateState();
-
- // Continue verifying instructions if there's another completed
- // instruction waiting to be verified.
- if (instList.empty()) {
- break;
- } else if (instList.front()->isCompleted()) {
- inst = instList.front();
- instList.pop_front();
- } else {
- break;
- }
- }
-}
-
-template <class DynInstPtr>
-void
-Checker<DynInstPtr>::switchOut(Sampler *s)
-{
- instList.clear();
-}
-
-template <class DynInstPtr>
-void
-Checker<DynInstPtr>::takeOverFrom(BaseCPU *oldCPU)
-{
-}
-
-template <class DynInstPtr>
-void
-Checker<DynInstPtr>::validateInst(DynInstPtr &inst)
-{
- if (inst->readPC() != cpuXC->readPC()) {
- warn("%lli: PCs do not match! Inst: %#x, checker: %#x",
- curTick, inst->readPC(), cpuXC->readPC());
- if (changedPC) {
- warn("%lli: Changed PCs recently, may not be an error",
- curTick);
- } else {
- handleError();
- }
- }
-
- MachInst mi = static_cast<MachInst>(inst->staticInst->machInst);
-
- if (mi != machInst) {
- warn("%lli: Binary instructions do not match! Inst: %#x, "
- "checker: %#x",
- curTick, mi, machInst);
- handleError();
- }
-}
-
-template <class DynInstPtr>
-void
-Checker<DynInstPtr>::validateExecution(DynInstPtr &inst)
-{
- if (inst->numDestRegs()) {
- // @todo: Support more destination registers.
- if (inst->isUnverifiable()) {
- // Unverifiable instructions assume they were executed
- // properly by the CPU. Grab the result from the
- // instruction and write it to the register.
- RegIndex idx = inst->destRegIdx(0);
- if (idx < TheISA::FP_Base_DepTag) {
- cpuXC->setIntReg(idx, inst->readIntResult());
- } else if (idx < TheISA::Fpcr_DepTag) {
- cpuXC->setFloatRegInt(idx, inst->readIntResult());
- } else {
- cpuXC->setMiscReg(idx, inst->readIntResult());
- }
- } else if (result.integer != inst->readIntResult()) {
- warn("%lli: Instruction results do not match! (Results may not "
- "actually be integers) Inst: %#x, checker: %#x",
- curTick, inst->readIntResult(), result.integer);
- handleError();
- }
- }
-
- if (inst->readNextPC() != cpuXC->readNextPC()) {
- warn("%lli: Instruction next PCs do not match! Inst: %#x, "
- "checker: %#x",
- curTick, inst->readNextPC(), cpuXC->readNextPC());
- handleError();
- }
-
- // Checking side effect registers can be difficult if they are not
- // checked simultaneously with the execution of the instruction.
- // This is because other valid instructions may have modified
- // these registers in the meantime, and their values are not
- // stored within the DynInst.
- while (!miscRegIdxs.empty()) {
- int misc_reg_idx = miscRegIdxs.front();
- miscRegIdxs.pop();
-
- if (inst->xcBase()->readMiscReg(misc_reg_idx) !=
- cpuXC->readMiscReg(misc_reg_idx)) {
- warn("%lli: Misc reg idx %i (side effect) does not match! "
- "Inst: %#x, checker: %#x",
- curTick, misc_reg_idx,
- inst->xcBase()->readMiscReg(misc_reg_idx),
- cpuXC->readMiscReg(misc_reg_idx));
- handleError();
- }
- }
-}
-
-template <class DynInstPtr>
-void
-Checker<DynInstPtr>::validateState()
-{
-}
-
-template <class DynInstPtr>
-void
-Checker<DynInstPtr>::dumpInsts()
-{
- int num = 0;
-
- InstListIt inst_list_it = --(instList.end());
-
- cprintf("Inst list size: %i\n", instList.size());
-
- while (inst_list_it != instList.end())
- {
- cprintf("Instruction:%i\n",
- num);
-
- cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
- "Completed:%i\n",
- (*inst_list_it)->readPC(),
- (*inst_list_it)->seqNum,
- (*inst_list_it)->threadNumber,
- (*inst_list_it)->isCompleted());
-
- cprintf("\n");
-
- inst_list_it--;
- ++num;
- }
-
-}
-
-template
-class Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > >;
-
-template
-class Checker<RefCountingPtr<AlphaDynInst<AlphaSimpleImpl> > >;
+++ /dev/null
-/*
- * Copyright (c) 2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __CPU_CHECKER_CPU_HH__
-#define __CPU_CHECKER_CPU_HH__
-
-#include <list>
-#include <queue>
-#include <map>
-
-#include "base/statistics.hh"
-#include "config/full_system.hh"
-#include "cpu/base.hh"
-#include "cpu/base_dyn_inst.hh"
-#include "cpu/cpu_exec_context.hh"
-#include "cpu/pc_event.hh"
-#include "cpu/static_inst.hh"
-#include "sim/eventq.hh"
-
-// forward declarations
-#if FULL_SYSTEM
-class Processor;
-class AlphaITB;
-class AlphaDTB;
-class PhysicalMemory;
-
-class RemoteGDB;
-class GDBListener;
-
-#else
-
-class Process;
-
-#endif // FULL_SYSTEM
-template <class>
-class BaseDynInst;
-class ExecContext;
-class MemInterface;
-class Checkpoint;
-class Sampler;
-
-class CheckerCPU : public BaseCPU
-{
- protected:
- typedef TheISA::MachInst MachInst;
- typedef TheISA::MiscReg MiscReg;
- public:
- // main simulation loop (one cycle)
- virtual void init();
-
- struct Params : public BaseCPU::Params
- {
-#if FULL_SYSTEM
- AlphaITB *itb;
- AlphaDTB *dtb;
- FunctionalMemory *mem;
-#else
- Process *process;
-#endif
- bool exitOnError;
- };
-
- public:
- CheckerCPU(Params *p);
- virtual ~CheckerCPU();
-
- void setMemory(FunctionalMemory *mem);
-
- FunctionalMemory *memPtr;
-
-#if FULL_SYSTEM
- void setSystem(System *system);
-
- System *systemPtr;
-#endif
- public:
- // execution context
- CPUExecContext *cpuXC;
-
- ExecContext *xcProxy;
-
- AlphaITB *itb;
- AlphaDTB *dtb;
-
-#if FULL_SYSTEM
- Addr dbg_vtophys(Addr addr);
-#endif
-
- union Result {
- uint64_t integer;
- float fp;
- double dbl;
- };
-
- Result result;
-
- // current instruction
- MachInst machInst;
-
- // Refcounted pointer to the one memory request.
- MemReqPtr memReq;
-
- StaticInstPtr curStaticInst;
-
- // number of simulated instructions
- Counter numInst;
- Counter startNumInst;
-
- std::queue<int> miscRegIdxs;
-
- virtual Counter totalInstructions() const
- {
- return numInst - startNumInst;
- }
-
- // number of simulated loads
- Counter numLoad;
- Counter startNumLoad;
-
- virtual void serialize(std::ostream &os);
- virtual void unserialize(Checkpoint *cp, const std::string §ion);
-
- template <class T>
- Fault read(Addr addr, T &data, unsigned flags);
-
- template <class T>
- Fault write(T data, Addr addr, unsigned flags, uint64_t *res);
-
- // These functions are only used in CPU models that split
- // effective address computation from the actual memory access.
- void setEA(Addr EA) { panic("SimpleCPU::setEA() not implemented\n"); }
- Addr getEA() { panic("SimpleCPU::getEA() not implemented\n"); }
-
- void prefetch(Addr addr, unsigned flags)
- {
- // need to do this...
- }
-
- void writeHint(Addr addr, int size, unsigned flags)
- {
- // need to do this...
- }
-
- Fault copySrcTranslate(Addr src);
-
- Fault copy(Addr dest);
-
- // The register accessor methods provide the index of the
- // instruction's operand (e.g., 0 or 1), not the architectural
- // register index, to simplify the implementation of register
- // renaming. We find the architectural register index by indexing
- // into the instruction's own operand index table. Note that a
- // raw pointer to the StaticInst is provided instead of a
- // ref-counted StaticInstPtr to redice overhead. This is fine as
- // long as these methods don't copy the pointer into any long-term
- // storage (which is pretty hard to imagine they would have reason
- // to do).
-
- uint64_t readIntReg(const StaticInst *si, int idx)
- {
- return cpuXC->readIntReg(si->srcRegIdx(idx));
- }
-
- float readFloatRegSingle(const StaticInst *si, int idx)
- {
- int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
- return cpuXC->readFloatRegSingle(reg_idx);
- }
-
- double readFloatRegDouble(const StaticInst *si, int idx)
- {
- int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
- return cpuXC->readFloatRegDouble(reg_idx);
- }
-
- uint64_t readFloatRegInt(const StaticInst *si, int idx)
- {
- int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
- return cpuXC->readFloatRegInt(reg_idx);
- }
-
- void setIntReg(const StaticInst *si, int idx, uint64_t val)
- {
- cpuXC->setIntReg(si->destRegIdx(idx), val);
- result.integer = val;
- }
-
- void setFloatRegSingle(const StaticInst *si, int idx, float val)
- {
- int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
- cpuXC->setFloatRegSingle(reg_idx, val);
- result.fp = val;
- }
-
- void setFloatRegDouble(const StaticInst *si, int idx, double val)
- {
- int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
- cpuXC->setFloatRegDouble(reg_idx, val);
- result.dbl = val;
- }
-
- void setFloatRegInt(const StaticInst *si, int idx, uint64_t val)
- {
- int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
- cpuXC->setFloatRegInt(reg_idx, val);
- result.integer = val;
- }
-
- uint64_t readPC() { return cpuXC->readPC(); }
- void setNextPC(uint64_t val) {
- cpuXC->setNextPC(val);
- }
-
- MiscReg readMiscReg(int misc_reg)
- {
- return cpuXC->readMiscReg(misc_reg);
- }
-
- MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
- {
- return cpuXC->readMiscRegWithEffect(misc_reg, fault);
- }
-
- Fault setMiscReg(int misc_reg, const MiscReg &val)
- {
- result.integer = val;
- miscRegIdxs.push(misc_reg);
- return cpuXC->setMiscReg(misc_reg, val);
- }
-
- Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val)
- {
- miscRegIdxs.push(misc_reg);
- return cpuXC->setMiscRegWithEffect(misc_reg, val);
- }
-
- void recordPCChange(uint64_t val) { changedPC = true; }
- void recordNextPCChange(uint64_t val) { changedNextPC = true; }
-
- bool translateInstReq(MemReqPtr &req);
- void translateDataWriteReq(MemReqPtr &req);
- void translateDataReadReq(MemReqPtr &req);
-
-#if FULL_SYSTEM
- Fault hwrei() { return cpuXC->hwrei(); }
- int readIntrFlag() { return cpuXC->readIntrFlag(); }
- void setIntrFlag(int val) { cpuXC->setIntrFlag(val); }
- bool inPalMode() { return cpuXC->inPalMode(); }
- void ev5_trap(Fault fault) { fault->invoke(xcProxy); }
- bool simPalCheck(int palFunc) { return cpuXC->simPalCheck(palFunc); }
-#else
- // Assume that the normal CPU's call to syscall was successful.
- // The checker's state would have already been updated by the syscall.
- void syscall() { }
-#endif
-
- void handleError()
- {
- if (exitOnError)
- panic("Checker found error!");
- }
- bool checkFlags(MemReqPtr &req);
-
- ExecContext *xcBase() { return xcProxy; }
- CPUExecContext *cpuXCBase() { return cpuXC; }
-
- Result unverifiedResult;
- MemReqPtr unverifiedReq;
-
- bool changedPC;
- bool willChangePC;
- uint64_t newPC;
- bool changedNextPC;
- bool exitOnError;
-
- InstSeqNum youngestSN;
-};
-
-template <class DynInstPtr>
-class Checker : public CheckerCPU
-{
- public:
- Checker(Params *p)
- : CheckerCPU(p)
- { }
-
- void switchOut(Sampler *s);
- void takeOverFrom(BaseCPU *oldCPU);
-
- void tick(DynInstPtr &inst);
-
- void validateInst(DynInstPtr &inst);
- void validateExecution(DynInstPtr &inst);
- void validateState();
-
- std::list<DynInstPtr> instList;
- typedef typename std::list<DynInstPtr>::iterator InstListIt;
- void dumpInsts();
-};
-
-#endif // __CPU_CHECKER_CPU_HH__
+++ /dev/null
-
-#include <string>
-
-#include "cpu/checker/cpu.hh"
-#include "cpu/inst_seq.hh"
-#include "cpu/ozone/dyn_inst.hh"
-#include "cpu/ozone/ozone_impl.hh"
-#include "mem/base_mem.hh"
-#include "sim/builder.hh"
-#include "sim/process.hh"
-#include "sim/sim_object.hh"
-
-class OzoneChecker : public Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > >
-{
- public:
- OzoneChecker(Params *p)
- : Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > >(p)
- { }
-};
-
-////////////////////////////////////////////////////////////////////////
-//
-// CheckerCPU Simulation Object
-//
-BEGIN_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker)
-
- Param<Counter> max_insts_any_thread;
- Param<Counter> max_insts_all_threads;
- Param<Counter> max_loads_any_thread;
- Param<Counter> max_loads_all_threads;
-
-#if FULL_SYSTEM
- SimObjectParam<AlphaITB *> itb;
- SimObjectParam<AlphaDTB *> dtb;
- SimObjectParam<FunctionalMemory *> mem;
- SimObjectParam<System *> system;
- Param<int> cpu_id;
- Param<Tick> profile;
-#else
- SimObjectParam<Process *> workload;
-#endif // FULL_SYSTEM
- Param<int> clock;
- SimObjectParam<BaseMem *> icache;
- SimObjectParam<BaseMem *> dcache;
-
- Param<bool> defer_registration;
- Param<bool> exitOnError;
- Param<bool> function_trace;
- Param<Tick> function_trace_start;
-
-END_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker)
-
-BEGIN_INIT_SIM_OBJECT_PARAMS(OzoneChecker)
-
- INIT_PARAM(max_insts_any_thread,
- "terminate when any thread reaches this inst count"),
- INIT_PARAM(max_insts_all_threads,
- "terminate when all threads have reached this inst count"),
- INIT_PARAM(max_loads_any_thread,
- "terminate when any thread reaches this load count"),
- INIT_PARAM(max_loads_all_threads,
- "terminate when all threads have reached this load count"),
-
-#if FULL_SYSTEM
- INIT_PARAM(itb, "Instruction TLB"),
- INIT_PARAM(dtb, "Data TLB"),
- INIT_PARAM(mem, "memory"),
- INIT_PARAM(system, "system object"),
- INIT_PARAM(cpu_id, "processor ID"),
- INIT_PARAM(profile, ""),
-#else
- INIT_PARAM(workload, "processes to run"),
-#endif // FULL_SYSTEM
-
- INIT_PARAM(clock, "clock speed"),
- INIT_PARAM(icache, "L1 instruction cache object"),
- INIT_PARAM(dcache, "L1 data cache object"),
-
- INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
- INIT_PARAM(exitOnError, "exit on error"),
- INIT_PARAM(function_trace, "Enable function trace"),
- INIT_PARAM(function_trace_start, "Cycle to start function trace")
-
-END_INIT_SIM_OBJECT_PARAMS(OzoneChecker)
-
-
-CREATE_SIM_OBJECT(OzoneChecker)
-{
- OzoneChecker::Params *params = new OzoneChecker::Params();
- params->name = getInstanceName();
- params->numberOfThreads = 1;
- params->max_insts_any_thread = 0;
- params->max_insts_all_threads = 0;
- params->max_loads_any_thread = 0;
- params->max_loads_all_threads = 0;
- params->exitOnError = exitOnError;
- params->deferRegistration = defer_registration;
- params->functionTrace = function_trace;
- params->functionTraceStart = function_trace_start;
- params->clock = clock;
- // Hack to touch all parameters. Consider not deriving Checker
- // from BaseCPU..it's not really a CPU in the end.
- Counter temp;
- temp = max_insts_any_thread;
- temp = max_insts_all_threads;
- temp = max_loads_any_thread;
- temp = max_loads_all_threads;
- BaseMem *cache = icache;
- cache = dcache;
-
-#if FULL_SYSTEM
- params->itb = itb;
- params->dtb = dtb;
- params->mem = mem;
- params->system = system;
- params->cpu_id = cpu_id;
- params->profile = profile;
-#else
- params->process = workload;
-#endif
-
- OzoneChecker *cpu = new OzoneChecker(params);
- return cpu;
-}
-
-REGISTER_SIM_OBJECT("OzoneChecker", OzoneChecker)
+++ /dev/null
-/*
- * Copyright (c) 2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __CPU_CHECKER_EXEC_CONTEXT_HH__
-#define __CPU_CHECKER_EXEC_CONTEXT_HH__
-
-#include "cpu/checker/cpu.hh"
-#include "cpu/cpu_exec_context.hh"
-#include "cpu/exec_context.hh"
-
-class EndQuiesceEvent;
-namespace Kernel {
- class Statistics;
-};
-
-template <class XC>
-class CheckerExecContext : public ExecContext
-{
- public:
- CheckerExecContext(XC *actual_xc,
- CheckerCPU *checker_cpu)
- : actualXC(actual_xc), checkerXC(checker_cpu->cpuXC),
- checkerCPU(checker_cpu)
- { }
-
- private:
- XC *actualXC;
- CPUExecContext *checkerXC;
- CheckerCPU *checkerCPU;
-
- public:
-
- BaseCPU *getCpuPtr() { return actualXC->getCpuPtr(); }
-
- void setCpuId(int id)
- {
- actualXC->setCpuId(id);
- checkerXC->setCpuId(id);
- }
-
- int readCpuId() { return actualXC->readCpuId(); }
-
- FunctionalMemory *getMemPtr() { return actualXC->getMemPtr(); }
-
-#if FULL_SYSTEM
- System *getSystemPtr() { return actualXC->getSystemPtr(); }
-
- PhysicalMemory *getPhysMemPtr() { return actualXC->getPhysMemPtr(); }
-
- AlphaITB *getITBPtr() { return actualXC->getITBPtr(); }
-
- AlphaDTB *getDTBPtr() { return actualXC->getDTBPtr(); }
-
- Kernel::Statistics *getKernelStats() { return actualXC->getKernelStats(); }
-#else
- Process *getProcessPtr() { return actualXC->getProcessPtr(); }
-#endif
-
- Status status() const { return actualXC->status(); }
-
- void setStatus(Status new_status)
- {
- actualXC->setStatus(new_status);
- checkerXC->setStatus(new_status);
- }
-
- /// Set the status to Active. Optional delay indicates number of
- /// cycles to wait before beginning execution.
- void activate(int delay = 1) { actualXC->activate(delay); }
-
- /// Set the status to Suspended.
- void suspend() { actualXC->suspend(); }
-
- /// Set the status to Unallocated.
- void deallocate() { actualXC->deallocate(); }
-
- /// Set the status to Halted.
- void halt() { actualXC->halt(); }
-
-#if FULL_SYSTEM
- void dumpFuncProfile() { actualXC->dumpFuncProfile(); }
-#endif
-
- void takeOverFrom(ExecContext *oldContext)
- {
- actualXC->takeOverFrom(oldContext);
- checkerXC->takeOverFrom(oldContext);
- }
-
- void regStats(const std::string &name) { actualXC->regStats(name); }
-
- void serialize(std::ostream &os) { actualXC->serialize(os); }
- void unserialize(Checkpoint *cp, const std::string §ion)
- { actualXC->unserialize(cp, section); }
-
-#if FULL_SYSTEM
- EndQuiesceEvent *getQuiesceEvent() { return actualXC->getQuiesceEvent(); }
-
- Tick readLastActivate() { return actualXC->readLastActivate(); }
- Tick readLastSuspend() { return actualXC->readLastSuspend(); }
-
- void profileClear() { return actualXC->profileClear(); }
- void profileSample() { return actualXC->profileSample(); }
-#endif
-
- int getThreadNum() { return actualXC->getThreadNum(); }
-
- // @todo: Do I need this?
- MachInst getInst() { return actualXC->getInst(); }
-
- // @todo: Do I need this?
- void copyArchRegs(ExecContext *xc)
- {
- actualXC->copyArchRegs(xc);
- checkerXC->copyArchRegs(xc);
- }
-
- void clearArchRegs()
- {
- actualXC->clearArchRegs();
- checkerXC->clearArchRegs();
- }
-
- //
- // New accessors for new decoder.
- //
- uint64_t readIntReg(int reg_idx)
- { return actualXC->readIntReg(reg_idx); }
-
- float readFloatRegSingle(int reg_idx)
- { return actualXC->readFloatRegSingle(reg_idx); }
-
- double readFloatRegDouble(int reg_idx)
- { return actualXC->readFloatRegDouble(reg_idx); }
-
- uint64_t readFloatRegInt(int reg_idx)
- { return actualXC->readFloatRegInt(reg_idx); }
-
- void setIntReg(int reg_idx, uint64_t val)
- {
- actualXC->setIntReg(reg_idx, val);
- checkerXC->setIntReg(reg_idx, val);
- }
-
- void setFloatRegSingle(int reg_idx, float val)
- {
- actualXC->setFloatRegSingle(reg_idx, val);
- checkerXC->setFloatRegSingle(reg_idx, val);
- }
-
- void setFloatRegDouble(int reg_idx, double val)
- {
- actualXC->setFloatRegDouble(reg_idx, val);
- checkerXC->setFloatRegSingle(reg_idx, val);
- }
-
- void setFloatRegInt(int reg_idx, uint64_t val)
- {
- actualXC->setFloatRegInt(reg_idx, val);
- checkerXC->setFloatRegInt(reg_idx, val);
- }
-
- uint64_t readPC() { return actualXC->readPC(); }
-
- void setPC(uint64_t val)
- {
- actualXC->setPC(val);
- checkerXC->setPC(val);
- checkerCPU->recordPCChange(val);
- }
-
- uint64_t readNextPC() { return actualXC->readNextPC(); }
-
- void setNextPC(uint64_t val)
- {
- actualXC->setNextPC(val);
- checkerXC->setNextPC(val);
- checkerCPU->recordNextPCChange(val);
- }
-
- MiscReg readMiscReg(int misc_reg)
- { return actualXC->readMiscReg(misc_reg); }
-
- MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
- { return actualXC->readMiscRegWithEffect(misc_reg, fault); }
-
- Fault setMiscReg(int misc_reg, const MiscReg &val)
- {
- checkerXC->setMiscReg(misc_reg, val);
- return actualXC->setMiscReg(misc_reg, val);
- }
-
- Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val)
- {
- checkerXC->setMiscRegWithEffect(misc_reg, val);
- return actualXC->setMiscRegWithEffect(misc_reg, val);
- }
-
- unsigned readStCondFailures()
- { return actualXC->readStCondFailures(); }
-
- void setStCondFailures(unsigned sc_failures)
- {
- checkerXC->setStCondFailures(sc_failures);
- actualXC->setStCondFailures(sc_failures);
- }
-#if FULL_SYSTEM
- bool inPalMode() { return actualXC->inPalMode(); }
-#endif
-
- // @todo: Fix this!
- bool misspeculating() { return actualXC->misspeculating(); }
-
-#if !FULL_SYSTEM
- IntReg getSyscallArg(int i) { return actualXC->getSyscallArg(i); }
-
- // used to shift args for indirect syscall
- void setSyscallArg(int i, IntReg val)
- {
- checkerXC->setSyscallArg(i, val);
- actualXC->setSyscallArg(i, val);
- }
-
- void setSyscallReturn(SyscallReturn return_value)
- {
- checkerXC->setSyscallReturn(return_value);
- actualXC->setSyscallReturn(return_value);
- }
-
- Counter readFuncExeInst() { return actualXC->readFuncExeInst(); }
-#endif
-};
-
-#endif // __CPU_CHECKER_EXEC_CONTEXT_HH__
+++ /dev/null
-
-#include <string>
-
-#include "cpu/checker/cpu.hh"
-#include "cpu/inst_seq.hh"
-#include "cpu/o3/alpha_dyn_inst.hh"
-#include "cpu/o3/alpha_impl.hh"
-#include "mem/base_mem.hh"
-#include "sim/builder.hh"
-#include "sim/process.hh"
-#include "sim/sim_object.hh"
-
-class O3Checker : public Checker<RefCountingPtr<AlphaDynInst<AlphaSimpleImpl> > >
-{
- public:
- O3Checker(Params *p)
- : Checker<RefCountingPtr<AlphaDynInst<AlphaSimpleImpl> > >(p)
- { }
-};
-
-////////////////////////////////////////////////////////////////////////
-//
-// CheckerCPU Simulation Object
-//
-BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker)
-
- Param<Counter> max_insts_any_thread;
- Param<Counter> max_insts_all_threads;
- Param<Counter> max_loads_any_thread;
- Param<Counter> max_loads_all_threads;
-
-#if FULL_SYSTEM
- SimObjectParam<AlphaITB *> itb;
- SimObjectParam<AlphaDTB *> dtb;
- SimObjectParam<FunctionalMemory *> mem;
- SimObjectParam<System *> system;
- Param<int> cpu_id;
- Param<Tick> profile;
-#else
- SimObjectParam<Process *> workload;
-#endif // FULL_SYSTEM
- Param<int> clock;
- SimObjectParam<BaseMem *> icache;
- SimObjectParam<BaseMem *> dcache;
-
- Param<bool> defer_registration;
- Param<bool> exitOnError;
- Param<bool> function_trace;
- Param<Tick> function_trace_start;
-
-END_DECLARE_SIM_OBJECT_PARAMS(O3Checker)
-
-BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker)
-
- INIT_PARAM(max_insts_any_thread,
- "terminate when any thread reaches this inst count"),
- INIT_PARAM(max_insts_all_threads,
- "terminate when all threads have reached this inst count"),
- INIT_PARAM(max_loads_any_thread,
- "terminate when any thread reaches this load count"),
- INIT_PARAM(max_loads_all_threads,
- "terminate when all threads have reached this load count"),
-
-#if FULL_SYSTEM
- INIT_PARAM(itb, "Instruction TLB"),
- INIT_PARAM(dtb, "Data TLB"),
- INIT_PARAM(mem, "memory"),
- INIT_PARAM(system, "system object"),
- INIT_PARAM(cpu_id, "processor ID"),
- INIT_PARAM(profile, ""),
-#else
- INIT_PARAM(workload, "processes to run"),
-#endif // FULL_SYSTEM
-
- INIT_PARAM(clock, "clock speed"),
- INIT_PARAM(icache, "L1 instruction cache object"),
- INIT_PARAM(dcache, "L1 data cache object"),
-
- INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
- INIT_PARAM(exitOnError, "exit on error"),
- INIT_PARAM(function_trace, "Enable function trace"),
- INIT_PARAM(function_trace_start, "Cycle to start function trace")
-
-END_INIT_SIM_OBJECT_PARAMS(O3Checker)
-
-
-CREATE_SIM_OBJECT(O3Checker)
-{
- O3Checker::Params *params = new O3Checker::Params();
- params->name = getInstanceName();
- params->numberOfThreads = 1;
- params->max_insts_any_thread = 0;
- params->max_insts_all_threads = 0;
- params->max_loads_any_thread = 0;
- params->max_loads_all_threads = 0;
- params->exitOnError = exitOnError;
- params->deferRegistration = defer_registration;
- params->functionTrace = function_trace;
- params->functionTraceStart = function_trace_start;
- params->clock = clock;
- // Hack to touch all parameters. Consider not deriving Checker
- // from BaseCPU..it's not really a CPU in the end.
- Counter temp;
- temp = max_insts_any_thread;
- temp = max_insts_all_threads;
- temp = max_loads_any_thread;
- temp = max_loads_all_threads;
- BaseMem *cache = icache;
- cache = dcache;
-
-#if FULL_SYSTEM
- params->itb = itb;
- params->dtb = dtb;
- params->mem = mem;
- params->system = system;
- params->cpu_id = cpu_id;
- params->profile = profile;
-#else
- params->process = workload;
-#endif
-
- O3Checker *cpu = new O3Checker(params);
- return cpu;
-}
-
-REGISTER_SIM_OBJECT("O3Checker", O3Checker)
+++ /dev/null
-
-#ifndef __CPU_O3_DEP_GRAPH_HH__
-#define __CPU_O3_DEP_GRAPH_HH__
-
-#include "cpu/o3/comm.hh"
-
-template <class DynInstPtr>
-class DependencyEntry
-{
- public:
- DependencyEntry()
- : inst(NULL), next(NULL)
- { }
-
- DynInstPtr inst;
- //Might want to include data about what arch. register the
- //dependence is waiting on.
- DependencyEntry<DynInstPtr> *next;
-};
-
-template <class DynInstPtr>
-class DependencyGraph
-{
- public:
- typedef DependencyEntry<DynInstPtr> DepEntry;
-
- DependencyGraph()
- : numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0)
- { }
-
- void resize(int num_entries);
-
- void reset();
-
- void insert(PhysRegIndex idx, DynInstPtr &new_inst);
-
- void setInst(PhysRegIndex idx, DynInstPtr &new_inst)
- { dependGraph[idx].inst = new_inst; }
-
- void clearInst(PhysRegIndex idx)
- { dependGraph[idx].inst = NULL; }
-
- void remove(PhysRegIndex idx, DynInstPtr &inst_to_remove);
-
- DynInstPtr pop(PhysRegIndex idx);
-
- bool empty(PhysRegIndex idx) { return !dependGraph[idx].next; }
-
- /** Debugging function to dump out the dependency graph.
- */
- void dump();
-
- private:
- /** Array of linked lists. Each linked list is a list of all the
- * instructions that depend upon a given register. The actual
- * register's index is used to index into the graph; ie all
- * instructions in flight that are dependent upon r34 will be
- * in the linked list of dependGraph[34].
- */
- DepEntry *dependGraph;
-
- int numEntries;
-
- // Debug variable, remove when done testing.
- unsigned memAllocCounter;
-
- public:
- uint64_t nodesTraversed;
- uint64_t nodesRemoved;
-};
-
-template <class DynInstPtr>
-void
-DependencyGraph<DynInstPtr>::resize(int num_entries)
-{
- numEntries = num_entries;
- dependGraph = new DepEntry[numEntries];
-}
-
-template <class DynInstPtr>
-void
-DependencyGraph<DynInstPtr>::reset()
-{
- // Clear the dependency graph
- DepEntry *curr;
- DepEntry *prev;
-
- for (int i = 0; i < numEntries; ++i) {
- curr = dependGraph[i].next;
-
- while (curr) {
- memAllocCounter--;
-
- prev = curr;
- curr = prev->next;
- prev->inst = NULL;
-
- delete prev;
- }
-
- if (dependGraph[i].inst) {
- dependGraph[i].inst = NULL;
- }
-
- dependGraph[i].next = NULL;
- }
-}
-
-template <class DynInstPtr>
-void
-DependencyGraph<DynInstPtr>::insert(PhysRegIndex idx, DynInstPtr &new_inst)
-{
- //Add this new, dependent instruction at the head of the dependency
- //chain.
-
- // First create the entry that will be added to the head of the
- // dependency chain.
- DepEntry *new_entry = new DepEntry;
- new_entry->next = dependGraph[idx].next;
- new_entry->inst = new_inst;
-
- // Then actually add it to the chain.
- dependGraph[idx].next = new_entry;
-
- ++memAllocCounter;
-}
-
-
-template <class DynInstPtr>
-void
-DependencyGraph<DynInstPtr>::remove(PhysRegIndex idx,
- DynInstPtr &inst_to_remove)
-{
- DepEntry *prev = &dependGraph[idx];
- DepEntry *curr = dependGraph[idx].next;
-
- // Make sure curr isn't NULL. Because this instruction is being
- // removed from a dependency list, it must have been placed there at
- // an earlier time. The dependency chain should not be empty,
- // unless the instruction dependent upon it is already ready.
- if (curr == NULL) {
- return;
- }
-
- nodesRemoved++;
-
- // Find the instruction to remove within the dependency linked list.
- while (curr->inst != inst_to_remove) {
- prev = curr;
- curr = curr->next;
- nodesTraversed++;
-
- assert(curr != NULL);
- }
-
- // Now remove this instruction from the list.
- prev->next = curr->next;
-
- --memAllocCounter;
-
- // Could push this off to the destructor of DependencyEntry
- curr->inst = NULL;
-
- delete curr;
-}
-
-template <class DynInstPtr>
-DynInstPtr
-DependencyGraph<DynInstPtr>::pop(PhysRegIndex idx)
-{
- DepEntry *node;
- node = dependGraph[idx].next;
- DynInstPtr inst = NULL;
- if (node) {
- inst = node->inst;
- dependGraph[idx].next = node->next;
- node->inst = NULL;
- memAllocCounter--;
- delete node;
- }
- return inst;
-}
-
-template <class DynInstPtr>
-void
-DependencyGraph<DynInstPtr>::dump()
-{
- DepEntry *curr;
-
- for (int i = 0; i < numEntries; ++i)
- {
- curr = &dependGraph[i];
-
- if (curr->inst) {
- cprintf("dependGraph[%i]: producer: %#x [sn:%lli] consumer: ",
- i, curr->inst->readPC(), curr->inst->seqNum);
- } else {
- cprintf("dependGraph[%i]: No producer. consumer: ", i);
- }
-
- while (curr->next != NULL) {
- curr = curr->next;
-
- cprintf("%#x [sn:%lli] ",
- curr->inst->readPC(), curr->inst->seqNum);
- }
-
- cprintf("\n");
- }
- cprintf("memAllocCounter: %i\n", memAllocCounter);
-}
-
-#endif // __CPU_O3_DEP_GRAPH_HH__
+++ /dev/null
-/*
- * Copyright (c) 2002-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <sstream>
-
-#include "cpu/o3/fu_pool.hh"
-#include "encumbered/cpu/full/fu_pool.hh"
-#include "sim/builder.hh"
-
-using namespace std;
-
-////////////////////////////////////////////////////////////////////////////
-//
-// A pool of function units
-//
-
-inline void
-FUPool::FUIdxQueue::addFU(int fu_idx)
-{
- funcUnitsIdx.push_back(fu_idx);
- ++size;
-}
-
-inline int
-FUPool::FUIdxQueue::getFU()
-{
- int retval = funcUnitsIdx[idx++];
-
- if (idx == size)
- idx = 0;
-
- return retval;
-}
-
-FUPool::~FUPool()
-{
- fuListIterator i = funcUnits.begin();
- fuListIterator end = funcUnits.end();
- for (; i != end; ++i)
- delete *i;
-}
-
-
-// Constructor
-FUPool::FUPool(string name, vector<FUDesc *> paramList)
- : SimObject(name)
-{
- numFU = 0;
-
- funcUnits.clear();
-
- for (int i = 0; i < Num_OpClasses; ++i) {
- maxOpLatencies[i] = 0;
- maxIssueLatencies[i] = 0;
- }
-
- //
- // Iterate through the list of FUDescData structures
- //
- for (FUDDiterator i = paramList.begin(); i != paramList.end(); ++i) {
-
- //
- // Don't bother with this if we're not going to create any FU's
- //
- if ((*i)->number) {
- //
- // Create the FuncUnit object from this structure
- // - add the capabilities listed in the FU's operation
- // description
- //
- // We create the first unit, then duplicate it as needed
- //
- FuncUnit *fu = new FuncUnit;
-
- OPDDiterator j = (*i)->opDescList.begin();
- OPDDiterator end = (*i)->opDescList.end();
- for (; j != end; ++j) {
- // indicate that this pool has this capability
- capabilityList.set((*j)->opClass);
-
- // Add each of the FU's that will have this capability to the
- // appropriate queue.
- for (int k = 0; k < (*i)->number; ++k)
- fuPerCapList[(*j)->opClass].addFU(numFU + k);
-
- // indicate that this FU has the capability
- fu->addCapability((*j)->opClass, (*j)->opLat, (*j)->issueLat);
-
- if ((*j)->opLat > maxOpLatencies[(*j)->opClass])
- maxOpLatencies[(*j)->opClass] = (*j)->opLat;
-
- if ((*j)->issueLat > maxIssueLatencies[(*j)->opClass])
- maxIssueLatencies[(*j)->opClass] = (*j)->issueLat;
- }
-
- numFU++;
-
- // Add the appropriate number of copies of this FU to the list
- ostringstream s;
-
- s << (*i)->name() << "(0)";
- fu->name = s.str();
- funcUnits.push_back(fu);
-
- for (int c = 1; c < (*i)->number; ++c) {
- ostringstream s;
- numFU++;
- FuncUnit *fu2 = new FuncUnit(*fu);
-
- s << (*i)->name() << "(" << c << ")";
- fu2->name = s.str();
- funcUnits.push_back(fu2);
- }
- }
- }
-
- unitBusy.resize(numFU);
-
- for (int i = 0; i < numFU; i++) {
- unitBusy[i] = false;
- }
-}
-
-void
-FUPool::annotateMemoryUnits(unsigned hit_latency)
-{
- maxOpLatencies[MemReadOp] = hit_latency;
-
- fuListIterator i = funcUnits.begin();
- fuListIterator iend = funcUnits.end();
- for (; i != iend; ++i) {
- if ((*i)->provides(MemReadOp))
- (*i)->opLatency(MemReadOp) = hit_latency;
-
- if ((*i)->provides(MemWriteOp))
- (*i)->opLatency(MemWriteOp) = hit_latency;
- }
-}
-
-int
-FUPool::getUnit(OpClass capability)
-{
- // If this pool doesn't have the specified capability,
- // return this information to the caller
- if (!capabilityList[capability])
- return -2;
-
- int fu_idx = fuPerCapList[capability].getFU();
- int start_idx = fu_idx;
-
- // Iterate through the circular queue if needed, stopping if we've reached
- // the first element again.
- while (unitBusy[fu_idx]) {
- fu_idx = fuPerCapList[capability].getFU();
- if (fu_idx == start_idx) {
- // No FU available
- return -1;
- }
- }
-
- unitBusy[fu_idx] = true;
-
- return fu_idx;
-}
-
-void
-FUPool::freeUnitNextCycle(int fu_idx)
-{
- assert(unitBusy[fu_idx]);
- unitsToBeFreed.push_back(fu_idx);
-}
-
-void
-FUPool::processFreeUnits()
-{
- while (!unitsToBeFreed.empty()) {
- int fu_idx = unitsToBeFreed.back();
- unitsToBeFreed.pop_back();
-
- assert(unitBusy[fu_idx]);
-
- unitBusy[fu_idx] = false;
- }
-}
-
-void
-FUPool::dump()
-{
- cout << "Function Unit Pool (" << name() << ")\n";
- cout << "======================================\n";
- cout << "Free List:\n";
-
- for (int i = 0; i < numFU; ++i) {
- if (unitBusy[i]) {
- continue;
- }
-
- cout << " [" << i << "] : ";
-
- cout << funcUnits[i]->name << " ";
-
- cout << "\n";
- }
-
- cout << "======================================\n";
- cout << "Busy List:\n";
- for (int i = 0; i < numFU; ++i) {
- if (!unitBusy[i]) {
- continue;
- }
-
- cout << " [" << i << "] : ";
-
- cout << funcUnits[i]->name << " ";
-
- cout << "\n";
- }
-}
-
-void
-FUPool::switchOut()
-{
-}
-
-void
-FUPool::takeOverFrom()
-{
- for (int i = 0; i < numFU; i++) {
- unitBusy[i] = false;
- }
- unitsToBeFreed.clear();
-}
-
-//
-
-////////////////////////////////////////////////////////////////////////////
-//
-// The SimObjects we use to get the FU information into the simulator
-//
-////////////////////////////////////////////////////////////////////////////
-
-//
-// FUPool - Contails a list of FUDesc objects to make available
-//
-
-//
-// The FuPool object
-//
-
-BEGIN_DECLARE_SIM_OBJECT_PARAMS(FUPool)
-
- SimObjectVectorParam<FUDesc *> FUList;
-
-END_DECLARE_SIM_OBJECT_PARAMS(FUPool)
-
-
-BEGIN_INIT_SIM_OBJECT_PARAMS(FUPool)
-
- INIT_PARAM(FUList, "list of FU's for this pool")
-
-END_INIT_SIM_OBJECT_PARAMS(FUPool)
-
-
-CREATE_SIM_OBJECT(FUPool)
-{
- return new FUPool(getInstanceName(), FUList);
-}
-
-REGISTER_SIM_OBJECT("FUPool", FUPool)
-
+++ /dev/null
-/*
- * Copyright (c) 2002-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __CPU_O3_FU_POOL_HH__
-#define __CPU_O3_FU_POOL_HH__
-
-#include <bitset>
-#include <list>
-#include <string>
-#include <vector>
-
-#include "base/sched_list.hh"
-#include "encumbered/cpu/full/op_class.hh"
-#include "sim/sim_object.hh"
-
-class FUDesc;
-class FuncUnit;
-
-/**
- * Pool of FU's, specific to the new CPU model. The old FU pool had lists of
- * free units and busy units, and whenever a FU was needed it would iterate
- * through the free units to find a FU that provided the capability. This pool
- * has lists of units specific to each of the capabilities, and whenever a FU
- * is needed, it iterates through that list to find a free unit. The previous
- * FU pool would have to be ticked each cycle to update which units became
- * free. This FU pool lets the IEW stage handle freeing units, which frees
- * them as their scheduled execution events complete. This limits units in this
- * model to either have identical issue and op latencies, or 1 cycle issue
- * latencies.
- */
-class FUPool : public SimObject
-{
- private:
- /** Maximum op execution latencies, per op class. */
- unsigned maxOpLatencies[Num_OpClasses];
- /** Maximum issue latencies, per op class. */
- unsigned maxIssueLatencies[Num_OpClasses];
-
- /** Bitvector listing capabilities of this FU pool. */
- std::bitset<Num_OpClasses> capabilityList;
-
- /** Bitvector listing which FUs are busy. */
- std::vector<bool> unitBusy;
-
- /** List of units to be freed at the end of this cycle. */
- std::vector<int> unitsToBeFreed;
-
- /**
- * Class that implements a circular queue to hold FU indices. The hope is
- * that FUs that have been just used will be moved to the end of the queue
- * by iterating through it, thus leaving free units at the head of the
- * queue.
- */
- class FUIdxQueue {
- public:
- /** Constructs a circular queue of FU indices. */
- FUIdxQueue()
- : idx(0), size(0)
- { }
-
- /** Adds a FU to the queue. */
- inline void addFU(int fu_idx);
-
- /** Returns the index of the FU at the head of the queue, and changes
- * the index to the next element.
- */
- inline int getFU();
-
- private:
- /** Circular queue index. */
- int idx;
-
- /** Size of the queue. */
- int size;
-
- /** Queue of FU indices. */
- std::vector<int> funcUnitsIdx;
- };
-
- /** Per op class queues of FUs that provide that capability. */
- FUIdxQueue fuPerCapList[Num_OpClasses];
-
- /** Number of FUs. */
- int numFU;
-
- /** Functional units. */
- std::vector<FuncUnit *> funcUnits;
-
- typedef std::vector<FuncUnit *>::iterator fuListIterator;
-
- public:
-
- /** Constructs a FU pool. */
- FUPool(std::string name, std::vector<FUDesc *> l);
- ~FUPool();
-
- /** Annotates units that provide memory operations. Included only because
- * old FU pool provided this function.
- */
- void annotateMemoryUnits(unsigned hit_latency);
-
- /**
- * Gets a FU providing the requested capability. Will mark the unit as busy,
- * but leaves the freeing of the unit up to the IEW stage.
- * @param capability The capability requested.
- * @return Returns -2 if the FU pool does not have the capability, -1 if
- * there is no free FU, and the FU's index otherwise.
- */
- int getUnit(OpClass capability);
-
- /** Frees a FU at the end of this cycle. */
- void freeUnitNextCycle(int fu_idx);
-
- /** Frees all FUs on the list. */
- void processFreeUnits();
-
- /** Returns the total number of FUs. */
- int size() { return numFU; }
-
- /** Debugging function used to dump FU information. */
- void dump();
-
- /** Returns the operation execution latency of the given capability. */
- unsigned getOpLatency(OpClass capability) {
- return maxOpLatencies[capability];
- }
-
- /** Returns the issue latency of the given capability. */
- unsigned getIssueLatency(OpClass capability) {
- return maxIssueLatencies[capability];
- }
-
- void switchOut();
- void takeOverFrom();
-};
-
-#endif // __CPU_O3_FU_POOL_HH__
+++ /dev/null
-/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "cpu/o3/alpha_dyn_inst.hh"
-#include "cpu/o3/alpha_cpu.hh"
-#include "cpu/o3/alpha_impl.hh"
-#include "cpu/o3/lsq_impl.hh"
-
-// Force the instantiation of LDSTQ for all the implementations we care about.
-template class LSQ<AlphaSimpleImpl>;
-
+++ /dev/null
-/*
- * Copyright (c) 2004-2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __CPU_O3_LSQ_HH__
-#define __CPU_O3_LSQ_HH__
-
-#include <map>
-#include <queue>
-
-#include "config/full_system.hh"
-#include "cpu/inst_seq.hh"
-//#include "cpu/o3/cpu_policy.hh"
-#include "cpu/o3/lsq_unit.hh"
-#include "mem/mem_interface.hh"
-//#include "mem/page_table.hh"
-#include "sim/sim_object.hh"
-
-template <class Impl>
-class LSQ {
- public:
- typedef typename Impl::Params Params;
- typedef typename Impl::FullCPU FullCPU;
- typedef typename Impl::DynInstPtr DynInstPtr;
- typedef typename Impl::CPUPol::IEW IEW;
- typedef typename Impl::CPUPol::LSQUnit LSQUnit;
-
- enum LSQPolicy {
- Dynamic,
- Partitioned,
- Threshold
- };
-
- /** Constructs an LSQ with the given parameters. */
- LSQ(Params *params);
-
- /** Returns the name of the LSQ. */
- std::string name() const;
-
- /** Sets the pointer to the list of active threads. */
- void setActiveThreads(std::list<unsigned> *at_ptr);
- /** Sets the CPU pointer. */
- void setCPU(FullCPU *cpu_ptr);
- /** Sets the IEW stage pointer. */
- void setIEW(IEW *iew_ptr);
- /** Sets the page table pointer. */
-// void setPageTable(PageTable *pt_ptr);
-
- void switchOut();
- void takeOverFrom();
-
- /** Number of entries needed for the given amount of threads.*/
- int entryAmount(int num_threads);
- void removeEntries(unsigned tid);
- /** Reset the max entries for each thread. */
- void resetEntries();
- /** Resize the max entries for a thread. */
- void resizeEntries(unsigned size, unsigned tid);
-
- /** Ticks the LSQ. */
- void tick();
- /** Ticks a specific LSQ Unit. */
- void tick(unsigned tid)
- { thread[tid].tick(); }
-
- /** Inserts a load into the LSQ. */
- void insertLoad(DynInstPtr &load_inst);
- /** Inserts a store into the LSQ. */
- void insertStore(DynInstPtr &store_inst);
-
- /** Executes a load. */
- Fault executeLoad(DynInstPtr &inst);
-
- Fault executeLoad(int lq_idx, unsigned tid)
- { return thread[tid].executeLoad(lq_idx); }
-
- /** Executes a store. */
- Fault executeStore(DynInstPtr &inst);
-
- /**
- * Commits loads up until the given sequence number for a specific thread.
- */
- void commitLoads(InstSeqNum &youngest_inst, unsigned tid)
- { thread[tid].commitLoads(youngest_inst); }
-
- /**
- * Commits stores up until the given sequence number for a specific thread.
- */
- void commitStores(InstSeqNum &youngest_inst, unsigned tid)
- { thread[tid].commitStores(youngest_inst); }
-
- /**
- * Attempts to write back stores until all cache ports are used or the
- * interface becomes blocked.
- */
- void writebackStores();
- /** Same as above, but only for one thread. */
- void writebackStores(unsigned tid);
-
- /**
- * Squash instructions from a thread until the specified sequence number.
- */
- void squash(const InstSeqNum &squashed_num, unsigned tid)
- { thread[tid].squash(squashed_num); }
-
- /** Returns whether or not there was a memory ordering violation. */
- bool violation();
- /**
- * Returns whether or not there was a memory ordering violation for a
- * specific thread.
- */
- bool violation(unsigned tid)
- { return thread[tid].violation(); }
-
- /** Returns if a load is blocked due to the memory system for a specific
- * thread.
- */
- bool loadBlocked(unsigned tid)
- { return thread[tid].loadBlocked(); }
-
- bool isLoadBlockedHandled(unsigned tid)
- { return thread[tid].isLoadBlockedHandled(); }
-
- void setLoadBlockedHandled(unsigned tid)
- { thread[tid].setLoadBlockedHandled(); }
-
- /** Gets the instruction that caused the memory ordering violation. */
- DynInstPtr getMemDepViolator(unsigned tid)
- { return thread[tid].getMemDepViolator(); }
-
- /** Returns the head index of the load queue for a specific thread. */
- int getLoadHead(unsigned tid)
- { return thread[tid].getLoadHead(); }
-
- /** Returns the sequence number of the head of the load queue. */
- InstSeqNum getLoadHeadSeqNum(unsigned tid)
- {
- return thread[tid].getLoadHeadSeqNum();
- }
-
- /** Returns the head index of the store queue. */
- int getStoreHead(unsigned tid)
- { return thread[tid].getStoreHead(); }
-
- /** Returns the sequence number of the head of the store queue. */
- InstSeqNum getStoreHeadSeqNum(unsigned tid)
- {
- return thread[tid].getStoreHeadSeqNum();
- }
-
- /** Returns the number of instructions in all of the queues. */
- int getCount();
- /** Returns the number of instructions in the queues of one thread. */
- int getCount(unsigned tid)
- { return thread[tid].getCount(); }
-
- /** Returns the total number of loads in the load queue. */
- int numLoads();
- /** Returns the total number of loads for a single thread. */
- int numLoads(unsigned tid)
- { return thread[tid].numLoads(); }
-
- /** Returns the total number of stores in the store queue. */
- int numStores();
- /** Returns the total number of stores for a single thread. */
- int numStores(unsigned tid)
- { return thread[tid].numStores(); }
-
- /** Returns the total number of loads that are ready. */
- int numLoadsReady();
- /** Returns the number of loads that are ready for a single thread. */
- int numLoadsReady(unsigned tid)
- { return thread[tid].numLoadsReady(); }
-
- /** Returns the number of free entries. */
- unsigned numFreeEntries();
- /** Returns the number of free entries for a specific thread. */
- unsigned numFreeEntries(unsigned tid);
-
- /** Returns if the LSQ is full (either LQ or SQ is full). */
- bool isFull();
- /**
- * Returns if the LSQ is full for a specific thread (either LQ or SQ is
- * full).
- */
- bool isFull(unsigned tid);
-
- /** Returns if any of the LQs are full. */
- bool lqFull();
- /** Returns if the LQ of a given thread is full. */
- bool lqFull(unsigned tid);
-
- /** Returns if any of the SQs are full. */
- bool sqFull();
- /** Returns if the SQ of a given thread is full. */
- bool sqFull(unsigned tid);
-
- /**
- * Returns if the LSQ is stalled due to a memory operation that must be
- * replayed.
- */
- bool isStalled();
- /**
- * Returns if the LSQ of a specific thread is stalled due to a memory
- * operation that must be replayed.
- */
- bool isStalled(unsigned tid);
-
- /** Returns whether or not there are any stores to write back to memory. */
- bool hasStoresToWB();
-
- /** Returns whether or not a specific thread has any stores to write back
- * to memory.
- */
- bool hasStoresToWB(unsigned tid)
- { return thread[tid].hasStoresToWB(); }
-
- /** Returns the number of stores a specific thread has to write back. */
- int numStoresToWB(unsigned tid)
- { return thread[tid].numStoresToWB(); }
-
- /** Returns if the LSQ will write back to memory this cycle. */
- bool willWB();
- /** Returns if the LSQ of a specific thread will write back to memory this
- * cycle.
- */
- bool willWB(unsigned tid)
- { return thread[tid].willWB(); }
-
- /** Debugging function to print out all instructions. */
- void dumpInsts();
- /** Debugging function to print out instructions from a specific thread. */
- void dumpInsts(unsigned tid)
- { thread[tid].dumpInsts(); }
-
- /** Executes a read operation, using the load specified at the load index. */
- template <class T>
- Fault read(MemReqPtr &req, T &data, int load_idx);
-
- /** Executes a store operation, using the store specified at the store
- * index.
- */
- template <class T>
- Fault write(MemReqPtr &req, T &data, int store_idx);
-
- private:
- /** The LSQ policy for SMT mode. */
- LSQPolicy lsqPolicy;
-
- /** The LSQ units for individual threads. */
- LSQUnit thread[Impl::MaxThreads];
-
- /** The CPU pointer. */
- FullCPU *cpu;
-
- /** The IEW stage pointer. */
- IEW *iewStage;
-
- /** The pointer to the page table. */
-// PageTable *pTable;
-
- /** List of Active Threads in System. */
- std::list<unsigned> *activeThreads;
-
- /** Total Size of LQ Entries. */
- unsigned LQEntries;
- /** Total Size of SQ Entries. */
- unsigned SQEntries;
-
- /** Max LQ Size - Used to Enforce Sharing Policies. */
- unsigned maxLQEntries;
-
- /** Max SQ Size - Used to Enforce Sharing Policies. */
- unsigned maxSQEntries;
-
- /** Number of Threads. */
- unsigned numThreads;
-};
-
-template <class Impl>
-template <class T>
-Fault
-LSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
-{
- unsigned tid = req->thread_num;
-
- return thread[tid].read(req, data, load_idx);
-}
-
-template <class Impl>
-template <class T>
-Fault
-LSQ<Impl>::write(MemReqPtr &req, T &data, int store_idx)
-{
- unsigned tid = req->thread_num;
-
- return thread[tid].write(req, data, store_idx);
-}
-
-#endif // __CPU_O3_LSQ_HH__
+++ /dev/null
-/*
- * Copyright (c) 2004-2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <algorithm>
-#include <string>
-
-#include "cpu/o3/lsq.hh"
-
-using namespace std;
-
-template <class Impl>
-LSQ<Impl>::LSQ(Params *params)
- : LQEntries(params->LQEntries), SQEntries(params->SQEntries),
- numThreads(params->numberOfThreads)
-{
- DPRINTF(LSQ, "Creating LSQ object.\n");
-
- //**********************************************/
- //************ Handle SMT Parameters ***********/
- //**********************************************/
- string policy = params->smtLSQPolicy;
-
- //Convert string to lowercase
- std::transform(policy.begin(), policy.end(), policy.begin(),
- (int(*)(int)) tolower);
-
- //Figure out fetch policy
- if (policy == "dynamic") {
- lsqPolicy = Dynamic;
-
- maxLQEntries = LQEntries;
- maxSQEntries = SQEntries;
-
- DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
-
- } else if (policy == "partitioned") {
- lsqPolicy = Partitioned;
-
- //@todo:make work if part_amt doesnt divide evenly.
- maxLQEntries = LQEntries / numThreads;
- maxSQEntries = SQEntries / numThreads;
-
- DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
- "%i entries per LQ | %i entries per SQ",
- maxLQEntries,maxSQEntries);
-
- } else if (policy == "threshold") {
- lsqPolicy = Threshold;
-
- assert(params->smtLSQThreshold > LQEntries);
- assert(params->smtLSQThreshold > SQEntries);
-
- //Divide up by threshold amount
- //@todo: Should threads check the max and the total
- //amount of the LSQ
- maxLQEntries = params->smtLSQThreshold;
- maxSQEntries = params->smtLSQThreshold;
-
- DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
- "%i entries per LQ | %i entries per SQ",
- maxLQEntries,maxSQEntries);
-
- } else {
- assert(0 && "Invalid LSQ Sharing Policy.Options Are:{Dynamic,"
- "Partitioned, Threshold}");
- }
-
- //Initialize LSQs
- for (int tid=0; tid < numThreads; tid++) {
- thread[tid].init(params, maxLQEntries, maxSQEntries, tid);
- }
-}
-
-
-template<class Impl>
-std::string
-LSQ<Impl>::name() const
-{
- return iewStage->name() + ".lsq";
-}
-
-template<class Impl>
-void
-LSQ<Impl>::setActiveThreads(list<unsigned> *at_ptr)
-{
- activeThreads = at_ptr;
- assert(activeThreads != 0);
-}
-
-template<class Impl>
-void
-LSQ<Impl>::setCPU(FullCPU *cpu_ptr)
-{
- cpu = cpu_ptr;
-
- for (int tid=0; tid < numThreads; tid++) {
- thread[tid].setCPU(cpu_ptr);
- }
-}
-
-template<class Impl>
-void
-LSQ<Impl>::setIEW(IEW *iew_ptr)
-{
- iewStage = iew_ptr;
-
- for (int tid=0; tid < numThreads; tid++) {
- thread[tid].setIEW(iew_ptr);
- }
-}
-
-#if 0
-template<class Impl>
-void
-LSQ<Impl>::setPageTable(PageTable *pt_ptr)
-{
- for (int tid=0; tid < numThreads; tid++) {
- thread[tid].setPageTable(pt_ptr);
- }
-}
-#endif
-
-template <class Impl>
-void
-LSQ<Impl>::switchOut()
-{
- for (int tid = 0; tid < numThreads; tid++) {
- thread[tid].switchOut();
- }
-}
-
-template <class Impl>
-void
-LSQ<Impl>::takeOverFrom()
-{
- for (int tid = 0; tid < numThreads; tid++) {
- thread[tid].takeOverFrom();
- }
-}
-
-template <class Impl>
-int
-LSQ<Impl>::entryAmount(int num_threads)
-{
- if (lsqPolicy == Partitioned) {
- return LQEntries / num_threads;
- } else {
- return 0;
- }
-}
-
-template <class Impl>
-void
-LSQ<Impl>::resetEntries()
-{
- if (lsqPolicy != Dynamic || numThreads > 1) {
- int active_threads = (*activeThreads).size();
-
- list<unsigned>::iterator threads = (*activeThreads).begin();
- list<unsigned>::iterator list_end = (*activeThreads).end();
-
- int maxEntries;
-
- if (lsqPolicy == Partitioned) {
- maxEntries = LQEntries / active_threads;
- } else if (lsqPolicy == Threshold && active_threads == 1) {
- maxEntries = LQEntries;
- } else {
- maxEntries = LQEntries;
- }
-
- while (threads != list_end) {
- resizeEntries(maxEntries,*threads++);
- }
- }
-}
-
-template<class Impl>
-void
-LSQ<Impl>::removeEntries(unsigned tid)
-{
- thread[tid].clearLQ();
- thread[tid].clearSQ();
-}
-
-template<class Impl>
-void
-LSQ<Impl>::resizeEntries(unsigned size,unsigned tid)
-{
- thread[tid].resizeLQ(size);
- thread[tid].resizeSQ(size);
-}
-
-template<class Impl>
-void
-LSQ<Impl>::tick()
-{
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
-
- while (active_threads != (*activeThreads).end()) {
- unsigned tid = *active_threads++;
-
- thread[tid].tick();
- }
-}
-
-template<class Impl>
-void
-LSQ<Impl>::insertLoad(DynInstPtr &load_inst)
-{
- unsigned tid = load_inst->threadNumber;
-
- thread[tid].insertLoad(load_inst);
-}
-
-template<class Impl>
-void
-LSQ<Impl>::insertStore(DynInstPtr &store_inst)
-{
- unsigned tid = store_inst->threadNumber;
-
- thread[tid].insertStore(store_inst);
-}
-
-template<class Impl>
-Fault
-LSQ<Impl>::executeLoad(DynInstPtr &inst)
-{
- unsigned tid = inst->threadNumber;
-
- return thread[tid].executeLoad(inst);
-}
-
-template<class Impl>
-Fault
-LSQ<Impl>::executeStore(DynInstPtr &inst)
-{
- unsigned tid = inst->threadNumber;
-
- return thread[tid].executeStore(inst);
-}
-
-template<class Impl>
-void
-LSQ<Impl>::writebackStores()
-{
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
-
- while (active_threads != (*activeThreads).end()) {
- unsigned tid = *active_threads++;
-
- if (numStoresToWB(tid) > 0) {
- DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
- "available for Writeback.\n", tid, numStoresToWB(tid));
- }
-
- thread[tid].writebackStores();
- }
-}
-
-template<class Impl>
-bool
-LSQ<Impl>::violation()
-{
- /* Answers: Does Anybody Have a Violation?*/
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
-
- while (active_threads != (*activeThreads).end()) {
- unsigned tid = *active_threads++;
- if (thread[tid].violation())
- return true;
- }
-
- return false;
-}
-
-template<class Impl>
-int
-LSQ<Impl>::getCount()
-{
- unsigned total = 0;
-
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
-
- while (active_threads != (*activeThreads).end()) {
- unsigned tid = *active_threads++;
- total += getCount(tid);
- }
-
- return total;
-}
-
-template<class Impl>
-int
-LSQ<Impl>::numLoads()
-{
- unsigned total = 0;
-
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
-
- while (active_threads != (*activeThreads).end()) {
- unsigned tid = *active_threads++;
- total += numLoads(tid);
- }
-
- return total;
-}
-
-template<class Impl>
-int
-LSQ<Impl>::numStores()
-{
- unsigned total = 0;
-
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
-
- while (active_threads != (*activeThreads).end()) {
- unsigned tid = *active_threads++;
- total += thread[tid].numStores();
- }
-
- return total;
-}
-
-template<class Impl>
-int
-LSQ<Impl>::numLoadsReady()
-{
- unsigned total = 0;
-
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
-
- while (active_threads != (*activeThreads).end()) {
- unsigned tid = *active_threads++;
- total += thread[tid].numLoadsReady();
- }
-
- return total;
-}
-
-template<class Impl>
-unsigned
-LSQ<Impl>::numFreeEntries()
-{
- unsigned total = 0;
-
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
-
- while (active_threads != (*activeThreads).end()) {
- unsigned tid = *active_threads++;
- total += thread[tid].numFreeEntries();
- }
-
- return total;
-}
-
-template<class Impl>
-unsigned
-LSQ<Impl>::numFreeEntries(unsigned tid)
-{
- //if( lsqPolicy == Dynamic )
- //return numFreeEntries();
- //else
- return thread[tid].numFreeEntries();
-}
-
-template<class Impl>
-bool
-LSQ<Impl>::isFull()
-{
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
-
- while (active_threads != (*activeThreads).end()) {
- unsigned tid = *active_threads++;
- if (! (thread[tid].lqFull() || thread[tid].sqFull()) )
- return false;
- }
-
- return true;
-}
-
-template<class Impl>
-bool
-LSQ<Impl>::isFull(unsigned tid)
-{
- //@todo: Change to Calculate All Entries for
- //Dynamic Policy
- if( lsqPolicy == Dynamic )
- return isFull();
- else
- return thread[tid].lqFull() || thread[tid].sqFull();
-}
-
-template<class Impl>
-bool
-LSQ<Impl>::lqFull()
-{
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
-
- while (active_threads != (*activeThreads).end()) {
- unsigned tid = *active_threads++;
- if (!thread[tid].lqFull())
- return false;
- }
-
- return true;
-}
-
-template<class Impl>
-bool
-LSQ<Impl>::lqFull(unsigned tid)
-{
- //@todo: Change to Calculate All Entries for
- //Dynamic Policy
- if( lsqPolicy == Dynamic )
- return lqFull();
- else
- return thread[tid].lqFull();
-}
-
-template<class Impl>
-bool
-LSQ<Impl>::sqFull()
-{
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
-
- while (active_threads != (*activeThreads).end()) {
- unsigned tid = *active_threads++;
- if (!sqFull(tid))
- return false;
- }
-
- return true;
-}
-
-template<class Impl>
-bool
-LSQ<Impl>::sqFull(unsigned tid)
-{
- //@todo: Change to Calculate All Entries for
- //Dynamic Policy
- if( lsqPolicy == Dynamic )
- return sqFull();
- else
- return thread[tid].sqFull();
-}
-
-template<class Impl>
-bool
-LSQ<Impl>::isStalled()
-{
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
-
- while (active_threads != (*activeThreads).end()) {
- unsigned tid = *active_threads++;
- if (!thread[tid].isStalled())
- return false;
- }
-
- return true;
-}
-
-template<class Impl>
-bool
-LSQ<Impl>::isStalled(unsigned tid)
-{
- if( lsqPolicy == Dynamic )
- return isStalled();
- else
- return thread[tid].isStalled();
-}
-
-template<class Impl>
-bool
-LSQ<Impl>::hasStoresToWB()
-{
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
-
- while (active_threads != (*activeThreads).end()) {
- unsigned tid = *active_threads++;
- if (!hasStoresToWB(tid))
- return false;
- }
-
- return true;
-}
-
-template<class Impl>
-bool
-LSQ<Impl>::willWB()
-{
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
-
- while (active_threads != (*activeThreads).end()) {
- unsigned tid = *active_threads++;
- if (!willWB(tid))
- return false;
- }
-
- return true;
-}
-
-template<class Impl>
-void
-LSQ<Impl>::dumpInsts()
-{
- list<unsigned>::iterator active_threads = (*activeThreads).begin();
-
- while (active_threads != (*activeThreads).end()) {
- unsigned tid = *active_threads++;
- thread[tid].dumpInsts();
- }
-}
+++ /dev/null
-/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "cpu/o3/alpha_dyn_inst.hh"
-#include "cpu/o3/alpha_cpu.hh"
-#include "cpu/o3/alpha_impl.hh"
-#include "cpu/o3/lsq_unit_impl.hh"
-
-// Force the instantiation of LDSTQ for all the implementations we care about.
-template class LSQUnit<AlphaSimpleImpl>;
-
+++ /dev/null
-/*
- * Copyright (c) 2004-2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __CPU_O3_LSQ_UNIT_HH__
-#define __CPU_O3_LSQ_UNIT_HH__
-
-#include <algorithm>
-#include <map>
-#include <queue>
-
-#include "arch/faults.hh"
-#include "config/full_system.hh"
-#include "base/hashmap.hh"
-#include "cpu/inst_seq.hh"
-#include "mem/mem_interface.hh"
-//#include "mem/page_table.hh"
-//#include "sim/debug.hh"
-//#include "sim/sim_object.hh"
-
-/**
- * Class that implements the actual LQ and SQ for each specific
- * thread. Both are circular queues; load entries are freed upon
- * committing, while store entries are freed once they writeback. The
- * LSQUnit tracks if there are memory ordering violations, and also
- * detects partial load to store forwarding cases (a store only has
- * part of a load's data) that requires the load to wait until the
- * store writes back. In the former case it holds onto the instruction
- * until the dependence unit looks at it, and in the latter it stalls
- * the LSQ until the store writes back. At that point the load is
- * replayed.
- */
-template <class Impl>
-class LSQUnit {
- protected:
- typedef TheISA::IntReg IntReg;
- public:
- typedef typename Impl::Params Params;
- typedef typename Impl::FullCPU FullCPU;
- typedef typename Impl::DynInstPtr DynInstPtr;
- typedef typename Impl::CPUPol::IEW IEW;
- typedef typename Impl::CPUPol::IssueStruct IssueStruct;
-
- private:
- class StoreCompletionEvent : public Event {
- public:
- /** Constructs a store completion event. */
- StoreCompletionEvent(int store_idx, Event *wb_event, LSQUnit *lsq_ptr);
-
- /** Processes the store completion event. */
- void process();
-
- /** Returns the description of this event. */
- const char *description();
-
- /** The writeback event for the store. Needed for store
- * conditionals.
- */
- Event *wbEvent;
-
- private:
- /** The store index of the store being written back. */
- int storeIdx;
- private:
- /** The pointer to the LSQ unit that issued the store. */
- LSQUnit<Impl> *lsqPtr;
- };
-
- public:
- /** Constructs an LSQ unit. init() must be called prior to use. */
- LSQUnit();
-
- /** Initializes the LSQ unit with the specified number of entries. */
- void init(Params *params, unsigned maxLQEntries,
- unsigned maxSQEntries, unsigned id);
-
- /** Returns the name of the LSQ unit. */
- std::string name() const;
-
- /** Sets the CPU pointer. */
- void setCPU(FullCPU *cpu_ptr)
- { cpu = cpu_ptr; }
-
- /** Sets the IEW stage pointer. */
- void setIEW(IEW *iew_ptr)
- { iewStage = iew_ptr; }
-
- /** Sets the page table pointer. */
-// void setPageTable(PageTable *pt_ptr);
-
- void switchOut();
-
- void takeOverFrom();
-
- bool isSwitchedOut() { return switchedOut; }
-
- /** Ticks the LSQ unit, which in this case only resets the number of
- * used cache ports.
- * @todo: Move the number of used ports up to the LSQ level so it can
- * be shared by all LSQ units.
- */
- void tick() { usedPorts = 0; }
-
- /** Inserts an instruction. */
- void insert(DynInstPtr &inst);
- /** Inserts a load instruction. */
- void insertLoad(DynInstPtr &load_inst);
- /** Inserts a store instruction. */
- void insertStore(DynInstPtr &store_inst);
-
- /** Executes a load instruction. */
- Fault executeLoad(DynInstPtr &inst);
-
- Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
- /** Executes a store instruction. */
- Fault executeStore(DynInstPtr &inst);
-
- /** Commits the head load. */
- void commitLoad();
- /** Commits loads older than a specific sequence number. */
- void commitLoads(InstSeqNum &youngest_inst);
-
- /** Commits stores older than a specific sequence number. */
- void commitStores(InstSeqNum &youngest_inst);
-
- /** Writes back stores. */
- void writebackStores();
-
- // @todo: Include stats in the LSQ unit.
- //void regStats();
-
- /** Clears all the entries in the LQ. */
- void clearLQ();
-
- /** Clears all the entries in the SQ. */
- void clearSQ();
-
- /** Resizes the LQ to a given size. */
- void resizeLQ(unsigned size);
-
- /** Resizes the SQ to a given size. */
- void resizeSQ(unsigned size);
-
- /** Squashes all instructions younger than a specific sequence number. */
- void squash(const InstSeqNum &squashed_num);
-
- /** Returns if there is a memory ordering violation. Value is reset upon
- * call to getMemDepViolator().
- */
- bool violation() { return memDepViolator; }
-
- /** Returns the memory ordering violator. */
- DynInstPtr getMemDepViolator();
-
- /** Returns if a load became blocked due to the memory system. */
- bool loadBlocked()
- { return isLoadBlocked; }
-
- void clearLoadBlocked()
- { isLoadBlocked = false; }
-
- bool isLoadBlockedHandled()
- { return loadBlockedHandled; }
-
- void setLoadBlockedHandled()
- { loadBlockedHandled = true; }
-
- /** Returns the number of free entries (min of free LQ and SQ entries). */
- unsigned numFreeEntries();
-
- /** Returns the number of loads ready to execute. */
- int numLoadsReady();
-
- /** Returns the number of loads in the LQ. */
- int numLoads() { return loads; }
-
- /** Returns the number of stores in the SQ. */
- int numStores() { return stores; }
-
- /** Returns if either the LQ or SQ is full. */
- bool isFull() { return lqFull() || sqFull(); }
-
- /** Returns if the LQ is full. */
- bool lqFull() { return loads >= (LQEntries - 1); }
-
- /** Returns if the SQ is full. */
- bool sqFull() { return stores >= (SQEntries - 1); }
-
- /** Returns the number of instructions in the LSQ. */
- unsigned getCount() { return loads + stores; }
-
- /** Returns if there are any stores to writeback. */
- bool hasStoresToWB() { return storesToWB; }
-
- /** Returns the number of stores to writeback. */
- int numStoresToWB() { return storesToWB; }
-
- /** Returns if the LSQ unit will writeback on this cycle. */
- bool willWB() { return storeQueue[storeWBIdx].canWB &&
- !storeQueue[storeWBIdx].completed &&
- !dcacheInterface->isBlocked(); }
-
- private:
- /** Completes the store at the specified index. */
- void completeStore(int store_idx);
-
- /** Increments the given store index (circular queue). */
- inline void incrStIdx(int &store_idx);
- /** Decrements the given store index (circular queue). */
- inline void decrStIdx(int &store_idx);
- /** Increments the given load index (circular queue). */
- inline void incrLdIdx(int &load_idx);
- /** Decrements the given load index (circular queue). */
- inline void decrLdIdx(int &load_idx);
-
- public:
- /** Debugging function to dump instructions in the LSQ. */
- void dumpInsts();
-
- private:
- /** Pointer to the CPU. */
- FullCPU *cpu;
-
- /** Pointer to the IEW stage. */
- IEW *iewStage;
-
- /** Pointer to the D-cache. */
- MemInterface *dcacheInterface;
-
- /** Pointer to the page table. */
-// PageTable *pTable;
-
- public:
- struct SQEntry {
- /** Constructs an empty store queue entry. */
- SQEntry()
- : inst(NULL), req(NULL), size(0), data(0),
- canWB(0), committed(0), completed(0)
- { }
-
- /** Constructs a store queue entry for a given instruction. */
- SQEntry(DynInstPtr &_inst)
- : inst(_inst), req(NULL), size(0), data(0),
- canWB(0), committed(0), completed(0)
- { }
-
- /** The store instruction. */
- DynInstPtr inst;
- /** The memory request for the store. */
- MemReqPtr req;
- /** The size of the store. */
- int size;
- /** The store data. */
- IntReg data;
- /** Whether or not the store can writeback. */
- bool canWB;
- /** Whether or not the store is committed. */
- bool committed;
- /** Whether or not the store is completed. */
- bool completed;
- };
-
- private:
- /** The LSQUnit thread id. */
- unsigned lsqID;
-
- /** The store queue. */
- std::vector<SQEntry> storeQueue;
-
- /** The load queue. */
- std::vector<DynInstPtr> loadQueue;
-
- /** The number of LQ entries, plus a sentinel entry (circular queue).
- * @todo: Consider having var that records the true number of LQ entries.
- */
- unsigned LQEntries;
- /** The number of SQ entries, plus a sentinel entry (circular queue).
- * @todo: Consider having var that records the true number of SQ entries.
- */
- unsigned SQEntries;
-
- /** The number of load instructions in the LQ. */
- int loads;
- /** The number of store instructions in the SQ. */
- int stores;
- /** The number of store instructions in the SQ waiting to writeback. */
- int storesToWB;
-
- /** The index of the head instruction in the LQ. */
- int loadHead;
- /** The index of the tail instruction in the LQ. */
- int loadTail;
-
- /** The index of the head instruction in the SQ. */
- int storeHead;
- /** The index of the first instruction that may be ready to be
- * written back, and has not yet been written back.
- */
- int storeWBIdx;
- /** The index of the tail instruction in the SQ. */
- int storeTail;
-
- /// @todo Consider moving to a more advanced model with write vs read ports
- /** The number of cache ports available each cycle. */
- int cachePorts;
-
- /** The number of used cache ports in this cycle. */
- int usedPorts;
-
- bool switchedOut;
-
- //list<InstSeqNum> mshrSeqNums;
-
- /** Wire to read information from the issue stage time queue. */
- typename TimeBuffer<IssueStruct>::wire fromIssue;
-
- /** Whether or not the LSQ is stalled. */
- bool stalled;
- /** The store that causes the stall due to partial store to load
- * forwarding.
- */
- InstSeqNum stallingStoreIsn;
- /** The index of the above store. */
- int stallingLoadIdx;
-
- /** Whether or not a load is blocked due to the memory system. */
- bool isLoadBlocked;
-
- bool loadBlockedHandled;
-
- InstSeqNum blockedLoadSeqNum;
-
- /** The oldest load that caused a memory ordering violation. */
- DynInstPtr memDepViolator;
-
- // Will also need how many read/write ports the Dcache has. Or keep track
- // of that in stage that is one level up, and only call executeLoad/Store
- // the appropriate number of times.
-/*
- // total number of loads forwaded from LSQ stores
- Stats::Vector<> lsq_forw_loads;
-
- // total number of loads ignored due to invalid addresses
- Stats::Vector<> inv_addr_loads;
-
- // total number of software prefetches ignored due to invalid addresses
- Stats::Vector<> inv_addr_swpfs;
-
- // total non-speculative bogus addresses seen (debug var)
- Counter sim_invalid_addrs;
- Stats::Vector<> fu_busy; //cumulative fu busy
-
- // ready loads blocked due to memory disambiguation
- Stats::Vector<> lsq_blocked_loads;
-
- Stats::Scalar<> lsqInversion;
-*/
- public:
- /** Executes the load at the given index. */
- template <class T>
- Fault read(MemReqPtr &req, T &data, int load_idx);
-
- /** Executes the store at the given index. */
- template <class T>
- Fault write(MemReqPtr &req, T &data, int store_idx);
-
- /** Returns the index of the head load instruction. */
- int getLoadHead() { return loadHead; }
- /** Returns the sequence number of the head load instruction. */
- InstSeqNum getLoadHeadSeqNum()
- {
- if (loadQueue[loadHead]) {
- return loadQueue[loadHead]->seqNum;
- } else {
- return 0;
- }
-
- }
-
- /** Returns the index of the head store instruction. */
- int getStoreHead() { return storeHead; }
- /** Returns the sequence number of the head store instruction. */
- InstSeqNum getStoreHeadSeqNum()
- {
- if (storeQueue[storeHead].inst) {
- return storeQueue[storeHead].inst->seqNum;
- } else {
- return 0;
- }
-
- }
-
- /** Returns whether or not the LSQ unit is stalled. */
- bool isStalled() { return stalled; }
-};
-
-template <class Impl>
-template <class T>
-Fault
-LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
-{
- assert(loadQueue[load_idx]);
-
- assert(!loadQueue[load_idx]->isExecuted());
-
- // Make sure this isn't an uncacheable access
- // A bit of a hackish way to get uncached accesses to work only if they're
- // at the head of the LSQ and are ready to commit (at the head of the ROB
- // too).
- if (req->flags & UNCACHEABLE &&
- (load_idx != loadHead || !loadQueue[load_idx]->reachedCommit)) {
- iewStage->rescheduleMemInst(loadQueue[load_idx]);
- return TheISA::genMachineCheckFault();
- }
-
- // Check the SQ for any previous stores that might lead to forwarding
- int store_idx = loadQueue[load_idx]->sqIdx;
-
- int store_size = 0;
-
- DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
- "storeHead: %i addr: %#x\n",
- load_idx, store_idx, storeHead, req->paddr);
-
-#if 0
- if (req->flags & LOCKED) {
- cpu->lockAddr = req->paddr;
- cpu->lockFlag = true;
- }
-#endif
- req->cmd = Read;
- assert(!req->completionEvent);
- req->completionEvent = NULL;
- req->time = curTick;
-
- while (store_idx != -1) {
- // End once we've reached the top of the LSQ
- if (store_idx == storeWBIdx) {
- break;
- }
-
- // Move the index to one younger
- if (--store_idx < 0)
- store_idx += SQEntries;
-
- assert(storeQueue[store_idx].inst);
-
- store_size = storeQueue[store_idx].size;
-
- if (store_size == 0)
- continue;
-
- // Check if the store data is within the lower and upper bounds of
- // addresses that the request needs.
- bool store_has_lower_limit =
- req->vaddr >= storeQueue[store_idx].inst->effAddr;
- bool store_has_upper_limit =
- (req->vaddr + req->size) <= (storeQueue[store_idx].inst->effAddr +
- store_size);
- bool lower_load_has_store_part =
- req->vaddr < (storeQueue[store_idx].inst->effAddr +
- store_size);
- bool upper_load_has_store_part =
- (req->vaddr + req->size) > storeQueue[store_idx].inst->effAddr;
-
- // If the store's data has all of the data needed, we can forward.
- if (store_has_lower_limit && store_has_upper_limit) {
- // Get shift amount for offset into the store's data.
- int shift_amt = req->vaddr & (store_size - 1);
- // @todo: Magic number, assumes byte addressing
- shift_amt = shift_amt << 3;
-
- // Cast this to type T?
- data = storeQueue[store_idx].data >> shift_amt;
-
- assert(!req->data);
- req->data = new uint8_t[64];
-
- memcpy(req->data, &data, req->size);
-
- DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
- "addr %#x, data %#x\n",
- store_idx, req->vaddr, *(req->data));
-
- typename IEW::LdWritebackEvent *wb =
- new typename IEW::LdWritebackEvent(loadQueue[load_idx],
- iewStage);
-
- // We'll say this has a 1 cycle load-store forwarding latency
- // for now.
- // @todo: Need to make this a parameter.
- wb->schedule(curTick);
-
- // Should keep track of stat for forwarded data
- return NoFault;
- } else if ((store_has_lower_limit && lower_load_has_store_part) ||
- (store_has_upper_limit && upper_load_has_store_part) ||
- (lower_load_has_store_part && upper_load_has_store_part)) {
- // This is the partial store-load forwarding case where a store
- // has only part of the load's data.
-
- // If it's already been written back, then don't worry about
- // stalling on it.
- if (storeQueue[store_idx].completed) {
- continue;
- }
-
- // Must stall load and force it to retry, so long as it's the oldest
- // load that needs to do so.
- if (!stalled ||
- (stalled &&
- loadQueue[load_idx]->seqNum <
- loadQueue[stallingLoadIdx]->seqNum)) {
- stalled = true;
- stallingStoreIsn = storeQueue[store_idx].inst->seqNum;
- stallingLoadIdx = load_idx;
- }
-
- // Tell IQ/mem dep unit that this instruction will need to be
- // rescheduled eventually
- iewStage->rescheduleMemInst(loadQueue[load_idx]);
-
- // Do not generate a writeback event as this instruction is not
- // complete.
- DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
- "Store idx %i to load addr %#x\n",
- store_idx, req->vaddr);
-
- return NoFault;
- }
- }
-
- // If there's no forwarding case, then go access memory
- DynInstPtr inst = loadQueue[load_idx];
-
- DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n",
- loadQueue[load_idx]->seqNum, loadQueue[load_idx]->readPC());
-
- assert(!req->data);
- req->data = new uint8_t[64];
- Fault fault = cpu->read(req, data);
- memcpy(req->data, &data, sizeof(T));
-
- ++usedPorts;
-
- // if we have a cache, do cache access too
- if (fault == NoFault && dcacheInterface) {
- if (dcacheInterface->isBlocked()) {
- // There's an older load that's already going to squash.
- if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum)
- return NoFault;
-
- // Record that the load was blocked due to memory. This
- // load will squash all instructions after it, be
- // refetched, and re-executed.
- isLoadBlocked = true;
- loadBlockedHandled = false;
- blockedLoadSeqNum = inst->seqNum;
- // No fault occurred, even though the interface is blocked.
- return NoFault;
- }
-
- DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n",
- loadQueue[load_idx]->readPC());
-
- assert(!req->completionEvent);
- req->completionEvent =
- new typename IEW::LdWritebackEvent(loadQueue[load_idx], iewStage);
- MemAccessResult result = dcacheInterface->access(req);
-
- assert(dcacheInterface->doEvents());
-
- if (result != MA_HIT) {
- DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n");
- DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
- inst->seqNum);
- } else {
- DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n");
- DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
- inst->seqNum);
- }
- }
-
- return fault;
-}
-
-template <class Impl>
-template <class T>
-Fault
-LSQUnit<Impl>::write(MemReqPtr &req, T &data, int store_idx)
-{
- assert(storeQueue[store_idx].inst);
-
- DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x"
- " | storeHead:%i [sn:%i]\n",
- store_idx, req->paddr, data, storeHead,
- storeQueue[store_idx].inst->seqNum);
-
- storeQueue[store_idx].req = req;
- storeQueue[store_idx].size = sizeof(T);
- storeQueue[store_idx].data = data;
-
- // This function only writes the data to the store queue, so no fault
- // can happen here.
- return NoFault;
-}
-
-#endif // __CPU_O3_LSQ_UNIT_HH__
+++ /dev/null
-/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "cpu/checker/cpu.hh"
-#include "cpu/o3/lsq_unit.hh"
-#include "base/str.hh"
-
-template <class Impl>
-LSQUnit<Impl>::StoreCompletionEvent::StoreCompletionEvent(int store_idx,
- Event *wb_event,
- LSQUnit<Impl> *lsq_ptr)
- : Event(&mainEventQueue),
- wbEvent(wb_event),
- storeIdx(store_idx),
- lsqPtr(lsq_ptr)
-{
- this->setFlags(Event::AutoDelete);
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::StoreCompletionEvent::process()
-{
- DPRINTF(LSQ, "Cache miss complete for store idx:%i\n", storeIdx);
- DPRINTF(Activity, "Activity: st writeback event idx:%i\n", storeIdx);
-
- //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
-
- if (lsqPtr->isSwitchedOut())
- return;
-
- lsqPtr->cpu->wakeCPU();
- if (wbEvent)
- wbEvent->process();
- lsqPtr->completeStore(storeIdx);
-}
-
-template <class Impl>
-const char *
-LSQUnit<Impl>::StoreCompletionEvent::description()
-{
- return "LSQ store completion event";
-}
-
-template <class Impl>
-LSQUnit<Impl>::LSQUnit()
- : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false),
- loadBlockedHandled(false)
-{
-}
-
-template<class Impl>
-void
-LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
- unsigned maxSQEntries, unsigned id)
-
-{
- DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id);
-
- switchedOut = false;
-
- lsqID = id;
-
- // Add 1 for the sentinel entry (they are circular queues).
- LQEntries = maxLQEntries + 1;
- SQEntries = maxSQEntries + 1;
-
- loadQueue.resize(LQEntries);
- storeQueue.resize(SQEntries);
-
- loadHead = loadTail = 0;
-
- storeHead = storeWBIdx = storeTail = 0;
-
- usedPorts = 0;
- cachePorts = params->cachePorts;
-
- dcacheInterface = params->dcacheInterface;
-
- memDepViolator = NULL;
-
- blockedLoadSeqNum = 0;
-}
-
-template<class Impl>
-std::string
-LSQUnit<Impl>::name() const
-{
- if (Impl::MaxThreads == 1) {
- return iewStage->name() + ".lsq";
- } else {
- return iewStage->name() + ".lsq.thread." + to_string(lsqID);
- }
-}
-
-template<class Impl>
-void
-LSQUnit<Impl>::clearLQ()
-{
- loadQueue.clear();
-}
-
-template<class Impl>
-void
-LSQUnit<Impl>::clearSQ()
-{
- storeQueue.clear();
-}
-
-#if 0
-template<class Impl>
-void
-LSQUnit<Impl>::setPageTable(PageTable *pt_ptr)
-{
- DPRINTF(LSQUnit, "Setting the page table pointer.\n");
- pTable = pt_ptr;
-}
-#endif
-
-template<class Impl>
-void
-LSQUnit<Impl>::switchOut()
-{
- switchedOut = true;
- for (int i = 0; i < loadQueue.size(); ++i)
- loadQueue[i] = NULL;
-
- assert(storesToWB == 0);
-
- while (storesToWB > 0 &&
- storeWBIdx != storeTail &&
- storeQueue[storeWBIdx].inst &&
- storeQueue[storeWBIdx].canWB) {
-
- if (storeQueue[storeWBIdx].size == 0 ||
- storeQueue[storeWBIdx].inst->isDataPrefetch() ||
- storeQueue[storeWBIdx].committed ||
- storeQueue[storeWBIdx].req->flags & LOCKED) {
- incrStIdx(storeWBIdx);
-
- continue;
- }
-
- assert(storeQueue[storeWBIdx].req);
- assert(!storeQueue[storeWBIdx].committed);
-
- MemReqPtr req = storeQueue[storeWBIdx].req;
- storeQueue[storeWBIdx].committed = true;
-
- req->cmd = Write;
- req->completionEvent = NULL;
- req->time = curTick;
- assert(!req->data);
- req->data = new uint8_t[64];
- memcpy(req->data, (uint8_t *)&storeQueue[storeWBIdx].data, req->size);
-
- DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x "
- "to Addr:%#x, data:%#x [sn:%lli]\n",
- storeWBIdx,storeQueue[storeWBIdx].inst->readPC(),
- req->paddr, *(req->data),
- storeQueue[storeWBIdx].inst->seqNum);
-
- switch(storeQueue[storeWBIdx].size) {
- case 1:
- cpu->write(req, (uint8_t &)storeQueue[storeWBIdx].data);
- break;
- case 2:
- cpu->write(req, (uint16_t &)storeQueue[storeWBIdx].data);
- break;
- case 4:
- cpu->write(req, (uint32_t &)storeQueue[storeWBIdx].data);
- break;
- case 8:
- cpu->write(req, (uint64_t &)storeQueue[storeWBIdx].data);
- break;
- default:
- panic("Unexpected store size!\n");
- }
- incrStIdx(storeWBIdx);
- }
-}
-
-template<class Impl>
-void
-LSQUnit<Impl>::takeOverFrom()
-{
- switchedOut = false;
- loads = stores = storesToWB = 0;
-
- loadHead = loadTail = 0;
-
- storeHead = storeWBIdx = storeTail = 0;
-
- usedPorts = 0;
-
- memDepViolator = NULL;
-
- blockedLoadSeqNum = 0;
-
- stalled = false;
- isLoadBlocked = false;
- loadBlockedHandled = false;
-}
-
-template<class Impl>
-void
-LSQUnit<Impl>::resizeLQ(unsigned size)
-{
- unsigned size_plus_sentinel = size + 1;
- assert(size_plus_sentinel >= LQEntries);
-
- if (size_plus_sentinel > LQEntries) {
- while (size_plus_sentinel > loadQueue.size()) {
- DynInstPtr dummy;
- loadQueue.push_back(dummy);
- LQEntries++;
- }
- } else {
- LQEntries = size_plus_sentinel;
- }
-
-}
-
-template<class Impl>
-void
-LSQUnit<Impl>::resizeSQ(unsigned size)
-{
- unsigned size_plus_sentinel = size + 1;
- if (size_plus_sentinel > SQEntries) {
- while (size_plus_sentinel > storeQueue.size()) {
- SQEntry dummy;
- storeQueue.push_back(dummy);
- SQEntries++;
- }
- } else {
- SQEntries = size_plus_sentinel;
- }
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::insert(DynInstPtr &inst)
-{
- assert(inst->isMemRef());
-
- assert(inst->isLoad() || inst->isStore());
-
- if (inst->isLoad()) {
- insertLoad(inst);
- } else {
- insertStore(inst);
- }
-
- inst->setInLSQ();
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::insertLoad(DynInstPtr &load_inst)
-{
- assert((loadTail + 1) % LQEntries != loadHead);
- assert(loads < LQEntries);
-
- DPRINTF(LSQUnit, "Inserting load PC %#x, idx:%i [sn:%lli]\n",
- load_inst->readPC(), loadTail, load_inst->seqNum);
-
- load_inst->lqIdx = loadTail;
-
- if (stores == 0) {
- load_inst->sqIdx = -1;
- } else {
- load_inst->sqIdx = storeTail;
- }
-
- loadQueue[loadTail] = load_inst;
-
- incrLdIdx(loadTail);
-
- ++loads;
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::insertStore(DynInstPtr &store_inst)
-{
- // Make sure it is not full before inserting an instruction.
- assert((storeTail + 1) % SQEntries != storeHead);
- assert(stores < SQEntries);
-
- DPRINTF(LSQUnit, "Inserting store PC %#x, idx:%i [sn:%lli]\n",
- store_inst->readPC(), storeTail, store_inst->seqNum);
-
- store_inst->sqIdx = storeTail;
- store_inst->lqIdx = loadTail;
-
- storeQueue[storeTail] = SQEntry(store_inst);
-
- incrStIdx(storeTail);
-
- ++stores;
-}
-
-template <class Impl>
-typename Impl::DynInstPtr
-LSQUnit<Impl>::getMemDepViolator()
-{
- DynInstPtr temp = memDepViolator;
-
- memDepViolator = NULL;
-
- return temp;
-}
-
-template <class Impl>
-unsigned
-LSQUnit<Impl>::numFreeEntries()
-{
- unsigned free_lq_entries = LQEntries - loads;
- unsigned free_sq_entries = SQEntries - stores;
-
- // Both the LQ and SQ entries have an extra dummy entry to differentiate
- // empty/full conditions. Subtract 1 from the free entries.
- if (free_lq_entries < free_sq_entries) {
- return free_lq_entries - 1;
- } else {
- return free_sq_entries - 1;
- }
-}
-
-template <class Impl>
-int
-LSQUnit<Impl>::numLoadsReady()
-{
- int load_idx = loadHead;
- int retval = 0;
-
- while (load_idx != loadTail) {
- assert(loadQueue[load_idx]);
-
- if (loadQueue[load_idx]->readyToIssue()) {
- ++retval;
- }
- }
-
- return retval;
-}
-
-template <class Impl>
-Fault
-LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
-{
- // Execute a specific load.
- Fault load_fault = NoFault;
-
- DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n",
- inst->readPC(),inst->seqNum);
-
-// load_fault = inst->initiateAcc();
- load_fault = inst->execute();
-
- // If the instruction faulted, then we need to send it along to commit
- // without the instruction completing.
- if (load_fault != NoFault) {
- // Send this instruction to commit, also make sure iew stage
- // realizes there is activity.
- iewStage->instToCommit(inst);
- iewStage->activityThisCycle();
- }
-
- return load_fault;
-}
-
-template <class Impl>
-Fault
-LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
-{
- using namespace TheISA;
- // Make sure that a store exists.
- assert(stores != 0);
-
- int store_idx = store_inst->sqIdx;
-
- DPRINTF(LSQUnit, "Executing store PC %#x [sn:%lli]\n",
- store_inst->readPC(), store_inst->seqNum);
-
- // Check the recently completed loads to see if any match this store's
- // address. If so, then we have a memory ordering violation.
- int load_idx = store_inst->lqIdx;
-
- Fault store_fault = store_inst->initiateAcc();
-// Fault store_fault = store_inst->execute();
-
- if (storeQueue[store_idx].size == 0) {
- DPRINTF(LSQUnit,"Fault on Store PC %#x, [sn:%lli],Size = 0\n",
- store_inst->readPC(),store_inst->seqNum);
-
- return store_fault;
- }
-
- assert(store_fault == NoFault);
-
- if (store_inst->isStoreConditional()) {
- // Store conditionals need to set themselves as able to
- // writeback if we haven't had a fault by here.
- storeQueue[store_idx].canWB = true;
-
- ++storesToWB;
- }
-
- if (!memDepViolator) {
- while (load_idx != loadTail) {
- // Really only need to check loads that have actually executed
- // It's safe to check all loads because effAddr is set to
- // InvalAddr when the dyn inst is created.
-
- // @todo: For now this is extra conservative, detecting a
- // violation if the addresses match assuming all accesses
- // are quad word accesses.
-
- // @todo: Fix this, magic number being used here
- if ((loadQueue[load_idx]->effAddr >> 8) ==
- (store_inst->effAddr >> 8)) {
- // A load incorrectly passed this store. Squash and refetch.
- // For now return a fault to show that it was unsuccessful.
- memDepViolator = loadQueue[load_idx];
-
- return genMachineCheckFault();
- }
-
- incrLdIdx(load_idx);
- }
-
- // If we've reached this point, there was no violation.
- memDepViolator = NULL;
- }
-
- return store_fault;
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::commitLoad()
-{
- assert(loadQueue[loadHead]);
-
- DPRINTF(LSQUnit, "Committing head load instruction, PC %#x\n",
- loadQueue[loadHead]->readPC());
-
-
- loadQueue[loadHead] = NULL;
-
- incrLdIdx(loadHead);
-
- --loads;
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::commitLoads(InstSeqNum &youngest_inst)
-{
- assert(loads == 0 || loadQueue[loadHead]);
-
- while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) {
- commitLoad();
- }
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst)
-{
- assert(stores == 0 || storeQueue[storeHead].inst);
-
- int store_idx = storeHead;
-
- while (store_idx != storeTail) {
- assert(storeQueue[store_idx].inst);
- // Mark any stores that are now committed and have not yet
- // been marked as able to write back.
- if (!storeQueue[store_idx].canWB) {
- if (storeQueue[store_idx].inst->seqNum > youngest_inst) {
- break;
- }
- DPRINTF(LSQUnit, "Marking store as able to write back, PC "
- "%#x [sn:%lli]\n",
- storeQueue[store_idx].inst->readPC(),
- storeQueue[store_idx].inst->seqNum);
-
- storeQueue[store_idx].canWB = true;
-
- ++storesToWB;
- }
-
- incrStIdx(store_idx);
- }
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::writebackStores()
-{
- while (storesToWB > 0 &&
- storeWBIdx != storeTail &&
- storeQueue[storeWBIdx].inst &&
- storeQueue[storeWBIdx].canWB &&
- usedPorts < cachePorts) {
-
- // Store didn't write any data so no need to write it back to
- // memory.
- if (storeQueue[storeWBIdx].size == 0) {
- completeStore(storeWBIdx);
-
- incrStIdx(storeWBIdx);
-
- continue;
- }
-
- if (dcacheInterface && dcacheInterface->isBlocked()) {
- DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
- " is blocked!\n");
- break;
- }
-
- ++usedPorts;
-
- if (storeQueue[storeWBIdx].inst->isDataPrefetch()) {
- incrStIdx(storeWBIdx);
-
- continue;
- }
-
- assert(storeQueue[storeWBIdx].req);
- assert(!storeQueue[storeWBIdx].committed);
-
- MemReqPtr req = storeQueue[storeWBIdx].req;
- storeQueue[storeWBIdx].committed = true;
-
- req->cmd = Write;
- req->completionEvent = NULL;
- req->time = curTick;
- assert(!req->data);
- req->data = new uint8_t[64];
- memcpy(req->data, (uint8_t *)&storeQueue[storeWBIdx].data, req->size);
-
- DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x "
- "to Addr:%#x, data:%#x [sn:%lli]\n",
- storeWBIdx,storeQueue[storeWBIdx].inst->readPC(),
- req->paddr, *(req->data),
- storeQueue[storeWBIdx].inst->seqNum);
-
- switch(storeQueue[storeWBIdx].size) {
- case 1:
- cpu->write(req, (uint8_t &)storeQueue[storeWBIdx].data);
- break;
- case 2:
- cpu->write(req, (uint16_t &)storeQueue[storeWBIdx].data);
- break;
- case 4:
- cpu->write(req, (uint32_t &)storeQueue[storeWBIdx].data);
- break;
- case 8:
- cpu->write(req, (uint64_t &)storeQueue[storeWBIdx].data);
- break;
- default:
- panic("Unexpected store size!\n");
- }
-
- // Stores other than store conditionals are completed at this
- // time. Mark them as completed and, if we have a checker,
- // tell it that the instruction is completed.
- // @todo: Figure out what time I can say stores are complete in
- // the timing memory.
- if (!(req->flags & LOCKED)) {
- storeQueue[storeWBIdx].inst->setCompleted();
- if (cpu->checker) {
- cpu->checker->tick(storeQueue[storeWBIdx].inst);
- }
- }
-
- if (dcacheInterface) {
- assert(!req->completionEvent);
- StoreCompletionEvent *store_event = new
- StoreCompletionEvent(storeWBIdx, NULL, this);
- req->completionEvent = store_event;
-
- MemAccessResult result = dcacheInterface->access(req);
-
- if (isStalled() &&
- storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) {
- DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
- "load idx:%i\n",
- stallingStoreIsn, stallingLoadIdx);
- stalled = false;
- stallingStoreIsn = 0;
- iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
- }
-
- typename IEW::LdWritebackEvent *wb = NULL;
- if (req->flags & LOCKED) {
- // Stx_C should not generate a system port transaction
- // if it misses in the cache, but that might be hard
- // to accomplish without explicit cache support.
- wb = new typename
- IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst,
- iewStage);
- store_event->wbEvent = wb;
- }
-
- if (result != MA_HIT && dcacheInterface->doEvents()) {
- DPRINTF(LSQUnit,"D-Cache Write Miss on idx:%i!\n",
- storeWBIdx);
-
- DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
- storeQueue[storeWBIdx].inst->seqNum);
-
- //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum);
-
- //DPRINTF(LSQUnit, "Added MSHR. count = %i\n",mshrSeqNums.size());
-
- // @todo: Increment stat here.
- } else {
- DPRINTF(LSQUnit,"D-Cache: Write Hit on idx:%i !\n",
- storeWBIdx);
-
- DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
- storeQueue[storeWBIdx].inst->seqNum);
- }
-
- incrStIdx(storeWBIdx);
- } else {
- panic("Must HAVE DCACHE!!!!!\n");
- }
- }
-
- // Not sure this should set it to 0.
- usedPorts = 0;
-
- assert(stores >= 0 && storesToWB >= 0);
-}
-
-/*template <class Impl>
-void
-LSQUnit<Impl>::removeMSHR(InstSeqNum seqNum)
-{
- list<InstSeqNum>::iterator mshr_it = find(mshrSeqNums.begin(),
- mshrSeqNums.end(),
- seqNum);
-
- if (mshr_it != mshrSeqNums.end()) {
- mshrSeqNums.erase(mshr_it);
- DPRINTF(LSQUnit, "Removing MSHR. count = %i\n",mshrSeqNums.size());
- }
-}*/
-
-template <class Impl>
-void
-LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
-{
- DPRINTF(LSQUnit, "Squashing until [sn:%lli]!"
- "(Loads:%i Stores:%i)\n", squashed_num, loads, stores);
-
- int load_idx = loadTail;
- decrLdIdx(load_idx);
-
- while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) {
- DPRINTF(LSQUnit,"Load Instruction PC %#x squashed, "
- "[sn:%lli]\n",
- loadQueue[load_idx]->readPC(),
- loadQueue[load_idx]->seqNum);
-
- if (isStalled() && load_idx == stallingLoadIdx) {
- stalled = false;
- stallingStoreIsn = 0;
- stallingLoadIdx = 0;
- }
-
- // Clear the smart pointer to make sure it is decremented.
- loadQueue[load_idx]->squashed = true;
- loadQueue[load_idx] = NULL;
- --loads;
-
- // Inefficient!
- loadTail = load_idx;
-
- decrLdIdx(load_idx);
- }
-
- if (isLoadBlocked) {
- if (squashed_num < blockedLoadSeqNum) {
- isLoadBlocked = false;
- loadBlockedHandled = false;
- blockedLoadSeqNum = 0;
- }
- }
-
- int store_idx = storeTail;
- decrStIdx(store_idx);
-
- while (stores != 0 &&
- storeQueue[store_idx].inst->seqNum > squashed_num) {
- // Instructions marked as can WB are already committed.
- if (storeQueue[store_idx].canWB) {
- break;
- }
-
- DPRINTF(LSQUnit,"Store Instruction PC %#x squashed, "
- "idx:%i [sn:%lli]\n",
- storeQueue[store_idx].inst->readPC(),
- store_idx, storeQueue[store_idx].inst->seqNum);
-
- // I don't think this can happen. It should have been cleared
- // by the stalling load.
- if (isStalled() &&
- storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
- panic("Is stalled should have been cleared by stalling load!\n");
- stalled = false;
- stallingStoreIsn = 0;
- }
-
- // Clear the smart pointer to make sure it is decremented.
- storeQueue[store_idx].inst->squashed = true;
- storeQueue[store_idx].inst = NULL;
- storeQueue[store_idx].canWB = 0;
-
- if (storeQueue[store_idx].req) {
- // There should not be a completion event if the store has
- // not yet committed.
- assert(!storeQueue[store_idx].req->completionEvent);
- }
-
- storeQueue[store_idx].req = NULL;
- --stores;
-
- // Inefficient!
- storeTail = store_idx;
-
- decrStIdx(store_idx);
- }
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::completeStore(int store_idx)
-{
- assert(storeQueue[store_idx].inst);
- storeQueue[store_idx].completed = true;
- --storesToWB;
- // A bit conservative because a store completion may not free up entries,
- // but hopefully avoids two store completions in one cycle from making
- // the CPU tick twice.
- cpu->activityThisCycle();
-
- if (store_idx == storeHead) {
- do {
- incrStIdx(storeHead);
-
- --stores;
- } while (storeQueue[storeHead].completed &&
- storeHead != storeTail);
-
- iewStage->updateLSQNextCycle = true;
- }
-
- DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head "
- "idx:%i\n",
- storeQueue[store_idx].inst->seqNum, store_idx, storeHead);
-
- if (isStalled() &&
- storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
- DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
- "load idx:%i\n",
- stallingStoreIsn, stallingLoadIdx);
- stalled = false;
- stallingStoreIsn = 0;
- iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
- }
-
- storeQueue[store_idx].inst->setCompleted();
-
- // Tell the checker we've completed this instruction. Some stores
- // may get reported twice to the checker, but the checker can
- // handle that case.
- if (cpu->checker) {
- cpu->checker->tick(storeQueue[store_idx].inst);
- }
-}
-
-template <class Impl>
-inline void
-LSQUnit<Impl>::incrStIdx(int &store_idx)
-{
- if (++store_idx >= SQEntries)
- store_idx = 0;
-}
-
-template <class Impl>
-inline void
-LSQUnit<Impl>::decrStIdx(int &store_idx)
-{
- if (--store_idx < 0)
- store_idx += SQEntries;
-}
-
-template <class Impl>
-inline void
-LSQUnit<Impl>::incrLdIdx(int &load_idx)
-{
- if (++load_idx >= LQEntries)
- load_idx = 0;
-}
-
-template <class Impl>
-inline void
-LSQUnit<Impl>::decrLdIdx(int &load_idx)
-{
- if (--load_idx < 0)
- load_idx += LQEntries;
-}
-
-template <class Impl>
-void
-LSQUnit<Impl>::dumpInsts()
-{
- cprintf("Load store queue: Dumping instructions.\n");
- cprintf("Load queue size: %i\n", loads);
- cprintf("Load queue: ");
-
- int load_idx = loadHead;
-
- while (load_idx != loadTail && loadQueue[load_idx]) {
- cprintf("%#x ", loadQueue[load_idx]->readPC());
-
- incrLdIdx(load_idx);
- }
-
- cprintf("Store queue size: %i\n", stores);
- cprintf("Store queue: ");
-
- int store_idx = storeHead;
-
- while (store_idx != storeTail && storeQueue[store_idx].inst) {
- cprintf("%#x ", storeQueue[store_idx].inst->readPC());
-
- incrStIdx(store_idx);
- }
-
- cprintf("\n");
-}
+++ /dev/null
-/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "cpu/o3/scoreboard.hh"
-
-Scoreboard::Scoreboard(unsigned activeThreads,
- unsigned _numLogicalIntRegs,
- unsigned _numPhysicalIntRegs,
- unsigned _numLogicalFloatRegs,
- unsigned _numPhysicalFloatRegs,
- unsigned _numMiscRegs,
- unsigned _zeroRegIdx)
- : numLogicalIntRegs(_numLogicalIntRegs),
- numPhysicalIntRegs(_numPhysicalIntRegs),
- numLogicalFloatRegs(_numLogicalFloatRegs),
- numPhysicalFloatRegs(_numPhysicalFloatRegs),
- numMiscRegs(_numMiscRegs),
- zeroRegIdx(_zeroRegIdx)
-{
- //Get Register Sizes
- numLogicalRegs = numLogicalIntRegs + numLogicalFloatRegs;
- numPhysicalRegs = numPhysicalIntRegs + numPhysicalFloatRegs;
-
- //Resize scoreboard appropriately
- regScoreBoard.resize(numPhysicalRegs + (numMiscRegs * activeThreads));
-
- //Initialize values
- for (int i=0; i < numLogicalIntRegs * activeThreads; i++) {
- regScoreBoard[i] = 1;
- }
-
- for (int i= numPhysicalIntRegs;
- i < numPhysicalIntRegs + (numLogicalFloatRegs * activeThreads);
- i++) {
- regScoreBoard[i] = 1;
- }
-
- for (int i = numPhysicalRegs;
- i < numPhysicalRegs + (numMiscRegs * activeThreads);
- i++) {
- regScoreBoard[i] = 1;
- }
-}
-
-std::string
-Scoreboard::name() const
-{
- return "cpu.scoreboard";
-}
-
-bool
-Scoreboard::getReg(PhysRegIndex phys_reg)
-{
- // Always ready if int or fp zero reg.
- if (phys_reg == zeroRegIdx ||
- phys_reg == (zeroRegIdx + numPhysicalIntRegs)) {
- return 1;
- }
-
- return regScoreBoard[phys_reg];
-}
-
-void
-Scoreboard::setReg(PhysRegIndex phys_reg)
-{
- DPRINTF(Scoreboard, "Setting reg %i as ready\n", phys_reg);
-
- regScoreBoard[phys_reg] = 1;
-}
-
-void
-Scoreboard::unsetReg(PhysRegIndex ready_reg)
-{
- if (ready_reg == zeroRegIdx ||
- ready_reg == (zeroRegIdx + numPhysicalIntRegs)) {
- // Don't do anything if int or fp zero reg.
- return;
- }
-
- regScoreBoard[ready_reg] = 0;
-}
+++ /dev/null
-/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __CPU_O3_SCOREBOARD_HH__
-#define __CPU_O3_SCOREBOARD_HH__
-
-#include <iostream>
-#include <utility>
-#include <vector>
-#include "arch/alpha/isa_traits.hh"
-#include "base/trace.hh"
-#include "base/traceflags.hh"
-#include "cpu/o3/comm.hh"
-
-/**
- * Implements a simple scoreboard to track which registers are ready.
- * This class assumes that the fp registers start, index wise, right after
- * the integer registers. The misc. registers start, index wise, right after
- * the fp registers.
- * @todo: Fix up handling of the zero register in case the decoder does not
- * automatically make insts that write the zero register into nops.
- */
-class Scoreboard
-{
- public:
- /** Constructs a scoreboard.
- * @param activeThreads The number of active threads.
- * @param _numLogicalIntRegs Number of logical integer registers.
- * @param _numPhysicalIntRegs Number of physical integer registers.
- * @param _numLogicalFloatRegs Number of logical fp registers.
- * @param _numPhysicalFloatRegs Number of physical fp registers.
- * @param _numMiscRegs Number of miscellaneous registers.
- * @param _zeroRegIdx Index of the zero register.
- */
- Scoreboard(unsigned activeThreads,
- unsigned _numLogicalIntRegs,
- unsigned _numPhysicalIntRegs,
- unsigned _numLogicalFloatRegs,
- unsigned _numPhysicalFloatRegs,
- unsigned _numMiscRegs,
- unsigned _zeroRegIdx);
-
- /** Destructor. */
- ~Scoreboard() {}
-
- /** Returns the name of the scoreboard. */
- std::string name() const;
-
- /** Checks if the register is ready. */
- bool getReg(PhysRegIndex ready_reg);
-
- /** Sets the register as ready. */
- void setReg(PhysRegIndex phys_reg);
-
- /** Sets the register as not ready. */
- void unsetReg(PhysRegIndex ready_reg);
-
- private:
- /** Scoreboard of physical integer registers, saying whether or not they
- * are ready.
- */
- std::vector<bool> regScoreBoard;
-
- /** Number of logical integer registers. */
- int numLogicalIntRegs;
-
- /** Number of physical integer registers. */
- int numPhysicalIntRegs;
-
- /** Number of logical floating point registers. */
- int numLogicalFloatRegs;
-
- /** Number of physical floating point registers. */
- int numPhysicalFloatRegs;
-
- /** Number of miscellaneous registers. */
- int numMiscRegs;
-
- /** Number of logical integer + float registers. */
- int numLogicalRegs;
-
- /** Number of physical integer + float registers. */
- int numPhysicalRegs;
-
- /** The logical index of the zero register. */
- int zeroRegIdx;
-};
-
-#endif
+++ /dev/null
-/*
- * Copyright (c) 2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __CPU_O3_THREAD_STATE_HH__
-#define __CPU_O3_THREAD_STATE_HH__
-
-#include "arch/faults.hh"
-#include "arch/isa_traits.hh"
-#include "cpu/exec_context.hh"
-#include "cpu/thread_state.hh"
-
-class Event;
-class Process;
-
-#if FULL_SYSTEM
-class EndQuiesceEvent;
-class FunctionProfile;
-class ProfileNode;
-#else
-class FunctionalMemory;
-class Process;
-#endif
-
-/**
- * Class that has various thread state, such as the status, the
- * current instruction being processed, whether or not the thread has
- * a trap pending or is being externally updated, the ExecContext
- * proxy pointer, etc. It also handles anything related to a specific
- * thread's process, such as syscalls and checking valid addresses.
- */
-template <class Impl>
-struct O3ThreadState : public ThreadState {
- typedef ExecContext::Status Status;
- typedef typename Impl::FullCPU FullCPU;
-
- Status _status;
-
- // Current instruction
- TheISA::MachInst inst;
- private:
- FullCPU *cpu;
- public:
-
- bool inSyscall;
-
- bool trapPending;
-
-#if FULL_SYSTEM
- O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem)
- : ThreadState(-1, _thread_num, _mem),
- inSyscall(0), trapPending(0)
- { }
-#else
- O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid)
- : ThreadState(-1, _thread_num, _process->getMemory(), _process, _asid),
- cpu(_cpu), inSyscall(0), trapPending(0)
- { }
-
- O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem,
- int _asid)
- : ThreadState(-1, _thread_num, _mem, NULL, _asid),
- cpu(_cpu), inSyscall(0), trapPending(0)
- { }
-#endif
-
- ExecContext *xcProxy;
-
- ExecContext *getXCProxy() { return xcProxy; }
-
- Status status() const { return _status; }
-
- void setStatus(Status new_status) { _status = new_status; }
-
-#if !FULL_SYSTEM
- bool validInstAddr(Addr addr)
- { return process->validInstAddr(addr); }
-
- bool validDataAddr(Addr addr)
- { return process->validDataAddr(addr); }
-#endif
-
- bool misspeculating() { return false; }
-
- void setInst(TheISA::MachInst _inst) { inst = _inst; }
-
- Counter readFuncExeInst() { return funcExeInst; }
-
- void setFuncExeInst(Counter new_val) { funcExeInst = new_val; }
-
-#if !FULL_SYSTEM
- void syscall() { process->syscall(xcProxy); }
-#endif
-};
-
-#endif // __CPU_O3_THREAD_STATE_HH__
+++ /dev/null
-
-#include "cpu/ozone/back_end_impl.hh"
-#include "cpu/ozone/ozone_impl.hh"
-
-//template class BackEnd<OzoneImpl>;
+++ /dev/null
-
-#ifndef __CPU_OZONE_BACK_END_HH__
-#define __CPU_OZONE_BACK_END_HH__
-
-#include <list>
-#include <queue>
-#include <string>
-
-#include "arch/faults.hh"
-#include "base/timebuf.hh"
-#include "cpu/inst_seq.hh"
-#include "cpu/ozone/rename_table.hh"
-#include "cpu/ozone/thread_state.hh"
-#include "mem/functional/functional.hh"
-#include "mem/mem_interface.hh"
-#include "mem/mem_req.hh"
-#include "sim/eventq.hh"
-
-class ExecContext;
-
-template <class Impl>
-class OzoneThreadState;
-
-template <class Impl>
-class BackEnd
-{
- public:
- typedef OzoneThreadState<Impl> Thread;
-
- typedef typename Impl::Params Params;
- typedef typename Impl::DynInst DynInst;
- typedef typename Impl::DynInstPtr DynInstPtr;
- typedef typename Impl::FullCPU FullCPU;
- typedef typename Impl::FrontEnd FrontEnd;
- typedef typename Impl::FullCPU::CommStruct CommStruct;
-
- struct SizeStruct {
- int size;
- };
-
- typedef SizeStruct DispatchToIssue;
- typedef SizeStruct IssueToExec;
- typedef SizeStruct ExecToCommit;
- typedef SizeStruct Writeback;
-
- TimeBuffer<DispatchToIssue> d2i;
- typename TimeBuffer<DispatchToIssue>::wire instsToDispatch;
- TimeBuffer<IssueToExec> i2e;
- typename TimeBuffer<IssueToExec>::wire instsToExecute;
- TimeBuffer<ExecToCommit> e2c;
- TimeBuffer<Writeback> numInstsToWB;
-
- TimeBuffer<CommStruct> *comm;
- typename TimeBuffer<CommStruct>::wire toIEW;
- typename TimeBuffer<CommStruct>::wire fromCommit;
-
- class InstQueue {
- enum queue {
- NonSpec,
- IQ,
- ToBeScheduled,
- ReadyList,
- ReplayList
- };
- struct pqCompare {
- bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
- {
- return lhs->seqNum > rhs->seqNum;
- }
- };
- public:
- InstQueue(Params *params);
-
- std::string name() const;
-
- void regStats();
-
- void setIssueExecQueue(TimeBuffer<IssueToExec> *i2e_queue);
-
- void setBE(BackEnd *_be) { be = _be; }
-
- void insert(DynInstPtr &inst);
-
- void scheduleReadyInsts();
-
- void scheduleNonSpec(const InstSeqNum &sn);
-
- DynInstPtr getReadyInst();
-
- void commit(const InstSeqNum &sn) {}
-
- void squash(const InstSeqNum &sn);
-
- int wakeDependents(DynInstPtr &inst);
-
- /** Tells memory dependence unit that a memory instruction needs to be
- * rescheduled. It will re-execute once replayMemInst() is called.
- */
- void rescheduleMemInst(DynInstPtr &inst);
-
- /** Re-executes all rescheduled memory instructions. */
- void replayMemInst(DynInstPtr &inst);
-
- /** Completes memory instruction. */
- void completeMemInst(DynInstPtr &inst);
-
- void violation(DynInstPtr &inst, DynInstPtr &violation) { }
-
- bool isFull() { return numInsts >= size; }
-
- void dumpInsts();
-
- private:
- bool find(queue q, typename std::list<DynInstPtr>::iterator it);
- BackEnd *be;
- TimeBuffer<IssueToExec> *i2e;
- typename TimeBuffer<IssueToExec>::wire numIssued;
- typedef typename std::list<DynInstPtr> InstList;
- typedef typename std::list<DynInstPtr>::iterator InstListIt;
- typedef typename std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> ReadyInstQueue;
- // Not sure I need the IQ list; it just needs to be a count.
- InstList iq;
- InstList toBeScheduled;
- InstList readyList;
- InstList nonSpec;
- InstList replayList;
- ReadyInstQueue readyQueue;
- public:
- int size;
- int numInsts;
- int width;
-
- Stats::VectorDistribution<> occ_dist;
-
- Stats::Vector<> inst_count;
- Stats::Vector<> peak_inst_count;
- Stats::Scalar<> empty_count;
- Stats::Scalar<> current_count;
- Stats::Scalar<> fullCount;
-
- Stats::Formula occ_rate;
- Stats::Formula avg_residency;
- Stats::Formula empty_rate;
- Stats::Formula full_rate;
- };
-
- /** LdWriteback event for a load completion. */
- class LdWritebackEvent : public Event {
- private:
- /** Instruction that is writing back data to the register file. */
- DynInstPtr inst;
- /** Pointer to IEW stage. */
- BackEnd *be;
-
- public:
- /** Constructs a load writeback event. */
- LdWritebackEvent(DynInstPtr &_inst, BackEnd *be);
-
- /** Processes writeback event. */
- virtual void process();
- /** Returns the description of the writeback event. */
- virtual const char *description();
- };
-
- BackEnd(Params *params);
-
- std::string name() const;
-
- void regStats();
-
- void setCPU(FullCPU *cpu_ptr)
- { cpu = cpu_ptr; }
-
- void setFrontEnd(FrontEnd *front_end_ptr)
- { frontEnd = front_end_ptr; }
-
- void setXC(ExecContext *xc_ptr)
- { xc = xc_ptr; }
-
- void setThreadState(Thread *thread_ptr)
- { thread = thread_ptr; }
-
- void setCommBuffer(TimeBuffer<CommStruct> *_comm);
-
- void tick();
- void squash();
- void squashFromXC();
- bool xcSquash;
-
- template <class T>
- Fault read(MemReqPtr &req, T &data, int load_idx);
-
- template <class T>
- Fault write(MemReqPtr &req, T &data, int store_idx);
-
- Addr readCommitPC() { return commitPC; }
-
- Addr commitPC;
-
- bool robEmpty() { return instList.empty(); }
-
- bool isFull() { return numInsts >= numROBEntries; }
- bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }
-
- /** Tells memory dependence unit that a memory instruction needs to be
- * rescheduled. It will re-execute once replayMemInst() is called.
- */
- void rescheduleMemInst(DynInstPtr &inst)
- { IQ.rescheduleMemInst(inst); }
-
- /** Re-executes all rescheduled memory instructions. */
- void replayMemInst(DynInstPtr &inst)
- { IQ.replayMemInst(inst); }
-
- /** Completes memory instruction. */
- void completeMemInst(DynInstPtr &inst)
- { IQ.completeMemInst(inst); }
-
- void fetchFault(Fault &fault);
-
- private:
- void updateStructures();
- void dispatchInsts();
- void dispatchStall();
- void checkDispatchStatus();
- void scheduleReadyInsts();
- void executeInsts();
- void commitInsts();
- void addToIQ(DynInstPtr &inst);
- void addToLSQ(DynInstPtr &inst);
- void instToCommit(DynInstPtr &inst);
- void writebackInsts();
- bool commitInst(int inst_num);
- void squash(const InstSeqNum &sn);
- void squashDueToBranch(DynInstPtr &inst);
- void squashDueToMemBlocked(DynInstPtr &inst);
- void updateExeInstStats(DynInstPtr &inst);
- void updateComInstStats(DynInstPtr &inst);
-
- public:
- FullCPU *cpu;
-
- FrontEnd *frontEnd;
-
- ExecContext *xc;
-
- Thread *thread;
-
- enum Status {
- Running,
- Idle,
- DcacheMissStall,
- DcacheMissComplete,
- Blocked
- };
-
- Status status;
-
- Status dispatchStatus;
-
- Counter funcExeInst;
-
- private:
-// typedef typename Impl::InstQueue InstQueue;
-
- InstQueue IQ;
-
- typedef typename Impl::LdstQueue LdstQueue;
-
- LdstQueue LSQ;
- public:
- RenameTable<Impl> commitRenameTable;
-
- RenameTable<Impl> renameTable;
- private:
- class DCacheCompletionEvent : public Event
- {
- private:
- BackEnd *be;
-
- public:
- DCacheCompletionEvent(BackEnd *_be);
-
- virtual void process();
- virtual const char *description();
- };
-
- friend class DCacheCompletionEvent;
-
- DCacheCompletionEvent cacheCompletionEvent;
-
- MemInterface *dcacheInterface;
-
- MemReqPtr memReq;
-
- // General back end width. Used if the more specific isn't given.
- int width;
-
- // Dispatch width.
- int dispatchWidth;
- int numDispatchEntries;
- int dispatchSize;
-
- int issueWidth;
-
- // Writeback width
- int wbWidth;
-
- // Commit width
- int commitWidth;
-
- /** Index into queue of instructions being written back. */
- unsigned wbNumInst;
-
- /** Cycle number within the queue of instructions being written
- * back. Used in case there are too many instructions writing
- * back at the current cycle and writesbacks need to be scheduled
- * for the future. See comments in instToCommit().
- */
- unsigned wbCycle;
-
- int numROBEntries;
- int numInsts;
-
- bool squashPending;
- InstSeqNum squashSeqNum;
- Addr squashNextPC;
-
- Fault faultFromFetch;
-
- private:
- typedef typename std::list<DynInstPtr>::iterator InstListIt;
-
- std::list<DynInstPtr> instList;
- std::list<DynInstPtr> dispatch;
- std::list<DynInstPtr> writeback;
-
- int latency;
-
- int squashLatency;
-
- bool exactFullStall;
-
- bool fetchRedirect[Impl::MaxThreads];
-
- // number of cycles stalled for D-cache misses
-/* Stats::Scalar<> dcacheStallCycles;
- Counter lastDcacheStall;
-*/
- Stats::Vector<> rob_cap_events;
- Stats::Vector<> rob_cap_inst_count;
- Stats::Vector<> iq_cap_events;
- Stats::Vector<> iq_cap_inst_count;
- // total number of instructions executed
- Stats::Vector<> exe_inst;
- Stats::Vector<> exe_swp;
- Stats::Vector<> exe_nop;
- Stats::Vector<> exe_refs;
- Stats::Vector<> exe_loads;
- Stats::Vector<> exe_branches;
-
- Stats::Vector<> issued_ops;
-
- // total number of loads forwaded from LSQ stores
- Stats::Vector<> lsq_forw_loads;
-
- // total number of loads ignored due to invalid addresses
- Stats::Vector<> inv_addr_loads;
-
- // total number of software prefetches ignored due to invalid addresses
- Stats::Vector<> inv_addr_swpfs;
- // ready loads blocked due to memory disambiguation
- Stats::Vector<> lsq_blocked_loads;
-
- Stats::Scalar<> lsqInversion;
-
- Stats::Vector<> n_issued_dist;
- Stats::VectorDistribution<> issue_delay_dist;
-
- Stats::VectorDistribution<> queue_res_dist;
-/*
- Stats::Vector<> stat_fu_busy;
- Stats::Vector2d<> stat_fuBusy;
- Stats::Vector<> dist_unissued;
- Stats::Vector2d<> stat_issued_inst_type;
-
- Stats::Formula misspec_cnt;
- Stats::Formula misspec_ipc;
- Stats::Formula issue_rate;
- Stats::Formula issue_stores;
- Stats::Formula issue_op_rate;
- Stats::Formula fu_busy_rate;
- Stats::Formula commit_stores;
- Stats::Formula commit_ipc;
- Stats::Formula commit_ipb;
- Stats::Formula lsq_inv_rate;
-*/
- Stats::Vector<> writeback_count;
- Stats::Vector<> producer_inst;
- Stats::Vector<> consumer_inst;
- Stats::Vector<> wb_penalized;
-
- Stats::Formula wb_rate;
- Stats::Formula wb_fanout;
- Stats::Formula wb_penalized_rate;
-
- // total number of instructions committed
- Stats::Vector<> stat_com_inst;
- Stats::Vector<> stat_com_swp;
- Stats::Vector<> stat_com_refs;
- Stats::Vector<> stat_com_loads;
- Stats::Vector<> stat_com_membars;
- Stats::Vector<> stat_com_branches;
-
- Stats::Distribution<> n_committed_dist;
-
- Stats::Scalar<> commit_eligible_samples;
- Stats::Vector<> commit_eligible;
-
- Stats::Scalar<> ROB_fcount;
- Stats::Formula ROB_full_rate;
-
- Stats::Vector<> ROB_count; // cumulative ROB occupancy
- Stats::Formula ROB_occ_rate;
- Stats::VectorDistribution<> ROB_occ_dist;
- public:
- void dumpInsts();
-};
-
-template <class Impl>
-template <class T>
-Fault
-BackEnd<Impl>::read(MemReqPtr &req, T &data, int load_idx)
-{
-/* memReq->reset(addr, sizeof(T), flags);
-
- // translate to physical address
- Fault fault = cpu->translateDataReadReq(memReq);
-
- // if we have a cache, do cache access too
- if (fault == NoFault && dcacheInterface) {
- memReq->cmd = Read;
- memReq->completionEvent = NULL;
- memReq->time = curTick;
- memReq->flags &= ~INST_READ;
- MemAccessResult result = dcacheInterface->access(memReq);
-
- // Ugly hack to get an event scheduled *only* if the access is
- // a miss. We really should add first-class support for this
- // at some point.
- if (result != MA_HIT && dcacheInterface->doEvents()) {
- // Fix this hack for keeping funcExeInst correct with loads that
- // are executed twice.
- --funcExeInst;
-
- memReq->completionEvent = &cacheCompletionEvent;
- lastDcacheStall = curTick;
-// unscheduleTickEvent();
-// status = DcacheMissStall;
- DPRINTF(OzoneCPU, "Dcache miss stall!\n");
- } else {
- // do functional access
- fault = thread->mem->read(memReq, data);
-
- }
- }
-*/
-/*
- if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
- recordEvent("Uncached Read");
-*/
- return LSQ.read(req, data, load_idx);
-}
-
-template <class Impl>
-template <class T>
-Fault
-BackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx)
-{
-/*
- memReq->reset(addr, sizeof(T), flags);
-
- // translate to physical address
- Fault fault = cpu->translateDataWriteReq(memReq);
-
- if (fault == NoFault && dcacheInterface) {
- memReq->cmd = Write;
- memcpy(memReq->data,(uint8_t *)&data,memReq->size);
- memReq->completionEvent = NULL;
- memReq->time = curTick;
- memReq->flags &= ~INST_READ;
- MemAccessResult result = dcacheInterface->access(memReq);
-
- // Ugly hack to get an event scheduled *only* if the access is
- // a miss. We really should add first-class support for this
- // at some point.
- if (result != MA_HIT && dcacheInterface->doEvents()) {
- memReq->completionEvent = &cacheCompletionEvent;
- lastDcacheStall = curTick;
-// unscheduleTickEvent();
-// status = DcacheMissStall;
- DPRINTF(OzoneCPU, "Dcache miss stall!\n");
- }
- }
-
- if (res && (fault == NoFault))
- *res = memReq->result;
- */
-/*
- if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
- recordEvent("Uncached Write");
-*/
- return LSQ.write(req, data, store_idx);
-}
-
-#endif // __CPU_OZONE_BACK_END_HH__
+++ /dev/null
-
-#include "encumbered/cpu/full/op_class.hh"
-#include "cpu/ozone/back_end.hh"
-
-template <class Impl>
-BackEnd<Impl>::InstQueue::InstQueue(Params *params)
- : size(params->numIQEntries), numInsts(0), width(params->issueWidth)
-{
-}
-
-template <class Impl>
-std::string
-BackEnd<Impl>::InstQueue::name() const
-{
- return be->name() + ".iq";
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::InstQueue::regStats()
-{
- using namespace Stats;
-
- occ_dist
- .init(1, 0, size, 2)
- .name(name() + "occ_dist")
- .desc("IQ Occupancy per cycle")
- .flags(total | cdf)
- ;
-
- inst_count
- .init(1)
- .name(name() + "cum_num_insts")
- .desc("Total occupancy")
- .flags(total)
- ;
-
- peak_inst_count
- .init(1)
- .name(name() + "peak_occupancy")
- .desc("Peak IQ occupancy")
- .flags(total)
- ;
-
- current_count
- .name(name() + "current_count")
- .desc("Occupancy this cycle")
- ;
-
- empty_count
- .name(name() + "empty_count")
- .desc("Number of empty cycles")
- ;
-
- fullCount
- .name(name() + "full_count")
- .desc("Number of full cycles")
- ;
-
-
- occ_rate
- .name(name() + "occ_rate")
- .desc("Average occupancy")
- .flags(total)
- ;
- occ_rate = inst_count / be->cpu->numCycles;
-
- avg_residency
- .name(name() + "avg_residency")
- .desc("Average IQ residency")
- .flags(total)
- ;
- avg_residency = occ_rate / be->cpu->numCycles;
-
- empty_rate
- .name(name() + "empty_rate")
- .desc("Fraction of cycles empty")
- ;
- empty_rate = 100 * empty_count / be->cpu->numCycles;
-
- full_rate
- .name(name() + "full_rate")
- .desc("Fraction of cycles full")
- ;
- full_rate = 100 * fullCount / be->cpu->numCycles;
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::InstQueue::setIssueExecQueue(TimeBuffer<IssueToExec> *i2e_queue)
-{
- i2e = i2e_queue;
- numIssued = i2e->getWire(0);
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::InstQueue::insert(DynInstPtr &inst)
-{
- numInsts++;
- inst_count[0]++;
- if (!inst->isNonSpeculative()) {
- DPRINTF(BE, "Instruction [sn:%lli] added to IQ\n", inst->seqNum);
- if (inst->readyToIssue()) {
- toBeScheduled.push_front(inst);
- inst->iqIt = toBeScheduled.begin();
- inst->iqItValid = true;
- } else {
- iq.push_front(inst);
- inst->iqIt = iq.begin();
- inst->iqItValid = true;
- }
- } else {
- DPRINTF(BE, "Nonspeculative instruction [sn:%lli] added to IQ\n", inst->seqNum);
- nonSpec.push_front(inst);
- inst->iqIt = nonSpec.begin();
- inst->iqItValid = true;
- }
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::InstQueue::scheduleReadyInsts()
-{
- int scheduled = numIssued->size;
- InstListIt iq_it = --toBeScheduled.end();
- InstListIt iq_end_it = toBeScheduled.end();
-
- while (iq_it != iq_end_it && scheduled < width) {
-// if ((*iq_it)->readyToIssue()) {
- DPRINTF(BE, "Instruction [sn:%lli] PC:%#x is ready\n",
- (*iq_it)->seqNum, (*iq_it)->readPC());
- readyQueue.push(*iq_it);
- readyList.push_front(*iq_it);
-
- (*iq_it)->iqIt = readyList.begin();
-
- toBeScheduled.erase(iq_it--);
-
- ++scheduled;
-// } else {
-// iq_it++;
-// }
- }
-
- numIssued->size+= scheduled;
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::InstQueue::scheduleNonSpec(const InstSeqNum &sn)
-{
-/*
- InstListIt non_spec_it = nonSpec.begin();
- InstListIt non_spec_end_it = nonSpec.end();
-
- while ((*non_spec_it)->seqNum != sn) {
- non_spec_it++;
- assert(non_spec_it != non_spec_end_it);
- }
-*/
- DynInstPtr inst = nonSpec.back();
-
- DPRINTF(BE, "Nonspeculative instruction [sn:%lli] scheduled\n", inst->seqNum);
-
- assert(inst->seqNum == sn);
-
- assert(find(NonSpec, inst->iqIt));
- nonSpec.erase(inst->iqIt);
- readyList.push_front(inst);
- inst->iqIt = readyList.begin();
- readyQueue.push(inst);
- numIssued->size++;
-}
-
-template <class Impl>
-typename Impl::DynInstPtr
-BackEnd<Impl>::InstQueue::getReadyInst()
-{
- assert(!readyList.empty());
-
- DynInstPtr inst = readyQueue.top();
- readyQueue.pop();
- assert(find(ReadyList, inst->iqIt));
- readyList.erase(inst->iqIt);
- inst->iqItValid = false;
-// if (!inst->isMemRef())
- --numInsts;
- return inst;
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::InstQueue::squash(const InstSeqNum &sn)
-{
- InstListIt iq_it = iq.begin();
- InstListIt iq_end_it = iq.end();
-
- while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) {
- DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
- (*iq_it)->iqItValid = false;
- iq.erase(iq_it++);
- --numInsts;
- }
-
- iq_it = nonSpec.begin();
- iq_end_it = nonSpec.end();
-
- while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) {
- DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
- (*iq_it)->iqItValid = false;
- nonSpec.erase(iq_it++);
- --numInsts;
- }
-
- iq_it = replayList.begin();
- iq_end_it = replayList.end();
-
- while (iq_it != iq_end_it) {
- if ((*iq_it)->seqNum > sn) {
- DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
- (*iq_it)->iqItValid = false;
- replayList.erase(iq_it++);
- --numInsts;
- } else {
- iq_it++;
- }
- }
-
- assert(numInsts >= 0);
-/*
- InstListIt ready_it = readyList.begin();
- InstListIt ready_end_it = readyList.end();
-
- while (ready_it != ready_end_it) {
- if ((*ready_it)->seqNum > sn) {
- readyList.erase(ready_it++);
- } else {
- ready_it++;
- }
- }
-*/
-}
-
-template <class Impl>
-int
-BackEnd<Impl>::InstQueue::wakeDependents(DynInstPtr &inst)
-{
- assert(!inst->isSquashed());
- std::vector<DynInstPtr> &dependents = inst->getDependents();
- int num_outputs = dependents.size();
-
- DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum);
-
- for (int i = 0; i < num_outputs; i++) {
- DynInstPtr dep_inst = dependents[i];
- dep_inst->markSrcRegReady();
- DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum);
-
- if (dep_inst->readyToIssue() && dep_inst->iqItValid) {
- if (dep_inst->isNonSpeculative()) {
- assert(find(NonSpec, dep_inst->iqIt));
- nonSpec.erase(dep_inst->iqIt);
- } else {
- assert(find(IQ, dep_inst->iqIt));
- iq.erase(dep_inst->iqIt);
- }
-
- toBeScheduled.push_front(dep_inst);
- dep_inst->iqIt = toBeScheduled.begin();
- }
- }
- return num_outputs;
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::InstQueue::rescheduleMemInst(DynInstPtr &inst)
-{
- DPRINTF(BE, "Rescheduling memory instruction [sn:%lli]\n", inst->seqNum);
- assert(!inst->iqItValid);
- replayList.push_front(inst);
- inst->iqIt = replayList.begin();
- inst->iqItValid = true;
- ++numInsts;
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::InstQueue::replayMemInst(DynInstPtr &inst)
-{
- DPRINTF(BE, "Replaying memory instruction [sn:%lli]\n", inst->seqNum);
- assert(find(ReplayList, inst->iqIt));
- InstListIt iq_it = --replayList.end();
- InstListIt iq_end_it = replayList.end();
- while (iq_it != iq_end_it) {
- DynInstPtr rescheduled_inst = (*iq_it);
-
- DPRINTF(BE, "Memory instruction [sn:%lli] also replayed\n", inst->seqNum);
- replayList.erase(iq_it--);
- toBeScheduled.push_front(rescheduled_inst);
- rescheduled_inst->iqIt = toBeScheduled.begin();
- }
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::InstQueue::completeMemInst(DynInstPtr &inst)
-{
- panic("Not implemented.");
-}
-
-template <class Impl>
-bool
-BackEnd<Impl>::InstQueue::find(queue q, InstListIt it)
-{
- InstListIt iq_it, iq_end_it;
- switch(q) {
- case NonSpec:
- iq_it = nonSpec.begin();
- iq_end_it = nonSpec.end();
- break;
- case IQ:
- iq_it = iq.begin();
- iq_end_it = iq.end();
- break;
- case ToBeScheduled:
- iq_it = toBeScheduled.begin();
- iq_end_it = toBeScheduled.end();
- break;
- case ReadyList:
- iq_it = readyList.begin();
- iq_end_it = readyList.end();
- break;
- case ReplayList:
- iq_it = replayList.begin();
- iq_end_it = replayList.end();
- }
-
- while (iq_it != it && iq_it != iq_end_it) {
- iq_it++;
- }
- if (iq_it == it) {
- return true;
- } else {
- return false;
- }
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::InstQueue::dumpInsts()
-{
- cprintf("IQ size: %i\n", iq.size());
-
- InstListIt inst_list_it = --iq.end();
-
- int num = 0;
- int valid_num = 0;
- while (inst_list_it != iq.end())
- {
- cprintf("Instruction:%i\n",
- num);
- if (!(*inst_list_it)->isSquashed()) {
- if (!(*inst_list_it)->isIssued()) {
- ++valid_num;
- cprintf("Count:%i\n", valid_num);
- } else if ((*inst_list_it)->isMemRef() &&
- !(*inst_list_it)->memOpDone) {
- // Loads that have not been marked as executed still count
- // towards the total instructions.
- ++valid_num;
- cprintf("Count:%i\n", valid_num);
- }
- }
-
- cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
- "Issued:%i\nSquashed:%i\n",
- (*inst_list_it)->readPC(),
- (*inst_list_it)->seqNum,
- (*inst_list_it)->threadNumber,
- (*inst_list_it)->isIssued(),
- (*inst_list_it)->isSquashed());
-
- if ((*inst_list_it)->isMemRef()) {
- cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
- }
-
- cprintf("\n");
-
- inst_list_it--;
- ++num;
- }
-
- cprintf("nonSpec size: %i\n", nonSpec.size());
-
- inst_list_it = --nonSpec.end();
-
- while (inst_list_it != nonSpec.end())
- {
- cprintf("Instruction:%i\n",
- num);
- if (!(*inst_list_it)->isSquashed()) {
- if (!(*inst_list_it)->isIssued()) {
- ++valid_num;
- cprintf("Count:%i\n", valid_num);
- } else if ((*inst_list_it)->isMemRef() &&
- !(*inst_list_it)->memOpDone) {
- // Loads that have not been marked as executed still count
- // towards the total instructions.
- ++valid_num;
- cprintf("Count:%i\n", valid_num);
- }
- }
-
- cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
- "Issued:%i\nSquashed:%i\n",
- (*inst_list_it)->readPC(),
- (*inst_list_it)->seqNum,
- (*inst_list_it)->threadNumber,
- (*inst_list_it)->isIssued(),
- (*inst_list_it)->isSquashed());
-
- if ((*inst_list_it)->isMemRef()) {
- cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
- }
-
- cprintf("\n");
-
- inst_list_it--;
- ++num;
- }
-
- cprintf("toBeScheduled size: %i\n", toBeScheduled.size());
-
- inst_list_it = --toBeScheduled.end();
-
- while (inst_list_it != toBeScheduled.end())
- {
- cprintf("Instruction:%i\n",
- num);
- if (!(*inst_list_it)->isSquashed()) {
- if (!(*inst_list_it)->isIssued()) {
- ++valid_num;
- cprintf("Count:%i\n", valid_num);
- } else if ((*inst_list_it)->isMemRef() &&
- !(*inst_list_it)->memOpDone) {
- // Loads that have not been marked as executed still count
- // towards the total instructions.
- ++valid_num;
- cprintf("Count:%i\n", valid_num);
- }
- }
-
- cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
- "Issued:%i\nSquashed:%i\n",
- (*inst_list_it)->readPC(),
- (*inst_list_it)->seqNum,
- (*inst_list_it)->threadNumber,
- (*inst_list_it)->isIssued(),
- (*inst_list_it)->isSquashed());
-
- if ((*inst_list_it)->isMemRef()) {
- cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
- }
-
- cprintf("\n");
-
- inst_list_it--;
- ++num;
- }
-
- cprintf("readyList size: %i\n", readyList.size());
-
- inst_list_it = --readyList.end();
-
- while (inst_list_it != readyList.end())
- {
- cprintf("Instruction:%i\n",
- num);
- if (!(*inst_list_it)->isSquashed()) {
- if (!(*inst_list_it)->isIssued()) {
- ++valid_num;
- cprintf("Count:%i\n", valid_num);
- } else if ((*inst_list_it)->isMemRef() &&
- !(*inst_list_it)->memOpDone) {
- // Loads that have not been marked as executed still count
- // towards the total instructions.
- ++valid_num;
- cprintf("Count:%i\n", valid_num);
- }
- }
-
- cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
- "Issued:%i\nSquashed:%i\n",
- (*inst_list_it)->readPC(),
- (*inst_list_it)->seqNum,
- (*inst_list_it)->threadNumber,
- (*inst_list_it)->isIssued(),
- (*inst_list_it)->isSquashed());
-
- if ((*inst_list_it)->isMemRef()) {
- cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
- }
-
- cprintf("\n");
-
- inst_list_it--;
- ++num;
- }
-}
-
-template<class Impl>
-BackEnd<Impl>::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst,
- BackEnd<Impl> *_be)
- : Event(&mainEventQueue), inst(_inst), be(_be)
-{
- this->setFlags(Event::AutoDelete);
-}
-
-template<class Impl>
-void
-BackEnd<Impl>::LdWritebackEvent::process()
-{
- DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum);
-// DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
-
- //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
-
-// iewStage->wakeCPU();
-
- if (inst->isSquashed()) {
- inst = NULL;
- return;
- }
-
- if (!inst->isExecuted()) {
- inst->setExecuted();
-
- // Execute again to copy data to proper place.
- inst->completeAcc();
- }
-
- // Need to insert instruction into queue to commit
- be->instToCommit(inst);
-
- //wroteToTimeBuffer = true;
-// iewStage->activityThisCycle();
-
- inst = NULL;
-}
-
-template<class Impl>
-const char *
-BackEnd<Impl>::LdWritebackEvent::description()
-{
- return "Load writeback event";
-}
-
-
-template <class Impl>
-BackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent(BackEnd *_be)
- : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
-{
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::DCacheCompletionEvent::process()
-{
-}
-
-template <class Impl>
-const char *
-BackEnd<Impl>::DCacheCompletionEvent::description()
-{
- return "Cache completion event";
-}
-
-template <class Impl>
-BackEnd<Impl>::BackEnd(Params *params)
- : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
- xcSquash(false), IQ(params),
- cacheCompletionEvent(this), width(params->backEndWidth),
- exactFullStall(true)
-{
- numROBEntries = params->numROBEntries;
- numInsts = 0;
- numDispatchEntries = 32;
- IQ.setBE(this);
- LSQ.setBE(this);
-
- // Setup IQ and LSQ with their parameters here.
- instsToDispatch = d2i.getWire(-1);
-
- instsToExecute = i2e.getWire(-1);
-
- IQ.setIssueExecQueue(&i2e);
-
- dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width;
- issueWidth = params->issueWidth ? params->issueWidth : width;
- wbWidth = params->wbWidth ? params->wbWidth : width;
- commitWidth = params->commitWidth ? params->commitWidth : width;
-
- LSQ.init(params, params->LQEntries, params->SQEntries, 0);
-
- dispatchStatus = Running;
-}
-
-template <class Impl>
-std::string
-BackEnd<Impl>::name() const
-{
- return cpu->name() + ".backend";
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::regStats()
-{
- using namespace Stats;
- rob_cap_events
- .init(cpu->number_of_threads)
- .name(name() + ".ROB:cap_events")
- .desc("number of cycles where ROB cap was active")
- .flags(total)
- ;
-
- rob_cap_inst_count
- .init(cpu->number_of_threads)
- .name(name() + ".ROB:cap_inst")
- .desc("number of instructions held up by ROB cap")
- .flags(total)
- ;
-
- iq_cap_events
- .init(cpu->number_of_threads)
- .name(name() +".IQ:cap_events" )
- .desc("number of cycles where IQ cap was active")
- .flags(total)
- ;
-
- iq_cap_inst_count
- .init(cpu->number_of_threads)
- .name(name() + ".IQ:cap_inst")
- .desc("number of instructions held up by IQ cap")
- .flags(total)
- ;
-
-
- exe_inst
- .init(cpu->number_of_threads)
- .name(name() + ".ISSUE:count")
- .desc("number of insts issued")
- .flags(total)
- ;
-
- exe_swp
- .init(cpu->number_of_threads)
- .name(name() + ".ISSUE:swp")
- .desc("number of swp insts issued")
- .flags(total)
- ;
-
- exe_nop
- .init(cpu->number_of_threads)
- .name(name() + ".ISSUE:nop")
- .desc("number of nop insts issued")
- .flags(total)
- ;
-
- exe_refs
- .init(cpu->number_of_threads)
- .name(name() + ".ISSUE:refs")
- .desc("number of memory reference insts issued")
- .flags(total)
- ;
-
- exe_loads
- .init(cpu->number_of_threads)
- .name(name() + ".ISSUE:loads")
- .desc("number of load insts issued")
- .flags(total)
- ;
-
- exe_branches
- .init(cpu->number_of_threads)
- .name(name() + ".ISSUE:branches")
- .desc("Number of branches issued")
- .flags(total)
- ;
-
- issued_ops
- .init(cpu->number_of_threads)
- .name(name() + ".ISSUE:op_count")
- .desc("number of insts issued")
- .flags(total)
- ;
-
-/*
- for (int i=0; i<Num_OpClasses; ++i) {
- stringstream subname;
- subname << opClassStrings[i] << "_delay";
- issue_delay_dist.subname(i, subname.str());
- }
-*/
- //
- // Other stats
- //
- lsq_forw_loads
- .init(cpu->number_of_threads)
- .name(name() + ".LSQ:forw_loads")
- .desc("number of loads forwarded via LSQ")
- .flags(total)
- ;
-
- inv_addr_loads
- .init(cpu->number_of_threads)
- .name(name() + ".ISSUE:addr_loads")
- .desc("number of invalid-address loads")
- .flags(total)
- ;
-
- inv_addr_swpfs
- .init(cpu->number_of_threads)
- .name(name() + ".ISSUE:addr_swpfs")
- .desc("number of invalid-address SW prefetches")
- .flags(total)
- ;
-
- lsq_blocked_loads
- .init(cpu->number_of_threads)
- .name(name() + ".LSQ:blocked_loads")
- .desc("number of ready loads not issued due to memory disambiguation")
- .flags(total)
- ;
-
- lsqInversion
- .name(name() + ".ISSUE:lsq_invert")
- .desc("Number of times LSQ instruction issued early")
- ;
-
- n_issued_dist
- .init(issueWidth + 1)
- .name(name() + ".ISSUE:issued_per_cycle")
- .desc("Number of insts issued each cycle")
- .flags(total | pdf | dist)
- ;
- issue_delay_dist
- .init(Num_OpClasses,0,99,2)
- .name(name() + ".ISSUE:")
- .desc("cycles from operands ready to issue")
- .flags(pdf | cdf)
- ;
-
- queue_res_dist
- .init(Num_OpClasses, 0, 99, 2)
- .name(name() + ".IQ:residence:")
- .desc("cycles from dispatch to issue")
- .flags(total | pdf | cdf )
- ;
- for (int i = 0; i < Num_OpClasses; ++i) {
- queue_res_dist.subname(i, opClassStrings[i]);
- }
-
- writeback_count
- .init(cpu->number_of_threads)
- .name(name() + ".WB:count")
- .desc("cumulative count of insts written-back")
- .flags(total)
- ;
-
- producer_inst
- .init(cpu->number_of_threads)
- .name(name() + ".WB:producers")
- .desc("num instructions producing a value")
- .flags(total)
- ;
-
- consumer_inst
- .init(cpu->number_of_threads)
- .name(name() + ".WB:consumers")
- .desc("num instructions consuming a value")
- .flags(total)
- ;
-
- wb_penalized
- .init(cpu->number_of_threads)
- .name(name() + ".WB:penalized")
- .desc("number of instrctions required to write to 'other' IQ")
- .flags(total)
- ;
-
-
- wb_penalized_rate
- .name(name() + ".WB:penalized_rate")
- .desc ("fraction of instructions written-back that wrote to 'other' IQ")
- .flags(total)
- ;
-
- wb_penalized_rate = wb_penalized / writeback_count;
-
- wb_fanout
- .name(name() + ".WB:fanout")
- .desc("average fanout of values written-back")
- .flags(total)
- ;
-
- wb_fanout = producer_inst / consumer_inst;
-
- wb_rate
- .name(name() + ".WB:rate")
- .desc("insts written-back per cycle")
- .flags(total)
- ;
- wb_rate = writeback_count / cpu->numCycles;
-
- stat_com_inst
- .init(cpu->number_of_threads)
- .name(name() + ".COM:count")
- .desc("Number of instructions committed")
- .flags(total)
- ;
-
- stat_com_swp
- .init(cpu->number_of_threads)
- .name(name() + ".COM:swp_count")
- .desc("Number of s/w prefetches committed")
- .flags(total)
- ;
-
- stat_com_refs
- .init(cpu->number_of_threads)
- .name(name() + ".COM:refs")
- .desc("Number of memory references committed")
- .flags(total)
- ;
-
- stat_com_loads
- .init(cpu->number_of_threads)
- .name(name() + ".COM:loads")
- .desc("Number of loads committed")
- .flags(total)
- ;
-
- stat_com_membars
- .init(cpu->number_of_threads)
- .name(name() + ".COM:membars")
- .desc("Number of memory barriers committed")
- .flags(total)
- ;
-
- stat_com_branches
- .init(cpu->number_of_threads)
- .name(name() + ".COM:branches")
- .desc("Number of branches committed")
- .flags(total)
- ;
- n_committed_dist
- .init(0,commitWidth,1)
- .name(name() + ".COM:committed_per_cycle")
- .desc("Number of insts commited each cycle")
- .flags(pdf)
- ;
-
- //
- // Commit-Eligible instructions...
- //
- // -> The number of instructions eligible to commit in those
- // cycles where we reached our commit BW limit (less the number
- // actually committed)
- //
- // -> The average value is computed over ALL CYCLES... not just
- // the BW limited cycles
- //
- // -> The standard deviation is computed only over cycles where
- // we reached the BW limit
- //
- commit_eligible
- .init(cpu->number_of_threads)
- .name(name() + ".COM:bw_limited")
- .desc("number of insts not committed due to BW limits")
- .flags(total)
- ;
-
- commit_eligible_samples
- .name(name() + ".COM:bw_lim_events")
- .desc("number cycles where commit BW limit reached")
- ;
-
- ROB_fcount
- .name(name() + ".ROB:full_count")
- .desc("number of cycles where ROB was full")
- ;
-
- ROB_count
- .init(cpu->number_of_threads)
- .name(name() + ".ROB:occupancy")
- .desc(name() + ".ROB occupancy (cumulative)")
- .flags(total)
- ;
-
- ROB_full_rate
- .name(name() + ".ROB:full_rate")
- .desc("ROB full per cycle")
- ;
- ROB_full_rate = ROB_fcount / cpu->numCycles;
-
- ROB_occ_rate
- .name(name() + ".ROB:occ_rate")
- .desc("ROB occupancy rate")
- .flags(total)
- ;
- ROB_occ_rate = ROB_count / cpu->numCycles;
-
- ROB_occ_dist
- .init(cpu->number_of_threads,0,numROBEntries,2)
- .name(name() + ".ROB:occ_dist")
- .desc("ROB Occupancy per cycle")
- .flags(total | cdf)
- ;
-
- IQ.regStats();
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
-{
- comm = _comm;
- toIEW = comm->getWire(0);
- fromCommit = comm->getWire(-1);
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::tick()
-{
- DPRINTF(BE, "Ticking back end\n");
-
- ROB_count[0]+= numInsts;
-
- wbCycle = 0;
-
- if (xcSquash) {
- squashFromXC();
- }
-
- // Read in any done instruction information and update the IQ or LSQ.
- updateStructures();
-
- if (dispatchStatus != Blocked) {
- d2i.advance();
- dispatchInsts();
- } else {
- checkDispatchStatus();
- }
-
- i2e.advance();
- scheduleReadyInsts();
-
- e2c.advance();
- executeInsts();
-
- numInstsToWB.advance();
- writebackInsts();
-
- commitInsts();
-
- DPRINTF(BE, "IQ entries in use: %i, ROB entries in use: %i, LSQ loads: %i, LSQ stores: %i\n",
- IQ.numInsts, numInsts, LSQ.numLoads(), LSQ.numStores());
-
- assert(numInsts == instList.size());
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::updateStructures()
-{
- if (fromCommit->doneSeqNum) {
- IQ.commit(fromCommit->doneSeqNum);
- LSQ.commitLoads(fromCommit->doneSeqNum);
- LSQ.commitStores(fromCommit->doneSeqNum);
- }
-
- if (fromCommit->nonSpecSeqNum) {
- if (fromCommit->uncached) {
- LSQ.executeLoad(fromCommit->lqIdx);
- } else {
- IQ.scheduleNonSpec(
- fromCommit->nonSpecSeqNum);
- }
- }
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::addToIQ(DynInstPtr &inst)
-{
- // Do anything IQ specific here?
- IQ.insert(inst);
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::addToLSQ(DynInstPtr &inst)
-{
- // Do anything LSQ specific here?
- LSQ.insert(inst);
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::dispatchInsts()
-{
- DPRINTF(BE, "Trying to dispatch instructions.\n");
-
- // Pull instructions out of the front end.
- int disp_width = dispatchWidth ? dispatchWidth : width;
-
- // Could model dispatching time, but in general 1 cycle is probably
- // good enough.
-
- if (dispatchSize < numDispatchEntries) {
- for (int i = 0; i < disp_width; i++) {
- // Get instructions
- DynInstPtr inst = frontEnd->getInst();
-
- if (!inst) {
- // No more instructions to get
- break;
- }
-
- DPRINTF(BE, "Processing instruction [sn:%lli] PC:%#x\n",
- inst->seqNum, inst->readPC());
-
- for (int i = 0; i < inst->numDestRegs(); ++i)
- renameTable[inst->destRegIdx(i)] = inst;
-
- // Add to queue to be dispatched.
- dispatch.push_back(inst);
-
- d2i[0].size++;
- ++dispatchSize;
- }
- }
-
- assert(dispatch.size() < 64);
-
- for (int i = 0; i < instsToDispatch->size; ++i) {
- assert(!dispatch.empty());
- // Get instruction from front of time buffer
- DynInstPtr inst = dispatch.front();
- dispatch.pop_front();
- --dispatchSize;
-
- if (inst->isSquashed())
- continue;
-
- ++numInsts;
- instList.push_back(inst);
-
- DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n",
- inst->seqNum, inst->readPC());
-
- addToIQ(inst);
-
- if (inst->isMemRef()) {
- addToLSQ(inst);
- }
-
- if (inst->isNonSpeculative()) {
- inst->setCanCommit();
- }
-
- // Check if IQ or LSQ is full. If so we'll need to break and stop
- // removing instructions. Also update the number of insts to remove
- // from the queue.
- if (exactFullStall) {
- bool stall = false;
- if (IQ.isFull()) {
- DPRINTF(BE, "IQ is full!\n");
- stall = true;
- } else if (LSQ.isFull()) {
- DPRINTF(BE, "LSQ is full!\n");
- stall = true;
- } else if (isFull()) {
- DPRINTF(BE, "ROB is full!\n");
- stall = true;
- ROB_fcount++;
- }
- if (stall) {
- instsToDispatch->size-= i+1;
- dispatchStall();
- return;
- }
- }
- }
-
- // Check if IQ or LSQ is full. If so we'll need to break and stop
- // removing instructions. Also update the number of insts to remove
- // from the queue. Check here if we don't care about exact stall
- // conditions.
-
- bool stall = false;
- if (IQ.isFull()) {
- DPRINTF(BE, "IQ is full!\n");
- stall = true;
- } else if (LSQ.isFull()) {
- DPRINTF(BE, "LSQ is full!\n");
- stall = true;
- } else if (isFull()) {
- DPRINTF(BE, "ROB is full!\n");
- stall = true;
- ROB_fcount++;
- }
- if (stall) {
- d2i.advance();
- dispatchStall();
- return;
- }
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::dispatchStall()
-{
- dispatchStatus = Blocked;
- if (!cpu->decoupledFrontEnd) {
- // Tell front end to stall here through a timebuffer, or just tell
- // it directly.
- }
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::checkDispatchStatus()
-{
- DPRINTF(BE, "Checking dispatch status\n");
- assert(dispatchStatus == Blocked);
- if (!IQ.isFull() && !LSQ.isFull() && !isFull()) {
- DPRINTF(BE, "Dispatch no longer blocked\n");
- dispatchStatus = Running;
- dispatchInsts();
- }
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::scheduleReadyInsts()
-{
- // Tell IQ to put any ready instructions into the instruction list.
- // Probably want to have a list of DynInstPtrs returned here. Then I
- // can choose to either put them into a time buffer to simulate
- // IQ scheduling time, or hand them directly off to the next stage.
- // Do you ever want to directly hand it off to the next stage?
- DPRINTF(BE, "Trying to schedule ready instructions\n");
- IQ.scheduleReadyInsts();
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::executeInsts()
-{
- int insts_to_execute = instsToExecute->size;
-
- issued_ops[0]+= insts_to_execute;
- n_issued_dist[insts_to_execute]++;
-
- DPRINTF(BE, "Trying to execute %i instructions\n", insts_to_execute);
-
- fetchRedirect[0] = false;
-
- while (insts_to_execute > 0) {
- // Get ready instruction from the IQ (or queue coming out of IQ)
- // Execute the ready instruction.
- // Wakeup any dependents if it's done.
- DynInstPtr inst = IQ.getReadyInst();
-
- DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n",
- inst->seqNum, inst->readPC());
-
- ++funcExeInst;
-
- // Check if the instruction is squashed; if so then skip it
- // and don't count it towards the FU usage.
- if (inst->isSquashed()) {
- DPRINTF(BE, "Execute: Instruction was squashed.\n");
-
- // Not sure how to handle this plus the method of sending # of
- // instructions to use. Probably will just have to count it
- // towards the bandwidth usage, but not the FU usage.
- --insts_to_execute;
-
- // Consider this instruction executed so that commit can go
- // ahead and retire the instruction.
- inst->setExecuted();
-
- // Not sure if I should set this here or just let commit try to
- // commit any squashed instructions. I like the latter a bit more.
- inst->setCanCommit();
-
-// ++iewExecSquashedInsts;
-
- continue;
- }
-
- Fault fault = NoFault;
-
- // Execute instruction.
- // Note that if the instruction faults, it will be handled
- // at the commit stage.
- if (inst->isMemRef() &&
- (!inst->isDataPrefetch() && !inst->isInstPrefetch())) {
- DPRINTF(BE, "Execute: Initiating access for memory "
- "reference.\n");
-
- // Tell the LDSTQ to execute this instruction (if it is a load).
- if (inst->isLoad()) {
- // Loads will mark themselves as executed, and their writeback
- // event adds the instruction to the queue to commit
- fault = LSQ.executeLoad(inst);
-
-// ++iewExecLoadInsts;
- } else if (inst->isStore()) {
- LSQ.executeStore(inst);
-
-// ++iewExecStoreInsts;
-
- if (!(inst->req->flags & LOCKED)) {
- inst->setExecuted();
-
- instToCommit(inst);
- }
- // Store conditionals will mark themselves as executed, and
- // their writeback event will add the instruction to the queue
- // to commit.
- } else {
- panic("Unexpected memory type!\n");
- }
-
- } else {
- inst->execute();
-
-// ++iewExecutedInsts;
-
- inst->setExecuted();
-
- instToCommit(inst);
- }
-
- updateExeInstStats(inst);
-
- // Probably should have some sort of function for this.
- // More general question of how to handle squashes? Have some sort of
- // squash unit that controls it? Probably...
- // Check if branch was correct. This check happens after the
- // instruction is added to the queue because even if the branch
- // is mispredicted, the branch instruction itself is still valid.
- // Only handle this if there hasn't already been something that
- // redirects fetch in this group of instructions.
-
- // This probably needs to prioritize the redirects if a different
- // scheduler is used. Currently the scheduler schedules the oldest
- // instruction first, so the branch resolution order will be correct.
- unsigned tid = inst->threadNumber;
-
- if (!fetchRedirect[tid]) {
-
- if (inst->mispredicted()) {
- fetchRedirect[tid] = true;
-
- DPRINTF(BE, "Execute: Branch mispredict detected.\n");
- DPRINTF(BE, "Execute: Redirecting fetch to PC: %#x.\n",
- inst->nextPC);
-
- // If incorrect, then signal the ROB that it must be squashed.
- squashDueToBranch(inst);
-
- if (inst->predTaken()) {
-// predictedTakenIncorrect++;
- } else {
-// predictedNotTakenIncorrect++;
- }
- } else if (LSQ.violation()) {
- fetchRedirect[tid] = true;
-
- // Get the DynInst that caused the violation. Note that this
- // clears the violation signal.
- DynInstPtr violator;
- violator = LSQ.getMemDepViolator();
-
- DPRINTF(BE, "LDSTQ detected a violation. Violator PC: "
- "%#x, inst PC: %#x. Addr is: %#x.\n",
- violator->readPC(), inst->readPC(), inst->physEffAddr);
-
- // Tell the instruction queue that a violation has occured.
-// IQ.violation(inst, violator);
-
- // Squash.
-// squashDueToMemOrder(inst,tid);
- squashDueToBranch(inst);
-
-// ++memOrderViolationEvents;
- } else if (LSQ.loadBlocked()) {
- fetchRedirect[tid] = true;
-
- DPRINTF(BE, "Load operation couldn't execute because the "
- "memory system is blocked. PC: %#x [sn:%lli]\n",
- inst->readPC(), inst->seqNum);
-
- squashDueToMemBlocked(inst);
- }
- }
-
-// instList.pop_front();
-
- --insts_to_execute;
-
- // keep an instruction count
- thread->numInst++;
- thread->numInsts++;
- }
-
- assert(insts_to_execute >= 0);
-}
-
-template<class Impl>
-void
-BackEnd<Impl>::instToCommit(DynInstPtr &inst)
-{
- int wb_width = wbWidth;
- // First check the time slot that this instruction will write
- // to. If there are free write ports at the time, then go ahead
- // and write the instruction to that time. If there are not,
- // keep looking back to see where's the first time there's a
- // free slot. What happens if you run out of free spaces?
- // For now naively assume that all instructions take one cycle.
- // Otherwise would have to look into the time buffer based on the
- // latency of the instruction.
-
- DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
- inst->seqNum, inst->readPC());
-
- while (numInstsToWB[wbCycle].size >= wb_width) {
- ++wbCycle;
-
- assert(wbCycle < 5);
- }
-
- // Add finished instruction to queue to commit.
- writeback.push_back(inst);
- numInstsToWB[wbCycle].size++;
-
- if (wbCycle)
- wb_penalized[0]++;
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::writebackInsts()
-{
- int wb_width = wbWidth;
- // Using this method I'm not quite sure how to prevent an
- // instruction from waking its own dependents multiple times,
- // without the guarantee that commit always has enough bandwidth
- // to accept all instructions being written back. This guarantee
- // might not be too unrealistic.
- InstListIt wb_inst_it = writeback.begin();
- InstListIt wb_end_it = writeback.end();
- int inst_num = 0;
- int consumer_insts = 0;
-
- for (; inst_num < wb_width &&
- wb_inst_it != wb_end_it; inst_num++) {
- DynInstPtr inst = (*wb_inst_it);
-
- // Some instructions will be sent to commit without having
- // executed because they need commit to handle them.
- // E.g. Uncached loads have not actually executed when they
- // are first sent to commit. Instead commit must tell the LSQ
- // when it's ready to execute the uncached load.
- if (!inst->isSquashed()) {
- DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
- inst->seqNum, inst->readPC());
-
- inst->setCanCommit();
- inst->setResultReady();
-
- if (inst->isExecuted()) {
- int dependents = IQ.wakeDependents(inst);
- if (dependents) {
- producer_inst[0]++;
- consumer_insts+= dependents;
- }
- }
- }
-
- writeback.erase(wb_inst_it++);
- }
- LSQ.writebackStores();
- consumer_inst[0]+= consumer_insts;
- writeback_count[0]+= inst_num;
-}
-
-template <class Impl>
-bool
-BackEnd<Impl>::commitInst(int inst_num)
-{
- // Read instruction from the head of the ROB
- DynInstPtr inst = instList.front();
-
- // Make sure instruction is valid
- assert(inst);
-
- if (!inst->readyToCommit())
- return false;
-
- DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n",
- inst->seqNum, inst->readPC());
-
- // If the instruction is not executed yet, then it is a non-speculative
- // or store inst. Signal backwards that it should be executed.
- if (!inst->isExecuted()) {
- // Keep this number correct. We have not yet actually executed
- // and committed this instruction.
-// thread->funcExeInst--;
-
- if (inst->isNonSpeculative()) {
-#if !FULL_SYSTEM
- // Hack to make sure syscalls aren't executed until all stores
- // write back their data. This direct communication shouldn't
- // be used for anything other than this.
- if (inst_num > 0 || LSQ.hasStoresToWB()) {
- DPRINTF(BE, "Waiting for all stores to writeback.\n");
- return false;
- }
-#endif
-
- DPRINTF(BE, "Encountered a store or non-speculative "
- "instruction at the head of the ROB, PC %#x.\n",
- inst->readPC());
-
- // Send back the non-speculative instruction's sequence number.
- toIEW->nonSpecSeqNum = inst->seqNum;
-
- // Change the instruction so it won't try to commit again until
- // it is executed.
- inst->clearCanCommit();
-
-// ++commitNonSpecStalls;
-
- return false;
- } else if (inst->isLoad()) {
- DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n",
- inst->seqNum, inst->readPC());
-
- // Send back the non-speculative instruction's sequence
- // number. Maybe just tell the lsq to re-execute the load.
- toIEW->nonSpecSeqNum = inst->seqNum;
- toIEW->uncached = true;
- toIEW->lqIdx = inst->lqIdx;
-
- inst->clearCanCommit();
-
- return false;
- } else {
- panic("Trying to commit un-executed instruction "
- "of unknown type!\n");
- }
- }
-
- // Now check if it's one of the special trap or barrier or
- // serializing instructions.
- if (inst->isThreadSync())
- {
- // Not handled for now.
- panic("Barrier instructions are not handled yet.\n");
- }
-
- // Check if the instruction caused a fault. If so, trap.
- Fault inst_fault = inst->getFault();
-
- if (inst_fault != NoFault) {
- if (!inst->isNop()) {
-#if FULL_SYSTEM
- DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n",
- inst->seqNum, inst->readPC());
-
-// assert(!thread->inSyscall);
-
-// thread->inSyscall = true;
-
- // Consider holding onto the trap and waiting until the trap event
- // happens for this to be executed.
- inst_fault->invoke(thread->getXCProxy());
-
- // Exit state update mode to avoid accidental updating.
-// thread->inSyscall = false;
-
-// commitStatus = TrapPending;
-
- // Generate trap squash event.
-// generateTrapEvent();
-
- return false;
-#else // !FULL_SYSTEM
- panic("fault (%d) detected @ PC %08p", inst_fault,
- inst->PC);
-#endif // FULL_SYSTEM
- }
- }
-
- if (inst->isControl()) {
-// ++commitCommittedBranches;
- }
-
- int freed_regs = 0;
-
- for (int i = 0; i < inst->numDestRegs(); ++i) {
- DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
- (int)inst->destRegIdx(i), inst->seqNum);
- thread->renameTable[inst->destRegIdx(i)] = inst;
- ++freed_regs;
- }
-
- if (inst->traceData) {
- inst->traceData->finalize();
- inst->traceData = NULL;
- }
-
- inst->clearDependents();
-
- frontEnd->addFreeRegs(freed_regs);
-
- instList.pop_front();
-
- --numInsts;
- cpu->numInst++;
- thread->numInsts++;
- ++thread->funcExeInst;
- thread->PC = inst->readNextPC();
- updateComInstStats(inst);
-
- // Write the done sequence number here.
- toIEW->doneSeqNum = inst->seqNum;
-
-#if FULL_SYSTEM
- int count = 0;
- Addr oldpc;
- do {
- if (count == 0)
- assert(!thread->inSyscall && !thread->trapPending);
- oldpc = thread->readPC();
- cpu->system->pcEventQueue.service(
- thread->getXCProxy());
- count++;
- } while (oldpc != thread->readPC());
- if (count > 1) {
- DPRINTF(BE, "PC skip function event, stopping commit\n");
-// completed_last_inst = false;
-// squashPending = true;
- return false;
- }
-#endif
- return true;
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::commitInsts()
-{
- int commit_width = commitWidth ? commitWidth : width;
-
- // Not sure this should be a loop or not.
- int inst_num = 0;
- while (!instList.empty() && inst_num < commit_width) {
- if (instList.front()->isSquashed()) {
- panic("No squashed insts should still be on the list!");
- instList.front()->clearDependents();
- instList.pop_front();
- continue;
- }
-
- if (!commitInst(inst_num++)) {
- break;
- }
- }
- n_committed_dist.sample(inst_num);
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::squash(const InstSeqNum &sn)
-{
- IQ.squash(sn);
- LSQ.squash(sn);
-
- int freed_regs = 0;
- InstListIt dispatch_end = dispatch.end();
- InstListIt insts_it = dispatch.end();
- insts_it--;
-
- while (insts_it != dispatch_end && (*insts_it)->seqNum > sn)
- {
- if ((*insts_it)->isSquashed()) {
- --insts_it;
- continue;
- }
- DPRINTF(BE, "Squashing instruction on dispatch list PC %#x, [sn:%lli].\n",
- (*insts_it)->readPC(),
- (*insts_it)->seqNum);
-
- // Mark the instruction as squashed, and ready to commit so that
- // it can drain out of the pipeline.
- (*insts_it)->setSquashed();
-
- (*insts_it)->setCanCommit();
-
- // Be careful with IPRs and such here
- for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
- DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
- DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
- (int)(*insts_it)->destRegIdx(i), prev_dest);
- renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
- ++freed_regs;
- }
-
- (*insts_it)->clearDependents();
-
- --insts_it;
- }
-
- insts_it = instList.end();
- insts_it--;
-
- while (!instList.empty() && (*insts_it)->seqNum > sn)
- {
- if ((*insts_it)->isSquashed()) {
- --insts_it;
- continue;
- }
- DPRINTF(BE, "Squashing instruction on inst list PC %#x, [sn:%lli].\n",
- (*insts_it)->readPC(),
- (*insts_it)->seqNum);
-
- // Mark the instruction as squashed, and ready to commit so that
- // it can drain out of the pipeline.
- (*insts_it)->setSquashed();
-
- (*insts_it)->setCanCommit();
-
- for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
- DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
- DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
- (int)(*insts_it)->destRegIdx(i), prev_dest);
- renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
- ++freed_regs;
- }
-
- (*insts_it)->clearDependents();
-
- instList.erase(insts_it--);
- --numInsts;
- }
-
- frontEnd->addFreeRegs(freed_regs);
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::squashFromXC()
-{
- xcSquash = true;
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::squashDueToBranch(DynInstPtr &inst)
-{
- // Update the branch predictor state I guess
- squash(inst->seqNum);
- frontEnd->squash(inst->seqNum, inst->readNextPC(),
- true, inst->mispredicted());
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
-{
- DPRINTF(IEW, "Memory blocked, squashing load and younger insts, "
- "PC: %#x [sn:%i].\n", inst->readPC(), inst->seqNum);
-
- squash(inst->seqNum - 1);
- frontEnd->squash(inst->seqNum - 1, inst->readPC());
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::fetchFault(Fault &fault)
-{
- faultFromFetch = fault;
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
-{
- int thread_number = inst->threadNumber;
-
- //
- // Pick off the software prefetches
- //
-#ifdef TARGET_ALPHA
- if (inst->isDataPrefetch())
- exe_swp[thread_number]++;
- else
- exe_inst[thread_number]++;
-#else
- exe_inst[thread_number]++;
-#endif
-
- //
- // Control operations
- //
- if (inst->isControl())
- exe_branches[thread_number]++;
-
- //
- // Memory operations
- //
- if (inst->isMemRef()) {
- exe_refs[thread_number]++;
-
- if (inst->isLoad())
- exe_loads[thread_number]++;
- }
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
-{
- unsigned thread = inst->threadNumber;
-
- //
- // Pick off the software prefetches
- //
-#ifdef TARGET_ALPHA
- if (inst->isDataPrefetch()) {
- stat_com_swp[thread]++;
- } else {
- stat_com_inst[thread]++;
- }
-#else
- stat_com_inst[thread]++;
-#endif
-
- //
- // Control Instructions
- //
- if (inst->isControl())
- stat_com_branches[thread]++;
-
- //
- // Memory references
- //
- if (inst->isMemRef()) {
- stat_com_refs[thread]++;
-
- if (inst->isLoad()) {
- stat_com_loads[thread]++;
- }
- }
-
- if (inst->isMemBarrier()) {
- stat_com_membars[thread]++;
- }
-}
-
-template <class Impl>
-void
-BackEnd<Impl>::dumpInsts()
-{
- int num = 0;
- int valid_num = 0;
-
- InstListIt inst_list_it = instList.begin();
-
- cprintf("Inst list size: %i\n", instList.size());
-
- while (inst_list_it != instList.end())
- {
- cprintf("Instruction:%i\n",
- num);
- if (!(*inst_list_it)->isSquashed()) {
- if (!(*inst_list_it)->isIssued()) {
- ++valid_num;
- cprintf("Count:%i\n", valid_num);
- } else if ((*inst_list_it)->isMemRef() &&
- !(*inst_list_it)->memOpDone) {
- // Loads that have not been marked as executed still count
- // towards the total instructions.
- ++valid_num;
- cprintf("Count:%i\n", valid_num);
- }
- }
-
- cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
- "Issued:%i\nSquashed:%i\n",
- (*inst_list_it)->readPC(),
- (*inst_list_it)->seqNum,
- (*inst_list_it)->threadNumber,
- (*inst_list_it)->isIssued(),
- (*inst_list_it)->isSquashed());
-
- if ((*inst_list_it)->isMemRef()) {
- cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
- }
-
- cprintf("\n");
-
- inst_list_it++;
- ++num;
- }
-
- cprintf("Dispatch list size: %i\n", dispatch.size());
-
- inst_list_it = dispatch.begin();
-
- while (inst_list_it != dispatch.end())
- {
- cprintf("Instruction:%i\n",
- num);
- if (!(*inst_list_it)->isSquashed()) {
- if (!(*inst_list_it)->isIssued()) {
- ++valid_num;
- cprintf("Count:%i\n", valid_num);
- } else if ((*inst_list_it)->isMemRef() &&
- !(*inst_list_it)->memOpDone) {
- // Loads that have not been marked as executed still count
- // towards the total instructions.
- ++valid_num;
- cprintf("Count:%i\n", valid_num);
- }
- }
-
- cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
- "Issued:%i\nSquashed:%i\n",
- (*inst_list_it)->readPC(),
- (*inst_list_it)->seqNum,
- (*inst_list_it)->threadNumber,
- (*inst_list_it)->isIssued(),
- (*inst_list_it)->isSquashed());
-
- if ((*inst_list_it)->isMemRef()) {
- cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
- }
-
- cprintf("\n");
-
- inst_list_it++;
- ++num;
- }
-
- cprintf("Writeback list size: %i\n", writeback.size());
-
- inst_list_it = writeback.begin();
-
- while (inst_list_it != writeback.end())
- {
- cprintf("Instruction:%i\n",
- num);
- if (!(*inst_list_it)->isSquashed()) {
- if (!(*inst_list_it)->isIssued()) {
- ++valid_num;
- cprintf("Count:%i\n", valid_num);
- } else if ((*inst_list_it)->isMemRef() &&
- !(*inst_list_it)->memOpDone) {
- // Loads that have not been marked as executed still count
- // towards the total instructions.
- ++valid_num;
- cprintf("Count:%i\n", valid_num);
- }
- }
-
- cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
- "Issued:%i\nSquashed:%i\n",
- (*inst_list_it)->readPC(),
- (*inst_list_it)->seqNum,
- (*inst_list_it)->threadNumber,
- (*inst_list_it)->isIssued(),
- (*inst_list_it)->isSquashed());
-
- if ((*inst_list_it)->isMemRef()) {
- cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
- }
-
- cprintf("\n");
-
- inst_list_it++;
- ++num;
- }
-}
+++ /dev/null
-
-#include <string>
-
-#include "cpu/checker/cpu.hh"
-#include "cpu/inst_seq.hh"
-#include "cpu/ozone/cpu.hh"
-#include "cpu/ozone/ozone_impl.hh"
-#include "cpu/ozone/simple_impl.hh"
-#include "cpu/ozone/simple_params.hh"
-#include "mem/cache/base_cache.hh"
-#include "sim/builder.hh"
-#include "sim/process.hh"
-#include "sim/sim_object.hh"
-
-class DerivOzoneCPU : public OzoneCPU<OzoneImpl>
-{
- public:
- DerivOzoneCPU(SimpleParams *p)
- : OzoneCPU<OzoneImpl>(p)
- { }
-};
-
-class SimpleOzoneCPU : public OzoneCPU<SimpleImpl>
-{
- public:
- SimpleOzoneCPU(SimpleParams *p)
- : OzoneCPU<SimpleImpl>(p)
- { }
-};
-
-
-////////////////////////////////////////////////////////////////////////
-//
-// OzoneCPU Simulation Object
-//
-
-BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivOzoneCPU)
-
- Param<int> clock;
- Param<int> numThreads;
-
-#if FULL_SYSTEM
-SimObjectParam<System *> system;
-Param<int> cpu_id;
-SimObjectParam<AlphaITB *> itb;
-SimObjectParam<AlphaDTB *> dtb;
-#else
-SimObjectVectorParam<Process *> workload;
-//SimObjectParam<PageTable *> page_table;
-#endif // FULL_SYSTEM
-
-SimObjectParam<FunctionalMemory *> mem;
-
-SimObjectParam<BaseCPU *> checker;
-
-Param<Counter> max_insts_any_thread;
-Param<Counter> max_insts_all_threads;
-Param<Counter> max_loads_any_thread;
-Param<Counter> max_loads_all_threads;
-
-SimObjectParam<BaseCache *> icache;
-SimObjectParam<BaseCache *> dcache;
-
-Param<unsigned> cachePorts;
-Param<unsigned> width;
-Param<unsigned> frontEndWidth;
-Param<unsigned> backEndWidth;
-Param<unsigned> backEndSquashLatency;
-Param<unsigned> backEndLatency;
-Param<unsigned> maxInstBufferSize;
-Param<unsigned> numPhysicalRegs;
-Param<unsigned> maxOutstandingMemOps;
-
-Param<unsigned> decodeToFetchDelay;
-Param<unsigned> renameToFetchDelay;
-Param<unsigned> iewToFetchDelay;
-Param<unsigned> commitToFetchDelay;
-Param<unsigned> fetchWidth;
-
-Param<unsigned> renameToDecodeDelay;
-Param<unsigned> iewToDecodeDelay;
-Param<unsigned> commitToDecodeDelay;
-Param<unsigned> fetchToDecodeDelay;
-Param<unsigned> decodeWidth;
-
-Param<unsigned> iewToRenameDelay;
-Param<unsigned> commitToRenameDelay;
-Param<unsigned> decodeToRenameDelay;
-Param<unsigned> renameWidth;
-
-Param<unsigned> commitToIEWDelay;
-Param<unsigned> renameToIEWDelay;
-Param<unsigned> issueToExecuteDelay;
-Param<unsigned> issueWidth;
-Param<unsigned> executeWidth;
-Param<unsigned> executeIntWidth;
-Param<unsigned> executeFloatWidth;
-Param<unsigned> executeBranchWidth;
-Param<unsigned> executeMemoryWidth;
-
-Param<unsigned> iewToCommitDelay;
-Param<unsigned> renameToROBDelay;
-Param<unsigned> commitWidth;
-Param<unsigned> squashWidth;
-
-Param<unsigned> localPredictorSize;
-Param<unsigned> localCtrBits;
-Param<unsigned> localHistoryTableSize;
-Param<unsigned> localHistoryBits;
-Param<unsigned> globalPredictorSize;
-Param<unsigned> globalCtrBits;
-Param<unsigned> globalHistoryBits;
-Param<unsigned> choicePredictorSize;
-Param<unsigned> choiceCtrBits;
-
-Param<unsigned> BTBEntries;
-Param<unsigned> BTBTagSize;
-
-Param<unsigned> RASSize;
-
-Param<unsigned> LQEntries;
-Param<unsigned> SQEntries;
-Param<unsigned> LFSTSize;
-Param<unsigned> SSITSize;
-
-Param<unsigned> numPhysIntRegs;
-Param<unsigned> numPhysFloatRegs;
-Param<unsigned> numIQEntries;
-Param<unsigned> numROBEntries;
-
-Param<bool> decoupledFrontEnd;
-Param<int> dispatchWidth;
-Param<int> wbWidth;
-
-Param<unsigned> smtNumFetchingThreads;
-Param<std::string> smtFetchPolicy;
-Param<std::string> smtLSQPolicy;
-Param<unsigned> smtLSQThreshold;
-Param<std::string> smtIQPolicy;
-Param<unsigned> smtIQThreshold;
-Param<std::string> smtROBPolicy;
-Param<unsigned> smtROBThreshold;
-Param<std::string> smtCommitPolicy;
-
-Param<unsigned> instShiftAmt;
-
-Param<bool> defer_registration;
-
-Param<bool> function_trace;
-Param<Tick> function_trace_start;
-
-END_DECLARE_SIM_OBJECT_PARAMS(DerivOzoneCPU)
-
-BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
-
- INIT_PARAM(clock, "clock speed"),
- INIT_PARAM(numThreads, "number of HW thread contexts"),
-
-#if FULL_SYSTEM
- INIT_PARAM(system, "System object"),
- INIT_PARAM(cpu_id, "processor ID"),
- INIT_PARAM(itb, "Instruction translation buffer"),
- INIT_PARAM(dtb, "Data translation buffer"),
-#else
- INIT_PARAM(workload, "Processes to run"),
-// INIT_PARAM(page_table, "Page table"),
-#endif // FULL_SYSTEM
-
- INIT_PARAM_DFLT(mem, "Memory", NULL),
-
- INIT_PARAM_DFLT(checker, "Checker CPU", NULL),
-
- INIT_PARAM_DFLT(max_insts_any_thread,
- "Terminate when any thread reaches this inst count",
- 0),
- INIT_PARAM_DFLT(max_insts_all_threads,
- "Terminate when all threads have reached"
- "this inst count",
- 0),
- INIT_PARAM_DFLT(max_loads_any_thread,
- "Terminate when any thread reaches this load count",
- 0),
- INIT_PARAM_DFLT(max_loads_all_threads,
- "Terminate when all threads have reached this load"
- "count",
- 0),
-
- INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL),
- INIT_PARAM_DFLT(dcache, "L1 data cache", NULL),
-
- INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
- INIT_PARAM_DFLT(width, "Width", 1),
- INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1),
- INIT_PARAM_DFLT(backEndWidth, "Back end width", 1),
- INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1),
- INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1),
- INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16),
- INIT_PARAM(numPhysicalRegs, "Number of physical registers"),
- INIT_PARAM_DFLT(maxOutstandingMemOps, "Maximum outstanding memory operations", 4),
-
- INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"),
- INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"),
- INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch"
- "delay"),
- INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"),
- INIT_PARAM(fetchWidth, "Fetch width"),
- INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"),
- INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode"
- "delay"),
- INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"),
- INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"),
- INIT_PARAM(decodeWidth, "Decode width"),
-
- INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename"
- "delay"),
- INIT_PARAM(commitToRenameDelay, "Commit to rename delay"),
- INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"),
- INIT_PARAM(renameWidth, "Rename width"),
-
- INIT_PARAM(commitToIEWDelay, "Commit to "
- "Issue/Execute/Writeback delay"),
- INIT_PARAM(renameToIEWDelay, "Rename to "
- "Issue/Execute/Writeback delay"),
- INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal"
- "to the IEW stage)"),
- INIT_PARAM(issueWidth, "Issue width"),
- INIT_PARAM(executeWidth, "Execute width"),
- INIT_PARAM(executeIntWidth, "Integer execute width"),
- INIT_PARAM(executeFloatWidth, "Floating point execute width"),
- INIT_PARAM(executeBranchWidth, "Branch execute width"),
- INIT_PARAM(executeMemoryWidth, "Memory execute width"),
-
- INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit "
- "delay"),
- INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"),
- INIT_PARAM(commitWidth, "Commit width"),
- INIT_PARAM(squashWidth, "Squash width"),
-
- INIT_PARAM(localPredictorSize, "Size of local predictor"),
- INIT_PARAM(localCtrBits, "Bits per counter"),
- INIT_PARAM(localHistoryTableSize, "Size of local history table"),
- INIT_PARAM(localHistoryBits, "Bits for the local history"),
- INIT_PARAM(globalPredictorSize, "Size of global predictor"),
- INIT_PARAM(globalCtrBits, "Bits per counter"),
- INIT_PARAM(globalHistoryBits, "Bits of history"),
- INIT_PARAM(choicePredictorSize, "Size of choice predictor"),
- INIT_PARAM(choiceCtrBits, "Bits of choice counters"),
-
- INIT_PARAM(BTBEntries, "Number of BTB entries"),
- INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"),
-
- INIT_PARAM(RASSize, "RAS size"),
-
- INIT_PARAM(LQEntries, "Number of load queue entries"),
- INIT_PARAM(SQEntries, "Number of store queue entries"),
- INIT_PARAM(LFSTSize, "Last fetched store table size"),
- INIT_PARAM(SSITSize, "Store set ID table size"),
-
- INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"),
- INIT_PARAM(numPhysFloatRegs, "Number of physical floating point "
- "registers"),
- INIT_PARAM(numIQEntries, "Number of instruction queue entries"),
- INIT_PARAM(numROBEntries, "Number of reorder buffer entries"),
-
- INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true),
- INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0),
- INIT_PARAM_DFLT(wbWidth, "Writeback width", 0),
-
- INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1),
- INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"),
- INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"),
- INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100),
- INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"),
- INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100),
- INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"),
- INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100),
- INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"),
-
- INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"),
- INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
-
- INIT_PARAM(function_trace, "Enable function trace"),
- INIT_PARAM(function_trace_start, "Cycle to start function trace")
-
-END_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
-
-CREATE_SIM_OBJECT(DerivOzoneCPU)
-{
- DerivOzoneCPU *cpu;
-
-#if FULL_SYSTEM
- // Full-system only supports a single thread for the moment.
- int actual_num_threads = 1;
-#else
- // In non-full-system mode, we infer the number of threads from
- // the workload if it's not explicitly specified.
- int actual_num_threads =
- numThreads.isValid() ? numThreads : workload.size();
-
- if (workload.size() == 0) {
- fatal("Must specify at least one workload!");
- }
-
-#endif
-
- SimpleParams *params = new SimpleParams;
-
- params->clock = clock;
-
- params->name = getInstanceName();
- params->numberOfThreads = actual_num_threads;
-
-#if FULL_SYSTEM
- params->system = system;
- params->cpu_id = cpu_id;
- params->itb = itb;
- params->dtb = dtb;
-#else
- params->workload = workload;
-// params->pTable = page_table;
-#endif // FULL_SYSTEM
-
- params->mem = mem;
- params->checker = checker;
- params->max_insts_any_thread = max_insts_any_thread;
- params->max_insts_all_threads = max_insts_all_threads;
- params->max_loads_any_thread = max_loads_any_thread;
- params->max_loads_all_threads = max_loads_all_threads;
-
- //
- // Caches
- //
- params->icacheInterface = icache ? icache->getInterface() : NULL;
- params->dcacheInterface = dcache ? dcache->getInterface() : NULL;
- params->cachePorts = cachePorts;
-
- params->width = width;
- params->frontEndWidth = frontEndWidth;
- params->backEndWidth = backEndWidth;
- params->backEndSquashLatency = backEndSquashLatency;
- params->backEndLatency = backEndLatency;
- params->maxInstBufferSize = maxInstBufferSize;
- params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs;
- params->maxOutstandingMemOps = maxOutstandingMemOps;
-
- params->decodeToFetchDelay = decodeToFetchDelay;
- params->renameToFetchDelay = renameToFetchDelay;
- params->iewToFetchDelay = iewToFetchDelay;
- params->commitToFetchDelay = commitToFetchDelay;
- params->fetchWidth = fetchWidth;
-
- params->renameToDecodeDelay = renameToDecodeDelay;
- params->iewToDecodeDelay = iewToDecodeDelay;
- params->commitToDecodeDelay = commitToDecodeDelay;
- params->fetchToDecodeDelay = fetchToDecodeDelay;
- params->decodeWidth = decodeWidth;
-
- params->iewToRenameDelay = iewToRenameDelay;
- params->commitToRenameDelay = commitToRenameDelay;
- params->decodeToRenameDelay = decodeToRenameDelay;
- params->renameWidth = renameWidth;
-
- params->commitToIEWDelay = commitToIEWDelay;
- params->renameToIEWDelay = renameToIEWDelay;
- params->issueToExecuteDelay = issueToExecuteDelay;
- params->issueWidth = issueWidth;
- params->executeWidth = executeWidth;
- params->executeIntWidth = executeIntWidth;
- params->executeFloatWidth = executeFloatWidth;
- params->executeBranchWidth = executeBranchWidth;
- params->executeMemoryWidth = executeMemoryWidth;
-
- params->iewToCommitDelay = iewToCommitDelay;
- params->renameToROBDelay = renameToROBDelay;
- params->commitWidth = commitWidth;
- params->squashWidth = squashWidth;
-
-
- params->localPredictorSize = localPredictorSize;
- params->localCtrBits = localCtrBits;
- params->localHistoryTableSize = localHistoryTableSize;
- params->localHistoryBits = localHistoryBits;
- params->globalPredictorSize = globalPredictorSize;
- params->globalCtrBits = globalCtrBits;
- params->globalHistoryBits = globalHistoryBits;
- params->choicePredictorSize = choicePredictorSize;
- params->choiceCtrBits = choiceCtrBits;
-
- params->BTBEntries = BTBEntries;
- params->BTBTagSize = BTBTagSize;
-
- params->RASSize = RASSize;
-
- params->LQEntries = LQEntries;
- params->SQEntries = SQEntries;
-
- params->SSITSize = SSITSize;
- params->LFSTSize = LFSTSize;
-
- params->numPhysIntRegs = numPhysIntRegs;
- params->numPhysFloatRegs = numPhysFloatRegs;
- params->numIQEntries = numIQEntries;
- params->numROBEntries = numROBEntries;
-
- params->decoupledFrontEnd = decoupledFrontEnd;
- params->dispatchWidth = dispatchWidth;
- params->wbWidth = wbWidth;
-
- params->smtNumFetchingThreads = smtNumFetchingThreads;
- params->smtFetchPolicy = smtFetchPolicy;
- params->smtIQPolicy = smtIQPolicy;
- params->smtLSQPolicy = smtLSQPolicy;
- params->smtLSQThreshold = smtLSQThreshold;
- params->smtROBPolicy = smtROBPolicy;
- params->smtROBThreshold = smtROBThreshold;
- params->smtCommitPolicy = smtCommitPolicy;
-
- params->instShiftAmt = 2;
-
- params->deferRegistration = defer_registration;
-
- params->functionTrace = function_trace;
- params->functionTraceStart = function_trace_start;
-
- cpu = new DerivOzoneCPU(params);
-
- return cpu;
-}
-
-REGISTER_SIM_OBJECT("DerivOzoneCPU", DerivOzoneCPU)
-
-
-
-////////////////////////////////////////////////////////////////////////
-//
-// OzoneCPU Simulation Object
-//
-
-BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
-
- Param<int> clock;
- Param<int> numThreads;
-
-#if FULL_SYSTEM
-SimObjectParam<System *> system;
-Param<int> cpu_id;
-SimObjectParam<AlphaITB *> itb;
-SimObjectParam<AlphaDTB *> dtb;
-#else
-SimObjectVectorParam<Process *> workload;
-//SimObjectParam<PageTable *> page_table;
-#endif // FULL_SYSTEM
-
-SimObjectParam<FunctionalMemory *> mem;
-
-SimObjectParam<BaseCPU *> checker;
-
-Param<Counter> max_insts_any_thread;
-Param<Counter> max_insts_all_threads;
-Param<Counter> max_loads_any_thread;
-Param<Counter> max_loads_all_threads;
-
-SimObjectParam<BaseCache *> icache;
-SimObjectParam<BaseCache *> dcache;
-
-Param<unsigned> cachePorts;
-Param<unsigned> width;
-Param<unsigned> frontEndWidth;
-Param<unsigned> backEndWidth;
-Param<unsigned> backEndSquashLatency;
-Param<unsigned> backEndLatency;
-Param<unsigned> maxInstBufferSize;
-Param<unsigned> numPhysicalRegs;
-
-Param<unsigned> decodeToFetchDelay;
-Param<unsigned> renameToFetchDelay;
-Param<unsigned> iewToFetchDelay;
-Param<unsigned> commitToFetchDelay;
-Param<unsigned> fetchWidth;
-
-Param<unsigned> renameToDecodeDelay;
-Param<unsigned> iewToDecodeDelay;
-Param<unsigned> commitToDecodeDelay;
-Param<unsigned> fetchToDecodeDelay;
-Param<unsigned> decodeWidth;
-
-Param<unsigned> iewToRenameDelay;
-Param<unsigned> commitToRenameDelay;
-Param<unsigned> decodeToRenameDelay;
-Param<unsigned> renameWidth;
-
-Param<unsigned> commitToIEWDelay;
-Param<unsigned> renameToIEWDelay;
-Param<unsigned> issueToExecuteDelay;
-Param<unsigned> issueWidth;
-Param<unsigned> executeWidth;
-Param<unsigned> executeIntWidth;
-Param<unsigned> executeFloatWidth;
-Param<unsigned> executeBranchWidth;
-Param<unsigned> executeMemoryWidth;
-
-Param<unsigned> iewToCommitDelay;
-Param<unsigned> renameToROBDelay;
-Param<unsigned> commitWidth;
-Param<unsigned> squashWidth;
-
-Param<unsigned> localPredictorSize;
-Param<unsigned> localCtrBits;
-Param<unsigned> localHistoryTableSize;
-Param<unsigned> localHistoryBits;
-Param<unsigned> globalPredictorSize;
-Param<unsigned> globalCtrBits;
-Param<unsigned> globalHistoryBits;
-Param<unsigned> choicePredictorSize;
-Param<unsigned> choiceCtrBits;
-
-Param<unsigned> BTBEntries;
-Param<unsigned> BTBTagSize;
-
-Param<unsigned> RASSize;
-
-Param<unsigned> LQEntries;
-Param<unsigned> SQEntries;
-Param<unsigned> LFSTSize;
-Param<unsigned> SSITSize;
-
-Param<unsigned> numPhysIntRegs;
-Param<unsigned> numPhysFloatRegs;
-Param<unsigned> numIQEntries;
-Param<unsigned> numROBEntries;
-
-Param<bool> decoupledFrontEnd;
-Param<int> dispatchWidth;
-Param<int> wbWidth;
-
-Param<unsigned> smtNumFetchingThreads;
-Param<std::string> smtFetchPolicy;
-Param<std::string> smtLSQPolicy;
-Param<unsigned> smtLSQThreshold;
-Param<std::string> smtIQPolicy;
-Param<unsigned> smtIQThreshold;
-Param<std::string> smtROBPolicy;
-Param<unsigned> smtROBThreshold;
-Param<std::string> smtCommitPolicy;
-
-Param<unsigned> instShiftAmt;
-
-Param<bool> defer_registration;
-
-Param<bool> function_trace;
-Param<Tick> function_trace_start;
-
-END_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
-
-BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
-
- INIT_PARAM(clock, "clock speed"),
- INIT_PARAM(numThreads, "number of HW thread contexts"),
-
-#if FULL_SYSTEM
- INIT_PARAM(system, "System object"),
- INIT_PARAM(cpu_id, "processor ID"),
- INIT_PARAM(itb, "Instruction translation buffer"),
- INIT_PARAM(dtb, "Data translation buffer"),
-#else
- INIT_PARAM(workload, "Processes to run"),
-// INIT_PARAM(page_table, "Page table"),
-#endif // FULL_SYSTEM
-
- INIT_PARAM_DFLT(mem, "Memory", NULL),
-
- INIT_PARAM_DFLT(checker, "Checker CPU", NULL),
-
- INIT_PARAM_DFLT(max_insts_any_thread,
- "Terminate when any thread reaches this inst count",
- 0),
- INIT_PARAM_DFLT(max_insts_all_threads,
- "Terminate when all threads have reached"
- "this inst count",
- 0),
- INIT_PARAM_DFLT(max_loads_any_thread,
- "Terminate when any thread reaches this load count",
- 0),
- INIT_PARAM_DFLT(max_loads_all_threads,
- "Terminate when all threads have reached this load"
- "count",
- 0),
-
- INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL),
- INIT_PARAM_DFLT(dcache, "L1 data cache", NULL),
-
- INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
- INIT_PARAM_DFLT(width, "Width", 1),
- INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1),
- INIT_PARAM_DFLT(backEndWidth, "Back end width", 1),
- INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1),
- INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1),
- INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16),
- INIT_PARAM(numPhysicalRegs, "Number of physical registers"),
-
- INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"),
- INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"),
- INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch"
- "delay"),
- INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"),
- INIT_PARAM(fetchWidth, "Fetch width"),
- INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"),
- INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode"
- "delay"),
- INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"),
- INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"),
- INIT_PARAM(decodeWidth, "Decode width"),
-
- INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename"
- "delay"),
- INIT_PARAM(commitToRenameDelay, "Commit to rename delay"),
- INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"),
- INIT_PARAM(renameWidth, "Rename width"),
-
- INIT_PARAM(commitToIEWDelay, "Commit to "
- "Issue/Execute/Writeback delay"),
- INIT_PARAM(renameToIEWDelay, "Rename to "
- "Issue/Execute/Writeback delay"),
- INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal"
- "to the IEW stage)"),
- INIT_PARAM(issueWidth, "Issue width"),
- INIT_PARAM(executeWidth, "Execute width"),
- INIT_PARAM(executeIntWidth, "Integer execute width"),
- INIT_PARAM(executeFloatWidth, "Floating point execute width"),
- INIT_PARAM(executeBranchWidth, "Branch execute width"),
- INIT_PARAM(executeMemoryWidth, "Memory execute width"),
-
- INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit "
- "delay"),
- INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"),
- INIT_PARAM(commitWidth, "Commit width"),
- INIT_PARAM(squashWidth, "Squash width"),
-
- INIT_PARAM(localPredictorSize, "Size of local predictor"),
- INIT_PARAM(localCtrBits, "Bits per counter"),
- INIT_PARAM(localHistoryTableSize, "Size of local history table"),
- INIT_PARAM(localHistoryBits, "Bits for the local history"),
- INIT_PARAM(globalPredictorSize, "Size of global predictor"),
- INIT_PARAM(globalCtrBits, "Bits per counter"),
- INIT_PARAM(globalHistoryBits, "Bits of history"),
- INIT_PARAM(choicePredictorSize, "Size of choice predictor"),
- INIT_PARAM(choiceCtrBits, "Bits of choice counters"),
-
- INIT_PARAM(BTBEntries, "Number of BTB entries"),
- INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"),
-
- INIT_PARAM(RASSize, "RAS size"),
-
- INIT_PARAM(LQEntries, "Number of load queue entries"),
- INIT_PARAM(SQEntries, "Number of store queue entries"),
- INIT_PARAM(LFSTSize, "Last fetched store table size"),
- INIT_PARAM(SSITSize, "Store set ID table size"),
-
- INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"),
- INIT_PARAM(numPhysFloatRegs, "Number of physical floating point "
- "registers"),
- INIT_PARAM(numIQEntries, "Number of instruction queue entries"),
- INIT_PARAM(numROBEntries, "Number of reorder buffer entries"),
-
- INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true),
- INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0),
- INIT_PARAM_DFLT(wbWidth, "Writeback width", 0),
-
- INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1),
- INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"),
- INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"),
- INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100),
- INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"),
- INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100),
- INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"),
- INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100),
- INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"),
-
- INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"),
- INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
-
- INIT_PARAM(function_trace, "Enable function trace"),
- INIT_PARAM(function_trace_start, "Cycle to start function trace")
-
-END_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
-
-CREATE_SIM_OBJECT(SimpleOzoneCPU)
-{
- SimpleOzoneCPU *cpu;
-
-#if FULL_SYSTEM
- // Full-system only supports a single thread for the moment.
- int actual_num_threads = 1;
-#else
- // In non-full-system mode, we infer the number of threads from
- // the workload if it's not explicitly specified.
- int actual_num_threads =
- numThreads.isValid() ? numThreads : workload.size();
-
- if (workload.size() == 0) {
- fatal("Must specify at least one workload!");
- }
-
-#endif
-
- SimpleParams *params = new SimpleParams;
-
- params->clock = clock;
-
- params->name = getInstanceName();
- params->numberOfThreads = actual_num_threads;
-
-#if FULL_SYSTEM
- params->system = system;
- params->cpu_id = cpu_id;
- params->itb = itb;
- params->dtb = dtb;
-#else
- params->workload = workload;
-// params->pTable = page_table;
-#endif // FULL_SYSTEM
-
- params->mem = mem;
- params->checker = checker;
- params->max_insts_any_thread = max_insts_any_thread;
- params->max_insts_all_threads = max_insts_all_threads;
- params->max_loads_any_thread = max_loads_any_thread;
- params->max_loads_all_threads = max_loads_all_threads;
-
- //
- // Caches
- //
- params->icacheInterface = icache ? icache->getInterface() : NULL;
- params->dcacheInterface = dcache ? dcache->getInterface() : NULL;
- params->cachePorts = cachePorts;
-
- params->width = width;
- params->frontEndWidth = frontEndWidth;
- params->backEndWidth = backEndWidth;
- params->backEndSquashLatency = backEndSquashLatency;
- params->backEndLatency = backEndLatency;
- params->maxInstBufferSize = maxInstBufferSize;
- params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs;
-
- params->decodeToFetchDelay = decodeToFetchDelay;
- params->renameToFetchDelay = renameToFetchDelay;
- params->iewToFetchDelay = iewToFetchDelay;
- params->commitToFetchDelay = commitToFetchDelay;
- params->fetchWidth = fetchWidth;
-
- params->renameToDecodeDelay = renameToDecodeDelay;
- params->iewToDecodeDelay = iewToDecodeDelay;
- params->commitToDecodeDelay = commitToDecodeDelay;
- params->fetchToDecodeDelay = fetchToDecodeDelay;
- params->decodeWidth = decodeWidth;
-
- params->iewToRenameDelay = iewToRenameDelay;
- params->commitToRenameDelay = commitToRenameDelay;
- params->decodeToRenameDelay = decodeToRenameDelay;
- params->renameWidth = renameWidth;
-
- params->commitToIEWDelay = commitToIEWDelay;
- params->renameToIEWDelay = renameToIEWDelay;
- params->issueToExecuteDelay = issueToExecuteDelay;
- params->issueWidth = issueWidth;
- params->executeWidth = executeWidth;
- params->executeIntWidth = executeIntWidth;
- params->executeFloatWidth = executeFloatWidth;
- params->executeBranchWidth = executeBranchWidth;
- params->executeMemoryWidth = executeMemoryWidth;
-
- params->iewToCommitDelay = iewToCommitDelay;
- params->renameToROBDelay = renameToROBDelay;
- params->commitWidth = commitWidth;
- params->squashWidth = squashWidth;
-
-
- params->localPredictorSize = localPredictorSize;
- params->localCtrBits = localCtrBits;
- params->localHistoryTableSize = localHistoryTableSize;
- params->localHistoryBits = localHistoryBits;
- params->globalPredictorSize = globalPredictorSize;
- params->globalCtrBits = globalCtrBits;
- params->globalHistoryBits = globalHistoryBits;
- params->choicePredictorSize = choicePredictorSize;
- params->choiceCtrBits = choiceCtrBits;
-
- params->BTBEntries = BTBEntries;
- params->BTBTagSize = BTBTagSize;
-
- params->RASSize = RASSize;
-
- params->LQEntries = LQEntries;
- params->SQEntries = SQEntries;
-
- params->SSITSize = SSITSize;
- params->LFSTSize = LFSTSize;
-
- params->numPhysIntRegs = numPhysIntRegs;
- params->numPhysFloatRegs = numPhysFloatRegs;
- params->numIQEntries = numIQEntries;
- params->numROBEntries = numROBEntries;
-
- params->decoupledFrontEnd = decoupledFrontEnd;
- params->dispatchWidth = dispatchWidth;
- params->wbWidth = wbWidth;
-
- params->smtNumFetchingThreads = smtNumFetchingThreads;
- params->smtFetchPolicy = smtFetchPolicy;
- params->smtIQPolicy = smtIQPolicy;
- params->smtLSQPolicy = smtLSQPolicy;
- params->smtLSQThreshold = smtLSQThreshold;
- params->smtROBPolicy = smtROBPolicy;
- params->smtROBThreshold = smtROBThreshold;
- params->smtCommitPolicy = smtCommitPolicy;
-
- params->instShiftAmt = 2;
-
- params->deferRegistration = defer_registration;
-
- params->functionTrace = function_trace;
- params->functionTraceStart = function_trace_start;
-
- cpu = new SimpleOzoneCPU(params);
-
- return cpu;
-}
-
-REGISTER_SIM_OBJECT("SimpleOzoneCPU", SimpleOzoneCPU)
-
+++ /dev/null
-/*
- * Copyright (c) 2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "cpu/ozone/dyn_inst_impl.hh"
-#include "cpu/ozone/ozone_impl.hh"
-#include "cpu/ozone/simple_impl.hh"
-
-template class OzoneDynInst<OzoneImpl>;
-template class OzoneDynInst<SimpleImpl>;
-
+++ /dev/null
-/*
- * Copyright (c) 2005-2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __CPU_OZONE_DYN_INST_HH__
-#define __CPU_OZONE_DYN_INST_HH__
-
-#include "arch/isa_traits.hh"
-#include "config/full_system.hh"
-#include "cpu/base_dyn_inst.hh"
-#include "cpu/ozone/cpu.hh" // MUST include this
-#include "cpu/inst_seq.hh"
-#include "cpu/ozone/simple_impl.hh" // Would be nice to not have to include this
-#include "cpu/ozone/ozone_impl.hh"
-
-#include <list>
-#include <vector>
-
-template <class Impl>
-class OzoneDynInst : public BaseDynInst<Impl>
-{
- public:
- // Typedefs
- typedef typename Impl::FullCPU FullCPU;
-
- typedef typename FullCPU::ImplState ImplState;
-
- // Typedef for DynInstPtr. This is really just a RefCountingPtr<OoODynInst>.
- typedef typename Impl::DynInstPtr DynInstPtr;
-
- typedef TheISA::ExtMachInst ExtMachInst;
- typedef TheISA::MachInst MachInst;
- typedef TheISA::MiscReg MiscReg;
- typedef typename std::list<DynInstPtr>::iterator ListIt;
-
- // Note that this is duplicated from the BaseDynInst class; I'm
- // simply not sure the enum would carry through so I could use it
- // in array declarations in this class.
- enum {
- MaxInstSrcRegs = TheISA::MaxInstSrcRegs,
- MaxInstDestRegs = TheISA::MaxInstDestRegs
- };
-
- OzoneDynInst(FullCPU *cpu);
-
- OzoneDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC,
- InstSeqNum seq_num, FullCPU *cpu);
-
- OzoneDynInst(StaticInstPtr inst);
-
- ~OzoneDynInst();
-
- void setSrcInst(DynInstPtr &newSrcInst, int regIdx)
- { srcInsts[regIdx] = newSrcInst; }
-
- bool srcInstReady(int regIdx);
-
- void setPrevDestInst(DynInstPtr &oldDestInst, int regIdx)
- { prevDestInst[regIdx] = oldDestInst; }
-
- DynInstPtr &getPrevDestInst(int regIdx)
- { return prevDestInst[regIdx]; }
-
- void addDependent(DynInstPtr &dependent_inst);
-
- std::vector<DynInstPtr> &getDependents() { return dependents; }
- std::vector<DynInstPtr> &getMemDeps() { return memDependents; }
- std::list<DynInstPtr> &getMemSrcs() { return srcMemInsts; }
-
- void wakeDependents();
-
- void wakeMemDependents();
-
- void addMemDependent(DynInstPtr &inst) { memDependents.push_back(inst); }
-
- void addSrcMemInst(DynInstPtr &inst) { srcMemInsts.push_back(inst); }
-
- void markMemInstReady(OzoneDynInst<Impl> *inst);
-
- // For now I will remove instructions from the list when they wake
- // up. In the future, you only really need a counter.
- bool memDepReady() { return srcMemInsts.empty(); }
-
- private:
- void initInstPtrs();
-
- std::vector<DynInstPtr> dependents;
-
- std::vector<DynInstPtr> memDependents;
-
- std::list<DynInstPtr> srcMemInsts;
-
- /** The instruction that produces the value of the source
- * registers. These may be NULL if the value has already been
- * read from the source instruction.
- */
- DynInstPtr srcInsts[MaxInstSrcRegs];
-
- /**
- * Previous rename instruction for this destination.
- */
- DynInstPtr prevDestInst[MaxInstSrcRegs];
-
- public:
-
- Fault initiateAcc();
-
- Fault completeAcc();
-
- // The register accessor methods provide the index of the
- // instruction's operand (e.g., 0 or 1), not the architectural
- // register index, to simplify the implementation of register
- // renaming. We find the architectural register index by indexing
- // into the instruction's own operand index table. Note that a
- // raw pointer to the StaticInst is provided instead of a
- // ref-counted StaticInstPtr to redice overhead. This is fine as
- // long as these methods don't copy the pointer into any long-term
- // storage (which is pretty hard to imagine they would have reason
- // to do).
-
- uint64_t readIntReg(const StaticInst *si, int idx)
- {
- return srcInsts[idx]->readIntResult();
- }
-
- float readFloatRegSingle(const StaticInst *si, int idx)
- {
- return srcInsts[idx]->readFloatResult();
- }
-
- double readFloatRegDouble(const StaticInst *si, int idx)
- {
- return srcInsts[idx]->readDoubleResult();
- }
-
- uint64_t readFloatRegInt(const StaticInst *si, int idx)
- {
- return srcInsts[idx]->readIntResult();
- }
-
- /** @todo: Make results into arrays so they can handle multiple dest
- * registers.
- */
- void setIntReg(const StaticInst *si, int idx, uint64_t val)
- {
- BaseDynInst<Impl>::setIntReg(si, idx, val);
- }
-
- void setFloatRegSingle(const StaticInst *si, int idx, float val)
- {
- BaseDynInst<Impl>::setFloatRegSingle(si, idx, val);
- }
-
- void setFloatRegDouble(const StaticInst *si, int idx, double val)
- {
- BaseDynInst<Impl>::setFloatRegDouble(si, idx, val);
- }
-
- void setFloatRegInt(const StaticInst *si, int idx, uint64_t val)
- {
- BaseDynInst<Impl>::setFloatRegInt(si, idx, val);
- }
-
- void setIntResult(uint64_t result) { this->instResult.integer = result; }
- void setDoubleResult(double result) { this->instResult.dbl = result; }
-
- bool srcsReady();
- bool eaSrcsReady();
-
- Fault execute();
-
- Fault executeEAComp()
- { return NoFault; }
-
- Fault executeMemAcc()
- { return this->staticInst->memAccInst()->execute(this, this->traceData); }
-
- void clearDependents();
-
- void clearMemDependents();
-
- public:
- // ISA stuff
- MiscReg readMiscReg(int misc_reg);
-
- MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault);
-
- Fault setMiscReg(int misc_reg, const MiscReg &val);
-
- Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val);
-
-#if FULL_SYSTEM
- Fault hwrei();
- int readIntrFlag();
- void setIntrFlag(int val);
- bool inPalMode();
- void trap(Fault fault);
- bool simPalCheck(int palFunc);
-#else
- void syscall();
-#endif
-
- ListIt iqIt;
- bool iqItValid;
-};
-
-#endif // __CPU_OZONE_DYN_INST_HH__
+++ /dev/null
-/*
- * Copyright (c) 2005-2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "arch/faults.hh"
-#include "arch/isa_traits.hh"
-#include "config/full_system.hh"
-#include "cpu/ozone/dyn_inst.hh"
-#include "kern/kernel_stats.hh"
-
-using namespace TheISA;
-
-template <class Impl>
-OzoneDynInst<Impl>::OzoneDynInst(FullCPU *cpu)
- : BaseDynInst<Impl>(0, 0, 0, 0, cpu)
-{
- this->setResultReady();
-
- initInstPtrs();
-}
-
-template <class Impl>
-OzoneDynInst<Impl>::OzoneDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC,
- InstSeqNum seq_num, FullCPU *cpu)
- : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu)
-{
- initInstPtrs();
-}
-
-template <class Impl>
-OzoneDynInst<Impl>::OzoneDynInst(StaticInstPtr _staticInst)
- : BaseDynInst<Impl>(_staticInst)
-{
- initInstPtrs();
-}
-
-template <class Impl>
-OzoneDynInst<Impl>::~OzoneDynInst()
-{
- DPRINTF(BE, "[sn:%lli] destructor called\n", this->seqNum);
- for (int i = 0; i < this->numSrcRegs(); ++i) {
- srcInsts[i] = NULL;
- }
-
- for (int i = 0; i < this->numDestRegs(); ++i) {
- prevDestInst[i] = NULL;
- }
-
- dependents.clear();
-}
-
-template <class Impl>
-Fault
-OzoneDynInst<Impl>::execute()
-{
- // @todo: Pretty convoluted way to avoid squashing from happening when using
- // the XC during an instruction's execution (specifically for instructions
- // that have sideeffects that use the XC). Fix this.
- bool in_syscall = this->thread->inSyscall;
- this->thread->inSyscall = true;
-
- this->fault = this->staticInst->execute(this, this->traceData);
-
- this->thread->inSyscall = in_syscall;
-
- return this->fault;
-}
-
-template <class Impl>
-Fault
-OzoneDynInst<Impl>::initiateAcc()
-{
- // @todo: Pretty convoluted way to avoid squashing from happening when using
- // the XC during an instruction's execution (specifically for instructions
- // that have sideeffects that use the XC). Fix this.
- bool in_syscall = this->thread->inSyscall;
- this->thread->inSyscall = true;
-
- this->fault = this->staticInst->initiateAcc(this, this->traceData);
-
- this->thread->inSyscall = in_syscall;
-
- return this->fault;
-}
-
-template <class Impl>
-Fault
-OzoneDynInst<Impl>::completeAcc()
-{
- if (this->isLoad()) {
- this->fault = this->staticInst->completeAcc(this->req->data,
- this,
- this->traceData);
- } else if (this->isStore()) {
- this->fault = this->staticInst->completeAcc((uint8_t*)&this->req->result,
- this,
- this->traceData);
- } else {
- panic("Unknown type!");
- }
-
- return this->fault;
-}
-
-template <class Impl>
-bool
-OzoneDynInst<Impl>::srcInstReady(int regIdx)
-{
- return srcInsts[regIdx]->isResultReady();
-}
-
-template <class Impl>
-void
-OzoneDynInst<Impl>::addDependent(DynInstPtr &dependent_inst)
-{
- dependents.push_back(dependent_inst);
-}
-
-template <class Impl>
-void
-OzoneDynInst<Impl>::wakeDependents()
-{
- for (int i = 0; i < dependents.size(); ++i) {
- dependents[i]->markSrcRegReady();
- }
-}
-
-template <class Impl>
-void
-OzoneDynInst<Impl>::wakeMemDependents()
-{
- for (int i = 0; i < memDependents.size(); ++i) {
- memDependents[i]->markMemInstReady(this);
- }
-}
-
-template <class Impl>
-void
-OzoneDynInst<Impl>::markMemInstReady(OzoneDynInst<Impl> *inst)
-{
- ListIt mem_it = srcMemInsts.begin();
- while ((*mem_it) != inst && mem_it != srcMemInsts.end()) {
- mem_it++;
- }
- assert(mem_it != srcMemInsts.end());
-
- srcMemInsts.erase(mem_it);
-}
-
-template <class Impl>
-void
-OzoneDynInst<Impl>::initInstPtrs()
-{
- for (int i = 0; i < MaxInstSrcRegs; ++i) {
- srcInsts[i] = NULL;
- }
- iqItValid = false;
-}
-
-template <class Impl>
-bool
-OzoneDynInst<Impl>::srcsReady()
-{
- for (int i = 0; i < this->numSrcRegs(); ++i) {
- if (!srcInsts[i]->isResultReady())
- return false;
- }
-
- return true;
-}
-
-template <class Impl>
-bool
-OzoneDynInst<Impl>::eaSrcsReady()
-{
- for (int i = 1; i < this->numSrcRegs(); ++i) {
- if (!srcInsts[i]->isResultReady())
- return false;
- }
-
- return true;
-}
-
-template <class Impl>
-void
-OzoneDynInst<Impl>::clearDependents()
-{
- dependents.clear();
- for (int i = 0; i < this->numSrcRegs(); ++i) {
- srcInsts[i] = NULL;
- }
- for (int i = 0; i < this->numDestRegs(); ++i) {
- prevDestInst[i] = NULL;
- }
-}
-
-template <class Impl>
-void
-OzoneDynInst<Impl>::clearMemDependents()
-{
- memDependents.clear();
-}
-
-template <class Impl>
-MiscReg
-OzoneDynInst<Impl>::readMiscReg(int misc_reg)
-{
- return this->thread->readMiscReg(misc_reg);
-}
-
-template <class Impl>
-MiscReg
-OzoneDynInst<Impl>::readMiscRegWithEffect(int misc_reg, Fault &fault)
-{
- return this->thread->readMiscRegWithEffect(misc_reg, fault);
-}
-
-template <class Impl>
-Fault
-OzoneDynInst<Impl>::setMiscReg(int misc_reg, const MiscReg &val)
-{
- this->setIntResult(val);
- return this->thread->setMiscReg(misc_reg, val);
-}
-
-template <class Impl>
-Fault
-OzoneDynInst<Impl>::setMiscRegWithEffect(int misc_reg, const MiscReg &val)
-{
- return this->thread->setMiscRegWithEffect(misc_reg, val);
-}
-
-#if FULL_SYSTEM
-
-template <class Impl>
-Fault
-OzoneDynInst<Impl>::hwrei()
-{
- if (!this->cpu->inPalMode(this->readPC()))
- return new AlphaISA::UnimplementedOpcodeFault;
-
- this->setNextPC(this->thread->readMiscReg(AlphaISA::IPR_EXC_ADDR));
-
- this->cpu->hwrei();
-
- // FIXME: XXX check for interrupts? XXX
- return NoFault;
-}
-
-template <class Impl>
-int
-OzoneDynInst<Impl>::readIntrFlag()
-{
-return this->cpu->readIntrFlag();
-}
-
-template <class Impl>
-void
-OzoneDynInst<Impl>::setIntrFlag(int val)
-{
- this->cpu->setIntrFlag(val);
-}
-
-template <class Impl>
-bool
-OzoneDynInst<Impl>::inPalMode()
-{
- return this->cpu->inPalMode();
-}
-
-template <class Impl>
-void
-OzoneDynInst<Impl>::trap(Fault fault)
-{
- fault->invoke(this->thread->getXCProxy());
-}
-
-template <class Impl>
-bool
-OzoneDynInst<Impl>::simPalCheck(int palFunc)
-{
- return this->cpu->simPalCheck(palFunc);
-}
-#else
-template <class Impl>
-void
-OzoneDynInst<Impl>::syscall()
-{
- this->cpu->syscall();
-}
-#endif
+++ /dev/null
-
-#include "cpu/ozone/front_end_impl.hh"
-#include "cpu/ozone/ozone_impl.hh"
-#include "cpu/ozone/simple_impl.hh"
-
-template class FrontEnd<OzoneImpl>;
-template class FrontEnd<SimpleImpl>;
+++ /dev/null
-/*
- * Copyright (c) 2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __CPU_OZONE_FRONT_END_HH__
-#define __CPU_OZONE_FRONT_END_HH__
-
-#include <deque>
-
-#include "cpu/inst_seq.hh"
-#include "cpu/o3/bpred_unit.hh"
-#include "cpu/ozone/rename_table.hh"
-#include "mem/mem_req.hh"
-#include "sim/eventq.hh"
-#include "sim/stats.hh"
-
-class ExecContext;
-class MemInterface;
-template <class>
-class OzoneThreadState;
-class PageTable;
-template <class>
-class TimeBuffer;
-
-template <class Impl>
-class FrontEnd
-{
- public:
- typedef typename Impl::Params Params;
- typedef typename Impl::DynInst DynInst;
- typedef typename Impl::DynInstPtr DynInstPtr;
- typedef typename Impl::FullCPU FullCPU;
- typedef typename Impl::BackEnd BackEnd;
-
- typedef typename Impl::FullCPU::OzoneXC OzoneXC;
- typedef typename Impl::FullCPU::CommStruct CommStruct;
-
- FrontEnd(Params *params);
-
- std::string name() const;
-
- void setCPU(FullCPU *cpu_ptr)
- { cpu = cpu_ptr; }
-
- void setBackEnd(BackEnd *back_end_ptr)
- { backEnd = back_end_ptr; }
-
- void setCommBuffer(TimeBuffer<CommStruct> *_comm);
-
- void setXC(ExecContext *xc_ptr);
-
- void setThreadState(OzoneThreadState<Impl> *thread_ptr)
- { thread = thread_ptr; }
-
- void regStats();
-
- void tick();
- Fault fetchCacheLine();
- void processInst(DynInstPtr &inst);
- void squash(const InstSeqNum &squash_num, const Addr &next_PC,
- const bool is_branch = false, const bool branch_taken = false);
- DynInstPtr getInst();
-
- void processCacheCompletion(MemReqPtr &req);
-
- void addFreeRegs(int num_freed);
-
- bool isEmpty() { return instBuffer.empty(); }
-
- void switchOut();
-
- void doSwitchOut();
-
- void takeOverFrom(ExecContext *old_xc = NULL);
-
- bool isSwitchedOut() { return switchedOut; }
-
- bool switchedOut;
-
- private:
- bool updateStatus();
-
- void checkBE();
- DynInstPtr getInstFromCacheline();
- void renameInst(DynInstPtr &inst);
- // Returns true if we need to stop the front end this cycle
- bool processBarriers(DynInstPtr &inst);
-
- void handleFault(Fault &fault);
- public:
- Fault getFault() { return fetchFault; }
- private:
- Fault fetchFault;
-
- // Align an address (typically a PC) to the start of an I-cache block.
- // We fold in the PISA 64- to 32-bit conversion here as well.
- Addr icacheBlockAlignPC(Addr addr)
- {
- addr = TheISA::realPCToFetchPC(addr);
- return (addr & ~(cacheBlkMask));
- }
-
- InstSeqNum getAndIncrementInstSeq()
- { return cpu->globalSeqNum++; }
-
- public:
- FullCPU *cpu;
-
- BackEnd *backEnd;
-
- ExecContext *xc;
-
- OzoneThreadState<Impl> *thread;
-
- enum Status {
- Running,
- Idle,
- IcacheMissStall,
- IcacheMissComplete,
- SerializeBlocked,
- SerializeComplete,
- RenameBlocked,
- QuiescePending,
- TrapPending,
- BEBlocked
- };
-
- Status status;
-
- private:
- TimeBuffer<CommStruct> *comm;
- typename TimeBuffer<CommStruct>::wire fromCommit;
-
- typedef typename Impl::BranchPred BranchPred;
-
- BranchPred branchPred;
-
- class ICacheCompletionEvent : public Event
- {
- private:
- MemReqPtr req;
- FrontEnd *frontEnd;
-
- public:
- ICacheCompletionEvent(MemReqPtr &_req, FrontEnd *_fe);
-
- virtual void process();
- virtual const char *description();
- };
-
- MemInterface *icacheInterface;
-
-#if !FULL_SYSTEM
- PageTable *pTable;
-#endif
-
- MemReqPtr memReq;
-
- /** Mask to get a cache block's address. */
- Addr cacheBlkMask;
-
- unsigned cacheBlkSize;
-
- Addr cacheBlkPC;
-
- /** The cache line being fetched. */
- uint8_t *cacheData;
-
- bool fetchCacheLineNextCycle;
-
- bool cacheBlkValid;
-
- public:
- RenameTable<Impl> renameTable;
-
- private:
- Addr PC;
- Addr nextPC;
-
- public:
- void setPC(Addr val) { PC = val; }
- void setNextPC(Addr val) { nextPC = val; }
-
- void wakeFromQuiesce();
-
- void dumpInsts();
-
- private:
- typedef typename std::deque<DynInstPtr> InstBuff;
- typedef typename InstBuff::iterator InstBuffIt;
-
- InstBuff instBuffer;
-
- int instBufferSize;
-
- int maxInstBufferSize;
-
- int width;
-
- int freeRegs;
-
- int numPhysRegs;
-
- bool serializeNext;
-
- DynInstPtr barrierInst;
-
- public:
- bool interruptPending;
- private:
- // number of idle cycles
-/*
- Stats::Average<> notIdleFraction;
- Stats::Formula idleFraction;
-*/
- // @todo: Consider making these vectors and tracking on a per thread basis.
- /** Stat for total number of cycles stalled due to an icache miss. */
- Stats::Scalar<> icacheStallCycles;
- /** Stat for total number of fetched instructions. */
- Stats::Scalar<> fetchedInsts;
- Stats::Scalar<> fetchedBranches;
- /** Stat for total number of predicted branches. */
- Stats::Scalar<> predictedBranches;
- /** Stat for total number of cycles spent fetching. */
- Stats::Scalar<> fetchCycles;
-
- Stats::Scalar<> fetchIdleCycles;
- /** Stat for total number of cycles spent squashing. */
- Stats::Scalar<> fetchSquashCycles;
- /** Stat for total number of cycles spent blocked due to other stages in
- * the pipeline.
- */
- Stats::Scalar<> fetchBlockedCycles;
- /** Stat for total number of fetched cache lines. */
- Stats::Scalar<> fetchedCacheLines;
-
- Stats::Scalar<> fetchIcacheSquashes;
- /** Distribution of number of instructions fetched each cycle. */
- Stats::Distribution<> fetchNisnDist;
-// Stats::Vector<> qfull_iq_occupancy;
-// Stats::VectorDistribution<> qfull_iq_occ_dist_;
- Stats::Formula idleRate;
- Stats::Formula branchRate;
- Stats::Formula fetchRate;
- Stats::Scalar<> IFQCount; // cumulative IFQ occupancy
- Stats::Formula IFQOccupancy;
- Stats::Formula IFQLatency;
- Stats::Scalar<> IFQFcount; // cumulative IFQ full count
- Stats::Formula IFQFullRate;
-
- Stats::Scalar<> dispatchCountStat;
- Stats::Scalar<> dispatchedSerializing;
- Stats::Scalar<> dispatchedTempSerializing;
- Stats::Scalar<> dispatchSerializeStallCycles;
- Stats::Formula dispatchRate;
- Stats::Formula regIntFull;
- Stats::Formula regFpFull;
-};
-
-#endif // __CPU_OZONE_FRONT_END_HH__
+++ /dev/null
-/*
- * Copyright (c) 2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "arch/faults.hh"
-#include "arch/isa_traits.hh"
-#include "base/statistics.hh"
-#include "cpu/exec_context.hh"
-#include "cpu/exetrace.hh"
-#include "cpu/ozone/front_end.hh"
-#include "mem/mem_interface.hh"
-#include "sim/byte_swap.hh"
-
-using namespace TheISA;
-
-template <class Impl>
-FrontEnd<Impl>::FrontEnd(Params *params)
- : branchPred(params),
- icacheInterface(params->icacheInterface),
- instBufferSize(0),
- maxInstBufferSize(params->maxInstBufferSize),
- width(params->frontEndWidth),
- freeRegs(params->numPhysicalRegs),
- numPhysRegs(params->numPhysicalRegs),
- serializeNext(false),
- interruptPending(false)
-{
- switchedOut = false;
-
- status = Idle;
-
- memReq = NULL;
- // Size of cache block.
- cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;
-
- assert(isPowerOf2(cacheBlkSize));
-
- // Create mask to get rid of offset bits.
- cacheBlkMask = (cacheBlkSize - 1);
-
- // Create space to store a cache line.
- cacheData = new uint8_t[cacheBlkSize];
-
- fetchCacheLineNextCycle = true;
-
- cacheBlkValid = false;
-
-#if !FULL_SYSTEM
-// pTable = params->pTable;
-#endif
- fetchFault = NoFault;
-}
-
-template <class Impl>
-std::string
-FrontEnd<Impl>::name() const
-{
- return cpu->name() + ".frontend";
-}
-
-template <class Impl>
-void
-FrontEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
-{
- comm = _comm;
- // @todo: Hardcoded for now. Allow this to be set by a latency.
- fromCommit = comm->getWire(-1);
-}
-
-template <class Impl>
-void
-FrontEnd<Impl>::setXC(ExecContext *xc_ptr)
-{
- xc = xc_ptr;
-}
-
-template <class Impl>
-void
-FrontEnd<Impl>::regStats()
-{
- icacheStallCycles
- .name(name() + ".icacheStallCycles")
- .desc("Number of cycles fetch is stalled on an Icache miss")
- .prereq(icacheStallCycles);
-
- fetchedInsts
- .name(name() + ".fetchedInsts")
- .desc("Number of instructions fetch has processed")
- .prereq(fetchedInsts);
-
- fetchedBranches
- .name(name() + ".fetchedBranches")
- .desc("Number of fetched branches")
- .prereq(fetchedBranches);
-
- predictedBranches
- .name(name() + ".predictedBranches")
- .desc("Number of branches that fetch has predicted taken")
- .prereq(predictedBranches);
-
- fetchCycles
- .name(name() + ".fetchCycles")
- .desc("Number of cycles fetch has run and was not squashing or"
- " blocked")
- .prereq(fetchCycles);
-
- fetchIdleCycles
- .name(name() + ".fetchIdleCycles")
- .desc("Number of cycles fetch was idle")
- .prereq(fetchIdleCycles);
-
- fetchSquashCycles
- .name(name() + ".fetchSquashCycles")
- .desc("Number of cycles fetch has spent squashing")
- .prereq(fetchSquashCycles);
-
- fetchBlockedCycles
- .name(name() + ".fetchBlockedCycles")
- .desc("Number of cycles fetch has spent blocked")
- .prereq(fetchBlockedCycles);
-
- fetchedCacheLines
- .name(name() + ".fetchedCacheLines")
- .desc("Number of cache lines fetched")
- .prereq(fetchedCacheLines);
-
- fetchIcacheSquashes
- .name(name() + ".fetchIcacheSquashes")
- .desc("Number of outstanding Icache misses that were squashed")
- .prereq(fetchIcacheSquashes);
-
- fetchNisnDist
- .init(/* base value */ 0,
- /* last value */ width,
- /* bucket size */ 1)
- .name(name() + ".rateDist")
- .desc("Number of instructions fetched each cycle (Total)")
- .flags(Stats::pdf);
-
- idleRate
- .name(name() + ".idleRate")
- .desc("Percent of cycles fetch was idle")
- .prereq(idleRate);
- idleRate = fetchIdleCycles * 100 / cpu->numCycles;
-
- branchRate
- .name(name() + ".branchRate")
- .desc("Number of branch fetches per cycle")
- .flags(Stats::total);
- branchRate = fetchedBranches / cpu->numCycles;
-
- fetchRate
- .name(name() + ".rate")
- .desc("Number of inst fetches per cycle")
- .flags(Stats::total);
- fetchRate = fetchedInsts / cpu->numCycles;
-
- IFQCount
- .name(name() + ".IFQ:count")
- .desc("cumulative IFQ occupancy")
- ;
-
- IFQFcount
- .name(name() + ".IFQ:fullCount")
- .desc("cumulative IFQ full count")
- .flags(Stats::total)
- ;
-
- IFQOccupancy
- .name(name() + ".IFQ:occupancy")
- .desc("avg IFQ occupancy (inst's)")
- ;
- IFQOccupancy = IFQCount / cpu->numCycles;
-
- IFQLatency
- .name(name() + ".IFQ:latency")
- .desc("avg IFQ occupant latency (cycle's)")
- .flags(Stats::total)
- ;
-
- IFQFullRate
- .name(name() + ".IFQ:fullRate")
- .desc("fraction of time (cycles) IFQ was full")
- .flags(Stats::total);
- ;
- IFQFullRate = IFQFcount * Stats::constant(100) / cpu->numCycles;
-
- dispatchCountStat
- .name(name() + ".DIS:count")
- .desc("cumulative count of dispatched insts")
- .flags(Stats::total)
- ;
-
- dispatchedSerializing
- .name(name() + ".DIS:serializingInsts")
- .desc("count of serializing insts dispatched")
- .flags(Stats::total)
- ;
-
- dispatchedTempSerializing
- .name(name() + ".DIS:tempSerializingInsts")
- .desc("count of temporary serializing insts dispatched")
- .flags(Stats::total)
- ;
-
- dispatchSerializeStallCycles
- .name(name() + ".DIS:serializeStallCycles")
- .desc("count of cycles dispatch stalled for serializing inst")
- .flags(Stats::total)
- ;
-
- dispatchRate
- .name(name() + ".DIS:rate")
- .desc("dispatched insts per cycle")
- .flags(Stats::total)
- ;
- dispatchRate = dispatchCountStat / cpu->numCycles;
-
- regIntFull
- .name(name() + ".REG:int:full")
- .desc("number of cycles where there were no INT registers")
- ;
-
- regFpFull
- .name(name() + ".REG:fp:full")
- .desc("number of cycles where there were no FP registers")
- ;
- IFQLatency = IFQOccupancy / dispatchRate;
-
- branchPred.regStats();
-}
-
-template <class Impl>
-void
-FrontEnd<Impl>::tick()
-{
- if (switchedOut)
- return;
-
- // @todo: Maybe I want to just have direct communication...
- if (fromCommit->doneSeqNum) {
- branchPred.update(fromCommit->doneSeqNum, 0);
- }
-
- IFQCount += instBufferSize;
- IFQFcount += instBufferSize == maxInstBufferSize;
-
- // Fetch cache line
- if (status == IcacheMissComplete) {
- cacheBlkValid = true;
-
- status = Running;
- if (barrierInst)
- status = SerializeBlocked;
- if (freeRegs <= 0)
- status = RenameBlocked;
- checkBE();
- } else if (status == IcacheMissStall) {
- DPRINTF(FE, "Still in Icache miss stall.\n");
- icacheStallCycles++;
- return;
- }
-
- if (status == RenameBlocked || status == SerializeBlocked ||
- status == TrapPending || status == BEBlocked) {
- // Will cause a one cycle bubble between changing state and
- // restarting.
- DPRINTF(FE, "In blocked status.\n");
-
- fetchBlockedCycles++;
-
- if (status == SerializeBlocked) {
- dispatchSerializeStallCycles++;
- }
- updateStatus();
- return;
- } else if (status == QuiescePending) {
- DPRINTF(FE, "Waiting for quiesce to execute or get squashed.\n");
- return;
- } else if (status != IcacheMissComplete) {
- if (fetchCacheLineNextCycle) {
- Fault fault = fetchCacheLine();
- if (fault != NoFault) {
- handleFault(fault);
- fetchFault = fault;
- return;
- }
- fetchCacheLineNextCycle = false;
- }
- // If miss, stall until it returns.
- if (status == IcacheMissStall) {
- // Tell CPU to not tick me for now.
- return;
- }
- }
-
- fetchCycles++;
-
- int num_inst = 0;
-
- // Otherwise loop and process instructions.
- // One way to hack infinite width is to set width and maxInstBufferSize
- // both really high. Inelegant, but probably will work.
- while (num_inst < width &&
- instBufferSize < maxInstBufferSize) {
- // Get instruction from cache line.
- DynInstPtr inst = getInstFromCacheline();
-
- if (!inst) {
- // PC is no longer in the cache line, end fetch.
- // Might want to check this at the end of the cycle so that
- // there's no cycle lost to checking for a new cache line.
- DPRINTF(FE, "Need to get new cache line\n");
- fetchCacheLineNextCycle = true;
- break;
- }
-
- processInst(inst);
-
- if (status == SerializeBlocked) {
- break;
- }
-
- // Possibly push into a time buffer that estimates the front end
- // latency
- instBuffer.push_back(inst);
- ++instBufferSize;
- ++num_inst;
-
-#if FULL_SYSTEM
- if (inst->isQuiesce()) {
- warn("%lli: Quiesce instruction encountered, halting fetch!", curTick);
- status = QuiescePending;
- break;
- }
-#endif
-
- if (inst->predTaken()) {
- // Start over with tick?
- break;
- } else if (freeRegs <= 0) {
- DPRINTF(FE, "Ran out of free registers to rename to!\n");
- status = RenameBlocked;
- break;
- } else if (serializeNext) {
- break;
- }
- }
-
- fetchNisnDist.sample(num_inst);
- checkBE();
-
- DPRINTF(FE, "Num insts processed: %i, Inst Buffer size: %i, Free "
- "Regs %i\n", num_inst, instBufferSize, freeRegs);
-}
-
-template <class Impl>
-Fault
-FrontEnd<Impl>::fetchCacheLine()
-{
- // Read a cache line, based on the current PC.
-#if FULL_SYSTEM
- // Flag to say whether or not address is physical addr.
- unsigned flags = cpu->inPalMode(PC) ? PHYSICAL : 0;
-#else
- unsigned flags = 0;
-#endif // FULL_SYSTEM
- Fault fault = NoFault;
-
- if (interruptPending && flags == 0) {
- return fault;
- }
-
- // Align the fetch PC so it's at the start of a cache block.
- Addr fetch_PC = icacheBlockAlignPC(PC);
-
- DPRINTF(FE, "Fetching cache line starting at %#x.\n", fetch_PC);
-
- // Setup the memReq to do a read of the first isntruction's address.
- // Set the appropriate read size and flags as well.
- memReq = new MemReq();
-
- memReq->asid = 0;
- memReq->thread_num = 0;
- memReq->data = new uint8_t[64];
- memReq->xc = xc;
- memReq->cmd = Read;
- memReq->reset(fetch_PC, cacheBlkSize, flags);
-
- // Translate the instruction request.
- fault = cpu->translateInstReq(memReq);
-
- // Now do the timing access to see whether or not the instruction
- // exists within the cache.
- if (icacheInterface && fault == NoFault) {
-#if FULL_SYSTEM
- if (cpu->system->memctrl->badaddr(memReq->paddr) ||
- memReq->flags & UNCACHEABLE) {
- DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a "
- "misspeculating path!",
- memReq->paddr);
- return TheISA::genMachineCheckFault();
- }
-#endif
-
- memReq->completionEvent = NULL;
-
- memReq->time = curTick;
- fault = cpu->mem->read(memReq, cacheData);
-
- MemAccessResult res = icacheInterface->access(memReq);
-
- // If the cache missed then schedule an event to wake
- // up this stage once the cache miss completes.
- if (icacheInterface->doEvents() && res != MA_HIT) {
- memReq->completionEvent = new ICacheCompletionEvent(memReq, this);
-
- status = IcacheMissStall;
-
- cacheBlkValid = false;
-
- DPRINTF(FE, "Cache miss.\n");
- } else {
- DPRINTF(FE, "Cache hit.\n");
-
- cacheBlkValid = true;
-
-// memcpy(cacheData, memReq->data, memReq->size);
- }
- }
-
- // Note that this will set the cache block PC a bit earlier than it should
- // be set.
- cacheBlkPC = fetch_PC;
-
- ++fetchedCacheLines;
-
- DPRINTF(FE, "Done fetching cache line.\n");
-
- return fault;
-}
-
-template <class Impl>
-void
-FrontEnd<Impl>::processInst(DynInstPtr &inst)
-{
- if (processBarriers(inst)) {
- return;
- }
-
- Addr inst_PC = inst->readPC();
-
- if (!inst->isControl()) {
- inst->setPredTarg(inst->readNextPC());
- } else {
- fetchedBranches++;
- if (branchPred.predict(inst, inst_PC, inst->threadNumber)) {
- predictedBranches++;
- }
- }
-
- Addr next_PC = inst->readPredTarg();
-
- DPRINTF(FE, "[sn:%lli] Predicted and processed inst PC %#x, next PC "
- "%#x\n", inst->seqNum, inst_PC, next_PC);
-
-// inst->setNextPC(next_PC);
-
- // Not sure where I should set this
- PC = next_PC;
-
- renameInst(inst);
-}
-
-template <class Impl>
-bool
-FrontEnd<Impl>::processBarriers(DynInstPtr &inst)
-{
- if (serializeNext) {
- inst->setSerializeBefore();
- serializeNext = false;
- } else if (!inst->isSerializing() &&
- !inst->isIprAccess() &&
- !inst->isStoreConditional()) {
- return false;
- }
-
- if ((inst->isIprAccess() || inst->isSerializeBefore()) &&
- !inst->isSerializeHandled()) {
- DPRINTF(FE, "Serialize before instruction encountered.\n");
-
- if (!inst->isTempSerializeBefore()) {
- dispatchedSerializing++;
- inst->setSerializeHandled();
- } else {
- dispatchedTempSerializing++;
- }
-
- // Change status over to SerializeBlocked so that other stages know
- // what this is blocked on.
- status = SerializeBlocked;
-
- barrierInst = inst;
- return true;
- } else if ((inst->isStoreConditional() || inst->isSerializeAfter())
- && !inst->isSerializeHandled()) {
- DPRINTF(FE, "Serialize after instruction encountered.\n");
-
- inst->setSerializeHandled();
-
- dispatchedSerializing++;
-
- serializeNext = true;
- return false;
- }
- return false;
-}
-
-template <class Impl>
-void
-FrontEnd<Impl>::handleFault(Fault &fault)
-{
- DPRINTF(FE, "Fault at fetch, telling commit\n");
-
- // We're blocked on the back end until it handles this fault.
- status = TrapPending;
-
- // Get a sequence number.
- InstSeqNum inst_seq = getAndIncrementInstSeq();
- // We will use a nop in order to carry the fault.
- ExtMachInst ext_inst = TheISA::NoopMachInst;
-
- // Create a new DynInst from the dummy nop.
- DynInstPtr instruction = new DynInst(ext_inst, PC,
- PC+sizeof(MachInst),
- inst_seq, cpu);
- instruction->setPredTarg(instruction->readNextPC());
-// instruction->setThread(tid);
-
-// instruction->setASID(tid);
-
- instruction->setState(thread);
-
- instruction->traceData = NULL;
-
- instruction->fault = fault;
- instruction->setCanIssue();
- instBuffer.push_back(instruction);
- ++instBufferSize;
-}
-
-template <class Impl>
-void
-FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
- const bool is_branch, const bool branch_taken)
-{
- DPRINTF(FE, "Squashing from [sn:%lli], setting PC to %#x\n",
- squash_num, next_PC);
-
- if (fetchFault != NoFault)
- fetchFault = NoFault;
-
- while (!instBuffer.empty() &&
- instBuffer.back()->seqNum > squash_num) {
- DynInstPtr inst = instBuffer.back();
-
- DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n",
- inst->seqNum, inst->readPC());
-
- inst->clearDependents();
-
- instBuffer.pop_back();
- --instBufferSize;
-
- freeRegs+= inst->numDestRegs();
- }
-
- // Copy over rename table from the back end.
- renameTable.copyFrom(backEnd->renameTable);
-
- PC = next_PC;
-
- // Update BP with proper information.
- if (is_branch) {
- branchPred.squash(squash_num, next_PC, branch_taken, 0);
- } else {
- branchPred.squash(squash_num, 0);
- }
-
- // Clear the icache miss if it's outstanding.
- if (status == IcacheMissStall && icacheInterface) {
- DPRINTF(FE, "Squashing outstanding Icache miss.\n");
- memReq = NULL;
- }
-
- if (status == SerializeBlocked) {
- assert(barrierInst->seqNum > squash_num);
- barrierInst = NULL;
- }
-
- // Unless this squash originated from the front end, we're probably
- // in running mode now.
- // Actually might want to make this latency dependent.
- status = Running;
- fetchCacheLineNextCycle = true;
-}
-
-template <class Impl>
-typename Impl::DynInstPtr
-FrontEnd<Impl>::getInst()
-{
- if (instBufferSize == 0) {
- return NULL;
- }
-
- DynInstPtr inst = instBuffer.front();
-
- instBuffer.pop_front();
-
- --instBufferSize;
-
- dispatchCountStat++;
-
- return inst;
-}
-
-template <class Impl>
-void
-FrontEnd<Impl>::processCacheCompletion(MemReqPtr &req)
-{
- DPRINTF(FE, "Processing cache completion\n");
-
- // Do something here.
- if (status != IcacheMissStall ||
- req != memReq ||
- switchedOut) {
- DPRINTF(FE, "Previous fetch was squashed.\n");
- fetchIcacheSquashes++;
- return;
- }
-
- status = IcacheMissComplete;
-
-/* if (checkStall(tid)) {
- fetchStatus[tid] = Blocked;
- } else {
- fetchStatus[tid] = IcacheMissComplete;
- }
-*/
-// memcpy(cacheData, memReq->data, memReq->size);
-
- // Reset the completion event to NULL.
-// memReq->completionEvent = NULL;
- memReq = NULL;
-}
-
-template <class Impl>
-void
-FrontEnd<Impl>::addFreeRegs(int num_freed)
-{
- if (status == RenameBlocked && freeRegs + num_freed > 0) {
- status = Running;
- }
-
- DPRINTF(FE, "Adding %i freed registers\n", num_freed);
-
- freeRegs+= num_freed;
-
-// assert(freeRegs <= numPhysRegs);
- if (freeRegs > numPhysRegs)
- freeRegs = numPhysRegs;
-}
-
-template <class Impl>
-bool
-FrontEnd<Impl>::updateStatus()
-{
- bool serialize_block = !backEnd->robEmpty() || instBufferSize;
- bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked();
- bool ret_val = false;
-
- if (status == SerializeBlocked && !serialize_block) {
- status = SerializeComplete;
- ret_val = true;
- }
-
- if (status == BEBlocked && !be_block) {
- if (barrierInst) {
- status = SerializeBlocked;
- } else {
- status = Running;
- }
- ret_val = true;
- }
- return ret_val;
-}
-
-template <class Impl>
-void
-FrontEnd<Impl>::checkBE()
-{
- bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked();
- if (be_block) {
- if (status == Running || status == Idle) {
- status = BEBlocked;
- }
- }
-}
-
-template <class Impl>
-typename Impl::DynInstPtr
-FrontEnd<Impl>::getInstFromCacheline()
-{
- if (status == SerializeComplete) {
- DynInstPtr inst = barrierInst;
- status = Running;
- barrierInst = NULL;
- inst->clearSerializeBefore();
- return inst;
- }
-
- InstSeqNum inst_seq;
- MachInst inst;
- // @todo: Fix this magic number used here to handle word offset (and
- // getting rid of PAL bit)
- unsigned offset = (PC & cacheBlkMask) & ~3;
-
- // PC of inst is not in this cache block
- if (PC >= (cacheBlkPC + cacheBlkSize) || PC < cacheBlkPC || !cacheBlkValid) {
- return NULL;
- }
-
- //////////////////////////
- // Fetch one instruction
- //////////////////////////
-
- // Get a sequence number.
- inst_seq = getAndIncrementInstSeq();
-
- // Make sure this is a valid index.
- assert(offset <= cacheBlkSize - sizeof(MachInst));
-
- // Get the instruction from the array of the cache line.
- inst = htog(*reinterpret_cast<MachInst *>(&cacheData[offset]));
-
- ExtMachInst decode_inst = TheISA::makeExtMI(inst, PC);
-
- // Create a new DynInst from the instruction fetched.
- DynInstPtr instruction = new DynInst(decode_inst, PC, PC+sizeof(MachInst),
- inst_seq, cpu);
-
- instruction->setState(thread);
-
- DPRINTF(FE, "Instruction [sn:%lli] created, with PC %#x\n%s\n",
- inst_seq, instruction->readPC(),
- instruction->staticInst->disassemble(PC));
-
- instruction->traceData =
- Trace::getInstRecord(curTick, xc, cpu,
- instruction->staticInst,
- instruction->readPC(), 0);
-
- // Increment stat of fetched instructions.
- ++fetchedInsts;
-
- return instruction;
-}
-
-template <class Impl>
-void
-FrontEnd<Impl>::renameInst(DynInstPtr &inst)
-{
- DynInstPtr src_inst = NULL;
- int num_src_regs = inst->numSrcRegs();
- if (num_src_regs == 0) {
- inst->setCanIssue();
- } else {
- for (int i = 0; i < num_src_regs; ++i) {
- src_inst = renameTable[inst->srcRegIdx(i)];
-
- inst->setSrcInst(src_inst, i);
-
- DPRINTF(FE, "[sn:%lli]: Src reg %i is inst [sn:%lli]\n",
- inst->seqNum, (int)inst->srcRegIdx(i), src_inst->seqNum);
-
- if (src_inst->isResultReady()) {
- DPRINTF(FE, "Reg ready.\n");
- inst->markSrcRegReady(i);
- } else {
- DPRINTF(FE, "Adding to dependent list.\n");
- src_inst->addDependent(inst);
- }
- }
- }
-
- for (int i = 0; i < inst->numDestRegs(); ++i) {
- RegIndex idx = inst->destRegIdx(i);
-
- DPRINTF(FE, "Dest reg %i is now inst [sn:%lli], was previously "
- "[sn:%lli]\n",
- (int)inst->destRegIdx(i), inst->seqNum,
- renameTable[idx]->seqNum);
-
- inst->setPrevDestInst(renameTable[idx], i);
-
- renameTable[idx] = inst;
- --freeRegs;
- }
-}
-
-template <class Impl>
-void
-FrontEnd<Impl>::wakeFromQuiesce()
-{
- DPRINTF(FE, "Waking up from quiesce\n");
- // Hopefully this is safe
- status = Running;
-}
-
-template <class Impl>
-void
-FrontEnd<Impl>::switchOut()
-{
- switchedOut = true;
- cpu->signalSwitched();
-}
-
-template <class Impl>
-void
-FrontEnd<Impl>::doSwitchOut()
-{
- memReq = NULL;
- squash(0, 0);
- instBuffer.clear();
- instBufferSize = 0;
- status = Idle;
-}
-
-template <class Impl>
-void
-FrontEnd<Impl>::takeOverFrom(ExecContext *old_xc)
-{
- assert(freeRegs == numPhysRegs);
- fetchCacheLineNextCycle = true;
-
- cacheBlkValid = false;
-
-#if !FULL_SYSTEM
-// pTable = params->pTable;
-#endif
- fetchFault = NoFault;
- serializeNext = false;
- barrierInst = NULL;
- status = Running;
- switchedOut = false;
- interruptPending = false;
-}
-
-template <class Impl>
-void
-FrontEnd<Impl>::dumpInsts()
-{
- cprintf("instBuffer size: %i\n", instBuffer.size());
-
- InstBuffIt buff_it = instBuffer.begin();
-
- for (int num = 0; buff_it != instBuffer.end(); num++) {
- cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n"
- "Squashed:%i\n\n",
- num, (*buff_it)->readPC(), (*buff_it)->threadNumber,
- (*buff_it)->seqNum, (*buff_it)->isIssued(),
- (*buff_it)->isSquashed());
- buff_it++;
- }
-}
-
-template <class Impl>
-FrontEnd<Impl>::ICacheCompletionEvent::ICacheCompletionEvent(MemReqPtr &_req, FrontEnd *fe)
- : Event(&mainEventQueue, Delayed_Writeback_Pri), req(_req), frontEnd(fe)
-{
- this->setFlags(Event::AutoDelete);
-}
-
-template <class Impl>
-void
-FrontEnd<Impl>::ICacheCompletionEvent::process()
-{
- frontEnd->processCacheCompletion(req);
-}
-
-template <class Impl>
-const char *
-FrontEnd<Impl>::ICacheCompletionEvent::description()
-{
- return "ICache completion event";
-}
+++ /dev/null
-
-#include "cpu/ozone/inorder_back_end_impl.hh"
-#include "cpu/ozone/simple_impl.hh"
-
-template class InorderBackEnd<SimpleImpl>;
+++ /dev/null
-
-#ifndef __CPU_OZONE_INORDER_BACK_END_HH__
-#define __CPU_OZONE_INORDER_BACK_END_HH__
-
-#include <list>
-
-#include "arch/faults.hh"
-#include "base/timebuf.hh"
-#include "cpu/exec_context.hh"
-#include "cpu/inst_seq.hh"
-#include "cpu/ozone/rename_table.hh"
-#include "cpu/ozone/thread_state.hh"
-#include "mem/mem_interface.hh"
-#include "mem/mem_req.hh"
-#include "sim/eventq.hh"
-
-template <class Impl>
-class InorderBackEnd
-{
- public:
- typedef typename Impl::Params Params;
- typedef typename Impl::DynInstPtr DynInstPtr;
- typedef typename Impl::FullCPU FullCPU;
- typedef typename Impl::FrontEnd FrontEnd;
-
- typedef typename FullCPU::OzoneXC OzoneXC;
- typedef typename Impl::FullCPU::CommStruct CommStruct;
-
- InorderBackEnd(Params *params);
-
- std::string name() const;
-
- void setCPU(FullCPU *cpu_ptr)
- { cpu = cpu_ptr; }
-
- void setFrontEnd(FrontEnd *front_end_ptr)
- { frontEnd = front_end_ptr; }
-
- void setCommBuffer(TimeBuffer<CommStruct> *_comm)
- { comm = _comm; }
-
- void setXC(ExecContext *xc_ptr);
-
- void setThreadState(OzoneThreadState<Impl> *thread_ptr);
-
- void regStats() { }
-
-#if FULL_SYSTEM
- void checkInterrupts();
-#endif
-
- void tick();
- void executeInsts();
- void squash(const InstSeqNum &squash_num, const Addr &next_PC);
-
- void squashFromXC();
- void generateXCEvent() { }
-
- bool robEmpty() { return instList.empty(); }
-
- bool isFull() { return false; }
- bool isBlocked() { return status == DcacheMissStoreStall ||
- status == DcacheMissLoadStall ||
- interruptBlocked; }
-
- void fetchFault(Fault &fault);
-
- void dumpInsts();
-
- private:
- void handleFault();
-
- void setSquashInfoFromXC();
-
- bool squashPending;
- InstSeqNum squashSeqNum;
- Addr squashNextPC;
-
- Fault faultFromFetch;
-
- bool interruptBlocked;
-
- public:
- template <class T>
- Fault read(Addr addr, T &data, unsigned flags);
-
- template <class T>
- Fault read(MemReqPtr &req, T &data, int load_idx);
-
- template <class T>
- Fault write(T data, Addr addr, unsigned flags, uint64_t *res);
-
- template <class T>
- Fault write(MemReqPtr &req, T &data, int store_idx);
-
- Addr readCommitPC() { return commitPC; }
-
- Addr commitPC;
-
- void switchOut() { panic("Not implemented!"); }
- void doSwitchOut() { panic("Not implemented!"); }
- void takeOverFrom(ExecContext *old_xc = NULL) { panic("Not implemented!"); }
-
- public:
- FullCPU *cpu;
-
- FrontEnd *frontEnd;
-
- ExecContext *xc;
-
- OzoneThreadState<Impl> *thread;
-
- RenameTable<Impl> renameTable;
-
- protected:
- enum Status {
- Running,
- Idle,
- DcacheMissLoadStall,
- DcacheMissStoreStall,
- DcacheMissComplete,
- Blocked
- };
-
- Status status;
-
- class DCacheCompletionEvent : public Event
- {
- private:
- InorderBackEnd *be;
-
- public:
- DCacheCompletionEvent(InorderBackEnd *_be);
-
- virtual void process();
- virtual const char *description();
-
- DynInstPtr inst;
- };
-
- friend class DCacheCompletionEvent;
-
- DCacheCompletionEvent cacheCompletionEvent;
-
- MemInterface *dcacheInterface;
-
- MemReqPtr memReq;
-
- private:
- typedef typename std::list<DynInstPtr>::iterator InstListIt;
-
- std::list<DynInstPtr> instList;
-
- // General back end width. Used if the more specific isn't given.
- int width;
-
- int latency;
-
- int squashLatency;
-
- TimeBuffer<int> numInstsToWB;
- TimeBuffer<int>::wire instsAdded;
- TimeBuffer<int>::wire instsToExecute;
-
- TimeBuffer<CommStruct> *comm;
- // number of cycles stalled for D-cache misses
- Stats::Scalar<> dcacheStallCycles;
- Counter lastDcacheStall;
-};
-
-template <class Impl>
-template <class T>
-Fault
-InorderBackEnd<Impl>::read(Addr addr, T &data, unsigned flags)
-{
- memReq->reset(addr, sizeof(T), flags);
-
- // translate to physical address
- Fault fault = cpu->translateDataReadReq(memReq);
-
- // if we have a cache, do cache access too
- if (fault == NoFault && dcacheInterface) {
- memReq->cmd = Read;
- memReq->completionEvent = NULL;
- memReq->time = curTick;
- memReq->flags &= ~INST_READ;
- MemAccessResult result = dcacheInterface->access(memReq);
-
- // Ugly hack to get an event scheduled *only* if the access is
- // a miss. We really should add first-class support for this
- // at some point.
- if (result != MA_HIT) {
- // Fix this hack for keeping funcExeInst correct with loads that
- // are executed twice.
- memReq->completionEvent = &cacheCompletionEvent;
- lastDcacheStall = curTick;
-// unscheduleTickEvent();
- status = DcacheMissLoadStall;
- DPRINTF(IBE, "Dcache miss stall!\n");
- } else {
- // do functional access
- DPRINTF(IBE, "Dcache hit!\n");
- }
- }
-/*
- if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
- recordEvent("Uncached Read");
-*/
- return fault;
-}
-#if 0
-template <class Impl>
-template <class T>
-Fault
-InorderBackEnd<Impl>::read(MemReqPtr &req, T &data)
-{
-#if FULL_SYSTEM && defined(TARGET_ALPHA)
- if (req->flags & LOCKED) {
- req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr);
- req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true);
- }
-#endif
-
- Fault error;
- error = thread->mem->read(req, data);
- data = LittleEndianGuest::gtoh(data);
- return error;
-}
-#endif
-
-template <class Impl>
-template <class T>
-Fault
-InorderBackEnd<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
-{
- memReq->reset(addr, sizeof(T), flags);
-
- // translate to physical address
- Fault fault = cpu->translateDataWriteReq(memReq);
-
- if (fault == NoFault && dcacheInterface) {
- memReq->cmd = Write;
-// memcpy(memReq->data,(uint8_t *)&data,memReq->size);
- memReq->completionEvent = NULL;
- memReq->time = curTick;
- memReq->flags &= ~INST_READ;
- MemAccessResult result = dcacheInterface->access(memReq);
-
- // Ugly hack to get an event scheduled *only* if the access is
- // a miss. We really should add first-class support for this
- // at some point.
- if (result != MA_HIT) {
- memReq->completionEvent = &cacheCompletionEvent;
- lastDcacheStall = curTick;
-// unscheduleTickEvent();
- status = DcacheMissStoreStall;
- DPRINTF(IBE, "Dcache miss stall!\n");
- } else {
- DPRINTF(IBE, "Dcache hit!\n");
- }
- }
-
- if (res && (fault == NoFault))
- *res = memReq->result;
-/*
- if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
- recordEvent("Uncached Write");
-*/
- return fault;
-}
-#if 0
-template <class Impl>
-template <class T>
-Fault
-InorderBackEnd<Impl>::write(MemReqPtr &req, T &data)
-{
-#if FULL_SYSTEM && defined(TARGET_ALPHA)
- ExecContext *xc;
-
- // If this is a store conditional, act appropriately
- if (req->flags & LOCKED) {
- xc = req->xc;
-
- if (req->flags & UNCACHEABLE) {
- // Don't update result register (see stq_c in isa_desc)
- req->result = 2;
- xc->setStCondFailures(0);//Needed? [RGD]
- } else {
- bool lock_flag = xc->readMiscReg(TheISA::Lock_Flag_DepTag);
- Addr lock_addr = xc->readMiscReg(TheISA::Lock_Addr_DepTag);
- req->result = lock_flag;
- if (!lock_flag ||
- ((lock_addr & ~0xf) != (req->paddr & ~0xf))) {
- xc->setMiscReg(TheISA::Lock_Flag_DepTag, false);
- xc->setStCondFailures(xc->readStCondFailures() + 1);
- if (((xc->readStCondFailures()) % 100000) == 0) {
- std::cerr << "Warning: "
- << xc->readStCondFailures()
- << " consecutive store conditional failures "
- << "on cpu " << req->xc->readCpuId()
- << std::endl;
- }
- return NoFault;
- }
- else xc->setStCondFailures(0);
- }
- }
-
- // Need to clear any locked flags on other proccessors for
- // this address. Only do this for succsful Store Conditionals
- // and all other stores (WH64?). Unsuccessful Store
- // Conditionals would have returned above, and wouldn't fall
- // through.
- for (int i = 0; i < cpu->system->execContexts.size(); i++){
- xc = cpu->system->execContexts[i];
- if ((xc->readMiscReg(TheISA::Lock_Addr_DepTag) & ~0xf) ==
- (req->paddr & ~0xf)) {
- xc->setMiscReg(TheISA::Lock_Flag_DepTag, false);
- }
- }
-
-#endif
- return thread->mem->write(req, (T)LittleEndianGuest::htog(data));
-}
-#endif
-
-template <class Impl>
-template <class T>
-Fault
-InorderBackEnd<Impl>::read(MemReqPtr &req, T &data, int load_idx)
-{
-// panic("Unimplemented!");
-// memReq->reset(addr, sizeof(T), flags);
-
- // translate to physical address
-// Fault fault = cpu->translateDataReadReq(req);
- req->cmd = Read;
- req->completionEvent = NULL;
- req->time = curTick;
- assert(!req->data);
- req->data = new uint8_t[64];
- req->flags &= ~INST_READ;
- Fault fault = cpu->read(req, data);
- memcpy(req->data, &data, sizeof(T));
-
- // if we have a cache, do cache access too
- if (dcacheInterface) {
- MemAccessResult result = dcacheInterface->access(req);
-
- // Ugly hack to get an event scheduled *only* if the access is
- // a miss. We really should add first-class support for this
- // at some point.
- if (result != MA_HIT) {
- req->completionEvent = &cacheCompletionEvent;
- lastDcacheStall = curTick;
-// unscheduleTickEvent();
- status = DcacheMissLoadStall;
- DPRINTF(IBE, "Dcache miss load stall!\n");
- } else {
- DPRINTF(IBE, "Dcache hit!\n");
-
- }
- }
-
-/*
- if (!dcacheInterface && (req->flags & UNCACHEABLE))
- recordEvent("Uncached Read");
-*/
- return NoFault;
-}
-
-template <class Impl>
-template <class T>
-Fault
-InorderBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx)
-{
-// req->reset(addr, sizeof(T), flags);
-
- // translate to physical address
-// Fault fault = cpu->translateDataWriteReq(req);
-
- req->cmd = Write;
- req->completionEvent = NULL;
- req->time = curTick;
- assert(!req->data);
- req->data = new uint8_t[64];
- memcpy(req->data, (uint8_t *)&data, req->size);
-
- switch(req->size) {
- case 1:
- cpu->write(req, (uint8_t &)data);
- break;
- case 2:
- cpu->write(req, (uint16_t &)data);
- break;
- case 4:
- cpu->write(req, (uint32_t &)data);
- break;
- case 8:
- cpu->write(req, (uint64_t &)data);
- break;
- default:
- panic("Unexpected store size!\n");
- }
-
- if (dcacheInterface) {
- req->cmd = Write;
- req->data = new uint8_t[64];
- memcpy(req->data,(uint8_t *)&data,req->size);
- req->completionEvent = NULL;
- req->time = curTick;
- req->flags &= ~INST_READ;
- MemAccessResult result = dcacheInterface->access(req);
-
- // Ugly hack to get an event scheduled *only* if the access is
- // a miss. We really should add first-class support for this
- // at some point.
- if (result != MA_HIT) {
- req->completionEvent = &cacheCompletionEvent;
- lastDcacheStall = curTick;
-// unscheduleTickEvent();
- status = DcacheMissStoreStall;
- DPRINTF(IBE, "Dcache miss store stall!\n");
- } else {
- DPRINTF(IBE, "Dcache hit!\n");
-
- }
- }
-/*
- if (req->flags & LOCKED) {
- if (req->flags & UNCACHEABLE) {
- // Don't update result register (see stq_c in isa_desc)
- req->result = 2;
- } else {
- req->result = 1;
- }
- }
-*/
-/*
- if (res && (fault == NoFault))
- *res = req->result;
- */
-/*
- if (!dcacheInterface && (req->flags & UNCACHEABLE))
- recordEvent("Uncached Write");
-*/
- return NoFault;
-}
-
-#endif // __CPU_OZONE_INORDER_BACK_END_HH__
+++ /dev/null
-
-#include "arch/faults.hh"
-#include "arch/isa_traits.hh"
-#include "cpu/ozone/inorder_back_end.hh"
-#include "cpu/ozone/thread_state.hh"
-
-using namespace TheISA;
-
-template <class Impl>
-InorderBackEnd<Impl>::InorderBackEnd(Params *params)
- : squashPending(false),
- squashSeqNum(0),
- squashNextPC(0),
- faultFromFetch(NoFault),
- interruptBlocked(false),
- cacheCompletionEvent(this),
- dcacheInterface(params->dcacheInterface),
- width(params->backEndWidth),
- latency(params->backEndLatency),
- squashLatency(params->backEndSquashLatency),
- numInstsToWB(0, latency + 1)
-{
- instsAdded = numInstsToWB.getWire(latency);
- instsToExecute = numInstsToWB.getWire(0);
-
- memReq = new MemReq;
- memReq->data = new uint8_t[64];
- status = Running;
-}
-
-template <class Impl>
-std::string
-InorderBackEnd<Impl>::name() const
-{
- return cpu->name() + ".inorderbackend";
-}
-
-template <class Impl>
-void
-InorderBackEnd<Impl>::setXC(ExecContext *xc_ptr)
-{
- xc = xc_ptr;
- memReq->xc = xc;
-}
-
-template <class Impl>
-void
-InorderBackEnd<Impl>::setThreadState(OzoneThreadState<Impl> *thread_ptr)
-{
- thread = thread_ptr;
- thread->setFuncExeInst(0);
-}
-
-#if FULL_SYSTEM
-template <class Impl>
-void
-InorderBackEnd<Impl>::checkInterrupts()
-{
- //Check if there are any outstanding interrupts
- //Handle the interrupts
- int ipl = 0;
- int summary = 0;
-
- cpu->checkInterrupts = false;
-
- if (thread->readMiscReg(IPR_ASTRR))
- panic("asynchronous traps not implemented\n");
-
- if (thread->readMiscReg(IPR_SIRR)) {
- for (int i = INTLEVEL_SOFTWARE_MIN;
- i < INTLEVEL_SOFTWARE_MAX; i++) {
- if (thread->readMiscReg(IPR_SIRR) & (ULL(1) << i)) {
- // See table 4-19 of the 21164 hardware reference
- ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1;
- summary |= (ULL(1) << i);
- }
- }
- }
-
- uint64_t interrupts = cpu->intr_status();
-
- if (interrupts) {
- for (int i = INTLEVEL_EXTERNAL_MIN;
- i < INTLEVEL_EXTERNAL_MAX; i++) {
- if (interrupts & (ULL(1) << i)) {
- // See table 4-19 of the 21164 hardware reference
- ipl = i;
- summary |= (ULL(1) << i);
- }
- }
- }
-
- if (ipl && ipl > thread->readMiscReg(IPR_IPLR)) {
- thread->inSyscall = true;
-
- thread->setMiscReg(IPR_ISR, summary);
- thread->setMiscReg(IPR_INTID, ipl);
- Fault(new InterruptFault)->invoke(xc);
- DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
- thread->readMiscReg(IPR_IPLR), ipl, summary);
-
- // May need to go 1 inst prior
- squashPending = true;
-
- thread->inSyscall = false;
-
- setSquashInfoFromXC();
- }
-}
-#endif
-
-template <class Impl>
-void
-InorderBackEnd<Impl>::tick()
-{
- // Squash due to an external source
- // Not sure if this or an interrupt has higher priority
- if (squashPending) {
- squash(squashSeqNum, squashNextPC);
- return;
- }
-
- // if (interrupt) then set thread PC, stall front end, record that
- // I'm waiting for it to drain. (for now just squash)
-#if FULL_SYSTEM
- if (interruptBlocked ||
- (cpu->checkInterrupts &&
- cpu->check_interrupts() &&
- !cpu->inPalMode())) {
- if (!robEmpty()) {
- interruptBlocked = true;
- } else if (robEmpty() && cpu->inPalMode()) {
- // Will need to let the front end continue a bit until
- // we're out of pal mode. Hopefully we never get into an
- // infinite loop...
- interruptBlocked = false;
- } else {
- interruptBlocked = false;
- checkInterrupts();
- return;
- }
- }
-#endif
-
- if (status != DcacheMissLoadStall &&
- status != DcacheMissStoreStall) {
- for (int i = 0; i < width && (*instsAdded) < width; ++i) {
- DynInstPtr inst = frontEnd->getInst();
-
- if (!inst)
- break;
-
- instList.push_back(inst);
-
- (*instsAdded)++;
- }
-
-#if FULL_SYSTEM
- if (faultFromFetch && robEmpty() && frontEnd->isEmpty()) {
- handleFault();
- } else {
- executeInsts();
- }
-#else
- executeInsts();
-#endif
- }
-}
-
-template <class Impl>
-void
-InorderBackEnd<Impl>::executeInsts()
-{
- bool completed_last_inst = true;
- int insts_to_execute = *instsToExecute;
- int freed_regs = 0;
-
- while (insts_to_execute > 0) {
- assert(!instList.empty());
- DynInstPtr inst = instList.front();
-
- commitPC = inst->readPC();
-
- thread->setPC(commitPC);
- thread->setNextPC(inst->readNextPC());
-
-#if FULL_SYSTEM
- int count = 0;
- Addr oldpc;
- do {
- if (count == 0)
- assert(!thread->inSyscall && !thread->trapPending);
- oldpc = thread->readPC();
- cpu->system->pcEventQueue.service(
- thread->getXCProxy());
- count++;
- } while (oldpc != thread->readPC());
- if (count > 1) {
- DPRINTF(IBE, "PC skip function event, stopping commit\n");
- completed_last_inst = false;
- squashPending = true;
- break;
- }
-#endif
-
- Fault inst_fault = NoFault;
-
- if (status == DcacheMissComplete) {
- DPRINTF(IBE, "Completing inst [sn:%lli]\n", inst->seqNum);
- status = Running;
- } else if (inst->isMemRef() && status != DcacheMissComplete &&
- (!inst->isDataPrefetch() && !inst->isInstPrefetch())) {
- DPRINTF(IBE, "Initiating mem op inst [sn:%lli] PC: %#x\n",
- inst->seqNum, inst->readPC());
-
- cacheCompletionEvent.inst = inst;
- inst_fault = inst->initiateAcc();
- if (inst_fault == NoFault &&
- status != DcacheMissLoadStall &&
- status != DcacheMissStoreStall) {
- inst_fault = inst->completeAcc();
- }
- ++thread->funcExeInst;
- } else {
- DPRINTF(IBE, "Executing inst [sn:%lli] PC: %#x\n",
- inst->seqNum, inst->readPC());
- inst_fault = inst->execute();
- ++thread->funcExeInst;
- }
-
- // Will need to be able to break this loop in case the load
- // misses. Split access/complete ops would be useful here
- // with writeback events.
- if (status == DcacheMissLoadStall) {
- *instsToExecute = insts_to_execute;
-
- completed_last_inst = false;
- break;
- } else if (status == DcacheMissStoreStall) {
- // Figure out how to fix this hack. Probably have DcacheMissLoad
- // vs DcacheMissStore.
- *instsToExecute = insts_to_execute;
- completed_last_inst = false;
-/*
- instList.pop_front();
- --insts_to_execute;
- if (inst->traceData) {
- inst->traceData->finalize();
- }
-*/
-
- // Don't really need to stop for a store stall as long as
- // the memory system is able to handle store forwarding
- // and such. Breaking out might help avoid the cache
- // interface becoming blocked.
- break;
- }
-
- inst->setExecuted();
- inst->setCompleted();
- inst->setCanCommit();
-
- instList.pop_front();
-
- --insts_to_execute;
- --(*instsToExecute);
-
- if (inst->traceData) {
- inst->traceData->finalize();
- inst->traceData = NULL;
- }
-
- if (inst_fault != NoFault) {
-#if FULL_SYSTEM
- DPRINTF(IBE, "Inst [sn:%lli] PC %#x has a fault\n",
- inst->seqNum, inst->readPC());
-
- assert(!thread->inSyscall);
-
- thread->inSyscall = true;
-
- // Hack for now; DTB will sometimes need the machine instruction
- // for when faults happen. So we will set it here, prior to the
- // DTB possibly needing it for this translation.
- thread->setInst(
- static_cast<TheISA::MachInst>(inst->staticInst->machInst));
-
- // Consider holding onto the trap and waiting until the trap event
- // happens for this to be executed.
- inst_fault->invoke(xc);
-
- // Exit state update mode to avoid accidental updating.
- thread->inSyscall = false;
-
- squashPending = true;
-
- // Generate trap squash event.
-// generateTrapEvent(tid);
- completed_last_inst = false;
- break;
-#else // !FULL_SYSTEM
- panic("fault (%d) detected @ PC %08p", inst_fault,
- inst->PC);
-#endif // FULL_SYSTEM
- }
-
- for (int i = 0; i < inst->numDestRegs(); ++i) {
- renameTable[inst->destRegIdx(i)] = inst;
- thread->renameTable[inst->destRegIdx(i)] = inst;
- ++freed_regs;
- }
-
- inst->clearDependents();
-
- comm->access(0)->doneSeqNum = inst->seqNum;
-
- if (inst->mispredicted()) {
- squash(inst->seqNum, inst->readNextPC());
-
- thread->setNextPC(inst->readNextPC());
-
- break;
- } else if (squashPending) {
- // Something external happened that caused the CPU to squash.
- // Break out of commit and handle the squash next cycle.
- break;
- }
- // If it didn't mispredict, then it executed fine. Send back its
- // registers and BP info? What about insts that may still have
- // latency, like loads? Probably can send back the information after
- // it is completed.
-
- // keep an instruction count
- cpu->numInst++;
- thread->numInsts++;
- }
-
- frontEnd->addFreeRegs(freed_regs);
-
- assert(insts_to_execute >= 0);
-
- // Should only advance this if I have executed all instructions.
- if (insts_to_execute == 0) {
- numInstsToWB.advance();
- }
-
- // Should I set the PC to the next PC here? What do I set next PC to?
- if (completed_last_inst) {
- thread->setPC(thread->readNextPC());
- thread->setNextPC(thread->readPC() + sizeof(MachInst));
- }
-
- if (squashPending) {
- setSquashInfoFromXC();
- }
-}
-
-template <class Impl>
-void
-InorderBackEnd<Impl>::handleFault()
-{
- DPRINTF(Commit, "Handling fault from fetch\n");
-
- assert(!thread->inSyscall);
-
- thread->inSyscall = true;
-
- // Consider holding onto the trap and waiting until the trap event
- // happens for this to be executed.
- faultFromFetch->invoke(xc);
-
- // Exit state update mode to avoid accidental updating.
- thread->inSyscall = false;
-
- squashPending = true;
-
- setSquashInfoFromXC();
-}
-
-template <class Impl>
-void
-InorderBackEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC)
-{
- DPRINTF(IBE, "Squashing from [sn:%lli], setting PC to %#x\n",
- squash_num, next_PC);
-
- InstListIt squash_it = --(instList.end());
-
- int freed_regs = 0;
-
- while (!instList.empty() && (*squash_it)->seqNum > squash_num) {
- DynInstPtr inst = *squash_it;
-
- DPRINTF(IBE, "Squashing instruction PC %#x, [sn:%lli].\n",
- inst->readPC(),
- inst->seqNum);
-
- // May cause problems with misc regs
- freed_regs+= inst->numDestRegs();
- inst->clearDependents();
- squash_it--;
- instList.pop_back();
- }
-
- frontEnd->addFreeRegs(freed_regs);
-
- for (int i = 0; i < latency+1; ++i) {
- numInstsToWB.advance();
- }
-
- squashPending = false;
-
- // Probably want to make sure that this squash is the one that set the
- // thread into inSyscall mode.
- thread->inSyscall = false;
-
- // Tell front end to squash, reset PC to new one.
- frontEnd->squash(squash_num, next_PC);
-
- faultFromFetch = NULL;
-}
-
-template <class Impl>
-void
-InorderBackEnd<Impl>::squashFromXC()
-{
- // Record that I need to squash
- squashPending = true;
-
- thread->inSyscall = true;
-}
-
-template <class Impl>
-void
-InorderBackEnd<Impl>::setSquashInfoFromXC()
-{
- // Need to handle the case of the instList being empty. In that case
- // probably any number works, except maybe with stores in the store buffer.
- squashSeqNum = instList.empty() ? 0 : instList.front()->seqNum - 1;
-
- squashNextPC = thread->PC;
-}
-
-template <class Impl>
-void
-InorderBackEnd<Impl>::fetchFault(Fault &fault)
-{
- faultFromFetch = fault;
-}
-
-template <class Impl>
-void
-InorderBackEnd<Impl>::dumpInsts()
-{
- int num = 0;
- int valid_num = 0;
-
- InstListIt inst_list_it = instList.begin();
-
- cprintf("Inst list size: %i\n", instList.size());
-
- while (inst_list_it != instList.end())
- {
- cprintf("Instruction:%i\n",
- num);
- if (!(*inst_list_it)->isSquashed()) {
- if (!(*inst_list_it)->isIssued()) {
- ++valid_num;
- cprintf("Count:%i\n", valid_num);
- } else if ((*inst_list_it)->isMemRef() &&
- !(*inst_list_it)->memOpDone) {
- // Loads that have not been marked as executed still count
- // towards the total instructions.
- ++valid_num;
- cprintf("Count:%i\n", valid_num);
- }
- }
-
- cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
- "Issued:%i\nSquashed:%i\n",
- (*inst_list_it)->readPC(),
- (*inst_list_it)->seqNum,
- (*inst_list_it)->threadNumber,
- (*inst_list_it)->isIssued(),
- (*inst_list_it)->isSquashed());
-
- if ((*inst_list_it)->isMemRef()) {
- cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
- }
-
- cprintf("\n");
-
- inst_list_it++;
- ++num;
- }
-}
-
-template <class Impl>
-InorderBackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent(
- InorderBackEnd *_be)
- : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
-{
-// this->setFlags(Event::AutoDelete);
-}
-
-template <class Impl>
-void
-InorderBackEnd<Impl>::DCacheCompletionEvent::process()
-{
- inst->completeAcc();
- be->status = DcacheMissComplete;
-}
-
-template <class Impl>
-const char *
-InorderBackEnd<Impl>::DCacheCompletionEvent::description()
-{
- return "DCache completion event";
-}
+++ /dev/null
-/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "cpu/ozone/dyn_inst.hh"
-#include "cpu/ozone/ozone_impl.hh"
-#include "cpu/ozone/simple_impl.hh"
-#include "cpu/ozone/inst_queue_impl.hh"
-
-// Force instantiation of InstructionQueue.
-template class InstQueue<SimpleImpl>;
-template class InstQueue<OzoneImpl>;
+++ /dev/null
-/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __CPU_OZONE_INST_QUEUE_HH__
-#define __CPU_OZONE_INST_QUEUE_HH__
-
-#include <list>
-#include <map>
-#include <queue>
-#include <vector>
-
-#include "base/statistics.hh"
-#include "base/timebuf.hh"
-#include "cpu/inst_seq.hh"
-#include "sim/host.hh"
-
-class FUPool;
-class MemInterface;
-
-/**
- * A standard instruction queue class. It holds ready instructions, in
- * order, in seperate priority queues to facilitate the scheduling of
- * instructions. The IQ uses a separate linked list to track dependencies.
- * Similar to the rename map and the free list, it expects that
- * floating point registers have their indices start after the integer
- * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer
- * and 96-191 are fp). This remains true even for both logical and
- * physical register indices. The IQ depends on the memory dependence unit to
- * track when memory operations are ready in terms of ordering; register
- * dependencies are tracked normally. Right now the IQ also handles the
- * execution timing; this is mainly to allow back-to-back scheduling without
- * requiring IEW to be able to peek into the IQ. At the end of the execution
- * latency, the instruction is put into the queue to execute, where it will
- * have the execute() function called on it.
- * @todo: Make IQ able to handle multiple FU pools.
- */
-template <class Impl>
-class InstQueue
-{
- public:
- //Typedefs from the Impl.
- typedef typename Impl::FullCPU FullCPU;
- typedef typename Impl::DynInstPtr DynInstPtr;
- typedef typename Impl::Params Params;
- typedef typename Impl::IssueStruct IssueStruct;
-/*
- typedef typename Impl::CPUPol::IEW IEW;
- typedef typename Impl::CPUPol::MemDepUnit MemDepUnit;
- typedef typename Impl::CPUPol::IssueStruct IssueStruct;
- typedef typename Impl::CPUPol::TimeStruct TimeStruct;
-*/
- // Typedef of iterator through the list of instructions.
- typedef typename std::list<DynInstPtr>::iterator ListIt;
-
- friend class Impl::FullCPU;
-#if 0
- /** FU completion event class. */
- class FUCompletion : public Event {
- private:
- /** Executing instruction. */
- DynInstPtr inst;
-
- /** Index of the FU used for executing. */
- int fuIdx;
-
- /** Pointer back to the instruction queue. */
- InstQueue<Impl> *iqPtr;
-
- public:
- /** Construct a FU completion event. */
- FUCompletion(DynInstPtr &_inst, int fu_idx,
- InstQueue<Impl> *iq_ptr);
-
- virtual void process();
- virtual const char *description();
- };
-#endif
- /** Constructs an IQ. */
- InstQueue(Params *params);
-
- /** Destructs the IQ. */
- ~InstQueue();
-
- /** Returns the name of the IQ. */
- std::string name() const;
-
- /** Registers statistics. */
- void regStats();
-
- /** Sets CPU pointer. */
- void setCPU(FullCPU *_cpu) { cpu = _cpu; }
-#if 0
- /** Sets active threads list. */
- void setActiveThreads(list<unsigned> *at_ptr);
-
- /** Sets the IEW pointer. */
- void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; }
-#endif
- /** Sets the timer buffer between issue and execute. */
- void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue);
-#if 0
- /** Sets the global time buffer. */
- void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
-
- /** Number of entries needed for given amount of threads. */
- int entryAmount(int num_threads);
-
- /** Resets max entries for all threads. */
- void resetEntries();
-#endif
- /** Returns total number of free entries. */
- unsigned numFreeEntries();
-
- /** Returns number of free entries for a thread. */
- unsigned numFreeEntries(unsigned tid);
-
- /** Returns whether or not the IQ is full. */
- bool isFull();
-
- /** Returns whether or not the IQ is full for a specific thread. */
- bool isFull(unsigned tid);
-
- /** Returns if there are any ready instructions in the IQ. */
- bool hasReadyInsts();
-
- /** Inserts a new instruction into the IQ. */
- void insert(DynInstPtr &new_inst);
-
- /** Inserts a new, non-speculative instruction into the IQ. */
- void insertNonSpec(DynInstPtr &new_inst);
-#if 0
- /**
- * Advances the tail of the IQ, used if an instruction is not added to the
- * IQ for scheduling.
- * @todo: Rename this function.
- */
- void advanceTail(DynInstPtr &inst);
-
- /** Process FU completion event. */
- void processFUCompletion(DynInstPtr &inst, int fu_idx);
-#endif
- /**
- * Schedules ready instructions, adding the ready ones (oldest first) to
- * the queue to execute.
- */
- void scheduleReadyInsts();
-
- /** Schedules a single specific non-speculative instruction. */
- void scheduleNonSpec(const InstSeqNum &inst);
-
- /**
- * Commits all instructions up to and including the given sequence number,
- * for a specific thread.
- */
- void commit(const InstSeqNum &inst, unsigned tid = 0);
-
- /** Wakes all dependents of a completed instruction. */
- void wakeDependents(DynInstPtr &completed_inst);
-
- /** Adds a ready memory instruction to the ready list. */
- void addReadyMemInst(DynInstPtr &ready_inst);
-#if 0
- /**
- * Reschedules a memory instruction. It will be ready to issue once
- * replayMemInst() is called.
- */
- void rescheduleMemInst(DynInstPtr &resched_inst);
-
- /** Replays a memory instruction. It must be rescheduled first. */
- void replayMemInst(DynInstPtr &replay_inst);
-#endif
- /** Completes a memory operation. */
- void completeMemInst(DynInstPtr &completed_inst);
-#if 0
- /** Indicates an ordering violation between a store and a load. */
- void violation(DynInstPtr &store, DynInstPtr &faulting_load);
-#endif
- /**
- * Squashes instructions for a thread. Squashing information is obtained
- * from the time buffer.
- */
- void squash(unsigned tid); // Probably want the ISN
-
- /** Returns the number of used entries for a thread. */
- unsigned getCount(unsigned tid) { return count[tid]; };
-
- /** Updates the number of free entries. */
- void updateFreeEntries(int num) { freeEntries += num; }
-
- /** Debug function to print all instructions. */
- void printInsts();
-
- private:
- /** Does the actual squashing. */
- void doSquash(unsigned tid);
-
- /////////////////////////
- // Various pointers
- /////////////////////////
-
- /** Pointer to the CPU. */
- FullCPU *cpu;
-
- /** Cache interface. */
- MemInterface *dcacheInterface;
-#if 0
- /** Pointer to IEW stage. */
- IEW *iewStage;
-
- /** The memory dependence unit, which tracks/predicts memory dependences
- * between instructions.
- */
- MemDepUnit memDepUnit[Impl::MaxThreads];
-#endif
- /** The queue to the execute stage. Issued instructions will be written
- * into it.
- */
- TimeBuffer<IssueStruct> *issueToExecuteQueue;
-#if 0
- /** The backwards time buffer. */
- TimeBuffer<TimeStruct> *timeBuffer;
-
- /** Wire to read information from timebuffer. */
- typename TimeBuffer<TimeStruct>::wire fromCommit;
-
- /** Function unit pool. */
- FUPool *fuPool;
-#endif
- //////////////////////////////////////
- // Instruction lists, ready queues, and ordering
- //////////////////////////////////////
-
- /** List of all the instructions in the IQ (some of which may be issued). */
- std::list<DynInstPtr> instList[Impl::MaxThreads];
-
- /**
- * Struct for comparing entries to be added to the priority queue. This
- * gives reverse ordering to the instructions in terms of sequence
- * numbers: the instructions with smaller sequence numbers (and hence
- * are older) will be at the top of the priority queue.
- */
- struct pqCompare {
- bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
- {
- return lhs->seqNum > rhs->seqNum;
- }
- };
-
- /**
- * Struct for an IQ entry. It includes the instruction and an iterator
- * to the instruction's spot in the IQ.
- */
- struct IQEntry {
- DynInstPtr inst;
- ListIt iqIt;
- };
-
- typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare>
- ReadyInstQueue;
-
- typedef std::map<DynInstPtr, pqCompare> ReadyInstMap;
- typedef typename std::map<DynInstPtr, pqCompare>::iterator ReadyMapIt;
-
- /** List of ready instructions.
- */
- ReadyInstQueue readyInsts;
-
- /** List of non-speculative instructions that will be scheduled
- * once the IQ gets a signal from commit. While it's redundant to
- * have the key be a part of the value (the sequence number is stored
- * inside of DynInst), when these instructions are woken up only
- * the sequence number will be available. Thus it is most efficient to be
- * able to search by the sequence number alone.
- */
- std::map<InstSeqNum, DynInstPtr> nonSpecInsts;
-
- typedef typename std::map<InstSeqNum, DynInstPtr>::iterator NonSpecMapIt;
-#if 0
- /** Entry for the list age ordering by op class. */
- struct ListOrderEntry {
- OpClass queueType;
- InstSeqNum oldestInst;
- };
-
- /** List that contains the age order of the oldest instruction of each
- * ready queue. Used to select the oldest instruction available
- * among op classes.
- */
- std::list<ListOrderEntry> listOrder;
-
- typedef typename std::list<ListOrderEntry>::iterator ListOrderIt;
-
- /** Tracks if each ready queue is on the age order list. */
- bool queueOnList[Num_OpClasses];
-
- /** Iterators of each ready queue. Points to their spot in the age order
- * list.
- */
- ListOrderIt readyIt[Num_OpClasses];
-
- /** Add an op class to the age order list. */
- void addToOrderList(OpClass op_class);
-
- /**
- * Called when the oldest instruction has been removed from a ready queue;
- * this places that ready queue into the proper spot in the age order list.
- */
- void moveToYoungerInst(ListOrderIt age_order_it);
-#endif
- //////////////////////////////////////
- // Various parameters
- //////////////////////////////////////
-#if 0
- /** IQ Resource Sharing Policy */
- enum IQPolicy {
- Dynamic,
- Partitioned,
- Threshold
- };
-
- /** IQ sharing policy for SMT. */
- IQPolicy iqPolicy;
-#endif
- /** Number of Total Threads*/
- unsigned numThreads;
-#if 0
- /** Pointer to list of active threads. */
- list<unsigned> *activeThreads;
-#endif
- /** Per Thread IQ count */
- unsigned count[Impl::MaxThreads];
-
- /** Max IQ Entries Per Thread */
- unsigned maxEntries[Impl::MaxThreads];
-
- /** Number of free IQ entries left. */
- unsigned freeEntries;
-
- /** The number of entries in the instruction queue. */
- unsigned numEntries;
-
- /** The total number of instructions that can be issued in one cycle. */
- unsigned totalWidth;
-#if 0
- /** The number of physical registers in the CPU. */
- unsigned numPhysRegs;
-
- /** The number of physical integer registers in the CPU. */
- unsigned numPhysIntRegs;
-
- /** The number of floating point registers in the CPU. */
- unsigned numPhysFloatRegs;
-#endif
- /** Delay between commit stage and the IQ.
- * @todo: Make there be a distinction between the delays within IEW.
- */
- unsigned commitToIEWDelay;
-
- //////////////////////////////////
- // Variables needed for squashing
- //////////////////////////////////
-
- /** The sequence number of the squashed instruction. */
- InstSeqNum squashedSeqNum[Impl::MaxThreads];
-
- /** Iterator that points to the last instruction that has been squashed.
- * This will not be valid unless the IQ is in the process of squashing.
- */
- ListIt squashIt[Impl::MaxThreads];
-#if 0
- ///////////////////////////////////
- // Dependency graph stuff
- ///////////////////////////////////
-
- class DependencyEntry
- {
- public:
- DependencyEntry()
- : inst(NULL), next(NULL)
- { }
-
- DynInstPtr inst;
- //Might want to include data about what arch. register the
- //dependence is waiting on.
- DependencyEntry *next;
-
- //This function, and perhaps this whole class, stand out a little
- //bit as they don't fit a classification well. I want access
- //to the underlying structure of the linked list, yet at
- //the same time it feels like this should be something abstracted
- //away. So for now it will sit here, within the IQ, until
- //a better implementation is decided upon.
- // This function probably shouldn't be within the entry...
- void insert(DynInstPtr &new_inst);
-
- void remove(DynInstPtr &inst_to_remove);
-
- // Debug variable, remove when done testing.
- static unsigned mem_alloc_counter;
- };
-
- /** Array of linked lists. Each linked list is a list of all the
- * instructions that depend upon a given register. The actual
- * register's index is used to index into the graph; ie all
- * instructions in flight that are dependent upon r34 will be
- * in the linked list of dependGraph[34].
- */
- DependencyEntry *dependGraph;
-
- /** A cache of the recently woken registers. It is 1 if the register
- * has been woken up recently, and 0 if the register has been added
- * to the dependency graph and has not yet received its value. It
- * is basically a secondary scoreboard, and should pretty much mirror
- * the scoreboard that exists in the rename map.
- */
- vector<bool> regScoreboard;
-
- /** Adds an instruction to the dependency graph, as a producer. */
- bool addToDependents(DynInstPtr &new_inst);
-
- /** Adds an instruction to the dependency graph, as a consumer. */
- void createDependency(DynInstPtr &new_inst);
-#endif
- /** Moves an instruction to the ready queue if it is ready. */
- void addIfReady(DynInstPtr &inst);
-
- /** Debugging function to count how many entries are in the IQ. It does
- * a linear walk through the instructions, so do not call this function
- * during normal execution.
- */
- int countInsts();
-#if 0
- /** Debugging function to dump out the dependency graph.
- */
- void dumpDependGraph();
-#endif
- /** Debugging function to dump all the list sizes, as well as print
- * out the list of nonspeculative instructions. Should not be used
- * in any other capacity, but it has no harmful sideaffects.
- */
- void dumpLists();
-
- /** Debugging function to dump out all instructions that are in the
- * IQ.
- */
- void dumpInsts();
-
- /** Stat for number of instructions added. */
- Stats::Scalar<> iqInstsAdded;
- /** Stat for number of non-speculative instructions added. */
- Stats::Scalar<> iqNonSpecInstsAdded;
-// Stats::Scalar<> iqIntInstsAdded;
- /** Stat for number of integer instructions issued. */
- Stats::Scalar<> iqIntInstsIssued;
-// Stats::Scalar<> iqFloatInstsAdded;
- /** Stat for number of floating point instructions issued. */
- Stats::Scalar<> iqFloatInstsIssued;
-// Stats::Scalar<> iqBranchInstsAdded;
- /** Stat for number of branch instructions issued. */
- Stats::Scalar<> iqBranchInstsIssued;
-// Stats::Scalar<> iqMemInstsAdded;
- /** Stat for number of memory instructions issued. */
- Stats::Scalar<> iqMemInstsIssued;
-// Stats::Scalar<> iqMiscInstsAdded;
- /** Stat for number of miscellaneous instructions issued. */
- Stats::Scalar<> iqMiscInstsIssued;
- /** Stat for number of squashed instructions that were ready to issue. */
- Stats::Scalar<> iqSquashedInstsIssued;
- /** Stat for number of squashed instructions examined when squashing. */
- Stats::Scalar<> iqSquashedInstsExamined;
- /** Stat for number of squashed instruction operands examined when
- * squashing.
- */
- Stats::Scalar<> iqSquashedOperandsExamined;
- /** Stat for number of non-speculative instructions removed due to a squash.
- */
- Stats::Scalar<> iqSquashedNonSpecRemoved;
-
-};
-
-#endif //__CPU_OZONE_INST_QUEUE_HH__
+++ /dev/null
-/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// Todo:
-// Current ordering allows for 0 cycle added-to-scheduled. Could maybe fake
-// it; either do in reverse order, or have added instructions put into a
-// different ready queue that, in scheduleRreadyInsts(), gets put onto the
-// normal ready queue. This would however give only a one cycle delay,
-// but probably is more flexible to actually add in a delay parameter than
-// just running it backwards.
-
-#include <vector>
-
-#include "sim/root.hh"
-
-#include "cpu/ozone/inst_queue.hh"
-#if 0
-template <class Impl>
-InstQueue<Impl>::FUCompletion::FUCompletion(DynInstPtr &_inst,
- int fu_idx,
- InstQueue<Impl> *iq_ptr)
- : Event(&mainEventQueue, Stat_Event_Pri),
- inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr)
-{
- this->setFlags(Event::AutoDelete);
-}
-
-template <class Impl>
-void
-InstQueue<Impl>::FUCompletion::process()
-{
- iqPtr->processFUCompletion(inst, fuIdx);
-}
-
-
-template <class Impl>
-const char *
-InstQueue<Impl>::FUCompletion::description()
-{
- return "Functional unit completion event";
-}
-#endif
-template <class Impl>
-InstQueue<Impl>::InstQueue(Params *params)
- : dcacheInterface(params->dcacheInterface),
-// fuPool(params->fuPool),
- numEntries(params->numIQEntries),
- totalWidth(params->issueWidth),
-// numPhysIntRegs(params->numPhysIntRegs),
-// numPhysFloatRegs(params->numPhysFloatRegs),
- commitToIEWDelay(params->commitToIEWDelay)
-{
-// assert(fuPool);
-
-// numThreads = params->numberOfThreads;
- numThreads = 1;
-
- //Initialize thread IQ counts
- for (int i = 0; i <numThreads; i++) {
- count[i] = 0;
- }
-
- // Initialize the number of free IQ entries.
- freeEntries = numEntries;
-
- // Set the number of physical registers as the number of int + float
-// numPhysRegs = numPhysIntRegs + numPhysFloatRegs;
-
-// DPRINTF(IQ, "There are %i physical registers.\n", numPhysRegs);
-
- //Create an entry for each physical register within the
- //dependency graph.
-// dependGraph = new DependencyEntry[numPhysRegs];
-
- // Resize the register scoreboard.
-// regScoreboard.resize(numPhysRegs);
-/*
- //Initialize Mem Dependence Units
- for (int i = 0; i < numThreads; i++) {
- memDepUnit[i].init(params,i);
- memDepUnit[i].setIQ(this);
- }
-
- // Initialize all the head pointers to point to NULL, and all the
- // entries as unready.
- // Note that in actuality, the registers corresponding to the logical
- // registers start off as ready. However this doesn't matter for the
- // IQ as the instruction should have been correctly told if those
- // registers are ready in rename. Thus it can all be initialized as
- // unready.
- for (int i = 0; i < numPhysRegs; ++i) {
- dependGraph[i].next = NULL;
- dependGraph[i].inst = NULL;
- regScoreboard[i] = false;
- }
-*/
- for (int i = 0; i < numThreads; ++i) {
- squashedSeqNum[i] = 0;
- }
-/*
- for (int i = 0; i < Num_OpClasses; ++i) {
- queueOnList[i] = false;
- readyIt[i] = listOrder.end();
- }
-
- string policy = params->smtIQPolicy;
-
- //Convert string to lowercase
- std::transform(policy.begin(), policy.end(), policy.begin(),
- (int(*)(int)) tolower);
-
- //Figure out resource sharing policy
- if (policy == "dynamic") {
- iqPolicy = Dynamic;
-
- //Set Max Entries to Total ROB Capacity
- for (int i = 0; i < numThreads; i++) {
- maxEntries[i] = numEntries;
- }
-
- } else if (policy == "partitioned") {
- iqPolicy = Partitioned;
-
- //@todo:make work if part_amt doesnt divide evenly.
- int part_amt = numEntries / numThreads;
-
- //Divide ROB up evenly
- for (int i = 0; i < numThreads; i++) {
- maxEntries[i] = part_amt;
- }
-
- DPRINTF(Fetch, "IQ sharing policy set to Partitioned:"
- "%i entries per thread.\n",part_amt);
-
- } else if (policy == "threshold") {
- iqPolicy = Threshold;
-
- double threshold = (double)params->smtIQThreshold / 100;
-
- int thresholdIQ = (int)((double)threshold * numEntries);
-
- //Divide up by threshold amount
- for (int i = 0; i < numThreads; i++) {
- maxEntries[i] = thresholdIQ;
- }
-
- DPRINTF(Fetch, "IQ sharing policy set to Threshold:"
- "%i entries per thread.\n",thresholdIQ);
- } else {
- assert(0 && "Invalid IQ Sharing Policy.Options Are:{Dynamic,"
- "Partitioned, Threshold}");
- }
-*/
-}
-
-template <class Impl>
-InstQueue<Impl>::~InstQueue()
-{
- // Clear the dependency graph
-/*
- DependencyEntry *curr;
- DependencyEntry *prev;
-
- for (int i = 0; i < numPhysRegs; ++i) {
- curr = dependGraph[i].next;
-
- while (curr) {
- DependencyEntry::mem_alloc_counter--;
-
- prev = curr;
- curr = prev->next;
- prev->inst = NULL;
-
- delete prev;
- }
-
- if (dependGraph[i].inst) {
- dependGraph[i].inst = NULL;
- }
-
- dependGraph[i].next = NULL;
- }
-
- assert(DependencyEntry::mem_alloc_counter == 0);
-
- delete [] dependGraph;
-*/
-}
-
-template <class Impl>
-std::string
-InstQueue<Impl>::name() const
-{
- return cpu->name() + ".iq";
-}
-
-template <class Impl>
-void
-InstQueue<Impl>::regStats()
-{
- iqInstsAdded
- .name(name() + ".iqInstsAdded")
- .desc("Number of instructions added to the IQ (excludes non-spec)")
- .prereq(iqInstsAdded);
-
- iqNonSpecInstsAdded
- .name(name() + ".iqNonSpecInstsAdded")
- .desc("Number of non-speculative instructions added to the IQ")
- .prereq(iqNonSpecInstsAdded);
-
-// iqIntInstsAdded;
-
- iqIntInstsIssued
- .name(name() + ".iqIntInstsIssued")
- .desc("Number of integer instructions issued")
- .prereq(iqIntInstsIssued);
-
-// iqFloatInstsAdded;
-
- iqFloatInstsIssued
- .name(name() + ".iqFloatInstsIssued")
- .desc("Number of float instructions issued")
- .prereq(iqFloatInstsIssued);
-
-// iqBranchInstsAdded;
-
- iqBranchInstsIssued
- .name(name() + ".iqBranchInstsIssued")
- .desc("Number of branch instructions issued")
- .prereq(iqBranchInstsIssued);
-
-// iqMemInstsAdded;
-
- iqMemInstsIssued
- .name(name() + ".iqMemInstsIssued")
- .desc("Number of memory instructions issued")
- .prereq(iqMemInstsIssued);
-
-// iqMiscInstsAdded;
-
- iqMiscInstsIssued
- .name(name() + ".iqMiscInstsIssued")
- .desc("Number of miscellaneous instructions issued")
- .prereq(iqMiscInstsIssued);
-
- iqSquashedInstsIssued
- .name(name() + ".iqSquashedInstsIssued")
- .desc("Number of squashed instructions issued")
- .prereq(iqSquashedInstsIssued);
-
- iqSquashedInstsExamined
- .name(name() + ".iqSquashedInstsExamined")
- .desc("Number of squashed instructions iterated over during squash;"
- " mainly for profiling")
- .prereq(iqSquashedInstsExamined);
-
- iqSquashedOperandsExamined
- .name(name() + ".iqSquashedOperandsExamined")
- .desc("Number of squashed operands that are examined and possibly "
- "removed from graph")
- .prereq(iqSquashedOperandsExamined);
-
- iqSquashedNonSpecRemoved
- .name(name() + ".iqSquashedNonSpecRemoved")
- .desc("Number of squashed non-spec instructions that were removed")
- .prereq(iqSquashedNonSpecRemoved);
-/*
- for ( int i=0; i < numThreads; i++) {
- // Tell mem dependence unit to reg stats as well.
- memDepUnit[i].regStats();
- }
-*/
-}
-/*
-template <class Impl>
-void
-InstQueue<Impl>::setActiveThreads(list<unsigned> *at_ptr)
-{
- DPRINTF(IQ, "Setting active threads list pointer.\n");
- activeThreads = at_ptr;
-}
-*/
-template <class Impl>
-void
-InstQueue<Impl>::setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2e_ptr)
-{
- DPRINTF(IQ, "Set the issue to execute queue.\n");
- issueToExecuteQueue = i2e_ptr;
-}
-/*
-template <class Impl>
-void
-InstQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
-{
- DPRINTF(IQ, "Set the time buffer.\n");
- timeBuffer = tb_ptr;
-
- fromCommit = timeBuffer->getWire(-commitToIEWDelay);
-}
-
-template <class Impl>
-int
-InstQueue<Impl>::entryAmount(int num_threads)
-{
- if (iqPolicy == Partitioned) {
- return numEntries / num_threads;
- } else {
- return 0;
- }
-}
-
-
-template <class Impl>
-void
-InstQueue<Impl>::resetEntries()
-{
- if (iqPolicy != Dynamic || numThreads > 1) {
- int active_threads = (*activeThreads).size();
-
- list<unsigned>::iterator threads = (*activeThreads).begin();
- list<unsigned>::iterator list_end = (*activeThreads).end();
-
- while (threads != list_end) {
- if (iqPolicy == Partitioned) {
- maxEntries[*threads++] = numEntries / active_threads;
- } else if(iqPolicy == Threshold && active_threads == 1) {
- maxEntries[*threads++] = numEntries;
- }
- }
- }
-}
-*/
-template <class Impl>
-unsigned
-InstQueue<Impl>::numFreeEntries()
-{
- return freeEntries;
-}
-
-template <class Impl>
-unsigned
-InstQueue<Impl>::numFreeEntries(unsigned tid)
-{
- return maxEntries[tid] - count[tid];
-}
-
-// Might want to do something more complex if it knows how many instructions
-// will be issued this cycle.
-template <class Impl>
-bool
-InstQueue<Impl>::isFull()
-{
- if (freeEntries == 0) {
- return(true);
- } else {
- return(false);
- }
-}
-
-template <class Impl>
-bool
-InstQueue<Impl>::isFull(unsigned tid)
-{
- if (numFreeEntries(tid) == 0) {
- return(true);
- } else {
- return(false);
- }
-}
-
-template <class Impl>
-bool
-InstQueue<Impl>::hasReadyInsts()
-{
-/*
- if (!listOrder.empty()) {
- return true;
- }
-
- for (int i = 0; i < Num_OpClasses; ++i) {
- if (!readyInsts[i].empty()) {
- return true;
- }
- }
-
- return false;
-*/
- return readyInsts.empty();
-}
-
-template <class Impl>
-void
-InstQueue<Impl>::insert(DynInstPtr &new_inst)
-{
- // Make sure the instruction is valid
- assert(new_inst);
-
- DPRINTF(IQ, "Adding instruction PC %#x to the IQ.\n",
- new_inst->readPC());
-
- // Check if there are any free entries. Panic if there are none.
- // Might want to have this return a fault in the future instead of
- // panicing.
- assert(freeEntries != 0);
-
- instList[new_inst->threadNumber].push_back(new_inst);
-
- // Decrease the number of free entries.
- --freeEntries;
-
- //Mark Instruction as in IQ
-// new_inst->setInIQ();
-/*
- // Look through its source registers (physical regs), and mark any
- // dependencies.
- addToDependents(new_inst);
-
- // Have this instruction set itself as the producer of its destination
- // register(s).
- createDependency(new_inst);
-*/
- // If it's a memory instruction, add it to the memory dependency
- // unit.
-// if (new_inst->isMemRef()) {
-// memDepUnit[new_inst->threadNumber].insert(new_inst);
-// } else {
- // If the instruction is ready then add it to the ready list.
- addIfReady(new_inst);
-// }
-
- ++iqInstsAdded;
-
-
- //Update Thread IQ Count
- count[new_inst->threadNumber]++;
-
- assert(freeEntries == (numEntries - countInsts()));
-}
-
-template <class Impl>
-void
-InstQueue<Impl>::insertNonSpec(DynInstPtr &new_inst)
-{
- nonSpecInsts[new_inst->seqNum] = new_inst;
-
- // @todo: Clean up this code; can do it by setting inst as unable
- // to issue, then calling normal insert on the inst.
-
- // Make sure the instruction is valid
- assert(new_inst);
-
- DPRINTF(IQ, "Adding instruction PC %#x to the IQ.\n",
- new_inst->readPC());
-
- // Check if there are any free entries. Panic if there are none.
- // Might want to have this return a fault in the future instead of
- // panicing.
- assert(freeEntries != 0);
-
- instList[new_inst->threadNumber].push_back(new_inst);
-
- // Decrease the number of free entries.
- --freeEntries;
-
- //Mark Instruction as in IQ
-// new_inst->setInIQ();
-/*
- // Have this instruction set itself as the producer of its destination
- // register(s).
- createDependency(new_inst);
-
- // If it's a memory instruction, add it to the memory dependency
- // unit.
- if (new_inst->isMemRef()) {
- memDepUnit[new_inst->threadNumber].insertNonSpec(new_inst);
- }
-*/
- ++iqNonSpecInstsAdded;
-
- //Update Thread IQ Count
- count[new_inst->threadNumber]++;
-
- assert(freeEntries == (numEntries - countInsts()));
-}
-/*
-template <class Impl>
-void
-InstQueue<Impl>::advanceTail(DynInstPtr &inst)
-{
- // Have this instruction set itself as the producer of its destination
- // register(s).
- createDependency(inst);
-}
-
-template <class Impl>
-void
-InstQueue<Impl>::addToOrderList(OpClass op_class)
-{
- assert(!readyInsts[op_class].empty());
-
- ListOrderEntry queue_entry;
-
- queue_entry.queueType = op_class;
-
- queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
-
- ListOrderIt list_it = listOrder.begin();
- ListOrderIt list_end_it = listOrder.end();
-
- while (list_it != list_end_it) {
- if ((*list_it).oldestInst > queue_entry.oldestInst) {
- break;
- }
-
- list_it++;
- }
-
- readyIt[op_class] = listOrder.insert(list_it, queue_entry);
- queueOnList[op_class] = true;
-}
-
-template <class Impl>
-void
-InstQueue<Impl>::moveToYoungerInst(ListOrderIt list_order_it)
-{
- // Get iterator of next item on the list
- // Delete the original iterator
- // Determine if the next item is either the end of the list or younger
- // than the new instruction. If so, then add in a new iterator right here.
- // If not, then move along.
- ListOrderEntry queue_entry;
- OpClass op_class = (*list_order_it).queueType;
- ListOrderIt next_it = list_order_it;
-
- ++next_it;
-
- queue_entry.queueType = op_class;
- queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
-
- while (next_it != listOrder.end() &&
- (*next_it).oldestInst < queue_entry.oldestInst) {
- ++next_it;
- }
-
- readyIt[op_class] = listOrder.insert(next_it, queue_entry);
-}
-
-template <class Impl>
-void
-InstQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
-{
- // The CPU could have been sleeping until this op completed (*extremely*
- // long latency op). Wake it if it was. This may be overkill.
- iewStage->wakeCPU();
-
- fuPool->freeUnit(fu_idx);
-
- int &size = issueToExecuteQueue->access(0)->size;
-
- issueToExecuteQueue->access(0)->insts[size++] = inst;
-}
-*/
-// @todo: Figure out a better way to remove the squashed items from the
-// lists. Checking the top item of each list to see if it's squashed
-// wastes time and forces jumps.
-template <class Impl>
-void
-InstQueue<Impl>::scheduleReadyInsts()
-{
- DPRINTF(IQ, "Attempting to schedule ready instructions from "
- "the IQ.\n");
-
-// IssueStruct *i2e_info = issueToExecuteQueue->access(0);
-/*
- // Will need to reorder the list if either a queue is not on the list,
- // or it has an older instruction than last time.
- for (int i = 0; i < Num_OpClasses; ++i) {
- if (!readyInsts[i].empty()) {
- if (!queueOnList[i]) {
- addToOrderList(OpClass(i));
- } else if (readyInsts[i].top()->seqNum <
- (*readyIt[i]).oldestInst) {
- listOrder.erase(readyIt[i]);
- addToOrderList(OpClass(i));
- }
- }
- }
-
- // Have iterator to head of the list
- // While I haven't exceeded bandwidth or reached the end of the list,
- // Try to get a FU that can do what this op needs.
- // If successful, change the oldestInst to the new top of the list, put
- // the queue in the proper place in the list.
- // Increment the iterator.
- // This will avoid trying to schedule a certain op class if there are no
- // FUs that handle it.
- ListOrderIt order_it = listOrder.begin();
- ListOrderIt order_end_it = listOrder.end();
- int total_issued = 0;
- int exec_queue_slot = i2e_info->size;
-
- while (exec_queue_slot < totalWidth && order_it != order_end_it) {
- OpClass op_class = (*order_it).queueType;
-
- assert(!readyInsts[op_class].empty());
-
- DynInstPtr issuing_inst = readyInsts[op_class].top();
-
- assert(issuing_inst->seqNum == (*order_it).oldestInst);
-
- if (issuing_inst->isSquashed()) {
- readyInsts[op_class].pop();
-
- if (!readyInsts[op_class].empty()) {
- moveToYoungerInst(order_it);
- } else {
- readyIt[op_class] = listOrder.end();
- queueOnList[op_class] = false;
- }
-
- listOrder.erase(order_it++);
-
- ++iqSquashedInstsIssued;
-
- continue;
- }
-
- int idx = fuPool->getUnit(op_class);
-
- if (idx != -1) {
- int op_latency = fuPool->getOpLatency(op_class);
-
- if (op_latency == 1) {
- i2e_info->insts[exec_queue_slot++] = issuing_inst;
- i2e_info->size++;
-
- // Add the FU onto the list of FU's to be freed next cycle.
- fuPool->freeUnit(idx);
- } else {
- int issue_latency = fuPool->getIssueLatency(op_class);
-
- if (issue_latency > 1) {
- // Generate completion event for the FU
- FUCompletion *execution = new FUCompletion(issuing_inst,
- idx, this);
-
- execution->schedule(curTick + issue_latency - 1);
- } else {
- i2e_info->insts[exec_queue_slot++] = issuing_inst;
- i2e_info->size++;
-
- // Add the FU onto the list of FU's to be freed next cycle.
- fuPool->freeUnit(idx);
- }
- }
-
- DPRINTF(IQ, "Thread %i: Issuing instruction PC %#x "
- "[sn:%lli]\n",
- issuing_inst->threadNumber, issuing_inst->readPC(),
- issuing_inst->seqNum);
-
- readyInsts[op_class].pop();
-
- if (!readyInsts[op_class].empty()) {
- moveToYoungerInst(order_it);
- } else {
- readyIt[op_class] = listOrder.end();
- queueOnList[op_class] = false;
- }
-
- issuing_inst->setIssued();
- ++total_issued;
-
- if (!issuing_inst->isMemRef()) {
- // Memory instructions can not be freed from the IQ until they
- // complete.
- ++freeEntries;
- count[issuing_inst->threadNumber]--;
- issuing_inst->removeInIQ();
- } else {
- memDepUnit[issuing_inst->threadNumber].issue(issuing_inst);
- }
-
- listOrder.erase(order_it++);
- } else {
- ++order_it;
- }
- }
-
- if (total_issued) {
- cpu->activityThisCycle();
- } else {
- DPRINTF(IQ, "Not able to schedule any instructions.\n");
- }
-*/
-}
-
-template <class Impl>
-void
-InstQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst)
-{
- DPRINTF(IQ, "Marking nonspeculative instruction with sequence "
- "number %i as ready to execute.\n", inst);
-
- NonSpecMapIt inst_it = nonSpecInsts.find(inst);
-
- assert(inst_it != nonSpecInsts.end());
-
-// unsigned tid = (*inst_it).second->threadNumber;
-
- // Mark this instruction as ready to issue.
- (*inst_it).second->setCanIssue();
-
- // Now schedule the instruction.
-// if (!(*inst_it).second->isMemRef()) {
- addIfReady((*inst_it).second);
-// } else {
-// memDepUnit[tid].nonSpecInstReady((*inst_it).second);
-// }
-
- nonSpecInsts.erase(inst_it);
-}
-
-template <class Impl>
-void
-InstQueue<Impl>::commit(const InstSeqNum &inst, unsigned tid)
-{
- /*Need to go through each thread??*/
- DPRINTF(IQ, "[tid:%i]: Committing instructions older than [sn:%i]\n",
- tid,inst);
-
- ListIt iq_it = instList[tid].begin();
-
- while (iq_it != instList[tid].end() &&
- (*iq_it)->seqNum <= inst) {
- ++iq_it;
- instList[tid].pop_front();
- }
-
- assert(freeEntries == (numEntries - countInsts()));
-}
-
-template <class Impl>
-void
-InstQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
-{
- DPRINTF(IQ, "Waking dependents of completed instruction.\n");
- // Look at the physical destination register of the DynInst
- // and look it up on the dependency graph. Then mark as ready
- // any instructions within the instruction queue.
-/*
- DependencyEntry *curr;
- DependencyEntry *prev;
-*/
- // Tell the memory dependence unit to wake any dependents on this
- // instruction if it is a memory instruction. Also complete the memory
- // instruction at this point since we know it executed fine.
- // @todo: Might want to rename "completeMemInst" to
- // something that indicates that it won't need to be replayed, and call
- // this earlier. Might not be a big deal.
- if (completed_inst->isMemRef()) {
-// memDepUnit[completed_inst->threadNumber].wakeDependents(completed_inst);
- completeMemInst(completed_inst);
- }
- completed_inst->wakeDependents();
-/*
- for (int dest_reg_idx = 0;
- dest_reg_idx < completed_inst->numDestRegs();
- dest_reg_idx++)
- {
- PhysRegIndex dest_reg =
- completed_inst->renamedDestRegIdx(dest_reg_idx);
-
- // Special case of uniq or control registers. They are not
- // handled by the IQ and thus have no dependency graph entry.
- // @todo Figure out a cleaner way to handle this.
- if (dest_reg >= numPhysRegs) {
- continue;
- }
-
- DPRINTF(IQ, "Waking any dependents on register %i.\n",
- (int) dest_reg);
-
- //Maybe abstract this part into a function.
- //Go through the dependency chain, marking the registers as ready
- //within the waiting instructions.
-
- curr = dependGraph[dest_reg].next;
-
- while (curr) {
- DPRINTF(IQ, "Waking up a dependent instruction, PC%#x.\n",
- curr->inst->readPC());
-
- // Might want to give more information to the instruction
- // so that it knows which of its source registers is ready.
- // However that would mean that the dependency graph entries
- // would need to hold the src_reg_idx.
- curr->inst->markSrcRegReady();
-
- addIfReady(curr->inst);
-
- DependencyEntry::mem_alloc_counter--;
-
- prev = curr;
- curr = prev->next;
- prev->inst = NULL;
-
- delete prev;
- }
-
- // Reset the head node now that all of its dependents have been woken
- // up.
- dependGraph[dest_reg].next = NULL;
- dependGraph[dest_reg].inst = NULL;
-
- // Mark the scoreboard as having that register ready.
- regScoreboard[dest_reg] = true;
- }
-*/
-}
-
-template <class Impl>
-void
-InstQueue<Impl>::addReadyMemInst(DynInstPtr &ready_inst)
-{
- OpClass op_class = ready_inst->opClass();
-
- readyInsts.push(ready_inst);
-
- DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
- "the ready list, PC %#x opclass:%i [sn:%lli].\n",
- ready_inst->readPC(), op_class, ready_inst->seqNum);
-}
-/*
-template <class Impl>
-void
-InstQueue<Impl>::rescheduleMemInst(DynInstPtr &resched_inst)
-{
- memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
-}
-
-template <class Impl>
-void
-InstQueue<Impl>::replayMemInst(DynInstPtr &replay_inst)
-{
- memDepUnit[replay_inst->threadNumber].replay(replay_inst);
-}
-*/
-template <class Impl>
-void
-InstQueue<Impl>::completeMemInst(DynInstPtr &completed_inst)
-{
- int tid = completed_inst->threadNumber;
-
- DPRINTF(IQ, "Completing mem instruction PC:%#x [sn:%lli]\n",
- completed_inst->readPC(), completed_inst->seqNum);
-
- ++freeEntries;
-
-// completed_inst->memOpDone = true;
-
-// memDepUnit[tid].completed(completed_inst);
-
- count[tid]--;
-}
-/*
-template <class Impl>
-void
-InstQueue<Impl>::violation(DynInstPtr &store,
- DynInstPtr &faulting_load)
-{
- memDepUnit[store->threadNumber].violation(store, faulting_load);
-}
-*/
-template <class Impl>
-void
-InstQueue<Impl>::squash(unsigned tid)
-{
- DPRINTF(IQ, "[tid:%i]: Starting to squash instructions in "
- "the IQ.\n", tid);
-
- // Read instruction sequence number of last instruction out of the
- // time buffer.
-// squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum;
-
- // Setup the squash iterator to point to the tail.
- squashIt[tid] = instList[tid].end();
- --squashIt[tid];
-
- // Call doSquash if there are insts in the IQ
- if (count[tid] > 0) {
- doSquash(tid);
- }
-
- // Also tell the memory dependence unit to squash.
-// memDepUnit[tid].squash(squashedSeqNum[tid], tid);
-}
-
-template <class Impl>
-void
-InstQueue<Impl>::doSquash(unsigned tid)
-{
- // Make sure the squashed sequence number is valid.
- assert(squashedSeqNum[tid] != 0);
-
- DPRINTF(IQ, "[tid:%i]: Squashing until sequence number %i!\n",
- tid, squashedSeqNum[tid]);
-
- // Squash any instructions younger than the squashed sequence number
- // given.
- while (squashIt[tid] != instList[tid].end() &&
- (*squashIt[tid])->seqNum > squashedSeqNum[tid]) {
-
- DynInstPtr squashed_inst = (*squashIt[tid]);
-
- // Only handle the instruction if it actually is in the IQ and
- // hasn't already been squashed in the IQ.
- if (squashed_inst->threadNumber != tid ||
- squashed_inst->isSquashedInIQ()) {
- --squashIt[tid];
- continue;
- }
-
- if (!squashed_inst->isIssued() ||
- (squashed_inst->isMemRef()/* &&
- !squashed_inst->memOpDone*/)) {
-
- // Remove the instruction from the dependency list.
- if (!squashed_inst->isNonSpeculative()) {
-/*
- for (int src_reg_idx = 0;
- src_reg_idx < squashed_inst->numSrcRegs();
- src_reg_idx++)
- {
- PhysRegIndex src_reg =
- squashed_inst->renamedSrcRegIdx(src_reg_idx);
-
- // Only remove it from the dependency graph if it was
- // placed there in the first place.
- // HACK: This assumes that instructions woken up from the
- // dependency chain aren't informed that a specific src
- // register has become ready. This may not always be true
- // in the future.
- // Instead of doing a linked list traversal, we can just
- // remove these squashed instructions either at issue time,
- // or when the register is overwritten. The only downside
- // to this is it leaves more room for error.
-
- if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) &&
- src_reg < numPhysRegs) {
- dependGraph[src_reg].remove(squashed_inst);
- }
-
-
- ++iqSquashedOperandsExamined;
- }
-*/
- // Might want to remove producers as well.
- } else {
- nonSpecInsts[squashed_inst->seqNum] = NULL;
-
- nonSpecInsts.erase(squashed_inst->seqNum);
-
- ++iqSquashedNonSpecRemoved;
- }
-
- // Might want to also clear out the head of the dependency graph.
-
- // Mark it as squashed within the IQ.
- squashed_inst->setSquashedInIQ();
-
- // @todo: Remove this hack where several statuses are set so the
- // inst will flow through the rest of the pipeline.
- squashed_inst->setIssued();
- squashed_inst->setCanCommit();
-// squashed_inst->removeInIQ();
-
- //Update Thread IQ Count
- count[squashed_inst->threadNumber]--;
-
- ++freeEntries;
-
- if (numThreads > 1) {
- DPRINTF(IQ, "[tid:%i]: Instruction PC %#x squashed.\n",
- tid, squashed_inst->readPC());
- } else {
- DPRINTF(IQ, "Instruction PC %#x squashed.\n",
- squashed_inst->readPC());
- }
- }
-
- --squashIt[tid];
- ++iqSquashedInstsExamined;
- }
-}
-/*
-template <class Impl>
-void
-InstQueue<Impl>::DependencyEntry::insert(DynInstPtr &new_inst)
-{
- //Add this new, dependent instruction at the head of the dependency
- //chain.
-
- // First create the entry that will be added to the head of the
- // dependency chain.
- DependencyEntry *new_entry = new DependencyEntry;
- new_entry->next = this->next;
- new_entry->inst = new_inst;
-
- // Then actually add it to the chain.
- this->next = new_entry;
-
- ++mem_alloc_counter;
-}
-
-template <class Impl>
-void
-InstQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove)
-{
- DependencyEntry *prev = this;
- DependencyEntry *curr = this->next;
-
- // Make sure curr isn't NULL. Because this instruction is being
- // removed from a dependency list, it must have been placed there at
- // an earlier time. The dependency chain should not be empty,
- // unless the instruction dependent upon it is already ready.
- if (curr == NULL) {
- return;
- }
-
- // Find the instruction to remove within the dependency linked list.
- while (curr->inst != inst_to_remove) {
- prev = curr;
- curr = curr->next;
-
- assert(curr != NULL);
- }
-
- // Now remove this instruction from the list.
- prev->next = curr->next;
-
- --mem_alloc_counter;
-
- // Could push this off to the destructor of DependencyEntry
- curr->inst = NULL;
-
- delete curr;
-}
-
-template <class Impl>
-bool
-InstQueue<Impl>::addToDependents(DynInstPtr &new_inst)
-{
- // Loop through the instruction's source registers, adding
- // them to the dependency list if they are not ready.
- int8_t total_src_regs = new_inst->numSrcRegs();
- bool return_val = false;
-
- for (int src_reg_idx = 0;
- src_reg_idx < total_src_regs;
- src_reg_idx++)
- {
- // Only add it to the dependency graph if it's not ready.
- if (!new_inst->isReadySrcRegIdx(src_reg_idx)) {
- PhysRegIndex src_reg = new_inst->renamedSrcRegIdx(src_reg_idx);
-
- // Check the IQ's scoreboard to make sure the register
- // hasn't become ready while the instruction was in flight
- // between stages. Only if it really isn't ready should
- // it be added to the dependency graph.
- if (src_reg >= numPhysRegs) {
- continue;
- } else if (regScoreboard[src_reg] == false) {
- DPRINTF(IQ, "Instruction PC %#x has src reg %i that "
- "is being added to the dependency chain.\n",
- new_inst->readPC(), src_reg);
-
- dependGraph[src_reg].insert(new_inst);
-
- // Change the return value to indicate that something
- // was added to the dependency graph.
- return_val = true;
- } else {
- DPRINTF(IQ, "Instruction PC %#x has src reg %i that "
- "became ready before it reached the IQ.\n",
- new_inst->readPC(), src_reg);
- // Mark a register ready within the instruction.
- new_inst->markSrcRegReady();
- }
- }
- }
-
- return return_val;
-}
-
-template <class Impl>
-void
-InstQueue<Impl>::createDependency(DynInstPtr &new_inst)
-{
- //Actually nothing really needs to be marked when an
- //instruction becomes the producer of a register's value,
- //but for convenience a ptr to the producing instruction will
- //be placed in the head node of the dependency links.
- int8_t total_dest_regs = new_inst->numDestRegs();
-
- for (int dest_reg_idx = 0;
- dest_reg_idx < total_dest_regs;
- dest_reg_idx++)
- {
- PhysRegIndex dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx);
-
- // Instructions that use the misc regs will have a reg number
- // higher than the normal physical registers. In this case these
- // registers are not renamed, and there is no need to track
- // dependencies as these instructions must be executed at commit.
- if (dest_reg >= numPhysRegs) {
- continue;
- }
-
- if (dependGraph[dest_reg].next) {
- dumpDependGraph();
- panic("Dependency graph %i not empty!", dest_reg);
- }
-
- dependGraph[dest_reg].inst = new_inst;
-
- // Mark the scoreboard to say it's not yet ready.
- regScoreboard[dest_reg] = false;
- }
-}
-*/
-template <class Impl>
-void
-InstQueue<Impl>::addIfReady(DynInstPtr &inst)
-{
- //If the instruction now has all of its source registers
- // available, then add it to the list of ready instructions.
- if (inst->readyToIssue()) {
-
- //Add the instruction to the proper ready list.
- if (inst->isMemRef()) {
-
- DPRINTF(IQ, "Checking if memory instruction can issue.\n");
-
- // Message to the mem dependence unit that this instruction has
- // its registers ready.
-
-// memDepUnit[inst->threadNumber].regsReady(inst);
-
- return;
- }
-
- OpClass op_class = inst->opClass();
-
- DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
- "the ready list, PC %#x opclass:%i [sn:%lli].\n",
- inst->readPC(), op_class, inst->seqNum);
-
- readyInsts.push(inst);
- }
-}
-
-template <class Impl>
-int
-InstQueue<Impl>::countInsts()
-{
- //ksewell:This works but definitely could use a cleaner write
- //with a more intuitive way of counting. Right now it's
- //just brute force ....
-
-#if 0
- int total_insts = 0;
-
- for (int i = 0; i < numThreads; ++i) {
- ListIt count_it = instList[i].begin();
-
- while (count_it != instList[i].end()) {
- if (!(*count_it)->isSquashed() && !(*count_it)->isSquashedInIQ()) {
- if (!(*count_it)->isIssued()) {
- ++total_insts;
- } else if ((*count_it)->isMemRef() &&
- !(*count_it)->memOpDone) {
- // Loads that have not been marked as executed still count
- // towards the total instructions.
- ++total_insts;
- }
- }
-
- ++count_it;
- }
- }
-
- return total_insts;
-#else
- return numEntries - freeEntries;
-#endif
-}
-/*
-template <class Impl>
-void
-InstQueue<Impl>::dumpDependGraph()
-{
- DependencyEntry *curr;
-
- for (int i = 0; i < numPhysRegs; ++i)
- {
- curr = &dependGraph[i];
-
- if (curr->inst) {
- cprintf("dependGraph[%i]: producer: %#x [sn:%lli] consumer: ",
- i, curr->inst->readPC(), curr->inst->seqNum);
- } else {
- cprintf("dependGraph[%i]: No producer. consumer: ", i);
- }
-
- while (curr->next != NULL) {
- curr = curr->next;
-
- cprintf("%#x [sn:%lli] ",
- curr->inst->readPC(), curr->inst->seqNum);
- }
-
- cprintf("\n");
- }
-}
-*/
-template <class Impl>
-void
-InstQueue<Impl>::dumpLists()
-{
- for (int i = 0; i < Num_OpClasses; ++i) {
- cprintf("Ready list %i size: %i\n", i, readyInsts.size());
-
- cprintf("\n");
- }
-
- cprintf("Non speculative list size: %i\n", nonSpecInsts.size());
-
- NonSpecMapIt non_spec_it = nonSpecInsts.begin();
- NonSpecMapIt non_spec_end_it = nonSpecInsts.end();
-
- cprintf("Non speculative list: ");
-
- while (non_spec_it != non_spec_end_it) {
- cprintf("%#x [sn:%lli]", (*non_spec_it).second->readPC(),
- (*non_spec_it).second->seqNum);
- ++non_spec_it;
- }
-
- cprintf("\n");
-/*
- ListOrderIt list_order_it = listOrder.begin();
- ListOrderIt list_order_end_it = listOrder.end();
- int i = 1;
-
- cprintf("List order: ");
-
- while (list_order_it != list_order_end_it) {
- cprintf("%i OpClass:%i [sn:%lli] ", i, (*list_order_it).queueType,
- (*list_order_it).oldestInst);
-
- ++list_order_it;
- ++i;
- }
-*/
- cprintf("\n");
-}
-
-
-template <class Impl>
-void
-InstQueue<Impl>::dumpInsts()
-{
- for (int i = 0; i < numThreads; ++i) {
-// int num = 0;
-// int valid_num = 0;
-/*
- ListIt inst_list_it = instList[i].begin();
-
- while (inst_list_it != instList[i].end())
- {
- cprintf("Instruction:%i\n",
- num);
- if (!(*inst_list_it)->isSquashed()) {
- if (!(*inst_list_it)->isIssued()) {
- ++valid_num;
- cprintf("Count:%i\n", valid_num);
- } else if ((*inst_list_it)->isMemRef() &&
- !(*inst_list_it)->memOpDone) {
- // Loads that have not been marked as executed still count
- // towards the total instructions.
- ++valid_num;
- cprintf("Count:%i\n", valid_num);
- }
- }
-
- cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
- "Issued:%i\nSquashed:%i\n",
- (*inst_list_it)->readPC(),
- (*inst_list_it)->seqNum,
- (*inst_list_it)->threadNumber,
- (*inst_list_it)->isIssued(),
- (*inst_list_it)->isSquashed());
-
- if ((*inst_list_it)->isMemRef()) {
- cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
- }
-
- cprintf("\n");
-
- inst_list_it++;
- ++num;
- }
-*/
- }
-}
+++ /dev/null
-/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "cpu/ozone/ozone_impl.hh"
-#include "cpu/ozone/lsq_unit_impl.hh"
-
-// Force the instantiation of LDSTQ for all the implementations we care about.
-template class OzoneLSQ<OzoneImpl>;
-
+++ /dev/null
-/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __CPU_OZONE_LSQ_UNIT_HH__
-#define __CPU_OZONE_LSQ_UNIT_HH__
-
-#include <map>
-#include <queue>
-#include <algorithm>
-
-#include "arch/faults.hh"
-#include "arch/isa_traits.hh"
-#include "config/full_system.hh"
-#include "base/hashmap.hh"
-#include "cpu/inst_seq.hh"
-#include "mem/mem_interface.hh"
-//#include "mem/page_table.hh"
-#include "sim/sim_object.hh"
-
-class PageTable;
-
-/**
- * Class that implements the actual LQ and SQ for each specific thread.
- * Both are circular queues; load entries are freed upon committing, while
- * store entries are freed once they writeback. The LSQUnit tracks if there
- * are memory ordering violations, and also detects partial load to store
- * forwarding cases (a store only has part of a load's data) that requires
- * the load to wait until the store writes back. In the former case it
- * holds onto the instruction until the dependence unit looks at it, and
- * in the latter it stalls the LSQ until the store writes back. At that
- * point the load is replayed.
- */
-template <class Impl>
-class OzoneLSQ {
- public:
- typedef typename Impl::Params Params;
- typedef typename Impl::FullCPU FullCPU;
- typedef typename Impl::BackEnd BackEnd;
- typedef typename Impl::DynInstPtr DynInstPtr;
- typedef typename Impl::IssueStruct IssueStruct;
-
- typedef TheISA::IntReg IntReg;
-
- typedef typename std::map<InstSeqNum, DynInstPtr>::iterator LdMapIt;
-
- private:
- class StoreCompletionEvent : public Event {
- public:
- /** Constructs a store completion event. */
- StoreCompletionEvent(int store_idx, Event *wb_event, OzoneLSQ *lsq_ptr);
-
- /** Processes the store completion event. */
- void process();
-
- /** Returns the description of this event. */
- const char *description();
-
- private:
- /** The store index of the store being written back. */
- int storeIdx;
- /** The writeback event for the store. Needed for store
- * conditionals.
- */
- Event *wbEvent;
- /** The pointer to the LSQ unit that issued the store. */
- OzoneLSQ<Impl> *lsqPtr;
- };
-
- friend class StoreCompletionEvent;
-
- public:
- /** Constructs an LSQ unit. init() must be called prior to use. */
- OzoneLSQ();
-
- /** Initializes the LSQ unit with the specified number of entries. */
- void init(Params *params, unsigned maxLQEntries,
- unsigned maxSQEntries, unsigned id);
-
- /** Returns the name of the LSQ unit. */
- std::string name() const;
-
- /** Sets the CPU pointer. */
- void setCPU(FullCPU *cpu_ptr)
- { cpu = cpu_ptr; }
-
- /** Sets the back-end stage pointer. */
- void setBE(BackEnd *be_ptr)
- { be = be_ptr; }
-
- /** Sets the page table pointer. */
- void setPageTable(PageTable *pt_ptr);
-
- /** Ticks the LSQ unit, which in this case only resets the number of
- * used cache ports.
- * @todo: Move the number of used ports up to the LSQ level so it can
- * be shared by all LSQ units.
- */
- void tick() { usedPorts = 0; }
-
- /** Inserts an instruction. */
- void insert(DynInstPtr &inst);
- /** Inserts a load instruction. */
- void insertLoad(DynInstPtr &load_inst);
- /** Inserts a store instruction. */
- void insertStore(DynInstPtr &store_inst);
-
- /** Executes a load instruction. */
- Fault executeLoad(DynInstPtr &inst);
-
- Fault executeLoad(int lq_idx);
- /** Executes a store instruction. */
- Fault executeStore(DynInstPtr &inst);
-
- /** Commits the head load. */
- void commitLoad();
- /** Commits a specific load, given by the sequence number. */
- void commitLoad(InstSeqNum &inst);
- /** Commits loads older than a specific sequence number. */
- void commitLoads(InstSeqNum &youngest_inst);
-
- /** Commits stores older than a specific sequence number. */
- void commitStores(InstSeqNum &youngest_inst);
-
- /** Writes back stores. */
- void writebackStores();
-
- // @todo: Include stats in the LSQ unit.
- //void regStats();
-
- /** Clears all the entries in the LQ. */
- void clearLQ();
-
- /** Clears all the entries in the SQ. */
- void clearSQ();
-
- /** Resizes the LQ to a given size. */
- void resizeLQ(unsigned size);
-
- /** Resizes the SQ to a given size. */
- void resizeSQ(unsigned size);
-
- /** Squashes all instructions younger than a specific sequence number. */
- void squash(const InstSeqNum &squashed_num);
-
- /** Returns if there is a memory ordering violation. Value is reset upon
- * call to getMemDepViolator().
- */
- bool violation() { return memDepViolator; }
-
- /** Returns the memory ordering violator. */
- DynInstPtr getMemDepViolator();
-
- /** Returns if a load became blocked due to the memory system. It clears
- * the bool's value upon this being called.
- */
- inline bool loadBlocked();
-
- /** Returns the number of free entries (min of free LQ and SQ entries). */
- unsigned numFreeEntries();
-
- /** Returns the number of loads ready to execute. */
- int numLoadsReady();
-
- /** Returns the number of loads in the LQ. */
- int numLoads() { return loads; }
-
- /** Returns the number of stores in the SQ. */
- int numStores() { return stores; }
-
- /** Returns if either the LQ or SQ is full. */
- bool isFull() { return lqFull() || sqFull(); }
-
- /** Returns if the LQ is full. */
- bool lqFull() { return loads >= (LQEntries - 1); }
-
- /** Returns if the SQ is full. */
- bool sqFull() { return stores >= (SQEntries - 1); }
-
- /** Debugging function to dump instructions in the LSQ. */
- void dumpInsts();
-
- /** Returns the number of instructions in the LSQ. */
- unsigned getCount() { return loads + stores; }
-
- /** Returns if there are any stores to writeback. */
- bool hasStoresToWB() { return storesToWB; }
-
- /** Returns the number of stores to writeback. */
- int numStoresToWB() { return storesToWB; }
-
- /** Returns if the LSQ unit will writeback on this cycle. */
- bool willWB() { return storeQueue[storeWBIdx].canWB &&
- !storeQueue[storeWBIdx].completed &&
- !dcacheInterface->isBlocked(); }
-
- private:
- /** Completes the store at the specified index. */
- void completeStore(int store_idx);
-
- /** Increments the given store index (circular queue). */
- inline void incrStIdx(int &store_idx);
- /** Decrements the given store index (circular queue). */
- inline void decrStIdx(int &store_idx);
- /** Increments the given load index (circular queue). */
- inline void incrLdIdx(int &load_idx);
- /** Decrements the given load index (circular queue). */
- inline void decrLdIdx(int &load_idx);
-
- private:
- /** Pointer to the CPU. */
- FullCPU *cpu;
-
- /** Pointer to the back-end stage. */
- BackEnd *be;
-
- /** Pointer to the D-cache. */
- MemInterface *dcacheInterface;
-
- /** Pointer to the page table. */
- PageTable *pTable;
-
- public:
- struct SQEntry {
- /** Constructs an empty store queue entry. */
- SQEntry()
- : inst(NULL), req(NULL), size(0), data(0),
- canWB(0), committed(0), completed(0)
- { }
-
- /** Constructs a store queue entry for a given instruction. */
- SQEntry(DynInstPtr &_inst)
- : inst(_inst), req(NULL), size(0), data(0),
- canWB(0), committed(0), completed(0)
- { }
-
- /** The store instruction. */
- DynInstPtr inst;
- /** The memory request for the store. */
- MemReqPtr req;
- /** The size of the store. */
- int size;
- /** The store data. */
- IntReg data;
- /** Whether or not the store can writeback. */
- bool canWB;
- /** Whether or not the store is committed. */
- bool committed;
- /** Whether or not the store is completed. */
- bool completed;
- };
-
- enum Status {
- Running,
- Idle,
- DcacheMissStall,
- DcacheMissSwitch
- };
-
- private:
- /** The OzoneLSQ thread id. */
- unsigned lsqID;
-
- /** The status of the LSQ unit. */
- Status _status;
-
- /** The store queue. */
- std::vector<SQEntry> storeQueue;
-
- /** The load queue. */
- std::vector<DynInstPtr> loadQueue;
-
- // Consider making these 16 bits
- /** The number of LQ entries. */
- unsigned LQEntries;
- /** The number of SQ entries. */
- unsigned SQEntries;
-
- /** The number of load instructions in the LQ. */
- int loads;
- /** The number of store instructions in the SQ (excludes those waiting to
- * writeback).
- */
- int stores;
- /** The number of store instructions in the SQ waiting to writeback. */
- int storesToWB;
-
- /** The index of the head instruction in the LQ. */
- int loadHead;
- /** The index of the tail instruction in the LQ. */
- int loadTail;
-
- /** The index of the head instruction in the SQ. */
- int storeHead;
- /** The index of the first instruction that is ready to be written back,
- * and has not yet been written back.
- */
- int storeWBIdx;
- /** The index of the tail instruction in the SQ. */
- int storeTail;
-
- /// @todo Consider moving to a more advanced model with write vs read ports
- /** The number of cache ports available each cycle. */
- int cachePorts;
-
- /** The number of used cache ports in this cycle. */
- int usedPorts;
-
- //list<InstSeqNum> mshrSeqNums;
-
- //Stats::Scalar<> dcacheStallCycles;
- Counter lastDcacheStall;
-
- /** Wire to read information from the issue stage time queue. */
- typename TimeBuffer<IssueStruct>::wire fromIssue;
-
- // Make these per thread?
- /** Whether or not the LSQ is stalled. */
- bool stalled;
- /** The store that causes the stall due to partial store to load
- * forwarding.
- */
- InstSeqNum stallingStoreIsn;
- /** The index of the above store. */
- int stallingLoadIdx;
-
- /** Whether or not a load is blocked due to the memory system. It is
- * cleared when this value is checked via loadBlocked().
- */
- bool isLoadBlocked;
-
- /** The oldest faulting load instruction. */
- DynInstPtr loadFaultInst;
- /** The oldest faulting store instruction. */
- DynInstPtr storeFaultInst;
-
- /** The oldest load that caused a memory ordering violation. */
- DynInstPtr memDepViolator;
-
- // Will also need how many read/write ports the Dcache has. Or keep track
- // of that in stage that is one level up, and only call executeLoad/Store
- // the appropriate number of times.
-
- public:
- /** Executes the load at the given index. */
- template <class T>
- Fault read(MemReqPtr &req, T &data, int load_idx);
-
- /** Executes the store at the given index. */
- template <class T>
- Fault write(MemReqPtr &req, T &data, int store_idx);
-
- /** Returns the index of the head load instruction. */
- int getLoadHead() { return loadHead; }
- /** Returns the sequence number of the head load instruction. */
- InstSeqNum getLoadHeadSeqNum()
- {
- if (loadQueue[loadHead]) {
- return loadQueue[loadHead]->seqNum;
- } else {
- return 0;
- }
-
- }
-
- /** Returns the index of the head store instruction. */
- int getStoreHead() { return storeHead; }
- /** Returns the sequence number of the head store instruction. */
- InstSeqNum getStoreHeadSeqNum()
- {
- if (storeQueue[storeHead].inst) {
- return storeQueue[storeHead].inst->seqNum;
- } else {
- return 0;
- }
-
- }
-
- /** Returns whether or not the LSQ unit is stalled. */
- bool isStalled() { return stalled; }
-};
-
-template <class Impl>
-template <class T>
-Fault
-OzoneLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
-{
- //Depending on issue2execute delay a squashed load could
- //execute if it is found to be squashed in the same
- //cycle it is scheduled to execute
- assert(loadQueue[load_idx]);
-
- if (loadQueue[load_idx]->isExecuted()) {
- panic("Should not reach this point with split ops!");
-
- memcpy(&data,req->data,req->size);
-
- return NoFault;
- }
-
- // Make sure this isn't an uncacheable access
- // A bit of a hackish way to get uncached accesses to work only if they're
- // at the head of the LSQ and are ready to commit (at the head of the ROB
- // too).
- // @todo: Fix uncached accesses.
- if (req->flags & UNCACHEABLE &&
- (load_idx != loadHead || !loadQueue[load_idx]->readyToCommit())) {
-
- return TheISA::genMachineCheckFault();
- }
-
- // Check the SQ for any previous stores that might lead to forwarding
- int store_idx = loadQueue[load_idx]->sqIdx;
-
- int store_size = 0;
-
- DPRINTF(OzoneLSQ, "Read called, load idx: %i, store idx: %i, "
- "storeHead: %i addr: %#x\n",
- load_idx, store_idx, storeHead, req->paddr);
-
- while (store_idx != -1) {
- // End once we've reached the top of the LSQ
- if (store_idx == storeWBIdx) {
- break;
- }
-
- // Move the index to one younger
- if (--store_idx < 0)
- store_idx += SQEntries;
-
- assert(storeQueue[store_idx].inst);
-
- store_size = storeQueue[store_idx].size;
-
- if (store_size == 0)
- continue;
-
- // Check if the store data is within the lower and upper bounds of
- // addresses that the request needs.
- bool store_has_lower_limit =
- req->vaddr >= storeQueue[store_idx].inst->effAddr;
- bool store_has_upper_limit =
- (req->vaddr + req->size) <= (storeQueue[store_idx].inst->effAddr +
- store_size);
- bool lower_load_has_store_part =
- req->vaddr < (storeQueue[store_idx].inst->effAddr +
- store_size);
- bool upper_load_has_store_part =
- (req->vaddr + req->size) > storeQueue[store_idx].inst->effAddr;
-
- // If the store's data has all of the data needed, we can forward.
- if (store_has_lower_limit && store_has_upper_limit) {
-
- int shift_amt = req->vaddr & (store_size - 1);
- // Assumes byte addressing
- shift_amt = shift_amt << 3;
-
- // Cast this to type T?
- data = storeQueue[store_idx].data >> shift_amt;
-
- req->cmd = Read;
- assert(!req->completionEvent);
- req->completionEvent = NULL;
- req->time = curTick;
- assert(!req->data);
- req->data = new uint8_t[64];
-
- memcpy(req->data, &data, req->size);
-
- DPRINTF(OzoneLSQ, "Forwarding from store idx %i to load to "
- "addr %#x, data %#x\n",
- store_idx, req->vaddr, *(req->data));
-
- typename BackEnd::LdWritebackEvent *wb =
- new typename BackEnd::LdWritebackEvent(loadQueue[load_idx],
- be);
-
- // We'll say this has a 1 cycle load-store forwarding latency
- // for now.
- // FIXME - Need to make this a parameter.
- wb->schedule(curTick);
-
- // Should keep track of stat for forwarded data
- return NoFault;
- } else if ((store_has_lower_limit && lower_load_has_store_part) ||
- (store_has_upper_limit && upper_load_has_store_part) ||
- (lower_load_has_store_part && upper_load_has_store_part)) {
- // This is the partial store-load forwarding case where a store
- // has only part of the load's data.
-
- // If it's already been written back, then don't worry about
- // stalling on it.
- if (storeQueue[store_idx].completed) {
- continue;
- }
-
- // Must stall load and force it to retry, so long as it's the oldest
- // load that needs to do so.
- if (!stalled ||
- (stalled &&
- loadQueue[load_idx]->seqNum <
- loadQueue[stallingLoadIdx]->seqNum)) {
- stalled = true;
- stallingStoreIsn = storeQueue[store_idx].inst->seqNum;
- stallingLoadIdx = load_idx;
- }
-
- // Tell IQ/mem dep unit that this instruction will need to be
- // rescheduled eventually
- be->rescheduleMemInst(loadQueue[load_idx]);
-
- DPRINTF(OzoneLSQ, "Load-store forwarding mis-match. "
- "Store idx %i to load addr %#x\n",
- store_idx, req->vaddr);
-
- return NoFault;
- }
- }
-
-
- // If there's no forwarding case, then go access memory
- DynInstPtr inst = loadQueue[load_idx];
-
- ++usedPorts;
-
- // if we have a cache, do cache access too
- if (dcacheInterface) {
- if (dcacheInterface->isBlocked()) {
- isLoadBlocked = true;
- // No fault occurred, even though the interface is blocked.
- return NoFault;
- }
-
- DPRINTF(OzoneLSQ, "D-cache: PC:%#x reading from paddr:%#x "
- "vaddr:%#x flags:%i\n",
- inst->readPC(), req->paddr, req->vaddr, req->flags);
-
- // Setup MemReq pointer
- req->cmd = Read;
- req->completionEvent = NULL;
- req->time = curTick;
- assert(!req->data);
- req->data = new uint8_t[64];
-
- assert(!req->completionEvent);
- typedef typename BackEnd::LdWritebackEvent LdWritebackEvent;
-
- LdWritebackEvent *wb = new LdWritebackEvent(loadQueue[load_idx], be);
-
- req->completionEvent = wb;
-
- // Do Cache Access
- MemAccessResult result = dcacheInterface->access(req);
-
- // Ugly hack to get an event scheduled *only* if the access is
- // a miss. We really should add first-class support for this
- // at some point.
- // @todo: Probably should support having no events
- if (result != MA_HIT) {
- DPRINTF(OzoneLSQ, "D-cache miss!\n");
- DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
- inst->seqNum);
-
- lastDcacheStall = curTick;
-
- _status = DcacheMissStall;
-
- wb->setDcacheMiss();
-
- } else {
-// DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
-// inst->seqNum);
-
- DPRINTF(OzoneLSQ, "D-cache hit!\n");
- }
- } else {
- fatal("Must use D-cache with new memory system");
- }
-
- return NoFault;
-}
-
-template <class Impl>
-template <class T>
-Fault
-OzoneLSQ<Impl>::write(MemReqPtr &req, T &data, int store_idx)
-{
- assert(storeQueue[store_idx].inst);
-
- DPRINTF(OzoneLSQ, "Doing write to store idx %i, addr %#x data %#x"
- " | storeHead:%i [sn:%i]\n",
- store_idx, req->paddr, data, storeHead,
- storeQueue[store_idx].inst->seqNum);
-
- storeQueue[store_idx].req = req;
- storeQueue[store_idx].size = sizeof(T);
- storeQueue[store_idx].data = data;
-
- // This function only writes the data to the store queue, so no fault
- // can happen here.
- return NoFault;
-}
-
-template <class Impl>
-inline bool
-OzoneLSQ<Impl>::loadBlocked()
-{
- bool ret_val = isLoadBlocked;
- isLoadBlocked = false;
- return ret_val;
-}
-
-#endif // __CPU_OZONE_LSQ_UNIT_HH__
+++ /dev/null
-/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "arch/isa_traits.hh"
-#include "base/str.hh"
-#include "cpu/ozone/lsq_unit.hh"
-
-template <class Impl>
-OzoneLSQ<Impl>::StoreCompletionEvent::StoreCompletionEvent(int store_idx,
- Event *wb_event,
- OzoneLSQ<Impl> *lsq_ptr)
- : Event(&mainEventQueue),
- storeIdx(store_idx),
- wbEvent(wb_event),
- lsqPtr(lsq_ptr)
-{
- this->setFlags(Event::AutoDelete);
-}
-
-template <class Impl>
-void
-OzoneLSQ<Impl>::StoreCompletionEvent::process()
-{
- DPRINTF(OzoneLSQ, "Cache miss complete for store idx:%i\n", storeIdx);
-
- //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
-
-// lsqPtr->cpu->wakeCPU();
- if (wbEvent)
- wbEvent->process();
- lsqPtr->completeStore(storeIdx);
-}
-
-template <class Impl>
-const char *
-OzoneLSQ<Impl>::StoreCompletionEvent::description()
-{
- return "LSQ store completion event";
-}
-
-template <class Impl>
-OzoneLSQ<Impl>::OzoneLSQ()
- : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false)
-{
-}
-
-template<class Impl>
-void
-OzoneLSQ<Impl>::init(Params *params, unsigned maxLQEntries,
- unsigned maxSQEntries, unsigned id)
-
-{
- DPRINTF(OzoneLSQ, "Creating OzoneLSQ%i object.\n",id);
-
- lsqID = id;
-
- LQEntries = maxLQEntries;
- SQEntries = maxSQEntries;
-
- loadQueue.resize(LQEntries);
- storeQueue.resize(SQEntries);
-
-
- // May want to initialize these entries to NULL
-
- loadHead = loadTail = 0;
-
- storeHead = storeWBIdx = storeTail = 0;
-
- usedPorts = 0;
- cachePorts = params->cachePorts;
-
- dcacheInterface = params->dcacheInterface;
-
- loadFaultInst = storeFaultInst = memDepViolator = NULL;
-}
-
-template<class Impl>
-std::string
-OzoneLSQ<Impl>::name() const
-{
- return "lsqunit";
-}
-
-template<class Impl>
-void
-OzoneLSQ<Impl>::clearLQ()
-{
- loadQueue.clear();
-}
-
-template<class Impl>
-void
-OzoneLSQ<Impl>::clearSQ()
-{
- storeQueue.clear();
-}
-
-template<class Impl>
-void
-OzoneLSQ<Impl>::setPageTable(PageTable *pt_ptr)
-{
- DPRINTF(OzoneLSQ, "Setting the page table pointer.\n");
- pTable = pt_ptr;
-}
-
-template<class Impl>
-void
-OzoneLSQ<Impl>::resizeLQ(unsigned size)
-{
- assert( size >= LQEntries);
-
- if (size > LQEntries) {
- while (size > loadQueue.size()) {
- DynInstPtr dummy;
- loadQueue.push_back(dummy);
- LQEntries++;
- }
- } else {
- LQEntries = size;
- }
-
-}
-
-template<class Impl>
-void
-OzoneLSQ<Impl>::resizeSQ(unsigned size)
-{
- if (size > SQEntries) {
- while (size > storeQueue.size()) {
- SQEntry dummy;
- storeQueue.push_back(dummy);
- SQEntries++;
- }
- } else {
- SQEntries = size;
- }
-}
-
-template <class Impl>
-void
-OzoneLSQ<Impl>::insert(DynInstPtr &inst)
-{
- // Make sure we really have a memory reference.
- assert(inst->isMemRef());
-
- // Make sure it's one of the two classes of memory references.
- assert(inst->isLoad() || inst->isStore());
-
- if (inst->isLoad()) {
- insertLoad(inst);
- } else {
- insertStore(inst);
- }
-
-// inst->setInLSQ();
-}
-
-template <class Impl>
-void
-OzoneLSQ<Impl>::insertLoad(DynInstPtr &load_inst)
-{
- assert((loadTail + 1) % LQEntries != loadHead && loads < LQEntries);
-
- DPRINTF(OzoneLSQ, "Inserting load PC %#x, idx:%i [sn:%lli]\n",
- load_inst->readPC(), loadTail, load_inst->seqNum);
-
- load_inst->lqIdx = loadTail;
-
- if (stores == 0) {
- load_inst->sqIdx = -1;
- } else {
- load_inst->sqIdx = storeTail;
- }
-
- loadQueue[loadTail] = load_inst;
-
- incrLdIdx(loadTail);
-
- ++loads;
-}
-
-template <class Impl>
-void
-OzoneLSQ<Impl>::insertStore(DynInstPtr &store_inst)
-{
- // Make sure it is not full before inserting an instruction.
- assert((storeTail + 1) % SQEntries != storeHead);
- assert(stores < SQEntries);
-
- DPRINTF(OzoneLSQ, "Inserting store PC %#x, idx:%i [sn:%lli]\n",
- store_inst->readPC(), storeTail, store_inst->seqNum);
-
- store_inst->sqIdx = storeTail;
- store_inst->lqIdx = loadTail;
-
- storeQueue[storeTail] = SQEntry(store_inst);
-
- incrStIdx(storeTail);
-
- ++stores;
-
-}
-
-template <class Impl>
-typename Impl::DynInstPtr
-OzoneLSQ<Impl>::getMemDepViolator()
-{
- DynInstPtr temp = memDepViolator;
-
- memDepViolator = NULL;
-
- return temp;
-}
-
-template <class Impl>
-unsigned
-OzoneLSQ<Impl>::numFreeEntries()
-{
- unsigned free_lq_entries = LQEntries - loads;
- unsigned free_sq_entries = SQEntries - stores;
-
- // Both the LQ and SQ entries have an extra dummy entry to differentiate
- // empty/full conditions. Subtract 1 from the free entries.
- if (free_lq_entries < free_sq_entries) {
- return free_lq_entries - 1;
- } else {
- return free_sq_entries - 1;
- }
-}
-
-template <class Impl>
-int
-OzoneLSQ<Impl>::numLoadsReady()
-{
- int load_idx = loadHead;
- int retval = 0;
-
- while (load_idx != loadTail) {
- assert(loadQueue[load_idx]);
-
- if (loadQueue[load_idx]->readyToIssue()) {
- ++retval;
- }
- }
-
- return retval;
-}
-
-#if 0
-template <class Impl>
-Fault
-OzoneLSQ<Impl>::executeLoad()
-{
- Fault load_fault = NoFault;
- DynInstPtr load_inst;
-
- assert(readyLoads.size() != 0);
-
- // Execute a ready load.
- LdMapIt ready_it = readyLoads.begin();
-
- load_inst = (*ready_it).second;
-
- // Execute the instruction, which is held in the data portion of the
- // iterator.
- load_fault = load_inst->execute();
-
- // If it executed successfully, then switch it over to the executed
- // loads list.
- if (load_fault == NoFault) {
- executedLoads[load_inst->seqNum] = load_inst;
-
- readyLoads.erase(ready_it);
- } else {
- loadFaultInst = load_inst;
- }
-
- return load_fault;
-}
-#endif
-
-template <class Impl>
-Fault
-OzoneLSQ<Impl>::executeLoad(DynInstPtr &inst)
-{
- // Execute a specific load.
- Fault load_fault = NoFault;
-
- DPRINTF(OzoneLSQ, "Executing load PC %#x, [sn:%lli]\n",
- inst->readPC(),inst->seqNum);
-
- // Make sure it's really in the list.
- // Normally it should always be in the list. However,
- /* due to a syscall it may not be the list.
-#ifdef DEBUG
- int i = loadHead;
- while (1) {
- if (i == loadTail && !find(inst)) {
- assert(0 && "Load not in the queue!");
- } else if (loadQueue[i] == inst) {
- break;
- }
-
- i = i + 1;
- if (i >= LQEntries) {
- i = 0;
- }
- }
-#endif // DEBUG*/
-
- load_fault = inst->initiateAcc();
-
- // Might want to make sure that I'm not overwriting a previously faulting
- // instruction that hasn't been checked yet.
- // Actually probably want the oldest faulting load
- if (load_fault != NoFault) {
- // Maybe just set it as can commit here, although that might cause
- // some other problems with sending traps to the ROB too quickly.
-// iewStage->instToCommit(inst);
-// iewStage->activityThisCycle();
- }
-
- return load_fault;
-}
-
-template <class Impl>
-Fault
-OzoneLSQ<Impl>::executeLoad(int lq_idx)
-{
- // Very hackish. Not sure the best way to check that this
- // instruction is at the head of the ROB. I should have some sort
- // of extra information here so that I'm not overloading the
- // canCommit signal for 15 different things.
- loadQueue[lq_idx]->setCanCommit();
- Fault ret_fault = executeLoad(loadQueue[lq_idx]);
- loadQueue[lq_idx]->clearCanCommit();
- return ret_fault;
-}
-
-template <class Impl>
-Fault
-OzoneLSQ<Impl>::executeStore(DynInstPtr &store_inst)
-{
- // Make sure that a store exists.
- assert(stores != 0);
-
- int store_idx = store_inst->sqIdx;
-
- DPRINTF(OzoneLSQ, "Executing store PC %#x [sn:%lli]\n",
- store_inst->readPC(), store_inst->seqNum);
-
- // Check the recently completed loads to see if any match this store's
- // address. If so, then we have a memory ordering violation.
- int load_idx = store_inst->lqIdx;
-
- Fault store_fault = store_inst->initiateAcc();
-
- // Store size should now be available. Use it to get proper offset for
- // addr comparisons.
- int size = storeQueue[store_idx].size;
-
- if (size == 0) {
- DPRINTF(OzoneLSQ,"Fault on Store PC %#x, [sn:%lli],Size = 0\n",
- store_inst->readPC(),store_inst->seqNum);
-
- return store_fault;
- }
-
- assert(store_fault == NoFault);
-
- if (!storeFaultInst) {
- if (store_fault != NoFault) {
- panic("Fault in a store instruction!");
- storeFaultInst = store_inst;
- } else if (store_inst->isNonSpeculative()) {
- // Nonspeculative accesses (namely store conditionals)
- // need to set themselves as able to writeback if we
- // haven't had a fault by here.
- storeQueue[store_idx].canWB = true;
-
- ++storesToWB;
- }
- }
-
- if (!memDepViolator) {
- while (load_idx != loadTail) {
- // Actually should only check loads that have actually executed
- // Might be safe because effAddr is set to InvalAddr when the
- // dyn inst is created.
-
- // Must actually check all addrs in the proper size range
- // Which is more correct than needs to be. What if for now we just
- // assume all loads are quad-word loads, and do the addr based
- // on that.
- // @todo: Fix this, magic number being used here
- if ((loadQueue[load_idx]->effAddr >> 8) ==
- (store_inst->effAddr >> 8)) {
- // A load incorrectly passed this store. Squash and refetch.
- // For now return a fault to show that it was unsuccessful.
- memDepViolator = loadQueue[load_idx];
-
- return TheISA::genMachineCheckFault();
- }
-
- incrLdIdx(load_idx);
- }
-
- // If we've reached this point, there was no violation.
- memDepViolator = NULL;
- }
-
- return store_fault;
-}
-
-template <class Impl>
-void
-OzoneLSQ<Impl>::commitLoad()
-{
- assert(loadQueue[loadHead]);
-
- DPRINTF(OzoneLSQ, "[sn:%lli] Committing head load instruction, PC %#x\n",
- loadQueue[loadHead]->seqNum, loadQueue[loadHead]->readPC());
-
-
- loadQueue[loadHead] = NULL;
-
- incrLdIdx(loadHead);
-
- --loads;
-}
-
-template <class Impl>
-void
-OzoneLSQ<Impl>::commitLoad(InstSeqNum &inst)
-{
- // Hopefully I don't use this function too much
- panic("Don't use this function!");
-
- int i = loadHead;
- while (1) {
- if (i == loadTail) {
- assert(0 && "Load not in the queue!");
- } else if (loadQueue[i]->seqNum == inst) {
- break;
- }
-
- ++i;
- if (i >= LQEntries) {
- i = 0;
- }
- }
-
-// loadQueue[i]->removeInLSQ();
- loadQueue[i] = NULL;
- --loads;
-}
-
-template <class Impl>
-void
-OzoneLSQ<Impl>::commitLoads(InstSeqNum &youngest_inst)
-{
- assert(loads == 0 || loadQueue[loadHead]);
-
- while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) {
- commitLoad();
- }
-}
-
-template <class Impl>
-void
-OzoneLSQ<Impl>::commitStores(InstSeqNum &youngest_inst)
-{
- assert(stores == 0 || storeQueue[storeHead].inst);
-
- int store_idx = storeHead;
-
- while (store_idx != storeTail) {
- assert(storeQueue[store_idx].inst);
- if (!storeQueue[store_idx].canWB) {
- if (storeQueue[store_idx].inst->seqNum > youngest_inst) {
- break;
- }
- DPRINTF(OzoneLSQ, "Marking store as able to write back, PC "
- "%#x [sn:%lli]\n",
- storeQueue[store_idx].inst->readPC(),
- storeQueue[store_idx].inst->seqNum);
-
- storeQueue[store_idx].canWB = true;
-
-// --stores;
- ++storesToWB;
- }
-
- incrStIdx(store_idx);
- }
-}
-
-template <class Impl>
-void
-OzoneLSQ<Impl>::writebackStores()
-{
- while (storesToWB > 0 &&
- storeWBIdx != storeTail &&
- storeQueue[storeWBIdx].inst &&
- storeQueue[storeWBIdx].canWB &&
- usedPorts < cachePorts) {
-
- if (storeQueue[storeWBIdx].size == 0) {
- completeStore(storeWBIdx);
-
- incrStIdx(storeWBIdx);
-
- continue;
- }
-
- if (dcacheInterface && dcacheInterface->isBlocked()) {
- DPRINTF(OzoneLSQ, "Unable to write back any more stores, cache"
- " is blocked!\n");
- break;
- }
-
- ++usedPorts;
-
- if (storeQueue[storeWBIdx].inst->isDataPrefetch()) {
- incrStIdx(storeWBIdx);
-
- continue;
- }
-
- assert(storeQueue[storeWBIdx].req);
- assert(!storeQueue[storeWBIdx].committed);
-
- MemReqPtr req = storeQueue[storeWBIdx].req;
- storeQueue[storeWBIdx].committed = true;
-
-// Fault fault = cpu->translateDataReadReq(req);
- req->cmd = Write;
- req->completionEvent = NULL;
- req->time = curTick;
- assert(!req->data);
- req->data = new uint8_t[64];
- memcpy(req->data, (uint8_t *)&storeQueue[storeWBIdx].data, req->size);
-
- DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x "
- "to Addr:%#x, data:%#x [sn:%lli]\n",
- storeWBIdx,storeQueue[storeWBIdx].inst->readPC(),
- req->paddr, *(req->data),
- storeQueue[storeWBIdx].inst->seqNum);
-
-// if (fault != NoFault) {
- //What should we do if there is a fault???
- //for now panic
-// panic("Page Table Fault!!!!!\n");
-// }
-
- if (dcacheInterface) {
- MemAccessResult result = dcacheInterface->access(req);
-
- //@todo temp fix for LL/SC (works fine for 1 CPU)
- if (req->flags & LOCKED) {
- req->result=1;
- panic("LL/SC! oh no no support!!!");
- }
-
- if (isStalled() &&
- storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) {
- DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
- "load idx:%i\n",
- stallingStoreIsn, stallingLoadIdx);
- stalled = false;
- stallingStoreIsn = 0;
- be->replayMemInst(loadQueue[stallingLoadIdx]);
- }
-
- if (result != MA_HIT && dcacheInterface->doEvents()) {
- Event *wb = NULL;
-/*
- typename IEW::LdWritebackEvent *wb = NULL;
- if (req->flags & LOCKED) {
- // Stx_C does not generate a system port transaction.
- req->result=0;
- wb = new typename IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst,
- iewStage);
- }
-*/
- DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n");
-
-// DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
-// storeQueue[storeWBIdx].inst->seqNum);
-
- // Will stores need their own kind of writeback events?
- // Do stores even need writeback events?
- assert(!req->completionEvent);
- req->completionEvent = new
- StoreCompletionEvent(storeWBIdx, wb, this);
-
- lastDcacheStall = curTick;
-
- _status = DcacheMissStall;
-
- //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum);
-
- //DPRINTF(OzoneLSQ, "Added MSHR. count = %i\n",mshrSeqNums.size());
-
- // Increment stat here or something
- } else {
- DPRINTF(OzoneLSQ,"D-Cache: Write Hit on idx:%i !\n",
- storeWBIdx);
-
-// DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
-// storeQueue[storeWBIdx].inst->seqNum);
-
- if (req->flags & LOCKED) {
- // Stx_C does not generate a system port transaction.
- req->result=1;
- typename BackEnd::LdWritebackEvent *wb =
- new typename BackEnd::LdWritebackEvent(storeQueue[storeWBIdx].inst,
- be);
- wb->schedule(curTick);
- }
-
- completeStore(storeWBIdx);
- }
-
- incrStIdx(storeWBIdx);
- } else {
- panic("Must HAVE DCACHE!!!!!\n");
- }
- }
-
- // Not sure this should set it to 0.
- usedPorts = 0;
-
- assert(stores >= 0 && storesToWB >= 0);
-}
-
-/*template <class Impl>
-void
-OzoneLSQ<Impl>::removeMSHR(InstSeqNum seqNum)
-{
- list<InstSeqNum>::iterator mshr_it = find(mshrSeqNums.begin(),
- mshrSeqNums.end(),
- seqNum);
-
- if (mshr_it != mshrSeqNums.end()) {
- mshrSeqNums.erase(mshr_it);
- DPRINTF(OzoneLSQ, "Removing MSHR. count = %i\n",mshrSeqNums.size());
- }
-}*/
-
-template <class Impl>
-void
-OzoneLSQ<Impl>::squash(const InstSeqNum &squashed_num)
-{
- DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!"
- "(Loads:%i Stores:%i)\n",squashed_num,loads,stores);
-
- int load_idx = loadTail;
- decrLdIdx(load_idx);
-
- while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) {
-
- // Clear the smart pointer to make sure it is decremented.
- DPRINTF(OzoneLSQ,"Load Instruction PC %#x squashed, "
- "[sn:%lli]\n",
- loadQueue[load_idx]->readPC(),
- loadQueue[load_idx]->seqNum);
-
- if (isStalled() && load_idx == stallingLoadIdx) {
- stalled = false;
- stallingStoreIsn = 0;
- stallingLoadIdx = 0;
- }
-
-// loadQueue[load_idx]->squashed = true;
- loadQueue[load_idx] = NULL;
- --loads;
-
- // Inefficient!
- loadTail = load_idx;
-
- decrLdIdx(load_idx);
- }
-
- int store_idx = storeTail;
- decrStIdx(store_idx);
-
- while (stores != 0 && storeQueue[store_idx].inst->seqNum > squashed_num) {
-
- // Clear the smart pointer to make sure it is decremented.
- DPRINTF(OzoneLSQ,"Store Instruction PC %#x squashed, "
- "idx:%i [sn:%lli]\n",
- storeQueue[store_idx].inst->readPC(),
- store_idx, storeQueue[store_idx].inst->seqNum);
-
- // I don't think this can happen. It should have been cleared by the
- // stalling load.
- if (isStalled() &&
- storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
- panic("Is stalled should have been cleared by stalling load!\n");
- stalled = false;
- stallingStoreIsn = 0;
- }
-
-// storeQueue[store_idx].inst->squashed = true;
- storeQueue[store_idx].inst = NULL;
- storeQueue[store_idx].canWB = 0;
-
- if (storeQueue[store_idx].req) {
- assert(!storeQueue[store_idx].req->completionEvent);
- }
- storeQueue[store_idx].req = NULL;
- --stores;
-
- // Inefficient!
- storeTail = store_idx;
-
- decrStIdx(store_idx);
- }
-}
-
-template <class Impl>
-void
-OzoneLSQ<Impl>::dumpInsts()
-{
- cprintf("Load store queue: Dumping instructions.\n");
- cprintf("Load queue size: %i\n", loads);
- cprintf("Load queue: ");
-
- int load_idx = loadHead;
-
- while (load_idx != loadTail && loadQueue[load_idx]) {
- cprintf("[sn:%lli] %#x ", loadQueue[load_idx]->seqNum,
- loadQueue[load_idx]->readPC());
-
- incrLdIdx(load_idx);
- }
-
- cprintf("\nStore queue size: %i\n", stores);
- cprintf("Store queue: ");
-
- int store_idx = storeHead;
-
- while (store_idx != storeTail && storeQueue[store_idx].inst) {
- cprintf("[sn:%lli] %#x ", storeQueue[store_idx].inst->seqNum,
- storeQueue[store_idx].inst->readPC());
-
- incrStIdx(store_idx);
- }
-
- cprintf("\n");
-}
-
-template <class Impl>
-void
-OzoneLSQ<Impl>::completeStore(int store_idx)
-{
- assert(storeQueue[store_idx].inst);
- storeQueue[store_idx].completed = true;
- --storesToWB;
- // A bit conservative because a store completion may not free up entries,
- // but hopefully avoids two store completions in one cycle from making
- // the CPU tick twice.
-// cpu->activityThisCycle();
-
- if (store_idx == storeHead) {
- do {
- incrStIdx(storeHead);
-
- --stores;
- } while (storeQueue[storeHead].completed &&
- storeHead != storeTail);
-
-// be->updateLSQNextCycle = true;
- }
-
- DPRINTF(OzoneLSQ, "Store head idx:%i\n", storeHead);
-
- if (isStalled() &&
- storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
- DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
- "load idx:%i\n",
- stallingStoreIsn, stallingLoadIdx);
- stalled = false;
- stallingStoreIsn = 0;
- be->replayMemInst(loadQueue[stallingLoadIdx]);
- }
-}
-
-template <class Impl>
-inline void
-OzoneLSQ<Impl>::incrStIdx(int &store_idx)
-{
- if (++store_idx >= SQEntries)
- store_idx = 0;
-}
-
-template <class Impl>
-inline void
-OzoneLSQ<Impl>::decrStIdx(int &store_idx)
-{
- if (--store_idx < 0)
- store_idx += SQEntries;
-}
-
-template <class Impl>
-inline void
-OzoneLSQ<Impl>::incrLdIdx(int &load_idx)
-{
- if (++load_idx >= LQEntries)
- load_idx = 0;
-}
-
-template <class Impl>
-inline void
-OzoneLSQ<Impl>::decrLdIdx(int &load_idx)
-{
- if (--load_idx < 0)
- load_idx += LQEntries;
-}
+++ /dev/null
-
-#include "cpu/ozone/lw_back_end_impl.hh"
-#include "cpu/ozone/ozone_impl.hh"
-
-template class LWBackEnd<OzoneImpl>;
+++ /dev/null
-/*
- * Copyright (c) 2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __CPU_OZONE_LW_BACK_END_HH__
-#define __CPU_OZONE_LW_BACK_END_HH__
-
-#include <list>
-#include <queue>
-#include <set>
-#include <string>
-
-#include "arch/faults.hh"
-#include "base/timebuf.hh"
-#include "cpu/inst_seq.hh"
-#include "cpu/ozone/rename_table.hh"
-#include "cpu/ozone/thread_state.hh"
-#include "mem/functional/functional.hh"
-#include "mem/mem_interface.hh"
-#include "mem/mem_req.hh"
-#include "sim/eventq.hh"
-
-template <class>
-class Checker;
-class ExecContext;
-
-template <class Impl>
-class OzoneThreadState;
-
-template <class Impl>
-class LWBackEnd
-{
- public:
- typedef OzoneThreadState<Impl> Thread;
-
- typedef typename Impl::Params Params;
- typedef typename Impl::DynInst DynInst;
- typedef typename Impl::DynInstPtr DynInstPtr;
- typedef typename Impl::FullCPU FullCPU;
- typedef typename Impl::FrontEnd FrontEnd;
- typedef typename Impl::FullCPU::CommStruct CommStruct;
-
- struct SizeStruct {
- int size;
- };
-
- typedef SizeStruct DispatchToIssue;
- typedef SizeStruct IssueToExec;
- typedef SizeStruct ExecToCommit;
- typedef SizeStruct Writeback;
-
- TimeBuffer<DispatchToIssue> d2i;
- typename TimeBuffer<DispatchToIssue>::wire instsToDispatch;
- TimeBuffer<IssueToExec> i2e;
- typename TimeBuffer<IssueToExec>::wire instsToExecute;
- TimeBuffer<ExecToCommit> e2c;
- TimeBuffer<Writeback> numInstsToWB;
-
- TimeBuffer<CommStruct> *comm;
- typename TimeBuffer<CommStruct>::wire toIEW;
- typename TimeBuffer<CommStruct>::wire fromCommit;
-
- class TrapEvent : public Event {
- private:
- LWBackEnd<Impl> *be;
-
- public:
- TrapEvent(LWBackEnd<Impl> *_be);
-
- void process();
- const char *description();
- };
-
- /** LdWriteback event for a load completion. */
- class LdWritebackEvent : public Event {
- private:
- /** Instruction that is writing back data to the register file. */
- DynInstPtr inst;
- /** Pointer to IEW stage. */
- LWBackEnd *be;
-
- bool dcacheMiss;
-
- public:
- /** Constructs a load writeback event. */
- LdWritebackEvent(DynInstPtr &_inst, LWBackEnd *be);
-
- /** Processes writeback event. */
- virtual void process();
- /** Returns the description of the writeback event. */
- virtual const char *description();
-
- void setDcacheMiss() { dcacheMiss = true; be->addDcacheMiss(inst); }
- };
-
- LWBackEnd(Params *params);
-
- std::string name() const;
-
- void regStats();
-
- void setCPU(FullCPU *cpu_ptr);
-
- void setFrontEnd(FrontEnd *front_end_ptr)
- { frontEnd = front_end_ptr; }
-
- void setXC(ExecContext *xc_ptr)
- { xc = xc_ptr; }
-
- void setThreadState(Thread *thread_ptr)
- { thread = thread_ptr; }
-
- void setCommBuffer(TimeBuffer<CommStruct> *_comm);
-
- void tick();
- void squash();
- void generateXCEvent() { xcSquash = true; }
- void squashFromXC();
- void squashFromTrap();
- void checkInterrupts();
- bool trapSquash;
- bool xcSquash;
-
- template <class T>
- Fault read(MemReqPtr &req, T &data, int load_idx);
-
- template <class T>
- Fault write(MemReqPtr &req, T &data, int store_idx);
-
- Addr readCommitPC() { return commitPC; }
-
- Addr commitPC;
-
- Tick lastCommitCycle;
-
- bool robEmpty() { return instList.empty(); }
-
- bool isFull() { return numInsts >= numROBEntries; }
- bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }
-
- void fetchFault(Fault &fault);
-
- int wakeDependents(DynInstPtr &inst, bool memory_deps = false);
-
- /** Tells memory dependence unit that a memory instruction needs to be
- * rescheduled. It will re-execute once replayMemInst() is called.
- */
- void rescheduleMemInst(DynInstPtr &inst);
-
- /** Re-executes all rescheduled memory instructions. */
- void replayMemInst(DynInstPtr &inst);
-
- /** Completes memory instruction. */
- void completeMemInst(DynInstPtr &inst) { }
-
- void addDcacheMiss(DynInstPtr &inst)
- {
- waitingMemOps.insert(inst->seqNum);
- numWaitingMemOps++;
- DPRINTF(BE, "Adding a Dcache miss mem op [sn:%lli], total %i\n",
- inst->seqNum, numWaitingMemOps);
- }
-
- void removeDcacheMiss(DynInstPtr &inst)
- {
- assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end());
- waitingMemOps.erase(inst->seqNum);
- numWaitingMemOps--;
- DPRINTF(BE, "Removing a Dcache miss mem op [sn:%lli], total %i\n",
- inst->seqNum, numWaitingMemOps);
- }
-
- void addWaitingMemOp(DynInstPtr &inst)
- {
- waitingMemOps.insert(inst->seqNum);
- numWaitingMemOps++;
- DPRINTF(BE, "Adding a waiting mem op [sn:%lli], total %i\n",
- inst->seqNum, numWaitingMemOps);
- }
-
- void removeWaitingMemOp(DynInstPtr &inst)
- {
- assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end());
- waitingMemOps.erase(inst->seqNum);
- numWaitingMemOps--;
- DPRINTF(BE, "Removing a waiting mem op [sn:%lli], total %i\n",
- inst->seqNum, numWaitingMemOps);
- }
-
- void instToCommit(DynInstPtr &inst);
-
- void switchOut();
- void doSwitchOut();
- void takeOverFrom(ExecContext *old_xc = NULL);
-
- bool isSwitchedOut() { return switchedOut; }
-
- private:
- void generateTrapEvent(Tick latency = 0);
- void handleFault(Fault &fault, Tick latency = 0);
- void updateStructures();
- void dispatchInsts();
- void dispatchStall();
- void checkDispatchStatus();
- void executeInsts();
- void commitInsts();
- void addToLSQ(DynInstPtr &inst);
- void writebackInsts();
- bool commitInst(int inst_num);
- void squash(const InstSeqNum &sn);
- void squashDueToBranch(DynInstPtr &inst);
- void squashDueToMemViolation(DynInstPtr &inst);
- void squashDueToMemBlocked(DynInstPtr &inst);
- void updateExeInstStats(DynInstPtr &inst);
- void updateComInstStats(DynInstPtr &inst);
-
- public:
- FullCPU *cpu;
-
- FrontEnd *frontEnd;
-
- ExecContext *xc;
-
- Thread *thread;
-
- enum Status {
- Running,
- Idle,
- DcacheMissStall,
- DcacheMissComplete,
- Blocked,
- TrapPending
- };
-
- Status status;
-
- Status dispatchStatus;
-
- Status commitStatus;
-
- Counter funcExeInst;
-
- private:
- typedef typename Impl::LdstQueue LdstQueue;
-
- LdstQueue LSQ;
- public:
- RenameTable<Impl> commitRenameTable;
-
- RenameTable<Impl> renameTable;
- private:
- class DCacheCompletionEvent : public Event
- {
- private:
- LWBackEnd *be;
-
- public:
- DCacheCompletionEvent(LWBackEnd *_be);
-
- virtual void process();
- virtual const char *description();
- };
-
- friend class DCacheCompletionEvent;
-
- DCacheCompletionEvent cacheCompletionEvent;
-
- MemInterface *dcacheInterface;
-
- MemReqPtr memReq;
-
- // General back end width. Used if the more specific isn't given.
- int width;
-
- // Dispatch width.
- int dispatchWidth;
- int numDispatchEntries;
- int dispatchSize;
-
- int waitingInsts;
-
- int issueWidth;
-
- // Writeback width
- int wbWidth;
-
- // Commit width
- int commitWidth;
-
- /** Index into queue of instructions being written back. */
- unsigned wbNumInst;
-
- /** Cycle number within the queue of instructions being written
- * back. Used in case there are too many instructions writing
- * back at the current cycle and writesbacks need to be scheduled
- * for the future. See comments in instToCommit().
- */
- unsigned wbCycle;
-
- int numROBEntries;
- int numInsts;
-
- std::set<InstSeqNum> waitingMemOps;
- typedef std::set<InstSeqNum>::iterator MemIt;
- int numWaitingMemOps;
- unsigned maxOutstandingMemOps;
-
- bool squashPending;
- InstSeqNum squashSeqNum;
- Addr squashNextPC;
-
- Fault faultFromFetch;
- bool fetchHasFault;
-
- bool switchedOut;
- bool switchPending;
-
- DynInstPtr memBarrier;
-
- private:
- struct pqCompare {
- bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
- {
- return lhs->seqNum > rhs->seqNum;
- }
- };
-
- typedef typename std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> ReadyInstQueue;
- ReadyInstQueue exeList;
-
- typedef typename std::list<DynInstPtr>::iterator InstListIt;
-
- std::list<DynInstPtr> instList;
- std::list<DynInstPtr> waitingList;
- std::list<DynInstPtr> replayList;
- std::list<DynInstPtr> writeback;
-
- int latency;
-
- int squashLatency;
-
- bool exactFullStall;
-
- // number of cycles stalled for D-cache misses
-/* Stats::Scalar<> dcacheStallCycles;
- Counter lastDcacheStall;
-*/
- Stats::Vector<> rob_cap_events;
- Stats::Vector<> rob_cap_inst_count;
- Stats::Vector<> iq_cap_events;
- Stats::Vector<> iq_cap_inst_count;
- // total number of instructions executed
- Stats::Vector<> exe_inst;
- Stats::Vector<> exe_swp;
- Stats::Vector<> exe_nop;
- Stats::Vector<> exe_refs;
- Stats::Vector<> exe_loads;
- Stats::Vector<> exe_branches;
-
- Stats::Vector<> issued_ops;
-
- // total number of loads forwaded from LSQ stores
- Stats::Vector<> lsq_forw_loads;
-
- // total number of loads ignored due to invalid addresses
- Stats::Vector<> inv_addr_loads;
-
- // total number of software prefetches ignored due to invalid addresses
- Stats::Vector<> inv_addr_swpfs;
- // ready loads blocked due to memory disambiguation
- Stats::Vector<> lsq_blocked_loads;
-
- Stats::Scalar<> lsqInversion;
-
- Stats::Vector<> n_issued_dist;
- Stats::VectorDistribution<> issue_delay_dist;
-
- Stats::VectorDistribution<> queue_res_dist;
-/*
- Stats::Vector<> stat_fu_busy;
- Stats::Vector2d<> stat_fuBusy;
- Stats::Vector<> dist_unissued;
- Stats::Vector2d<> stat_issued_inst_type;
-
- Stats::Formula misspec_cnt;
- Stats::Formula misspec_ipc;
- Stats::Formula issue_rate;
- Stats::Formula issue_stores;
- Stats::Formula issue_op_rate;
- Stats::Formula fu_busy_rate;
- Stats::Formula commit_stores;
- Stats::Formula commit_ipc;
- Stats::Formula commit_ipb;
- Stats::Formula lsq_inv_rate;
-*/
- Stats::Vector<> writeback_count;
- Stats::Vector<> producer_inst;
- Stats::Vector<> consumer_inst;
- Stats::Vector<> wb_penalized;
-
- Stats::Formula wb_rate;
- Stats::Formula wb_fanout;
- Stats::Formula wb_penalized_rate;
-
- // total number of instructions committed
- Stats::Vector<> stat_com_inst;
- Stats::Vector<> stat_com_swp;
- Stats::Vector<> stat_com_refs;
- Stats::Vector<> stat_com_loads;
- Stats::Vector<> stat_com_membars;
- Stats::Vector<> stat_com_branches;
-
- Stats::Distribution<> n_committed_dist;
-
- Stats::Scalar<> commit_eligible_samples;
- Stats::Vector<> commit_eligible;
-
- Stats::Vector<> squashedInsts;
- Stats::Vector<> ROBSquashedInsts;
-
- Stats::Scalar<> ROB_fcount;
- Stats::Formula ROB_full_rate;
-
- Stats::Vector<> ROB_count; // cumulative ROB occupancy
- Stats::Formula ROB_occ_rate;
- Stats::VectorDistribution<> ROB_occ_dist;
- public:
- void dumpInsts();
-
- Checker<DynInstPtr> *checker;
-};
-
-template <class Impl>
-template <class T>
-Fault
-LWBackEnd<Impl>::read(MemReqPtr &req, T &data, int load_idx)
-{
- return LSQ.read(req, data, load_idx);
-}
-
-template <class Impl>
-template <class T>
-Fault
-LWBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx)
-{
- return LSQ.write(req, data, store_idx);
-}
-
-#endif // __CPU_OZONE_LW_BACK_END_HH__
+++ /dev/null
-/*
- * Copyright (c) 2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "cpu/checker/cpu.hh"
-#include "cpu/ozone/lw_back_end.hh"
-#include "encumbered/cpu/full/op_class.hh"
-
-template <class Impl>
-void
-LWBackEnd<Impl>::generateTrapEvent(Tick latency)
-{
- DPRINTF(BE, "Generating trap event\n");
-
- TrapEvent *trap = new TrapEvent(this);
-
- trap->schedule(curTick + cpu->cycles(latency));
-
- thread->trapPending = true;
-}
-
-template <class Impl>
-int
-LWBackEnd<Impl>::wakeDependents(DynInstPtr &inst, bool memory_deps)
-{
- assert(!inst->isSquashed());
- std::vector<DynInstPtr> &dependents = memory_deps ? inst->getMemDeps() :
- inst->getDependents();
- int num_outputs = dependents.size();
-
- DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum);
-
- for (int i = 0; i < num_outputs; i++) {
- DynInstPtr dep_inst = dependents[i];
- if (!memory_deps) {
- dep_inst->markSrcRegReady();
- } else {
- if (!dep_inst->isSquashed())
- dep_inst->markMemInstReady(inst.get());
- }
-
- DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum);
-
- if (dep_inst->readyToIssue() && dep_inst->isInROB() &&
- !dep_inst->isNonSpeculative() && !dep_inst->isStoreConditional() &&
- dep_inst->memDepReady() && !dep_inst->isMemBarrier() &&
- !dep_inst->isWriteBarrier()) {
- DPRINTF(BE, "Adding instruction to exeList [sn:%lli]\n",
- dep_inst->seqNum);
- exeList.push(dep_inst);
- if (dep_inst->iqItValid) {
- DPRINTF(BE, "Removing instruction from waiting list\n");
- waitingList.erase(dep_inst->iqIt);
- waitingInsts--;
- dep_inst->iqItValid = false;
- assert(waitingInsts >= 0);
- }
- if (dep_inst->isMemRef()) {
- removeWaitingMemOp(dep_inst);
- DPRINTF(BE, "Issued a waiting mem op [sn:%lli]\n",
- dep_inst->seqNum);
- }
- }
- }
- return num_outputs;
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::rescheduleMemInst(DynInstPtr &inst)
-{
- replayList.push_front(inst);
-}
-
-template <class Impl>
-LWBackEnd<Impl>::TrapEvent::TrapEvent(LWBackEnd<Impl> *_be)
- : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
-{
- this->setFlags(Event::AutoDelete);
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::TrapEvent::process()
-{
- be->trapSquash = true;
-}
-
-template <class Impl>
-const char *
-LWBackEnd<Impl>::TrapEvent::description()
-{
- return "Trap event";
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::replayMemInst(DynInstPtr &inst)
-{
- bool found_inst = false;
- while (!replayList.empty()) {
- exeList.push(replayList.front());
- if (replayList.front() == inst) {
- found_inst = true;
- }
- replayList.pop_front();
- }
- assert(found_inst);
-}
-
-template<class Impl>
-LWBackEnd<Impl>::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst,
- LWBackEnd<Impl> *_be)
- : Event(&mainEventQueue), inst(_inst), be(_be), dcacheMiss(false)
-{
- this->setFlags(Event::AutoDelete);
-}
-
-template<class Impl>
-void
-LWBackEnd<Impl>::LdWritebackEvent::process()
-{
- DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum);
-// DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
-
- //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
-
-// iewStage->wakeCPU();
-
- if (be->isSwitchedOut())
- return;
-
- if (dcacheMiss) {
- be->removeDcacheMiss(inst);
- }
-
- if (inst->isSquashed()) {
- inst = NULL;
- return;
- }
-
- if (!inst->isExecuted()) {
- inst->setExecuted();
-
- // Execute again to copy data to proper place.
- inst->completeAcc();
- }
-
- // Need to insert instruction into queue to commit
- be->instToCommit(inst);
-
- //wroteToTimeBuffer = true;
-// iewStage->activityThisCycle();
-
- inst = NULL;
-}
-
-template<class Impl>
-const char *
-LWBackEnd<Impl>::LdWritebackEvent::description()
-{
- return "Load writeback event";
-}
-
-
-template <class Impl>
-LWBackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent(LWBackEnd *_be)
- : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
-{
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::DCacheCompletionEvent::process()
-{
-}
-
-template <class Impl>
-const char *
-LWBackEnd<Impl>::DCacheCompletionEvent::description()
-{
- return "Cache completion event";
-}
-
-template <class Impl>
-LWBackEnd<Impl>::LWBackEnd(Params *params)
- : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
- trapSquash(false), xcSquash(false), cacheCompletionEvent(this),
- dcacheInterface(params->dcacheInterface), width(params->backEndWidth),
- exactFullStall(true)
-{
- numROBEntries = params->numROBEntries;
- numInsts = 0;
- numDispatchEntries = 32;
- maxOutstandingMemOps = params->maxOutstandingMemOps;
- numWaitingMemOps = 0;
- waitingInsts = 0;
- switchedOut = false;
- switchPending = false;
-
- LSQ.setBE(this);
-
- // Setup IQ and LSQ with their parameters here.
- instsToDispatch = d2i.getWire(-1);
-
- instsToExecute = i2e.getWire(-1);
-
- dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width;
- issueWidth = params->issueWidth ? params->issueWidth : width;
- wbWidth = params->wbWidth ? params->wbWidth : width;
- commitWidth = params->commitWidth ? params->commitWidth : width;
-
- LSQ.init(params, params->LQEntries, params->SQEntries, 0);
-
- dispatchStatus = Running;
-}
-
-template <class Impl>
-std::string
-LWBackEnd<Impl>::name() const
-{
- return cpu->name() + ".backend";
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::regStats()
-{
- using namespace Stats;
- rob_cap_events
- .init(cpu->number_of_threads)
- .name(name() + ".ROB:cap_events")
- .desc("number of cycles where ROB cap was active")
- .flags(total)
- ;
-
- rob_cap_inst_count
- .init(cpu->number_of_threads)
- .name(name() + ".ROB:cap_inst")
- .desc("number of instructions held up by ROB cap")
- .flags(total)
- ;
-
- iq_cap_events
- .init(cpu->number_of_threads)
- .name(name() +".IQ:cap_events" )
- .desc("number of cycles where IQ cap was active")
- .flags(total)
- ;
-
- iq_cap_inst_count
- .init(cpu->number_of_threads)
- .name(name() + ".IQ:cap_inst")
- .desc("number of instructions held up by IQ cap")
- .flags(total)
- ;
-
-
- exe_inst
- .init(cpu->number_of_threads)
- .name(name() + ".ISSUE:count")
- .desc("number of insts issued")
- .flags(total)
- ;
-
- exe_swp
- .init(cpu->number_of_threads)
- .name(name() + ".ISSUE:swp")
- .desc("number of swp insts issued")
- .flags(total)
- ;
-
- exe_nop
- .init(cpu->number_of_threads)
- .name(name() + ".ISSUE:nop")
- .desc("number of nop insts issued")
- .flags(total)
- ;
-
- exe_refs
- .init(cpu->number_of_threads)
- .name(name() + ".ISSUE:refs")
- .desc("number of memory reference insts issued")
- .flags(total)
- ;
-
- exe_loads
- .init(cpu->number_of_threads)
- .name(name() + ".ISSUE:loads")
- .desc("number of load insts issued")
- .flags(total)
- ;
-
- exe_branches
- .init(cpu->number_of_threads)
- .name(name() + ".ISSUE:branches")
- .desc("Number of branches issued")
- .flags(total)
- ;
-
- issued_ops
- .init(cpu->number_of_threads)
- .name(name() + ".ISSUE:op_count")
- .desc("number of insts issued")
- .flags(total)
- ;
-
-/*
- for (int i=0; i<Num_OpClasses; ++i) {
- stringstream subname;
- subname << opClassStrings[i] << "_delay";
- issue_delay_dist.subname(i, subname.str());
- }
-*/
- //
- // Other stats
- //
- lsq_forw_loads
- .init(cpu->number_of_threads)
- .name(name() + ".LSQ:forw_loads")
- .desc("number of loads forwarded via LSQ")
- .flags(total)
- ;
-
- inv_addr_loads
- .init(cpu->number_of_threads)
- .name(name() + ".ISSUE:addr_loads")
- .desc("number of invalid-address loads")
- .flags(total)
- ;
-
- inv_addr_swpfs
- .init(cpu->number_of_threads)
- .name(name() + ".ISSUE:addr_swpfs")
- .desc("number of invalid-address SW prefetches")
- .flags(total)
- ;
-
- lsq_blocked_loads
- .init(cpu->number_of_threads)
- .name(name() + ".LSQ:blocked_loads")
- .desc("number of ready loads not issued due to memory disambiguation")
- .flags(total)
- ;
-
- lsqInversion
- .name(name() + ".ISSUE:lsq_invert")
- .desc("Number of times LSQ instruction issued early")
- ;
-
- n_issued_dist
- .init(issueWidth + 1)
- .name(name() + ".ISSUE:issued_per_cycle")
- .desc("Number of insts issued each cycle")
- .flags(total | pdf | dist)
- ;
- issue_delay_dist
- .init(Num_OpClasses,0,99,2)
- .name(name() + ".ISSUE:")
- .desc("cycles from operands ready to issue")
- .flags(pdf | cdf)
- ;
-
- queue_res_dist
- .init(Num_OpClasses, 0, 99, 2)
- .name(name() + ".IQ:residence:")
- .desc("cycles from dispatch to issue")
- .flags(total | pdf | cdf )
- ;
- for (int i = 0; i < Num_OpClasses; ++i) {
- queue_res_dist.subname(i, opClassStrings[i]);
- }
-
- writeback_count
- .init(cpu->number_of_threads)
- .name(name() + ".WB:count")
- .desc("cumulative count of insts written-back")
- .flags(total)
- ;
-
- producer_inst
- .init(cpu->number_of_threads)
- .name(name() + ".WB:producers")
- .desc("num instructions producing a value")
- .flags(total)
- ;
-
- consumer_inst
- .init(cpu->number_of_threads)
- .name(name() + ".WB:consumers")
- .desc("num instructions consuming a value")
- .flags(total)
- ;
-
- wb_penalized
- .init(cpu->number_of_threads)
- .name(name() + ".WB:penalized")
- .desc("number of instrctions required to write to 'other' IQ")
- .flags(total)
- ;
-
-
- wb_penalized_rate
- .name(name() + ".WB:penalized_rate")
- .desc ("fraction of instructions written-back that wrote to 'other' IQ")
- .flags(total)
- ;
-
- wb_penalized_rate = wb_penalized / writeback_count;
-
- wb_fanout
- .name(name() + ".WB:fanout")
- .desc("average fanout of values written-back")
- .flags(total)
- ;
-
- wb_fanout = producer_inst / consumer_inst;
-
- wb_rate
- .name(name() + ".WB:rate")
- .desc("insts written-back per cycle")
- .flags(total)
- ;
- wb_rate = writeback_count / cpu->numCycles;
-
- stat_com_inst
- .init(cpu->number_of_threads)
- .name(name() + ".COM:count")
- .desc("Number of instructions committed")
- .flags(total)
- ;
-
- stat_com_swp
- .init(cpu->number_of_threads)
- .name(name() + ".COM:swp_count")
- .desc("Number of s/w prefetches committed")
- .flags(total)
- ;
-
- stat_com_refs
- .init(cpu->number_of_threads)
- .name(name() + ".COM:refs")
- .desc("Number of memory references committed")
- .flags(total)
- ;
-
- stat_com_loads
- .init(cpu->number_of_threads)
- .name(name() + ".COM:loads")
- .desc("Number of loads committed")
- .flags(total)
- ;
-
- stat_com_membars
- .init(cpu->number_of_threads)
- .name(name() + ".COM:membars")
- .desc("Number of memory barriers committed")
- .flags(total)
- ;
-
- stat_com_branches
- .init(cpu->number_of_threads)
- .name(name() + ".COM:branches")
- .desc("Number of branches committed")
- .flags(total)
- ;
- n_committed_dist
- .init(0,commitWidth,1)
- .name(name() + ".COM:committed_per_cycle")
- .desc("Number of insts commited each cycle")
- .flags(pdf)
- ;
-
- //
- // Commit-Eligible instructions...
- //
- // -> The number of instructions eligible to commit in those
- // cycles where we reached our commit BW limit (less the number
- // actually committed)
- //
- // -> The average value is computed over ALL CYCLES... not just
- // the BW limited cycles
- //
- // -> The standard deviation is computed only over cycles where
- // we reached the BW limit
- //
- commit_eligible
- .init(cpu->number_of_threads)
- .name(name() + ".COM:bw_limited")
- .desc("number of insts not committed due to BW limits")
- .flags(total)
- ;
-
- commit_eligible_samples
- .name(name() + ".COM:bw_lim_events")
- .desc("number cycles where commit BW limit reached")
- ;
-
- squashedInsts
- .init(cpu->number_of_threads)
- .name(name() + ".COM:squashed_insts")
- .desc("Number of instructions removed from inst list")
- ;
-
- ROBSquashedInsts
- .init(cpu->number_of_threads)
- .name(name() + ".COM:rob_squashed_insts")
- .desc("Number of instructions removed from inst list when they reached the head of the ROB")
- ;
-
- ROB_fcount
- .name(name() + ".ROB:full_count")
- .desc("number of cycles where ROB was full")
- ;
-
- ROB_count
- .init(cpu->number_of_threads)
- .name(name() + ".ROB:occupancy")
- .desc(name() + ".ROB occupancy (cumulative)")
- .flags(total)
- ;
-
- ROB_full_rate
- .name(name() + ".ROB:full_rate")
- .desc("ROB full per cycle")
- ;
- ROB_full_rate = ROB_fcount / cpu->numCycles;
-
- ROB_occ_rate
- .name(name() + ".ROB:occ_rate")
- .desc("ROB occupancy rate")
- .flags(total)
- ;
- ROB_occ_rate = ROB_count / cpu->numCycles;
-
- ROB_occ_dist
- .init(cpu->number_of_threads,0,numROBEntries,2)
- .name(name() + ".ROB:occ_dist")
- .desc("ROB Occupancy per cycle")
- .flags(total | cdf)
- ;
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::setCPU(FullCPU *cpu_ptr)
-{
- cpu = cpu_ptr;
- LSQ.setCPU(cpu_ptr);
- checker = cpu->checker;
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
-{
- comm = _comm;
- toIEW = comm->getWire(0);
- fromCommit = comm->getWire(-1);
-}
-
-#if FULL_SYSTEM
-template <class Impl>
-void
-LWBackEnd<Impl>::checkInterrupts()
-{
- if (cpu->checkInterrupts &&
- cpu->check_interrupts() &&
- !cpu->inPalMode(thread->readPC()) &&
- !trapSquash &&
- !xcSquash) {
- frontEnd->interruptPending = true;
- if (robEmpty() && !LSQ.hasStoresToWB()) {
- // Will need to squash all instructions currently in flight and have
- // the interrupt handler restart at the last non-committed inst.
- // Most of that can be handled through the trap() function. The
- // processInterrupts() function really just checks for interrupts
- // and then calls trap() if there is an interrupt present.
-
- // Not sure which thread should be the one to interrupt. For now
- // always do thread 0.
- assert(!thread->inSyscall);
- thread->inSyscall = true;
-
- // CPU will handle implementation of the interrupt.
- cpu->processInterrupts();
-
- // Now squash or record that I need to squash this cycle.
- commitStatus = TrapPending;
-
- // Exit state update mode to avoid accidental updating.
- thread->inSyscall = false;
-
- // Generate trap squash event.
- generateTrapEvent();
-
- DPRINTF(BE, "Interrupt detected.\n");
- } else {
- DPRINTF(BE, "Interrupt must wait for ROB to drain.\n");
- }
- }
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::handleFault(Fault &fault, Tick latency)
-{
- DPRINTF(BE, "Handling fault!\n");
-
- assert(!thread->inSyscall);
-
- thread->inSyscall = true;
-
- // Consider holding onto the trap and waiting until the trap event
- // happens for this to be executed.
- fault->invoke(thread->getXCProxy());
-
- // Exit state update mode to avoid accidental updating.
- thread->inSyscall = false;
-
- commitStatus = TrapPending;
-
- // Generate trap squash event.
- generateTrapEvent(latency);
-}
-#endif
-
-template <class Impl>
-void
-LWBackEnd<Impl>::tick()
-{
- DPRINTF(BE, "Ticking back end\n");
-
- if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) {
- cpu->signalSwitched();
- return;
- }
-
- ROB_count[0]+= numInsts;
-
- wbCycle = 0;
-
- // Read in any done instruction information and update the IQ or LSQ.
- updateStructures();
-
-#if FULL_SYSTEM
- checkInterrupts();
-
- if (trapSquash) {
- assert(!xcSquash);
- squashFromTrap();
- } else if (xcSquash) {
- squashFromXC();
- }
-#endif
-
- if (dispatchStatus != Blocked) {
- dispatchInsts();
- } else {
- checkDispatchStatus();
- }
-
- if (commitStatus != TrapPending) {
- executeInsts();
-
- commitInsts();
- }
-
- LSQ.writebackStores();
-
- DPRINTF(BE, "Waiting insts: %i, mem ops: %i, ROB entries in use: %i, "
- "LSQ loads: %i, LSQ stores: %i\n",
- waitingInsts, numWaitingMemOps, numInsts,
- LSQ.numLoads(), LSQ.numStores());
-
-#ifdef DEBUG
- assert(numInsts == instList.size());
- assert(waitingInsts == waitingList.size());
- assert(numWaitingMemOps == waitingMemOps.size());
- assert(!switchedOut);
-#endif
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::updateStructures()
-{
- if (fromCommit->doneSeqNum) {
- LSQ.commitLoads(fromCommit->doneSeqNum);
- LSQ.commitStores(fromCommit->doneSeqNum);
- }
-
- if (fromCommit->nonSpecSeqNum) {
- if (fromCommit->uncached) {
-// LSQ.executeLoad(fromCommit->lqIdx);
- } else {
-// IQ.scheduleNonSpec(
-// fromCommit->nonSpecSeqNum);
- }
- }
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::addToLSQ(DynInstPtr &inst)
-{
- // Do anything LSQ specific here?
- LSQ.insert(inst);
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::dispatchInsts()
-{
- DPRINTF(BE, "Trying to dispatch instructions.\n");
-
- while (numInsts < numROBEntries &&
- numWaitingMemOps < maxOutstandingMemOps) {
- // Get instruction from front of time buffer
- DynInstPtr inst = frontEnd->getInst();
- if (!inst) {
- break;
- } else if (inst->isSquashed()) {
- continue;
- }
-
- ++numInsts;
- instList.push_front(inst);
-
- inst->setInROB();
-
- DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n",
- inst->seqNum, inst->readPC());
-
- for (int i = 0; i < inst->numDestRegs(); ++i)
- renameTable[inst->destRegIdx(i)] = inst;
-
- if (inst->isMemBarrier() || inst->isWriteBarrier()) {
- if (memBarrier) {
- DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
- "barrier [sn:%lli].\n",
- inst->seqNum, memBarrier->seqNum);
- memBarrier->addMemDependent(inst);
- inst->addSrcMemInst(memBarrier);
- }
- memBarrier = inst;
- inst->setCanCommit();
- } else if (inst->readyToIssue() &&
- !inst->isNonSpeculative() &&
- !inst->isStoreConditional()) {
- if (inst->isMemRef()) {
-
- LSQ.insert(inst);
- if (memBarrier) {
- DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
- "barrier [sn:%lli].\n",
- inst->seqNum, memBarrier->seqNum);
- memBarrier->addMemDependent(inst);
- inst->addSrcMemInst(memBarrier);
- addWaitingMemOp(inst);
-
- waitingList.push_front(inst);
- inst->iqIt = waitingList.begin();
- inst->iqItValid = true;
- waitingInsts++;
- } else {
- DPRINTF(BE, "Instruction [sn:%lli] ready, addding to "
- "exeList.\n",
- inst->seqNum);
- exeList.push(inst);
- }
- } else if (inst->isNop()) {
- DPRINTF(BE, "Nop encountered [sn:%lli], skipping exeList.\n",
- inst->seqNum);
- inst->setIssued();
- inst->setExecuted();
- inst->setCanCommit();
- } else {
- DPRINTF(BE, "Instruction [sn:%lli] ready, addding to "
- "exeList.\n",
- inst->seqNum);
- exeList.push(inst);
- }
- } else {
- if (inst->isNonSpeculative() || inst->isStoreConditional()) {
- inst->setCanCommit();
- DPRINTF(BE, "Adding non speculative instruction\n");
- }
-
- if (inst->isMemRef()) {
- addWaitingMemOp(inst);
- LSQ.insert(inst);
- if (memBarrier) {
- memBarrier->addMemDependent(inst);
- inst->addSrcMemInst(memBarrier);
-
- DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
- "barrier [sn:%lli].\n",
- inst->seqNum, memBarrier->seqNum);
- }
- }
-
- DPRINTF(BE, "Instruction [sn:%lli] not ready, addding to "
- "waitingList.\n",
- inst->seqNum);
- waitingList.push_front(inst);
- inst->iqIt = waitingList.begin();
- inst->iqItValid = true;
- waitingInsts++;
- }
- }
-
- // Check if IQ or LSQ is full. If so we'll need to break and stop
- // removing instructions. Also update the number of insts to remove
- // from the queue. Check here if we don't care about exact stall
- // conditions.
-/*
- bool stall = false;
- if (IQ.isFull()) {
- DPRINTF(BE, "IQ is full!\n");
- stall = true;
- } else if (LSQ.isFull()) {
- DPRINTF(BE, "LSQ is full!\n");
- stall = true;
- } else if (isFull()) {
- DPRINTF(BE, "ROB is full!\n");
- stall = true;
- ROB_fcount++;
- }
- if (stall) {
- d2i.advance();
- dispatchStall();
- return;
- }
-*/
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::dispatchStall()
-{
- dispatchStatus = Blocked;
- if (!cpu->decoupledFrontEnd) {
- // Tell front end to stall here through a timebuffer, or just tell
- // it directly.
- }
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::checkDispatchStatus()
-{
- DPRINTF(BE, "Checking dispatch status\n");
- assert(dispatchStatus == Blocked);
- if (!LSQ.isFull() && !isFull()) {
- DPRINTF(BE, "Dispatch no longer blocked\n");
- dispatchStatus = Running;
- dispatchInsts();
- }
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::executeInsts()
-{
- DPRINTF(BE, "Trying to execute instructions\n");
-
- int num_executed = 0;
- while (!exeList.empty() && num_executed < issueWidth) {
- DynInstPtr inst = exeList.top();
-
- DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n",
- inst->seqNum, inst->readPC());
-
- // Check if the instruction is squashed; if so then skip it
- // and don't count it towards the FU usage.
- if (inst->isSquashed()) {
- DPRINTF(BE, "Execute: Instruction was squashed.\n");
-
- // Not sure how to handle this plus the method of sending # of
- // instructions to use. Probably will just have to count it
- // towards the bandwidth usage, but not the FU usage.
- ++num_executed;
-
- // Consider this instruction executed so that commit can go
- // ahead and retire the instruction.
- inst->setExecuted();
-
- // Not sure if I should set this here or just let commit try to
- // commit any squashed instructions. I like the latter a bit more.
- inst->setCanCommit();
-
-// ++iewExecSquashedInsts;
- exeList.pop();
-
- continue;
- }
-
- Fault fault = NoFault;
-
- // Execute instruction.
- // Note that if the instruction faults, it will be handled
- // at the commit stage.
- if (inst->isMemRef() &&
- (!inst->isDataPrefetch() && !inst->isInstPrefetch())) {
- if (dcacheInterface->isBlocked()) {
- // Should I move the instruction aside?
- DPRINTF(BE, "Execute: dcache is blocked\n");
- break;
- }
- DPRINTF(BE, "Execute: Initiating access for memory "
- "reference.\n");
-
- if (inst->isLoad()) {
- LSQ.executeLoad(inst);
- } else if (inst->isStore()) {
- LSQ.executeStore(inst);
- if (inst->req && !(inst->req->flags & LOCKED)) {
- inst->setExecuted();
-
- instToCommit(inst);
- }
- } else {
- panic("Unknown mem type!");
- }
- } else {
- inst->execute();
-
- inst->setExecuted();
-
- instToCommit(inst);
- }
-
- updateExeInstStats(inst);
-
- ++funcExeInst;
- ++num_executed;
-
- exeList.pop();
-
- if (inst->mispredicted()) {
- squashDueToBranch(inst);
- break;
- } else if (LSQ.violation()) {
- // Get the DynInst that caused the violation. Note that this
- // clears the violation signal.
- DynInstPtr violator;
- violator = LSQ.getMemDepViolator();
-
- DPRINTF(BE, "LDSTQ detected a violation. Violator PC: "
- "%#x, inst PC: %#x. Addr is: %#x.\n",
- violator->readPC(), inst->readPC(), inst->physEffAddr);
-
- // Squash.
- squashDueToMemViolation(inst);
- }
- }
-
- issued_ops[0]+= num_executed;
- n_issued_dist[num_executed]++;
-}
-
-template<class Impl>
-void
-LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
-{
-
- DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
- inst->seqNum, inst->readPC());
-
- if (!inst->isSquashed()) {
- DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
- inst->seqNum, inst->readPC());
-
- inst->setCanCommit();
-
- if (inst->isExecuted()) {
- inst->setResultReady();
- int dependents = wakeDependents(inst);
- if (dependents) {
- producer_inst[0]++;
- consumer_inst[0]+= dependents;
- }
- }
- }
-
- writeback_count[0]++;
-}
-#if 0
-template <class Impl>
-void
-LWBackEnd<Impl>::writebackInsts()
-{
- int wb_width = wbWidth;
- // Using this method I'm not quite sure how to prevent an
- // instruction from waking its own dependents multiple times,
- // without the guarantee that commit always has enough bandwidth
- // to accept all instructions being written back. This guarantee
- // might not be too unrealistic.
- InstListIt wb_inst_it = writeback.begin();
- InstListIt wb_end_it = writeback.end();
- int inst_num = 0;
- int consumer_insts = 0;
-
- for (; inst_num < wb_width &&
- wb_inst_it != wb_end_it; inst_num++) {
- DynInstPtr inst = (*wb_inst_it);
-
- // Some instructions will be sent to commit without having
- // executed because they need commit to handle them.
- // E.g. Uncached loads have not actually executed when they
- // are first sent to commit. Instead commit must tell the LSQ
- // when it's ready to execute the uncached load.
- if (!inst->isSquashed()) {
- DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
- inst->seqNum, inst->readPC());
-
- inst->setCanCommit();
- inst->setResultReady();
-
- if (inst->isExecuted()) {
- int dependents = wakeDependents(inst);
- if (dependents) {
- producer_inst[0]++;
- consumer_insts+= dependents;
- }
- }
- }
-
- writeback.erase(wb_inst_it++);
- }
- LSQ.writebackStores();
- consumer_inst[0]+= consumer_insts;
- writeback_count[0]+= inst_num;
-}
-#endif
-template <class Impl>
-bool
-LWBackEnd<Impl>::commitInst(int inst_num)
-{
- // Read instruction from the head of the ROB
- DynInstPtr inst = instList.back();
-
- // Make sure instruction is valid
- assert(inst);
-
- if (!inst->readyToCommit())
- return false;
-
- DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n",
- inst->seqNum, inst->readPC());
-
- thread->setPC(inst->readPC());
- thread->setNextPC(inst->readNextPC());
- inst->reachedCommit = true;
-
- // If the instruction is not executed yet, then it is a non-speculative
- // or store inst. Signal backwards that it should be executed.
- if (!inst->isExecuted()) {
- if (inst->isNonSpeculative() ||
- inst->isStoreConditional() ||
- inst->isMemBarrier() ||
- inst->isWriteBarrier()) {
-#if !FULL_SYSTEM
- // Hack to make sure syscalls aren't executed until all stores
- // write back their data. This direct communication shouldn't
- // be used for anything other than this.
- if (inst_num > 0 || LSQ.hasStoresToWB())
-#else
- if ((inst->isMemBarrier() || inst->isWriteBarrier() ||
- inst->isQuiesce()) &&
- LSQ.hasStoresToWB())
-#endif
- {
- DPRINTF(BE, "Waiting for all stores to writeback.\n");
- return false;
- }
-
- DPRINTF(BE, "Encountered a store or non-speculative "
- "instruction at the head of the ROB, PC %#x.\n",
- inst->readPC());
-
- if (inst->isMemBarrier() || inst->isWriteBarrier()) {
- DPRINTF(BE, "Waking dependents on barrier [sn:%lli]\n",
- inst->seqNum);
- assert(memBarrier);
- wakeDependents(inst, true);
- if (memBarrier == inst)
- memBarrier = NULL;
- inst->clearMemDependents();
- }
-
- // Send back the non-speculative instruction's sequence number.
- if (inst->iqItValid) {
- DPRINTF(BE, "Removing instruction from waiting list\n");
- waitingList.erase(inst->iqIt);
- inst->iqItValid = false;
- waitingInsts--;
- assert(waitingInsts >= 0);
- if (inst->isStore())
- removeWaitingMemOp(inst);
- }
-
- exeList.push(inst);
-
- // Change the instruction so it won't try to commit again until
- // it is executed.
- inst->clearCanCommit();
-
-// ++commitNonSpecStalls;
-
- return false;
- } else if (inst->isLoad()) {
- DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n",
- inst->seqNum, inst->readPC());
-
- // Send back the non-speculative instruction's sequence
- // number. Maybe just tell the lsq to re-execute the load.
-
- // Send back the non-speculative instruction's sequence number.
- if (inst->iqItValid) {
- DPRINTF(BE, "Removing instruction from waiting list\n");
- waitingList.erase(inst->iqIt);
- inst->iqItValid = false;
- waitingInsts--;
- assert(waitingInsts >= 0);
- removeWaitingMemOp(inst);
- }
- replayMemInst(inst);
-
- inst->clearCanCommit();
-
- return false;
- } else {
- panic("Trying to commit un-executed instruction "
- "of unknown type!\n");
- }
- }
-
- // Not handled for now.
- assert(!inst->isThreadSync());
- assert(inst->memDepReady());
- // Stores will mark themselves as totally completed as they need
- // to wait to writeback to memory. @todo: Hack...attempt to fix
- // having the checker be forced to wait until a store completes in
- // order to check all of the instructions. If the store at the
- // head of the check list misses, but a later store hits, then
- // loads in the checker may see the younger store values instead
- // of the store they should see. Either the checker needs its own
- // memory (annoying to update), its own store buffer (how to tell
- // which value is correct?), or something else...
- if (!inst->isStore()) {
- inst->setCompleted();
- }
- // Check if the instruction caused a fault. If so, trap.
- Fault inst_fault = inst->getFault();
-
- // Use checker prior to updating anything due to traps or PC
- // based events.
- if (checker) {
- checker->tick(inst);
- }
-
- if (inst_fault != NoFault) {
- DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n",
- inst->seqNum, inst->readPC());
-
- // Instruction is completed as it has a fault.
- inst->setCompleted();
-
- if (LSQ.hasStoresToWB()) {
- DPRINTF(BE, "Stores still in flight, will wait until drained.\n");
- return false;
- } else if (inst_num != 0) {
- DPRINTF(BE, "Will wait until instruction is head of commit group.\n");
- return false;
- } else if (checker && inst->isStore()) {
- checker->tick(inst);
- }
-
- thread->setInst(
- static_cast<TheISA::MachInst>(inst->staticInst->machInst));
-#if FULL_SYSTEM
- handleFault(inst_fault);
- return false;
-#else // !FULL_SYSTEM
- panic("fault (%d) detected @ PC %08p", inst_fault,
- inst->PC);
-#endif // FULL_SYSTEM
- }
-
- int freed_regs = 0;
-
- for (int i = 0; i < inst->numDestRegs(); ++i) {
- DPRINTF(BE, "Commit rename map setting reg %i to [sn:%lli]\n",
- (int)inst->destRegIdx(i), inst->seqNum);
- thread->renameTable[inst->destRegIdx(i)] = inst;
- ++freed_regs;
- }
-
- if (inst->traceData) {
- inst->traceData->setFetchSeq(inst->seqNum);
- inst->traceData->setCPSeq(thread->numInst);
- inst->traceData->finalize();
- inst->traceData = NULL;
- }
-
- inst->clearDependents();
-
- frontEnd->addFreeRegs(freed_regs);
-
- instList.pop_back();
-
- --numInsts;
- ++thread->funcExeInst;
- // Maybe move this to where the fault is handled; if the fault is
- // handled, don't try to set this myself as the fault will set it.
- // If not, then I set thread->PC = thread->nextPC and
- // thread->nextPC = thread->nextPC + 4.
- thread->setPC(thread->readNextPC());
- thread->setNextPC(thread->readNextPC() + sizeof(TheISA::MachInst));
- updateComInstStats(inst);
-
- // Write the done sequence number here.
- toIEW->doneSeqNum = inst->seqNum;
- lastCommitCycle = curTick;
-
-#if FULL_SYSTEM
- int count = 0;
- Addr oldpc;
- do {
- if (count == 0)
- assert(!thread->inSyscall && !thread->trapPending);
- oldpc = thread->readPC();
- cpu->system->pcEventQueue.service(
- thread->getXCProxy());
- count++;
- } while (oldpc != thread->readPC());
- if (count > 1) {
- DPRINTF(BE, "PC skip function event, stopping commit\n");
- xcSquash = true;
- return false;
- }
-#endif
- return true;
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::commitInsts()
-{
- // Not sure this should be a loop or not.
- int inst_num = 0;
- while (!instList.empty() && inst_num < commitWidth) {
- if (instList.back()->isSquashed()) {
- instList.back()->clearDependents();
- instList.pop_back();
- --numInsts;
- ROBSquashedInsts[instList.back()->threadNumber]++;
- continue;
- }
-
- if (!commitInst(inst_num++)) {
- DPRINTF(BE, "Can't commit, Instruction [sn:%lli] PC "
- "%#x is head of ROB and not ready\n",
- instList.back()->seqNum, instList.back()->readPC());
- --inst_num;
- break;
- }
- }
- n_committed_dist.sample(inst_num);
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::squash(const InstSeqNum &sn)
-{
- LSQ.squash(sn);
-
- int freed_regs = 0;
- InstListIt waiting_list_end = waitingList.end();
- InstListIt insts_it = waitingList.begin();
-
- while (insts_it != waiting_list_end && (*insts_it)->seqNum > sn)
- {
- if ((*insts_it)->isSquashed()) {
- ++insts_it;
- continue;
- }
- DPRINTF(BE, "Squashing instruction on waitingList PC %#x, [sn:%lli].\n",
- (*insts_it)->readPC(),
- (*insts_it)->seqNum);
-
- if ((*insts_it)->isMemRef()) {
- DPRINTF(BE, "Squashing a waiting mem op [sn:%lli]\n",
- (*insts_it)->seqNum);
- removeWaitingMemOp((*insts_it));
- }
-
- waitingList.erase(insts_it++);
- waitingInsts--;
- }
- assert(waitingInsts >= 0);
-
- insts_it = instList.begin();
-
- while (!instList.empty() && (*insts_it)->seqNum > sn)
- {
- if ((*insts_it)->isSquashed()) {
- ++insts_it;
- continue;
- }
- DPRINTF(BE, "Squashing instruction on inst list PC %#x, [sn:%lli].\n",
- (*insts_it)->readPC(),
- (*insts_it)->seqNum);
-
- // Mark the instruction as squashed, and ready to commit so that
- // it can drain out of the pipeline.
- (*insts_it)->setSquashed();
-
- (*insts_it)->setCanCommit();
-
- (*insts_it)->removeInROB();
-
- for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
- DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
- DPRINTF(BE, "Commit rename map setting reg %i to [sn:%lli]\n",
- (int)(*insts_it)->destRegIdx(i), prev_dest->seqNum);
- renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
- ++freed_regs;
- }
-
- (*insts_it)->clearDependents();
-
- squashedInsts[(*insts_it)->threadNumber]++;
-
- instList.erase(insts_it++);
- --numInsts;
- }
-
- insts_it = waitingList.begin();
- while (!waitingList.empty() && insts_it != waitingList.end()) {
- if ((*insts_it)->seqNum < sn) {
- ++insts_it;
- continue;
- }
- assert((*insts_it)->isSquashed());
-
- waitingList.erase(insts_it++);
- waitingInsts--;
- }
-
- while (memBarrier && memBarrier->seqNum > sn) {
- DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously "
- "squashed)\n", memBarrier->seqNum);
- memBarrier->clearMemDependents();
- if (memBarrier->memDepReady()) {
- DPRINTF(BE, "No previous barrier\n");
- memBarrier = NULL;
- } else {
- std::list<DynInstPtr> &srcs = memBarrier->getMemSrcs();
- memBarrier = srcs.front();
- srcs.pop_front();
- assert(srcs.empty());
- DPRINTF(BE, "Previous barrier: [sn:%lli]\n",
- memBarrier->seqNum);
- }
- }
-
- frontEnd->addFreeRegs(freed_regs);
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::squashFromXC()
-{
- InstSeqNum squashed_inst = robEmpty() ? 0 : instList.back()->seqNum - 1;
- squash(squashed_inst);
- frontEnd->squash(squashed_inst, thread->readPC(),
- false, false);
- frontEnd->interruptPending = false;
-
- thread->trapPending = false;
- thread->inSyscall = false;
- xcSquash = false;
- commitStatus = Running;
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::squashFromTrap()
-{
- InstSeqNum squashed_inst = robEmpty() ? 0 : instList.back()->seqNum - 1;
- squash(squashed_inst);
- frontEnd->squash(squashed_inst, thread->readPC(),
- false, false);
- frontEnd->interruptPending = false;
-
- thread->trapPending = false;
- thread->inSyscall = false;
- trapSquash = false;
- commitStatus = Running;
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::squashDueToBranch(DynInstPtr &inst)
-{
- // Update the branch predictor state I guess
- DPRINTF(BE, "Squashing due to branch [sn:%lli], will restart at PC %#x\n",
- inst->seqNum, inst->readNextPC());
- squash(inst->seqNum);
- frontEnd->squash(inst->seqNum, inst->readNextPC(),
- true, inst->mispredicted());
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::squashDueToMemViolation(DynInstPtr &inst)
-{
- // Update the branch predictor state I guess
- DPRINTF(BE, "Squashing due to violation [sn:%lli], will restart at PC %#x\n",
- inst->seqNum, inst->readNextPC());
- squash(inst->seqNum);
- frontEnd->squash(inst->seqNum, inst->readNextPC(),
- false, inst->mispredicted());
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
-{
- DPRINTF(IEW, "Memory blocked, squashing load and younger insts, "
- "PC: %#x [sn:%i].\n", inst->readPC(), inst->seqNum);
-
- squash(inst->seqNum - 1);
- frontEnd->squash(inst->seqNum - 1, inst->readPC());
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::fetchFault(Fault &fault)
-{
- faultFromFetch = fault;
- fetchHasFault = true;
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::switchOut()
-{
- switchPending = true;
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::doSwitchOut()
-{
- switchedOut = true;
- switchPending = false;
- // Need to get rid of all committed, non-speculative state and write it
- // to memory/XC. In this case this is stores that have committed and not
- // yet written back.
- assert(robEmpty());
- assert(!LSQ.hasStoresToWB());
-
- LSQ.switchOut();
-
- squash(0);
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::takeOverFrom(ExecContext *old_xc)
-{
- switchedOut = false;
- xcSquash = false;
- trapSquash = false;
-
- numInsts = 0;
- numWaitingMemOps = 0;
- waitingMemOps.clear();
- waitingInsts = 0;
- switchedOut = false;
- dispatchStatus = Running;
- commitStatus = Running;
- LSQ.takeOverFrom(old_xc);
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
-{
- int thread_number = inst->threadNumber;
-
- //
- // Pick off the software prefetches
- //
-#ifdef TARGET_ALPHA
- if (inst->isDataPrefetch())
- exe_swp[thread_number]++;
- else
- exe_inst[thread_number]++;
-#else
- exe_inst[thread_number]++;
-#endif
-
- //
- // Control operations
- //
- if (inst->isControl())
- exe_branches[thread_number]++;
-
- //
- // Memory operations
- //
- if (inst->isMemRef()) {
- exe_refs[thread_number]++;
-
- if (inst->isLoad())
- exe_loads[thread_number]++;
- }
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
-{
- unsigned tid = inst->threadNumber;
-
- // keep an instruction count
- thread->numInst++;
- thread->numInsts++;
-
- cpu->numInst++;
- //
- // Pick off the software prefetches
- //
-#ifdef TARGET_ALPHA
- if (inst->isDataPrefetch()) {
- stat_com_swp[tid]++;
- } else {
- stat_com_inst[tid]++;
- }
-#else
- stat_com_inst[tid]++;
-#endif
-
- //
- // Control Instructions
- //
- if (inst->isControl())
- stat_com_branches[tid]++;
-
- //
- // Memory references
- //
- if (inst->isMemRef()) {
- stat_com_refs[tid]++;
-
- if (inst->isLoad()) {
- stat_com_loads[tid]++;
- }
- }
-
- if (inst->isMemBarrier()) {
- stat_com_membars[tid]++;
- }
-}
-
-template <class Impl>
-void
-LWBackEnd<Impl>::dumpInsts()
-{
- int num = 0;
- int valid_num = 0;
-
- InstListIt inst_list_it = --(instList.end());
-
- cprintf("ExeList size: %i\n", exeList.size());
-
- cprintf("Inst list size: %i\n", instList.size());
-
- while (inst_list_it != instList.end())
- {
- cprintf("Instruction:%i\n",
- num);
- if (!(*inst_list_it)->isSquashed()) {
- if (!(*inst_list_it)->isIssued()) {
- ++valid_num;
- cprintf("Count:%i\n", valid_num);
- } else if ((*inst_list_it)->isMemRef() &&
- !(*inst_list_it)->memOpDone) {
- // Loads that have not been marked as executed still count
- // towards the total instructions.
- ++valid_num;
- cprintf("Count:%i\n", valid_num);
- }
- }
-
- cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
- "Issued:%i\nSquashed:%i\n",
- (*inst_list_it)->readPC(),
- (*inst_list_it)->seqNum,
- (*inst_list_it)->threadNumber,
- (*inst_list_it)->isIssued(),
- (*inst_list_it)->isSquashed());
-
- if ((*inst_list_it)->isMemRef()) {
- cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
- }
-
- cprintf("\n");
-
- inst_list_it--;
- ++num;
- }
-
- cprintf("Waiting list size: %i\n", waitingList.size());
-
- inst_list_it = --(waitingList.end());
-
- while (inst_list_it != waitingList.end())
- {
- cprintf("Instruction:%i\n",
- num);
- if (!(*inst_list_it)->isSquashed()) {
- if (!(*inst_list_it)->isIssued()) {
- ++valid_num;
- cprintf("Count:%i\n", valid_num);
- } else if ((*inst_list_it)->isMemRef() &&
- !(*inst_list_it)->memOpDone) {
- // Loads that have not been marked as executed still count
- // towards the total instructions.
- ++valid_num;
- cprintf("Count:%i\n", valid_num);
- }
- }
-
- cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
- "Issued:%i\nSquashed:%i\n",
- (*inst_list_it)->readPC(),
- (*inst_list_it)->seqNum,
- (*inst_list_it)->threadNumber,
- (*inst_list_it)->isIssued(),
- (*inst_list_it)->isSquashed());
-
- if ((*inst_list_it)->isMemRef()) {
- cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
- }
-
- cprintf("\n");
-
- inst_list_it--;
- ++num;
- }
-
- cprintf("waitingMemOps list size: %i\n", waitingMemOps.size());
-
- MemIt waiting_it = waitingMemOps.begin();
-
- while (waiting_it != waitingMemOps.end())
- {
- cprintf("[sn:%lli] ", (*waiting_it));
- waiting_it++;
- ++num;
- }
- cprintf("\n");
-}
+++ /dev/null
-/*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "cpu/ozone/ozone_impl.hh"
-#include "cpu/ozone/lw_lsq_impl.hh"
-
-// Force the instantiation of LDSTQ for all the implementations we care about.
-template class OzoneLWLSQ<OzoneImpl>;
-
+++ /dev/null
-/*
- * Copyright (c) 2004-2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __CPU_OZONE_LW_LSQ_HH__
-#define __CPU_OZONE_LW_LSQ_HH__
-
-#include <list>
-#include <map>
-#include <queue>
-#include <algorithm>
-
-#include "arch/faults.hh"
-#include "arch/isa_traits.hh"
-#include "config/full_system.hh"
-#include "base/hashmap.hh"
-#include "cpu/inst_seq.hh"
-#include "mem/mem_interface.hh"
-//#include "mem/page_table.hh"
-#include "sim/debug.hh"
-#include "sim/sim_object.hh"
-
-//class PageTable;
-
-/**
- * Class that implements the actual LQ and SQ for each specific thread.
- * Both are circular queues; load entries are freed upon committing, while
- * store entries are freed once they writeback. The LSQUnit tracks if there
- * are memory ordering violations, and also detects partial load to store
- * forwarding cases (a store only has part of a load's data) that requires
- * the load to wait until the store writes back. In the former case it
- * holds onto the instruction until the dependence unit looks at it, and
- * in the latter it stalls the LSQ until the store writes back. At that
- * point the load is replayed.
- */
-template <class Impl>
-class OzoneLWLSQ {
- public:
- typedef typename Impl::Params Params;
- typedef typename Impl::FullCPU FullCPU;
- typedef typename Impl::BackEnd BackEnd;
- typedef typename Impl::DynInstPtr DynInstPtr;
- typedef typename Impl::IssueStruct IssueStruct;
-
- typedef TheISA::IntReg IntReg;
-
- typedef typename std::map<InstSeqNum, DynInstPtr>::iterator LdMapIt;
-
- private:
- class StoreCompletionEvent : public Event {
- public:
- /** Constructs a store completion event. */
- StoreCompletionEvent(DynInstPtr &inst, BackEnd *be,
- Event *wb_event, OzoneLWLSQ *lsq_ptr);
-
- /** Processes the store completion event. */
- void process();
-
- /** Returns the description of this event. */
- const char *description();
-
- private:
- /** The store index of the store being written back. */
- DynInstPtr inst;
-
- BackEnd *be;
- /** The writeback event for the store. Needed for store
- * conditionals.
- */
- public:
- Event *wbEvent;
- bool miss;
- private:
- /** The pointer to the LSQ unit that issued the store. */
- OzoneLWLSQ<Impl> *lsqPtr;
- };
-
- public:
- /** Constructs an LSQ unit. init() must be called prior to use. */
- OzoneLWLSQ();
-
- /** Initializes the LSQ unit with the specified number of entries. */
- void init(Params *params, unsigned maxLQEntries,
- unsigned maxSQEntries, unsigned id);
-
- /** Returns the name of the LSQ unit. */
- std::string name() const;
-
- /** Sets the CPU pointer. */
- void setCPU(FullCPU *cpu_ptr)
- { cpu = cpu_ptr; }
-
- /** Sets the back-end stage pointer. */
- void setBE(BackEnd *be_ptr)
- { be = be_ptr; }
-
- /** Sets the page table pointer. */
-// void setPageTable(PageTable *pt_ptr);
-
- /** Ticks the LSQ unit, which in this case only resets the number of
- * used cache ports.
- * @todo: Move the number of used ports up to the LSQ level so it can
- * be shared by all LSQ units.
- */
- void tick() { usedPorts = 0; }
-
- /** Inserts an instruction. */
- void insert(DynInstPtr &inst);
- /** Inserts a load instruction. */
- void insertLoad(DynInstPtr &load_inst);
- /** Inserts a store instruction. */
- void insertStore(DynInstPtr &store_inst);
-
- /** Executes a load instruction. */
- Fault executeLoad(DynInstPtr &inst);
-
- /** Executes a store instruction. */
- Fault executeStore(DynInstPtr &inst);
-
- /** Commits the head load. */
- void commitLoad();
- /** Commits loads older than a specific sequence number. */
- void commitLoads(InstSeqNum &youngest_inst);
-
- /** Commits stores older than a specific sequence number. */
- void commitStores(InstSeqNum &youngest_inst);
-
- /** Writes back stores. */
- void writebackStores();
-
- // @todo: Include stats in the LSQ unit.
- //void regStats();
-
- /** Clears all the entries in the LQ. */
- void clearLQ();
-
- /** Clears all the entries in the SQ. */
- void clearSQ();
-
- /** Resizes the LQ to a given size. */
- void resizeLQ(unsigned size);
-
- /** Resizes the SQ to a given size. */
- void resizeSQ(unsigned size);
-
- /** Squashes all instructions younger than a specific sequence number. */
- void squash(const InstSeqNum &squashed_num);
-
- /** Returns if there is a memory ordering violation. Value is reset upon
- * call to getMemDepViolator().
- */
- bool violation() { return memDepViolator; }
-
- /** Returns the memory ordering violator. */
- DynInstPtr getMemDepViolator();
-
- /** Returns if a load became blocked due to the memory system. It clears
- * the bool's value upon this being called.
- */
- bool loadBlocked()
- { return isLoadBlocked; }
-
- void clearLoadBlocked()
- { isLoadBlocked = false; }
-
- bool isLoadBlockedHandled()
- { return loadBlockedHandled; }
-
- void setLoadBlockedHandled()
- { loadBlockedHandled = true; }
-
- /** Returns the number of free entries (min of free LQ and SQ entries). */
- unsigned numFreeEntries();
-
- /** Returns the number of loads ready to execute. */
- int numLoadsReady();
-
- /** Returns the number of loads in the LQ. */
- int numLoads() { return loads; }
-
- /** Returns the number of stores in the SQ. */
- int numStores() { return stores; }
-
- /** Returns if either the LQ or SQ is full. */
- bool isFull() { return lqFull() || sqFull(); }
-
- /** Returns if the LQ is full. */
- bool lqFull() { return loads >= (LQEntries - 1); }
-
- /** Returns if the SQ is full. */
- bool sqFull() { return stores >= (SQEntries - 1); }
-
- /** Debugging function to dump instructions in the LSQ. */
- void dumpInsts();
-
- /** Returns the number of instructions in the LSQ. */
- unsigned getCount() { return loads + stores; }
-
- /** Returns if there are any stores to writeback. */
- bool hasStoresToWB() { return storesToWB; }
-
- /** Returns the number of stores to writeback. */
- int numStoresToWB() { return storesToWB; }
-
- /** Returns if the LSQ unit will writeback on this cycle. */
- bool willWB() { return storeQueue.back().canWB &&
- !storeQueue.back().completed &&
- !dcacheInterface->isBlocked(); }
-
- void switchOut();
-
- void takeOverFrom(ExecContext *old_xc = NULL);
-
- bool isSwitchedOut() { return switchedOut; }
-
- bool switchedOut;
-
- private:
- /** Completes the store at the specified index. */
- void completeStore(int store_idx);
-
- private:
- /** Pointer to the CPU. */
- FullCPU *cpu;
-
- /** Pointer to the back-end stage. */
- BackEnd *be;
-
- /** Pointer to the D-cache. */
- MemInterface *dcacheInterface;
-
- /** Pointer to the page table. */
-// PageTable *pTable;
-
- public:
- struct SQEntry {
- /** Constructs an empty store queue entry. */
- SQEntry()
- : inst(NULL), req(NULL), size(0), data(0),
- canWB(0), committed(0), completed(0), lqIt(NULL)
- { }
-
- /** Constructs a store queue entry for a given instruction. */
- SQEntry(DynInstPtr &_inst)
- : inst(_inst), req(NULL), size(0), data(0),
- canWB(0), committed(0), completed(0), lqIt(NULL)
- { }
-
- /** The store instruction. */
- DynInstPtr inst;
- /** The memory request for the store. */
- MemReqPtr req;
- /** The size of the store. */
- int size;
- /** The store data. */
- IntReg data;
- /** Whether or not the store can writeback. */
- bool canWB;
- /** Whether or not the store is committed. */
- bool committed;
- /** Whether or not the store is completed. */
- bool completed;
-
- typename std::list<DynInstPtr>::iterator lqIt;
- };
-
- enum Status {
- Running,
- Idle,
- DcacheMissStall,
- DcacheMissSwitch
- };
-
- private:
- /** The OzoneLWLSQ thread id. */
- unsigned lsqID;
-
- /** The status of the LSQ unit. */
- Status _status;
-
- /** The store queue. */
- std::list<SQEntry> storeQueue;
- /** The load queue. */
- std::list<DynInstPtr> loadQueue;
-
- typedef typename std::list<SQEntry>::iterator SQIt;
- typedef typename std::list<DynInstPtr>::iterator LQIt;
-
-
- struct HashFn {
- size_t operator() (const int a) const
- {
- unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF;
-
- return hash;
- }
- };
-
- m5::hash_map<int, SQIt, HashFn> SQItHash;
- std::queue<int> SQIndices;
- m5::hash_map<int, LQIt, HashFn> LQItHash;
- std::queue<int> LQIndices;
-
- typedef typename m5::hash_map<int, LQIt, HashFn>::iterator LQHashIt;
- typedef typename m5::hash_map<int, SQIt, HashFn>::iterator SQHashIt;
- // Consider making these 16 bits
- /** The number of LQ entries. */
- unsigned LQEntries;
- /** The number of SQ entries. */
- unsigned SQEntries;
-
- /** The number of load instructions in the LQ. */
- int loads;
- /** The number of store instructions in the SQ (excludes those waiting to
- * writeback).
- */
- int stores;
-
- int storesToWB;
-
- /// @todo Consider moving to a more advanced model with write vs read ports
- /** The number of cache ports available each cycle. */
- int cachePorts;
-
- /** The number of used cache ports in this cycle. */
- int usedPorts;
-
- //list<InstSeqNum> mshrSeqNums;
-
- //Stats::Scalar<> dcacheStallCycles;
- Counter lastDcacheStall;
-
- // Make these per thread?
- /** Whether or not the LSQ is stalled. */
- bool stalled;
- /** The store that causes the stall due to partial store to load
- * forwarding.
- */
- InstSeqNum stallingStoreIsn;
- /** The index of the above store. */
- LQIt stallingLoad;
-
- /** Whether or not a load is blocked due to the memory system. It is
- * cleared when this value is checked via loadBlocked().
- */
- bool isLoadBlocked;
-
- bool loadBlockedHandled;
-
- InstSeqNum blockedLoadSeqNum;
-
- /** The oldest faulting load instruction. */
- DynInstPtr loadFaultInst;
- /** The oldest faulting store instruction. */
- DynInstPtr storeFaultInst;
-
- /** The oldest load that caused a memory ordering violation. */
- DynInstPtr memDepViolator;
-
- // Will also need how many read/write ports the Dcache has. Or keep track
- // of that in stage that is one level up, and only call executeLoad/Store
- // the appropriate number of times.
-
- public:
- /** Executes the load at the given index. */
- template <class T>
- Fault read(MemReqPtr &req, T &data, int load_idx);
-
- /** Executes the store at the given index. */
- template <class T>
- Fault write(MemReqPtr &req, T &data, int store_idx);
-
- /** Returns the sequence number of the head load instruction. */
- InstSeqNum getLoadHeadSeqNum()
- {
- if (!loadQueue.empty()) {
- return loadQueue.back()->seqNum;
- } else {
- return 0;
- }
-
- }
-
- /** Returns the sequence number of the head store instruction. */
- InstSeqNum getStoreHeadSeqNum()
- {
- if (!storeQueue.empty()) {
- return storeQueue.back().inst->seqNum;
- } else {
- return 0;
- }
-
- }
-
- /** Returns whether or not the LSQ unit is stalled. */
- bool isStalled() { return stalled; }
-};
-
-template <class Impl>
-template <class T>
-Fault
-OzoneLWLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
-{
- //Depending on issue2execute delay a squashed load could
- //execute if it is found to be squashed in the same
- //cycle it is scheduled to execute
- typename m5::hash_map<int, LQIt, HashFn>::iterator
- lq_hash_it = LQItHash.find(load_idx);
- assert(lq_hash_it != LQItHash.end());
- DynInstPtr inst = (*(*lq_hash_it).second);
-
- if (inst->isExecuted()) {
- panic("Should not reach this point with split ops!");
-
- memcpy(&data,req->data,req->size);
-
- return NoFault;
- }
-
- // Make sure this isn't an uncacheable access
- // A bit of a hackish way to get uncached accesses to work only if they're
- // at the head of the LSQ and are ready to commit (at the head of the ROB
- // too).
- // @todo: Fix uncached accesses.
- if (req->flags & UNCACHEABLE &&
- (inst != loadQueue.back() || !inst->reachedCommit)) {
- DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of "
- "commit/LSQ!\n",
- inst->seqNum);
- be->rescheduleMemInst(inst);
- return TheISA::genMachineCheckFault();
- }
-
- // Check the SQ for any previous stores that might lead to forwarding
- SQIt sq_it = storeQueue.begin();
- int store_size = 0;
-
- DPRINTF(OzoneLSQ, "Read called, load idx: %i addr: %#x\n",
- load_idx, req->paddr);
-
- while (sq_it != storeQueue.end() && (*sq_it).inst->seqNum > inst->seqNum)
- ++sq_it;
-
- while (1) {
- // End once we've reached the top of the LSQ
- if (sq_it == storeQueue.end()) {
- break;
- }
-
- assert((*sq_it).inst);
-
- store_size = (*sq_it).size;
-
- if (store_size == 0) {
- sq_it++;
- continue;
- }
-
- // Check if the store data is within the lower and upper bounds of
- // addresses that the request needs.
- bool store_has_lower_limit =
- req->vaddr >= (*sq_it).inst->effAddr;
- bool store_has_upper_limit =
- (req->vaddr + req->size) <= ((*sq_it).inst->effAddr +
- store_size);
- bool lower_load_has_store_part =
- req->vaddr < ((*sq_it).inst->effAddr +
- store_size);
- bool upper_load_has_store_part =
- (req->vaddr + req->size) > (*sq_it).inst->effAddr;
-
- // If the store's data has all of the data needed, we can forward.
- if (store_has_lower_limit && store_has_upper_limit) {
-
- int shift_amt = req->vaddr & (store_size - 1);
- // Assumes byte addressing
- shift_amt = shift_amt << 3;
-
- // Cast this to type T?
- data = (*sq_it).data >> shift_amt;
-
- req->cmd = Read;
- assert(!req->completionEvent);
- req->completionEvent = NULL;
- req->time = curTick;
- assert(!req->data);
- req->data = new uint8_t[64];
-
- memcpy(req->data, &data, req->size);
-
- DPRINTF(OzoneLSQ, "Forwarding from store [sn:%lli] to load to "
- "[sn:%lli] addr %#x, data %#x\n",
- (*sq_it).inst->seqNum, inst->seqNum, req->vaddr, *(req->data));
-
- typename BackEnd::LdWritebackEvent *wb =
- new typename BackEnd::LdWritebackEvent(inst,
- be);
-
- // We'll say this has a 1 cycle load-store forwarding latency
- // for now.
- // FIXME - Need to make this a parameter.
- wb->schedule(curTick);
-
- // Should keep track of stat for forwarded data
- return NoFault;
- } else if ((store_has_lower_limit && lower_load_has_store_part) ||
- (store_has_upper_limit && upper_load_has_store_part) ||
- (lower_load_has_store_part && upper_load_has_store_part)) {
- // This is the partial store-load forwarding case where a store
- // has only part of the load's data.
-
- // If it's already been written back, then don't worry about
- // stalling on it.
- if ((*sq_it).completed) {
- sq_it++;
- break;
- }
-
- // Must stall load and force it to retry, so long as it's the oldest
- // load that needs to do so.
- if (!stalled ||
- (stalled &&
- inst->seqNum <
- (*stallingLoad)->seqNum)) {
- stalled = true;
- stallingStoreIsn = (*sq_it).inst->seqNum;
- stallingLoad = (*lq_hash_it).second;
- }
-
- // Tell IQ/mem dep unit that this instruction will need to be
- // rescheduled eventually
- be->rescheduleMemInst(inst);
-
- DPRINTF(OzoneLSQ, "Load-store forwarding mis-match. "
- "Store [sn:%lli] to load addr %#x\n",
- (*sq_it).inst->seqNum, req->vaddr);
-
- return NoFault;
- }
- sq_it++;
- }
-
- // If there's no forwarding case, then go access memory
- DPRINTF(OzoneLSQ, "Doing functional access for inst PC %#x\n",
- inst->readPC());
-
- // Setup MemReq pointer
- req->cmd = Read;
- req->completionEvent = NULL;
- req->time = curTick;
- assert(!req->data);
- req->data = new uint8_t[64];
- Fault fault = cpu->read(req, data);
- memcpy(req->data, &data, sizeof(T));
-
- ++usedPorts;
-
- // if we have a cache, do cache access too
- if (dcacheInterface) {
- if (dcacheInterface->isBlocked()) {
- // There's an older load that's already going to squash.
- if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum)
- return NoFault;
-
- isLoadBlocked = true;
- loadBlockedHandled = false;
- blockedLoadSeqNum = inst->seqNum;
- // No fault occurred, even though the interface is blocked.
- return NoFault;
- }
-
- DPRINTF(OzoneLSQ, "D-cache: PC:%#x reading from paddr:%#x "
- "vaddr:%#x flags:%i\n",
- inst->readPC(), req->paddr, req->vaddr, req->flags);
-
- assert(!req->completionEvent);
- req->completionEvent =
- new typename BackEnd::LdWritebackEvent(inst, be);
-
- // Do Cache Access
- MemAccessResult result = dcacheInterface->access(req);
-
- // Ugly hack to get an event scheduled *only* if the access is
- // a miss. We really should add first-class support for this
- // at some point.
- // @todo: Probably should support having no events
- if (result != MA_HIT) {
- DPRINTF(OzoneLSQ, "D-cache miss!\n");
- DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
- inst->seqNum);
-
- lastDcacheStall = curTick;
-
- _status = DcacheMissStall;
-
- } else {
- DPRINTF(OzoneLSQ, "D-cache hit!\n");
- }
- } else {
- fatal("Must use D-cache with new memory system");
- }
-
- return NoFault;
-}
-
-template <class Impl>
-template <class T>
-Fault
-OzoneLWLSQ<Impl>::write(MemReqPtr &req, T &data, int store_idx)
-{
- SQHashIt sq_hash_it = SQItHash.find(store_idx);
- assert(sq_hash_it != SQItHash.end());
-
- SQIt sq_it = (*sq_hash_it).second;
- assert((*sq_it).inst);
-
- DPRINTF(OzoneLSQ, "Doing write to store idx %i, addr %#x data %#x"
- " | [sn:%lli]\n",
- store_idx, req->paddr, data, (*sq_it).inst->seqNum);
-
- (*sq_it).req = req;
- (*sq_it).size = sizeof(T);
- (*sq_it).data = data;
- assert(!req->data);
- req->data = new uint8_t[64];
- memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size);
-
- // This function only writes the data to the store queue, so no fault
- // can happen here.
- return NoFault;
-}
-
-#endif // __CPU_OZONE_LW_LSQ_HH__
+++ /dev/null
-/*
- * Copyright (c) 2004-2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "arch/isa_traits.hh"
-#include "base/str.hh"
-#include "cpu/ozone/lw_lsq.hh"
-#include "cpu/checker/cpu.hh"
-
-template <class Impl>
-OzoneLWLSQ<Impl>::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst,
- BackEnd *_be,
- Event *wb_event,
- OzoneLWLSQ<Impl> *lsq_ptr)
- : Event(&mainEventQueue),
- inst(_inst),
- be(_be),
- wbEvent(wb_event),
- miss(false),
- lsqPtr(lsq_ptr)
-{
- this->setFlags(Event::AutoDelete);
-}
-
-template <class Impl>
-void
-OzoneLWLSQ<Impl>::StoreCompletionEvent::process()
-{
- DPRINTF(OzoneLSQ, "Cache miss complete for store [sn:%lli]\n",
- inst->seqNum);
-
- //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
-
-// lsqPtr->cpu->wakeCPU();
- if (lsqPtr->isSwitchedOut()) {
- if (wbEvent)
- delete wbEvent;
-
- return;
- }
-
- if (wbEvent) {
- wbEvent->process();
- delete wbEvent;
- }
-
- lsqPtr->completeStore(inst->sqIdx);
- if (miss)
- be->removeDcacheMiss(inst);
-}
-
-template <class Impl>
-const char *
-OzoneLWLSQ<Impl>::StoreCompletionEvent::description()
-{
- return "LSQ store completion event";
-}
-
-template <class Impl>
-OzoneLWLSQ<Impl>::OzoneLWLSQ()
- : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false),
- loadBlockedHandled(false)
-{
-}
-
-template<class Impl>
-void
-OzoneLWLSQ<Impl>::init(Params *params, unsigned maxLQEntries,
- unsigned maxSQEntries, unsigned id)
-{
- DPRINTF(OzoneLSQ, "Creating OzoneLWLSQ%i object.\n",id);
-
- lsqID = id;
-
- LQEntries = maxLQEntries;
- SQEntries = maxSQEntries;
-
- for (int i = 0; i < LQEntries * 2; i++) {
- LQIndices.push(i);
- SQIndices.push(i);
- }
-
- usedPorts = 0;
- cachePorts = params->cachePorts;
-
- dcacheInterface = params->dcacheInterface;
-
- loadFaultInst = storeFaultInst = memDepViolator = NULL;
-
- blockedLoadSeqNum = 0;
-}
-
-template<class Impl>
-std::string
-OzoneLWLSQ<Impl>::name() const
-{
- return "lsqunit";
-}
-
-template<class Impl>
-void
-OzoneLWLSQ<Impl>::clearLQ()
-{
- loadQueue.clear();
-}
-
-template<class Impl>
-void
-OzoneLWLSQ<Impl>::clearSQ()
-{
- storeQueue.clear();
-}
-/*
-template<class Impl>
-void
-OzoneLWLSQ<Impl>::setPageTable(PageTable *pt_ptr)
-{
- DPRINTF(OzoneLSQ, "Setting the page table pointer.\n");
- pTable = pt_ptr;
-}
-*/
-template<class Impl>
-void
-OzoneLWLSQ<Impl>::resizeLQ(unsigned size)
-{
- assert( size >= LQEntries);
-
- if (size > LQEntries) {
- while (size > loadQueue.size()) {
- DynInstPtr dummy;
- loadQueue.push_back(dummy);
- LQEntries++;
- }
- } else {
- LQEntries = size;
- }
-
-}
-
-template<class Impl>
-void
-OzoneLWLSQ<Impl>::resizeSQ(unsigned size)
-{
- if (size > SQEntries) {
- while (size > storeQueue.size()) {
- SQEntry dummy;
- storeQueue.push_back(dummy);
- SQEntries++;
- }
- } else {
- SQEntries = size;
- }
-}
-
-template <class Impl>
-void
-OzoneLWLSQ<Impl>::insert(DynInstPtr &inst)
-{
- // Make sure we really have a memory reference.
- assert(inst->isMemRef());
-
- // Make sure it's one of the two classes of memory references.
- assert(inst->isLoad() || inst->isStore());
-
- if (inst->isLoad()) {
- insertLoad(inst);
- } else {
- insertStore(inst);
- }
-}
-
-template <class Impl>
-void
-OzoneLWLSQ<Impl>::insertLoad(DynInstPtr &load_inst)
-{
- assert(loads < LQEntries * 2);
- assert(!LQIndices.empty());
- int load_index = LQIndices.front();
- LQIndices.pop();
-
- DPRINTF(OzoneLSQ, "Inserting load PC %#x, idx:%i [sn:%lli]\n",
- load_inst->readPC(), load_index, load_inst->seqNum);
-
- load_inst->lqIdx = load_index;
-
- loadQueue.push_front(load_inst);
- LQItHash[load_index] = loadQueue.begin();
-
- ++loads;
-}
-
-template <class Impl>
-void
-OzoneLWLSQ<Impl>::insertStore(DynInstPtr &store_inst)
-{
- // Make sure it is not full before inserting an instruction.
- assert(stores - storesToWB < SQEntries);
-
- assert(!SQIndices.empty());
- int store_index = SQIndices.front();
- SQIndices.pop();
-
- DPRINTF(OzoneLSQ, "Inserting store PC %#x, idx:%i [sn:%lli]\n",
- store_inst->readPC(), store_index, store_inst->seqNum);
-
- store_inst->sqIdx = store_index;
- SQEntry entry(store_inst);
- if (loadQueue.empty()) {
- entry.lqIt = loadQueue.end();
- } else {
- entry.lqIt = loadQueue.begin();
- }
- storeQueue.push_front(entry);
-
- SQItHash[store_index] = storeQueue.begin();
-
- ++stores;
-}
-
-template <class Impl>
-typename Impl::DynInstPtr
-OzoneLWLSQ<Impl>::getMemDepViolator()
-{
- DynInstPtr temp = memDepViolator;
-
- memDepViolator = NULL;
-
- return temp;
-}
-
-template <class Impl>
-unsigned
-OzoneLWLSQ<Impl>::numFreeEntries()
-{
- unsigned free_lq_entries = LQEntries - loads;
- unsigned free_sq_entries = SQEntries - stores;
-
- // Both the LQ and SQ entries have an extra dummy entry to differentiate
- // empty/full conditions. Subtract 1 from the free entries.
- if (free_lq_entries < free_sq_entries) {
- return free_lq_entries - 1;
- } else {
- return free_sq_entries - 1;
- }
-}
-
-template <class Impl>
-int
-OzoneLWLSQ<Impl>::numLoadsReady()
-{
- int retval = 0;
- LQIt lq_it = loadQueue.begin();
- LQIt end_it = loadQueue.end();
-
- while (lq_it != end_it) {
- if ((*lq_it)->readyToIssue()) {
- ++retval;
- }
- }
-
- return retval;
-}
-
-template <class Impl>
-Fault
-OzoneLWLSQ<Impl>::executeLoad(DynInstPtr &inst)
-{
- // Execute a specific load.
- Fault load_fault = NoFault;
-
- DPRINTF(OzoneLSQ, "Executing load PC %#x, [sn:%lli]\n",
- inst->readPC(),inst->seqNum);
-
- // Make sure it's really in the list.
- // Normally it should always be in the list. However,
- /* due to a syscall it may not be the list.
-#ifdef DEBUG
- int i = loadHead;
- while (1) {
- if (i == loadTail && !find(inst)) {
- assert(0 && "Load not in the queue!");
- } else if (loadQueue[i] == inst) {
- break;
- }
-
- i = i + 1;
- if (i >= LQEntries) {
- i = 0;
- }
- }
-#endif // DEBUG*/
-
- load_fault = inst->initiateAcc();
-
- // Might want to make sure that I'm not overwriting a previously faulting
- // instruction that hasn't been checked yet.
- // Actually probably want the oldest faulting load
- if (load_fault != NoFault) {
- DPRINTF(OzoneLSQ, "Load [sn:%lli] has a fault\n", inst->seqNum);
- // Maybe just set it as can commit here, although that might cause
- // some other problems with sending traps to the ROB too quickly.
- be->instToCommit(inst);
-// iewStage->activityThisCycle();
- }
-
- return load_fault;
-}
-
-template <class Impl>
-Fault
-OzoneLWLSQ<Impl>::executeStore(DynInstPtr &store_inst)
-{
- // Make sure that a store exists.
- assert(stores != 0);
-
- int store_idx = store_inst->sqIdx;
- SQHashIt sq_hash_it = SQItHash.find(store_idx);
- assert(sq_hash_it != SQItHash.end());
- DPRINTF(OzoneLSQ, "Executing store PC %#x [sn:%lli]\n",
- store_inst->readPC(), store_inst->seqNum);
-
- SQIt sq_it = (*sq_hash_it).second;
-
- Fault store_fault = store_inst->initiateAcc();
-
- // Store size should now be available. Use it to get proper offset for
- // addr comparisons.
- int size = (*sq_it).size;
-
- if (size == 0) {
- DPRINTF(OzoneLSQ,"Fault on Store PC %#x, [sn:%lli],Size = 0\n",
- store_inst->readPC(),store_inst->seqNum);
-
- return store_fault;
- }
-
- assert(store_fault == NoFault);
-
- if (!storeFaultInst) {
- if (store_fault != NoFault) {
- panic("Fault in a store instruction!");
- storeFaultInst = store_inst;
- } else if (store_inst->isStoreConditional()) {
- // Store conditionals need to set themselves as able to
- // writeback if we haven't had a fault by here.
- (*sq_it).canWB = true;
-
- ++storesToWB;
- DPRINTF(OzoneLSQ, "Nonspeculative store! storesToWB:%i\n",
- storesToWB);
- }
- }
-
- LQIt lq_it = --(loadQueue.end());
-
- if (!memDepViolator) {
- while (lq_it != loadQueue.end()) {
- if ((*lq_it)->seqNum < store_inst->seqNum) {
- lq_it--;
- continue;
- }
- // Actually should only check loads that have actually executed
- // Might be safe because effAddr is set to InvalAddr when the
- // dyn inst is created.
-
- // Must actually check all addrs in the proper size range
- // Which is more correct than needs to be. What if for now we just
- // assume all loads are quad-word loads, and do the addr based
- // on that.
- // @todo: Fix this, magic number being used here
- if (((*lq_it)->effAddr >> 8) ==
- (store_inst->effAddr >> 8)) {
- // A load incorrectly passed this store. Squash and refetch.
- // For now return a fault to show that it was unsuccessful.
- memDepViolator = (*lq_it);
-
- return TheISA::genMachineCheckFault();
- }
-
- lq_it--;
- }
-
- // If we've reached this point, there was no violation.
- memDepViolator = NULL;
- }
-
- return store_fault;
-}
-
-template <class Impl>
-void
-OzoneLWLSQ<Impl>::commitLoad()
-{
- assert(!loadQueue.empty());
-
- DPRINTF(OzoneLSQ, "[sn:%lli] Committing head load instruction, PC %#x\n",
- loadQueue.back()->seqNum, loadQueue.back()->readPC());
-
- LQIndices.push(loadQueue.back()->lqIdx);
- LQItHash.erase(loadQueue.back()->lqIdx);
-
- loadQueue.pop_back();
-
- --loads;
-}
-
-template <class Impl>
-void
-OzoneLWLSQ<Impl>::commitLoads(InstSeqNum &youngest_inst)
-{
- assert(loads == 0 || !loadQueue.empty());
-
- while (loads != 0 &&
- loadQueue.back()->seqNum <= youngest_inst) {
- commitLoad();
- }
-}
-
-template <class Impl>
-void
-OzoneLWLSQ<Impl>::commitStores(InstSeqNum &youngest_inst)
-{
- assert(stores == 0 || !storeQueue.empty());
-
- SQIt sq_it = --(storeQueue.end());
- while (!storeQueue.empty() && sq_it != storeQueue.end()) {
- assert((*sq_it).inst);
- if (!(*sq_it).canWB) {
- if ((*sq_it).inst->seqNum > youngest_inst) {
- break;
- }
- ++storesToWB;
-
- DPRINTF(OzoneLSQ, "Marking store as able to write back, PC "
- "%#x [sn:%lli], storesToWB:%i\n",
- (*sq_it).inst->readPC(),
- (*sq_it).inst->seqNum,
- storesToWB);
-
- (*sq_it).canWB = true;
- }
-
- sq_it--;
- }
-}
-
-template <class Impl>
-void
-OzoneLWLSQ<Impl>::writebackStores()
-{
- SQIt sq_it = --(storeQueue.end());
- while (storesToWB > 0 &&
- sq_it != storeQueue.end() &&
- (*sq_it).inst &&
- (*sq_it).canWB &&
- usedPorts < cachePorts) {
-
- DynInstPtr inst = (*sq_it).inst;
-
- if ((*sq_it).size == 0 && !(*sq_it).completed) {
- sq_it--;
- completeStore(inst->sqIdx);
-
- continue;
- }
-
- if (inst->isDataPrefetch() || (*sq_it).committed) {
- sq_it--;
- continue;
- }
-
- if (dcacheInterface && dcacheInterface->isBlocked()) {
- DPRINTF(OzoneLSQ, "Unable to write back any more stores, cache"
- " is blocked!\n");
- break;
- }
-
- ++usedPorts;
-
- assert((*sq_it).req);
- assert(!(*sq_it).committed);
-
- (*sq_it).committed = true;
-
- MemReqPtr req = (*sq_it).req;
-
- req->cmd = Write;
- req->completionEvent = NULL;
- req->time = curTick;
-
- switch((*sq_it).size) {
- case 1:
- cpu->write(req, (uint8_t &)(*sq_it).data);
- break;
- case 2:
- cpu->write(req, (uint16_t &)(*sq_it).data);
- break;
- case 4:
- cpu->write(req, (uint32_t &)(*sq_it).data);
- break;
- case 8:
- cpu->write(req, (uint64_t &)(*sq_it).data);
- break;
- default:
- panic("Unexpected store size!\n");
- }
- if (!(req->flags & LOCKED)) {
- (*sq_it).inst->setCompleted();
- if (cpu->checker) {
- cpu->checker->tick((*sq_it).inst);
- }
- }
-
- DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x "
- "to Addr:%#x, data:%#x [sn:%lli]\n",
- inst->sqIdx,inst->readPC(),
- req->paddr, *(req->data),
- inst->seqNum);
-
- if (dcacheInterface) {
- assert(!req->completionEvent);
- StoreCompletionEvent *store_event = new
- StoreCompletionEvent(inst, be, NULL, this);
- req->completionEvent = store_event;
-
- MemAccessResult result = dcacheInterface->access(req);
-
- if (isStalled() &&
- inst->seqNum == stallingStoreIsn) {
- DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
- "load [sn:%lli]\n",
- stallingStoreIsn, (*stallingLoad)->seqNum);
- stalled = false;
- stallingStoreIsn = 0;
- be->replayMemInst((*stallingLoad));
- }
-
- if (result != MA_HIT && dcacheInterface->doEvents()) {
- store_event->miss = true;
- typename BackEnd::LdWritebackEvent *wb = NULL;
- if (req->flags & LOCKED) {
- wb = new typename BackEnd::LdWritebackEvent(inst,
- be);
- store_event->wbEvent = wb;
- }
-
- DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n");
-
-// DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
-// inst->seqNum);
-
- be->addDcacheMiss(inst);
-
- lastDcacheStall = curTick;
-
- _status = DcacheMissStall;
-
- // Increment stat here or something
-
- sq_it--;
- } else {
- DPRINTF(OzoneLSQ,"D-Cache: Write Hit on idx:%i !\n",
- inst->sqIdx);
-
-// DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
-// inst->seqNum);
-
- if (req->flags & LOCKED) {
- // Stx_C does not generate a system port
- // transaction in the 21264, but that might be
- // hard to accomplish in this model.
-
- typename BackEnd::LdWritebackEvent *wb =
- new typename BackEnd::LdWritebackEvent(inst,
- be);
- store_event->wbEvent = wb;
- }
- sq_it--;
- }
- } else {
- panic("Must HAVE DCACHE!!!!!\n");
- }
- }
-
- // Not sure this should set it to 0.
- usedPorts = 0;
-
- assert(stores >= 0 && storesToWB >= 0);
-}
-
-template <class Impl>
-void
-OzoneLWLSQ<Impl>::squash(const InstSeqNum &squashed_num)
-{
- DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!"
- "(Loads:%i Stores:%i)\n",squashed_num,loads,stores);
-
-
- LQIt lq_it = loadQueue.begin();
-
- while (loads != 0 && (*lq_it)->seqNum > squashed_num) {
- assert(!loadQueue.empty());
- // Clear the smart pointer to make sure it is decremented.
- DPRINTF(OzoneLSQ,"Load Instruction PC %#x squashed, "
- "[sn:%lli]\n",
- (*lq_it)->readPC(),
- (*lq_it)->seqNum);
-
- if (isStalled() && lq_it == stallingLoad) {
- stalled = false;
- stallingStoreIsn = 0;
- stallingLoad = NULL;
- }
-
- --loads;
-
- // Inefficient!
- LQHashIt lq_hash_it = LQItHash.find((*lq_it)->lqIdx);
- assert(lq_hash_it != LQItHash.end());
- LQItHash.erase(lq_hash_it);
- LQIndices.push((*lq_it)->lqIdx);
- loadQueue.erase(lq_it++);
- }
-
- if (isLoadBlocked) {
- if (squashed_num < blockedLoadSeqNum) {
- isLoadBlocked = false;
- loadBlockedHandled = false;
- blockedLoadSeqNum = 0;
- }
- }
-
- SQIt sq_it = storeQueue.begin();
-
- while (stores != 0 && (*sq_it).inst->seqNum > squashed_num) {
- assert(!storeQueue.empty());
-
- if ((*sq_it).canWB) {
- break;
- }
-
- // Clear the smart pointer to make sure it is decremented.
- DPRINTF(OzoneLSQ,"Store Instruction PC %#x idx:%i squashed [sn:%lli]\n",
- (*sq_it).inst->readPC(), (*sq_it).inst->sqIdx,
- (*sq_it).inst->seqNum);
-
- // I don't think this can happen. It should have been cleared by the
- // stalling load.
- if (isStalled() &&
- (*sq_it).inst->seqNum == stallingStoreIsn) {
- panic("Is stalled should have been cleared by stalling load!\n");
- stalled = false;
- stallingStoreIsn = 0;
- }
-
- SQHashIt sq_hash_it = SQItHash.find((*sq_it).inst->sqIdx);
- assert(sq_hash_it != SQItHash.end());
- SQItHash.erase(sq_hash_it);
- SQIndices.push((*sq_it).inst->sqIdx);
- (*sq_it).inst = NULL;
- (*sq_it).canWB = 0;
-
- if ((*sq_it).req) {
- assert(!(*sq_it).req->completionEvent);
- }
- (*sq_it).req = NULL;
- --stores;
- storeQueue.erase(sq_it++);
- }
-}
-
-template <class Impl>
-void
-OzoneLWLSQ<Impl>::dumpInsts()
-{
- cprintf("Load store queue: Dumping instructions.\n");
- cprintf("Load queue size: %i\n", loads);
- cprintf("Load queue: ");
-
- LQIt lq_it = --(loadQueue.end());
-
- while (lq_it != loadQueue.end() && (*lq_it)) {
- cprintf("[sn:%lli] %#x ", (*lq_it)->seqNum,
- (*lq_it)->readPC());
-
- lq_it--;
- }
-
- cprintf("\nStore queue size: %i\n", stores);
- cprintf("Store queue: ");
-
- SQIt sq_it = --(storeQueue.end());
-
- while (sq_it != storeQueue.end() && (*sq_it).inst) {
- cprintf("[sn:%lli]\nPC:%#x\nSize:%i\nCommitted:%i\nCompleted:%i\ncanWB:%i\n",
- (*sq_it).inst->seqNum,
- (*sq_it).inst->readPC(),
- (*sq_it).size,
- (*sq_it).committed,
- (*sq_it).completed,
- (*sq_it).canWB);
-
- sq_it--;
- }
-
- cprintf("\n");
-}
-
-template <class Impl>
-void
-OzoneLWLSQ<Impl>::completeStore(int store_idx)
-{
- SQHashIt sq_hash_it = SQItHash.find(store_idx);
- assert(sq_hash_it != SQItHash.end());
- SQIt sq_it = (*sq_hash_it).second;
-
- assert((*sq_it).inst);
- (*sq_it).completed = true;
- DynInstPtr inst = (*sq_it).inst;
-
- --storesToWB;
-
- if (isStalled() &&
- inst->seqNum == stallingStoreIsn) {
- DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
- "load [sn:%lli]\n",
- stallingStoreIsn, (*stallingLoad)->seqNum);
- stalled = false;
- stallingStoreIsn = 0;
- be->replayMemInst((*stallingLoad));
- }
-
- DPRINTF(OzoneLSQ, "Completing store idx:%i [sn:%lli], storesToWB:%i\n",
- inst->sqIdx, inst->seqNum, storesToWB);
-
- assert(!storeQueue.empty());
- SQItHash.erase(sq_hash_it);
- SQIndices.push(inst->sqIdx);
- storeQueue.erase(sq_it);
- --stores;
-
- inst->setCompleted();
- if (cpu->checker) {
- cpu->checker->tick(inst);
- }
-}
-
-template <class Impl>
-void
-OzoneLWLSQ<Impl>::switchOut()
-{
- assert(storesToWB == 0);
- switchedOut = true;
- SQIt sq_it = --(storeQueue.end());
- while (storesToWB > 0 &&
- sq_it != storeQueue.end() &&
- (*sq_it).inst &&
- (*sq_it).canWB) {
-
- DynInstPtr inst = (*sq_it).inst;
-
- if ((*sq_it).size == 0 && !(*sq_it).completed) {
- sq_it--;
- continue;
- }
-
- // Store conditionals don't complete until *after* they have written
- // back. If it's here and not yet sent to memory, then don't bother
- // as it's not part of committed state.
- if (inst->isDataPrefetch() || (*sq_it).committed) {
- sq_it--;
- continue;
- } else if ((*sq_it).req->flags & LOCKED) {
- sq_it--;
- assert(!(*sq_it).canWB ||
- ((*sq_it).canWB && (*sq_it).req->flags & LOCKED));
- continue;
- }
-
- assert((*sq_it).req);
- assert(!(*sq_it).committed);
-
- MemReqPtr req = (*sq_it).req;
- (*sq_it).committed = true;
-
- req->cmd = Write;
- req->completionEvent = NULL;
- req->time = curTick;
- assert(!req->data);
- req->data = new uint8_t[64];
- memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size);
-
- DPRINTF(OzoneLSQ, "Switching out : Writing back store idx:%i PC:%#x "
- "to Addr:%#x, data:%#x directly to memory [sn:%lli]\n",
- inst->sqIdx,inst->readPC(),
- req->paddr, *(req->data),
- inst->seqNum);
-
- switch((*sq_it).size) {
- case 1:
- cpu->write(req, (uint8_t &)(*sq_it).data);
- break;
- case 2:
- cpu->write(req, (uint16_t &)(*sq_it).data);
- break;
- case 4:
- cpu->write(req, (uint32_t &)(*sq_it).data);
- break;
- case 8:
- cpu->write(req, (uint64_t &)(*sq_it).data);
- break;
- default:
- panic("Unexpected store size!\n");
- }
- }
-
- // Clear the queue to free up resources
- storeQueue.clear();
- loadQueue.clear();
- loads = stores = storesToWB = 0;
-}
-
-template <class Impl>
-void
-OzoneLWLSQ<Impl>::takeOverFrom(ExecContext *old_xc)
-{
- // Clear out any old state. May be redundant if this is the first time
- // the CPU is being used.
- stalled = false;
- isLoadBlocked = false;
- loadBlockedHandled = false;
- switchedOut = false;
-
- // Could do simple checks here to see if indices are on twice
- while (!LQIndices.empty())
- LQIndices.pop();
- while (!SQIndices.empty())
- SQIndices.pop();
-
- for (int i = 0; i < LQEntries * 2; i++) {
- LQIndices.push(i);
- SQIndices.push(i);
- }
-
- usedPorts = 0;
-
- loadFaultInst = storeFaultInst = memDepViolator = NULL;
-
- blockedLoadSeqNum = 0;
-}
+++ /dev/null
-
-#ifndef __CPU_OZONE_NULL_PREDICTOR_HH__
-#define __CPU_OZONE_NULL_PREDICTOR_HH__
-
-#include "arch/isa_traits.hh"
-#include "cpu/inst_seq.hh"
-
-template <class Impl>
-class NullPredictor
-{
- public:
- typedef typename Impl::Params Params;
- typedef typename Impl::DynInstPtr DynInstPtr;
-
- NullPredictor(Params *p) { }
-
- struct BPredInfo {
- BPredInfo()
- : PC(0), nextPC(0)
- { }
-
- BPredInfo(const Addr &pc, const Addr &next_pc)
- : PC(pc), nextPC(next_pc)
- { }
-
- Addr PC;
- Addr nextPC;
- };
-
- BPredInfo lookup(Addr &PC) { return BPredInfo(PC, PC+4); }
-
- void undo(BPredInfo &bp_info) { return; }
-
- /**
- * Predicts whether or not the instruction is a taken branch, and the
- * target of the branch if it is taken.
- * @param inst The branch instruction.
- * @param PC The predicted PC is passed back through this parameter.
- * @param tid The thread id.
- * @return Returns if the branch is taken or not.
- */
- bool predict(DynInstPtr &inst, Addr &PC, unsigned tid)
- { return false; }
-
- /**
- * Tells the branch predictor to commit any updates until the given
- * sequence number.
- * @param done_sn The sequence number to commit any older updates up until.
- * @param tid The thread id.
- */
- void update(const InstSeqNum &done_sn, unsigned tid) { }
-
- /**
- * Squashes all outstanding updates until a given sequence number.
- * @param squashed_sn The sequence number to squash any younger updates up
- * until.
- * @param tid The thread id.
- */
- void squash(const InstSeqNum &squashed_sn, unsigned tid) { }
-
- /**
- * Squashes all outstanding updates until a given sequence number, and
- * corrects that sn's update with the proper address and taken/not taken.
- * @param squashed_sn The sequence number to squash any younger updates up
- * until.
- * @param corr_target The correct branch target.
- * @param actually_taken The correct branch direction.
- * @param tid The thread id.
- */
- void squash(const InstSeqNum &squashed_sn, const Addr &corr_target,
- bool actually_taken, unsigned tid)
- { }
-
-};
-
-#endif // __CPU_OZONE_NULL_PREDICTOR_HH__
+++ /dev/null
-/*
- * Copyright (c) 2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __CPU_OZONE_OZONE_IMPL_HH__
-#define __CPU_OZONE_OZONE_IMPL_HH__
-
-#include "arch/alpha/isa_traits.hh"
-#include "cpu/o3/bpred_unit.hh"
-#include "cpu/ozone/back_end.hh"
-#include "cpu/ozone/front_end.hh"
-#include "cpu/ozone/inst_queue.hh"
-#include "cpu/ozone/lsq_unit.hh"
-#include "cpu/ozone/lw_lsq.hh"
-#include "cpu/ozone/lw_back_end.hh"
-#include "cpu/ozone/null_predictor.hh"
-#include "cpu/ozone/dyn_inst.hh"
-#include "cpu/ozone/simple_params.hh"
-
-template <class Impl>
-class OzoneCPU;
-
-template <class Impl>
-class OzoneDynInst;
-
-struct OzoneImpl {
- typedef SimpleParams Params;
- typedef OzoneCPU<OzoneImpl> OzoneCPU;
- typedef OzoneCPU FullCPU;
-
- // Would like to put these into their own area.
-// typedef NullPredictor BranchPred;
- typedef TwobitBPredUnit<OzoneImpl> BranchPred;
- typedef FrontEnd<OzoneImpl> FrontEnd;
- // Will need IQ, LSQ eventually
- typedef LWBackEnd<OzoneImpl> BackEnd;
-
- typedef InstQueue<OzoneImpl> InstQueue;
- typedef OzoneLWLSQ<OzoneImpl> LdstQueue;
-
- typedef OzoneDynInst<OzoneImpl> DynInst;
- typedef RefCountingPtr<DynInst> DynInstPtr;
-
- typedef uint64_t IssueStruct;
-
- enum {
- MaxThreads = 1
- };
-};
-
-#endif // __CPU_OZONE_OZONE_IMPL_HH__
+++ /dev/null
-
-#include "cpu/ozone/rename_table_impl.hh"
-#include "cpu/ozone/ozone_impl.hh"
-#include "cpu/ozone/simple_impl.hh"
-
-template class RenameTable<OzoneImpl>;
-template class RenameTable<SimpleImpl>;
+++ /dev/null
-/*
- * Copyright (c) 2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __CPU_OZONE_RENAME_TABLE_HH__
-#define __CPU_OZONE_RENAME_TABLE_HH__
-
-#include "arch/isa_traits.hh"
-
-/** Rename table that holds the rename of each architectural register to
- * producing DynInst. Needs to support copying from one table to another.
- */
-
-template <class Impl>
-class RenameTable {
- public:
- typedef typename Impl::DynInstPtr DynInstPtr;
-
- RenameTable();
-
- void copyFrom(const RenameTable<Impl> &table_to_copy);
-
- DynInstPtr &operator [] (int index)
- { return table[index]; }
-
- DynInstPtr table[TheISA::TotalNumRegs];
-};
-
-#endif // __CPU_OZONE_RENAME_TABLE_HH__
+++ /dev/null
-
-#include <cstdlib> // Not really sure what to include to get NULL
-#include "cpu/ozone/rename_table.hh"
-
-template <class Impl>
-RenameTable<Impl>::RenameTable()
-{
- // Actually should set these to dummy dyn insts that have the initial value
- // and force their values to be initialized. This keeps everything the
- // same.
- for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
- table[i] = NULL;
- }
-}
-
-template <class Impl>
-void
-RenameTable<Impl>::copyFrom(const RenameTable<Impl> &table_to_copy)
-{
- for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
- table[i] = table_to_copy.table[i];
- }
-}
+++ /dev/null
-/*
- * Copyright (c) 2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __CPU_OZONE_SIMPLE_IMPL_HH__
-#define __CPU_OZONE_SIMPLE_IMPL_HH__
-
-#include "arch/isa_traits.hh"
-#include "cpu/o3/bpred_unit.hh"
-#include "cpu/ozone/cpu.hh"
-#include "cpu/ozone/front_end.hh"
-#include "cpu/ozone/inorder_back_end.hh"
-#include "cpu/ozone/null_predictor.hh"
-#include "cpu/ozone/dyn_inst.hh"
-#include "cpu/ozone/simple_params.hh"
-
-//template <class Impl>
-//class OzoneCPU;
-
-template <class Impl>
-class OzoneDynInst;
-
-struct SimpleImpl {
- typedef SimpleParams Params;
- typedef OzoneCPU<SimpleImpl> OzoneCPU;
- typedef OzoneCPU FullCPU;
-
- // Would like to put these into their own area.
-// typedef NullPredictor BranchPred;
- typedef TwobitBPredUnit<SimpleImpl> BranchPred;
- typedef FrontEnd<SimpleImpl> FrontEnd;
- // Will need IQ, LSQ eventually
- typedef InorderBackEnd<SimpleImpl> BackEnd;
-
- typedef OzoneDynInst<SimpleImpl> DynInst;
- typedef RefCountingPtr<DynInst> DynInstPtr;
-
- typedef uint64_t IssueStruct;
-
- enum {
- MaxThreads = 1
- };
-};
-
-#endif // __CPU_OZONE_SIMPLE_IMPL_HH__
+++ /dev/null
-
-
-#ifndef __CPU_OZONE_SIMPLE_PARAMS_HH__
-#define __CPU_OZONE_SIMPLE_PARAMS_HH__
-
-#include "cpu/ozone/cpu.hh"
-
-//Forward declarations
-class AlphaDTB;
-class AlphaITB;
-class FUPool;
-class FunctionalMemory;
-class MemInterface;
-class PageTable;
-class Process;
-class System;
-
-/**
- * This file defines the parameters that will be used for the OzoneCPU.
- * This must be defined externally so that the Impl can have a params class
- * defined that it can pass to all of the individual stages.
- */
-
-class SimpleParams : public BaseCPU::Params
-{
- public:
-
-#if FULL_SYSTEM
- AlphaITB *itb; AlphaDTB *dtb;
-#else
- std::vector<Process *> workload;
-// Process *process;
-#endif // FULL_SYSTEM
-
- //Page Table
- PageTable *pTable;
-
- FunctionalMemory *mem;
-
- //
- // Caches
- //
- MemInterface *icacheInterface;
- MemInterface *dcacheInterface;
-
- unsigned cachePorts;
- unsigned width;
- unsigned frontEndWidth;
- unsigned backEndWidth;
- unsigned backEndSquashLatency;
- unsigned backEndLatency;
- unsigned maxInstBufferSize;
- unsigned numPhysicalRegs;
- unsigned maxOutstandingMemOps;
- //
- // Fetch
- //
- unsigned decodeToFetchDelay;
- unsigned renameToFetchDelay;
- unsigned iewToFetchDelay;
- unsigned commitToFetchDelay;
- unsigned fetchWidth;
-
- //
- // Decode
- //
- unsigned renameToDecodeDelay;
- unsigned iewToDecodeDelay;
- unsigned commitToDecodeDelay;
- unsigned fetchToDecodeDelay;
- unsigned decodeWidth;
-
- //
- // Rename
- //
- unsigned iewToRenameDelay;
- unsigned commitToRenameDelay;
- unsigned decodeToRenameDelay;
- unsigned renameWidth;
-
- //
- // IEW
- //
- unsigned commitToIEWDelay;
- unsigned renameToIEWDelay;
- unsigned issueToExecuteDelay;
- unsigned issueWidth;
- unsigned executeWidth;
- unsigned executeIntWidth;
- unsigned executeFloatWidth;
- unsigned executeBranchWidth;
- unsigned executeMemoryWidth;
- FUPool *fuPool;
-
- //
- // Commit
- //
- unsigned iewToCommitDelay;
- unsigned renameToROBDelay;
- unsigned commitWidth;
- unsigned squashWidth;
-
- //
- // Branch predictor (BP & BTB)
- //
- unsigned localPredictorSize;
- unsigned localCtrBits;
- unsigned localHistoryTableSize;
- unsigned localHistoryBits;
- unsigned globalPredictorSize;
- unsigned globalCtrBits;
- unsigned globalHistoryBits;
- unsigned choicePredictorSize;
- unsigned choiceCtrBits;
-
- unsigned BTBEntries;
- unsigned BTBTagSize;
-
- unsigned RASSize;
-
- //
- // Load store queue
- //
- unsigned LQEntries;
- unsigned SQEntries;
-
- //
- // Memory dependence
- //
- unsigned SSITSize;
- unsigned LFSTSize;
-
- //
- // Miscellaneous
- //
- unsigned numPhysIntRegs;
- unsigned numPhysFloatRegs;
- unsigned numIQEntries;
- unsigned numROBEntries;
-
- bool decoupledFrontEnd;
- int dispatchWidth;
- int wbWidth;
-
- //SMT Parameters
- unsigned smtNumFetchingThreads;
-
- std::string smtFetchPolicy;
-
- std::string smtIQPolicy;
- unsigned smtIQThreshold;
-
- std::string smtLSQPolicy;
- unsigned smtLSQThreshold;
-
- std::string smtCommitPolicy;
-
- std::string smtROBPolicy;
- unsigned smtROBThreshold;
-
- // Probably can get this from somewhere.
- unsigned instShiftAmt;
-};
-
-#endif // __CPU_OZONE_SIMPLE_PARAMS_HH__
+++ /dev/null
-/*
- * Copyright (c) 2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __CPU_OZONE_THREAD_STATE_HH__
-#define __CPU_OZONE_THREAD_STATE_HH__
-
-#include "arch/faults.hh"
-#include "arch/isa_traits.hh"
-#include "cpu/exec_context.hh"
-#include "cpu/thread_state.hh"
-#include "sim/process.hh"
-
-class Event;
-//class Process;
-
-#if FULL_SYSTEM
-class EndQuiesceEvent;
-class FunctionProfile;
-class ProfileNode;
-#else
-class Process;
-class FunctionalMemory;
-#endif
-
-// Maybe this ozone thread state should only really have committed state?
-// I need to think about why I'm using this and what it's useful for. Clearly
-// has benefits for SMT; basically serves same use as CPUExecContext.
-// Makes the ExecContext proxy easier. Gives organization/central access point
-// to state of a thread that can be accessed normally (i.e. not in-flight
-// stuff within a OoO processor). Does this need an XC proxy within it?
-template <class Impl>
-struct OzoneThreadState : public ThreadState {
- typedef typename ExecContext::Status Status;
- typedef typename Impl::FullCPU FullCPU;
- typedef TheISA::MiscReg MiscReg;
-
-#if FULL_SYSTEM
- OzoneThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem)
- : ThreadState(-1, _thread_num, _mem),
- inSyscall(0), trapPending(0)
- {
- memset(®s, 0, sizeof(TheISA::RegFile));
- }
-#else
- OzoneThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid)
- : ThreadState(-1, _thread_num, _process->getMemory(), _process, _asid),
- cpu(_cpu), inSyscall(0), trapPending(0)
- {
- memset(®s, 0, sizeof(TheISA::RegFile));
- }
-
- OzoneThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem,
- int _asid)
- : ThreadState(-1, _thread_num, _mem, NULL, _asid),
- cpu(_cpu), inSyscall(0), trapPending(0)
- {
- memset(®s, 0, sizeof(TheISA::RegFile));
- }
-#endif
-
- Status _status;
-
- Status status() const { return _status; }
-
- void setStatus(Status new_status) { _status = new_status; }
-
- RenameTable<Impl> renameTable;
- Addr PC;
- Addr nextPC;
-
- // Current instruction
- TheISA::MachInst inst;
-
- TheISA::RegFile regs;
-
- typename Impl::FullCPU *cpu;
-
- bool inSyscall;
-
- bool trapPending;
-
- ExecContext *xcProxy;
-
- ExecContext *getXCProxy() { return xcProxy; }
-
-#if !FULL_SYSTEM
-
- Fault dummyTranslation(MemReqPtr &req)
- {
-#if 0
- assert((req->vaddr >> 48 & 0xffff) == 0);
-#endif
-
- // put the asid in the upper 16 bits of the paddr
- req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16);
- req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16;
- return NoFault;
- }
- Fault translateInstReq(MemReqPtr &req)
- {
- return dummyTranslation(req);
- }
- Fault translateDataReadReq(MemReqPtr &req)
- {
- return dummyTranslation(req);
- }
- Fault translateDataWriteReq(MemReqPtr &req)
- {
- return dummyTranslation(req);
- }
-#else
- Fault translateInstReq(MemReqPtr &req)
- {
- return cpu->itb->translate(req);
- }
-
- Fault translateDataReadReq(MemReqPtr &req)
- {
- return cpu->dtb->translate(req, false);
- }
-
- Fault translateDataWriteReq(MemReqPtr &req)
- {
- return cpu->dtb->translate(req, true);
- }
-#endif
-
- MiscReg readMiscReg(int misc_reg)
- {
- return regs.miscRegs.readReg(misc_reg);
- }
-
- MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
- {
- return regs.miscRegs.readRegWithEffect(misc_reg, fault, xcProxy);
- }
-
- Fault setMiscReg(int misc_reg, const MiscReg &val)
- {
- return regs.miscRegs.setReg(misc_reg, val);
- }
-
- Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val)
- {
- return regs.miscRegs.setRegWithEffect(misc_reg, val, xcProxy);
- }
-
- uint64_t readPC()
- { return PC; }
-
- void setPC(uint64_t val)
- { PC = val; }
-
- uint64_t readNextPC()
- { return nextPC; }
-
- void setNextPC(uint64_t val)
- { nextPC = val; }
-
- bool misspeculating() { return false; }
-
- void setInst(TheISA::MachInst _inst) { inst = _inst; }
-
- Counter readFuncExeInst() { return funcExeInst; }
-
- void setFuncExeInst(Counter new_val) { funcExeInst = new_val; }
-};
-
-#endif // __CPU_OZONE_THREAD_STATE_HH__
+++ /dev/null
-
-#include "cpu/exec_context.hh"
-#include "cpu/quiesce_event.hh"
-
-EndQuiesceEvent::EndQuiesceEvent(ExecContext *_xc)
- : Event(&mainEventQueue), xc(_xc)
-{
-}
-
-void
-EndQuiesceEvent::process()
-{
- xc->activate();
-}
-
-const char*
-EndQuiesceEvent::description()
-{
- return "End Quiesce Event.";
-}
+++ /dev/null
-#ifndef __CPU_QUIESCE_EVENT_HH__
-#define __CPU_QUIESCE_EVENT_HH__
-
-#include "sim/eventq.hh"
-
-class ExecContext;
-
-/** Event for timing out quiesce instruction */
-struct EndQuiesceEvent : public Event
-{
- /** A pointer to the execution context that is quiesced */
- ExecContext *xc;
-
- EndQuiesceEvent(ExecContext *_xc);
-
- /** Event process to occur at interrupt*/
- virtual void process();
-
- /** Event description */
- virtual const char *description();
-};
-
-#endif // __CPU_QUIESCE_EVENT_HH__
+++ /dev/null
-/*
- * Copyright (c) 2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __CPU_THREAD_STATE_HH__
-#define __CPU_THREAD_STATE_HH__
-
-#include "cpu/exec_context.hh"
-
-#if FULL_SYSTEM
-class EndQuiesceEvent;
-class FunctionProfile;
-class ProfileNode;
-namespace Kernel {
- class Statistics;
-};
-#else
-class FunctionalMemory;
-class Process;
-#endif
-
-/**
- * Struct for holding general thread state that is needed across CPU
- * models. This includes things such as pointers to the process,
- * memory, quiesce events, and certain stats. This can be expanded
- * to hold more thread-specific stats within it.
- */
-struct ThreadState {
-#if FULL_SYSTEM
- ThreadState(int _cpuId, int _tid, FunctionalMemory *_mem)
- : cpuId(_cpuId), tid(_tid), mem(_mem), lastActivate(0), lastSuspend(0),
- profile(NULL), profileNode(NULL), profilePC(0), quiesceEvent(NULL)
-#else
- ThreadState(int _cpuId, int _tid, FunctionalMemory *_mem,
- Process *_process, short _asid)
- : cpuId(_cpuId), tid(_tid), mem(_mem), process(_process), asid(_asid)
-#endif
- {
- funcExeInst = 0;
- storeCondFailures = 0;
- }
-
- ExecContext::Status status;
-
- int cpuId;
-
- // Index of hardware thread context on the CPU that this represents.
- int tid;
-
- Counter numInst;
- Stats::Scalar<> numInsts;
- Stats::Scalar<> numMemRefs;
-
- // number of simulated loads
- Counter numLoad;
- Counter startNumLoad;
-
- FunctionalMemory *mem; // functional storage for process address space
-
-#if FULL_SYSTEM
- Tick lastActivate;
- Tick lastSuspend;
-
- FunctionProfile *profile;
- ProfileNode *profileNode;
- Addr profilePC;
-
- EndQuiesceEvent *quiesceEvent;
-
- Kernel::Statistics *kernelStats;
-#else
- Process *process;
-
- // Address space ID. Note that this is used for TIMING cache
- // simulation only; all functional memory accesses should use
- // one of the FunctionalMemory pointers above.
- short asid;
-
-#endif
-
- /**
- * Temporary storage to pass the source address from copy_load to
- * copy_store.
- * @todo Remove this temporary when we have a better way to do it.
- */
- Addr copySrcAddr;
- /**
- * Temp storage for the physical source address of a copy.
- * @todo Remove this temporary when we have a better way to do it.
- */
- Addr copySrcPhysAddr;
-
- /*
- * number of executed instructions, for matching with syscall trace
- * points in EIO files.
- */
- Counter funcExeInst;
-
- //
- // Count failed store conditionals so we can warn of apparent
- // application deadlock situations.
- unsigned storeCondFailures;
-};
-
-#endif // __CPU_THREAD_STATE_HH__
+++ /dev/null
-from m5 import *
-from FullCPU import OpType
-from FullCPU import OpDesc
-from FullCPU import FUDesc
-
-class FUPool(SimObject):
- type = 'FUPool'
- FUList = VectorParam.FUDesc("list of FU's for this pool")
+++ /dev/null
-from m5 import *
-from BaseCPU import BaseCPU
-
-class DerivOzoneCPU(BaseCPU):
- type = 'DerivOzoneCPU'
-
- numThreads = Param.Unsigned("number of HW thread contexts")
-
- if not build_env['FULL_SYSTEM']:
- mem = Param.FunctionalMemory(NULL, "memory")
-
- checker = Param.BaseCPU("Checker CPU")
-
- width = Param.Unsigned("Width")
- frontEndWidth = Param.Unsigned("Front end width")
- backEndWidth = Param.Unsigned("Back end width")
- backEndSquashLatency = Param.Unsigned("Back end squash latency")
- backEndLatency = Param.Unsigned("Back end latency")
- maxInstBufferSize = Param.Unsigned("Maximum instruction buffer size")
- maxOutstandingMemOps = Param.Unsigned("Maximum number of outstanding memory operations")
- decodeToFetchDelay = Param.Unsigned("Decode to fetch delay")
- renameToFetchDelay = Param.Unsigned("Rename to fetch delay")
- iewToFetchDelay = Param.Unsigned("Issue/Execute/Writeback to fetch "
- "delay")
- commitToFetchDelay = Param.Unsigned("Commit to fetch delay")
- fetchWidth = Param.Unsigned("Fetch width")
-
- renameToDecodeDelay = Param.Unsigned("Rename to decode delay")
- iewToDecodeDelay = Param.Unsigned("Issue/Execute/Writeback to decode "
- "delay")
- commitToDecodeDelay = Param.Unsigned("Commit to decode delay")
- fetchToDecodeDelay = Param.Unsigned("Fetch to decode delay")
- decodeWidth = Param.Unsigned("Decode width")
-
- iewToRenameDelay = Param.Unsigned("Issue/Execute/Writeback to rename "
- "delay")
- commitToRenameDelay = Param.Unsigned("Commit to rename delay")
- decodeToRenameDelay = Param.Unsigned("Decode to rename delay")
- renameWidth = Param.Unsigned("Rename width")
-
- commitToIEWDelay = Param.Unsigned("Commit to "
- "Issue/Execute/Writeback delay")
- renameToIEWDelay = Param.Unsigned("Rename to "
- "Issue/Execute/Writeback delay")
- issueToExecuteDelay = Param.Unsigned("Issue to execute delay (internal "
- "to the IEW stage)")
- issueWidth = Param.Unsigned("Issue width")
- executeWidth = Param.Unsigned("Execute width")
- executeIntWidth = Param.Unsigned("Integer execute width")
- executeFloatWidth = Param.Unsigned("Floating point execute width")
- executeBranchWidth = Param.Unsigned("Branch execute width")
- executeMemoryWidth = Param.Unsigned("Memory execute width")
-
- iewToCommitDelay = Param.Unsigned("Issue/Execute/Writeback to commit "
- "delay")
- renameToROBDelay = Param.Unsigned("Rename to reorder buffer delay")
- commitWidth = Param.Unsigned("Commit width")
- squashWidth = Param.Unsigned("Squash width")
-
- localPredictorSize = Param.Unsigned("Size of local predictor")
- localCtrBits = Param.Unsigned("Bits per counter")
- localHistoryTableSize = Param.Unsigned("Size of local history table")
- localHistoryBits = Param.Unsigned("Bits for the local history")
- globalPredictorSize = Param.Unsigned("Size of global predictor")
- globalCtrBits = Param.Unsigned("Bits per counter")
- globalHistoryBits = Param.Unsigned("Bits of history")
- choicePredictorSize = Param.Unsigned("Size of choice predictor")
- choiceCtrBits = Param.Unsigned("Bits of choice counters")
-
- BTBEntries = Param.Unsigned("Number of BTB entries")
- BTBTagSize = Param.Unsigned("Size of the BTB tags, in bits")
-
- RASSize = Param.Unsigned("RAS size")
-
- LQEntries = Param.Unsigned("Number of load queue entries")
- SQEntries = Param.Unsigned("Number of store queue entries")
- LFSTSize = Param.Unsigned("Last fetched store table size")
- SSITSize = Param.Unsigned("Store set ID table size")
-
- numPhysIntRegs = Param.Unsigned("Number of physical integer registers")
- numPhysFloatRegs = Param.Unsigned("Number of physical floating point "
- "registers")
- numIQEntries = Param.Unsigned("Number of instruction queue entries")
- numROBEntries = Param.Unsigned("Number of reorder buffer entries")
-
- instShiftAmt = Param.Unsigned("Number of bits to shift instructions by")
-
- function_trace = Param.Bool(False, "Enable function trace")
- function_trace_start = Param.Tick(0, "Cycle to start function trace")
+++ /dev/null
-from m5 import *
-from BaseCPU import BaseCPU
-
-class SimpleOzoneCPU(BaseCPU):
- type = 'SimpleOzoneCPU'
-
- numThreads = Param.Unsigned("number of HW thread contexts")
-
- if not build_env['FULL_SYSTEM']:
- mem = Param.FunctionalMemory(NULL, "memory")
-
- width = Param.Unsigned("Width")
- frontEndWidth = Param.Unsigned("Front end width")
- backEndWidth = Param.Unsigned("Back end width")
- backEndSquashLatency = Param.Unsigned("Back end squash latency")
- backEndLatency = Param.Unsigned("Back end latency")
- maxInstBufferSize = Param.Unsigned("Maximum instruction buffer size")
- decodeToFetchDelay = Param.Unsigned("Decode to fetch delay")
- renameToFetchDelay = Param.Unsigned("Rename to fetch delay")
- iewToFetchDelay = Param.Unsigned("Issue/Execute/Writeback to fetch "
- "delay")
- commitToFetchDelay = Param.Unsigned("Commit to fetch delay")
- fetchWidth = Param.Unsigned("Fetch width")
-
- renameToDecodeDelay = Param.Unsigned("Rename to decode delay")
- iewToDecodeDelay = Param.Unsigned("Issue/Execute/Writeback to decode "
- "delay")
- commitToDecodeDelay = Param.Unsigned("Commit to decode delay")
- fetchToDecodeDelay = Param.Unsigned("Fetch to decode delay")
- decodeWidth = Param.Unsigned("Decode width")
-
- iewToRenameDelay = Param.Unsigned("Issue/Execute/Writeback to rename "
- "delay")
- commitToRenameDelay = Param.Unsigned("Commit to rename delay")
- decodeToRenameDelay = Param.Unsigned("Decode to rename delay")
- renameWidth = Param.Unsigned("Rename width")
-
- commitToIEWDelay = Param.Unsigned("Commit to "
- "Issue/Execute/Writeback delay")
- renameToIEWDelay = Param.Unsigned("Rename to "
- "Issue/Execute/Writeback delay")
- issueToExecuteDelay = Param.Unsigned("Issue to execute delay (internal "
- "to the IEW stage)")
- issueWidth = Param.Unsigned("Issue width")
- executeWidth = Param.Unsigned("Execute width")
- executeIntWidth = Param.Unsigned("Integer execute width")
- executeFloatWidth = Param.Unsigned("Floating point execute width")
- executeBranchWidth = Param.Unsigned("Branch execute width")
- executeMemoryWidth = Param.Unsigned("Memory execute width")
-
- iewToCommitDelay = Param.Unsigned("Issue/Execute/Writeback to commit "
- "delay")
- renameToROBDelay = Param.Unsigned("Rename to reorder buffer delay")
- commitWidth = Param.Unsigned("Commit width")
- squashWidth = Param.Unsigned("Squash width")
-
- localPredictorSize = Param.Unsigned("Size of local predictor")
- localCtrBits = Param.Unsigned("Bits per counter")
- localHistoryTableSize = Param.Unsigned("Size of local history table")
- localHistoryBits = Param.Unsigned("Bits for the local history")
- globalPredictorSize = Param.Unsigned("Size of global predictor")
- globalCtrBits = Param.Unsigned("Bits per counter")
- globalHistoryBits = Param.Unsigned("Bits of history")
- choicePredictorSize = Param.Unsigned("Size of choice predictor")
- choiceCtrBits = Param.Unsigned("Bits of choice counters")
-
- BTBEntries = Param.Unsigned("Number of BTB entries")
- BTBTagSize = Param.Unsigned("Size of the BTB tags, in bits")
-
- RASSize = Param.Unsigned("RAS size")
-
- LQEntries = Param.Unsigned("Number of load queue entries")
- SQEntries = Param.Unsigned("Number of store queue entries")
- LFSTSize = Param.Unsigned("Last fetched store table size")
- SSITSize = Param.Unsigned("Store set ID table size")
-
- numPhysIntRegs = Param.Unsigned("Number of physical integer registers")
- numPhysFloatRegs = Param.Unsigned("Number of physical floating point "
- "registers")
- numIQEntries = Param.Unsigned("Number of instruction queue entries")
- numROBEntries = Param.Unsigned("Number of reorder buffer entries")
-
- instShiftAmt = Param.Unsigned("Number of bits to shift instructions by")
-
- function_trace = Param.Bool(False, "Enable function trace")
- function_trace_start = Param.Tick(0, "Cycle to start function trace")
cpu/quiesce_event.cc
cpu/static_inst.cc
cpu/sampler/sampler.cc
-
+
+ encumbered/cpu/full/fu_pool.cc
+
mem/bridge.cc
mem/bus.cc
mem/connector.cc
'BusBridge',
'Cache',
'Chains',
+ 'Checker',
'Clock',
'Commit',
'CommitRate',
'IBE',
'BE',
'OzoneLSQ',
+ 'PCEvent',
+ 'PCIA',
+ 'PCIDEV',
+ 'PciConfigAll',
+ 'Pipeline',
+ 'Printf',
+ 'ROB',
+ 'Regs',
+ 'Rename',
+ 'RenameMap',
+ 'SQL',
+ 'Sampler',
+ 'ScsiCtrl',
+ 'ScsiDisk',
+ 'ScsiNone',
+ 'Serialize',
+ 'SimpleCPU',
+ 'SimpleDisk',
+ 'SimpleDiskData',
+ 'Sparc',
+ 'Split',
+ 'Stack',
+ 'StatEvents',
+ 'Stats',
+ 'StoreSet',
+ 'Syscall',
+ 'SyscallVerbose',
+ 'TCPIP',
+ 'TLB',
+ 'Thread',
+ 'Timer',
+ 'Tsunami',
+ 'Uart',
+ 'VtoPhys',
+ 'WriteBarrier',
+ 'Activity',
+ 'Scoreboard',
+ 'Writeback',
]
#
if 'AlphaFullCPU' in env['CPU_MODELS']:
sources += Split('''
+ base_dyn_inst.cc
o3/2bit_local_pred.cc
o3/alpha_dyn_inst.cc
o3/alpha_cpu.cc
if 'OzoneCPU' in env['CPU_MODELS']:
sources += Split('''
- ozone/back_end.cc
ozone/lsq_unit.cc
ozone/lw_back_end.cc
ozone/lw_lsq.cc
if 'CheckerCPU' in env['CPU_MODELS']:
sources += Split('''
checker/cpu.cc
- checker/cpu_builder.cc
checker/o3_cpu_builder.cc
''')
--- /dev/null
+
+#include "base/timebuf.hh"
+#include "cpu/activity.hh"
+
+ActivityRecorder::ActivityRecorder(int num_stages, int longest_latency,
+ int activity)
+ : activityBuffer(longest_latency, 0), longestLatency(longest_latency),
+ activityCount(activity), numStages(num_stages)
+{
+ stageActive = new bool[numStages];
+ memset(stageActive, 0, numStages);
+}
+
+void
+ActivityRecorder::activity()
+{
+ if (activityBuffer[0]) {
+ return;
+ }
+
+ activityBuffer[0] = true;
+
+ ++activityCount;
+
+ DPRINTF(Activity, "Activity: %i\n", activityCount);
+}
+
+void
+ActivityRecorder::advance()
+{
+ if (activityBuffer[-longestLatency]) {
+ --activityCount;
+
+ assert(activityCount >= 0);
+
+ DPRINTF(Activity, "Activity: %i\n", activityCount);
+
+ if (activityCount == 0) {
+ DPRINTF(Activity, "No activity left!\n");
+ }
+ }
+
+ activityBuffer.advance();
+}
+
+void
+ActivityRecorder::activateStage(const int idx)
+{
+ if (!stageActive[idx]) {
+ ++activityCount;
+
+ stageActive[idx] = true;
+
+ DPRINTF(Activity, "Activity: %i\n", activityCount);
+ } else {
+ DPRINTF(Activity, "Stage %i already active.\n", idx);
+ }
+
+// assert(activityCount < longestLatency + numStages + 1);
+}
+
+void
+ActivityRecorder::deactivateStage(const int idx)
+{
+ if (stageActive[idx]) {
+ --activityCount;
+
+ stageActive[idx] = false;
+
+ DPRINTF(Activity, "Activity: %i\n", activityCount);
+ } else {
+ DPRINTF(Activity, "Stage %i already inactive.\n", idx);
+ }
+
+ assert(activityCount >= 0);
+}
+
+void
+ActivityRecorder::reset()
+{
+ activityCount = 0;
+ memset(stageActive, 0, numStages);
+ for (int i = 0; i < longestLatency + 1; ++i)
+ activityBuffer.advance();
+}
+
+void
+ActivityRecorder::dump()
+{
+ for (int i = 0; i <= longestLatency; ++i) {
+ cprintf("[Idx:%i %i] ", i, activityBuffer[-i]);
+ }
+
+ cprintf("\n");
+
+ for (int i = 0; i < numStages; ++i) {
+ cprintf("[Stage:%i %i]\n", i, stageActive[i]);
+ }
+
+ cprintf("\n");
+
+ cprintf("Activity count: %i\n", activityCount);
+}
+
+void
+ActivityRecorder::validate()
+{
+ int count = 0;
+ for (int i = 0; i <= longestLatency; ++i) {
+ if (activityBuffer[-i]) {
+ count++;
+ }
+ }
+
+ for (int i = 0; i < numStages; ++i) {
+ if (stageActive[i]) {
+ count++;
+ }
+ }
+
+ assert(count == activityCount);
+}
--- /dev/null
+
+#ifndef __CPU_ACTIVITY_HH__
+#define __CPU_ACTIVITY_HH__
+
+#include "base/timebuf.hh"
+#include "base/trace.hh"
+
+class ActivityRecorder {
+ public:
+ ActivityRecorder(int num_stages, int longest_latency, int count);
+
+ /** Records that there is activity this cycle. */
+ void activity();
+ /** Advances the activity buffer, decrementing the activityCount if active
+ * communication just left the time buffer, and descheduling the CPU if
+ * there is no activity.
+ */
+ void advance();
+ /** Marks a stage as active. */
+ void activateStage(const int idx);
+ /** Deactivates a stage. */
+ void deactivateStage(const int idx);
+
+ int getActivityCount() { return activityCount; }
+
+ void setActivityCount(int count)
+ { activityCount = count; }
+
+ bool active() { return activityCount; }
+
+ void reset();
+
+ void dump();
+
+ void validate();
+
+ private:
+ /** Time buffer that tracks if any cycles has active communication
+ * in them. It should be as long as the longest communication
+ * latency in the system. Each time any time buffer is written,
+ * the activity buffer should also be written to. The
+ * activityBuffer is advanced along with all the other time
+ * buffers, so it should have a 1 somewhere in it only if there
+ * is active communication in a time buffer.
+ */
+ TimeBuffer<bool> activityBuffer;
+
+ int longestLatency;
+
+ /** Tracks how many stages and cycles of time buffer have
+ * activity. Stages increment this count when they switch to
+ * active, and decrement it when they switch to
+ * inactive. Whenever a cycle that previously had no information
+ * is written in the time buffer, this is incremented. When a
+ * cycle that had information exits the time buffer due to age,
+ * this count is decremented. When the count is 0, there is no
+ * activity in the CPU, and it can be descheduled.
+ */
+ int activityCount;
+
+ int numStages;
+
+ /** Records which stages are active/inactive. */
+ bool *stageActive;
+};
+
+#endif // __CPU_ACTIVITY_HH__
#else
xc->setCpuId(xc->getProcessPtr()->registerExecContext(xc));
#endif
- }
}
}
#include "arch/faults.hh"
#include "cpu/exetrace.hh"
-#include "mem/mem_req.hh"
+#include "mem/request.hh"
#include "cpu/base_dyn_inst.hh"
#include "cpu/o3/alpha_impl.hh"
#include "cpu/o3/alpha_cpu.hh"
-#include "cpu/ozone/simple_impl.hh"
-#include "cpu/ozone/ozone_impl.hh"
+//#include "cpu/ozone/simple_impl.hh"
+//#include "cpu/ozone/ozone_impl.hh"
using namespace std;
using namespace TheISA;
BaseDynInst<Impl>::initVars()
{
req = NULL;
- effAddr = MemReq::inval_addr;
- physEffAddr = MemReq::inval_addr;
+ effAddr = 0;
+ physEffAddr = 0;
storeSize = 0;
readyRegs = 0;
// This is the "functional" implementation of prefetch. Not much
// happens here since prefetches don't affect the architectural
// state.
-
+/*
// Generate a MemReq so we can translate the effective address.
MemReqPtr req = new MemReq(addr, thread->getXCProxy(), 1, flags);
req->asid = asid;
if (traceData) {
traceData->setAddr(addr);
}
+*/
}
template <class Impl>
// will casue a TLB miss trap if necessary... not sure whether
// that's the best thing to do or not. We don't really need the
// MemReq otherwise, since wh64 has no functional effect.
+/*
MemReqPtr req = new MemReq(addr, thread->getXCProxy(), size, flags);
req->asid = asid;
storeSize = size;
storeData = 0;
+*/
}
/**
Fault
BaseDynInst<Impl>::copySrcTranslate(Addr src)
{
+/*
MemReqPtr req = new MemReq(src, thread->getXCProxy(), 64);
req->asid = asid;
thread->copySrcPhysAddr = 0;
}
return fault;
+*/
+ return NoFault;
}
/**
Fault
BaseDynInst<Impl>::copy(Addr dest)
{
+/*
uint8_t data[64];
FunctionalMemory *mem = thread->mem;
assert(thread->copySrcPhysAddr || thread->misspeculating());
mem->write(req, data);
}
return fault;
+*/
+ return NoFault;
}
template <class Impl>
template <>
int
BaseDynInst<AlphaSimpleImpl>::instcount = 0;
-
+/*
// Forward declaration
template class BaseDynInst<SimpleImpl>;
template <>
int
BaseDynInst<OzoneImpl>::instcount = 0;
+*/
#include <list>
#include <string>
+#include "arch/faults.hh"
#include "base/fast_alloc.hh"
#include "base/trace.hh"
#include "config/full_system.hh"
#include "cpu/exetrace.hh"
#include "cpu/inst_seq.hh"
+#include "cpu/op_class.hh"
#include "cpu/static_inst.hh"
-#include "encumbered/cpu/full/op_class.hh"
-#include "mem/functional/memory_control.hh"
+#include "mem/packet.hh"
#include "sim/system.hh"
/*
#include "encumbered/cpu/full/bpred_update.hh"
Fault fault;
/** The memory request. */
- MemReqPtr req;
+// MemReqPtr req;
+ Request *req;
+// Packet pkt;
+
+ uint8_t *memData;
/** The effective virtual address (lds & stores only). */
Addr effAddr;
* @param p Memory accessed.
* @param nbytes Access size.
*/
- void
- trace_mem(Fault fault,
- MemCmd cmd,
- Addr addr,
- void *p,
- int nbytes);
+// void
+// trace_mem(Fault fault,
+// MemCmd cmd,
+// Addr addr,
+// void *p,
+// int nbytes);
/** Dumps out contents of this BaseDynInst. */
void dump();
void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; }
/** Returns the effective address. */
- const Addr &getEA() const { return req->vaddr; }
+ const Addr &getEA() const { return instEffAddr; }
/** Returns whether or not the eff. addr. calculation has been completed. */
bool doneEACalc() { return eaCalcDone; }
BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
{
if (executed) {
+ panic("Not supposed to re-execute with split mem ops!");
fault = cpu->read(req, data, lqIdx);
return fault;
}
- req = new MemReq(addr, thread->getXCProxy(), sizeof(T), flags);
- req->asid = asid;
- req->thread_num = threadNumber;
- req->pc = this->PC;
+ req = new Request();
+ req->setVirt(asid, addr, sizeof(T), flags, this->PC);
+ req->setThreadContext(thread->cpuId, threadNumber);
- if ((req->vaddr & (TheISA::VMPageSize - 1)) + req->size >
+ if ((req->getVaddr() & (TheISA::VMPageSize - 1)) + req->getSize() >
TheISA::VMPageSize) {
return TheISA::genAlignmentFault();
}
fault = cpu->translateDataReadReq(req);
- effAddr = req->vaddr;
- physEffAddr = req->paddr;
- memReqFlags = req->flags;
+ effAddr = req->getVaddr();
+ physEffAddr = req->getPaddr();
+ memReqFlags = req->getFlags();
if (fault == NoFault) {
#if FULL_SYSTEM
traceData->setData(data);
}
- req = new MemReq(addr, thread->getXCProxy(), sizeof(T), flags);
-
- req->asid = asid;
- req->thread_num = threadNumber;
- req->pc = this->PC;
+ req = new Request();
+ req->setVirt(asid, addr, sizeof(T), flags, this->PC);
+ req->setThreadContext(thread->cpuId, threadNumber);
- if ((req->vaddr & (TheISA::VMPageSize - 1)) + req->size >
+ if ((req->getVaddr() & (TheISA::VMPageSize - 1)) + req->getSize() >
TheISA::VMPageSize) {
return TheISA::genAlignmentFault();
}
fault = cpu->translateDataWriteReq(req);
- effAddr = req->vaddr;
- physEffAddr = req->paddr;
- memReqFlags = req->flags;
+ effAddr = req->getVaddr();
+ physEffAddr = req->getPaddr();
+ memReqFlags = req->getFlags();
if (fault == NoFault) {
#if FULL_SYSTEM
if (res) {
// always return some result to keep misspeculated paths
// (which will ignore faults) deterministic
- *res = (fault == NoFault) ? req->result : 0;
+ *res = (fault == NoFault) ? req->getScResult() : 0;
}
return fault;
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <list>
+#include <string>
+
+#include "base/refcnt.hh"
+#include "cpu/base.hh"
+#include "cpu/base_dyn_inst.hh"
+#include "cpu/checker/cpu.hh"
+#include "cpu/cpu_exec_context.hh"
+#include "cpu/exec_context.hh"
+#include "cpu/static_inst.hh"
+#include "sim/byteswap.hh"
+#include "sim/sim_object.hh"
+#include "sim/stats.hh"
+
+#include "cpu/o3/alpha_dyn_inst.hh"
+#include "cpu/o3/alpha_impl.hh"
+
+//#include "cpu/ozone/dyn_inst.hh"
+//#include "cpu/ozone/ozone_impl.hh"
+//#include "cpu/ozone/simple_impl.hh"
+
+#if FULL_SYSTEM
+#include "sim/system.hh"
+#include "arch/vtophys.hh"
+#endif // FULL_SYSTEM
+
+using namespace std;
+//The CheckerCPU does alpha only
+using namespace AlphaISA;
+
+void
+CheckerCPU::init()
+{
+}
+
+CheckerCPU::CheckerCPU(Params *p)
+ : BaseCPU(p), cpuXC(NULL), xcProxy(NULL)
+{
+ memReq = new Request();
+// memReq->data = new uint8_t[64];
+
+ numInst = 0;
+ startNumInst = 0;
+ numLoad = 0;
+ startNumLoad = 0;
+ youngestSN = 0;
+
+ changedPC = willChangePC = changedNextPC = false;
+
+ exitOnError = p->exitOnError;
+#if FULL_SYSTEM
+ itb = p->itb;
+ dtb = p->dtb;
+ systemPtr = NULL;
+ memPtr = NULL;
+#endif
+}
+
+CheckerCPU::~CheckerCPU()
+{
+}
+
+void
+CheckerCPU::setMemory(MemObject *mem)
+{
+ memPtr = mem;
+#if !FULL_SYSTEM
+ cpuXC = new CPUExecContext(this, /* thread_num */ 0, NULL,
+ /* asid */ 0, mem);
+
+ cpuXC->setStatus(ExecContext::Suspended);
+ xcProxy = cpuXC->getProxy();
+ execContexts.push_back(xcProxy);
+#else
+ if (systemPtr) {
+ cpuXC = new CPUExecContext(this, 0, systemPtr, itb, dtb, memPtr, false);
+
+ cpuXC->setStatus(ExecContext::Suspended);
+ xcProxy = cpuXC->getProxy();
+ execContexts.push_back(xcProxy);
+ memReq->xc = xcProxy;
+ delete cpuXC->kernelStats;
+ cpuXC->kernelStats = NULL;
+ }
+#endif
+}
+
+#if FULL_SYSTEM
+void
+CheckerCPU::setSystem(System *system)
+{
+ systemPtr = system;
+
+ if (memPtr) {
+ cpuXC = new CPUExecContext(this, 0, systemPtr, itb, dtb, memPtr, false);
+
+ cpuXC->setStatus(ExecContext::Suspended);
+ xcProxy = cpuXC->getProxy();
+ execContexts.push_back(xcProxy);
+ memReq->xc = xcProxy;
+ delete cpuXC->kernelStats;
+ cpuXC->kernelStats = NULL;
+ }
+}
+#endif
+
+void
+CheckerCPU::serialize(ostream &os)
+{
+/*
+ BaseCPU::serialize(os);
+ SERIALIZE_SCALAR(inst);
+ nameOut(os, csprintf("%s.xc", name()));
+ cpuXC->serialize(os);
+ cacheCompletionEvent.serialize(os);
+*/
+}
+
+void
+CheckerCPU::unserialize(Checkpoint *cp, const string §ion)
+{
+/*
+ BaseCPU::unserialize(cp, section);
+ UNSERIALIZE_SCALAR(inst);
+ cpuXC->unserialize(cp, csprintf("%s.xc", section));
+*/
+}
+
+Fault
+CheckerCPU::copySrcTranslate(Addr src)
+{
+ panic("Unimplemented!");
+}
+
+Fault
+CheckerCPU::copy(Addr dest)
+{
+ panic("Unimplemented!");
+}
+
+template <class T>
+Fault
+CheckerCPU::read(Addr addr, T &data, unsigned flags)
+{
+/*
+ memReq->reset(addr, sizeof(T), flags);
+
+ // translate to physical address
+ translateDataReadReq(memReq);
+
+ memReq->cmd = Read;
+ memReq->completionEvent = NULL;
+ memReq->time = curTick;
+ memReq->flags &= ~INST_READ;
+
+ if (!(memReq->flags & UNCACHEABLE)) {
+ // Access memory to see if we have the same data
+ cpuXC->read(memReq, data);
+ } else {
+ // Assume the data is correct if it's an uncached access
+ memcpy(&data, &unverifiedResult.integer, sizeof(T));
+ }
+*/
+ return NoFault;
+}
+
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+
+template
+Fault
+CheckerCPU::read(Addr addr, uint64_t &data, unsigned flags);
+
+template
+Fault
+CheckerCPU::read(Addr addr, uint32_t &data, unsigned flags);
+
+template
+Fault
+CheckerCPU::read(Addr addr, uint16_t &data, unsigned flags);
+
+template
+Fault
+CheckerCPU::read(Addr addr, uint8_t &data, unsigned flags);
+
+#endif //DOXYGEN_SHOULD_SKIP_THIS
+
+template<>
+Fault
+CheckerCPU::read(Addr addr, double &data, unsigned flags)
+{
+ return read(addr, *(uint64_t*)&data, flags);
+}
+
+template<>
+Fault
+CheckerCPU::read(Addr addr, float &data, unsigned flags)
+{
+ return read(addr, *(uint32_t*)&data, flags);
+}
+
+template<>
+Fault
+CheckerCPU::read(Addr addr, int32_t &data, unsigned flags)
+{
+ return read(addr, (uint32_t&)data, flags);
+}
+
+template <class T>
+Fault
+CheckerCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
+{
+/*
+ memReq->reset(addr, sizeof(T), flags);
+
+ // translate to physical address
+ cpuXC->translateDataWriteReq(memReq);
+
+ // Can compare the write data and result only if it's cacheable,
+ // not a store conditional, or is a store conditional that
+ // succeeded.
+ // @todo: Verify that actual memory matches up with these values.
+ // Right now it only verifies that the instruction data is the
+ // same as what was in the request that got sent to memory; there
+ // is no verification that it is the same as what is in memory.
+ // This is because the LSQ would have to be snooped in the CPU to
+ // verify this data.
+ if (unverifiedReq &&
+ !(unverifiedReq->flags & UNCACHEABLE) &&
+ (!(unverifiedReq->flags & LOCKED) ||
+ ((unverifiedReq->flags & LOCKED) &&
+ unverifiedReq->result == 1))) {
+#if 0
+ memReq->cmd = Read;
+ memReq->completionEvent = NULL;
+ memReq->time = curTick;
+ memReq->flags &= ~INST_READ;
+ cpuXC->read(memReq, inst_data);
+#endif
+ T inst_data;
+ memcpy(&inst_data, unverifiedReq->data, sizeof(T));
+
+ if (data != inst_data) {
+ warn("%lli: Store value does not match value in memory! "
+ "Instruction: %#x, memory: %#x",
+ curTick, inst_data, data);
+ handleError();
+ }
+ }
+
+ // Assume the result was the same as the one passed in. This checker
+ // doesn't check if the SC should succeed or fail, it just checks the
+ // value.
+ if (res)
+ *res = unverifiedReq->result;
+ */
+ return NoFault;
+}
+
+
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+template
+Fault
+CheckerCPU::write(uint64_t data, Addr addr, unsigned flags, uint64_t *res);
+
+template
+Fault
+CheckerCPU::write(uint32_t data, Addr addr, unsigned flags, uint64_t *res);
+
+template
+Fault
+CheckerCPU::write(uint16_t data, Addr addr, unsigned flags, uint64_t *res);
+
+template
+Fault
+CheckerCPU::write(uint8_t data, Addr addr, unsigned flags, uint64_t *res);
+
+#endif //DOXYGEN_SHOULD_SKIP_THIS
+
+template<>
+Fault
+CheckerCPU::write(double data, Addr addr, unsigned flags, uint64_t *res)
+{
+ return write(*(uint64_t*)&data, addr, flags, res);
+}
+
+template<>
+Fault
+CheckerCPU::write(float data, Addr addr, unsigned flags, uint64_t *res)
+{
+ return write(*(uint32_t*)&data, addr, flags, res);
+}
+
+template<>
+Fault
+CheckerCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res)
+{
+ return write((uint32_t)data, addr, flags, res);
+}
+
+
+#if FULL_SYSTEM
+Addr
+CheckerCPU::dbg_vtophys(Addr addr)
+{
+ return vtophys(xcProxy, addr);
+}
+#endif // FULL_SYSTEM
+
+bool
+CheckerCPU::translateInstReq(Request *req)
+{
+#if FULL_SYSTEM
+ return (cpuXC->translateInstReq(req) == NoFault);
+#else
+ cpuXC->translateInstReq(req);
+ return true;
+#endif
+}
+
+void
+CheckerCPU::translateDataReadReq(Request *req)
+{
+ cpuXC->translateDataReadReq(req);
+
+ if (req->getVaddr() != unverifiedReq->getVaddr()) {
+ warn("%lli: Request virtual addresses do not match! Inst: %#x, "
+ "checker: %#x",
+ curTick, unverifiedReq->getVaddr(), req->getVaddr());
+ handleError();
+ }
+ req->setPaddr(unverifiedReq->getPaddr());
+
+ if (checkFlags(req)) {
+ warn("%lli: Request flags do not match! Inst: %#x, checker: %#x",
+ curTick, unverifiedReq->getFlags(), req->getFlags());
+ handleError();
+ }
+}
+
+void
+CheckerCPU::translateDataWriteReq(Request *req)
+{
+ cpuXC->translateDataWriteReq(req);
+
+ if (req->getVaddr() != unverifiedReq->getVaddr()) {
+ warn("%lli: Request virtual addresses do not match! Inst: %#x, "
+ "checker: %#x",
+ curTick, unverifiedReq->getVaddr(), req->getVaddr());
+ handleError();
+ }
+ req->setPaddr(unverifiedReq->getPaddr());
+
+ if (checkFlags(req)) {
+ warn("%lli: Request flags do not match! Inst: %#x, checker: %#x",
+ curTick, unverifiedReq->getFlags(), req->getFlags());
+ handleError();
+ }
+}
+
+bool
+CheckerCPU::checkFlags(Request *req)
+{
+ // Remove any dynamic flags that don't have to do with the request itself.
+ unsigned flags = unverifiedReq->getFlags();
+ unsigned mask = LOCKED | PHYSICAL | VPTE | ALTMODE | UNCACHEABLE | NO_FAULT;
+ flags = flags & (mask);
+ if (flags == req->getFlags()) {
+ return false;
+ } else {
+ return true;
+ }
+}
+
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::tick(DynInstPtr &completed_inst)
+{
+ DynInstPtr inst;
+
+ // Either check this instruction, or add it to a list of
+ // instructions waiting to be checked. Instructions must be
+ // checked in program order, so if a store has committed yet not
+ // completed, there may be some instructions that are waiting
+ // behind it that have completed and must be checked.
+ if (!instList.empty()) {
+ if (youngestSN < completed_inst->seqNum) {
+ DPRINTF(Checker, "Adding instruction [sn:%lli] PC:%#x to list.\n",
+ completed_inst->seqNum, completed_inst->readPC());
+ instList.push_back(completed_inst);
+ youngestSN = completed_inst->seqNum;
+ }
+
+ if (!instList.front()->isCompleted()) {
+ return;
+ } else {
+ inst = instList.front();
+ instList.pop_front();
+ }
+ } else {
+ if (!completed_inst->isCompleted()) {
+ if (youngestSN < completed_inst->seqNum) {
+ DPRINTF(Checker, "Adding instruction [sn:%lli] PC:%#x to list.\n",
+ completed_inst->seqNum, completed_inst->readPC());
+ instList.push_back(completed_inst);
+ youngestSN = completed_inst->seqNum;
+ }
+ return;
+ } else {
+ if (youngestSN < completed_inst->seqNum) {
+ inst = completed_inst;
+ youngestSN = completed_inst->seqNum;
+ } else {
+ return;
+ }
+ }
+ }
+
+ // Try to check all instructions that are completed, ending if we
+ // run out of instructions to check or if an instruction is not
+ // yet completed.
+ while (1) {
+ DPRINTF(Checker, "Processing instruction [sn:%lli] PC:%#x.\n",
+ inst->seqNum, inst->readPC());
+ unverifiedResult.integer = inst->readIntResult();
+ unverifiedReq = inst->req;
+ numCycles++;
+
+ Fault fault = NoFault;
+
+ // maintain $r0 semantics
+ cpuXC->setIntReg(ZeroReg, 0);
+#ifdef TARGET_ALPHA
+ cpuXC->setFloatRegDouble(ZeroReg, 0.0);
+#endif // TARGET_ALPHA
+
+ // Check if any recent PC changes match up with anything we
+ // expect to happen. This is mostly to check if traps or
+ // PC-based events have occurred in both the checker and CPU.
+ if (changedPC) {
+ DPRINTF(Checker, "Changed PC recently to %#x\n",
+ cpuXC->readPC());
+ if (willChangePC) {
+ if (newPC == cpuXC->readPC()) {
+ DPRINTF(Checker, "Changed PC matches expected PC\n");
+ } else {
+ warn("%lli: Changed PC does not match expected PC, "
+ "changed: %#x, expected: %#x",
+ curTick, cpuXC->readPC(), newPC);
+ handleError();
+ }
+ willChangePC = false;
+ }
+ changedPC = false;
+ }
+ if (changedNextPC) {
+ DPRINTF(Checker, "Changed NextPC recently to %#x\n",
+ cpuXC->readNextPC());
+ changedNextPC = false;
+ }
+
+ // Try to fetch the instruction
+
+#if FULL_SYSTEM
+#define IFETCH_FLAGS(pc) ((pc) & 1) ? PHYSICAL : 0
+#else
+#define IFETCH_FLAGS(pc) 0
+#endif
+
+ // set up memory request for instruction fetch
+// memReq->cmd = Read;
+// memReq->reset(cpuXC->readPC() & ~3, sizeof(uint32_t),
+// IFETCH_FLAGS(cpuXC->readPC()));
+
+ bool succeeded = translateInstReq(memReq);
+
+ if (!succeeded) {
+ if (inst->getFault() == NoFault) {
+ // In this case the instruction was not a dummy
+ // instruction carrying an ITB fault. In the single
+ // threaded case the ITB should still be able to
+ // translate this instruction; in the SMT case it's
+ // possible that its ITB entry was kicked out.
+ warn("%lli: Instruction PC %#x was not found in the ITB!",
+ curTick, cpuXC->readPC());
+ handleError();
+
+ // go to the next instruction
+ cpuXC->setPC(cpuXC->readNextPC());
+ cpuXC->setNextPC(cpuXC->readNextPC() + sizeof(MachInst));
+
+ return;
+ } else {
+ // The instruction is carrying an ITB fault. Handle
+ // the fault and see if our results match the CPU on
+ // the next tick().
+ fault = inst->getFault();
+ }
+ }
+
+ if (fault == NoFault) {
+// cpuXC->read(memReq, machInst);
+
+ // keep an instruction count
+ numInst++;
+
+ // decode the instruction
+ machInst = gtoh(machInst);
+ // Checks that the instruction matches what we expected it to be.
+ // Checks both the machine instruction and the PC.
+ validateInst(inst);
+
+ curStaticInst = StaticInst::decode(makeExtMI(machInst,
+ cpuXC->readPC()));
+
+#if FULL_SYSTEM
+ cpuXC->setInst(machInst);
+#endif // FULL_SYSTEM
+
+ fault = inst->getFault();
+ }
+
+ // Either the instruction was a fault and we should process the fault,
+ // or we should just go ahead execute the instruction. This assumes
+ // that the instruction is properly marked as a fault.
+ if (fault == NoFault) {
+
+ cpuXC->func_exe_inst++;
+
+ fault = curStaticInst->execute(this, NULL);
+
+ // Checks to make sure instrution results are correct.
+ validateExecution(inst);
+
+ if (curStaticInst->isLoad()) {
+ ++numLoad;
+ }
+ }
+
+ if (fault != NoFault) {
+#if FULL_SYSTEM
+ fault->invoke(xcProxy);
+ willChangePC = true;
+ newPC = cpuXC->readPC();
+ DPRINTF(Checker, "Fault, PC is now %#x\n", newPC);
+#else // !FULL_SYSTEM
+ fatal("fault (%d) detected @ PC 0x%08p", fault, cpuXC->readPC());
+#endif // FULL_SYSTEM
+ } else {
+#if THE_ISA != MIPS_ISA
+ // go to the next instruction
+ cpuXC->setPC(cpuXC->readNextPC());
+ cpuXC->setNextPC(cpuXC->readNextPC() + sizeof(MachInst));
+#else
+ // go to the next instruction
+ cpuXC->setPC(cpuXC->readNextPC());
+ cpuXC->setNextPC(cpuXC->readNextNPC());
+ cpuXC->setNextNPC(cpuXC->readNextNPC() + sizeof(MachInst));
+#endif
+
+ }
+
+#if FULL_SYSTEM
+ // @todo: Determine if these should happen only if the
+ // instruction hasn't faulted. In the SimpleCPU case this may
+ // not be true, but in the O3 or Ozone case this may be true.
+ Addr oldpc;
+ int count = 0;
+ do {
+ oldpc = cpuXC->readPC();
+ system->pcEventQueue.service(xcProxy);
+ count++;
+ } while (oldpc != cpuXC->readPC());
+ if (count > 1) {
+ willChangePC = true;
+ newPC = cpuXC->readPC();
+ DPRINTF(Checker, "PC Event, PC is now %#x\n", newPC);
+ }
+#endif
+
+ // @todo: Optionally can check all registers. (Or just those
+ // that have been modified).
+ validateState();
+
+ // Continue verifying instructions if there's another completed
+ // instruction waiting to be verified.
+ if (instList.empty()) {
+ break;
+ } else if (instList.front()->isCompleted()) {
+ inst = instList.front();
+ instList.pop_front();
+ } else {
+ break;
+ }
+ }
+}
+
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::switchOut(Sampler *s)
+{
+ instList.clear();
+}
+
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::takeOverFrom(BaseCPU *oldCPU)
+{
+}
+
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::validateInst(DynInstPtr &inst)
+{
+ if (inst->readPC() != cpuXC->readPC()) {
+ warn("%lli: PCs do not match! Inst: %#x, checker: %#x",
+ curTick, inst->readPC(), cpuXC->readPC());
+ if (changedPC) {
+ warn("%lli: Changed PCs recently, may not be an error",
+ curTick);
+ } else {
+ handleError();
+ }
+ }
+
+ MachInst mi = static_cast<MachInst>(inst->staticInst->machInst);
+
+ if (mi != machInst) {
+ warn("%lli: Binary instructions do not match! Inst: %#x, "
+ "checker: %#x",
+ curTick, mi, machInst);
+ handleError();
+ }
+}
+
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::validateExecution(DynInstPtr &inst)
+{
+ if (inst->numDestRegs()) {
+ // @todo: Support more destination registers.
+ if (inst->isUnverifiable()) {
+ // Unverifiable instructions assume they were executed
+ // properly by the CPU. Grab the result from the
+ // instruction and write it to the register.
+ RegIndex idx = inst->destRegIdx(0);
+ if (idx < TheISA::FP_Base_DepTag) {
+ cpuXC->setIntReg(idx, inst->readIntResult());
+ } else if (idx < TheISA::Fpcr_DepTag) {
+ cpuXC->setFloatRegBits(idx, inst->readIntResult());
+ } else {
+ cpuXC->setMiscReg(idx, inst->readIntResult());
+ }
+ } else if (result.integer != inst->readIntResult()) {
+ warn("%lli: Instruction results do not match! (Results may not "
+ "actually be integers) Inst: %#x, checker: %#x",
+ curTick, inst->readIntResult(), result.integer);
+ handleError();
+ }
+ }
+
+ if (inst->readNextPC() != cpuXC->readNextPC()) {
+ warn("%lli: Instruction next PCs do not match! Inst: %#x, "
+ "checker: %#x",
+ curTick, inst->readNextPC(), cpuXC->readNextPC());
+ handleError();
+ }
+
+ // Checking side effect registers can be difficult if they are not
+ // checked simultaneously with the execution of the instruction.
+ // This is because other valid instructions may have modified
+ // these registers in the meantime, and their values are not
+ // stored within the DynInst.
+ while (!miscRegIdxs.empty()) {
+ int misc_reg_idx = miscRegIdxs.front();
+ miscRegIdxs.pop();
+
+ if (inst->xcBase()->readMiscReg(misc_reg_idx) !=
+ cpuXC->readMiscReg(misc_reg_idx)) {
+ warn("%lli: Misc reg idx %i (side effect) does not match! "
+ "Inst: %#x, checker: %#x",
+ curTick, misc_reg_idx,
+ inst->xcBase()->readMiscReg(misc_reg_idx),
+ cpuXC->readMiscReg(misc_reg_idx));
+ handleError();
+ }
+ }
+}
+
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::validateState()
+{
+}
+
+template <class DynInstPtr>
+void
+Checker<DynInstPtr>::dumpInsts()
+{
+ int num = 0;
+
+ InstListIt inst_list_it = --(instList.end());
+
+ cprintf("Inst list size: %i\n", instList.size());
+
+ while (inst_list_it != instList.end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Completed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isCompleted());
+
+ cprintf("\n");
+
+ inst_list_it--;
+ ++num;
+ }
+
+}
+
+//template
+//class Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > >;
+
+template
+class Checker<RefCountingPtr<AlphaDynInst<AlphaSimpleImpl> > >;
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_CHECKER_CPU_HH__
+#define __CPU_CHECKER_CPU_HH__
+
+#include <list>
+#include <queue>
+#include <map>
+
+#include "arch/types.hh"
+#include "base/statistics.hh"
+#include "config/full_system.hh"
+#include "cpu/base.hh"
+#include "cpu/base_dyn_inst.hh"
+#include "cpu/cpu_exec_context.hh"
+#include "cpu/pc_event.hh"
+#include "cpu/static_inst.hh"
+#include "sim/eventq.hh"
+
+// forward declarations
+#if FULL_SYSTEM
+class Processor;
+class AlphaITB;
+class AlphaDTB;
+class PhysicalMemory;
+
+class RemoteGDB;
+class GDBListener;
+
+#else
+
+class Process;
+
+#endif // FULL_SYSTEM
+template <class>
+class BaseDynInst;
+class ExecContext;
+class MemInterface;
+class Checkpoint;
+class Request;
+class Sampler;
+
+class CheckerCPU : public BaseCPU
+{
+ protected:
+ typedef TheISA::MachInst MachInst;
+ typedef TheISA::FloatReg FloatReg;
+ typedef TheISA::FloatRegBits FloatRegBits;
+ typedef TheISA::MiscReg MiscReg;
+ public:
+ // main simulation loop (one cycle)
+ virtual void init();
+
+ struct Params : public BaseCPU::Params
+ {
+#if FULL_SYSTEM
+ AlphaITB *itb;
+ AlphaDTB *dtb;
+ FunctionalMemory *mem;
+#else
+ Process *process;
+#endif
+ bool exitOnError;
+ };
+
+ public:
+ CheckerCPU(Params *p);
+ virtual ~CheckerCPU();
+
+ void setMemory(MemObject *mem);
+
+ MemObject *memPtr;
+
+#if FULL_SYSTEM
+ void setSystem(System *system);
+
+ System *systemPtr;
+#endif
+ public:
+ // execution context
+ CPUExecContext *cpuXC;
+
+ ExecContext *xcProxy;
+
+ AlphaITB *itb;
+ AlphaDTB *dtb;
+
+#if FULL_SYSTEM
+ Addr dbg_vtophys(Addr addr);
+#endif
+
+ union Result {
+ uint64_t integer;
+ float fp;
+ double dbl;
+ };
+
+ Result result;
+
+ // current instruction
+ MachInst machInst;
+
+ // Refcounted pointer to the one memory request.
+ Request *memReq;
+
+ StaticInstPtr curStaticInst;
+
+ // number of simulated instructions
+ Counter numInst;
+ Counter startNumInst;
+
+ std::queue<int> miscRegIdxs;
+
+ virtual Counter totalInstructions() const
+ {
+ return numInst - startNumInst;
+ }
+
+ // number of simulated loads
+ Counter numLoad;
+ Counter startNumLoad;
+
+ virtual void serialize(std::ostream &os);
+ virtual void unserialize(Checkpoint *cp, const std::string §ion);
+
+ template <class T>
+ Fault read(Addr addr, T &data, unsigned flags);
+
+ template <class T>
+ Fault write(T data, Addr addr, unsigned flags, uint64_t *res);
+
+ // These functions are only used in CPU models that split
+ // effective address computation from the actual memory access.
+ void setEA(Addr EA) { panic("SimpleCPU::setEA() not implemented\n"); }
+ Addr getEA() { panic("SimpleCPU::getEA() not implemented\n"); }
+
+ void prefetch(Addr addr, unsigned flags)
+ {
+ // need to do this...
+ }
+
+ void writeHint(Addr addr, int size, unsigned flags)
+ {
+ // need to do this...
+ }
+
+ Fault copySrcTranslate(Addr src);
+
+ Fault copy(Addr dest);
+
+ // The register accessor methods provide the index of the
+ // instruction's operand (e.g., 0 or 1), not the architectural
+ // register index, to simplify the implementation of register
+ // renaming. We find the architectural register index by indexing
+ // into the instruction's own operand index table. Note that a
+ // raw pointer to the StaticInst is provided instead of a
+ // ref-counted StaticInstPtr to redice overhead. This is fine as
+ // long as these methods don't copy the pointer into any long-term
+ // storage (which is pretty hard to imagine they would have reason
+ // to do).
+
+ uint64_t readIntReg(const StaticInst *si, int idx)
+ {
+ return cpuXC->readIntReg(si->srcRegIdx(idx));
+ }
+
+ FloatReg readFloatReg(const StaticInst *si, int idx, int width)
+ {
+ int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
+ return cpuXC->readFloatReg(reg_idx, width);
+ }
+
+ FloatReg readFloatReg(const StaticInst *si, int idx)
+ {
+ int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
+ return cpuXC->readFloatReg(reg_idx);
+ }
+
+ FloatRegBits readFloatRegBits(const StaticInst *si, int idx, int width)
+ {
+ int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
+ return cpuXC->readFloatRegBits(reg_idx, width);
+ }
+
+ FloatRegBits readFloatRegBits(const StaticInst *si, int idx)
+ {
+ int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
+ return cpuXC->readFloatRegBits(reg_idx);
+ }
+
+ void setIntReg(const StaticInst *si, int idx, uint64_t val)
+ {
+ cpuXC->setIntReg(si->destRegIdx(idx), val);
+ result.integer = val;
+ }
+
+ void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width)
+ {
+ int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
+ cpuXC->setFloatReg(reg_idx, val, width);
+ switch(width) {
+ case 32:
+ result.fp = val;
+ break;
+ case 64:
+ result.dbl = val;
+ break;
+ };
+ }
+
+ void setFloatReg(const StaticInst *si, int idx, FloatReg val)
+ {
+ int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
+ cpuXC->setFloatReg(reg_idx, val);
+ result.fp = val;
+ }
+
+ void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val,
+ int width)
+ {
+ int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
+ cpuXC->setFloatRegBits(reg_idx, val, width);
+ result.integer = val;
+ }
+
+ void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val)
+ {
+ int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
+ cpuXC->setFloatRegBits(reg_idx, val);
+ result.integer = val;
+ }
+
+ uint64_t readPC() { return cpuXC->readPC(); }
+
+ uint64_t readNextPC() { return cpuXC->readNextPC(); }
+
+ void setNextPC(uint64_t val) {
+ cpuXC->setNextPC(val);
+ }
+
+ MiscReg readMiscReg(int misc_reg)
+ {
+ return cpuXC->readMiscReg(misc_reg);
+ }
+
+ MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
+ {
+ return cpuXC->readMiscRegWithEffect(misc_reg, fault);
+ }
+
+ Fault setMiscReg(int misc_reg, const MiscReg &val)
+ {
+ result.integer = val;
+ miscRegIdxs.push(misc_reg);
+ return cpuXC->setMiscReg(misc_reg, val);
+ }
+
+ Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val)
+ {
+ miscRegIdxs.push(misc_reg);
+ return cpuXC->setMiscRegWithEffect(misc_reg, val);
+ }
+
+ void recordPCChange(uint64_t val) { changedPC = true; }
+ void recordNextPCChange(uint64_t val) { changedNextPC = true; }
+
+ bool translateInstReq(Request *req);
+ void translateDataWriteReq(Request *req);
+ void translateDataReadReq(Request *req);
+
+#if FULL_SYSTEM
+ Fault hwrei() { return cpuXC->hwrei(); }
+ int readIntrFlag() { return cpuXC->readIntrFlag(); }
+ void setIntrFlag(int val) { cpuXC->setIntrFlag(val); }
+ bool inPalMode() { return cpuXC->inPalMode(); }
+ void ev5_trap(Fault fault) { fault->invoke(xcProxy); }
+ bool simPalCheck(int palFunc) { return cpuXC->simPalCheck(palFunc); }
+#else
+ // Assume that the normal CPU's call to syscall was successful.
+ // The checker's state would have already been updated by the syscall.
+ void syscall(uint64_t callnum) { }
+#endif
+
+ void handleError()
+ {
+ if (exitOnError)
+ panic("Checker found error!");
+ }
+ bool checkFlags(Request *req);
+
+ ExecContext *xcBase() { return xcProxy; }
+ CPUExecContext *cpuXCBase() { return cpuXC; }
+
+ Result unverifiedResult;
+ Request *unverifiedReq;
+
+ bool changedPC;
+ bool willChangePC;
+ uint64_t newPC;
+ bool changedNextPC;
+ bool exitOnError;
+
+ InstSeqNum youngestSN;
+};
+
+template <class DynInstPtr>
+class Checker : public CheckerCPU
+{
+ public:
+ Checker(Params *p)
+ : CheckerCPU(p)
+ { }
+
+ void switchOut(Sampler *s);
+ void takeOverFrom(BaseCPU *oldCPU);
+
+ void tick(DynInstPtr &inst);
+
+ void validateInst(DynInstPtr &inst);
+ void validateExecution(DynInstPtr &inst);
+ void validateState();
+
+ std::list<DynInstPtr> instList;
+ typedef typename std::list<DynInstPtr>::iterator InstListIt;
+ void dumpInsts();
+};
+
+#endif // __CPU_CHECKER_CPU_HH__
--- /dev/null
+
+#include <string>
+
+#include "cpu/checker/cpu.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/ozone/dyn_inst.hh"
+#include "cpu/ozone/ozone_impl.hh"
+#include "mem/base_mem.hh"
+#include "sim/builder.hh"
+#include "sim/process.hh"
+#include "sim/sim_object.hh"
+
+class OzoneChecker : public Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > >
+{
+ public:
+ OzoneChecker(Params *p)
+ : Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > >(p)
+ { }
+};
+
+////////////////////////////////////////////////////////////////////////
+//
+// CheckerCPU Simulation Object
+//
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker)
+
+ Param<Counter> max_insts_any_thread;
+ Param<Counter> max_insts_all_threads;
+ Param<Counter> max_loads_any_thread;
+ Param<Counter> max_loads_all_threads;
+
+#if FULL_SYSTEM
+ SimObjectParam<AlphaITB *> itb;
+ SimObjectParam<AlphaDTB *> dtb;
+ SimObjectParam<FunctionalMemory *> mem;
+ SimObjectParam<System *> system;
+ Param<int> cpu_id;
+ Param<Tick> profile;
+#else
+ SimObjectParam<Process *> workload;
+#endif // FULL_SYSTEM
+ Param<int> clock;
+ SimObjectParam<BaseMem *> icache;
+ SimObjectParam<BaseMem *> dcache;
+
+ Param<bool> defer_registration;
+ Param<bool> exitOnError;
+ Param<bool> function_trace;
+ Param<Tick> function_trace_start;
+
+END_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker)
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(OzoneChecker)
+
+ INIT_PARAM(max_insts_any_thread,
+ "terminate when any thread reaches this inst count"),
+ INIT_PARAM(max_insts_all_threads,
+ "terminate when all threads have reached this inst count"),
+ INIT_PARAM(max_loads_any_thread,
+ "terminate when any thread reaches this load count"),
+ INIT_PARAM(max_loads_all_threads,
+ "terminate when all threads have reached this load count"),
+
+#if FULL_SYSTEM
+ INIT_PARAM(itb, "Instruction TLB"),
+ INIT_PARAM(dtb, "Data TLB"),
+ INIT_PARAM(mem, "memory"),
+ INIT_PARAM(system, "system object"),
+ INIT_PARAM(cpu_id, "processor ID"),
+ INIT_PARAM(profile, ""),
+#else
+ INIT_PARAM(workload, "processes to run"),
+#endif // FULL_SYSTEM
+
+ INIT_PARAM(clock, "clock speed"),
+ INIT_PARAM(icache, "L1 instruction cache object"),
+ INIT_PARAM(dcache, "L1 data cache object"),
+
+ INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
+ INIT_PARAM(exitOnError, "exit on error"),
+ INIT_PARAM(function_trace, "Enable function trace"),
+ INIT_PARAM(function_trace_start, "Cycle to start function trace")
+
+END_INIT_SIM_OBJECT_PARAMS(OzoneChecker)
+
+
+CREATE_SIM_OBJECT(OzoneChecker)
+{
+ OzoneChecker::Params *params = new OzoneChecker::Params();
+ params->name = getInstanceName();
+ params->numberOfThreads = 1;
+ params->max_insts_any_thread = 0;
+ params->max_insts_all_threads = 0;
+ params->max_loads_any_thread = 0;
+ params->max_loads_all_threads = 0;
+ params->exitOnError = exitOnError;
+ params->deferRegistration = defer_registration;
+ params->functionTrace = function_trace;
+ params->functionTraceStart = function_trace_start;
+ params->clock = clock;
+ // Hack to touch all parameters. Consider not deriving Checker
+ // from BaseCPU..it's not really a CPU in the end.
+ Counter temp;
+ temp = max_insts_any_thread;
+ temp = max_insts_all_threads;
+ temp = max_loads_any_thread;
+ temp = max_loads_all_threads;
+ BaseMem *cache = icache;
+ cache = dcache;
+
+#if FULL_SYSTEM
+ params->itb = itb;
+ params->dtb = dtb;
+ params->mem = mem;
+ params->system = system;
+ params->cpu_id = cpu_id;
+ params->profile = profile;
+#else
+ params->process = workload;
+#endif
+
+ OzoneChecker *cpu = new OzoneChecker(params);
+ return cpu;
+}
+
+REGISTER_SIM_OBJECT("OzoneChecker", OzoneChecker)
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_CHECKER_EXEC_CONTEXT_HH__
+#define __CPU_CHECKER_EXEC_CONTEXT_HH__
+
+#include "cpu/checker/cpu.hh"
+#include "cpu/cpu_exec_context.hh"
+#include "cpu/exec_context.hh"
+
+class EndQuiesceEvent;
+namespace Kernel {
+ class Statistics;
+};
+
+template <class XC>
+class CheckerExecContext : public ExecContext
+{
+ public:
+ CheckerExecContext(XC *actual_xc,
+ CheckerCPU *checker_cpu)
+ : actualXC(actual_xc), checkerXC(checker_cpu->cpuXC),
+ checkerCPU(checker_cpu)
+ { }
+
+ private:
+ XC *actualXC;
+ CPUExecContext *checkerXC;
+ CheckerCPU *checkerCPU;
+
+ public:
+
+ BaseCPU *getCpuPtr() { return actualXC->getCpuPtr(); }
+
+ void setCpuId(int id)
+ {
+ actualXC->setCpuId(id);
+ checkerXC->setCpuId(id);
+ }
+
+ int readCpuId() { return actualXC->readCpuId(); }
+
+ TranslatingPort *getMemPort() { return actualXC->getMemPort(); }
+
+#if FULL_SYSTEM
+ System *getSystemPtr() { return actualXC->getSystemPtr(); }
+
+ PhysicalMemory *getPhysMemPtr() { return actualXC->getPhysMemPtr(); }
+
+ AlphaITB *getITBPtr() { return actualXC->getITBPtr(); }
+
+ AlphaDTB *getDTBPtr() { return actualXC->getDTBPtr(); }
+
+ Kernel::Statistics *getKernelStats() { return actualXC->getKernelStats(); }
+#else
+ Process *getProcessPtr() { return actualXC->getProcessPtr(); }
+#endif
+
+ Status status() const { return actualXC->status(); }
+
+ void setStatus(Status new_status)
+ {
+ actualXC->setStatus(new_status);
+ checkerXC->setStatus(new_status);
+ }
+
+ /// Set the status to Active. Optional delay indicates number of
+ /// cycles to wait before beginning execution.
+ void activate(int delay = 1) { actualXC->activate(delay); }
+
+ /// Set the status to Suspended.
+ void suspend() { actualXC->suspend(); }
+
+ /// Set the status to Unallocated.
+ void deallocate() { actualXC->deallocate(); }
+
+ /// Set the status to Halted.
+ void halt() { actualXC->halt(); }
+
+#if FULL_SYSTEM
+ void dumpFuncProfile() { actualXC->dumpFuncProfile(); }
+#endif
+
+ void takeOverFrom(ExecContext *oldContext)
+ {
+ actualXC->takeOverFrom(oldContext);
+ checkerXC->takeOverFrom(oldContext);
+ }
+
+ void regStats(const std::string &name) { actualXC->regStats(name); }
+
+ void serialize(std::ostream &os) { actualXC->serialize(os); }
+ void unserialize(Checkpoint *cp, const std::string §ion)
+ { actualXC->unserialize(cp, section); }
+
+#if FULL_SYSTEM
+ EndQuiesceEvent *getQuiesceEvent() { return actualXC->getQuiesceEvent(); }
+
+ Tick readLastActivate() { return actualXC->readLastActivate(); }
+ Tick readLastSuspend() { return actualXC->readLastSuspend(); }
+
+ void profileClear() { return actualXC->profileClear(); }
+ void profileSample() { return actualXC->profileSample(); }
+#endif
+
+ int getThreadNum() { return actualXC->getThreadNum(); }
+
+ // @todo: Do I need this?
+ MachInst getInst() { return actualXC->getInst(); }
+
+ // @todo: Do I need this?
+ void copyArchRegs(ExecContext *xc)
+ {
+ actualXC->copyArchRegs(xc);
+ checkerXC->copyArchRegs(xc);
+ }
+
+ void clearArchRegs()
+ {
+ actualXC->clearArchRegs();
+ checkerXC->clearArchRegs();
+ }
+
+ //
+ // New accessors for new decoder.
+ //
+ uint64_t readIntReg(int reg_idx)
+ { return actualXC->readIntReg(reg_idx); }
+
+ FloatReg readFloatReg(int reg_idx, int width)
+ { return actualXC->readFloatReg(reg_idx, width); }
+
+ FloatReg readFloatReg(int reg_idx)
+ { return actualXC->readFloatReg(reg_idx); }
+
+ FloatRegBits readFloatRegBits(int reg_idx, int width)
+ { return actualXC->readFloatRegBits(reg_idx, width); }
+
+ FloatRegBits readFloatRegBits(int reg_idx)
+ { return actualXC->readFloatRegBits(reg_idx); }
+
+ void setIntReg(int reg_idx, uint64_t val)
+ {
+ actualXC->setIntReg(reg_idx, val);
+ checkerXC->setIntReg(reg_idx, val);
+ }
+
+ void setFloatReg(int reg_idx, FloatReg val, int width)
+ {
+ actualXC->setFloatReg(reg_idx, val, width);
+ checkerXC->setFloatReg(reg_idx, val, width);
+ }
+
+ void setFloatReg(int reg_idx, FloatReg val)
+ {
+ actualXC->setFloatReg(reg_idx, val);
+ checkerXC->setFloatReg(reg_idx, val);
+ }
+
+ void setFloatRegBits(int reg_idx, FloatRegBits val, int width)
+ {
+ actualXC->setFloatRegBits(reg_idx, val, width);
+ checkerXC->setFloatRegBits(reg_idx, val, width);
+ }
+
+ void setFloatRegBits(int reg_idx, FloatRegBits val)
+ {
+ actualXC->setFloatRegBits(reg_idx, val);
+ checkerXC->setFloatRegBits(reg_idx, val);
+ }
+
+ uint64_t readPC() { return actualXC->readPC(); }
+
+ void setPC(uint64_t val)
+ {
+ actualXC->setPC(val);
+ checkerXC->setPC(val);
+ checkerCPU->recordPCChange(val);
+ }
+
+ uint64_t readNextPC() { return actualXC->readNextPC(); }
+
+ void setNextPC(uint64_t val)
+ {
+ actualXC->setNextPC(val);
+ checkerXC->setNextPC(val);
+ checkerCPU->recordNextPCChange(val);
+ }
+
+ uint64_t readNextNPC() { return actualXC->readNextNPC(); }
+
+ void setNextNPC(uint64_t val)
+ {
+ actualXC->setNextNPC(val);
+ checkerXC->setNextNPC(val);
+ checkerCPU->recordNextPCChange(val);
+ }
+
+ MiscReg readMiscReg(int misc_reg)
+ { return actualXC->readMiscReg(misc_reg); }
+
+ MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
+ { return actualXC->readMiscRegWithEffect(misc_reg, fault); }
+
+ Fault setMiscReg(int misc_reg, const MiscReg &val)
+ {
+ checkerXC->setMiscReg(misc_reg, val);
+ return actualXC->setMiscReg(misc_reg, val);
+ }
+
+ Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val)
+ {
+ checkerXC->setMiscRegWithEffect(misc_reg, val);
+ return actualXC->setMiscRegWithEffect(misc_reg, val);
+ }
+
+ unsigned readStCondFailures()
+ { return actualXC->readStCondFailures(); }
+
+ void setStCondFailures(unsigned sc_failures)
+ {
+ checkerXC->setStCondFailures(sc_failures);
+ actualXC->setStCondFailures(sc_failures);
+ }
+#if FULL_SYSTEM
+ bool inPalMode() { return actualXC->inPalMode(); }
+#endif
+
+ // @todo: Fix this!
+ bool misspeculating() { return actualXC->misspeculating(); }
+
+#if !FULL_SYSTEM
+ IntReg getSyscallArg(int i) { return actualXC->getSyscallArg(i); }
+
+ // used to shift args for indirect syscall
+ void setSyscallArg(int i, IntReg val)
+ {
+ checkerXC->setSyscallArg(i, val);
+ actualXC->setSyscallArg(i, val);
+ }
+
+ void setSyscallReturn(SyscallReturn return_value)
+ {
+ checkerXC->setSyscallReturn(return_value);
+ actualXC->setSyscallReturn(return_value);
+ }
+
+ Counter readFuncExeInst() { return actualXC->readFuncExeInst(); }
+#endif
+ void changeRegFileContext(RegFile::ContextParam param,
+ RegFile::ContextVal val)
+ {
+ actualXC->changeRegFileContext(param, val);
+ checkerXC->changeRegFileContext(param, val);
+ }
+};
+
+#endif // __CPU_CHECKER_EXEC_CONTEXT_HH__
--- /dev/null
+
+#include <string>
+
+#include "cpu/checker/cpu.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/o3/alpha_dyn_inst.hh"
+#include "cpu/o3/alpha_impl.hh"
+#include "sim/builder.hh"
+#include "sim/process.hh"
+#include "sim/sim_object.hh"
+
+class MemObject;
+
+class O3Checker : public Checker<RefCountingPtr<AlphaDynInst<AlphaSimpleImpl> > >
+{
+ public:
+ O3Checker(Params *p)
+ : Checker<RefCountingPtr<AlphaDynInst<AlphaSimpleImpl> > >(p)
+ { }
+};
+
+////////////////////////////////////////////////////////////////////////
+//
+// CheckerCPU Simulation Object
+//
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker)
+
+ Param<Counter> max_insts_any_thread;
+ Param<Counter> max_insts_all_threads;
+ Param<Counter> max_loads_any_thread;
+ Param<Counter> max_loads_all_threads;
+
+#if FULL_SYSTEM
+ SimObjectParam<AlphaITB *> itb;
+ SimObjectParam<AlphaDTB *> dtb;
+ SimObjectParam<MemObject *> mem;
+ SimObjectParam<System *> system;
+ Param<int> cpu_id;
+ Param<Tick> profile;
+#else
+ SimObjectParam<Process *> workload;
+#endif // FULL_SYSTEM
+ Param<int> clock;
+
+ Param<bool> defer_registration;
+ Param<bool> exitOnError;
+ Param<bool> function_trace;
+ Param<Tick> function_trace_start;
+
+END_DECLARE_SIM_OBJECT_PARAMS(O3Checker)
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker)
+
+ INIT_PARAM(max_insts_any_thread,
+ "terminate when any thread reaches this inst count"),
+ INIT_PARAM(max_insts_all_threads,
+ "terminate when all threads have reached this inst count"),
+ INIT_PARAM(max_loads_any_thread,
+ "terminate when any thread reaches this load count"),
+ INIT_PARAM(max_loads_all_threads,
+ "terminate when all threads have reached this load count"),
+
+#if FULL_SYSTEM
+ INIT_PARAM(itb, "Instruction TLB"),
+ INIT_PARAM(dtb, "Data TLB"),
+ INIT_PARAM(mem, "memory"),
+ INIT_PARAM(system, "system object"),
+ INIT_PARAM(cpu_id, "processor ID"),
+ INIT_PARAM(profile, ""),
+#else
+ INIT_PARAM(workload, "processes to run"),
+#endif // FULL_SYSTEM
+
+ INIT_PARAM(clock, "clock speed"),
+
+ INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
+ INIT_PARAM(exitOnError, "exit on error"),
+ INIT_PARAM(function_trace, "Enable function trace"),
+ INIT_PARAM(function_trace_start, "Cycle to start function trace")
+
+END_INIT_SIM_OBJECT_PARAMS(O3Checker)
+
+
+CREATE_SIM_OBJECT(O3Checker)
+{
+ O3Checker::Params *params = new O3Checker::Params();
+ params->name = getInstanceName();
+ params->numberOfThreads = 1;
+ params->max_insts_any_thread = 0;
+ params->max_insts_all_threads = 0;
+ params->max_loads_any_thread = 0;
+ params->max_loads_all_threads = 0;
+ params->exitOnError = exitOnError;
+ params->deferRegistration = defer_registration;
+ params->functionTrace = function_trace;
+ params->functionTraceStart = function_trace_start;
+ params->clock = clock;
+ // Hack to touch all parameters. Consider not deriving Checker
+ // from BaseCPU..it's not really a CPU in the end.
+ Counter temp;
+ temp = max_insts_any_thread;
+ temp = max_insts_all_threads;
+ temp = max_loads_any_thread;
+ temp = max_loads_all_threads;
+
+#if FULL_SYSTEM
+ params->itb = itb;
+ params->dtb = dtb;
+ params->mem = mem;
+ params->system = system;
+ params->cpu_id = cpu_id;
+ params->profile = profile;
+#else
+ params->process = workload;
+#endif
+
+ O3Checker *cpu = new O3Checker(params);
+ return cpu;
+}
+
+REGISTER_SIM_OBJECT("O3Checker", O3Checker)
*/
#include "base/intmath.hh"
+#include "base/misc.hh"
#include "base/trace.hh"
#include "cpu/o3/2bit_local_pred.hh"
class Statistics;
};
+class TranslatingPort;
+
template <class Impl>
class AlphaFullCPU : public FullO3CPU<Impl>
{
protected:
typedef TheISA::IntReg IntReg;
+ typedef TheISA::FloatReg FloatReg;
+ typedef TheISA::FloatRegBits FloatRegBits;
typedef TheISA::MiscReg MiscReg;
typedef TheISA::RegFile RegFile;
typedef TheISA::MiscRegFile MiscRegFile;
virtual int readCpuId() { return cpu->cpu_id; }
- virtual FunctionalMemory *getMemPtr() { return thread->mem; }
+ virtual TranslatingPort *getMemPort() { return /*thread->port*/ NULL; }
#if FULL_SYSTEM
virtual System *getSystemPtr() { return cpu->system; }
virtual uint64_t readIntReg(int reg_idx);
- virtual float readFloatRegSingle(int reg_idx);
+ virtual FloatReg readFloatReg(int reg_idx, int width);
+
+ virtual FloatReg readFloatReg(int reg_idx);
- virtual double readFloatRegDouble(int reg_idx);
+ virtual FloatRegBits readFloatRegBits(int reg_idx, int width);
- virtual uint64_t readFloatRegInt(int reg_idx);
+ virtual FloatRegBits readFloatRegBits(int reg_idx);
virtual void setIntReg(int reg_idx, uint64_t val);
- virtual void setFloatRegSingle(int reg_idx, float val);
+ virtual void setFloatReg(int reg_idx, FloatReg val, int width);
+
+ virtual void setFloatReg(int reg_idx, FloatReg val);
- virtual void setFloatRegDouble(int reg_idx, double val);
+ virtual void setFloatRegBits(int reg_idx, FloatRegBits val, int width);
- virtual void setFloatRegInt(int reg_idx, uint64_t val);
+ virtual void setFloatRegBits(int reg_idx, FloatRegBits val);
virtual uint64_t readPC()
{ return cpu->readPC(thread->tid); }
virtual void setNextPC(uint64_t val);
+ virtual uint64_t readNextNPC()
+ {
+ panic("Alpha has no NextNPC!");
+ return 0;
+ }
+
+ virtual void setNextNPC(uint64_t val)
+ { panic("Alpha has no NextNPC!"); }
+
virtual MiscReg readMiscReg(int misc_reg)
{ return cpu->readMiscReg(misc_reg, thread->tid); }
virtual void setSyscallReturn(SyscallReturn return_value);
- virtual void syscall() { return cpu->syscall(thread->tid); }
+ virtual void syscall(int64_t callnum)
+ { return cpu->syscall(callnum, thread->tid); }
virtual Counter readFuncExeInst() { return thread->funcExeInst; }
#endif
+ virtual void changeRegFileContext(TheISA::RegFile::ContextParam param,
+ TheISA::RegFile::ContextVal val)
+ { panic("Not supported on Alpha!"); }
};
#if FULL_SYSTEM
#if FULL_SYSTEM
/** Translates instruction requestion. */
- Fault translateInstReq(MemReqPtr &req)
+ Fault translateInstReq(RequestPtr &req)
{
return itb->translate(req);
}
/** Translates data read request. */
- Fault translateDataReadReq(MemReqPtr &req)
+ Fault translateDataReadReq(RequestPtr &req)
{
return dtb->translate(req, false);
}
/** Translates data write request. */
- Fault translateDataWriteReq(MemReqPtr &req)
+ Fault translateDataWriteReq(RequestPtr &req)
{
return dtb->translate(req, true);
}
#else
- Fault dummyTranslation(MemReqPtr &req)
- {
-#if 0
- assert((req->vaddr >> 48 & 0xffff) == 0);
-#endif
-
- // put the asid in the upper 16 bits of the paddr
- req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16);
- req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16;
- return NoFault;
- }
-
/** Translates instruction requestion in syscall emulation mode. */
- Fault translateInstReq(MemReqPtr &req)
+ Fault translateInstReq(RequestPtr &req)
{
- return dummyTranslation(req);
+ int tid = req->getThreadNum();
+ return this->thread[tid]->process->pTable->translate(req);
}
/** Translates data read request in syscall emulation mode. */
- Fault translateDataReadReq(MemReqPtr &req)
+ Fault translateDataReadReq(RequestPtr &req)
{
- return dummyTranslation(req);
+ int tid = req->getThreadNum();
+ return this->thread[tid]->process->pTable->translate(req);
}
/** Translates data write request in syscall emulation mode. */
- Fault translateDataWriteReq(MemReqPtr &req)
+ Fault translateDataWriteReq(RequestPtr &req)
{
- return dummyTranslation(req);
+ int tid = req->getThreadNum();
+ return this->thread[tid]->process->pTable->translate(req);
}
#endif
/** Executes a syscall.
* @todo: Determine if this needs to be virtual.
*/
- void syscall(int thread_num);
+ void syscall(int64_t callnum, int thread_num);
/** Gets a syscall argument. */
IntReg getSyscallArg(int i, int tid);
/** Read from memory function. */
template <class T>
- Fault read(MemReqPtr &req, T &data)
+ Fault read(RequestPtr &req, T &data)
{
#if 0
#if FULL_SYSTEM && THE_ISA == ALPHA_ISA
/** CPU read function, forwards read to LSQ. */
template <class T>
- Fault read(MemReqPtr &req, T &data, int load_idx)
+ Fault read(RequestPtr &req, T &data, int load_idx)
{
return this->iew.ldstQueue.read(req, data, load_idx);
}
/** Write to memory function. */
template <class T>
- Fault write(MemReqPtr &req, T &data)
+ Fault write(RequestPtr &req, T &data)
{
#if 0
#if FULL_SYSTEM && THE_ISA == ALPHA_ISA
/** CPU write function, forwards write to LSQ. */
template <class T>
- Fault write(MemReqPtr &req, T &data, int store_idx)
+ Fault write(RequestPtr &req, T &data, int store_idx)
{
return this->iew.ldstQueue.write(req, data, store_idx);
}
#include "cpu/o3/alpha_impl.hh"
#include "cpu/o3/alpha_params.hh"
#include "cpu/o3/fu_pool.hh"
-#include "mem/cache/base_cache.hh"
#include "sim/builder.hh"
class DerivAlphaFullCPU : public AlphaFullCPU<AlphaSimpleImpl>
//SimObjectParam<PageTable *> page_table;
#endif // FULL_SYSTEM
-SimObjectParam<FunctionalMemory *> mem;
+SimObjectParam<MemObject *> mem;
SimObjectParam<BaseCPU *> checker;
Param<Counter> max_loads_any_thread;
Param<Counter> max_loads_all_threads;
-SimObjectParam<BaseCache *> icache;
-SimObjectParam<BaseCache *> dcache;
-
Param<unsigned> cachePorts;
Param<unsigned> decodeToFetchDelay;
// INIT_PARAM(page_table, "Page table"),
#endif // FULL_SYSTEM
- INIT_PARAM_DFLT(mem, "Memory", NULL),
+ INIT_PARAM(mem, "Memory"),
INIT_PARAM_DFLT(checker, "Checker CPU", NULL),
"count",
0),
- INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL),
- INIT_PARAM_DFLT(dcache, "L1 data cache", NULL),
-
INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"),
//
// Caches
//
- params->icacheInterface = icache ? icache->getInterface() : NULL;
- params->dcacheInterface = dcache ? dcache->getInterface() : NULL;
params->cachePorts = cachePorts;
params->decodeToFetchDelay = decodeToFetchDelay;
#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "cpu/checker/exec_context.hh"
-#include "mem/mem_interface.hh"
#include "sim/sim_events.hh"
#include "sim/stats.hh"
this->thread[i]->setStatus(ExecContext::Suspended);
#else
if (i < params->workload.size()) {
- DPRINTF(FullCPU, "FullCPU: Workload[%i]'s starting PC is %#x, "
- "process is %#x",
- i, params->workload[i]->prog_entry, this->thread[i]);
+ DPRINTF(FullCPU, "FullCPU: Workload[%i] process is %#x",
+ i, this->thread[i]);
this->thread[i] = new Thread(this, i, params->workload[i], i);
- assert(params->workload[i]->getMemory() != NULL);
this->thread[i]->setStatus(ExecContext::Suspended);
//usedTids[i] = true;
AlphaFullCPU<Impl>::AlphaXC::takeOverFrom(ExecContext *old_context)
{
// some things should already be set up
- assert(getMemPtr() == old_context->getMemPtr());
+ assert(getMemPort() == old_context->getMemPort());
#if FULL_SYSTEM
assert(getSystemPtr() == old_context->getSystemPtr());
#else
}
// Then loop through the floating point registers.
- for (int i = 0; i < AlphaISA::NumFloatRegs; ++i)
- {
- renamed_reg = this->renameMap.lookup(i + AlphaISA::FP_Base_DepTag);
- this->cpuXC->setFloatRegBits(i,
- this->regFile.readFloatRegBits(renamed_reg));
+ for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) {
+ renamed_reg = cpu->renameMap[tid].lookup(i + AlphaISA::FP_Base_DepTag);
+ cpu->setFloatRegBits(renamed_reg,
+ xc->readFloatRegBits(i));
}
// Copy the misc regs.
- cpu->regFile.miscRegs[tid].copyMiscRegs(xc);
+ copyMiscRegs(xc, this);
// Then finally set the PC and the next PC.
cpu->setPC(xc->readPC(), tid);
}
template <class Impl>
-float
-AlphaFullCPU<Impl>::AlphaXC::readFloatRegSingle(int reg_idx)
+FloatReg
+AlphaFullCPU<Impl>::AlphaXC::readFloatReg(int reg_idx, int width)
{
DPRINTF(Fault, "Reading float register through the XC!\n");
- return cpu->readArchFloatRegSingle(reg_idx, thread->tid);
+ switch(width) {
+ case 32:
+ return cpu->readArchFloatRegSingle(reg_idx, thread->tid);
+ case 64:
+ return cpu->readArchFloatRegDouble(reg_idx, thread->tid);
+ default:
+ panic("Unsupported width!");
+ return 0;
+ }
}
template <class Impl>
-double
-AlphaFullCPU<Impl>::AlphaXC::readFloatRegDouble(int reg_idx)
+FloatReg
+AlphaFullCPU<Impl>::AlphaXC::readFloatReg(int reg_idx)
{
DPRINTF(Fault, "Reading float register through the XC!\n");
- return cpu->readArchFloatRegDouble(reg_idx, thread->tid);
+ return cpu->readArchFloatRegSingle(reg_idx, thread->tid);
}
template <class Impl>
-uint64_t
-AlphaFullCPU<Impl>::AlphaXC::readFloatRegInt(int reg_idx)
+FloatRegBits
+AlphaFullCPU<Impl>::AlphaXC::readFloatRegBits(int reg_idx, int width)
+{
+ DPRINTF(Fault, "Reading floatint register through the XC!\n");
+ return cpu->readArchFloatRegInt(reg_idx, thread->tid);
+}
+
+template <class Impl>
+FloatRegBits
+AlphaFullCPU<Impl>::AlphaXC::readFloatRegBits(int reg_idx)
{
DPRINTF(Fault, "Reading floatint register through the XC!\n");
return cpu->readArchFloatRegInt(reg_idx, thread->tid);
template <class Impl>
void
-AlphaFullCPU<Impl>::AlphaXC::setFloatRegSingle(int reg_idx, float val)
+AlphaFullCPU<Impl>::AlphaXC::setFloatReg(int reg_idx, FloatReg val, int width)
{
DPRINTF(Fault, "Setting float register through the XC!\n");
- cpu->setArchFloatRegSingle(reg_idx, val, thread->tid);
+ switch(width) {
+ case 32:
+ cpu->setArchFloatRegSingle(reg_idx, val, thread->tid);
+ break;
+ case 64:
+ cpu->setArchFloatRegDouble(reg_idx, val, thread->tid);
+ break;
+ }
if (!thread->trapPending && !thread->inSyscall) {
cpu->squashFromXC(thread->tid);
template <class Impl>
void
-AlphaFullCPU<Impl>::AlphaXC::setFloatRegDouble(int reg_idx, double val)
+AlphaFullCPU<Impl>::AlphaXC::setFloatReg(int reg_idx, FloatReg val)
{
DPRINTF(Fault, "Setting float register through the XC!\n");
- cpu->setArchFloatRegDouble(reg_idx, val, thread->tid);
+ cpu->setArchFloatRegSingle(reg_idx, val, thread->tid);
+
+ if (!thread->trapPending && !thread->inSyscall) {
+ cpu->squashFromXC(thread->tid);
+ }
+}
+
+template <class Impl>
+void
+AlphaFullCPU<Impl>::AlphaXC::setFloatRegBits(int reg_idx, FloatRegBits val,
+ int width)
+{
+ DPRINTF(Fault, "Setting floatint register through the XC!\n");
+ cpu->setArchFloatRegInt(reg_idx, val, thread->tid);
if (!thread->trapPending && !thread->inSyscall) {
cpu->squashFromXC(thread->tid);
template <class Impl>
void
-AlphaFullCPU<Impl>::AlphaXC::setFloatRegInt(int reg_idx, uint64_t val)
+AlphaFullCPU<Impl>::AlphaXC::setFloatRegBits(int reg_idx, FloatRegBits val)
{
DPRINTF(Fault, "Setting floatint register through the XC!\n");
cpu->setArchFloatRegInt(reg_idx, val, thread->tid);
template <class Impl>
void
-AlphaFullCPU<Impl>::syscall(int tid)
+AlphaFullCPU<Impl>::syscall(int64_t callnum, int tid)
{
DPRINTF(FullCPU, "AlphaFullCPU: [tid:%i] Executing syscall().\n\n", tid);
++(this->thread[tid]->funcExeInst);
// Execute the actual syscall.
- this->thread[tid]->syscall();
+ this->thread[tid]->syscall(callnum);
// Decrease funcExeInst by one as the normal commit will handle
// incrementing it.
#ifndef __CPU_O3_ALPHA_DYN_INST_HH__
#define __CPU_O3_ALPHA_DYN_INST_HH__
+#include "arch/isa_traits.hh"
#include "cpu/base_dyn_inst.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/alpha_cpu.hh"
#include "cpu/o3/alpha_impl.hh"
+class Packet;
+
/**
* Mostly implementation & ISA specific AlphaDynInst. As with most
* other classes in the new CPU model, it is templated on the Impl to
typedef TheISA::RegIndex RegIndex;
/** Integer register index type. */
typedef TheISA::IntReg IntReg;
+ typedef TheISA::FloatReg FloatReg;
+ typedef TheISA::FloatRegBits FloatRegBits;
/** Misc register index type. */
typedef TheISA::MiscReg MiscReg;
Fault initiateAcc();
/** Completes the access. Only valid for memory operations. */
- Fault completeAcc();
+ Fault completeAcc(Packet *pkt);
private:
/** Initializes variables. */
bool simPalCheck(int palFunc);
#else
/** Calls a syscall. */
- void syscall();
+ void syscall(int64_t callnum);
#endif
private:
template <class Impl>
Fault
-AlphaDynInst<Impl>::completeAcc()
+AlphaDynInst<Impl>::completeAcc(Packet *pkt)
{
if (this->isLoad()) {
- this->fault = this->staticInst->completeAcc(this->req->data,
- this,
+ this->fault = this->staticInst->completeAcc(pkt, this,
this->traceData);
} else if (this->isStore()) {
- this->fault = this->staticInst->completeAcc((uint8_t*)&this->req->result,
- this,
+ this->fault = this->staticInst->completeAcc(pkt, this,
this->traceData);
} else {
panic("Unknown type!");
#else
template <class Impl>
void
-AlphaDynInst<Impl>::syscall()
+AlphaDynInst<Impl>::syscall(int64_t callnum)
{
- this->cpu->syscall(this->threadNumber);
+ this->cpu->syscall(callnum, this->threadNumber);
}
#endif
class AlphaDTB;
class AlphaITB;
class FUPool;
-class FunctionalMemory;
-class MemInterface;
+class MemObject;
class Process;
class System;
//Page Table
// PageTable *pTable;
- FunctionalMemory *mem;
+ MemObject *mem;
BaseCPU *checker;
//
// Caches
//
- MemInterface *icacheInterface;
- MemInterface *dcacheInterface;
+// MemInterface *icacheInterface;
+// MemInterface *dcacheInterface;
unsigned cachePorts;
#include "cpu/o3/alpha_impl.hh"
#include "cpu/o3/alpha_dyn_inst.hh"
#include "cpu/ozone/ozone_impl.hh"
-#include "cpu/ozone/simple_impl.hh"
+//#include "cpu/ozone/simple_impl.hh"
template class TwobitBPredUnit<AlphaSimpleImpl>;
template class TwobitBPredUnit<OzoneImpl>;
-template class TwobitBPredUnit<SimpleImpl>;
+//template class TwobitBPredUnit<SimpleImpl>;
// For Addr type.
#include "arch/isa_traits.hh"
+#include "base/misc.hh"
class DefaultBTB
{
#include "base/timebuf.hh"
#include "cpu/exetrace.hh"
#include "cpu/inst_seq.hh"
-#include "mem/memory_interface.hh"
template <class>
class O3ThreadState;
/** Pointer to FullCPU. */
FullCPU *cpu;
- /** Memory interface. Used for d-cache accesses. */
- MemInterface *dcacheInterface;
-
std::vector<Thread *> thread;
Fault fetchFault;
template <class Impl>
DefaultCommit<Impl>::DefaultCommit(Params *params)
- : dcacheInterface(params->dcacheInterface),
- squashCounter(0),
+ : squashCounter(0),
iewToCommitDelay(params->iewToCommitDelay),
commitToIEWDelay(params->commitToIEWDelay),
renameToROBDelay(params->renameToROBDelay),
#include "sim/stat_control.hh"
using namespace std;
+using namespace TheISA;
BaseFullCPU::BaseFullCPU(Params *params)
: BaseCPU(params), cpu_id(0)
system(params->system),
memCtrl(system->memctrl),
physmem(system->physmem),
- mem(params->mem),
-#else
-// pTable(params->pTable),
- mem(params->workload[0]->getMemory()),
#endif // FULL_SYSTEM
+ mem(params->mem),
switchCount(0),
- icacheInterface(params->icacheInterface),
- dcacheInterface(params->dcacheInterface),
deferRegistration(params->deferRegistration),
numThreads(number_of_threads)
{
template <class Impl>
FloatRegBits
FullO3CPU<Impl>::readFloatRegBits(int reg_idx, int width)
+{
return regFile.readFloatRegBits(reg_idx, width);
}
int idx = reg_idx + TheISA::FP_Base_DepTag;
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
- return regFile.readFloatRegSingle(phys_reg);
+ return regFile.readFloatReg(phys_reg);
}
template <class Impl>
int idx = reg_idx + TheISA::FP_Base_DepTag;
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
- return regFile.readFloatRegDouble(phys_reg);
+ return regFile.readFloatReg(phys_reg, 64);
}
template <class Impl>
int idx = reg_idx + TheISA::FP_Base_DepTag;
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
- return regFile.readFloatRegInt(phys_reg);
+ return regFile.readFloatRegBits(phys_reg);
}
template <class Impl>
{
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
- regFile.setFloatRegSingle(phys_reg, val);
+ regFile.setFloatReg(phys_reg, val);
}
template <class Impl>
{
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
- regFile.setFloatRegDouble(phys_reg, val);
+ regFile.setFloatReg(phys_reg, val, 64);
}
template <class Impl>
{
PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
- regFile.setFloatRegInt(phys_reg, val);
+ regFile.setFloatRegBits(phys_reg, val);
}
template <class Impl>
#include <set>
#include <vector>
+#include "arch/isa_traits.hh"
#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "config/full_system.hh"
template <class>
class Checker;
class ExecContext;
-class MemInterface;
+class MemObject;
class Process;
class BaseFullCPU : public BaseCPU
void regStats();
+ int readCpuId() { return cpu_id; }
+
protected:
int cpu_id;
};
class FullO3CPU : public BaseFullCPU
{
public:
+ typedef TheISA::FloatReg FloatReg;
+ typedef TheISA::FloatRegBits FloatRegBits;
+
// Typedefs from the Impl here.
typedef typename Impl::CPUPol CPUPolicy;
typedef typename Impl::Params Params;
int getDataAsid(unsigned tid)
{ return regFile.miscRegs[tid].getDataAsid(); }
#else
- /** Check if this address is a valid instruction address. */
- bool validInstAddr(Addr addr,unsigned tid)
- { return thread[tid]->validInstAddr(addr); }
-
- /** Check if this address is a valid data address. */
- bool validDataAddr(Addr addr,unsigned tid)
- { return thread[tid]->validDataAddr(addr); }
-
/** Get instruction asid. */
int getInstAsid(unsigned tid)
{ return thread[tid]->asid; }
void setIntReg(int reg_idx, uint64_t val);
- void setFloatReg(int reg_idx, FloatReg val, int width);
+ void setFloatReg(int reg_idx, FloatReg val);
void setFloatReg(int reg_idx, FloatReg val, int width);
void setFloatRegBits(int reg_idx, FloatRegBits val);
- void setFloatRegBits(int reg_idx, FloatRegBits val);
+ void setFloatRegBits(int reg_idx, FloatRegBits val, int width);
uint64_t readArchIntReg(int reg_idx, unsigned tid);
#endif
/** Pointer to memory. */
- FunctionalMemory *mem;
+ MemObject *mem;
Sampler *sampler;
--- /dev/null
+
+#ifndef __CPU_O3_DEP_GRAPH_HH__
+#define __CPU_O3_DEP_GRAPH_HH__
+
+#include "cpu/o3/comm.hh"
+
+template <class DynInstPtr>
+class DependencyEntry
+{
+ public:
+ DependencyEntry()
+ : inst(NULL), next(NULL)
+ { }
+
+ DynInstPtr inst;
+ //Might want to include data about what arch. register the
+ //dependence is waiting on.
+ DependencyEntry<DynInstPtr> *next;
+};
+
+template <class DynInstPtr>
+class DependencyGraph
+{
+ public:
+ typedef DependencyEntry<DynInstPtr> DepEntry;
+
+ DependencyGraph()
+ : numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0)
+ { }
+
+ void resize(int num_entries);
+
+ void reset();
+
+ void insert(PhysRegIndex idx, DynInstPtr &new_inst);
+
+ void setInst(PhysRegIndex idx, DynInstPtr &new_inst)
+ { dependGraph[idx].inst = new_inst; }
+
+ void clearInst(PhysRegIndex idx)
+ { dependGraph[idx].inst = NULL; }
+
+ void remove(PhysRegIndex idx, DynInstPtr &inst_to_remove);
+
+ DynInstPtr pop(PhysRegIndex idx);
+
+ bool empty(PhysRegIndex idx) { return !dependGraph[idx].next; }
+
+ /** Debugging function to dump out the dependency graph.
+ */
+ void dump();
+
+ private:
+ /** Array of linked lists. Each linked list is a list of all the
+ * instructions that depend upon a given register. The actual
+ * register's index is used to index into the graph; ie all
+ * instructions in flight that are dependent upon r34 will be
+ * in the linked list of dependGraph[34].
+ */
+ DepEntry *dependGraph;
+
+ int numEntries;
+
+ // Debug variable, remove when done testing.
+ unsigned memAllocCounter;
+
+ public:
+ uint64_t nodesTraversed;
+ uint64_t nodesRemoved;
+};
+
+template <class DynInstPtr>
+void
+DependencyGraph<DynInstPtr>::resize(int num_entries)
+{
+ numEntries = num_entries;
+ dependGraph = new DepEntry[numEntries];
+}
+
+template <class DynInstPtr>
+void
+DependencyGraph<DynInstPtr>::reset()
+{
+ // Clear the dependency graph
+ DepEntry *curr;
+ DepEntry *prev;
+
+ for (int i = 0; i < numEntries; ++i) {
+ curr = dependGraph[i].next;
+
+ while (curr) {
+ memAllocCounter--;
+
+ prev = curr;
+ curr = prev->next;
+ prev->inst = NULL;
+
+ delete prev;
+ }
+
+ if (dependGraph[i].inst) {
+ dependGraph[i].inst = NULL;
+ }
+
+ dependGraph[i].next = NULL;
+ }
+}
+
+template <class DynInstPtr>
+void
+DependencyGraph<DynInstPtr>::insert(PhysRegIndex idx, DynInstPtr &new_inst)
+{
+ //Add this new, dependent instruction at the head of the dependency
+ //chain.
+
+ // First create the entry that will be added to the head of the
+ // dependency chain.
+ DepEntry *new_entry = new DepEntry;
+ new_entry->next = dependGraph[idx].next;
+ new_entry->inst = new_inst;
+
+ // Then actually add it to the chain.
+ dependGraph[idx].next = new_entry;
+
+ ++memAllocCounter;
+}
+
+
+template <class DynInstPtr>
+void
+DependencyGraph<DynInstPtr>::remove(PhysRegIndex idx,
+ DynInstPtr &inst_to_remove)
+{
+ DepEntry *prev = &dependGraph[idx];
+ DepEntry *curr = dependGraph[idx].next;
+
+ // Make sure curr isn't NULL. Because this instruction is being
+ // removed from a dependency list, it must have been placed there at
+ // an earlier time. The dependency chain should not be empty,
+ // unless the instruction dependent upon it is already ready.
+ if (curr == NULL) {
+ return;
+ }
+
+ nodesRemoved++;
+
+ // Find the instruction to remove within the dependency linked list.
+ while (curr->inst != inst_to_remove) {
+ prev = curr;
+ curr = curr->next;
+ nodesTraversed++;
+
+ assert(curr != NULL);
+ }
+
+ // Now remove this instruction from the list.
+ prev->next = curr->next;
+
+ --memAllocCounter;
+
+ // Could push this off to the destructor of DependencyEntry
+ curr->inst = NULL;
+
+ delete curr;
+}
+
+template <class DynInstPtr>
+DynInstPtr
+DependencyGraph<DynInstPtr>::pop(PhysRegIndex idx)
+{
+ DepEntry *node;
+ node = dependGraph[idx].next;
+ DynInstPtr inst = NULL;
+ if (node) {
+ inst = node->inst;
+ dependGraph[idx].next = node->next;
+ node->inst = NULL;
+ memAllocCounter--;
+ delete node;
+ }
+ return inst;
+}
+
+template <class DynInstPtr>
+void
+DependencyGraph<DynInstPtr>::dump()
+{
+ DepEntry *curr;
+
+ for (int i = 0; i < numEntries; ++i)
+ {
+ curr = &dependGraph[i];
+
+ if (curr->inst) {
+ cprintf("dependGraph[%i]: producer: %#x [sn:%lli] consumer: ",
+ i, curr->inst->readPC(), curr->inst->seqNum);
+ } else {
+ cprintf("dependGraph[%i]: No producer. consumer: ", i);
+ }
+
+ while (curr->next != NULL) {
+ curr = curr->next;
+
+ cprintf("%#x [sn:%lli] ",
+ curr->inst->readPC(), curr->inst->seqNum);
+ }
+
+ cprintf("\n");
+ }
+ cprintf("memAllocCounter: %i\n", memAllocCounter);
+}
+
+#endif // __CPU_O3_DEP_GRAPH_HH__
#ifndef __CPU_O3_FETCH_HH__
#define __CPU_O3_FETCH_HH__
+#include "arch/utility.hh"
#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "cpu/pc_event.hh"
-#include "mem/mem_interface.hh"
+#include "mem/packet.hh"
+#include "mem/port.hh"
#include "sim/eventq.hh"
class Sampler;
typedef TheISA::MachInst MachInst;
typedef TheISA::ExtMachInst ExtMachInst;
+ class IcachePort : public Port
+ {
+ protected:
+ DefaultFetch<Impl> *fetch;
+
+ public:
+ IcachePort(DefaultFetch<Impl> *_fetch)
+ : Port(_fetch->name() + "-iport"), fetch(_fetch)
+ { }
+
+ protected:
+ virtual Tick recvAtomic(PacketPtr pkt);
+
+ virtual void recvFunctional(PacketPtr pkt);
+
+ virtual void recvStatusChange(Status status);
+
+ virtual void getDeviceAddressRanges(AddrRangeList &resp,
+ AddrRangeList &snoop)
+ { resp.clear(); snoop.clear(); }
+
+ virtual bool recvTiming(PacketPtr pkt);
+
+ virtual void recvRetry();
+ };
+
public:
/** Overall fetch status. Used to determine if the CPU can
* deschedule itsef due to a lack of activity.
TrapPending,
QuiescePending,
SwitchOut,
- IcacheMissStall,
- IcacheMissComplete
+ IcacheWaitResponse,
+ IcacheRetry,
+ IcacheAccessComplete
};
/** Fetching Policy, Add new policies here.*/
/** List that has the threads organized by priority. */
std::list<unsigned> priorityList;
- public:
- class CacheCompletionEvent : public Event
- {
- private:
- MemReqPtr req;
- /** Pointer to fetch. */
- DefaultFetch *fetch;
- /** Thread id. */
-// unsigned threadId;
-
- public:
- /** Constructs a cache completion event, which tells fetch when the
- * cache miss is complete.
- */
- CacheCompletionEvent(MemReqPtr &_req, DefaultFetch *_fetch);
-
- /** Processes cache completion event. */
- virtual void process();
- /** Returns the description of the cache completion event. */
- virtual const char *description();
- };
-
public:
/** DefaultFetch constructor. */
DefaultFetch(Params *params);
void initStage();
/** Processes cache completion event. */
- void processCacheCompletion(MemReqPtr &req);
+ void processCacheCompletion(PacketPtr pkt);
void switchOut();
/** Wire used to write any information heading to decode. */
typename TimeBuffer<FetchStruct>::wire toDecode;
+ MemObject *mem;
+
/** Icache interface. */
- MemInterface *icacheInterface;
+ IcachePort *icachePort;
/** BPredUnit. */
BPredUnit branchPred;
Addr nextPC[Impl::MaxThreads];
- /** Memory request used to access cache. */
- MemReqPtr memReq[Impl::MaxThreads];
+ /** Memory packet used to access cache. */
+ PacketPtr memPkt[Impl::MaxThreads];
/** Variable that tracks if fetch has written to the time buffer this
* cycle. Used to tell CPU if there is activity this cycle.
*/
#include "arch/isa_traits.hh"
+#include "arch/utility.hh"
#include "cpu/exetrace.hh"
#include "cpu/o3/fetch.hh"
-#include "mem/base_mem.hh"
-#include "mem/mem_interface.hh"
-#include "mem/mem_req.hh"
+#include "mem/packet.hh"
+#include "mem/request.hh"
#include "sim/byteswap.hh"
+#include "sim/host.hh"
#include "sim/root.hh"
#if FULL_SYSTEM
#include "mem/functional/memory_control.hh"
#include "mem/functional/physical.hh"
#include "sim/system.hh"
-#else // !FULL_SYSTEM
-#include "mem/functional/functional.hh"
#endif // FULL_SYSTEM
#include <algorithm>
using namespace std;
+using namespace TheISA;
template<class Impl>
-DefaultFetch<Impl>::CacheCompletionEvent::CacheCompletionEvent(MemReqPtr &_req,
- DefaultFetch *_fetch)
- : Event(&mainEventQueue, Delayed_Writeback_Pri),
- req(_req),
- fetch(_fetch)
+Tick
+DefaultFetch<Impl>::IcachePort::recvAtomic(PacketPtr pkt)
{
- this->setFlags(Event::AutoDelete);
+ panic("DefaultFetch doesn't expect recvAtomic callback!");
+ return curTick;
}
template<class Impl>
void
-DefaultFetch<Impl>::CacheCompletionEvent::process()
+DefaultFetch<Impl>::IcachePort::recvFunctional(PacketPtr pkt)
+{
+ panic("DefaultFetch doesn't expect recvFunctional callback!");
+}
+
+template<class Impl>
+void
+DefaultFetch<Impl>::IcachePort::recvStatusChange(Status status)
+{
+ if (status == RangeChange)
+ return;
+
+ panic("DefaultFetch doesn't expect recvStatusChange callback!");
+}
+
+template<class Impl>
+bool
+DefaultFetch<Impl>::IcachePort::recvTiming(Packet *pkt)
{
- fetch->processCacheCompletion(req);
+ fetch->processCacheCompletion(pkt);
+ return true;
}
template<class Impl>
-const char *
-DefaultFetch<Impl>::CacheCompletionEvent::description()
+void
+DefaultFetch<Impl>::IcachePort::recvRetry()
{
- return "DefaultFetch cache completion event";
+ panic("DefaultFetch doesn't support retry yet.");
+ // we shouldn't get a retry unless we have a packet that we're
+ // waiting to transmit
+/*
+ assert(cpu->dcache_pkt != NULL);
+ assert(cpu->_status == DcacheRetry);
+ Packet *tmp = cpu->dcache_pkt;
+ if (sendTiming(tmp)) {
+ cpu->_status = DcacheWaitResponse;
+ cpu->dcache_pkt = NULL;
+ }
+*/
}
template<class Impl>
DefaultFetch<Impl>::DefaultFetch(Params *params)
- : icacheInterface(params->icacheInterface),
- branchPred(params),
+ : branchPred(params),
decodeToFetchDelay(params->decodeToFetchDelay),
renameToFetchDelay(params->renameToFetchDelay),
iewToFetchDelay(params->iewToFetchDelay),
}
// Size of cache block.
- cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;
+ cacheBlkSize = 64;
// Create mask to get rid of offset bits.
cacheBlkMask = (cacheBlkSize - 1);
priorityList.push_back(tid);
- // Create a new memory request.
- memReq[tid] = NULL;
+ memPkt[tid] = NULL;
// Create space to store a cache line.
cacheData[tid] = new uint8_t[cacheBlkSize];
DPRINTF(Fetch, "Setting the CPU pointer.\n");
cpu = cpu_ptr;
+ // Name is finally available, so create the port.
+ icachePort = new IcachePort(this);
+
// Fetch needs to start fetching instructions at the very beginning,
// so it must start up in active state.
switchToActive();
template<class Impl>
void
-DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
+DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
{
- unsigned tid = req->thread_num;
+ unsigned tid = pkt->req->getThreadNum();
DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n",tid);
// to return.
// Can keep track of how many cache accesses go unused due to
// misspeculation here.
- if (fetchStatus[tid] != IcacheMissStall ||
- req != memReq[tid] ||
+ if (fetchStatus[tid] != IcacheWaitResponse ||
+ pkt != memPkt[tid] ||
isSwitchedOut()) {
++fetchIcacheSquashes;
+ delete pkt;
return;
}
switchToActive();
- // Only switch to IcacheMissComplete if we're not stalled as well.
+ // Only switch to IcacheAccessComplete if we're not stalled as well.
if (checkStall(tid)) {
fetchStatus[tid] = Blocked;
} else {
- fetchStatus[tid] = IcacheMissComplete;
+ fetchStatus[tid] = IcacheAccessComplete;
}
// memcpy(cacheData[tid], memReq[tid]->data, memReq[tid]->size);
// Reset the mem req to NULL.
- memReq[tid] = NULL;
+ delete pkt->req;
+ delete pkt;
+ memPkt[tid] = NULL;
}
template <class Impl>
// Setup the memReq to do a read of the first instruction's address.
// Set the appropriate read size and flags as well.
- memReq[tid] = new MemReq();
+ // Build request here.
+ RequestPtr mem_req = new Request(tid, fetch_PC, cacheBlkSize, flags,
+ fetch_PC, cpu->readCpuId(), tid);
- memReq[tid]->asid = tid;
- memReq[tid]->thread_num = tid;
- memReq[tid]->data = new uint8_t[64];
- memReq[tid]->xc = cpu->xcBase(tid);
- memReq[tid]->cmd = Read;
- memReq[tid]->reset(fetch_PC, cacheBlkSize, flags);
+ memPkt[tid] = NULL;
// Translate the instruction request.
//#if FULL_SYSTEM
- fault = cpu->translateInstReq(memReq[tid]);
+ fault = cpu->translateInstReq(mem_req);
//#else
// fault = pTable->translate(memReq[tid]);
//#endif
}
#endif
+ // Build packet here.
+ PacketPtr data_pkt = new Packet(mem_req,
+ Packet::ReadReq, Packet::Broadcast);
+ data_pkt->dataStatic(cacheData[tid]);
+
DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
- fault = cpu->mem->read(memReq[tid], cacheData[tid]);
- // This read may change when the mem interface changes.
+
+ fetchedCacheLines++;
// Now do the timing access to see whether or not the instruction
// exists within the cache.
- if (icacheInterface && !icacheInterface->isBlocked()) {
- DPRINTF(Fetch, "Doing cache access.\n");
-
- memReq[tid]->completionEvent = NULL;
-
- memReq[tid]->time = curTick;
-
- MemAccessResult result = icacheInterface->access(memReq[tid]);
-
- fetchedCacheLines++;
-
- // If the cache missed, then schedule an event to wake
- // up this stage once the cache miss completes.
- // @todo: Possibly allow for longer than 1 cycle cache hits.
- if (result != MA_HIT && icacheInterface->doEvents()) {
-
- memReq[tid]->completionEvent =
- new CacheCompletionEvent(memReq[tid], this);
-
- lastIcacheStall[tid] = curTick;
-
- DPRINTF(Activity, "[tid:%i]: Activity: Stalling due to I-cache "
- "miss.\n", tid);
-
- fetchStatus[tid] = IcacheMissStall;
- } else {
- DPRINTF(Fetch, "[tid:%i]: I-Cache hit. Doing Instruction "
- "read.\n", tid);
-
-// memcpy(cacheData[tid], memReq[tid]->data, memReq[tid]->size);
- }
- } else {
+ if (!icachePort->sendTiming(data_pkt)) {
DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
ret_fault = NoFault;
return false;
}
+
+ DPRINTF(Fetch, "Doing cache access.\n");
+
+ lastIcacheStall[tid] = curTick;
+
+ DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache "
+ "response.\n", tid);
+
+ fetchStatus[tid] = IcacheWaitResponse;
}
ret_fault = fault;
nextPC[tid] = new_PC + instSize;
// Clear the icache miss if it's outstanding.
- if (fetchStatus[tid] == IcacheMissStall && icacheInterface) {
+ if (fetchStatus[tid] == IcacheWaitResponse) {
DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",
tid);
- memReq[tid] = NULL;
+ delete memPkt[tid];
+ memPkt[tid] = NULL;
}
fetchStatus[tid] = Squashing;
if (fetchStatus[tid] == Running ||
fetchStatus[tid] == Squashing ||
- fetchStatus[tid] == IcacheMissComplete) {
+ fetchStatus[tid] == IcacheAccessComplete) {
if (_status == Inactive) {
DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid);
- if (fetchStatus[tid] == IcacheMissComplete) {
+ if (fetchStatus[tid] == IcacheAccessComplete) {
DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache"
"completion\n",tid);
}
}
}
- if (checkStall(tid) && fetchStatus[tid] != IcacheMissStall) {
+ if (checkStall(tid) && fetchStatus[tid] != IcacheWaitResponse) {
DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid);
fetchStatus[tid] = Blocked;
// If returning from the delay of a cache miss, then update the status
// to running, otherwise do the cache access. Possibly move this up
// to tick() function.
- if (fetchStatus[tid] == IcacheMissComplete) {
+ if (fetchStatus[tid] == IcacheAccessComplete) {
DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n",
tid);
++fetchBlockedCycles;
} else if (fetchStatus[tid] == Squashing) {
++fetchSquashCycles;
- } else if (fetchStatus[tid] == IcacheMissStall) {
+ } else if (fetchStatus[tid] == IcacheWaitResponse) {
++icacheStallCycles;
}
- // Status is Idle, Squashing, Blocked, or IcacheMissStall, so
+ // Status is Idle, Squashing, Blocked, or IcacheWaitResponse, so
// fetch should do nothing.
return;
}
++fetchCycles;
// If we had a stall due to an icache miss, then return.
- if (fetchStatus[tid] == IcacheMissStall) {
+ if (fetchStatus[tid] == IcacheWaitResponse) {
++icacheStallCycles;
status_change = true;
return;
} else {
// We shouldn't be in an icache miss and also have a fault (an ITB
// miss)
- if (fetchStatus[tid] == IcacheMissStall) {
+ if (fetchStatus[tid] == IcacheWaitResponse) {
panic("Fetch should have exited prior to this!");
}
int tid = *((*activeThreads).begin());
if (fetchStatus[tid] == Running ||
- fetchStatus[tid] == IcacheMissComplete ||
+ fetchStatus[tid] == IcacheAccessComplete ||
fetchStatus[tid] == Idle) {
return tid;
} else {
assert(high_pri <= numThreads);
if (fetchStatus[high_pri] == Running ||
- fetchStatus[high_pri] == IcacheMissComplete ||
+ fetchStatus[high_pri] == IcacheAccessComplete ||
fetchStatus[high_pri] == Idle) {
priorityList.erase(pri_iter);
unsigned high_pri = PQ.top();
if (fetchStatus[high_pri] == Running ||
- fetchStatus[high_pri] == IcacheMissComplete ||
+ fetchStatus[high_pri] == IcacheAccessComplete ||
fetchStatus[high_pri] == Idle)
return high_pri;
else
unsigned high_pri = PQ.top();
if (fetchStatus[high_pri] == Running ||
- fetchStatus[high_pri] == IcacheMissComplete ||
+ fetchStatus[high_pri] == IcacheAccessComplete ||
fetchStatus[high_pri] == Idle)
return high_pri;
else
#include <queue>
#include "arch/isa_traits.hh"
+#include "base/misc.hh"
#include "base/trace.hh"
#include "base/traceflags.hh"
#include "cpu/o3/comm.hh"
--- /dev/null
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sstream>
+
+#include "cpu/o3/fu_pool.hh"
+#include "encumbered/cpu/full/fu_pool.hh"
+#include "sim/builder.hh"
+
+using namespace std;
+
+////////////////////////////////////////////////////////////////////////////
+//
+// A pool of function units
+//
+
+inline void
+FUPool::FUIdxQueue::addFU(int fu_idx)
+{
+ funcUnitsIdx.push_back(fu_idx);
+ ++size;
+}
+
+inline int
+FUPool::FUIdxQueue::getFU()
+{
+ int retval = funcUnitsIdx[idx++];
+
+ if (idx == size)
+ idx = 0;
+
+ return retval;
+}
+
+FUPool::~FUPool()
+{
+ fuListIterator i = funcUnits.begin();
+ fuListIterator end = funcUnits.end();
+ for (; i != end; ++i)
+ delete *i;
+}
+
+
+// Constructor
+FUPool::FUPool(string name, vector<FUDesc *> paramList)
+ : SimObject(name)
+{
+ numFU = 0;
+
+ funcUnits.clear();
+
+ for (int i = 0; i < Num_OpClasses; ++i) {
+ maxOpLatencies[i] = 0;
+ maxIssueLatencies[i] = 0;
+ }
+
+ //
+ // Iterate through the list of FUDescData structures
+ //
+ for (FUDDiterator i = paramList.begin(); i != paramList.end(); ++i) {
+
+ //
+ // Don't bother with this if we're not going to create any FU's
+ //
+ if ((*i)->number) {
+ //
+ // Create the FuncUnit object from this structure
+ // - add the capabilities listed in the FU's operation
+ // description
+ //
+ // We create the first unit, then duplicate it as needed
+ //
+ FuncUnit *fu = new FuncUnit;
+
+ OPDDiterator j = (*i)->opDescList.begin();
+ OPDDiterator end = (*i)->opDescList.end();
+ for (; j != end; ++j) {
+ // indicate that this pool has this capability
+ capabilityList.set((*j)->opClass);
+
+ // Add each of the FU's that will have this capability to the
+ // appropriate queue.
+ for (int k = 0; k < (*i)->number; ++k)
+ fuPerCapList[(*j)->opClass].addFU(numFU + k);
+
+ // indicate that this FU has the capability
+ fu->addCapability((*j)->opClass, (*j)->opLat, (*j)->issueLat);
+
+ if ((*j)->opLat > maxOpLatencies[(*j)->opClass])
+ maxOpLatencies[(*j)->opClass] = (*j)->opLat;
+
+ if ((*j)->issueLat > maxIssueLatencies[(*j)->opClass])
+ maxIssueLatencies[(*j)->opClass] = (*j)->issueLat;
+ }
+
+ numFU++;
+
+ // Add the appropriate number of copies of this FU to the list
+ ostringstream s;
+
+ s << (*i)->name() << "(0)";
+ fu->name = s.str();
+ funcUnits.push_back(fu);
+
+ for (int c = 1; c < (*i)->number; ++c) {
+ ostringstream s;
+ numFU++;
+ FuncUnit *fu2 = new FuncUnit(*fu);
+
+ s << (*i)->name() << "(" << c << ")";
+ fu2->name = s.str();
+ funcUnits.push_back(fu2);
+ }
+ }
+ }
+
+ unitBusy.resize(numFU);
+
+ for (int i = 0; i < numFU; i++) {
+ unitBusy[i] = false;
+ }
+}
+
+void
+FUPool::annotateMemoryUnits(unsigned hit_latency)
+{
+ maxOpLatencies[MemReadOp] = hit_latency;
+
+ fuListIterator i = funcUnits.begin();
+ fuListIterator iend = funcUnits.end();
+ for (; i != iend; ++i) {
+ if ((*i)->provides(MemReadOp))
+ (*i)->opLatency(MemReadOp) = hit_latency;
+
+ if ((*i)->provides(MemWriteOp))
+ (*i)->opLatency(MemWriteOp) = hit_latency;
+ }
+}
+
+int
+FUPool::getUnit(OpClass capability)
+{
+ // If this pool doesn't have the specified capability,
+ // return this information to the caller
+ if (!capabilityList[capability])
+ return -2;
+
+ int fu_idx = fuPerCapList[capability].getFU();
+ int start_idx = fu_idx;
+
+ // Iterate through the circular queue if needed, stopping if we've reached
+ // the first element again.
+ while (unitBusy[fu_idx]) {
+ fu_idx = fuPerCapList[capability].getFU();
+ if (fu_idx == start_idx) {
+ // No FU available
+ return -1;
+ }
+ }
+
+ unitBusy[fu_idx] = true;
+
+ return fu_idx;
+}
+
+void
+FUPool::freeUnitNextCycle(int fu_idx)
+{
+ assert(unitBusy[fu_idx]);
+ unitsToBeFreed.push_back(fu_idx);
+}
+
+void
+FUPool::processFreeUnits()
+{
+ while (!unitsToBeFreed.empty()) {
+ int fu_idx = unitsToBeFreed.back();
+ unitsToBeFreed.pop_back();
+
+ assert(unitBusy[fu_idx]);
+
+ unitBusy[fu_idx] = false;
+ }
+}
+
+void
+FUPool::dump()
+{
+ cout << "Function Unit Pool (" << name() << ")\n";
+ cout << "======================================\n";
+ cout << "Free List:\n";
+
+ for (int i = 0; i < numFU; ++i) {
+ if (unitBusy[i]) {
+ continue;
+ }
+
+ cout << " [" << i << "] : ";
+
+ cout << funcUnits[i]->name << " ";
+
+ cout << "\n";
+ }
+
+ cout << "======================================\n";
+ cout << "Busy List:\n";
+ for (int i = 0; i < numFU; ++i) {
+ if (!unitBusy[i]) {
+ continue;
+ }
+
+ cout << " [" << i << "] : ";
+
+ cout << funcUnits[i]->name << " ";
+
+ cout << "\n";
+ }
+}
+
+void
+FUPool::switchOut()
+{
+}
+
+void
+FUPool::takeOverFrom()
+{
+ for (int i = 0; i < numFU; i++) {
+ unitBusy[i] = false;
+ }
+ unitsToBeFreed.clear();
+}
+
+//
+
+////////////////////////////////////////////////////////////////////////////
+//
+// The SimObjects we use to get the FU information into the simulator
+//
+////////////////////////////////////////////////////////////////////////////
+
+//
+// FUPool - Contails a list of FUDesc objects to make available
+//
+
+//
+// The FuPool object
+//
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(FUPool)
+
+ SimObjectVectorParam<FUDesc *> FUList;
+
+END_DECLARE_SIM_OBJECT_PARAMS(FUPool)
+
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(FUPool)
+
+ INIT_PARAM(FUList, "list of FU's for this pool")
+
+END_INIT_SIM_OBJECT_PARAMS(FUPool)
+
+
+CREATE_SIM_OBJECT(FUPool)
+{
+ return new FUPool(getInstanceName(), FUList);
+}
+
+REGISTER_SIM_OBJECT("FUPool", FUPool)
+
--- /dev/null
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_O3_FU_POOL_HH__
+#define __CPU_O3_FU_POOL_HH__
+
+#include <bitset>
+#include <list>
+#include <string>
+#include <vector>
+
+#include "base/sched_list.hh"
+#include "cpu/op_class.hh"
+#include "sim/sim_object.hh"
+
+class FUDesc;
+class FuncUnit;
+
+/**
+ * Pool of FU's, specific to the new CPU model. The old FU pool had lists of
+ * free units and busy units, and whenever a FU was needed it would iterate
+ * through the free units to find a FU that provided the capability. This pool
+ * has lists of units specific to each of the capabilities, and whenever a FU
+ * is needed, it iterates through that list to find a free unit. The previous
+ * FU pool would have to be ticked each cycle to update which units became
+ * free. This FU pool lets the IEW stage handle freeing units, which frees
+ * them as their scheduled execution events complete. This limits units in this
+ * model to either have identical issue and op latencies, or 1 cycle issue
+ * latencies.
+ */
+class FUPool : public SimObject
+{
+ private:
+ /** Maximum op execution latencies, per op class. */
+ unsigned maxOpLatencies[Num_OpClasses];
+ /** Maximum issue latencies, per op class. */
+ unsigned maxIssueLatencies[Num_OpClasses];
+
+ /** Bitvector listing capabilities of this FU pool. */
+ std::bitset<Num_OpClasses> capabilityList;
+
+ /** Bitvector listing which FUs are busy. */
+ std::vector<bool> unitBusy;
+
+ /** List of units to be freed at the end of this cycle. */
+ std::vector<int> unitsToBeFreed;
+
+ /**
+ * Class that implements a circular queue to hold FU indices. The hope is
+ * that FUs that have been just used will be moved to the end of the queue
+ * by iterating through it, thus leaving free units at the head of the
+ * queue.
+ */
+ class FUIdxQueue {
+ public:
+ /** Constructs a circular queue of FU indices. */
+ FUIdxQueue()
+ : idx(0), size(0)
+ { }
+
+ /** Adds a FU to the queue. */
+ inline void addFU(int fu_idx);
+
+ /** Returns the index of the FU at the head of the queue, and changes
+ * the index to the next element.
+ */
+ inline int getFU();
+
+ private:
+ /** Circular queue index. */
+ int idx;
+
+ /** Size of the queue. */
+ int size;
+
+ /** Queue of FU indices. */
+ std::vector<int> funcUnitsIdx;
+ };
+
+ /** Per op class queues of FUs that provide that capability. */
+ FUIdxQueue fuPerCapList[Num_OpClasses];
+
+ /** Number of FUs. */
+ int numFU;
+
+ /** Functional units. */
+ std::vector<FuncUnit *> funcUnits;
+
+ typedef std::vector<FuncUnit *>::iterator fuListIterator;
+
+ public:
+
+ /** Constructs a FU pool. */
+ FUPool(std::string name, std::vector<FUDesc *> l);
+ ~FUPool();
+
+ /** Annotates units that provide memory operations. Included only because
+ * old FU pool provided this function.
+ */
+ void annotateMemoryUnits(unsigned hit_latency);
+
+ /**
+ * Gets a FU providing the requested capability. Will mark the unit as busy,
+ * but leaves the freeing of the unit up to the IEW stage.
+ * @param capability The capability requested.
+ * @return Returns -2 if the FU pool does not have the capability, -1 if
+ * there is no free FU, and the FU's index otherwise.
+ */
+ int getUnit(OpClass capability);
+
+ /** Frees a FU at the end of this cycle. */
+ void freeUnitNextCycle(int fu_idx);
+
+ /** Frees all FUs on the list. */
+ void processFreeUnits();
+
+ /** Returns the total number of FUs. */
+ int size() { return numFU; }
+
+ /** Debugging function used to dump FU information. */
+ void dump();
+
+ /** Returns the operation execution latency of the given capability. */
+ unsigned getOpLatency(OpClass capability) {
+ return maxOpLatencies[capability];
+ }
+
+ /** Returns the issue latency of the given capability. */
+ unsigned getIssueLatency(OpClass capability) {
+ return maxIssueLatencies[capability];
+ }
+
+ void switchOut();
+ void takeOverFrom();
+};
+
+#endif // __CPU_O3_FU_POOL_HH__
/** Writeback status. */
StageStatus wbStatus;
- public:
- /** LdWriteback event for a load completion. */
- class LdWritebackEvent : public Event {
- private:
- /** Instruction that is writing back data to the register file. */
- DynInstPtr inst;
- /** Pointer to IEW stage. */
- DefaultIEW<Impl> *iewStage;
-
- public:
- /** Constructs a load writeback event. */
- LdWritebackEvent(DynInstPtr &_inst, DefaultIEW<Impl> *_iew);
-
- /** Processes writeback event. */
- virtual void process();
- /** Returns the description of the writeback event. */
- virtual const char *description();
- };
-
public:
/** Constructs a DefaultIEW with the given parameters. */
DefaultIEW(Params *params);
using namespace std;
-template<class Impl>
-DefaultIEW<Impl>::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst,
- DefaultIEW<Impl> *_iew)
- : Event(&mainEventQueue), inst(_inst), iewStage(_iew)
-{
- this->setFlags(Event::AutoDelete);
-}
-
-template<class Impl>
-void
-DefaultIEW<Impl>::LdWritebackEvent::process()
-{
- DPRINTF(IEW, "Load writeback event [sn:%lli]\n", inst->seqNum);
- DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
-
- //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
-
- if (iewStage->isSwitchedOut()) {
- inst = NULL;
- return;
- } else if (inst->isSquashed()) {
- iewStage->wakeCPU();
- inst = NULL;
- return;
- }
-
- iewStage->wakeCPU();
-
- if (!inst->isExecuted()) {
- inst->setExecuted();
-
- // Complete access to copy data to proper place.
- if (inst->isStore()) {
- inst->completeAcc();
- }
- }
-
- // Need to insert instruction into queue to commit
- iewStage->instToCommit(inst);
-
- iewStage->activityThisCycle();
-
- inst = NULL;
-}
-
-template<class Impl>
-const char *
-DefaultIEW<Impl>::LdWritebackEvent::description()
-{
- return "Load writeback event";
-}
-
template<class Impl>
DefaultIEW<Impl>::DefaultIEW(Params *params)
: // @todo: Make this into a parameter.
ldstQueue.executeStore(inst);
// If the store had a fault then it may not have a mem req
- if (inst->req && !(inst->req->flags & LOCKED)) {
+ if (inst->req && !(inst->req->getFlags() & LOCKED)) {
inst->setExecuted();
instToCommit(inst);
else
iewExecutedInsts++;
#else
- iewExecutedInsts[thread_number]++;
+ iewExecutedInsts++;
#endif
//
#include "base/timebuf.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/dep_graph.hh"
-#include "encumbered/cpu/full/op_class.hh"
+#include "cpu/op_class.hh"
#include "sim/host.hh"
class FUPool;
template <class Impl>
InstructionQueue<Impl>::InstructionQueue(Params *params)
- : dcacheInterface(params->dcacheInterface),
- fuPool(params->fuPool),
+ : fuPool(params->fuPool),
numEntries(params->numIQEntries),
totalWidth(params->issueWidth),
numPhysIntRegs(params->numPhysIntRegs),
--- /dev/null
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/o3/alpha_dyn_inst.hh"
+#include "cpu/o3/alpha_cpu.hh"
+#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/lsq_impl.hh"
+
+// Force the instantiation of LDSTQ for all the implementations we care about.
+template class LSQ<AlphaSimpleImpl>;
+
--- /dev/null
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_O3_LSQ_HH__
+#define __CPU_O3_LSQ_HH__
+
+#include <map>
+#include <queue>
+
+#include "config/full_system.hh"
+#include "cpu/inst_seq.hh"
+//#include "cpu/o3/cpu_policy.hh"
+#include "cpu/o3/lsq_unit.hh"
+#include "mem/port.hh"
+//#include "mem/page_table.hh"
+#include "sim/sim_object.hh"
+
+template <class Impl>
+class LSQ {
+ public:
+ typedef typename Impl::Params Params;
+ typedef typename Impl::FullCPU FullCPU;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::CPUPol::IEW IEW;
+ typedef typename Impl::CPUPol::LSQUnit LSQUnit;
+
+ enum LSQPolicy {
+ Dynamic,
+ Partitioned,
+ Threshold
+ };
+
+ /** Constructs an LSQ with the given parameters. */
+ LSQ(Params *params);
+
+ /** Returns the name of the LSQ. */
+ std::string name() const;
+
+ /** Sets the pointer to the list of active threads. */
+ void setActiveThreads(std::list<unsigned> *at_ptr);
+ /** Sets the CPU pointer. */
+ void setCPU(FullCPU *cpu_ptr);
+ /** Sets the IEW stage pointer. */
+ void setIEW(IEW *iew_ptr);
+ /** Sets the page table pointer. */
+// void setPageTable(PageTable *pt_ptr);
+
+ void switchOut();
+ void takeOverFrom();
+
+ /** Number of entries needed for the given amount of threads.*/
+ int entryAmount(int num_threads);
+ void removeEntries(unsigned tid);
+ /** Reset the max entries for each thread. */
+ void resetEntries();
+ /** Resize the max entries for a thread. */
+ void resizeEntries(unsigned size, unsigned tid);
+
+ /** Ticks the LSQ. */
+ void tick();
+ /** Ticks a specific LSQ Unit. */
+ void tick(unsigned tid)
+ { thread[tid].tick(); }
+
+ /** Inserts a load into the LSQ. */
+ void insertLoad(DynInstPtr &load_inst);
+ /** Inserts a store into the LSQ. */
+ void insertStore(DynInstPtr &store_inst);
+
+ /** Executes a load. */
+ Fault executeLoad(DynInstPtr &inst);
+
+ Fault executeLoad(int lq_idx, unsigned tid)
+ { return thread[tid].executeLoad(lq_idx); }
+
+ /** Executes a store. */
+ Fault executeStore(DynInstPtr &inst);
+
+ /**
+ * Commits loads up until the given sequence number for a specific thread.
+ */
+ void commitLoads(InstSeqNum &youngest_inst, unsigned tid)
+ { thread[tid].commitLoads(youngest_inst); }
+
+ /**
+ * Commits stores up until the given sequence number for a specific thread.
+ */
+ void commitStores(InstSeqNum &youngest_inst, unsigned tid)
+ { thread[tid].commitStores(youngest_inst); }
+
+ /**
+ * Attempts to write back stores until all cache ports are used or the
+ * interface becomes blocked.
+ */
+ void writebackStores();
+ /** Same as above, but only for one thread. */
+ void writebackStores(unsigned tid);
+
+ /**
+ * Squash instructions from a thread until the specified sequence number.
+ */
+ void squash(const InstSeqNum &squashed_num, unsigned tid)
+ { thread[tid].squash(squashed_num); }
+
+ /** Returns whether or not there was a memory ordering violation. */
+ bool violation();
+ /**
+ * Returns whether or not there was a memory ordering violation for a
+ * specific thread.
+ */
+ bool violation(unsigned tid)
+ { return thread[tid].violation(); }
+
+ /** Returns if a load is blocked due to the memory system for a specific
+ * thread.
+ */
+ bool loadBlocked(unsigned tid)
+ { return thread[tid].loadBlocked(); }
+
+ bool isLoadBlockedHandled(unsigned tid)
+ { return thread[tid].isLoadBlockedHandled(); }
+
+ void setLoadBlockedHandled(unsigned tid)
+ { thread[tid].setLoadBlockedHandled(); }
+
+ /** Gets the instruction that caused the memory ordering violation. */
+ DynInstPtr getMemDepViolator(unsigned tid)
+ { return thread[tid].getMemDepViolator(); }
+
+ /** Returns the head index of the load queue for a specific thread. */
+ int getLoadHead(unsigned tid)
+ { return thread[tid].getLoadHead(); }
+
+ /** Returns the sequence number of the head of the load queue. */
+ InstSeqNum getLoadHeadSeqNum(unsigned tid)
+ {
+ return thread[tid].getLoadHeadSeqNum();
+ }
+
+ /** Returns the head index of the store queue. */
+ int getStoreHead(unsigned tid)
+ { return thread[tid].getStoreHead(); }
+
+ /** Returns the sequence number of the head of the store queue. */
+ InstSeqNum getStoreHeadSeqNum(unsigned tid)
+ {
+ return thread[tid].getStoreHeadSeqNum();
+ }
+
+ /** Returns the number of instructions in all of the queues. */
+ int getCount();
+ /** Returns the number of instructions in the queues of one thread. */
+ int getCount(unsigned tid)
+ { return thread[tid].getCount(); }
+
+ /** Returns the total number of loads in the load queue. */
+ int numLoads();
+ /** Returns the total number of loads for a single thread. */
+ int numLoads(unsigned tid)
+ { return thread[tid].numLoads(); }
+
+ /** Returns the total number of stores in the store queue. */
+ int numStores();
+ /** Returns the total number of stores for a single thread. */
+ int numStores(unsigned tid)
+ { return thread[tid].numStores(); }
+
+ /** Returns the total number of loads that are ready. */
+ int numLoadsReady();
+ /** Returns the number of loads that are ready for a single thread. */
+ int numLoadsReady(unsigned tid)
+ { return thread[tid].numLoadsReady(); }
+
+ /** Returns the number of free entries. */
+ unsigned numFreeEntries();
+ /** Returns the number of free entries for a specific thread. */
+ unsigned numFreeEntries(unsigned tid);
+
+ /** Returns if the LSQ is full (either LQ or SQ is full). */
+ bool isFull();
+ /**
+ * Returns if the LSQ is full for a specific thread (either LQ or SQ is
+ * full).
+ */
+ bool isFull(unsigned tid);
+
+ /** Returns if any of the LQs are full. */
+ bool lqFull();
+ /** Returns if the LQ of a given thread is full. */
+ bool lqFull(unsigned tid);
+
+ /** Returns if any of the SQs are full. */
+ bool sqFull();
+ /** Returns if the SQ of a given thread is full. */
+ bool sqFull(unsigned tid);
+
+ /**
+ * Returns if the LSQ is stalled due to a memory operation that must be
+ * replayed.
+ */
+ bool isStalled();
+ /**
+ * Returns if the LSQ of a specific thread is stalled due to a memory
+ * operation that must be replayed.
+ */
+ bool isStalled(unsigned tid);
+
+ /** Returns whether or not there are any stores to write back to memory. */
+ bool hasStoresToWB();
+
+ /** Returns whether or not a specific thread has any stores to write back
+ * to memory.
+ */
+ bool hasStoresToWB(unsigned tid)
+ { return thread[tid].hasStoresToWB(); }
+
+ /** Returns the number of stores a specific thread has to write back. */
+ int numStoresToWB(unsigned tid)
+ { return thread[tid].numStoresToWB(); }
+
+ /** Returns if the LSQ will write back to memory this cycle. */
+ bool willWB();
+ /** Returns if the LSQ of a specific thread will write back to memory this
+ * cycle.
+ */
+ bool willWB(unsigned tid)
+ { return thread[tid].willWB(); }
+
+ /** Debugging function to print out all instructions. */
+ void dumpInsts();
+ /** Debugging function to print out instructions from a specific thread. */
+ void dumpInsts(unsigned tid)
+ { thread[tid].dumpInsts(); }
+
+ /** Executes a read operation, using the load specified at the load index. */
+ template <class T>
+ Fault read(RequestPtr req, T &data, int load_idx);
+
+ /** Executes a store operation, using the store specified at the store
+ * index.
+ */
+ template <class T>
+ Fault write(RequestPtr req, T &data, int store_idx);
+
+ private:
+ /** The LSQ policy for SMT mode. */
+ LSQPolicy lsqPolicy;
+
+ /** The LSQ units for individual threads. */
+ LSQUnit thread[Impl::MaxThreads];
+
+ /** The CPU pointer. */
+ FullCPU *cpu;
+
+ /** The IEW stage pointer. */
+ IEW *iewStage;
+
+ /** The pointer to the page table. */
+// PageTable *pTable;
+
+ /** List of Active Threads in System. */
+ std::list<unsigned> *activeThreads;
+
+ /** Total Size of LQ Entries. */
+ unsigned LQEntries;
+ /** Total Size of SQ Entries. */
+ unsigned SQEntries;
+
+ /** Max LQ Size - Used to Enforce Sharing Policies. */
+ unsigned maxLQEntries;
+
+ /** Max SQ Size - Used to Enforce Sharing Policies. */
+ unsigned maxSQEntries;
+
+ /** Number of Threads. */
+ unsigned numThreads;
+};
+
+template <class Impl>
+template <class T>
+Fault
+LSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
+{
+ unsigned tid = req->getThreadNum();
+
+ return thread[tid].read(req, data, load_idx);
+}
+
+template <class Impl>
+template <class T>
+Fault
+LSQ<Impl>::write(RequestPtr req, T &data, int store_idx)
+{
+ unsigned tid = req->getThreadNum();
+
+ return thread[tid].write(req, data, store_idx);
+}
+
+#endif // __CPU_O3_LSQ_HH__
--- /dev/null
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <algorithm>
+#include <string>
+
+#include "cpu/o3/lsq.hh"
+
+using namespace std;
+
+template <class Impl>
+LSQ<Impl>::LSQ(Params *params)
+ : LQEntries(params->LQEntries), SQEntries(params->SQEntries),
+ numThreads(params->numberOfThreads)
+{
+ DPRINTF(LSQ, "Creating LSQ object.\n");
+
+ //**********************************************/
+ //************ Handle SMT Parameters ***********/
+ //**********************************************/
+ string policy = params->smtLSQPolicy;
+
+ //Convert string to lowercase
+ std::transform(policy.begin(), policy.end(), policy.begin(),
+ (int(*)(int)) tolower);
+
+ //Figure out fetch policy
+ if (policy == "dynamic") {
+ lsqPolicy = Dynamic;
+
+ maxLQEntries = LQEntries;
+ maxSQEntries = SQEntries;
+
+ DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
+
+ } else if (policy == "partitioned") {
+ lsqPolicy = Partitioned;
+
+ //@todo:make work if part_amt doesnt divide evenly.
+ maxLQEntries = LQEntries / numThreads;
+ maxSQEntries = SQEntries / numThreads;
+
+ DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
+ "%i entries per LQ | %i entries per SQ",
+ maxLQEntries,maxSQEntries);
+
+ } else if (policy == "threshold") {
+ lsqPolicy = Threshold;
+
+ assert(params->smtLSQThreshold > LQEntries);
+ assert(params->smtLSQThreshold > SQEntries);
+
+ //Divide up by threshold amount
+ //@todo: Should threads check the max and the total
+ //amount of the LSQ
+ maxLQEntries = params->smtLSQThreshold;
+ maxSQEntries = params->smtLSQThreshold;
+
+ DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
+ "%i entries per LQ | %i entries per SQ",
+ maxLQEntries,maxSQEntries);
+
+ } else {
+ assert(0 && "Invalid LSQ Sharing Policy.Options Are:{Dynamic,"
+ "Partitioned, Threshold}");
+ }
+
+ //Initialize LSQs
+ for (int tid=0; tid < numThreads; tid++) {
+ thread[tid].init(params, maxLQEntries, maxSQEntries, tid);
+ }
+}
+
+
+template<class Impl>
+std::string
+LSQ<Impl>::name() const
+{
+ return iewStage->name() + ".lsq";
+}
+
+template<class Impl>
+void
+LSQ<Impl>::setActiveThreads(list<unsigned> *at_ptr)
+{
+ activeThreads = at_ptr;
+ assert(activeThreads != 0);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::setCPU(FullCPU *cpu_ptr)
+{
+ cpu = cpu_ptr;
+
+ for (int tid=0; tid < numThreads; tid++) {
+ thread[tid].setCPU(cpu_ptr);
+ }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::setIEW(IEW *iew_ptr)
+{
+ iewStage = iew_ptr;
+
+ for (int tid=0; tid < numThreads; tid++) {
+ thread[tid].setIEW(iew_ptr);
+ }
+}
+
+#if 0
+template<class Impl>
+void
+LSQ<Impl>::setPageTable(PageTable *pt_ptr)
+{
+ for (int tid=0; tid < numThreads; tid++) {
+ thread[tid].setPageTable(pt_ptr);
+ }
+}
+#endif
+
+template <class Impl>
+void
+LSQ<Impl>::switchOut()
+{
+ for (int tid = 0; tid < numThreads; tid++) {
+ thread[tid].switchOut();
+ }
+}
+
+template <class Impl>
+void
+LSQ<Impl>::takeOverFrom()
+{
+ for (int tid = 0; tid < numThreads; tid++) {
+ thread[tid].takeOverFrom();
+ }
+}
+
+template <class Impl>
+int
+LSQ<Impl>::entryAmount(int num_threads)
+{
+ if (lsqPolicy == Partitioned) {
+ return LQEntries / num_threads;
+ } else {
+ return 0;
+ }
+}
+
+template <class Impl>
+void
+LSQ<Impl>::resetEntries()
+{
+ if (lsqPolicy != Dynamic || numThreads > 1) {
+ int active_threads = (*activeThreads).size();
+
+ list<unsigned>::iterator threads = (*activeThreads).begin();
+ list<unsigned>::iterator list_end = (*activeThreads).end();
+
+ int maxEntries;
+
+ if (lsqPolicy == Partitioned) {
+ maxEntries = LQEntries / active_threads;
+ } else if (lsqPolicy == Threshold && active_threads == 1) {
+ maxEntries = LQEntries;
+ } else {
+ maxEntries = LQEntries;
+ }
+
+ while (threads != list_end) {
+ resizeEntries(maxEntries,*threads++);
+ }
+ }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::removeEntries(unsigned tid)
+{
+ thread[tid].clearLQ();
+ thread[tid].clearSQ();
+}
+
+template<class Impl>
+void
+LSQ<Impl>::resizeEntries(unsigned size,unsigned tid)
+{
+ thread[tid].resizeLQ(size);
+ thread[tid].resizeSQ(size);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::tick()
+{
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+
+ thread[tid].tick();
+ }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::insertLoad(DynInstPtr &load_inst)
+{
+ unsigned tid = load_inst->threadNumber;
+
+ thread[tid].insertLoad(load_inst);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::insertStore(DynInstPtr &store_inst)
+{
+ unsigned tid = store_inst->threadNumber;
+
+ thread[tid].insertStore(store_inst);
+}
+
+template<class Impl>
+Fault
+LSQ<Impl>::executeLoad(DynInstPtr &inst)
+{
+ unsigned tid = inst->threadNumber;
+
+ return thread[tid].executeLoad(inst);
+}
+
+template<class Impl>
+Fault
+LSQ<Impl>::executeStore(DynInstPtr &inst)
+{
+ unsigned tid = inst->threadNumber;
+
+ return thread[tid].executeStore(inst);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::writebackStores()
+{
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+
+ if (numStoresToWB(tid) > 0) {
+ DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
+ "available for Writeback.\n", tid, numStoresToWB(tid));
+ }
+
+ thread[tid].writebackStores();
+ }
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::violation()
+{
+ /* Answers: Does Anybody Have a Violation?*/
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ if (thread[tid].violation())
+ return true;
+ }
+
+ return false;
+}
+
+template<class Impl>
+int
+LSQ<Impl>::getCount()
+{
+ unsigned total = 0;
+
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ total += getCount(tid);
+ }
+
+ return total;
+}
+
+template<class Impl>
+int
+LSQ<Impl>::numLoads()
+{
+ unsigned total = 0;
+
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ total += numLoads(tid);
+ }
+
+ return total;
+}
+
+template<class Impl>
+int
+LSQ<Impl>::numStores()
+{
+ unsigned total = 0;
+
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ total += thread[tid].numStores();
+ }
+
+ return total;
+}
+
+template<class Impl>
+int
+LSQ<Impl>::numLoadsReady()
+{
+ unsigned total = 0;
+
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ total += thread[tid].numLoadsReady();
+ }
+
+ return total;
+}
+
+template<class Impl>
+unsigned
+LSQ<Impl>::numFreeEntries()
+{
+ unsigned total = 0;
+
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ total += thread[tid].numFreeEntries();
+ }
+
+ return total;
+}
+
+template<class Impl>
+unsigned
+LSQ<Impl>::numFreeEntries(unsigned tid)
+{
+ //if( lsqPolicy == Dynamic )
+ //return numFreeEntries();
+ //else
+ return thread[tid].numFreeEntries();
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::isFull()
+{
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ if (! (thread[tid].lqFull() || thread[tid].sqFull()) )
+ return false;
+ }
+
+ return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::isFull(unsigned tid)
+{
+ //@todo: Change to Calculate All Entries for
+ //Dynamic Policy
+ if( lsqPolicy == Dynamic )
+ return isFull();
+ else
+ return thread[tid].lqFull() || thread[tid].sqFull();
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::lqFull()
+{
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ if (!thread[tid].lqFull())
+ return false;
+ }
+
+ return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::lqFull(unsigned tid)
+{
+ //@todo: Change to Calculate All Entries for
+ //Dynamic Policy
+ if( lsqPolicy == Dynamic )
+ return lqFull();
+ else
+ return thread[tid].lqFull();
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::sqFull()
+{
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ if (!sqFull(tid))
+ return false;
+ }
+
+ return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::sqFull(unsigned tid)
+{
+ //@todo: Change to Calculate All Entries for
+ //Dynamic Policy
+ if( lsqPolicy == Dynamic )
+ return sqFull();
+ else
+ return thread[tid].sqFull();
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::isStalled()
+{
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ if (!thread[tid].isStalled())
+ return false;
+ }
+
+ return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::isStalled(unsigned tid)
+{
+ if( lsqPolicy == Dynamic )
+ return isStalled();
+ else
+ return thread[tid].isStalled();
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::hasStoresToWB()
+{
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ if (!hasStoresToWB(tid))
+ return false;
+ }
+
+ return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::willWB()
+{
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ if (!willWB(tid))
+ return false;
+ }
+
+ return true;
+}
+
+template<class Impl>
+void
+LSQ<Impl>::dumpInsts()
+{
+ list<unsigned>::iterator active_threads = (*activeThreads).begin();
+
+ while (active_threads != (*activeThreads).end()) {
+ unsigned tid = *active_threads++;
+ thread[tid].dumpInsts();
+ }
+}
--- /dev/null
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/o3/alpha_dyn_inst.hh"
+#include "cpu/o3/alpha_cpu.hh"
+#include "cpu/o3/alpha_impl.hh"
+#include "cpu/o3/lsq_unit_impl.hh"
+
+// Force the instantiation of LDSTQ for all the implementations we care about.
+template class LSQUnit<AlphaSimpleImpl>;
+
--- /dev/null
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_O3_LSQ_UNIT_HH__
+#define __CPU_O3_LSQ_UNIT_HH__
+
+#include <algorithm>
+#include <map>
+#include <queue>
+
+#include "arch/faults.hh"
+#include "config/full_system.hh"
+#include "base/hashmap.hh"
+#include "cpu/inst_seq.hh"
+#include "mem/packet.hh"
+#include "mem/port.hh"
+//#include "mem/page_table.hh"
+//#include "sim/debug.hh"
+//#include "sim/sim_object.hh"
+
+/**
+ * Class that implements the actual LQ and SQ for each specific
+ * thread. Both are circular queues; load entries are freed upon
+ * committing, while store entries are freed once they writeback. The
+ * LSQUnit tracks if there are memory ordering violations, and also
+ * detects partial load to store forwarding cases (a store only has
+ * part of a load's data) that requires the load to wait until the
+ * store writes back. In the former case it holds onto the instruction
+ * until the dependence unit looks at it, and in the latter it stalls
+ * the LSQ until the store writes back. At that point the load is
+ * replayed.
+ */
+template <class Impl>
+class LSQUnit {
+ protected:
+ typedef TheISA::IntReg IntReg;
+ public:
+ typedef typename Impl::Params Params;
+ typedef typename Impl::FullCPU FullCPU;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::CPUPol::IEW IEW;
+ typedef typename Impl::CPUPol::IssueStruct IssueStruct;
+
+ public:
+ /** Constructs an LSQ unit. init() must be called prior to use. */
+ LSQUnit();
+
+ /** Initializes the LSQ unit with the specified number of entries. */
+ void init(Params *params, unsigned maxLQEntries,
+ unsigned maxSQEntries, unsigned id);
+
+ /** Returns the name of the LSQ unit. */
+ std::string name() const;
+
+ /** Sets the CPU pointer. */
+ void setCPU(FullCPU *cpu_ptr);
+
+ /** Sets the IEW stage pointer. */
+ void setIEW(IEW *iew_ptr)
+ { iewStage = iew_ptr; }
+
+ /** Sets the page table pointer. */
+// void setPageTable(PageTable *pt_ptr);
+
+ void switchOut();
+
+ void takeOverFrom();
+
+ bool isSwitchedOut() { return switchedOut; }
+
+ /** Ticks the LSQ unit, which in this case only resets the number of
+ * used cache ports.
+ * @todo: Move the number of used ports up to the LSQ level so it can
+ * be shared by all LSQ units.
+ */
+ void tick() { usedPorts = 0; }
+
+ /** Inserts an instruction. */
+ void insert(DynInstPtr &inst);
+ /** Inserts a load instruction. */
+ void insertLoad(DynInstPtr &load_inst);
+ /** Inserts a store instruction. */
+ void insertStore(DynInstPtr &store_inst);
+
+ /** Executes a load instruction. */
+ Fault executeLoad(DynInstPtr &inst);
+
+ Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
+ /** Executes a store instruction. */
+ Fault executeStore(DynInstPtr &inst);
+
+ /** Commits the head load. */
+ void commitLoad();
+ /** Commits loads older than a specific sequence number. */
+ void commitLoads(InstSeqNum &youngest_inst);
+
+ /** Commits stores older than a specific sequence number. */
+ void commitStores(InstSeqNum &youngest_inst);
+
+ /** Writes back stores. */
+ void writebackStores();
+
+ void completeDataAccess(PacketPtr pkt);
+
+ void completeStoreDataAccess(DynInstPtr &inst);
+
+ // @todo: Include stats in the LSQ unit.
+ //void regStats();
+
+ /** Clears all the entries in the LQ. */
+ void clearLQ();
+
+ /** Clears all the entries in the SQ. */
+ void clearSQ();
+
+ /** Resizes the LQ to a given size. */
+ void resizeLQ(unsigned size);
+
+ /** Resizes the SQ to a given size. */
+ void resizeSQ(unsigned size);
+
+ /** Squashes all instructions younger than a specific sequence number. */
+ void squash(const InstSeqNum &squashed_num);
+
+ /** Returns if there is a memory ordering violation. Value is reset upon
+ * call to getMemDepViolator().
+ */
+ bool violation() { return memDepViolator; }
+
+ /** Returns the memory ordering violator. */
+ DynInstPtr getMemDepViolator();
+
+ /** Returns if a load became blocked due to the memory system. */
+ bool loadBlocked()
+ { return isLoadBlocked; }
+
+ void clearLoadBlocked()
+ { isLoadBlocked = false; }
+
+ bool isLoadBlockedHandled()
+ { return loadBlockedHandled; }
+
+ void setLoadBlockedHandled()
+ { loadBlockedHandled = true; }
+
+ /** Returns the number of free entries (min of free LQ and SQ entries). */
+ unsigned numFreeEntries();
+
+ /** Returns the number of loads ready to execute. */
+ int numLoadsReady();
+
+ /** Returns the number of loads in the LQ. */
+ int numLoads() { return loads; }
+
+ /** Returns the number of stores in the SQ. */
+ int numStores() { return stores; }
+
+ /** Returns if either the LQ or SQ is full. */
+ bool isFull() { return lqFull() || sqFull(); }
+
+ /** Returns if the LQ is full. */
+ bool lqFull() { return loads >= (LQEntries - 1); }
+
+ /** Returns if the SQ is full. */
+ bool sqFull() { return stores >= (SQEntries - 1); }
+
+ /** Returns the number of instructions in the LSQ. */
+ unsigned getCount() { return loads + stores; }
+
+ /** Returns if there are any stores to writeback. */
+ bool hasStoresToWB() { return storesToWB; }
+
+ /** Returns the number of stores to writeback. */
+ int numStoresToWB() { return storesToWB; }
+
+ /** Returns if the LSQ unit will writeback on this cycle. */
+ bool willWB() { return storeQueue[storeWBIdx].canWB &&
+ !storeQueue[storeWBIdx].completed/* &&
+ !dcacheInterface->isBlocked()*/; }
+
+ private:
+ /** Completes the store at the specified index. */
+ void completeStore(int store_idx);
+
+ /** Increments the given store index (circular queue). */
+ inline void incrStIdx(int &store_idx);
+ /** Decrements the given store index (circular queue). */
+ inline void decrStIdx(int &store_idx);
+ /** Increments the given load index (circular queue). */
+ inline void incrLdIdx(int &load_idx);
+ /** Decrements the given load index (circular queue). */
+ inline void decrLdIdx(int &load_idx);
+
+ public:
+ /** Debugging function to dump instructions in the LSQ. */
+ void dumpInsts();
+
+ private:
+ /** Pointer to the CPU. */
+ FullCPU *cpu;
+
+ /** Pointer to the IEW stage. */
+ IEW *iewStage;
+
+ MemObject *mem;
+
+ class DcachePort : public Port
+ {
+ protected:
+ FullCPU *cpu;
+ LSQUnit *lsq;
+
+ public:
+ DcachePort(FullCPU *_cpu, LSQUnit *_lsq)
+ : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq)
+ { }
+
+ protected:
+ virtual Tick recvAtomic(PacketPtr pkt);
+
+ virtual void recvFunctional(PacketPtr pkt);
+
+ virtual void recvStatusChange(Status status);
+
+ virtual void getDeviceAddressRanges(AddrRangeList &resp,
+ AddrRangeList &snoop)
+ { resp.clear(); snoop.clear(); }
+
+ virtual bool recvTiming(PacketPtr pkt);
+
+ virtual void recvRetry();
+ };
+
+ /** Pointer to the D-cache. */
+ DcachePort *dcachePort;
+
+ /** Pointer to the page table. */
+// PageTable *pTable;
+
+ public:
+ struct SQEntry {
+ /** Constructs an empty store queue entry. */
+ SQEntry()
+ : inst(NULL), req(NULL), size(0), data(0),
+ canWB(0), committed(0), completed(0)
+ { }
+
+ /** Constructs a store queue entry for a given instruction. */
+ SQEntry(DynInstPtr &_inst)
+ : inst(_inst), req(NULL), size(0), data(0),
+ canWB(0), committed(0), completed(0)
+ { }
+
+ /** The store instruction. */
+ DynInstPtr inst;
+ /** The request for the store. */
+ RequestPtr req;
+ /** The size of the store. */
+ int size;
+ /** The store data. */
+ IntReg data;
+ /** Whether or not the store can writeback. */
+ bool canWB;
+ /** Whether or not the store is committed. */
+ bool committed;
+ /** Whether or not the store is completed. */
+ bool completed;
+ };
+
+ private:
+ /** The LSQUnit thread id. */
+ unsigned lsqID;
+
+ /** The store queue. */
+ std::vector<SQEntry> storeQueue;
+
+ /** The load queue. */
+ std::vector<DynInstPtr> loadQueue;
+
+ /** The number of LQ entries, plus a sentinel entry (circular queue).
+ * @todo: Consider having var that records the true number of LQ entries.
+ */
+ unsigned LQEntries;
+ /** The number of SQ entries, plus a sentinel entry (circular queue).
+ * @todo: Consider having var that records the true number of SQ entries.
+ */
+ unsigned SQEntries;
+
+ /** The number of load instructions in the LQ. */
+ int loads;
+ /** The number of store instructions in the SQ. */
+ int stores;
+ /** The number of store instructions in the SQ waiting to writeback. */
+ int storesToWB;
+
+ /** The index of the head instruction in the LQ. */
+ int loadHead;
+ /** The index of the tail instruction in the LQ. */
+ int loadTail;
+
+ /** The index of the head instruction in the SQ. */
+ int storeHead;
+ /** The index of the first instruction that may be ready to be
+ * written back, and has not yet been written back.
+ */
+ int storeWBIdx;
+ /** The index of the tail instruction in the SQ. */
+ int storeTail;
+
+ /// @todo Consider moving to a more advanced model with write vs read ports
+ /** The number of cache ports available each cycle. */
+ int cachePorts;
+
+ /** The number of used cache ports in this cycle. */
+ int usedPorts;
+
+ bool switchedOut;
+
+ //list<InstSeqNum> mshrSeqNums;
+
+ /** Wire to read information from the issue stage time queue. */
+ typename TimeBuffer<IssueStruct>::wire fromIssue;
+
+ /** Whether or not the LSQ is stalled. */
+ bool stalled;
+ /** The store that causes the stall due to partial store to load
+ * forwarding.
+ */
+ InstSeqNum stallingStoreIsn;
+ /** The index of the above store. */
+ int stallingLoadIdx;
+
+ /** Whether or not a load is blocked due to the memory system. */
+ bool isLoadBlocked;
+
+ bool loadBlockedHandled;
+
+ InstSeqNum blockedLoadSeqNum;
+
+ /** The oldest load that caused a memory ordering violation. */
+ DynInstPtr memDepViolator;
+
+ // Will also need how many read/write ports the Dcache has. Or keep track
+ // of that in stage that is one level up, and only call executeLoad/Store
+ // the appropriate number of times.
+/*
+ // total number of loads forwaded from LSQ stores
+ Stats::Vector<> lsq_forw_loads;
+
+ // total number of loads ignored due to invalid addresses
+ Stats::Vector<> inv_addr_loads;
+
+ // total number of software prefetches ignored due to invalid addresses
+ Stats::Vector<> inv_addr_swpfs;
+
+ // total non-speculative bogus addresses seen (debug var)
+ Counter sim_invalid_addrs;
+ Stats::Vector<> fu_busy; //cumulative fu busy
+
+ // ready loads blocked due to memory disambiguation
+ Stats::Vector<> lsq_blocked_loads;
+
+ Stats::Scalar<> lsqInversion;
+*/
+ public:
+ /** Executes the load at the given index. */
+ template <class T>
+ Fault read(Request *req, T &data, int load_idx);
+
+ /** Executes the store at the given index. */
+ template <class T>
+ Fault write(Request *req, T &data, int store_idx);
+
+ /** Returns the index of the head load instruction. */
+ int getLoadHead() { return loadHead; }
+ /** Returns the sequence number of the head load instruction. */
+ InstSeqNum getLoadHeadSeqNum()
+ {
+ if (loadQueue[loadHead]) {
+ return loadQueue[loadHead]->seqNum;
+ } else {
+ return 0;
+ }
+
+ }
+
+ /** Returns the index of the head store instruction. */
+ int getStoreHead() { return storeHead; }
+ /** Returns the sequence number of the head store instruction. */
+ InstSeqNum getStoreHeadSeqNum()
+ {
+ if (storeQueue[storeHead].inst) {
+ return storeQueue[storeHead].inst->seqNum;
+ } else {
+ return 0;
+ }
+
+ }
+
+ /** Returns whether or not the LSQ unit is stalled. */
+ bool isStalled() { return stalled; }
+};
+
+template <class Impl>
+template <class T>
+Fault
+LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
+{
+ DynInstPtr load_inst = loadQueue[load_idx];
+
+ assert(load_inst);
+
+ assert(!load_inst->isExecuted());
+
+ // Make sure this isn't an uncacheable access
+ // A bit of a hackish way to get uncached accesses to work only if they're
+ // at the head of the LSQ and are ready to commit (at the head of the ROB
+ // too).
+ if (req->getFlags() & UNCACHEABLE &&
+ (load_idx != loadHead || !load_inst->reachedCommit)) {
+ iewStage->rescheduleMemInst(load_inst);
+ return TheISA::genMachineCheckFault();
+ }
+
+ // Check the SQ for any previous stores that might lead to forwarding
+ int store_idx = load_inst->sqIdx;
+
+ int store_size = 0;
+
+ DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
+ "storeHead: %i addr: %#x\n",
+ load_idx, store_idx, storeHead, req->getPaddr());
+
+#if 0
+ if (req->getFlags() & LOCKED) {
+ cpu->lockAddr = req->getPaddr();
+ cpu->lockFlag = true;
+ }
+#endif
+
+ while (store_idx != -1) {
+ // End once we've reached the top of the LSQ
+ if (store_idx == storeWBIdx) {
+ break;
+ }
+
+ // Move the index to one younger
+ if (--store_idx < 0)
+ store_idx += SQEntries;
+
+ assert(storeQueue[store_idx].inst);
+
+ store_size = storeQueue[store_idx].size;
+
+ if (store_size == 0)
+ continue;
+
+ // Check if the store data is within the lower and upper bounds of
+ // addresses that the request needs.
+ bool store_has_lower_limit =
+ req->getVaddr() >= storeQueue[store_idx].inst->effAddr;
+ bool store_has_upper_limit =
+ (req->getVaddr() + req->getSize()) <=
+ (storeQueue[store_idx].inst->effAddr + store_size);
+ bool lower_load_has_store_part =
+ req->getVaddr() < (storeQueue[store_idx].inst->effAddr +
+ store_size);
+ bool upper_load_has_store_part =
+ (req->getVaddr() + req->getSize()) >
+ storeQueue[store_idx].inst->effAddr;
+
+ // If the store's data has all of the data needed, we can forward.
+ if (store_has_lower_limit && store_has_upper_limit) {
+ // Get shift amount for offset into the store's data.
+ int shift_amt = req->getVaddr() & (store_size - 1);
+ // @todo: Magic number, assumes byte addressing
+ shift_amt = shift_amt << 3;
+
+ // Cast this to type T?
+ data = storeQueue[store_idx].data >> shift_amt;
+
+ assert(!load_inst->memData);
+ load_inst->memData = new uint8_t[64];
+
+ memcpy(load_inst->memData, &data, req->getSize());
+
+ DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
+ "addr %#x, data %#x\n",
+ store_idx, req->getVaddr(), *(load_inst->memData));
+/*
+ typename LdWritebackEvent *wb =
+ new typename LdWritebackEvent(load_inst,
+ iewStage);
+
+ // We'll say this has a 1 cycle load-store forwarding latency
+ // for now.
+ // @todo: Need to make this a parameter.
+ wb->schedule(curTick);
+*/
+ // Should keep track of stat for forwarded data
+ return NoFault;
+ } else if ((store_has_lower_limit && lower_load_has_store_part) ||
+ (store_has_upper_limit && upper_load_has_store_part) ||
+ (lower_load_has_store_part && upper_load_has_store_part)) {
+ // This is the partial store-load forwarding case where a store
+ // has only part of the load's data.
+
+ // If it's already been written back, then don't worry about
+ // stalling on it.
+ if (storeQueue[store_idx].completed) {
+ continue;
+ }
+
+ // Must stall load and force it to retry, so long as it's the oldest
+ // load that needs to do so.
+ if (!stalled ||
+ (stalled &&
+ load_inst->seqNum <
+ loadQueue[stallingLoadIdx]->seqNum)) {
+ stalled = true;
+ stallingStoreIsn = storeQueue[store_idx].inst->seqNum;
+ stallingLoadIdx = load_idx;
+ }
+
+ // Tell IQ/mem dep unit that this instruction will need to be
+ // rescheduled eventually
+ iewStage->rescheduleMemInst(load_inst);
+
+ // Do not generate a writeback event as this instruction is not
+ // complete.
+ DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
+ "Store idx %i to load addr %#x\n",
+ store_idx, req->getVaddr());
+
+ return NoFault;
+ }
+ }
+
+ // If there's no forwarding case, then go access memory
+ DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n",
+ load_inst->seqNum, load_inst->readPC());
+
+ assert(!load_inst->memData);
+ load_inst->memData = new uint8_t[64];
+
+ ++usedPorts;
+
+ DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n",
+ load_inst->readPC());
+
+ PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
+ data_pkt->dataStatic(load_inst->memData);
+
+ // if we have a cache, do cache access too
+ if (!dcachePort->sendTiming(data_pkt)) {
+ // There's an older load that's already going to squash.
+ if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
+ return NoFault;
+
+ // Record that the load was blocked due to memory. This
+ // load will squash all instructions after it, be
+ // refetched, and re-executed.
+ isLoadBlocked = true;
+ loadBlockedHandled = false;
+ blockedLoadSeqNum = load_inst->seqNum;
+ // No fault occurred, even though the interface is blocked.
+ return NoFault;
+ }
+
+ if (data_pkt->result != Packet::Success) {
+ DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n");
+ DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
+ load_inst->seqNum);
+ } else {
+ DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n");
+ DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
+ load_inst->seqNum);
+ }
+
+ return NoFault;
+}
+
+template <class Impl>
+template <class T>
+Fault
+LSQUnit<Impl>::write(Request *req, T &data, int store_idx)
+{
+ assert(storeQueue[store_idx].inst);
+
+ DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x"
+ " | storeHead:%i [sn:%i]\n",
+ store_idx, req->getPaddr(), data, storeHead,
+ storeQueue[store_idx].inst->seqNum);
+
+ storeQueue[store_idx].req = req;
+ storeQueue[store_idx].size = sizeof(T);
+ storeQueue[store_idx].data = data;
+
+ // This function only writes the data to the store queue, so no fault
+ // can happen here.
+ return NoFault;
+}
+
+#endif // __CPU_O3_LSQ_UNIT_HH__
--- /dev/null
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/checker/cpu.hh"
+#include "cpu/o3/lsq_unit.hh"
+#include "base/str.hh"
+#include "mem/request.hh"
+
+template<class Impl>
+void
+LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
+{
+/*
+ DPRINTF(IEW, "Load writeback event [sn:%lli]\n", inst->seqNum);
+ DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
+
+ //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
+
+ if (iewStage->isSwitchedOut()) {
+ inst = NULL;
+ return;
+ } else if (inst->isSquashed()) {
+ iewStage->wakeCPU();
+ inst = NULL;
+ return;
+ }
+
+ iewStage->wakeCPU();
+
+ if (!inst->isExecuted()) {
+ inst->setExecuted();
+
+ // Complete access to copy data to proper place.
+ inst->completeAcc();
+ }
+
+ // Need to insert instruction into queue to commit
+ iewStage->instToCommit(inst);
+
+ iewStage->activityThisCycle();
+
+ inst = NULL;
+*/
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::completeStoreDataAccess(DynInstPtr &inst)
+{
+/*
+ DPRINTF(LSQ, "Cache miss complete for store idx:%i\n", storeIdx);
+ DPRINTF(Activity, "Activity: st writeback event idx:%i\n", storeIdx);
+
+ //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
+
+ if (lsqPtr->isSwitchedOut())
+ return;
+
+ lsqPtr->cpu->wakeCPU();
+
+ if (wb)
+ lsqPtr->completeDataAccess(storeIdx);
+ lsqPtr->completeStore(storeIdx);
+*/
+}
+
+template <class Impl>
+Tick
+LSQUnit<Impl>::DcachePort::recvAtomic(PacketPtr pkt)
+{
+ panic("O3CPU model does not work with atomic mode!");
+ return curTick;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::DcachePort::recvFunctional(PacketPtr pkt)
+{
+ panic("O3CPU doesn't expect recvFunctional callback!");
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::DcachePort::recvStatusChange(Status status)
+{
+ if (status == RangeChange)
+ return;
+
+ panic("O3CPU doesn't expect recvStatusChange callback!");
+}
+
+template <class Impl>
+bool
+LSQUnit<Impl>::DcachePort::recvTiming(PacketPtr pkt)
+{
+ lsq->completeDataAccess(pkt);
+ return true;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::DcachePort::recvRetry()
+{
+ panic("Retry unsupported for now!");
+ // we shouldn't get a retry unless we have a packet that we're
+ // waiting to transmit
+/*
+ assert(cpu->dcache_pkt != NULL);
+ assert(cpu->_status == DcacheRetry);
+ PacketPtr tmp = cpu->dcache_pkt;
+ if (sendTiming(tmp)) {
+ cpu->_status = DcacheWaitResponse;
+ cpu->dcache_pkt = NULL;
+ }
+*/
+}
+
+template <class Impl>
+LSQUnit<Impl>::LSQUnit()
+ : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false),
+ loadBlockedHandled(false)
+{
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
+ unsigned maxSQEntries, unsigned id)
+{
+ DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id);
+
+ switchedOut = false;
+
+ lsqID = id;
+
+ // Add 1 for the sentinel entry (they are circular queues).
+ LQEntries = maxLQEntries + 1;
+ SQEntries = maxSQEntries + 1;
+
+ loadQueue.resize(LQEntries);
+ storeQueue.resize(SQEntries);
+
+ loadHead = loadTail = 0;
+
+ storeHead = storeWBIdx = storeTail = 0;
+
+ usedPorts = 0;
+ cachePorts = params->cachePorts;
+
+ Port *mem_dport = params->mem->getPort("");
+ dcachePort->setPeer(mem_dport);
+ mem_dport->setPeer(dcachePort);
+
+ memDepViolator = NULL;
+
+ blockedLoadSeqNum = 0;
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::setCPU(FullCPU *cpu_ptr)
+{
+ cpu = cpu_ptr;
+ dcachePort = new DcachePort(cpu, this);
+}
+
+template<class Impl>
+std::string
+LSQUnit<Impl>::name() const
+{
+ if (Impl::MaxThreads == 1) {
+ return iewStage->name() + ".lsq";
+ } else {
+ return iewStage->name() + ".lsq.thread." + to_string(lsqID);
+ }
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::clearLQ()
+{
+ loadQueue.clear();
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::clearSQ()
+{
+ storeQueue.clear();
+}
+
+#if 0
+template<class Impl>
+void
+LSQUnit<Impl>::setPageTable(PageTable *pt_ptr)
+{
+ DPRINTF(LSQUnit, "Setting the page table pointer.\n");
+ pTable = pt_ptr;
+}
+#endif
+
+template<class Impl>
+void
+LSQUnit<Impl>::switchOut()
+{
+ switchedOut = true;
+ for (int i = 0; i < loadQueue.size(); ++i)
+ loadQueue[i] = NULL;
+
+ assert(storesToWB == 0);
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::takeOverFrom()
+{
+ switchedOut = false;
+ loads = stores = storesToWB = 0;
+
+ loadHead = loadTail = 0;
+
+ storeHead = storeWBIdx = storeTail = 0;
+
+ usedPorts = 0;
+
+ memDepViolator = NULL;
+
+ blockedLoadSeqNum = 0;
+
+ stalled = false;
+ isLoadBlocked = false;
+ loadBlockedHandled = false;
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::resizeLQ(unsigned size)
+{
+ unsigned size_plus_sentinel = size + 1;
+ assert(size_plus_sentinel >= LQEntries);
+
+ if (size_plus_sentinel > LQEntries) {
+ while (size_plus_sentinel > loadQueue.size()) {
+ DynInstPtr dummy;
+ loadQueue.push_back(dummy);
+ LQEntries++;
+ }
+ } else {
+ LQEntries = size_plus_sentinel;
+ }
+
+}
+
+template<class Impl>
+void
+LSQUnit<Impl>::resizeSQ(unsigned size)
+{
+ unsigned size_plus_sentinel = size + 1;
+ if (size_plus_sentinel > SQEntries) {
+ while (size_plus_sentinel > storeQueue.size()) {
+ SQEntry dummy;
+ storeQueue.push_back(dummy);
+ SQEntries++;
+ }
+ } else {
+ SQEntries = size_plus_sentinel;
+ }
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::insert(DynInstPtr &inst)
+{
+ assert(inst->isMemRef());
+
+ assert(inst->isLoad() || inst->isStore());
+
+ if (inst->isLoad()) {
+ insertLoad(inst);
+ } else {
+ insertStore(inst);
+ }
+
+ inst->setInLSQ();
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::insertLoad(DynInstPtr &load_inst)
+{
+ assert((loadTail + 1) % LQEntries != loadHead);
+ assert(loads < LQEntries);
+
+ DPRINTF(LSQUnit, "Inserting load PC %#x, idx:%i [sn:%lli]\n",
+ load_inst->readPC(), loadTail, load_inst->seqNum);
+
+ load_inst->lqIdx = loadTail;
+
+ if (stores == 0) {
+ load_inst->sqIdx = -1;
+ } else {
+ load_inst->sqIdx = storeTail;
+ }
+
+ loadQueue[loadTail] = load_inst;
+
+ incrLdIdx(loadTail);
+
+ ++loads;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::insertStore(DynInstPtr &store_inst)
+{
+ // Make sure it is not full before inserting an instruction.
+ assert((storeTail + 1) % SQEntries != storeHead);
+ assert(stores < SQEntries);
+
+ DPRINTF(LSQUnit, "Inserting store PC %#x, idx:%i [sn:%lli]\n",
+ store_inst->readPC(), storeTail, store_inst->seqNum);
+
+ store_inst->sqIdx = storeTail;
+ store_inst->lqIdx = loadTail;
+
+ storeQueue[storeTail] = SQEntry(store_inst);
+
+ incrStIdx(storeTail);
+
+ ++stores;
+}
+
+template <class Impl>
+typename Impl::DynInstPtr
+LSQUnit<Impl>::getMemDepViolator()
+{
+ DynInstPtr temp = memDepViolator;
+
+ memDepViolator = NULL;
+
+ return temp;
+}
+
+template <class Impl>
+unsigned
+LSQUnit<Impl>::numFreeEntries()
+{
+ unsigned free_lq_entries = LQEntries - loads;
+ unsigned free_sq_entries = SQEntries - stores;
+
+ // Both the LQ and SQ entries have an extra dummy entry to differentiate
+ // empty/full conditions. Subtract 1 from the free entries.
+ if (free_lq_entries < free_sq_entries) {
+ return free_lq_entries - 1;
+ } else {
+ return free_sq_entries - 1;
+ }
+}
+
+template <class Impl>
+int
+LSQUnit<Impl>::numLoadsReady()
+{
+ int load_idx = loadHead;
+ int retval = 0;
+
+ while (load_idx != loadTail) {
+ assert(loadQueue[load_idx]);
+
+ if (loadQueue[load_idx]->readyToIssue()) {
+ ++retval;
+ }
+ }
+
+ return retval;
+}
+
+template <class Impl>
+Fault
+LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
+{
+ // Execute a specific load.
+ Fault load_fault = NoFault;
+
+ DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n",
+ inst->readPC(),inst->seqNum);
+
+ load_fault = inst->initiateAcc();
+
+ // If the instruction faulted, then we need to send it along to commit
+ // without the instruction completing.
+ if (load_fault != NoFault) {
+ // Send this instruction to commit, also make sure iew stage
+ // realizes there is activity.
+ iewStage->instToCommit(inst);
+ iewStage->activityThisCycle();
+ }
+
+ return load_fault;
+}
+
+template <class Impl>
+Fault
+LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
+{
+ using namespace TheISA;
+ // Make sure that a store exists.
+ assert(stores != 0);
+
+ int store_idx = store_inst->sqIdx;
+
+ DPRINTF(LSQUnit, "Executing store PC %#x [sn:%lli]\n",
+ store_inst->readPC(), store_inst->seqNum);
+
+ // Check the recently completed loads to see if any match this store's
+ // address. If so, then we have a memory ordering violation.
+ int load_idx = store_inst->lqIdx;
+
+ Fault store_fault = store_inst->initiateAcc();
+// Fault store_fault = store_inst->execute();
+
+ if (storeQueue[store_idx].size == 0) {
+ DPRINTF(LSQUnit,"Fault on Store PC %#x, [sn:%lli],Size = 0\n",
+ store_inst->readPC(),store_inst->seqNum);
+
+ return store_fault;
+ }
+
+ assert(store_fault == NoFault);
+
+ if (store_inst->isStoreConditional()) {
+ // Store conditionals need to set themselves as able to
+ // writeback if we haven't had a fault by here.
+ storeQueue[store_idx].canWB = true;
+
+ ++storesToWB;
+ }
+
+ if (!memDepViolator) {
+ while (load_idx != loadTail) {
+ // Really only need to check loads that have actually executed
+ // It's safe to check all loads because effAddr is set to
+ // InvalAddr when the dyn inst is created.
+
+ // @todo: For now this is extra conservative, detecting a
+ // violation if the addresses match assuming all accesses
+ // are quad word accesses.
+
+ // @todo: Fix this, magic number being used here
+ if ((loadQueue[load_idx]->effAddr >> 8) ==
+ (store_inst->effAddr >> 8)) {
+ // A load incorrectly passed this store. Squash and refetch.
+ // For now return a fault to show that it was unsuccessful.
+ memDepViolator = loadQueue[load_idx];
+
+ return genMachineCheckFault();
+ }
+
+ incrLdIdx(load_idx);
+ }
+
+ // If we've reached this point, there was no violation.
+ memDepViolator = NULL;
+ }
+
+ return store_fault;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::commitLoad()
+{
+ assert(loadQueue[loadHead]);
+
+ DPRINTF(LSQUnit, "Committing head load instruction, PC %#x\n",
+ loadQueue[loadHead]->readPC());
+
+
+ loadQueue[loadHead] = NULL;
+
+ incrLdIdx(loadHead);
+
+ --loads;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::commitLoads(InstSeqNum &youngest_inst)
+{
+ assert(loads == 0 || loadQueue[loadHead]);
+
+ while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) {
+ commitLoad();
+ }
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst)
+{
+ assert(stores == 0 || storeQueue[storeHead].inst);
+
+ int store_idx = storeHead;
+
+ while (store_idx != storeTail) {
+ assert(storeQueue[store_idx].inst);
+ // Mark any stores that are now committed and have not yet
+ // been marked as able to write back.
+ if (!storeQueue[store_idx].canWB) {
+ if (storeQueue[store_idx].inst->seqNum > youngest_inst) {
+ break;
+ }
+ DPRINTF(LSQUnit, "Marking store as able to write back, PC "
+ "%#x [sn:%lli]\n",
+ storeQueue[store_idx].inst->readPC(),
+ storeQueue[store_idx].inst->seqNum);
+
+ storeQueue[store_idx].canWB = true;
+
+ ++storesToWB;
+ }
+
+ incrStIdx(store_idx);
+ }
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::writebackStores()
+{
+ while (storesToWB > 0 &&
+ storeWBIdx != storeTail &&
+ storeQueue[storeWBIdx].inst &&
+ storeQueue[storeWBIdx].canWB &&
+ usedPorts < cachePorts) {
+
+ // Store didn't write any data so no need to write it back to
+ // memory.
+ if (storeQueue[storeWBIdx].size == 0) {
+ completeStore(storeWBIdx);
+
+ incrStIdx(storeWBIdx);
+
+ continue;
+ }
+/*
+ if (dcacheInterface && dcacheInterface->isBlocked()) {
+ DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
+ " is blocked!\n");
+ break;
+ }
+*/
+ ++usedPorts;
+
+ if (storeQueue[storeWBIdx].inst->isDataPrefetch()) {
+ incrStIdx(storeWBIdx);
+
+ continue;
+ }
+
+ assert(storeQueue[storeWBIdx].req);
+ assert(!storeQueue[storeWBIdx].committed);
+
+ DynInstPtr inst = storeQueue[storeWBIdx].inst;
+
+ Request *req = storeQueue[storeWBIdx].req;
+ storeQueue[storeWBIdx].committed = true;
+
+ assert(!inst->memData);
+ inst->memData = new uint8_t[64];
+ memcpy(inst->memData, (uint8_t *)&storeQueue[storeWBIdx].data, req->getSize());
+
+ PacketPtr data_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast);
+ data_pkt->dataStatic(inst->memData);
+
+ DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x "
+ "to Addr:%#x, data:%#x [sn:%lli]\n",
+ storeWBIdx, storeQueue[storeWBIdx].inst->readPC(),
+ req->getPaddr(), *(inst->memData),
+ storeQueue[storeWBIdx].inst->seqNum);
+
+ if (!dcachePort->sendTiming(data_pkt)) {
+ // Need to handle becoming blocked on a store.
+ } else {
+ /*
+ StoreCompletionEvent *store_event = new
+ StoreCompletionEvent(storeWBIdx, NULL, this);
+ */
+ if (isStalled() &&
+ storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) {
+ DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
+ "load idx:%i\n",
+ stallingStoreIsn, stallingLoadIdx);
+ stalled = false;
+ stallingStoreIsn = 0;
+ iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
+ }
+/*
+ typename LdWritebackEvent *wb = NULL;
+ if (req->flags & LOCKED) {
+ // Stx_C should not generate a system port transaction
+ // if it misses in the cache, but that might be hard
+ // to accomplish without explicit cache support.
+ wb = new typename
+ LdWritebackEvent(storeQueue[storeWBIdx].inst,
+ iewStage);
+ store_event->wbEvent = wb;
+ }
+*/
+ if (data_pkt->result != Packet::Success) {
+ DPRINTF(LSQUnit,"D-Cache Write Miss on idx:%i!\n",
+ storeWBIdx);
+
+ DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
+ storeQueue[storeWBIdx].inst->seqNum);
+
+ //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum);
+
+ //DPRINTF(LSQUnit, "Added MSHR. count = %i\n",mshrSeqNums.size());
+
+ // @todo: Increment stat here.
+ } else {
+ DPRINTF(LSQUnit,"D-Cache: Write Hit on idx:%i !\n",
+ storeWBIdx);
+
+ DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
+ storeQueue[storeWBIdx].inst->seqNum);
+ }
+
+ incrStIdx(storeWBIdx);
+ }
+ }
+
+ // Not sure this should set it to 0.
+ usedPorts = 0;
+
+ assert(stores >= 0 && storesToWB >= 0);
+}
+
+/*template <class Impl>
+void
+LSQUnit<Impl>::removeMSHR(InstSeqNum seqNum)
+{
+ list<InstSeqNum>::iterator mshr_it = find(mshrSeqNums.begin(),
+ mshrSeqNums.end(),
+ seqNum);
+
+ if (mshr_it != mshrSeqNums.end()) {
+ mshrSeqNums.erase(mshr_it);
+ DPRINTF(LSQUnit, "Removing MSHR. count = %i\n",mshrSeqNums.size());
+ }
+}*/
+
+template <class Impl>
+void
+LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
+{
+ DPRINTF(LSQUnit, "Squashing until [sn:%lli]!"
+ "(Loads:%i Stores:%i)\n", squashed_num, loads, stores);
+
+ int load_idx = loadTail;
+ decrLdIdx(load_idx);
+
+ while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) {
+ DPRINTF(LSQUnit,"Load Instruction PC %#x squashed, "
+ "[sn:%lli]\n",
+ loadQueue[load_idx]->readPC(),
+ loadQueue[load_idx]->seqNum);
+
+ if (isStalled() && load_idx == stallingLoadIdx) {
+ stalled = false;
+ stallingStoreIsn = 0;
+ stallingLoadIdx = 0;
+ }
+
+ // Clear the smart pointer to make sure it is decremented.
+ loadQueue[load_idx]->squashed = true;
+ loadQueue[load_idx] = NULL;
+ --loads;
+
+ // Inefficient!
+ loadTail = load_idx;
+
+ decrLdIdx(load_idx);
+ }
+
+ if (isLoadBlocked) {
+ if (squashed_num < blockedLoadSeqNum) {
+ isLoadBlocked = false;
+ loadBlockedHandled = false;
+ blockedLoadSeqNum = 0;
+ }
+ }
+
+ int store_idx = storeTail;
+ decrStIdx(store_idx);
+
+ while (stores != 0 &&
+ storeQueue[store_idx].inst->seqNum > squashed_num) {
+ // Instructions marked as can WB are already committed.
+ if (storeQueue[store_idx].canWB) {
+ break;
+ }
+
+ DPRINTF(LSQUnit,"Store Instruction PC %#x squashed, "
+ "idx:%i [sn:%lli]\n",
+ storeQueue[store_idx].inst->readPC(),
+ store_idx, storeQueue[store_idx].inst->seqNum);
+
+ // I don't think this can happen. It should have been cleared
+ // by the stalling load.
+ if (isStalled() &&
+ storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
+ panic("Is stalled should have been cleared by stalling load!\n");
+ stalled = false;
+ stallingStoreIsn = 0;
+ }
+
+ // Clear the smart pointer to make sure it is decremented.
+ storeQueue[store_idx].inst->squashed = true;
+ storeQueue[store_idx].inst = NULL;
+ storeQueue[store_idx].canWB = 0;
+
+ storeQueue[store_idx].req = NULL;
+ --stores;
+
+ // Inefficient!
+ storeTail = store_idx;
+
+ decrStIdx(store_idx);
+ }
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::completeStore(int store_idx)
+{
+ assert(storeQueue[store_idx].inst);
+ storeQueue[store_idx].completed = true;
+ --storesToWB;
+ // A bit conservative because a store completion may not free up entries,
+ // but hopefully avoids two store completions in one cycle from making
+ // the CPU tick twice.
+ cpu->activityThisCycle();
+
+ if (store_idx == storeHead) {
+ do {
+ incrStIdx(storeHead);
+
+ --stores;
+ } while (storeQueue[storeHead].completed &&
+ storeHead != storeTail);
+
+ iewStage->updateLSQNextCycle = true;
+ }
+
+ DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head "
+ "idx:%i\n",
+ storeQueue[store_idx].inst->seqNum, store_idx, storeHead);
+
+ if (isStalled() &&
+ storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
+ DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
+ "load idx:%i\n",
+ stallingStoreIsn, stallingLoadIdx);
+ stalled = false;
+ stallingStoreIsn = 0;
+ iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
+ }
+
+ storeQueue[store_idx].inst->setCompleted();
+
+ // Tell the checker we've completed this instruction. Some stores
+ // may get reported twice to the checker, but the checker can
+ // handle that case.
+ if (cpu->checker) {
+ cpu->checker->tick(storeQueue[store_idx].inst);
+ }
+}
+
+template <class Impl>
+inline void
+LSQUnit<Impl>::incrStIdx(int &store_idx)
+{
+ if (++store_idx >= SQEntries)
+ store_idx = 0;
+}
+
+template <class Impl>
+inline void
+LSQUnit<Impl>::decrStIdx(int &store_idx)
+{
+ if (--store_idx < 0)
+ store_idx += SQEntries;
+}
+
+template <class Impl>
+inline void
+LSQUnit<Impl>::incrLdIdx(int &load_idx)
+{
+ if (++load_idx >= LQEntries)
+ load_idx = 0;
+}
+
+template <class Impl>
+inline void
+LSQUnit<Impl>::decrLdIdx(int &load_idx)
+{
+ if (--load_idx < 0)
+ load_idx += LQEntries;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::dumpInsts()
+{
+ cprintf("Load store queue: Dumping instructions.\n");
+ cprintf("Load queue size: %i\n", loads);
+ cprintf("Load queue: ");
+
+ int load_idx = loadHead;
+
+ while (load_idx != loadTail && loadQueue[load_idx]) {
+ cprintf("%#x ", loadQueue[load_idx]->readPC());
+
+ incrLdIdx(load_idx);
+ }
+
+ cprintf("Store queue size: %i\n", stores);
+ cprintf("Store queue: ");
+
+ int store_idx = storeHead;
+
+ while (store_idx != storeTail && storeQueue[store_idx].inst) {
+ cprintf("%#x ", storeQueue[store_idx].inst->readPC());
+
+ incrStIdx(store_idx);
+ }
+
+ cprintf("\n");
+}
#include "arch/isa_traits.hh"
#include "arch/faults.hh"
+#include "arch/types.hh"
#include "base/trace.hh"
#include "config/full_system.hh"
#include "cpu/o3/comm.hh"
/**
* Simple physical register file class.
- * This really only depends on the ISA, and not the Impl. Things that are
- * in the ifdef FULL_SYSTEM are pretty dependent on the ISA, and probably
- * should go in the AlphaFullCPU.
+ * Right now this is specific to Alpha until we decide if/how to make things
+ * generic enough to support other ISAs.
*/
template <class Impl>
class PhysRegFile
protected:
typedef TheISA::IntReg IntReg;
typedef TheISA::FloatReg FloatReg;
+ typedef TheISA::FloatRegBits FloatRegBits;
typedef TheISA::MiscRegFile MiscRegFile;
typedef TheISA::MiscReg MiscReg;
+
+ typedef union {
+ FloatReg d;
+ FloatRegBits q;
+ } PhysFloatReg;
+
// Note that most of the definitions of the IntReg, FloatReg, etc. exist
// within the Impl/ISA class and not within this PhysRegFile class.
assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
- FloatReg floatReg = floatRegFile.readReg(reg_idx, width);
+ FloatReg floatReg = floatRegFile[reg_idx].d;
DPRINTF(IEW, "RegFile: Access to %d byte float register %i, has "
"data %8.8d\n", int(reg_idx), (double)floatReg);
assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
- FloatReg floatReg = floatRegFile.readReg(reg_idx);
+ FloatReg floatReg = floatRegFile[reg_idx].d;
DPRINTF(IEW, "RegFile: Access to float register %i, has "
"data %8.8d\n", int(reg_idx), (double)floatReg);
assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
- FloatRegBits floatRegBits = floatRegFile.readRegBits(reg_idx, width);
+ FloatRegBits floatRegBits = floatRegFile[reg_idx].q;
DPRINTF(IEW, "RegFile: Access to %d byte float register %i as int, "
"has data %lli\n", int(reg_idx), (uint64_t)floatRegBits);
assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
- FloatRegBits floatRegBits = floatRegFile.readRegBits(reg_idx);
+ FloatRegBits floatRegBits = floatRegFile[reg_idx].q;
DPRINTF(IEW, "RegFile: Access to float register %i as int, "
"has data %lli\n", int(reg_idx), (uint64_t)floatRegBits);
int(reg_idx), (double)val);
if (reg_idx != TheISA::ZeroReg)
- floatRegFile.setReg(reg_idx, val, width);
+ floatRegFile[reg_idx].d = width;
}
/** Sets a double precision floating point register to the given value. */
int(reg_idx), (double)val);
if (reg_idx != TheISA::ZeroReg)
- floatRegFile.setReg(reg_idx, val);
+ floatRegFile[reg_idx].d = val;
}
/** Sets a floating point register to the given integer value. */
DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n",
int(reg_idx), (uint64_t)val);
- floatRegFile.setRegBits(reg_idx, val, width);
+ floatRegFile[reg_idx].q = val;
}
void setFloatRegBits(PhysRegIndex reg_idx, FloatRegBits val)
DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n",
int(reg_idx), (uint64_t)val);
+
+ floatRegFile[reg_idx].q = val;
+ }
+
+ MiscReg readMiscReg(int misc_reg, unsigned thread_id)
+ {
+ return miscRegs[thread_id].readReg(misc_reg);
}
MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault,
std::vector<IntReg> intRegFile;
/** Floating point register file. */
- std::vector<FloatReg> floatRegFile;
+ std::vector<PhysFloatReg> floatRegFile;
/** Miscellaneous register file. */
MiscRegFile miscRegs[Impl::MaxThreads];
#ifndef __CPU_O3_SAT_COUNTER_HH__
#define __CPU_O3_SAT_COUNTER_HH__
+#include "base/misc.hh"
#include "sim/host.hh"
/**
--- /dev/null
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/o3/scoreboard.hh"
+
+Scoreboard::Scoreboard(unsigned activeThreads,
+ unsigned _numLogicalIntRegs,
+ unsigned _numPhysicalIntRegs,
+ unsigned _numLogicalFloatRegs,
+ unsigned _numPhysicalFloatRegs,
+ unsigned _numMiscRegs,
+ unsigned _zeroRegIdx)
+ : numLogicalIntRegs(_numLogicalIntRegs),
+ numPhysicalIntRegs(_numPhysicalIntRegs),
+ numLogicalFloatRegs(_numLogicalFloatRegs),
+ numPhysicalFloatRegs(_numPhysicalFloatRegs),
+ numMiscRegs(_numMiscRegs),
+ zeroRegIdx(_zeroRegIdx)
+{
+ //Get Register Sizes
+ numLogicalRegs = numLogicalIntRegs + numLogicalFloatRegs;
+ numPhysicalRegs = numPhysicalIntRegs + numPhysicalFloatRegs;
+
+ //Resize scoreboard appropriately
+ regScoreBoard.resize(numPhysicalRegs + (numMiscRegs * activeThreads));
+
+ //Initialize values
+ for (int i=0; i < numLogicalIntRegs * activeThreads; i++) {
+ regScoreBoard[i] = 1;
+ }
+
+ for (int i= numPhysicalIntRegs;
+ i < numPhysicalIntRegs + (numLogicalFloatRegs * activeThreads);
+ i++) {
+ regScoreBoard[i] = 1;
+ }
+
+ for (int i = numPhysicalRegs;
+ i < numPhysicalRegs + (numMiscRegs * activeThreads);
+ i++) {
+ regScoreBoard[i] = 1;
+ }
+}
+
+std::string
+Scoreboard::name() const
+{
+ return "cpu.scoreboard";
+}
+
+bool
+Scoreboard::getReg(PhysRegIndex phys_reg)
+{
+ // Always ready if int or fp zero reg.
+ if (phys_reg == zeroRegIdx ||
+ phys_reg == (zeroRegIdx + numPhysicalIntRegs)) {
+ return 1;
+ }
+
+ return regScoreBoard[phys_reg];
+}
+
+void
+Scoreboard::setReg(PhysRegIndex phys_reg)
+{
+ DPRINTF(Scoreboard, "Setting reg %i as ready\n", phys_reg);
+
+ regScoreBoard[phys_reg] = 1;
+}
+
+void
+Scoreboard::unsetReg(PhysRegIndex ready_reg)
+{
+ if (ready_reg == zeroRegIdx ||
+ ready_reg == (zeroRegIdx + numPhysicalIntRegs)) {
+ // Don't do anything if int or fp zero reg.
+ return;
+ }
+
+ regScoreBoard[ready_reg] = 0;
+}
--- /dev/null
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_O3_SCOREBOARD_HH__
+#define __CPU_O3_SCOREBOARD_HH__
+
+#include <iostream>
+#include <utility>
+#include <vector>
+#include "arch/alpha/isa_traits.hh"
+#include "base/trace.hh"
+#include "base/traceflags.hh"
+#include "cpu/o3/comm.hh"
+
+/**
+ * Implements a simple scoreboard to track which registers are ready.
+ * This class assumes that the fp registers start, index wise, right after
+ * the integer registers. The misc. registers start, index wise, right after
+ * the fp registers.
+ * @todo: Fix up handling of the zero register in case the decoder does not
+ * automatically make insts that write the zero register into nops.
+ */
+class Scoreboard
+{
+ public:
+ /** Constructs a scoreboard.
+ * @param activeThreads The number of active threads.
+ * @param _numLogicalIntRegs Number of logical integer registers.
+ * @param _numPhysicalIntRegs Number of physical integer registers.
+ * @param _numLogicalFloatRegs Number of logical fp registers.
+ * @param _numPhysicalFloatRegs Number of physical fp registers.
+ * @param _numMiscRegs Number of miscellaneous registers.
+ * @param _zeroRegIdx Index of the zero register.
+ */
+ Scoreboard(unsigned activeThreads,
+ unsigned _numLogicalIntRegs,
+ unsigned _numPhysicalIntRegs,
+ unsigned _numLogicalFloatRegs,
+ unsigned _numPhysicalFloatRegs,
+ unsigned _numMiscRegs,
+ unsigned _zeroRegIdx);
+
+ /** Destructor. */
+ ~Scoreboard() {}
+
+ /** Returns the name of the scoreboard. */
+ std::string name() const;
+
+ /** Checks if the register is ready. */
+ bool getReg(PhysRegIndex ready_reg);
+
+ /** Sets the register as ready. */
+ void setReg(PhysRegIndex phys_reg);
+
+ /** Sets the register as not ready. */
+ void unsetReg(PhysRegIndex ready_reg);
+
+ private:
+ /** Scoreboard of physical integer registers, saying whether or not they
+ * are ready.
+ */
+ std::vector<bool> regScoreBoard;
+
+ /** Number of logical integer registers. */
+ int numLogicalIntRegs;
+
+ /** Number of physical integer registers. */
+ int numPhysicalIntRegs;
+
+ /** Number of logical floating point registers. */
+ int numLogicalFloatRegs;
+
+ /** Number of physical floating point registers. */
+ int numPhysicalFloatRegs;
+
+ /** Number of miscellaneous registers. */
+ int numMiscRegs;
+
+ /** Number of logical integer + float registers. */
+ int numLogicalRegs;
+
+ /** Number of physical integer + float registers. */
+ int numPhysicalRegs;
+
+ /** The logical index of the zero register. */
+ int zeroRegIdx;
+};
+
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_O3_THREAD_STATE_HH__
+#define __CPU_O3_THREAD_STATE_HH__
+
+#include "arch/faults.hh"
+#include "arch/isa_traits.hh"
+#include "cpu/exec_context.hh"
+#include "cpu/thread_state.hh"
+
+class Event;
+class Process;
+
+#if FULL_SYSTEM
+class EndQuiesceEvent;
+class FunctionProfile;
+class ProfileNode;
+#else
+class FunctionalMemory;
+class Process;
+#endif
+
+/**
+ * Class that has various thread state, such as the status, the
+ * current instruction being processed, whether or not the thread has
+ * a trap pending or is being externally updated, the ExecContext
+ * proxy pointer, etc. It also handles anything related to a specific
+ * thread's process, such as syscalls and checking valid addresses.
+ */
+template <class Impl>
+struct O3ThreadState : public ThreadState {
+ typedef ExecContext::Status Status;
+ typedef typename Impl::FullCPU FullCPU;
+
+ Status _status;
+
+ // Current instruction
+ TheISA::MachInst inst;
+ private:
+ FullCPU *cpu;
+ public:
+
+ bool inSyscall;
+
+ bool trapPending;
+
+#if FULL_SYSTEM
+ O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem)
+ : ThreadState(-1, _thread_num, _mem),
+ inSyscall(0), trapPending(0)
+ { }
+#else
+ O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid)
+ : ThreadState(-1, _thread_num, NULL, _process, _asid),
+ cpu(_cpu), inSyscall(0), trapPending(0)
+ { }
+
+ O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem,
+ int _asid)
+ : ThreadState(-1, _thread_num, _mem, NULL, _asid),
+ cpu(_cpu), inSyscall(0), trapPending(0)
+ { }
+#endif
+
+ ExecContext *xcProxy;
+
+ ExecContext *getXCProxy() { return xcProxy; }
+
+ Status status() const { return _status; }
+
+ void setStatus(Status new_status) { _status = new_status; }
+
+ bool misspeculating() { return false; }
+
+ void setInst(TheISA::MachInst _inst) { inst = _inst; }
+
+ Counter readFuncExeInst() { return funcExeInst; }
+
+ void setFuncExeInst(Counter new_val) { funcExeInst = new_val; }
+
+#if !FULL_SYSTEM
+ void syscall(int64_t callnum) { process->syscall(callnum, xcProxy); }
+#endif
+};
+
+#endif // __CPU_O3_THREAD_STATE_HH__
/**
* Array mapping OpClass enum values to strings. Defined in op_class.cc.
*/
-extern const char *opClassStrings[];
+extern const char *opClassStrings[Num_OpClasses];
#endif // __CPU__OP_CLASS_HH__
--- /dev/null
+
+#include "cpu/ozone/back_end_impl.hh"
+#include "cpu/ozone/ozone_impl.hh"
+
+//template class BackEnd<OzoneImpl>;
--- /dev/null
+
+#ifndef __CPU_OZONE_BACK_END_HH__
+#define __CPU_OZONE_BACK_END_HH__
+
+#include <list>
+#include <queue>
+#include <string>
+
+#include "arch/faults.hh"
+#include "base/timebuf.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/ozone/rename_table.hh"
+#include "cpu/ozone/thread_state.hh"
+#include "mem/request.hh"
+#include "sim/eventq.hh"
+
+class ExecContext;
+
+template <class Impl>
+class OzoneThreadState;
+
+template <class Impl>
+class BackEnd
+{
+ public:
+ typedef OzoneThreadState<Impl> Thread;
+
+ typedef typename Impl::Params Params;
+ typedef typename Impl::DynInst DynInst;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::FullCPU FullCPU;
+ typedef typename Impl::FrontEnd FrontEnd;
+ typedef typename Impl::FullCPU::CommStruct CommStruct;
+
+ struct SizeStruct {
+ int size;
+ };
+
+ typedef SizeStruct DispatchToIssue;
+ typedef SizeStruct IssueToExec;
+ typedef SizeStruct ExecToCommit;
+ typedef SizeStruct Writeback;
+
+ TimeBuffer<DispatchToIssue> d2i;
+ typename TimeBuffer<DispatchToIssue>::wire instsToDispatch;
+ TimeBuffer<IssueToExec> i2e;
+ typename TimeBuffer<IssueToExec>::wire instsToExecute;
+ TimeBuffer<ExecToCommit> e2c;
+ TimeBuffer<Writeback> numInstsToWB;
+
+ TimeBuffer<CommStruct> *comm;
+ typename TimeBuffer<CommStruct>::wire toIEW;
+ typename TimeBuffer<CommStruct>::wire fromCommit;
+
+ class InstQueue {
+ enum queue {
+ NonSpec,
+ IQ,
+ ToBeScheduled,
+ ReadyList,
+ ReplayList
+ };
+ struct pqCompare {
+ bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
+ {
+ return lhs->seqNum > rhs->seqNum;
+ }
+ };
+ public:
+ InstQueue(Params *params);
+
+ std::string name() const;
+
+ void regStats();
+
+ void setIssueExecQueue(TimeBuffer<IssueToExec> *i2e_queue);
+
+ void setBE(BackEnd *_be) { be = _be; }
+
+ void insert(DynInstPtr &inst);
+
+ void scheduleReadyInsts();
+
+ void scheduleNonSpec(const InstSeqNum &sn);
+
+ DynInstPtr getReadyInst();
+
+ void commit(const InstSeqNum &sn) {}
+
+ void squash(const InstSeqNum &sn);
+
+ int wakeDependents(DynInstPtr &inst);
+
+ /** Tells memory dependence unit that a memory instruction needs to be
+ * rescheduled. It will re-execute once replayMemInst() is called.
+ */
+ void rescheduleMemInst(DynInstPtr &inst);
+
+ /** Re-executes all rescheduled memory instructions. */
+ void replayMemInst(DynInstPtr &inst);
+
+ /** Completes memory instruction. */
+ void completeMemInst(DynInstPtr &inst);
+
+ void violation(DynInstPtr &inst, DynInstPtr &violation) { }
+
+ bool isFull() { return numInsts >= size; }
+
+ void dumpInsts();
+
+ private:
+ bool find(queue q, typename std::list<DynInstPtr>::iterator it);
+ BackEnd *be;
+ TimeBuffer<IssueToExec> *i2e;
+ typename TimeBuffer<IssueToExec>::wire numIssued;
+ typedef typename std::list<DynInstPtr> InstList;
+ typedef typename std::list<DynInstPtr>::iterator InstListIt;
+ typedef typename std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> ReadyInstQueue;
+ // Not sure I need the IQ list; it just needs to be a count.
+ InstList iq;
+ InstList toBeScheduled;
+ InstList readyList;
+ InstList nonSpec;
+ InstList replayList;
+ ReadyInstQueue readyQueue;
+ public:
+ int size;
+ int numInsts;
+ int width;
+
+ Stats::VectorDistribution<> occ_dist;
+
+ Stats::Vector<> inst_count;
+ Stats::Vector<> peak_inst_count;
+ Stats::Scalar<> empty_count;
+ Stats::Scalar<> current_count;
+ Stats::Scalar<> fullCount;
+
+ Stats::Formula occ_rate;
+ Stats::Formula avg_residency;
+ Stats::Formula empty_rate;
+ Stats::Formula full_rate;
+ };
+
+ /** LdWriteback event for a load completion. */
+ class LdWritebackEvent : public Event {
+ private:
+ /** Instruction that is writing back data to the register file. */
+ DynInstPtr inst;
+ /** Pointer to IEW stage. */
+ BackEnd *be;
+
+ public:
+ /** Constructs a load writeback event. */
+ LdWritebackEvent(DynInstPtr &_inst, BackEnd *be);
+
+ /** Processes writeback event. */
+ virtual void process();
+ /** Returns the description of the writeback event. */
+ virtual const char *description();
+ };
+
+ BackEnd(Params *params);
+
+ std::string name() const;
+
+ void regStats();
+
+ void setCPU(FullCPU *cpu_ptr)
+ { cpu = cpu_ptr; }
+
+ void setFrontEnd(FrontEnd *front_end_ptr)
+ { frontEnd = front_end_ptr; }
+
+ void setXC(ExecContext *xc_ptr)
+ { xc = xc_ptr; }
+
+ void setThreadState(Thread *thread_ptr)
+ { thread = thread_ptr; }
+
+ void setCommBuffer(TimeBuffer<CommStruct> *_comm);
+
+ void tick();
+ void squash();
+ void squashFromXC();
+ bool xcSquash;
+
+ template <class T>
+ Fault read(RequestPtr req, T &data, int load_idx);
+
+ template <class T>
+ Fault write(RequestPtr req, T &data, int store_idx);
+
+ Addr readCommitPC() { return commitPC; }
+
+ Addr commitPC;
+
+ bool robEmpty() { return instList.empty(); }
+
+ bool isFull() { return numInsts >= numROBEntries; }
+ bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }
+
+ /** Tells memory dependence unit that a memory instruction needs to be
+ * rescheduled. It will re-execute once replayMemInst() is called.
+ */
+ void rescheduleMemInst(DynInstPtr &inst)
+ { IQ.rescheduleMemInst(inst); }
+
+ /** Re-executes all rescheduled memory instructions. */
+ void replayMemInst(DynInstPtr &inst)
+ { IQ.replayMemInst(inst); }
+
+ /** Completes memory instruction. */
+ void completeMemInst(DynInstPtr &inst)
+ { IQ.completeMemInst(inst); }
+
+ void fetchFault(Fault &fault);
+
+ private:
+ void updateStructures();
+ void dispatchInsts();
+ void dispatchStall();
+ void checkDispatchStatus();
+ void scheduleReadyInsts();
+ void executeInsts();
+ void commitInsts();
+ void addToIQ(DynInstPtr &inst);
+ void addToLSQ(DynInstPtr &inst);
+ void instToCommit(DynInstPtr &inst);
+ void writebackInsts();
+ bool commitInst(int inst_num);
+ void squash(const InstSeqNum &sn);
+ void squashDueToBranch(DynInstPtr &inst);
+ void squashDueToMemBlocked(DynInstPtr &inst);
+ void updateExeInstStats(DynInstPtr &inst);
+ void updateComInstStats(DynInstPtr &inst);
+
+ public:
+ FullCPU *cpu;
+
+ FrontEnd *frontEnd;
+
+ ExecContext *xc;
+
+ Thread *thread;
+
+ enum Status {
+ Running,
+ Idle,
+ DcacheMissStall,
+ DcacheMissComplete,
+ Blocked
+ };
+
+ Status status;
+
+ Status dispatchStatus;
+
+ Counter funcExeInst;
+
+ private:
+// typedef typename Impl::InstQueue InstQueue;
+
+ InstQueue IQ;
+
+ typedef typename Impl::LdstQueue LdstQueue;
+
+ LdstQueue LSQ;
+ public:
+ RenameTable<Impl> commitRenameTable;
+
+ RenameTable<Impl> renameTable;
+ private:
+ class DCacheCompletionEvent : public Event
+ {
+ private:
+ BackEnd *be;
+
+ public:
+ DCacheCompletionEvent(BackEnd *_be);
+
+ virtual void process();
+ virtual const char *description();
+ };
+
+ friend class DCacheCompletionEvent;
+
+ DCacheCompletionEvent cacheCompletionEvent;
+
+ MemInterface *dcacheInterface;
+
+ Request *memReq;
+
+ // General back end width. Used if the more specific isn't given.
+ int width;
+
+ // Dispatch width.
+ int dispatchWidth;
+ int numDispatchEntries;
+ int dispatchSize;
+
+ int issueWidth;
+
+ // Writeback width
+ int wbWidth;
+
+ // Commit width
+ int commitWidth;
+
+ /** Index into queue of instructions being written back. */
+ unsigned wbNumInst;
+
+ /** Cycle number within the queue of instructions being written
+ * back. Used in case there are too many instructions writing
+ * back at the current cycle and writesbacks need to be scheduled
+ * for the future. See comments in instToCommit().
+ */
+ unsigned wbCycle;
+
+ int numROBEntries;
+ int numInsts;
+
+ bool squashPending;
+ InstSeqNum squashSeqNum;
+ Addr squashNextPC;
+
+ Fault faultFromFetch;
+
+ private:
+ typedef typename std::list<DynInstPtr>::iterator InstListIt;
+
+ std::list<DynInstPtr> instList;
+ std::list<DynInstPtr> dispatch;
+ std::list<DynInstPtr> writeback;
+
+ int latency;
+
+ int squashLatency;
+
+ bool exactFullStall;
+
+ bool fetchRedirect[Impl::MaxThreads];
+
+ // number of cycles stalled for D-cache misses
+/* Stats::Scalar<> dcacheStallCycles;
+ Counter lastDcacheStall;
+*/
+ Stats::Vector<> rob_cap_events;
+ Stats::Vector<> rob_cap_inst_count;
+ Stats::Vector<> iq_cap_events;
+ Stats::Vector<> iq_cap_inst_count;
+ // total number of instructions executed
+ Stats::Vector<> exe_inst;
+ Stats::Vector<> exe_swp;
+ Stats::Vector<> exe_nop;
+ Stats::Vector<> exe_refs;
+ Stats::Vector<> exe_loads;
+ Stats::Vector<> exe_branches;
+
+ Stats::Vector<> issued_ops;
+
+ // total number of loads forwaded from LSQ stores
+ Stats::Vector<> lsq_forw_loads;
+
+ // total number of loads ignored due to invalid addresses
+ Stats::Vector<> inv_addr_loads;
+
+ // total number of software prefetches ignored due to invalid addresses
+ Stats::Vector<> inv_addr_swpfs;
+ // ready loads blocked due to memory disambiguation
+ Stats::Vector<> lsq_blocked_loads;
+
+ Stats::Scalar<> lsqInversion;
+
+ Stats::Vector<> n_issued_dist;
+ Stats::VectorDistribution<> issue_delay_dist;
+
+ Stats::VectorDistribution<> queue_res_dist;
+/*
+ Stats::Vector<> stat_fu_busy;
+ Stats::Vector2d<> stat_fuBusy;
+ Stats::Vector<> dist_unissued;
+ Stats::Vector2d<> stat_issued_inst_type;
+
+ Stats::Formula misspec_cnt;
+ Stats::Formula misspec_ipc;
+ Stats::Formula issue_rate;
+ Stats::Formula issue_stores;
+ Stats::Formula issue_op_rate;
+ Stats::Formula fu_busy_rate;
+ Stats::Formula commit_stores;
+ Stats::Formula commit_ipc;
+ Stats::Formula commit_ipb;
+ Stats::Formula lsq_inv_rate;
+*/
+ Stats::Vector<> writeback_count;
+ Stats::Vector<> producer_inst;
+ Stats::Vector<> consumer_inst;
+ Stats::Vector<> wb_penalized;
+
+ Stats::Formula wb_rate;
+ Stats::Formula wb_fanout;
+ Stats::Formula wb_penalized_rate;
+
+ // total number of instructions committed
+ Stats::Vector<> stat_com_inst;
+ Stats::Vector<> stat_com_swp;
+ Stats::Vector<> stat_com_refs;
+ Stats::Vector<> stat_com_loads;
+ Stats::Vector<> stat_com_membars;
+ Stats::Vector<> stat_com_branches;
+
+ Stats::Distribution<> n_committed_dist;
+
+ Stats::Scalar<> commit_eligible_samples;
+ Stats::Vector<> commit_eligible;
+
+ Stats::Scalar<> ROB_fcount;
+ Stats::Formula ROB_full_rate;
+
+ Stats::Vector<> ROB_count; // cumulative ROB occupancy
+ Stats::Formula ROB_occ_rate;
+ Stats::VectorDistribution<> ROB_occ_dist;
+ public:
+ void dumpInsts();
+};
+
+template <class Impl>
+template <class T>
+Fault
+BackEnd<Impl>::read(RequestPtr req, T &data, int load_idx)
+{
+/* memReq->reset(addr, sizeof(T), flags);
+
+ // translate to physical address
+ Fault fault = cpu->translateDataReadReq(memReq);
+
+ // if we have a cache, do cache access too
+ if (fault == NoFault && dcacheInterface) {
+ memReq->cmd = Read;
+ memReq->completionEvent = NULL;
+ memReq->time = curTick;
+ memReq->flags &= ~INST_READ;
+ MemAccessResult result = dcacheInterface->access(memReq);
+
+ // Ugly hack to get an event scheduled *only* if the access is
+ // a miss. We really should add first-class support for this
+ // at some point.
+ if (result != MA_HIT && dcacheInterface->doEvents()) {
+ // Fix this hack for keeping funcExeInst correct with loads that
+ // are executed twice.
+ --funcExeInst;
+
+ memReq->completionEvent = &cacheCompletionEvent;
+ lastDcacheStall = curTick;
+// unscheduleTickEvent();
+// status = DcacheMissStall;
+ DPRINTF(OzoneCPU, "Dcache miss stall!\n");
+ } else {
+ // do functional access
+ fault = thread->mem->read(memReq, data);
+
+ }
+ }
+*/
+/*
+ if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
+ recordEvent("Uncached Read");
+*/
+ return LSQ.read(req, data, load_idx);
+}
+
+template <class Impl>
+template <class T>
+Fault
+BackEnd<Impl>::write(RequestPtr req, T &data, int store_idx)
+{
+/*
+ memReq->reset(addr, sizeof(T), flags);
+
+ // translate to physical address
+ Fault fault = cpu->translateDataWriteReq(memReq);
+
+ if (fault == NoFault && dcacheInterface) {
+ memReq->cmd = Write;
+ memcpy(memReq->data,(uint8_t *)&data,memReq->size);
+ memReq->completionEvent = NULL;
+ memReq->time = curTick;
+ memReq->flags &= ~INST_READ;
+ MemAccessResult result = dcacheInterface->access(memReq);
+
+ // Ugly hack to get an event scheduled *only* if the access is
+ // a miss. We really should add first-class support for this
+ // at some point.
+ if (result != MA_HIT && dcacheInterface->doEvents()) {
+ memReq->completionEvent = &cacheCompletionEvent;
+ lastDcacheStall = curTick;
+// unscheduleTickEvent();
+// status = DcacheMissStall;
+ DPRINTF(OzoneCPU, "Dcache miss stall!\n");
+ }
+ }
+
+ if (res && (fault == NoFault))
+ *res = memReq->result;
+ */
+/*
+ if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
+ recordEvent("Uncached Write");
+*/
+ return LSQ.write(req, data, store_idx);
+}
+
+#endif // __CPU_OZONE_BACK_END_HH__
--- /dev/null
+
+#include "encumbered/cpu/full/op_class.hh"
+#include "cpu/ozone/back_end.hh"
+
+template <class Impl>
+BackEnd<Impl>::InstQueue::InstQueue(Params *params)
+ : size(params->numIQEntries), numInsts(0), width(params->issueWidth)
+{
+}
+
+template <class Impl>
+std::string
+BackEnd<Impl>::InstQueue::name() const
+{
+ return be->name() + ".iq";
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::InstQueue::regStats()
+{
+ using namespace Stats;
+
+ occ_dist
+ .init(1, 0, size, 2)
+ .name(name() + "occ_dist")
+ .desc("IQ Occupancy per cycle")
+ .flags(total | cdf)
+ ;
+
+ inst_count
+ .init(1)
+ .name(name() + "cum_num_insts")
+ .desc("Total occupancy")
+ .flags(total)
+ ;
+
+ peak_inst_count
+ .init(1)
+ .name(name() + "peak_occupancy")
+ .desc("Peak IQ occupancy")
+ .flags(total)
+ ;
+
+ current_count
+ .name(name() + "current_count")
+ .desc("Occupancy this cycle")
+ ;
+
+ empty_count
+ .name(name() + "empty_count")
+ .desc("Number of empty cycles")
+ ;
+
+ fullCount
+ .name(name() + "full_count")
+ .desc("Number of full cycles")
+ ;
+
+
+ occ_rate
+ .name(name() + "occ_rate")
+ .desc("Average occupancy")
+ .flags(total)
+ ;
+ occ_rate = inst_count / be->cpu->numCycles;
+
+ avg_residency
+ .name(name() + "avg_residency")
+ .desc("Average IQ residency")
+ .flags(total)
+ ;
+ avg_residency = occ_rate / be->cpu->numCycles;
+
+ empty_rate
+ .name(name() + "empty_rate")
+ .desc("Fraction of cycles empty")
+ ;
+ empty_rate = 100 * empty_count / be->cpu->numCycles;
+
+ full_rate
+ .name(name() + "full_rate")
+ .desc("Fraction of cycles full")
+ ;
+ full_rate = 100 * fullCount / be->cpu->numCycles;
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::InstQueue::setIssueExecQueue(TimeBuffer<IssueToExec> *i2e_queue)
+{
+ i2e = i2e_queue;
+ numIssued = i2e->getWire(0);
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::InstQueue::insert(DynInstPtr &inst)
+{
+ numInsts++;
+ inst_count[0]++;
+ if (!inst->isNonSpeculative()) {
+ DPRINTF(BE, "Instruction [sn:%lli] added to IQ\n", inst->seqNum);
+ if (inst->readyToIssue()) {
+ toBeScheduled.push_front(inst);
+ inst->iqIt = toBeScheduled.begin();
+ inst->iqItValid = true;
+ } else {
+ iq.push_front(inst);
+ inst->iqIt = iq.begin();
+ inst->iqItValid = true;
+ }
+ } else {
+ DPRINTF(BE, "Nonspeculative instruction [sn:%lli] added to IQ\n", inst->seqNum);
+ nonSpec.push_front(inst);
+ inst->iqIt = nonSpec.begin();
+ inst->iqItValid = true;
+ }
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::InstQueue::scheduleReadyInsts()
+{
+ int scheduled = numIssued->size;
+ InstListIt iq_it = --toBeScheduled.end();
+ InstListIt iq_end_it = toBeScheduled.end();
+
+ while (iq_it != iq_end_it && scheduled < width) {
+// if ((*iq_it)->readyToIssue()) {
+ DPRINTF(BE, "Instruction [sn:%lli] PC:%#x is ready\n",
+ (*iq_it)->seqNum, (*iq_it)->readPC());
+ readyQueue.push(*iq_it);
+ readyList.push_front(*iq_it);
+
+ (*iq_it)->iqIt = readyList.begin();
+
+ toBeScheduled.erase(iq_it--);
+
+ ++scheduled;
+// } else {
+// iq_it++;
+// }
+ }
+
+ numIssued->size+= scheduled;
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::InstQueue::scheduleNonSpec(const InstSeqNum &sn)
+{
+/*
+ InstListIt non_spec_it = nonSpec.begin();
+ InstListIt non_spec_end_it = nonSpec.end();
+
+ while ((*non_spec_it)->seqNum != sn) {
+ non_spec_it++;
+ assert(non_spec_it != non_spec_end_it);
+ }
+*/
+ DynInstPtr inst = nonSpec.back();
+
+ DPRINTF(BE, "Nonspeculative instruction [sn:%lli] scheduled\n", inst->seqNum);
+
+ assert(inst->seqNum == sn);
+
+ assert(find(NonSpec, inst->iqIt));
+ nonSpec.erase(inst->iqIt);
+ readyList.push_front(inst);
+ inst->iqIt = readyList.begin();
+ readyQueue.push(inst);
+ numIssued->size++;
+}
+
+template <class Impl>
+typename Impl::DynInstPtr
+BackEnd<Impl>::InstQueue::getReadyInst()
+{
+ assert(!readyList.empty());
+
+ DynInstPtr inst = readyQueue.top();
+ readyQueue.pop();
+ assert(find(ReadyList, inst->iqIt));
+ readyList.erase(inst->iqIt);
+ inst->iqItValid = false;
+// if (!inst->isMemRef())
+ --numInsts;
+ return inst;
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::InstQueue::squash(const InstSeqNum &sn)
+{
+ InstListIt iq_it = iq.begin();
+ InstListIt iq_end_it = iq.end();
+
+ while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) {
+ DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
+ (*iq_it)->iqItValid = false;
+ iq.erase(iq_it++);
+ --numInsts;
+ }
+
+ iq_it = nonSpec.begin();
+ iq_end_it = nonSpec.end();
+
+ while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) {
+ DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
+ (*iq_it)->iqItValid = false;
+ nonSpec.erase(iq_it++);
+ --numInsts;
+ }
+
+ iq_it = replayList.begin();
+ iq_end_it = replayList.end();
+
+ while (iq_it != iq_end_it) {
+ if ((*iq_it)->seqNum > sn) {
+ DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
+ (*iq_it)->iqItValid = false;
+ replayList.erase(iq_it++);
+ --numInsts;
+ } else {
+ iq_it++;
+ }
+ }
+
+ assert(numInsts >= 0);
+/*
+ InstListIt ready_it = readyList.begin();
+ InstListIt ready_end_it = readyList.end();
+
+ while (ready_it != ready_end_it) {
+ if ((*ready_it)->seqNum > sn) {
+ readyList.erase(ready_it++);
+ } else {
+ ready_it++;
+ }
+ }
+*/
+}
+
+template <class Impl>
+int
+BackEnd<Impl>::InstQueue::wakeDependents(DynInstPtr &inst)
+{
+ assert(!inst->isSquashed());
+ std::vector<DynInstPtr> &dependents = inst->getDependents();
+ int num_outputs = dependents.size();
+
+ DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum);
+
+ for (int i = 0; i < num_outputs; i++) {
+ DynInstPtr dep_inst = dependents[i];
+ dep_inst->markSrcRegReady();
+ DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum);
+
+ if (dep_inst->readyToIssue() && dep_inst->iqItValid) {
+ if (dep_inst->isNonSpeculative()) {
+ assert(find(NonSpec, dep_inst->iqIt));
+ nonSpec.erase(dep_inst->iqIt);
+ } else {
+ assert(find(IQ, dep_inst->iqIt));
+ iq.erase(dep_inst->iqIt);
+ }
+
+ toBeScheduled.push_front(dep_inst);
+ dep_inst->iqIt = toBeScheduled.begin();
+ }
+ }
+ return num_outputs;
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::InstQueue::rescheduleMemInst(DynInstPtr &inst)
+{
+ DPRINTF(BE, "Rescheduling memory instruction [sn:%lli]\n", inst->seqNum);
+ assert(!inst->iqItValid);
+ replayList.push_front(inst);
+ inst->iqIt = replayList.begin();
+ inst->iqItValid = true;
+ ++numInsts;
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::InstQueue::replayMemInst(DynInstPtr &inst)
+{
+ DPRINTF(BE, "Replaying memory instruction [sn:%lli]\n", inst->seqNum);
+ assert(find(ReplayList, inst->iqIt));
+ InstListIt iq_it = --replayList.end();
+ InstListIt iq_end_it = replayList.end();
+ while (iq_it != iq_end_it) {
+ DynInstPtr rescheduled_inst = (*iq_it);
+
+ DPRINTF(BE, "Memory instruction [sn:%lli] also replayed\n", inst->seqNum);
+ replayList.erase(iq_it--);
+ toBeScheduled.push_front(rescheduled_inst);
+ rescheduled_inst->iqIt = toBeScheduled.begin();
+ }
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::InstQueue::completeMemInst(DynInstPtr &inst)
+{
+ panic("Not implemented.");
+}
+
+template <class Impl>
+bool
+BackEnd<Impl>::InstQueue::find(queue q, InstListIt it)
+{
+ InstListIt iq_it, iq_end_it;
+ switch(q) {
+ case NonSpec:
+ iq_it = nonSpec.begin();
+ iq_end_it = nonSpec.end();
+ break;
+ case IQ:
+ iq_it = iq.begin();
+ iq_end_it = iq.end();
+ break;
+ case ToBeScheduled:
+ iq_it = toBeScheduled.begin();
+ iq_end_it = toBeScheduled.end();
+ break;
+ case ReadyList:
+ iq_it = readyList.begin();
+ iq_end_it = readyList.end();
+ break;
+ case ReplayList:
+ iq_it = replayList.begin();
+ iq_end_it = replayList.end();
+ }
+
+ while (iq_it != it && iq_it != iq_end_it) {
+ iq_it++;
+ }
+ if (iq_it == it) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::InstQueue::dumpInsts()
+{
+ cprintf("IQ size: %i\n", iq.size());
+
+ InstListIt inst_list_it = --iq.end();
+
+ int num = 0;
+ int valid_num = 0;
+ while (inst_list_it != iq.end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+ if (!(*inst_list_it)->isSquashed()) {
+ if (!(*inst_list_it)->isIssued()) {
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ } else if ((*inst_list_it)->isMemRef() &&
+ !(*inst_list_it)->memOpDone) {
+ // Loads that have not been marked as executed still count
+ // towards the total instructions.
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ }
+ }
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Issued:%i\nSquashed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+
+ if ((*inst_list_it)->isMemRef()) {
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ }
+
+ cprintf("\n");
+
+ inst_list_it--;
+ ++num;
+ }
+
+ cprintf("nonSpec size: %i\n", nonSpec.size());
+
+ inst_list_it = --nonSpec.end();
+
+ while (inst_list_it != nonSpec.end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+ if (!(*inst_list_it)->isSquashed()) {
+ if (!(*inst_list_it)->isIssued()) {
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ } else if ((*inst_list_it)->isMemRef() &&
+ !(*inst_list_it)->memOpDone) {
+ // Loads that have not been marked as executed still count
+ // towards the total instructions.
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ }
+ }
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Issued:%i\nSquashed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+
+ if ((*inst_list_it)->isMemRef()) {
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ }
+
+ cprintf("\n");
+
+ inst_list_it--;
+ ++num;
+ }
+
+ cprintf("toBeScheduled size: %i\n", toBeScheduled.size());
+
+ inst_list_it = --toBeScheduled.end();
+
+ while (inst_list_it != toBeScheduled.end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+ if (!(*inst_list_it)->isSquashed()) {
+ if (!(*inst_list_it)->isIssued()) {
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ } else if ((*inst_list_it)->isMemRef() &&
+ !(*inst_list_it)->memOpDone) {
+ // Loads that have not been marked as executed still count
+ // towards the total instructions.
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ }
+ }
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Issued:%i\nSquashed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+
+ if ((*inst_list_it)->isMemRef()) {
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ }
+
+ cprintf("\n");
+
+ inst_list_it--;
+ ++num;
+ }
+
+ cprintf("readyList size: %i\n", readyList.size());
+
+ inst_list_it = --readyList.end();
+
+ while (inst_list_it != readyList.end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+ if (!(*inst_list_it)->isSquashed()) {
+ if (!(*inst_list_it)->isIssued()) {
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ } else if ((*inst_list_it)->isMemRef() &&
+ !(*inst_list_it)->memOpDone) {
+ // Loads that have not been marked as executed still count
+ // towards the total instructions.
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ }
+ }
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Issued:%i\nSquashed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+
+ if ((*inst_list_it)->isMemRef()) {
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ }
+
+ cprintf("\n");
+
+ inst_list_it--;
+ ++num;
+ }
+}
+
+template<class Impl>
+BackEnd<Impl>::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst,
+ BackEnd<Impl> *_be)
+ : Event(&mainEventQueue), inst(_inst), be(_be)
+{
+ this->setFlags(Event::AutoDelete);
+}
+
+template<class Impl>
+void
+BackEnd<Impl>::LdWritebackEvent::process()
+{
+ DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum);
+// DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
+
+ //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
+
+// iewStage->wakeCPU();
+
+ if (inst->isSquashed()) {
+ inst = NULL;
+ return;
+ }
+
+ if (!inst->isExecuted()) {
+ inst->setExecuted();
+
+ // Execute again to copy data to proper place.
+ inst->completeAcc();
+ }
+
+ // Need to insert instruction into queue to commit
+ be->instToCommit(inst);
+
+ //wroteToTimeBuffer = true;
+// iewStage->activityThisCycle();
+
+ inst = NULL;
+}
+
+template<class Impl>
+const char *
+BackEnd<Impl>::LdWritebackEvent::description()
+{
+ return "Load writeback event";
+}
+
+
+template <class Impl>
+BackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent(BackEnd *_be)
+ : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
+{
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::DCacheCompletionEvent::process()
+{
+}
+
+template <class Impl>
+const char *
+BackEnd<Impl>::DCacheCompletionEvent::description()
+{
+ return "Cache completion event";
+}
+
+template <class Impl>
+BackEnd<Impl>::BackEnd(Params *params)
+ : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
+ xcSquash(false), IQ(params),
+ cacheCompletionEvent(this), width(params->backEndWidth),
+ exactFullStall(true)
+{
+ numROBEntries = params->numROBEntries;
+ numInsts = 0;
+ numDispatchEntries = 32;
+ IQ.setBE(this);
+ LSQ.setBE(this);
+
+ // Setup IQ and LSQ with their parameters here.
+ instsToDispatch = d2i.getWire(-1);
+
+ instsToExecute = i2e.getWire(-1);
+
+ IQ.setIssueExecQueue(&i2e);
+
+ dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width;
+ issueWidth = params->issueWidth ? params->issueWidth : width;
+ wbWidth = params->wbWidth ? params->wbWidth : width;
+ commitWidth = params->commitWidth ? params->commitWidth : width;
+
+ LSQ.init(params, params->LQEntries, params->SQEntries, 0);
+
+ dispatchStatus = Running;
+}
+
+template <class Impl>
+std::string
+BackEnd<Impl>::name() const
+{
+ return cpu->name() + ".backend";
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::regStats()
+{
+ using namespace Stats;
+ rob_cap_events
+ .init(cpu->number_of_threads)
+ .name(name() + ".ROB:cap_events")
+ .desc("number of cycles where ROB cap was active")
+ .flags(total)
+ ;
+
+ rob_cap_inst_count
+ .init(cpu->number_of_threads)
+ .name(name() + ".ROB:cap_inst")
+ .desc("number of instructions held up by ROB cap")
+ .flags(total)
+ ;
+
+ iq_cap_events
+ .init(cpu->number_of_threads)
+ .name(name() +".IQ:cap_events" )
+ .desc("number of cycles where IQ cap was active")
+ .flags(total)
+ ;
+
+ iq_cap_inst_count
+ .init(cpu->number_of_threads)
+ .name(name() + ".IQ:cap_inst")
+ .desc("number of instructions held up by IQ cap")
+ .flags(total)
+ ;
+
+
+ exe_inst
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:count")
+ .desc("number of insts issued")
+ .flags(total)
+ ;
+
+ exe_swp
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:swp")
+ .desc("number of swp insts issued")
+ .flags(total)
+ ;
+
+ exe_nop
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:nop")
+ .desc("number of nop insts issued")
+ .flags(total)
+ ;
+
+ exe_refs
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:refs")
+ .desc("number of memory reference insts issued")
+ .flags(total)
+ ;
+
+ exe_loads
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:loads")
+ .desc("number of load insts issued")
+ .flags(total)
+ ;
+
+ exe_branches
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:branches")
+ .desc("Number of branches issued")
+ .flags(total)
+ ;
+
+ issued_ops
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:op_count")
+ .desc("number of insts issued")
+ .flags(total)
+ ;
+
+/*
+ for (int i=0; i<Num_OpClasses; ++i) {
+ stringstream subname;
+ subname << opClassStrings[i] << "_delay";
+ issue_delay_dist.subname(i, subname.str());
+ }
+*/
+ //
+ // Other stats
+ //
+ lsq_forw_loads
+ .init(cpu->number_of_threads)
+ .name(name() + ".LSQ:forw_loads")
+ .desc("number of loads forwarded via LSQ")
+ .flags(total)
+ ;
+
+ inv_addr_loads
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:addr_loads")
+ .desc("number of invalid-address loads")
+ .flags(total)
+ ;
+
+ inv_addr_swpfs
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:addr_swpfs")
+ .desc("number of invalid-address SW prefetches")
+ .flags(total)
+ ;
+
+ lsq_blocked_loads
+ .init(cpu->number_of_threads)
+ .name(name() + ".LSQ:blocked_loads")
+ .desc("number of ready loads not issued due to memory disambiguation")
+ .flags(total)
+ ;
+
+ lsqInversion
+ .name(name() + ".ISSUE:lsq_invert")
+ .desc("Number of times LSQ instruction issued early")
+ ;
+
+ n_issued_dist
+ .init(issueWidth + 1)
+ .name(name() + ".ISSUE:issued_per_cycle")
+ .desc("Number of insts issued each cycle")
+ .flags(total | pdf | dist)
+ ;
+ issue_delay_dist
+ .init(Num_OpClasses,0,99,2)
+ .name(name() + ".ISSUE:")
+ .desc("cycles from operands ready to issue")
+ .flags(pdf | cdf)
+ ;
+
+ queue_res_dist
+ .init(Num_OpClasses, 0, 99, 2)
+ .name(name() + ".IQ:residence:")
+ .desc("cycles from dispatch to issue")
+ .flags(total | pdf | cdf )
+ ;
+ for (int i = 0; i < Num_OpClasses; ++i) {
+ queue_res_dist.subname(i, opClassStrings[i]);
+ }
+
+ writeback_count
+ .init(cpu->number_of_threads)
+ .name(name() + ".WB:count")
+ .desc("cumulative count of insts written-back")
+ .flags(total)
+ ;
+
+ producer_inst
+ .init(cpu->number_of_threads)
+ .name(name() + ".WB:producers")
+ .desc("num instructions producing a value")
+ .flags(total)
+ ;
+
+ consumer_inst
+ .init(cpu->number_of_threads)
+ .name(name() + ".WB:consumers")
+ .desc("num instructions consuming a value")
+ .flags(total)
+ ;
+
+ wb_penalized
+ .init(cpu->number_of_threads)
+ .name(name() + ".WB:penalized")
+ .desc("number of instrctions required to write to 'other' IQ")
+ .flags(total)
+ ;
+
+
+ wb_penalized_rate
+ .name(name() + ".WB:penalized_rate")
+ .desc ("fraction of instructions written-back that wrote to 'other' IQ")
+ .flags(total)
+ ;
+
+ wb_penalized_rate = wb_penalized / writeback_count;
+
+ wb_fanout
+ .name(name() + ".WB:fanout")
+ .desc("average fanout of values written-back")
+ .flags(total)
+ ;
+
+ wb_fanout = producer_inst / consumer_inst;
+
+ wb_rate
+ .name(name() + ".WB:rate")
+ .desc("insts written-back per cycle")
+ .flags(total)
+ ;
+ wb_rate = writeback_count / cpu->numCycles;
+
+ stat_com_inst
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:count")
+ .desc("Number of instructions committed")
+ .flags(total)
+ ;
+
+ stat_com_swp
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:swp_count")
+ .desc("Number of s/w prefetches committed")
+ .flags(total)
+ ;
+
+ stat_com_refs
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:refs")
+ .desc("Number of memory references committed")
+ .flags(total)
+ ;
+
+ stat_com_loads
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:loads")
+ .desc("Number of loads committed")
+ .flags(total)
+ ;
+
+ stat_com_membars
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:membars")
+ .desc("Number of memory barriers committed")
+ .flags(total)
+ ;
+
+ stat_com_branches
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:branches")
+ .desc("Number of branches committed")
+ .flags(total)
+ ;
+ n_committed_dist
+ .init(0,commitWidth,1)
+ .name(name() + ".COM:committed_per_cycle")
+ .desc("Number of insts commited each cycle")
+ .flags(pdf)
+ ;
+
+ //
+ // Commit-Eligible instructions...
+ //
+ // -> The number of instructions eligible to commit in those
+ // cycles where we reached our commit BW limit (less the number
+ // actually committed)
+ //
+ // -> The average value is computed over ALL CYCLES... not just
+ // the BW limited cycles
+ //
+ // -> The standard deviation is computed only over cycles where
+ // we reached the BW limit
+ //
+ commit_eligible
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:bw_limited")
+ .desc("number of insts not committed due to BW limits")
+ .flags(total)
+ ;
+
+ commit_eligible_samples
+ .name(name() + ".COM:bw_lim_events")
+ .desc("number cycles where commit BW limit reached")
+ ;
+
+ ROB_fcount
+ .name(name() + ".ROB:full_count")
+ .desc("number of cycles where ROB was full")
+ ;
+
+ ROB_count
+ .init(cpu->number_of_threads)
+ .name(name() + ".ROB:occupancy")
+ .desc(name() + ".ROB occupancy (cumulative)")
+ .flags(total)
+ ;
+
+ ROB_full_rate
+ .name(name() + ".ROB:full_rate")
+ .desc("ROB full per cycle")
+ ;
+ ROB_full_rate = ROB_fcount / cpu->numCycles;
+
+ ROB_occ_rate
+ .name(name() + ".ROB:occ_rate")
+ .desc("ROB occupancy rate")
+ .flags(total)
+ ;
+ ROB_occ_rate = ROB_count / cpu->numCycles;
+
+ ROB_occ_dist
+ .init(cpu->number_of_threads,0,numROBEntries,2)
+ .name(name() + ".ROB:occ_dist")
+ .desc("ROB Occupancy per cycle")
+ .flags(total | cdf)
+ ;
+
+ IQ.regStats();
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
+{
+ comm = _comm;
+ toIEW = comm->getWire(0);
+ fromCommit = comm->getWire(-1);
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::tick()
+{
+ DPRINTF(BE, "Ticking back end\n");
+
+ ROB_count[0]+= numInsts;
+
+ wbCycle = 0;
+
+ if (xcSquash) {
+ squashFromXC();
+ }
+
+ // Read in any done instruction information and update the IQ or LSQ.
+ updateStructures();
+
+ if (dispatchStatus != Blocked) {
+ d2i.advance();
+ dispatchInsts();
+ } else {
+ checkDispatchStatus();
+ }
+
+ i2e.advance();
+ scheduleReadyInsts();
+
+ e2c.advance();
+ executeInsts();
+
+ numInstsToWB.advance();
+ writebackInsts();
+
+ commitInsts();
+
+ DPRINTF(BE, "IQ entries in use: %i, ROB entries in use: %i, LSQ loads: %i, LSQ stores: %i\n",
+ IQ.numInsts, numInsts, LSQ.numLoads(), LSQ.numStores());
+
+ assert(numInsts == instList.size());
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::updateStructures()
+{
+ if (fromCommit->doneSeqNum) {
+ IQ.commit(fromCommit->doneSeqNum);
+ LSQ.commitLoads(fromCommit->doneSeqNum);
+ LSQ.commitStores(fromCommit->doneSeqNum);
+ }
+
+ if (fromCommit->nonSpecSeqNum) {
+ if (fromCommit->uncached) {
+ LSQ.executeLoad(fromCommit->lqIdx);
+ } else {
+ IQ.scheduleNonSpec(
+ fromCommit->nonSpecSeqNum);
+ }
+ }
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::addToIQ(DynInstPtr &inst)
+{
+ // Do anything IQ specific here?
+ IQ.insert(inst);
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::addToLSQ(DynInstPtr &inst)
+{
+ // Do anything LSQ specific here?
+ LSQ.insert(inst);
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::dispatchInsts()
+{
+ DPRINTF(BE, "Trying to dispatch instructions.\n");
+
+ // Pull instructions out of the front end.
+ int disp_width = dispatchWidth ? dispatchWidth : width;
+
+ // Could model dispatching time, but in general 1 cycle is probably
+ // good enough.
+
+ if (dispatchSize < numDispatchEntries) {
+ for (int i = 0; i < disp_width; i++) {
+ // Get instructions
+ DynInstPtr inst = frontEnd->getInst();
+
+ if (!inst) {
+ // No more instructions to get
+ break;
+ }
+
+ DPRINTF(BE, "Processing instruction [sn:%lli] PC:%#x\n",
+ inst->seqNum, inst->readPC());
+
+ for (int i = 0; i < inst->numDestRegs(); ++i)
+ renameTable[inst->destRegIdx(i)] = inst;
+
+ // Add to queue to be dispatched.
+ dispatch.push_back(inst);
+
+ d2i[0].size++;
+ ++dispatchSize;
+ }
+ }
+
+ assert(dispatch.size() < 64);
+
+ for (int i = 0; i < instsToDispatch->size; ++i) {
+ assert(!dispatch.empty());
+ // Get instruction from front of time buffer
+ DynInstPtr inst = dispatch.front();
+ dispatch.pop_front();
+ --dispatchSize;
+
+ if (inst->isSquashed())
+ continue;
+
+ ++numInsts;
+ instList.push_back(inst);
+
+ DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n",
+ inst->seqNum, inst->readPC());
+
+ addToIQ(inst);
+
+ if (inst->isMemRef()) {
+ addToLSQ(inst);
+ }
+
+ if (inst->isNonSpeculative()) {
+ inst->setCanCommit();
+ }
+
+ // Check if IQ or LSQ is full. If so we'll need to break and stop
+ // removing instructions. Also update the number of insts to remove
+ // from the queue.
+ if (exactFullStall) {
+ bool stall = false;
+ if (IQ.isFull()) {
+ DPRINTF(BE, "IQ is full!\n");
+ stall = true;
+ } else if (LSQ.isFull()) {
+ DPRINTF(BE, "LSQ is full!\n");
+ stall = true;
+ } else if (isFull()) {
+ DPRINTF(BE, "ROB is full!\n");
+ stall = true;
+ ROB_fcount++;
+ }
+ if (stall) {
+ instsToDispatch->size-= i+1;
+ dispatchStall();
+ return;
+ }
+ }
+ }
+
+ // Check if IQ or LSQ is full. If so we'll need to break and stop
+ // removing instructions. Also update the number of insts to remove
+ // from the queue. Check here if we don't care about exact stall
+ // conditions.
+
+ bool stall = false;
+ if (IQ.isFull()) {
+ DPRINTF(BE, "IQ is full!\n");
+ stall = true;
+ } else if (LSQ.isFull()) {
+ DPRINTF(BE, "LSQ is full!\n");
+ stall = true;
+ } else if (isFull()) {
+ DPRINTF(BE, "ROB is full!\n");
+ stall = true;
+ ROB_fcount++;
+ }
+ if (stall) {
+ d2i.advance();
+ dispatchStall();
+ return;
+ }
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::dispatchStall()
+{
+ dispatchStatus = Blocked;
+ if (!cpu->decoupledFrontEnd) {
+ // Tell front end to stall here through a timebuffer, or just tell
+ // it directly.
+ }
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::checkDispatchStatus()
+{
+ DPRINTF(BE, "Checking dispatch status\n");
+ assert(dispatchStatus == Blocked);
+ if (!IQ.isFull() && !LSQ.isFull() && !isFull()) {
+ DPRINTF(BE, "Dispatch no longer blocked\n");
+ dispatchStatus = Running;
+ dispatchInsts();
+ }
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::scheduleReadyInsts()
+{
+ // Tell IQ to put any ready instructions into the instruction list.
+ // Probably want to have a list of DynInstPtrs returned here. Then I
+ // can choose to either put them into a time buffer to simulate
+ // IQ scheduling time, or hand them directly off to the next stage.
+ // Do you ever want to directly hand it off to the next stage?
+ DPRINTF(BE, "Trying to schedule ready instructions\n");
+ IQ.scheduleReadyInsts();
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::executeInsts()
+{
+ int insts_to_execute = instsToExecute->size;
+
+ issued_ops[0]+= insts_to_execute;
+ n_issued_dist[insts_to_execute]++;
+
+ DPRINTF(BE, "Trying to execute %i instructions\n", insts_to_execute);
+
+ fetchRedirect[0] = false;
+
+ while (insts_to_execute > 0) {
+ // Get ready instruction from the IQ (or queue coming out of IQ)
+ // Execute the ready instruction.
+ // Wakeup any dependents if it's done.
+ DynInstPtr inst = IQ.getReadyInst();
+
+ DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n",
+ inst->seqNum, inst->readPC());
+
+ ++funcExeInst;
+
+ // Check if the instruction is squashed; if so then skip it
+ // and don't count it towards the FU usage.
+ if (inst->isSquashed()) {
+ DPRINTF(BE, "Execute: Instruction was squashed.\n");
+
+ // Not sure how to handle this plus the method of sending # of
+ // instructions to use. Probably will just have to count it
+ // towards the bandwidth usage, but not the FU usage.
+ --insts_to_execute;
+
+ // Consider this instruction executed so that commit can go
+ // ahead and retire the instruction.
+ inst->setExecuted();
+
+ // Not sure if I should set this here or just let commit try to
+ // commit any squashed instructions. I like the latter a bit more.
+ inst->setCanCommit();
+
+// ++iewExecSquashedInsts;
+
+ continue;
+ }
+
+ Fault fault = NoFault;
+
+ // Execute instruction.
+ // Note that if the instruction faults, it will be handled
+ // at the commit stage.
+ if (inst->isMemRef() &&
+ (!inst->isDataPrefetch() && !inst->isInstPrefetch())) {
+ DPRINTF(BE, "Execute: Initiating access for memory "
+ "reference.\n");
+
+ // Tell the LDSTQ to execute this instruction (if it is a load).
+ if (inst->isLoad()) {
+ // Loads will mark themselves as executed, and their writeback
+ // event adds the instruction to the queue to commit
+ fault = LSQ.executeLoad(inst);
+
+// ++iewExecLoadInsts;
+ } else if (inst->isStore()) {
+ LSQ.executeStore(inst);
+
+// ++iewExecStoreInsts;
+
+ if (!(inst->req->flags & LOCKED)) {
+ inst->setExecuted();
+
+ instToCommit(inst);
+ }
+ // Store conditionals will mark themselves as executed, and
+ // their writeback event will add the instruction to the queue
+ // to commit.
+ } else {
+ panic("Unexpected memory type!\n");
+ }
+
+ } else {
+ inst->execute();
+
+// ++iewExecutedInsts;
+
+ inst->setExecuted();
+
+ instToCommit(inst);
+ }
+
+ updateExeInstStats(inst);
+
+ // Probably should have some sort of function for this.
+ // More general question of how to handle squashes? Have some sort of
+ // squash unit that controls it? Probably...
+ // Check if branch was correct. This check happens after the
+ // instruction is added to the queue because even if the branch
+ // is mispredicted, the branch instruction itself is still valid.
+ // Only handle this if there hasn't already been something that
+ // redirects fetch in this group of instructions.
+
+ // This probably needs to prioritize the redirects if a different
+ // scheduler is used. Currently the scheduler schedules the oldest
+ // instruction first, so the branch resolution order will be correct.
+ unsigned tid = inst->threadNumber;
+
+ if (!fetchRedirect[tid]) {
+
+ if (inst->mispredicted()) {
+ fetchRedirect[tid] = true;
+
+ DPRINTF(BE, "Execute: Branch mispredict detected.\n");
+ DPRINTF(BE, "Execute: Redirecting fetch to PC: %#x.\n",
+ inst->nextPC);
+
+ // If incorrect, then signal the ROB that it must be squashed.
+ squashDueToBranch(inst);
+
+ if (inst->predTaken()) {
+// predictedTakenIncorrect++;
+ } else {
+// predictedNotTakenIncorrect++;
+ }
+ } else if (LSQ.violation()) {
+ fetchRedirect[tid] = true;
+
+ // Get the DynInst that caused the violation. Note that this
+ // clears the violation signal.
+ DynInstPtr violator;
+ violator = LSQ.getMemDepViolator();
+
+ DPRINTF(BE, "LDSTQ detected a violation. Violator PC: "
+ "%#x, inst PC: %#x. Addr is: %#x.\n",
+ violator->readPC(), inst->readPC(), inst->physEffAddr);
+
+ // Tell the instruction queue that a violation has occured.
+// IQ.violation(inst, violator);
+
+ // Squash.
+// squashDueToMemOrder(inst,tid);
+ squashDueToBranch(inst);
+
+// ++memOrderViolationEvents;
+ } else if (LSQ.loadBlocked()) {
+ fetchRedirect[tid] = true;
+
+ DPRINTF(BE, "Load operation couldn't execute because the "
+ "memory system is blocked. PC: %#x [sn:%lli]\n",
+ inst->readPC(), inst->seqNum);
+
+ squashDueToMemBlocked(inst);
+ }
+ }
+
+// instList.pop_front();
+
+ --insts_to_execute;
+
+ // keep an instruction count
+ thread->numInst++;
+ thread->numInsts++;
+ }
+
+ assert(insts_to_execute >= 0);
+}
+
+template<class Impl>
+void
+BackEnd<Impl>::instToCommit(DynInstPtr &inst)
+{
+ int wb_width = wbWidth;
+ // First check the time slot that this instruction will write
+ // to. If there are free write ports at the time, then go ahead
+ // and write the instruction to that time. If there are not,
+ // keep looking back to see where's the first time there's a
+ // free slot. What happens if you run out of free spaces?
+ // For now naively assume that all instructions take one cycle.
+ // Otherwise would have to look into the time buffer based on the
+ // latency of the instruction.
+
+ DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
+ inst->seqNum, inst->readPC());
+
+ while (numInstsToWB[wbCycle].size >= wb_width) {
+ ++wbCycle;
+
+ assert(wbCycle < 5);
+ }
+
+ // Add finished instruction to queue to commit.
+ writeback.push_back(inst);
+ numInstsToWB[wbCycle].size++;
+
+ if (wbCycle)
+ wb_penalized[0]++;
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::writebackInsts()
+{
+ int wb_width = wbWidth;
+ // Using this method I'm not quite sure how to prevent an
+ // instruction from waking its own dependents multiple times,
+ // without the guarantee that commit always has enough bandwidth
+ // to accept all instructions being written back. This guarantee
+ // might not be too unrealistic.
+ InstListIt wb_inst_it = writeback.begin();
+ InstListIt wb_end_it = writeback.end();
+ int inst_num = 0;
+ int consumer_insts = 0;
+
+ for (; inst_num < wb_width &&
+ wb_inst_it != wb_end_it; inst_num++) {
+ DynInstPtr inst = (*wb_inst_it);
+
+ // Some instructions will be sent to commit without having
+ // executed because they need commit to handle them.
+ // E.g. Uncached loads have not actually executed when they
+ // are first sent to commit. Instead commit must tell the LSQ
+ // when it's ready to execute the uncached load.
+ if (!inst->isSquashed()) {
+ DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
+ inst->seqNum, inst->readPC());
+
+ inst->setCanCommit();
+ inst->setResultReady();
+
+ if (inst->isExecuted()) {
+ int dependents = IQ.wakeDependents(inst);
+ if (dependents) {
+ producer_inst[0]++;
+ consumer_insts+= dependents;
+ }
+ }
+ }
+
+ writeback.erase(wb_inst_it++);
+ }
+ LSQ.writebackStores();
+ consumer_inst[0]+= consumer_insts;
+ writeback_count[0]+= inst_num;
+}
+
+template <class Impl>
+bool
+BackEnd<Impl>::commitInst(int inst_num)
+{
+ // Read instruction from the head of the ROB
+ DynInstPtr inst = instList.front();
+
+ // Make sure instruction is valid
+ assert(inst);
+
+ if (!inst->readyToCommit())
+ return false;
+
+ DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n",
+ inst->seqNum, inst->readPC());
+
+ // If the instruction is not executed yet, then it is a non-speculative
+ // or store inst. Signal backwards that it should be executed.
+ if (!inst->isExecuted()) {
+ // Keep this number correct. We have not yet actually executed
+ // and committed this instruction.
+// thread->funcExeInst--;
+
+ if (inst->isNonSpeculative()) {
+#if !FULL_SYSTEM
+ // Hack to make sure syscalls aren't executed until all stores
+ // write back their data. This direct communication shouldn't
+ // be used for anything other than this.
+ if (inst_num > 0 || LSQ.hasStoresToWB()) {
+ DPRINTF(BE, "Waiting for all stores to writeback.\n");
+ return false;
+ }
+#endif
+
+ DPRINTF(BE, "Encountered a store or non-speculative "
+ "instruction at the head of the ROB, PC %#x.\n",
+ inst->readPC());
+
+ // Send back the non-speculative instruction's sequence number.
+ toIEW->nonSpecSeqNum = inst->seqNum;
+
+ // Change the instruction so it won't try to commit again until
+ // it is executed.
+ inst->clearCanCommit();
+
+// ++commitNonSpecStalls;
+
+ return false;
+ } else if (inst->isLoad()) {
+ DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n",
+ inst->seqNum, inst->readPC());
+
+ // Send back the non-speculative instruction's sequence
+ // number. Maybe just tell the lsq to re-execute the load.
+ toIEW->nonSpecSeqNum = inst->seqNum;
+ toIEW->uncached = true;
+ toIEW->lqIdx = inst->lqIdx;
+
+ inst->clearCanCommit();
+
+ return false;
+ } else {
+ panic("Trying to commit un-executed instruction "
+ "of unknown type!\n");
+ }
+ }
+
+ // Now check if it's one of the special trap or barrier or
+ // serializing instructions.
+ if (inst->isThreadSync())
+ {
+ // Not handled for now.
+ panic("Barrier instructions are not handled yet.\n");
+ }
+
+ // Check if the instruction caused a fault. If so, trap.
+ Fault inst_fault = inst->getFault();
+
+ if (inst_fault != NoFault) {
+ if (!inst->isNop()) {
+#if FULL_SYSTEM
+ DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n",
+ inst->seqNum, inst->readPC());
+
+// assert(!thread->inSyscall);
+
+// thread->inSyscall = true;
+
+ // Consider holding onto the trap and waiting until the trap event
+ // happens for this to be executed.
+ inst_fault->invoke(thread->getXCProxy());
+
+ // Exit state update mode to avoid accidental updating.
+// thread->inSyscall = false;
+
+// commitStatus = TrapPending;
+
+ // Generate trap squash event.
+// generateTrapEvent();
+
+ return false;
+#else // !FULL_SYSTEM
+ panic("fault (%d) detected @ PC %08p", inst_fault,
+ inst->PC);
+#endif // FULL_SYSTEM
+ }
+ }
+
+ if (inst->isControl()) {
+// ++commitCommittedBranches;
+ }
+
+ int freed_regs = 0;
+
+ for (int i = 0; i < inst->numDestRegs(); ++i) {
+ DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
+ (int)inst->destRegIdx(i), inst->seqNum);
+ thread->renameTable[inst->destRegIdx(i)] = inst;
+ ++freed_regs;
+ }
+
+ if (inst->traceData) {
+ inst->traceData->finalize();
+ inst->traceData = NULL;
+ }
+
+ inst->clearDependents();
+
+ frontEnd->addFreeRegs(freed_regs);
+
+ instList.pop_front();
+
+ --numInsts;
+ cpu->numInst++;
+ thread->numInsts++;
+ ++thread->funcExeInst;
+ thread->PC = inst->readNextPC();
+ updateComInstStats(inst);
+
+ // Write the done sequence number here.
+ toIEW->doneSeqNum = inst->seqNum;
+
+#if FULL_SYSTEM
+ int count = 0;
+ Addr oldpc;
+ do {
+ if (count == 0)
+ assert(!thread->inSyscall && !thread->trapPending);
+ oldpc = thread->readPC();
+ cpu->system->pcEventQueue.service(
+ thread->getXCProxy());
+ count++;
+ } while (oldpc != thread->readPC());
+ if (count > 1) {
+ DPRINTF(BE, "PC skip function event, stopping commit\n");
+// completed_last_inst = false;
+// squashPending = true;
+ return false;
+ }
+#endif
+ return true;
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::commitInsts()
+{
+ int commit_width = commitWidth ? commitWidth : width;
+
+ // Not sure this should be a loop or not.
+ int inst_num = 0;
+ while (!instList.empty() && inst_num < commit_width) {
+ if (instList.front()->isSquashed()) {
+ panic("No squashed insts should still be on the list!");
+ instList.front()->clearDependents();
+ instList.pop_front();
+ continue;
+ }
+
+ if (!commitInst(inst_num++)) {
+ break;
+ }
+ }
+ n_committed_dist.sample(inst_num);
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::squash(const InstSeqNum &sn)
+{
+ IQ.squash(sn);
+ LSQ.squash(sn);
+
+ int freed_regs = 0;
+ InstListIt dispatch_end = dispatch.end();
+ InstListIt insts_it = dispatch.end();
+ insts_it--;
+
+ while (insts_it != dispatch_end && (*insts_it)->seqNum > sn)
+ {
+ if ((*insts_it)->isSquashed()) {
+ --insts_it;
+ continue;
+ }
+ DPRINTF(BE, "Squashing instruction on dispatch list PC %#x, [sn:%lli].\n",
+ (*insts_it)->readPC(),
+ (*insts_it)->seqNum);
+
+ // Mark the instruction as squashed, and ready to commit so that
+ // it can drain out of the pipeline.
+ (*insts_it)->setSquashed();
+
+ (*insts_it)->setCanCommit();
+
+ // Be careful with IPRs and such here
+ for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
+ DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
+ DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
+ (int)(*insts_it)->destRegIdx(i), prev_dest);
+ renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
+ ++freed_regs;
+ }
+
+ (*insts_it)->clearDependents();
+
+ --insts_it;
+ }
+
+ insts_it = instList.end();
+ insts_it--;
+
+ while (!instList.empty() && (*insts_it)->seqNum > sn)
+ {
+ if ((*insts_it)->isSquashed()) {
+ --insts_it;
+ continue;
+ }
+ DPRINTF(BE, "Squashing instruction on inst list PC %#x, [sn:%lli].\n",
+ (*insts_it)->readPC(),
+ (*insts_it)->seqNum);
+
+ // Mark the instruction as squashed, and ready to commit so that
+ // it can drain out of the pipeline.
+ (*insts_it)->setSquashed();
+
+ (*insts_it)->setCanCommit();
+
+ for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
+ DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
+ DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
+ (int)(*insts_it)->destRegIdx(i), prev_dest);
+ renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
+ ++freed_regs;
+ }
+
+ (*insts_it)->clearDependents();
+
+ instList.erase(insts_it--);
+ --numInsts;
+ }
+
+ frontEnd->addFreeRegs(freed_regs);
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::squashFromXC()
+{
+ xcSquash = true;
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::squashDueToBranch(DynInstPtr &inst)
+{
+ // Update the branch predictor state I guess
+ squash(inst->seqNum);
+ frontEnd->squash(inst->seqNum, inst->readNextPC(),
+ true, inst->mispredicted());
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
+{
+ DPRINTF(IEW, "Memory blocked, squashing load and younger insts, "
+ "PC: %#x [sn:%i].\n", inst->readPC(), inst->seqNum);
+
+ squash(inst->seqNum - 1);
+ frontEnd->squash(inst->seqNum - 1, inst->readPC());
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::fetchFault(Fault &fault)
+{
+ faultFromFetch = fault;
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
+{
+ int thread_number = inst->threadNumber;
+
+ //
+ // Pick off the software prefetches
+ //
+#ifdef TARGET_ALPHA
+ if (inst->isDataPrefetch())
+ exe_swp[thread_number]++;
+ else
+ exe_inst[thread_number]++;
+#else
+ exe_inst[thread_number]++;
+#endif
+
+ //
+ // Control operations
+ //
+ if (inst->isControl())
+ exe_branches[thread_number]++;
+
+ //
+ // Memory operations
+ //
+ if (inst->isMemRef()) {
+ exe_refs[thread_number]++;
+
+ if (inst->isLoad())
+ exe_loads[thread_number]++;
+ }
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
+{
+ unsigned thread = inst->threadNumber;
+
+ //
+ // Pick off the software prefetches
+ //
+#ifdef TARGET_ALPHA
+ if (inst->isDataPrefetch()) {
+ stat_com_swp[thread]++;
+ } else {
+ stat_com_inst[thread]++;
+ }
+#else
+ stat_com_inst[thread]++;
+#endif
+
+ //
+ // Control Instructions
+ //
+ if (inst->isControl())
+ stat_com_branches[thread]++;
+
+ //
+ // Memory references
+ //
+ if (inst->isMemRef()) {
+ stat_com_refs[thread]++;
+
+ if (inst->isLoad()) {
+ stat_com_loads[thread]++;
+ }
+ }
+
+ if (inst->isMemBarrier()) {
+ stat_com_membars[thread]++;
+ }
+}
+
+template <class Impl>
+void
+BackEnd<Impl>::dumpInsts()
+{
+ int num = 0;
+ int valid_num = 0;
+
+ InstListIt inst_list_it = instList.begin();
+
+ cprintf("Inst list size: %i\n", instList.size());
+
+ while (inst_list_it != instList.end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+ if (!(*inst_list_it)->isSquashed()) {
+ if (!(*inst_list_it)->isIssued()) {
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ } else if ((*inst_list_it)->isMemRef() &&
+ !(*inst_list_it)->memOpDone) {
+ // Loads that have not been marked as executed still count
+ // towards the total instructions.
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ }
+ }
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Issued:%i\nSquashed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+
+ if ((*inst_list_it)->isMemRef()) {
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ }
+
+ cprintf("\n");
+
+ inst_list_it++;
+ ++num;
+ }
+
+ cprintf("Dispatch list size: %i\n", dispatch.size());
+
+ inst_list_it = dispatch.begin();
+
+ while (inst_list_it != dispatch.end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+ if (!(*inst_list_it)->isSquashed()) {
+ if (!(*inst_list_it)->isIssued()) {
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ } else if ((*inst_list_it)->isMemRef() &&
+ !(*inst_list_it)->memOpDone) {
+ // Loads that have not been marked as executed still count
+ // towards the total instructions.
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ }
+ }
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Issued:%i\nSquashed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+
+ if ((*inst_list_it)->isMemRef()) {
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ }
+
+ cprintf("\n");
+
+ inst_list_it++;
+ ++num;
+ }
+
+ cprintf("Writeback list size: %i\n", writeback.size());
+
+ inst_list_it = writeback.begin();
+
+ while (inst_list_it != writeback.end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+ if (!(*inst_list_it)->isSquashed()) {
+ if (!(*inst_list_it)->isIssued()) {
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ } else if ((*inst_list_it)->isMemRef() &&
+ !(*inst_list_it)->memOpDone) {
+ // Loads that have not been marked as executed still count
+ // towards the total instructions.
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ }
+ }
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Issued:%i\nSquashed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+
+ if ((*inst_list_it)->isMemRef()) {
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ }
+
+ cprintf("\n");
+
+ inst_list_it++;
+ ++num;
+ }
+}
#include "cpu/ozone/thread_state.hh"
#include "cpu/pc_event.hh"
#include "cpu/static_inst.hh"
-#include "mem/mem_interface.hh"
#include "sim/eventq.hh"
// forward declarations
class Checkpoint;
class EndQuiesceEvent;
-class MemInterface;
+class Request;
namespace Trace {
class InstRecord;
typedef typename Impl::DynInst DynInst;
typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef TheISA::FloatReg FloatReg;
+ typedef TheISA::FloatRegBits FloatRegBits;
typedef TheISA::MiscReg MiscReg;
public:
int readCpuId() { return thread->cpuId; }
- FunctionalMemory *getMemPtr() { return thread->mem; }
+ TranslatingPort *getMemPort() { return /*thread->port*/NULL; }
#if FULL_SYSTEM
System *getSystemPtr() { return cpu->system; }
uint64_t readIntReg(int reg_idx);
- float readFloatRegSingle(int reg_idx);
+ FloatReg readFloatReg(int reg_idx, int width);
- double readFloatRegDouble(int reg_idx);
+ FloatReg readFloatReg(int reg_idx);
- uint64_t readFloatRegInt(int reg_idx);
+ FloatRegBits readFloatRegBits(int reg_idx, int width);
+
+ FloatRegBits readFloatRegBits(int reg_idx);
void setIntReg(int reg_idx, uint64_t val);
- void setFloatRegSingle(int reg_idx, float val);
+ void setFloatReg(int reg_idx, FloatReg val, int width);
+
+ void setFloatReg(int reg_idx, FloatReg val);
- void setFloatRegDouble(int reg_idx, double val);
+ void setFloatRegBits(int reg_idx, FloatRegBits val, int width);
- void setFloatRegInt(int reg_idx, uint64_t val);
+ void setFloatRegBits(int reg_idx, FloatRegBits val);
uint64_t readPC() { return thread->PC; }
void setPC(Addr val);
uint64_t readNextPC() { return thread->nextPC; }
void setNextPC(Addr val);
+ uint64_t readNextNPC()
+ {
+ panic("Alpha has no NextNPC!");
+ return 0;
+ }
+
+ void setNextNPC(uint64_t val)
+ { panic("Alpha has no NextNPC!"); }
+
public:
// ISA stuff:
MiscReg readMiscReg(int misc_reg);
void setFuncExeInst(Counter new_val)
{ thread->funcExeInst = new_val; }
#endif
+ void changeRegFileContext(TheISA::RegFile::ContextParam param,
+ TheISA::RegFile::ContextVal val)
+ { panic("Not supported on Alpha!"); }
};
// execution context proxy
#endif
// L1 instruction cache
- MemInterface *icacheInterface;
+// MemInterface *icacheInterface;
// L1 data cache
- MemInterface *dcacheInterface;
+// MemInterface *dcacheInterface;
/** Pointer to memory. */
FunctionalMemory *mem;
int getInstAsid() { return thread.asid; }
int getDataAsid() { return thread.asid; }
- Fault dummyTranslation(MemReqPtr &req)
- {
-#if 0
- assert((req->vaddr >> 48 & 0xffff) == 0);
-#endif
-
- // put the asid in the upper 16 bits of the paddr
- req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16);
- req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16;
- return NoFault;
- }
-
/** Translates instruction requestion in syscall emulation mode. */
- Fault translateInstReq(MemReqPtr &req)
+ Fault translateInstReq(Request *req)
{
- return dummyTranslation(req);
+ return thread.translateInstReq(req);
}
/** Translates data read request in syscall emulation mode. */
- Fault translateDataReadReq(MemReqPtr &req)
+ Fault translateDataReadReq(Request *req)
{
- return dummyTranslation(req);
+ return thread.translateDataReadReq(req);
}
/** Translates data write request in syscall emulation mode. */
- Fault translateDataWriteReq(MemReqPtr &req)
+ Fault translateDataWriteReq(Request *req)
{
- return dummyTranslation(req);
+ return thread.translateDataWriteReq(req);
}
#endif
/** Old CPU read from memory function. No longer used. */
template <class T>
- Fault read(MemReqPtr &req, T &data)
+ Fault read(Request *req, T &data)
{
#if 0
#if FULL_SYSTEM && defined(TARGET_ALPHA)
req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true);
}
#endif
-#endif
- Fault error;
if (req->flags & LOCKED) {
lockAddrList.insert(req->paddr);
lockFlag = true;
}
+#endif
+ Fault error;
error = this->mem->read(req, data);
data = gtoh(data);
/** CPU read function, forwards read to LSQ. */
template <class T>
- Fault read(MemReqPtr &req, T &data, int load_idx)
+ Fault read(Request *req, T &data, int load_idx)
{
return backEnd->read(req, data, load_idx);
}
/** Old CPU write to memory function. No longer used. */
template <class T>
- Fault write(MemReqPtr &req, T &data)
+ Fault write(Request *req, T &data)
{
#if 0
#if FULL_SYSTEM && defined(TARGET_ALPHA)
}
}
-#endif
#endif
if (req->flags & LOCKED) {
}
}
}
+#endif
return this->mem->write(req, (T)htog(data));
}
/** CPU write function, forwards write to LSQ. */
template <class T>
- Fault write(MemReqPtr &req, T &data, int store_idx)
+ Fault write(Request *req, T &data, int store_idx)
{
return backEnd->write(req, data, store_idx);
}
--- /dev/null
+
+#include <string>
+
+#include "cpu/checker/cpu.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/ozone/cpu.hh"
+#include "cpu/ozone/ozone_impl.hh"
+#include "cpu/ozone/simple_impl.hh"
+#include "cpu/ozone/simple_params.hh"
+#include "mem/cache/base_cache.hh"
+#include "sim/builder.hh"
+#include "sim/process.hh"
+#include "sim/sim_object.hh"
+
+class DerivOzoneCPU : public OzoneCPU<OzoneImpl>
+{
+ public:
+ DerivOzoneCPU(SimpleParams *p)
+ : OzoneCPU<OzoneImpl>(p)
+ { }
+};
+
+class SimpleOzoneCPU : public OzoneCPU<SimpleImpl>
+{
+ public:
+ SimpleOzoneCPU(SimpleParams *p)
+ : OzoneCPU<SimpleImpl>(p)
+ { }
+};
+
+
+////////////////////////////////////////////////////////////////////////
+//
+// OzoneCPU Simulation Object
+//
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivOzoneCPU)
+
+ Param<int> clock;
+ Param<int> numThreads;
+
+#if FULL_SYSTEM
+SimObjectParam<System *> system;
+Param<int> cpu_id;
+SimObjectParam<AlphaITB *> itb;
+SimObjectParam<AlphaDTB *> dtb;
+#else
+SimObjectVectorParam<Process *> workload;
+//SimObjectParam<PageTable *> page_table;
+#endif // FULL_SYSTEM
+
+SimObjectParam<FunctionalMemory *> mem;
+
+SimObjectParam<BaseCPU *> checker;
+
+Param<Counter> max_insts_any_thread;
+Param<Counter> max_insts_all_threads;
+Param<Counter> max_loads_any_thread;
+Param<Counter> max_loads_all_threads;
+
+SimObjectParam<BaseCache *> icache;
+SimObjectParam<BaseCache *> dcache;
+
+Param<unsigned> cachePorts;
+Param<unsigned> width;
+Param<unsigned> frontEndWidth;
+Param<unsigned> backEndWidth;
+Param<unsigned> backEndSquashLatency;
+Param<unsigned> backEndLatency;
+Param<unsigned> maxInstBufferSize;
+Param<unsigned> numPhysicalRegs;
+Param<unsigned> maxOutstandingMemOps;
+
+Param<unsigned> decodeToFetchDelay;
+Param<unsigned> renameToFetchDelay;
+Param<unsigned> iewToFetchDelay;
+Param<unsigned> commitToFetchDelay;
+Param<unsigned> fetchWidth;
+
+Param<unsigned> renameToDecodeDelay;
+Param<unsigned> iewToDecodeDelay;
+Param<unsigned> commitToDecodeDelay;
+Param<unsigned> fetchToDecodeDelay;
+Param<unsigned> decodeWidth;
+
+Param<unsigned> iewToRenameDelay;
+Param<unsigned> commitToRenameDelay;
+Param<unsigned> decodeToRenameDelay;
+Param<unsigned> renameWidth;
+
+Param<unsigned> commitToIEWDelay;
+Param<unsigned> renameToIEWDelay;
+Param<unsigned> issueToExecuteDelay;
+Param<unsigned> issueWidth;
+Param<unsigned> executeWidth;
+Param<unsigned> executeIntWidth;
+Param<unsigned> executeFloatWidth;
+Param<unsigned> executeBranchWidth;
+Param<unsigned> executeMemoryWidth;
+
+Param<unsigned> iewToCommitDelay;
+Param<unsigned> renameToROBDelay;
+Param<unsigned> commitWidth;
+Param<unsigned> squashWidth;
+
+Param<unsigned> localPredictorSize;
+Param<unsigned> localCtrBits;
+Param<unsigned> localHistoryTableSize;
+Param<unsigned> localHistoryBits;
+Param<unsigned> globalPredictorSize;
+Param<unsigned> globalCtrBits;
+Param<unsigned> globalHistoryBits;
+Param<unsigned> choicePredictorSize;
+Param<unsigned> choiceCtrBits;
+
+Param<unsigned> BTBEntries;
+Param<unsigned> BTBTagSize;
+
+Param<unsigned> RASSize;
+
+Param<unsigned> LQEntries;
+Param<unsigned> SQEntries;
+Param<unsigned> LFSTSize;
+Param<unsigned> SSITSize;
+
+Param<unsigned> numPhysIntRegs;
+Param<unsigned> numPhysFloatRegs;
+Param<unsigned> numIQEntries;
+Param<unsigned> numROBEntries;
+
+Param<bool> decoupledFrontEnd;
+Param<int> dispatchWidth;
+Param<int> wbWidth;
+
+Param<unsigned> smtNumFetchingThreads;
+Param<std::string> smtFetchPolicy;
+Param<std::string> smtLSQPolicy;
+Param<unsigned> smtLSQThreshold;
+Param<std::string> smtIQPolicy;
+Param<unsigned> smtIQThreshold;
+Param<std::string> smtROBPolicy;
+Param<unsigned> smtROBThreshold;
+Param<std::string> smtCommitPolicy;
+
+Param<unsigned> instShiftAmt;
+
+Param<bool> defer_registration;
+
+Param<bool> function_trace;
+Param<Tick> function_trace_start;
+
+END_DECLARE_SIM_OBJECT_PARAMS(DerivOzoneCPU)
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
+
+ INIT_PARAM(clock, "clock speed"),
+ INIT_PARAM(numThreads, "number of HW thread contexts"),
+
+#if FULL_SYSTEM
+ INIT_PARAM(system, "System object"),
+ INIT_PARAM(cpu_id, "processor ID"),
+ INIT_PARAM(itb, "Instruction translation buffer"),
+ INIT_PARAM(dtb, "Data translation buffer"),
+#else
+ INIT_PARAM(workload, "Processes to run"),
+// INIT_PARAM(page_table, "Page table"),
+#endif // FULL_SYSTEM
+
+ INIT_PARAM_DFLT(mem, "Memory", NULL),
+
+ INIT_PARAM_DFLT(checker, "Checker CPU", NULL),
+
+ INIT_PARAM_DFLT(max_insts_any_thread,
+ "Terminate when any thread reaches this inst count",
+ 0),
+ INIT_PARAM_DFLT(max_insts_all_threads,
+ "Terminate when all threads have reached"
+ "this inst count",
+ 0),
+ INIT_PARAM_DFLT(max_loads_any_thread,
+ "Terminate when any thread reaches this load count",
+ 0),
+ INIT_PARAM_DFLT(max_loads_all_threads,
+ "Terminate when all threads have reached this load"
+ "count",
+ 0),
+
+ INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL),
+ INIT_PARAM_DFLT(dcache, "L1 data cache", NULL),
+
+ INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
+ INIT_PARAM_DFLT(width, "Width", 1),
+ INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1),
+ INIT_PARAM_DFLT(backEndWidth, "Back end width", 1),
+ INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1),
+ INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1),
+ INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16),
+ INIT_PARAM(numPhysicalRegs, "Number of physical registers"),
+ INIT_PARAM_DFLT(maxOutstandingMemOps, "Maximum outstanding memory operations", 4),
+
+ INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"),
+ INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"),
+ INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch"
+ "delay"),
+ INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"),
+ INIT_PARAM(fetchWidth, "Fetch width"),
+ INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"),
+ INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode"
+ "delay"),
+ INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"),
+ INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"),
+ INIT_PARAM(decodeWidth, "Decode width"),
+
+ INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename"
+ "delay"),
+ INIT_PARAM(commitToRenameDelay, "Commit to rename delay"),
+ INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"),
+ INIT_PARAM(renameWidth, "Rename width"),
+
+ INIT_PARAM(commitToIEWDelay, "Commit to "
+ "Issue/Execute/Writeback delay"),
+ INIT_PARAM(renameToIEWDelay, "Rename to "
+ "Issue/Execute/Writeback delay"),
+ INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal"
+ "to the IEW stage)"),
+ INIT_PARAM(issueWidth, "Issue width"),
+ INIT_PARAM(executeWidth, "Execute width"),
+ INIT_PARAM(executeIntWidth, "Integer execute width"),
+ INIT_PARAM(executeFloatWidth, "Floating point execute width"),
+ INIT_PARAM(executeBranchWidth, "Branch execute width"),
+ INIT_PARAM(executeMemoryWidth, "Memory execute width"),
+
+ INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit "
+ "delay"),
+ INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"),
+ INIT_PARAM(commitWidth, "Commit width"),
+ INIT_PARAM(squashWidth, "Squash width"),
+
+ INIT_PARAM(localPredictorSize, "Size of local predictor"),
+ INIT_PARAM(localCtrBits, "Bits per counter"),
+ INIT_PARAM(localHistoryTableSize, "Size of local history table"),
+ INIT_PARAM(localHistoryBits, "Bits for the local history"),
+ INIT_PARAM(globalPredictorSize, "Size of global predictor"),
+ INIT_PARAM(globalCtrBits, "Bits per counter"),
+ INIT_PARAM(globalHistoryBits, "Bits of history"),
+ INIT_PARAM(choicePredictorSize, "Size of choice predictor"),
+ INIT_PARAM(choiceCtrBits, "Bits of choice counters"),
+
+ INIT_PARAM(BTBEntries, "Number of BTB entries"),
+ INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"),
+
+ INIT_PARAM(RASSize, "RAS size"),
+
+ INIT_PARAM(LQEntries, "Number of load queue entries"),
+ INIT_PARAM(SQEntries, "Number of store queue entries"),
+ INIT_PARAM(LFSTSize, "Last fetched store table size"),
+ INIT_PARAM(SSITSize, "Store set ID table size"),
+
+ INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"),
+ INIT_PARAM(numPhysFloatRegs, "Number of physical floating point "
+ "registers"),
+ INIT_PARAM(numIQEntries, "Number of instruction queue entries"),
+ INIT_PARAM(numROBEntries, "Number of reorder buffer entries"),
+
+ INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true),
+ INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0),
+ INIT_PARAM_DFLT(wbWidth, "Writeback width", 0),
+
+ INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1),
+ INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"),
+ INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"),
+ INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100),
+ INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"),
+ INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100),
+ INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"),
+ INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100),
+ INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"),
+
+ INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"),
+ INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
+
+ INIT_PARAM(function_trace, "Enable function trace"),
+ INIT_PARAM(function_trace_start, "Cycle to start function trace")
+
+END_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
+
+CREATE_SIM_OBJECT(DerivOzoneCPU)
+{
+ DerivOzoneCPU *cpu;
+
+#if FULL_SYSTEM
+ // Full-system only supports a single thread for the moment.
+ int actual_num_threads = 1;
+#else
+ // In non-full-system mode, we infer the number of threads from
+ // the workload if it's not explicitly specified.
+ int actual_num_threads =
+ numThreads.isValid() ? numThreads : workload.size();
+
+ if (workload.size() == 0) {
+ fatal("Must specify at least one workload!");
+ }
+
+#endif
+
+ SimpleParams *params = new SimpleParams;
+
+ params->clock = clock;
+
+ params->name = getInstanceName();
+ params->numberOfThreads = actual_num_threads;
+
+#if FULL_SYSTEM
+ params->system = system;
+ params->cpu_id = cpu_id;
+ params->itb = itb;
+ params->dtb = dtb;
+#else
+ params->workload = workload;
+// params->pTable = page_table;
+#endif // FULL_SYSTEM
+
+ params->mem = mem;
+ params->checker = checker;
+ params->max_insts_any_thread = max_insts_any_thread;
+ params->max_insts_all_threads = max_insts_all_threads;
+ params->max_loads_any_thread = max_loads_any_thread;
+ params->max_loads_all_threads = max_loads_all_threads;
+
+ //
+ // Caches
+ //
+ params->icacheInterface = icache ? icache->getInterface() : NULL;
+ params->dcacheInterface = dcache ? dcache->getInterface() : NULL;
+ params->cachePorts = cachePorts;
+
+ params->width = width;
+ params->frontEndWidth = frontEndWidth;
+ params->backEndWidth = backEndWidth;
+ params->backEndSquashLatency = backEndSquashLatency;
+ params->backEndLatency = backEndLatency;
+ params->maxInstBufferSize = maxInstBufferSize;
+ params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs;
+ params->maxOutstandingMemOps = maxOutstandingMemOps;
+
+ params->decodeToFetchDelay = decodeToFetchDelay;
+ params->renameToFetchDelay = renameToFetchDelay;
+ params->iewToFetchDelay = iewToFetchDelay;
+ params->commitToFetchDelay = commitToFetchDelay;
+ params->fetchWidth = fetchWidth;
+
+ params->renameToDecodeDelay = renameToDecodeDelay;
+ params->iewToDecodeDelay = iewToDecodeDelay;
+ params->commitToDecodeDelay = commitToDecodeDelay;
+ params->fetchToDecodeDelay = fetchToDecodeDelay;
+ params->decodeWidth = decodeWidth;
+
+ params->iewToRenameDelay = iewToRenameDelay;
+ params->commitToRenameDelay = commitToRenameDelay;
+ params->decodeToRenameDelay = decodeToRenameDelay;
+ params->renameWidth = renameWidth;
+
+ params->commitToIEWDelay = commitToIEWDelay;
+ params->renameToIEWDelay = renameToIEWDelay;
+ params->issueToExecuteDelay = issueToExecuteDelay;
+ params->issueWidth = issueWidth;
+ params->executeWidth = executeWidth;
+ params->executeIntWidth = executeIntWidth;
+ params->executeFloatWidth = executeFloatWidth;
+ params->executeBranchWidth = executeBranchWidth;
+ params->executeMemoryWidth = executeMemoryWidth;
+
+ params->iewToCommitDelay = iewToCommitDelay;
+ params->renameToROBDelay = renameToROBDelay;
+ params->commitWidth = commitWidth;
+ params->squashWidth = squashWidth;
+
+
+ params->localPredictorSize = localPredictorSize;
+ params->localCtrBits = localCtrBits;
+ params->localHistoryTableSize = localHistoryTableSize;
+ params->localHistoryBits = localHistoryBits;
+ params->globalPredictorSize = globalPredictorSize;
+ params->globalCtrBits = globalCtrBits;
+ params->globalHistoryBits = globalHistoryBits;
+ params->choicePredictorSize = choicePredictorSize;
+ params->choiceCtrBits = choiceCtrBits;
+
+ params->BTBEntries = BTBEntries;
+ params->BTBTagSize = BTBTagSize;
+
+ params->RASSize = RASSize;
+
+ params->LQEntries = LQEntries;
+ params->SQEntries = SQEntries;
+
+ params->SSITSize = SSITSize;
+ params->LFSTSize = LFSTSize;
+
+ params->numPhysIntRegs = numPhysIntRegs;
+ params->numPhysFloatRegs = numPhysFloatRegs;
+ params->numIQEntries = numIQEntries;
+ params->numROBEntries = numROBEntries;
+
+ params->decoupledFrontEnd = decoupledFrontEnd;
+ params->dispatchWidth = dispatchWidth;
+ params->wbWidth = wbWidth;
+
+ params->smtNumFetchingThreads = smtNumFetchingThreads;
+ params->smtFetchPolicy = smtFetchPolicy;
+ params->smtIQPolicy = smtIQPolicy;
+ params->smtLSQPolicy = smtLSQPolicy;
+ params->smtLSQThreshold = smtLSQThreshold;
+ params->smtROBPolicy = smtROBPolicy;
+ params->smtROBThreshold = smtROBThreshold;
+ params->smtCommitPolicy = smtCommitPolicy;
+
+ params->instShiftAmt = 2;
+
+ params->deferRegistration = defer_registration;
+
+ params->functionTrace = function_trace;
+ params->functionTraceStart = function_trace_start;
+
+ cpu = new DerivOzoneCPU(params);
+
+ return cpu;
+}
+
+REGISTER_SIM_OBJECT("DerivOzoneCPU", DerivOzoneCPU)
+
+
+
+////////////////////////////////////////////////////////////////////////
+//
+// OzoneCPU Simulation Object
+//
+
+BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
+
+ Param<int> clock;
+ Param<int> numThreads;
+
+#if FULL_SYSTEM
+SimObjectParam<System *> system;
+Param<int> cpu_id;
+SimObjectParam<AlphaITB *> itb;
+SimObjectParam<AlphaDTB *> dtb;
+#else
+SimObjectVectorParam<Process *> workload;
+//SimObjectParam<PageTable *> page_table;
+#endif // FULL_SYSTEM
+
+SimObjectParam<FunctionalMemory *> mem;
+
+SimObjectParam<BaseCPU *> checker;
+
+Param<Counter> max_insts_any_thread;
+Param<Counter> max_insts_all_threads;
+Param<Counter> max_loads_any_thread;
+Param<Counter> max_loads_all_threads;
+
+SimObjectParam<BaseCache *> icache;
+SimObjectParam<BaseCache *> dcache;
+
+Param<unsigned> cachePorts;
+Param<unsigned> width;
+Param<unsigned> frontEndWidth;
+Param<unsigned> backEndWidth;
+Param<unsigned> backEndSquashLatency;
+Param<unsigned> backEndLatency;
+Param<unsigned> maxInstBufferSize;
+Param<unsigned> numPhysicalRegs;
+
+Param<unsigned> decodeToFetchDelay;
+Param<unsigned> renameToFetchDelay;
+Param<unsigned> iewToFetchDelay;
+Param<unsigned> commitToFetchDelay;
+Param<unsigned> fetchWidth;
+
+Param<unsigned> renameToDecodeDelay;
+Param<unsigned> iewToDecodeDelay;
+Param<unsigned> commitToDecodeDelay;
+Param<unsigned> fetchToDecodeDelay;
+Param<unsigned> decodeWidth;
+
+Param<unsigned> iewToRenameDelay;
+Param<unsigned> commitToRenameDelay;
+Param<unsigned> decodeToRenameDelay;
+Param<unsigned> renameWidth;
+
+Param<unsigned> commitToIEWDelay;
+Param<unsigned> renameToIEWDelay;
+Param<unsigned> issueToExecuteDelay;
+Param<unsigned> issueWidth;
+Param<unsigned> executeWidth;
+Param<unsigned> executeIntWidth;
+Param<unsigned> executeFloatWidth;
+Param<unsigned> executeBranchWidth;
+Param<unsigned> executeMemoryWidth;
+
+Param<unsigned> iewToCommitDelay;
+Param<unsigned> renameToROBDelay;
+Param<unsigned> commitWidth;
+Param<unsigned> squashWidth;
+
+Param<unsigned> localPredictorSize;
+Param<unsigned> localCtrBits;
+Param<unsigned> localHistoryTableSize;
+Param<unsigned> localHistoryBits;
+Param<unsigned> globalPredictorSize;
+Param<unsigned> globalCtrBits;
+Param<unsigned> globalHistoryBits;
+Param<unsigned> choicePredictorSize;
+Param<unsigned> choiceCtrBits;
+
+Param<unsigned> BTBEntries;
+Param<unsigned> BTBTagSize;
+
+Param<unsigned> RASSize;
+
+Param<unsigned> LQEntries;
+Param<unsigned> SQEntries;
+Param<unsigned> LFSTSize;
+Param<unsigned> SSITSize;
+
+Param<unsigned> numPhysIntRegs;
+Param<unsigned> numPhysFloatRegs;
+Param<unsigned> numIQEntries;
+Param<unsigned> numROBEntries;
+
+Param<bool> decoupledFrontEnd;
+Param<int> dispatchWidth;
+Param<int> wbWidth;
+
+Param<unsigned> smtNumFetchingThreads;
+Param<std::string> smtFetchPolicy;
+Param<std::string> smtLSQPolicy;
+Param<unsigned> smtLSQThreshold;
+Param<std::string> smtIQPolicy;
+Param<unsigned> smtIQThreshold;
+Param<std::string> smtROBPolicy;
+Param<unsigned> smtROBThreshold;
+Param<std::string> smtCommitPolicy;
+
+Param<unsigned> instShiftAmt;
+
+Param<bool> defer_registration;
+
+Param<bool> function_trace;
+Param<Tick> function_trace_start;
+
+END_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
+
+BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
+
+ INIT_PARAM(clock, "clock speed"),
+ INIT_PARAM(numThreads, "number of HW thread contexts"),
+
+#if FULL_SYSTEM
+ INIT_PARAM(system, "System object"),
+ INIT_PARAM(cpu_id, "processor ID"),
+ INIT_PARAM(itb, "Instruction translation buffer"),
+ INIT_PARAM(dtb, "Data translation buffer"),
+#else
+ INIT_PARAM(workload, "Processes to run"),
+// INIT_PARAM(page_table, "Page table"),
+#endif // FULL_SYSTEM
+
+ INIT_PARAM_DFLT(mem, "Memory", NULL),
+
+ INIT_PARAM_DFLT(checker, "Checker CPU", NULL),
+
+ INIT_PARAM_DFLT(max_insts_any_thread,
+ "Terminate when any thread reaches this inst count",
+ 0),
+ INIT_PARAM_DFLT(max_insts_all_threads,
+ "Terminate when all threads have reached"
+ "this inst count",
+ 0),
+ INIT_PARAM_DFLT(max_loads_any_thread,
+ "Terminate when any thread reaches this load count",
+ 0),
+ INIT_PARAM_DFLT(max_loads_all_threads,
+ "Terminate when all threads have reached this load"
+ "count",
+ 0),
+
+ INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL),
+ INIT_PARAM_DFLT(dcache, "L1 data cache", NULL),
+
+ INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
+ INIT_PARAM_DFLT(width, "Width", 1),
+ INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1),
+ INIT_PARAM_DFLT(backEndWidth, "Back end width", 1),
+ INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1),
+ INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1),
+ INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16),
+ INIT_PARAM(numPhysicalRegs, "Number of physical registers"),
+
+ INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"),
+ INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"),
+ INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch"
+ "delay"),
+ INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"),
+ INIT_PARAM(fetchWidth, "Fetch width"),
+ INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"),
+ INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode"
+ "delay"),
+ INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"),
+ INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"),
+ INIT_PARAM(decodeWidth, "Decode width"),
+
+ INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename"
+ "delay"),
+ INIT_PARAM(commitToRenameDelay, "Commit to rename delay"),
+ INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"),
+ INIT_PARAM(renameWidth, "Rename width"),
+
+ INIT_PARAM(commitToIEWDelay, "Commit to "
+ "Issue/Execute/Writeback delay"),
+ INIT_PARAM(renameToIEWDelay, "Rename to "
+ "Issue/Execute/Writeback delay"),
+ INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal"
+ "to the IEW stage)"),
+ INIT_PARAM(issueWidth, "Issue width"),
+ INIT_PARAM(executeWidth, "Execute width"),
+ INIT_PARAM(executeIntWidth, "Integer execute width"),
+ INIT_PARAM(executeFloatWidth, "Floating point execute width"),
+ INIT_PARAM(executeBranchWidth, "Branch execute width"),
+ INIT_PARAM(executeMemoryWidth, "Memory execute width"),
+
+ INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit "
+ "delay"),
+ INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"),
+ INIT_PARAM(commitWidth, "Commit width"),
+ INIT_PARAM(squashWidth, "Squash width"),
+
+ INIT_PARAM(localPredictorSize, "Size of local predictor"),
+ INIT_PARAM(localCtrBits, "Bits per counter"),
+ INIT_PARAM(localHistoryTableSize, "Size of local history table"),
+ INIT_PARAM(localHistoryBits, "Bits for the local history"),
+ INIT_PARAM(globalPredictorSize, "Size of global predictor"),
+ INIT_PARAM(globalCtrBits, "Bits per counter"),
+ INIT_PARAM(globalHistoryBits, "Bits of history"),
+ INIT_PARAM(choicePredictorSize, "Size of choice predictor"),
+ INIT_PARAM(choiceCtrBits, "Bits of choice counters"),
+
+ INIT_PARAM(BTBEntries, "Number of BTB entries"),
+ INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"),
+
+ INIT_PARAM(RASSize, "RAS size"),
+
+ INIT_PARAM(LQEntries, "Number of load queue entries"),
+ INIT_PARAM(SQEntries, "Number of store queue entries"),
+ INIT_PARAM(LFSTSize, "Last fetched store table size"),
+ INIT_PARAM(SSITSize, "Store set ID table size"),
+
+ INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"),
+ INIT_PARAM(numPhysFloatRegs, "Number of physical floating point "
+ "registers"),
+ INIT_PARAM(numIQEntries, "Number of instruction queue entries"),
+ INIT_PARAM(numROBEntries, "Number of reorder buffer entries"),
+
+ INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true),
+ INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0),
+ INIT_PARAM_DFLT(wbWidth, "Writeback width", 0),
+
+ INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1),
+ INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"),
+ INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"),
+ INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100),
+ INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"),
+ INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100),
+ INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"),
+ INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100),
+ INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"),
+
+ INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"),
+ INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
+
+ INIT_PARAM(function_trace, "Enable function trace"),
+ INIT_PARAM(function_trace_start, "Cycle to start function trace")
+
+END_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
+
+CREATE_SIM_OBJECT(SimpleOzoneCPU)
+{
+ SimpleOzoneCPU *cpu;
+
+#if FULL_SYSTEM
+ // Full-system only supports a single thread for the moment.
+ int actual_num_threads = 1;
+#else
+ // In non-full-system mode, we infer the number of threads from
+ // the workload if it's not explicitly specified.
+ int actual_num_threads =
+ numThreads.isValid() ? numThreads : workload.size();
+
+ if (workload.size() == 0) {
+ fatal("Must specify at least one workload!");
+ }
+
+#endif
+
+ SimpleParams *params = new SimpleParams;
+
+ params->clock = clock;
+
+ params->name = getInstanceName();
+ params->numberOfThreads = actual_num_threads;
+
+#if FULL_SYSTEM
+ params->system = system;
+ params->cpu_id = cpu_id;
+ params->itb = itb;
+ params->dtb = dtb;
+#else
+ params->workload = workload;
+// params->pTable = page_table;
+#endif // FULL_SYSTEM
+
+ params->mem = mem;
+ params->checker = checker;
+ params->max_insts_any_thread = max_insts_any_thread;
+ params->max_insts_all_threads = max_insts_all_threads;
+ params->max_loads_any_thread = max_loads_any_thread;
+ params->max_loads_all_threads = max_loads_all_threads;
+
+ //
+ // Caches
+ //
+ params->icacheInterface = icache ? icache->getInterface() : NULL;
+ params->dcacheInterface = dcache ? dcache->getInterface() : NULL;
+ params->cachePorts = cachePorts;
+
+ params->width = width;
+ params->frontEndWidth = frontEndWidth;
+ params->backEndWidth = backEndWidth;
+ params->backEndSquashLatency = backEndSquashLatency;
+ params->backEndLatency = backEndLatency;
+ params->maxInstBufferSize = maxInstBufferSize;
+ params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs;
+
+ params->decodeToFetchDelay = decodeToFetchDelay;
+ params->renameToFetchDelay = renameToFetchDelay;
+ params->iewToFetchDelay = iewToFetchDelay;
+ params->commitToFetchDelay = commitToFetchDelay;
+ params->fetchWidth = fetchWidth;
+
+ params->renameToDecodeDelay = renameToDecodeDelay;
+ params->iewToDecodeDelay = iewToDecodeDelay;
+ params->commitToDecodeDelay = commitToDecodeDelay;
+ params->fetchToDecodeDelay = fetchToDecodeDelay;
+ params->decodeWidth = decodeWidth;
+
+ params->iewToRenameDelay = iewToRenameDelay;
+ params->commitToRenameDelay = commitToRenameDelay;
+ params->decodeToRenameDelay = decodeToRenameDelay;
+ params->renameWidth = renameWidth;
+
+ params->commitToIEWDelay = commitToIEWDelay;
+ params->renameToIEWDelay = renameToIEWDelay;
+ params->issueToExecuteDelay = issueToExecuteDelay;
+ params->issueWidth = issueWidth;
+ params->executeWidth = executeWidth;
+ params->executeIntWidth = executeIntWidth;
+ params->executeFloatWidth = executeFloatWidth;
+ params->executeBranchWidth = executeBranchWidth;
+ params->executeMemoryWidth = executeMemoryWidth;
+
+ params->iewToCommitDelay = iewToCommitDelay;
+ params->renameToROBDelay = renameToROBDelay;
+ params->commitWidth = commitWidth;
+ params->squashWidth = squashWidth;
+
+
+ params->localPredictorSize = localPredictorSize;
+ params->localCtrBits = localCtrBits;
+ params->localHistoryTableSize = localHistoryTableSize;
+ params->localHistoryBits = localHistoryBits;
+ params->globalPredictorSize = globalPredictorSize;
+ params->globalCtrBits = globalCtrBits;
+ params->globalHistoryBits = globalHistoryBits;
+ params->choicePredictorSize = choicePredictorSize;
+ params->choiceCtrBits = choiceCtrBits;
+
+ params->BTBEntries = BTBEntries;
+ params->BTBTagSize = BTBTagSize;
+
+ params->RASSize = RASSize;
+
+ params->LQEntries = LQEntries;
+ params->SQEntries = SQEntries;
+
+ params->SSITSize = SSITSize;
+ params->LFSTSize = LFSTSize;
+
+ params->numPhysIntRegs = numPhysIntRegs;
+ params->numPhysFloatRegs = numPhysFloatRegs;
+ params->numIQEntries = numIQEntries;
+ params->numROBEntries = numROBEntries;
+
+ params->decoupledFrontEnd = decoupledFrontEnd;
+ params->dispatchWidth = dispatchWidth;
+ params->wbWidth = wbWidth;
+
+ params->smtNumFetchingThreads = smtNumFetchingThreads;
+ params->smtFetchPolicy = smtFetchPolicy;
+ params->smtIQPolicy = smtIQPolicy;
+ params->smtLSQPolicy = smtLSQPolicy;
+ params->smtLSQThreshold = smtLSQThreshold;
+ params->smtROBPolicy = smtROBPolicy;
+ params->smtROBThreshold = smtROBThreshold;
+ params->smtCommitPolicy = smtCommitPolicy;
+
+ params->instShiftAmt = 2;
+
+ params->deferRegistration = defer_registration;
+
+ params->functionTrace = function_trace;
+ params->functionTraceStart = function_trace_start;
+
+ cpu = new SimpleOzoneCPU(params);
+
+ return cpu;
+}
+
+REGISTER_SIM_OBJECT("SimpleOzoneCPU", SimpleOzoneCPU)
+
template <class Impl>
float
-OzoneCPU<Impl>::OzoneXC::readFloatRegSingle(int reg_idx)
+OzoneCPU<Impl>::OzoneXC::readFloatReg(int reg_idx, int width)
{
int idx = reg_idx + TheISA::FP_Base_DepTag;
- return thread->renameTable[idx]->readFloatResult();
+ switch(width) {
+ case 32:
+ return thread->renameTable[idx]->readFloatResult();
+ case 64:
+ return thread->renameTable[idx]->readDoubleResult();
+ default:
+ panic("Unsupported width!");
+ return 0;
+ }
}
template <class Impl>
double
-OzoneCPU<Impl>::OzoneXC::readFloatRegDouble(int reg_idx)
+OzoneCPU<Impl>::OzoneXC::readFloatReg(int reg_idx)
{
int idx = reg_idx + TheISA::FP_Base_DepTag;
- return thread->renameTable[idx]->readDoubleResult();
+ return thread->renameTable[idx]->readFloatResult();
}
template <class Impl>
uint64_t
-OzoneCPU<Impl>::OzoneXC::readFloatRegInt(int reg_idx)
+OzoneCPU<Impl>::OzoneXC::readFloatRegBits(int reg_idx, int width)
+{
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ return thread->renameTable[idx]->readIntResult();
+}
+
+template <class Impl>
+uint64_t
+OzoneCPU<Impl>::OzoneXC::readFloatRegBits(int reg_idx)
{
int idx = reg_idx + TheISA::FP_Base_DepTag;
return thread->renameTable[idx]->readIntResult();
template <class Impl>
void
-OzoneCPU<Impl>::OzoneXC::setFloatRegSingle(int reg_idx, float val)
+OzoneCPU<Impl>::OzoneXC::setFloatReg(int reg_idx, FloatReg val, int width)
{
- panic("Unimplemented!");
+ int idx = reg_idx + TheISA::FP_Base_DepTag;
+ switch(width) {
+ case 32:
+ panic("Unimplemented!");
+ break;
+ case 64:
+ thread->renameTable[idx]->setDoubleResult(val);
+ break;
+ default:
+ panic("Unsupported width!");
+ }
+
+ if (!thread->inSyscall) {
+ cpu->squashFromXC();
+ }
}
template <class Impl>
void
-OzoneCPU<Impl>::OzoneXC::setFloatRegDouble(int reg_idx, double val)
+OzoneCPU<Impl>::OzoneXC::setFloatReg(int reg_idx, FloatReg val)
{
int idx = reg_idx + TheISA::FP_Base_DepTag;
template <class Impl>
void
-OzoneCPU<Impl>::OzoneXC::setFloatRegInt(int reg_idx, uint64_t val)
+OzoneCPU<Impl>::OzoneXC::setFloatRegBits(int reg_idx, FloatRegBits val,
+ int width)
+{
+ panic("Unimplemented!");
+}
+
+template <class Impl>
+void
+OzoneCPU<Impl>::OzoneXC::setFloatRegBits(int reg_idx, FloatRegBits val)
{
panic("Unimplemented!");
}
--- /dev/null
+/*
+ * Copyright (c) 2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/ozone/dyn_inst_impl.hh"
+#include "cpu/ozone/ozone_impl.hh"
+//#include "cpu/ozone/simple_impl.hh"
+
+template class OzoneDynInst<OzoneImpl>;
+//template class OzoneDynInst<SimpleImpl>;
+
--- /dev/null
+/*
+ * Copyright (c) 2005-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OZONE_DYN_INST_HH__
+#define __CPU_OZONE_DYN_INST_HH__
+
+#include "arch/isa_traits.hh"
+#include "config/full_system.hh"
+#include "cpu/base_dyn_inst.hh"
+#include "cpu/ozone/cpu.hh" // MUST include this
+#include "cpu/inst_seq.hh"
+//#include "cpu/ozone/simple_impl.hh" // Would be nice to not have to include this
+#include "cpu/ozone/ozone_impl.hh"
+
+#include <list>
+#include <vector>
+
+template <class Impl>
+class OzoneDynInst : public BaseDynInst<Impl>
+{
+ public:
+ // Typedefs
+ typedef typename Impl::FullCPU FullCPU;
+
+ typedef typename FullCPU::ImplState ImplState;
+
+ // Typedef for DynInstPtr. This is really just a RefCountingPtr<OoODynInst>.
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
+ typedef TheISA::ExtMachInst ExtMachInst;
+ typedef TheISA::MachInst MachInst;
+ typedef TheISA::MiscReg MiscReg;
+ typedef typename std::list<DynInstPtr>::iterator ListIt;
+
+ // Note that this is duplicated from the BaseDynInst class; I'm
+ // simply not sure the enum would carry through so I could use it
+ // in array declarations in this class.
+ enum {
+ MaxInstSrcRegs = TheISA::MaxInstSrcRegs,
+ MaxInstDestRegs = TheISA::MaxInstDestRegs
+ };
+
+ OzoneDynInst(FullCPU *cpu);
+
+ OzoneDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC,
+ InstSeqNum seq_num, FullCPU *cpu);
+
+ OzoneDynInst(StaticInstPtr inst);
+
+ ~OzoneDynInst();
+
+ void setSrcInst(DynInstPtr &newSrcInst, int regIdx)
+ { srcInsts[regIdx] = newSrcInst; }
+
+ bool srcInstReady(int regIdx);
+
+ void setPrevDestInst(DynInstPtr &oldDestInst, int regIdx)
+ { prevDestInst[regIdx] = oldDestInst; }
+
+ DynInstPtr &getPrevDestInst(int regIdx)
+ { return prevDestInst[regIdx]; }
+
+ void addDependent(DynInstPtr &dependent_inst);
+
+ std::vector<DynInstPtr> &getDependents() { return dependents; }
+ std::vector<DynInstPtr> &getMemDeps() { return memDependents; }
+ std::list<DynInstPtr> &getMemSrcs() { return srcMemInsts; }
+
+ void wakeDependents();
+
+ void wakeMemDependents();
+
+ void addMemDependent(DynInstPtr &inst) { memDependents.push_back(inst); }
+
+ void addSrcMemInst(DynInstPtr &inst) { srcMemInsts.push_back(inst); }
+
+ void markMemInstReady(OzoneDynInst<Impl> *inst);
+
+ // For now I will remove instructions from the list when they wake
+ // up. In the future, you only really need a counter.
+ bool memDepReady() { return srcMemInsts.empty(); }
+
+ private:
+ void initInstPtrs();
+
+ std::vector<DynInstPtr> dependents;
+
+ std::vector<DynInstPtr> memDependents;
+
+ std::list<DynInstPtr> srcMemInsts;
+
+ /** The instruction that produces the value of the source
+ * registers. These may be NULL if the value has already been
+ * read from the source instruction.
+ */
+ DynInstPtr srcInsts[MaxInstSrcRegs];
+
+ /**
+ * Previous rename instruction for this destination.
+ */
+ DynInstPtr prevDestInst[MaxInstSrcRegs];
+
+ public:
+
+ Fault initiateAcc();
+
+ Fault completeAcc();
+
+ // The register accessor methods provide the index of the
+ // instruction's operand (e.g., 0 or 1), not the architectural
+ // register index, to simplify the implementation of register
+ // renaming. We find the architectural register index by indexing
+ // into the instruction's own operand index table. Note that a
+ // raw pointer to the StaticInst is provided instead of a
+ // ref-counted StaticInstPtr to redice overhead. This is fine as
+ // long as these methods don't copy the pointer into any long-term
+ // storage (which is pretty hard to imagine they would have reason
+ // to do).
+
+ uint64_t readIntReg(const StaticInst *si, int idx)
+ {
+ return srcInsts[idx]->readIntResult();
+ }
+
+ float readFloatRegSingle(const StaticInst *si, int idx)
+ {
+ return srcInsts[idx]->readFloatResult();
+ }
+
+ double readFloatRegDouble(const StaticInst *si, int idx)
+ {
+ return srcInsts[idx]->readDoubleResult();
+ }
+
+ uint64_t readFloatRegInt(const StaticInst *si, int idx)
+ {
+ return srcInsts[idx]->readIntResult();
+ }
+
+ /** @todo: Make results into arrays so they can handle multiple dest
+ * registers.
+ */
+ void setIntReg(const StaticInst *si, int idx, uint64_t val)
+ {
+ BaseDynInst<Impl>::setIntReg(si, idx, val);
+ }
+
+ void setFloatRegSingle(const StaticInst *si, int idx, float val)
+ {
+ BaseDynInst<Impl>::setFloatRegSingle(si, idx, val);
+ }
+
+ void setFloatRegDouble(const StaticInst *si, int idx, double val)
+ {
+ BaseDynInst<Impl>::setFloatRegDouble(si, idx, val);
+ }
+
+ void setFloatRegInt(const StaticInst *si, int idx, uint64_t val)
+ {
+ BaseDynInst<Impl>::setFloatRegInt(si, idx, val);
+ }
+
+ void setIntResult(uint64_t result) { this->instResult.integer = result; }
+ void setDoubleResult(double result) { this->instResult.dbl = result; }
+
+ bool srcsReady();
+ bool eaSrcsReady();
+
+ Fault execute();
+
+ Fault executeEAComp()
+ { return NoFault; }
+
+ Fault executeMemAcc()
+ { return this->staticInst->memAccInst()->execute(this, this->traceData); }
+
+ void clearDependents();
+
+ void clearMemDependents();
+
+ public:
+ // ISA stuff
+ MiscReg readMiscReg(int misc_reg);
+
+ MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault);
+
+ Fault setMiscReg(int misc_reg, const MiscReg &val);
+
+ Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val);
+
+#if FULL_SYSTEM
+ Fault hwrei();
+ int readIntrFlag();
+ void setIntrFlag(int val);
+ bool inPalMode();
+ void trap(Fault fault);
+ bool simPalCheck(int palFunc);
+#else
+ void syscall();
+#endif
+
+ ListIt iqIt;
+ bool iqItValid;
+};
+
+#endif // __CPU_OZONE_DYN_INST_HH__
--- /dev/null
+/*
+ * Copyright (c) 2005-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arch/faults.hh"
+#include "arch/isa_traits.hh"
+#include "config/full_system.hh"
+#include "cpu/ozone/dyn_inst.hh"
+#include "kern/kernel_stats.hh"
+
+using namespace TheISA;
+
+template <class Impl>
+OzoneDynInst<Impl>::OzoneDynInst(FullCPU *cpu)
+ : BaseDynInst<Impl>(0, 0, 0, 0, cpu)
+{
+ this->setResultReady();
+
+ initInstPtrs();
+}
+
+template <class Impl>
+OzoneDynInst<Impl>::OzoneDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC,
+ InstSeqNum seq_num, FullCPU *cpu)
+ : BaseDynInst<Impl>(inst, PC, Pred_PC, seq_num, cpu)
+{
+ initInstPtrs();
+}
+
+template <class Impl>
+OzoneDynInst<Impl>::OzoneDynInst(StaticInstPtr _staticInst)
+ : BaseDynInst<Impl>(_staticInst)
+{
+ initInstPtrs();
+}
+
+template <class Impl>
+OzoneDynInst<Impl>::~OzoneDynInst()
+{
+ DPRINTF(BE, "[sn:%lli] destructor called\n", this->seqNum);
+ for (int i = 0; i < this->numSrcRegs(); ++i) {
+ srcInsts[i] = NULL;
+ }
+
+ for (int i = 0; i < this->numDestRegs(); ++i) {
+ prevDestInst[i] = NULL;
+ }
+
+ dependents.clear();
+}
+
+template <class Impl>
+Fault
+OzoneDynInst<Impl>::execute()
+{
+ // @todo: Pretty convoluted way to avoid squashing from happening when using
+ // the XC during an instruction's execution (specifically for instructions
+ // that have sideeffects that use the XC). Fix this.
+ bool in_syscall = this->thread->inSyscall;
+ this->thread->inSyscall = true;
+
+ this->fault = this->staticInst->execute(this, this->traceData);
+
+ this->thread->inSyscall = in_syscall;
+
+ return this->fault;
+}
+
+template <class Impl>
+Fault
+OzoneDynInst<Impl>::initiateAcc()
+{
+ // @todo: Pretty convoluted way to avoid squashing from happening when using
+ // the XC during an instruction's execution (specifically for instructions
+ // that have sideeffects that use the XC). Fix this.
+ bool in_syscall = this->thread->inSyscall;
+ this->thread->inSyscall = true;
+
+ this->fault = this->staticInst->initiateAcc(this, this->traceData);
+
+ this->thread->inSyscall = in_syscall;
+
+ return this->fault;
+}
+
+template <class Impl>
+Fault
+OzoneDynInst<Impl>::completeAcc()
+{
+ if (this->isLoad()) {
+ this->fault = this->staticInst->completeAcc(this->req->data,
+ this,
+ this->traceData);
+ } else if (this->isStore()) {
+ this->fault = this->staticInst->completeAcc((uint8_t*)&this->req->result,
+ this,
+ this->traceData);
+ } else {
+ panic("Unknown type!");
+ }
+
+ return this->fault;
+}
+
+template <class Impl>
+bool
+OzoneDynInst<Impl>::srcInstReady(int regIdx)
+{
+ return srcInsts[regIdx]->isResultReady();
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::addDependent(DynInstPtr &dependent_inst)
+{
+ dependents.push_back(dependent_inst);
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::wakeDependents()
+{
+ for (int i = 0; i < dependents.size(); ++i) {
+ dependents[i]->markSrcRegReady();
+ }
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::wakeMemDependents()
+{
+ for (int i = 0; i < memDependents.size(); ++i) {
+ memDependents[i]->markMemInstReady(this);
+ }
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::markMemInstReady(OzoneDynInst<Impl> *inst)
+{
+ ListIt mem_it = srcMemInsts.begin();
+ while ((*mem_it) != inst && mem_it != srcMemInsts.end()) {
+ mem_it++;
+ }
+ assert(mem_it != srcMemInsts.end());
+
+ srcMemInsts.erase(mem_it);
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::initInstPtrs()
+{
+ for (int i = 0; i < MaxInstSrcRegs; ++i) {
+ srcInsts[i] = NULL;
+ }
+ iqItValid = false;
+}
+
+template <class Impl>
+bool
+OzoneDynInst<Impl>::srcsReady()
+{
+ for (int i = 0; i < this->numSrcRegs(); ++i) {
+ if (!srcInsts[i]->isResultReady())
+ return false;
+ }
+
+ return true;
+}
+
+template <class Impl>
+bool
+OzoneDynInst<Impl>::eaSrcsReady()
+{
+ for (int i = 1; i < this->numSrcRegs(); ++i) {
+ if (!srcInsts[i]->isResultReady())
+ return false;
+ }
+
+ return true;
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::clearDependents()
+{
+ dependents.clear();
+ for (int i = 0; i < this->numSrcRegs(); ++i) {
+ srcInsts[i] = NULL;
+ }
+ for (int i = 0; i < this->numDestRegs(); ++i) {
+ prevDestInst[i] = NULL;
+ }
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::clearMemDependents()
+{
+ memDependents.clear();
+}
+
+template <class Impl>
+MiscReg
+OzoneDynInst<Impl>::readMiscReg(int misc_reg)
+{
+ return this->thread->readMiscReg(misc_reg);
+}
+
+template <class Impl>
+MiscReg
+OzoneDynInst<Impl>::readMiscRegWithEffect(int misc_reg, Fault &fault)
+{
+ return this->thread->readMiscRegWithEffect(misc_reg, fault);
+}
+
+template <class Impl>
+Fault
+OzoneDynInst<Impl>::setMiscReg(int misc_reg, const MiscReg &val)
+{
+ this->setIntResult(val);
+ return this->thread->setMiscReg(misc_reg, val);
+}
+
+template <class Impl>
+Fault
+OzoneDynInst<Impl>::setMiscRegWithEffect(int misc_reg, const MiscReg &val)
+{
+ return this->thread->setMiscRegWithEffect(misc_reg, val);
+}
+
+#if FULL_SYSTEM
+
+template <class Impl>
+Fault
+OzoneDynInst<Impl>::hwrei()
+{
+ if (!this->cpu->inPalMode(this->readPC()))
+ return new AlphaISA::UnimplementedOpcodeFault;
+
+ this->setNextPC(this->thread->readMiscReg(AlphaISA::IPR_EXC_ADDR));
+
+ this->cpu->hwrei();
+
+ // FIXME: XXX check for interrupts? XXX
+ return NoFault;
+}
+
+template <class Impl>
+int
+OzoneDynInst<Impl>::readIntrFlag()
+{
+return this->cpu->readIntrFlag();
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::setIntrFlag(int val)
+{
+ this->cpu->setIntrFlag(val);
+}
+
+template <class Impl>
+bool
+OzoneDynInst<Impl>::inPalMode()
+{
+ return this->cpu->inPalMode();
+}
+
+template <class Impl>
+void
+OzoneDynInst<Impl>::trap(Fault fault)
+{
+ fault->invoke(this->thread->getXCProxy());
+}
+
+template <class Impl>
+bool
+OzoneDynInst<Impl>::simPalCheck(int palFunc)
+{
+ return this->cpu->simPalCheck(palFunc);
+}
+#else
+template <class Impl>
+void
+OzoneDynInst<Impl>::syscall()
+{
+ this->cpu->syscall();
+}
+#endif
--- /dev/null
+
+#include "cpu/ozone/front_end_impl.hh"
+#include "cpu/ozone/ozone_impl.hh"
+#include "cpu/ozone/simple_impl.hh"
+
+template class FrontEnd<OzoneImpl>;
+template class FrontEnd<SimpleImpl>;
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OZONE_FRONT_END_HH__
+#define __CPU_OZONE_FRONT_END_HH__
+
+#include <deque>
+
+#include "cpu/inst_seq.hh"
+#include "cpu/o3/bpred_unit.hh"
+#include "cpu/ozone/rename_table.hh"
+#include "mem/request.hh"
+#include "sim/eventq.hh"
+#include "sim/stats.hh"
+
+class ExecContext;
+class MemInterface;
+template <class>
+class OzoneThreadState;
+class PageTable;
+template <class>
+class TimeBuffer;
+
+template <class Impl>
+class FrontEnd
+{
+ public:
+ typedef typename Impl::Params Params;
+ typedef typename Impl::DynInst DynInst;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::FullCPU FullCPU;
+ typedef typename Impl::BackEnd BackEnd;
+
+ typedef typename Impl::FullCPU::OzoneXC OzoneXC;
+ typedef typename Impl::FullCPU::CommStruct CommStruct;
+
+ FrontEnd(Params *params);
+
+ std::string name() const;
+
+ void setCPU(FullCPU *cpu_ptr)
+ { cpu = cpu_ptr; }
+
+ void setBackEnd(BackEnd *back_end_ptr)
+ { backEnd = back_end_ptr; }
+
+ void setCommBuffer(TimeBuffer<CommStruct> *_comm);
+
+ void setXC(ExecContext *xc_ptr);
+
+ void setThreadState(OzoneThreadState<Impl> *thread_ptr)
+ { thread = thread_ptr; }
+
+ void regStats();
+
+ void tick();
+ Fault fetchCacheLine();
+ void processInst(DynInstPtr &inst);
+ void squash(const InstSeqNum &squash_num, const Addr &next_PC,
+ const bool is_branch = false, const bool branch_taken = false);
+ DynInstPtr getInst();
+
+ void processCacheCompletion(Packet *pkt);
+
+ void addFreeRegs(int num_freed);
+
+ bool isEmpty() { return instBuffer.empty(); }
+
+ void switchOut();
+
+ void doSwitchOut();
+
+ void takeOverFrom(ExecContext *old_xc = NULL);
+
+ bool isSwitchedOut() { return switchedOut; }
+
+ bool switchedOut;
+
+ private:
+ bool updateStatus();
+
+ void checkBE();
+ DynInstPtr getInstFromCacheline();
+ void renameInst(DynInstPtr &inst);
+ // Returns true if we need to stop the front end this cycle
+ bool processBarriers(DynInstPtr &inst);
+
+ void handleFault(Fault &fault);
+ public:
+ Fault getFault() { return fetchFault; }
+ private:
+ Fault fetchFault;
+
+ // Align an address (typically a PC) to the start of an I-cache block.
+ // We fold in the PISA 64- to 32-bit conversion here as well.
+ Addr icacheBlockAlignPC(Addr addr)
+ {
+ addr = TheISA::realPCToFetchPC(addr);
+ return (addr & ~(cacheBlkMask));
+ }
+
+ InstSeqNum getAndIncrementInstSeq()
+ { return cpu->globalSeqNum++; }
+
+ public:
+ FullCPU *cpu;
+
+ BackEnd *backEnd;
+
+ ExecContext *xc;
+
+ OzoneThreadState<Impl> *thread;
+
+ enum Status {
+ Running,
+ Idle,
+ IcacheMissStall,
+ IcacheMissComplete,
+ SerializeBlocked,
+ SerializeComplete,
+ RenameBlocked,
+ QuiescePending,
+ TrapPending,
+ BEBlocked
+ };
+
+ Status status;
+
+ private:
+ TimeBuffer<CommStruct> *comm;
+ typename TimeBuffer<CommStruct>::wire fromCommit;
+
+ typedef typename Impl::BranchPred BranchPred;
+
+ BranchPred branchPred;
+
+ class IcachePort : public Port
+ {
+ protected:
+ FrontEnd *fe;
+
+ public:
+ IcachePort(const std::string &_name, FrontEnd *_fe)
+ : Port(_name), fe(_fe)
+ { }
+
+ protected:
+ virtual Tick recvAtomic(PacketPtr pkt);
+
+ virtual void recvFunctional(PacketPtr pkt);
+
+ virtual void recvStatusChange(Status status);
+
+ virtual void getDeviceAddressRanges(AddrRangeList &resp,
+ AddrRangeList &snoop)
+ { resp.clear(); snoop.clear(); }
+
+ virtual bool recvTiming(PacketPtr pkt);
+
+ virtual void recvRetry();
+ };
+
+ IcachePort icachePort;
+
+#if !FULL_SYSTEM
+ PageTable *pTable;
+#endif
+
+ RequestPtr memReq;
+
+ /** Mask to get a cache block's address. */
+ Addr cacheBlkMask;
+
+ unsigned cacheBlkSize;
+
+ Addr cacheBlkPC;
+
+ /** The cache line being fetched. */
+ uint8_t *cacheData;
+
+ bool fetchCacheLineNextCycle;
+
+ bool cacheBlkValid;
+
+ public:
+ RenameTable<Impl> renameTable;
+
+ private:
+ Addr PC;
+ Addr nextPC;
+
+ public:
+ void setPC(Addr val) { PC = val; }
+ void setNextPC(Addr val) { nextPC = val; }
+
+ void wakeFromQuiesce();
+
+ void dumpInsts();
+
+ private:
+ typedef typename std::deque<DynInstPtr> InstBuff;
+ typedef typename InstBuff::iterator InstBuffIt;
+
+ InstBuff instBuffer;
+
+ int instBufferSize;
+
+ int maxInstBufferSize;
+
+ int width;
+
+ int freeRegs;
+
+ int numPhysRegs;
+
+ bool serializeNext;
+
+ DynInstPtr barrierInst;
+
+ public:
+ bool interruptPending;
+ private:
+ // number of idle cycles
+/*
+ Stats::Average<> notIdleFraction;
+ Stats::Formula idleFraction;
+*/
+ // @todo: Consider making these vectors and tracking on a per thread basis.
+ /** Stat for total number of cycles stalled due to an icache miss. */
+ Stats::Scalar<> icacheStallCycles;
+ /** Stat for total number of fetched instructions. */
+ Stats::Scalar<> fetchedInsts;
+ Stats::Scalar<> fetchedBranches;
+ /** Stat for total number of predicted branches. */
+ Stats::Scalar<> predictedBranches;
+ /** Stat for total number of cycles spent fetching. */
+ Stats::Scalar<> fetchCycles;
+
+ Stats::Scalar<> fetchIdleCycles;
+ /** Stat for total number of cycles spent squashing. */
+ Stats::Scalar<> fetchSquashCycles;
+ /** Stat for total number of cycles spent blocked due to other stages in
+ * the pipeline.
+ */
+ Stats::Scalar<> fetchBlockedCycles;
+ /** Stat for total number of fetched cache lines. */
+ Stats::Scalar<> fetchedCacheLines;
+
+ Stats::Scalar<> fetchIcacheSquashes;
+ /** Distribution of number of instructions fetched each cycle. */
+ Stats::Distribution<> fetchNisnDist;
+// Stats::Vector<> qfull_iq_occupancy;
+// Stats::VectorDistribution<> qfull_iq_occ_dist_;
+ Stats::Formula idleRate;
+ Stats::Formula branchRate;
+ Stats::Formula fetchRate;
+ Stats::Scalar<> IFQCount; // cumulative IFQ occupancy
+ Stats::Formula IFQOccupancy;
+ Stats::Formula IFQLatency;
+ Stats::Scalar<> IFQFcount; // cumulative IFQ full count
+ Stats::Formula IFQFullRate;
+
+ Stats::Scalar<> dispatchCountStat;
+ Stats::Scalar<> dispatchedSerializing;
+ Stats::Scalar<> dispatchedTempSerializing;
+ Stats::Scalar<> dispatchSerializeStallCycles;
+ Stats::Formula dispatchRate;
+ Stats::Formula regIntFull;
+ Stats::Formula regFpFull;
+};
+
+#endif // __CPU_OZONE_FRONT_END_HH__
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arch/faults.hh"
+#include "arch/isa_traits.hh"
+#include "base/statistics.hh"
+#include "cpu/exec_context.hh"
+#include "cpu/exetrace.hh"
+#include "cpu/ozone/front_end.hh"
+#include "mem/mem_interface.hh"
+#include "sim/byte_swap.hh"
+
+using namespace TheISA;
+
+template <class Impl>
+FrontEnd<Impl>::FrontEnd(Params *params)
+ : branchPred(params),
+ icacheInterface(params->icacheInterface),
+ instBufferSize(0),
+ maxInstBufferSize(params->maxInstBufferSize),
+ width(params->frontEndWidth),
+ freeRegs(params->numPhysicalRegs),
+ numPhysRegs(params->numPhysicalRegs),
+ serializeNext(false),
+ interruptPending(false)
+{
+ switchedOut = false;
+
+ status = Idle;
+
+ memReq = NULL;
+ // Size of cache block.
+ cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;
+
+ assert(isPowerOf2(cacheBlkSize));
+
+ // Create mask to get rid of offset bits.
+ cacheBlkMask = (cacheBlkSize - 1);
+
+ // Create space to store a cache line.
+ cacheData = new uint8_t[cacheBlkSize];
+
+ fetchCacheLineNextCycle = true;
+
+ cacheBlkValid = false;
+
+#if !FULL_SYSTEM
+// pTable = params->pTable;
+#endif
+ fetchFault = NoFault;
+}
+
+template <class Impl>
+std::string
+FrontEnd<Impl>::name() const
+{
+ return cpu->name() + ".frontend";
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
+{
+ comm = _comm;
+ // @todo: Hardcoded for now. Allow this to be set by a latency.
+ fromCommit = comm->getWire(-1);
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::setXC(ExecContext *xc_ptr)
+{
+ xc = xc_ptr;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::regStats()
+{
+ icacheStallCycles
+ .name(name() + ".icacheStallCycles")
+ .desc("Number of cycles fetch is stalled on an Icache miss")
+ .prereq(icacheStallCycles);
+
+ fetchedInsts
+ .name(name() + ".fetchedInsts")
+ .desc("Number of instructions fetch has processed")
+ .prereq(fetchedInsts);
+
+ fetchedBranches
+ .name(name() + ".fetchedBranches")
+ .desc("Number of fetched branches")
+ .prereq(fetchedBranches);
+
+ predictedBranches
+ .name(name() + ".predictedBranches")
+ .desc("Number of branches that fetch has predicted taken")
+ .prereq(predictedBranches);
+
+ fetchCycles
+ .name(name() + ".fetchCycles")
+ .desc("Number of cycles fetch has run and was not squashing or"
+ " blocked")
+ .prereq(fetchCycles);
+
+ fetchIdleCycles
+ .name(name() + ".fetchIdleCycles")
+ .desc("Number of cycles fetch was idle")
+ .prereq(fetchIdleCycles);
+
+ fetchSquashCycles
+ .name(name() + ".fetchSquashCycles")
+ .desc("Number of cycles fetch has spent squashing")
+ .prereq(fetchSquashCycles);
+
+ fetchBlockedCycles
+ .name(name() + ".fetchBlockedCycles")
+ .desc("Number of cycles fetch has spent blocked")
+ .prereq(fetchBlockedCycles);
+
+ fetchedCacheLines
+ .name(name() + ".fetchedCacheLines")
+ .desc("Number of cache lines fetched")
+ .prereq(fetchedCacheLines);
+
+ fetchIcacheSquashes
+ .name(name() + ".fetchIcacheSquashes")
+ .desc("Number of outstanding Icache misses that were squashed")
+ .prereq(fetchIcacheSquashes);
+
+ fetchNisnDist
+ .init(/* base value */ 0,
+ /* last value */ width,
+ /* bucket size */ 1)
+ .name(name() + ".rateDist")
+ .desc("Number of instructions fetched each cycle (Total)")
+ .flags(Stats::pdf);
+
+ idleRate
+ .name(name() + ".idleRate")
+ .desc("Percent of cycles fetch was idle")
+ .prereq(idleRate);
+ idleRate = fetchIdleCycles * 100 / cpu->numCycles;
+
+ branchRate
+ .name(name() + ".branchRate")
+ .desc("Number of branch fetches per cycle")
+ .flags(Stats::total);
+ branchRate = fetchedBranches / cpu->numCycles;
+
+ fetchRate
+ .name(name() + ".rate")
+ .desc("Number of inst fetches per cycle")
+ .flags(Stats::total);
+ fetchRate = fetchedInsts / cpu->numCycles;
+
+ IFQCount
+ .name(name() + ".IFQ:count")
+ .desc("cumulative IFQ occupancy")
+ ;
+
+ IFQFcount
+ .name(name() + ".IFQ:fullCount")
+ .desc("cumulative IFQ full count")
+ .flags(Stats::total)
+ ;
+
+ IFQOccupancy
+ .name(name() + ".IFQ:occupancy")
+ .desc("avg IFQ occupancy (inst's)")
+ ;
+ IFQOccupancy = IFQCount / cpu->numCycles;
+
+ IFQLatency
+ .name(name() + ".IFQ:latency")
+ .desc("avg IFQ occupant latency (cycle's)")
+ .flags(Stats::total)
+ ;
+
+ IFQFullRate
+ .name(name() + ".IFQ:fullRate")
+ .desc("fraction of time (cycles) IFQ was full")
+ .flags(Stats::total);
+ ;
+ IFQFullRate = IFQFcount * Stats::constant(100) / cpu->numCycles;
+
+ dispatchCountStat
+ .name(name() + ".DIS:count")
+ .desc("cumulative count of dispatched insts")
+ .flags(Stats::total)
+ ;
+
+ dispatchedSerializing
+ .name(name() + ".DIS:serializingInsts")
+ .desc("count of serializing insts dispatched")
+ .flags(Stats::total)
+ ;
+
+ dispatchedTempSerializing
+ .name(name() + ".DIS:tempSerializingInsts")
+ .desc("count of temporary serializing insts dispatched")
+ .flags(Stats::total)
+ ;
+
+ dispatchSerializeStallCycles
+ .name(name() + ".DIS:serializeStallCycles")
+ .desc("count of cycles dispatch stalled for serializing inst")
+ .flags(Stats::total)
+ ;
+
+ dispatchRate
+ .name(name() + ".DIS:rate")
+ .desc("dispatched insts per cycle")
+ .flags(Stats::total)
+ ;
+ dispatchRate = dispatchCountStat / cpu->numCycles;
+
+ regIntFull
+ .name(name() + ".REG:int:full")
+ .desc("number of cycles where there were no INT registers")
+ ;
+
+ regFpFull
+ .name(name() + ".REG:fp:full")
+ .desc("number of cycles where there were no FP registers")
+ ;
+ IFQLatency = IFQOccupancy / dispatchRate;
+
+ branchPred.regStats();
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::tick()
+{
+ if (switchedOut)
+ return;
+
+ // @todo: Maybe I want to just have direct communication...
+ if (fromCommit->doneSeqNum) {
+ branchPred.update(fromCommit->doneSeqNum, 0);
+ }
+
+ IFQCount += instBufferSize;
+ IFQFcount += instBufferSize == maxInstBufferSize;
+
+ // Fetch cache line
+ if (status == IcacheMissComplete) {
+ cacheBlkValid = true;
+
+ status = Running;
+ if (barrierInst)
+ status = SerializeBlocked;
+ if (freeRegs <= 0)
+ status = RenameBlocked;
+ checkBE();
+ } else if (status == IcacheMissStall) {
+ DPRINTF(FE, "Still in Icache miss stall.\n");
+ icacheStallCycles++;
+ return;
+ }
+
+ if (status == RenameBlocked || status == SerializeBlocked ||
+ status == TrapPending || status == BEBlocked) {
+ // Will cause a one cycle bubble between changing state and
+ // restarting.
+ DPRINTF(FE, "In blocked status.\n");
+
+ fetchBlockedCycles++;
+
+ if (status == SerializeBlocked) {
+ dispatchSerializeStallCycles++;
+ }
+ updateStatus();
+ return;
+ } else if (status == QuiescePending) {
+ DPRINTF(FE, "Waiting for quiesce to execute or get squashed.\n");
+ return;
+ } else if (status != IcacheMissComplete) {
+ if (fetchCacheLineNextCycle) {
+ Fault fault = fetchCacheLine();
+ if (fault != NoFault) {
+ handleFault(fault);
+ fetchFault = fault;
+ return;
+ }
+ fetchCacheLineNextCycle = false;
+ }
+ // If miss, stall until it returns.
+ if (status == IcacheMissStall) {
+ // Tell CPU to not tick me for now.
+ return;
+ }
+ }
+
+ fetchCycles++;
+
+ int num_inst = 0;
+
+ // Otherwise loop and process instructions.
+ // One way to hack infinite width is to set width and maxInstBufferSize
+ // both really high. Inelegant, but probably will work.
+ while (num_inst < width &&
+ instBufferSize < maxInstBufferSize) {
+ // Get instruction from cache line.
+ DynInstPtr inst = getInstFromCacheline();
+
+ if (!inst) {
+ // PC is no longer in the cache line, end fetch.
+ // Might want to check this at the end of the cycle so that
+ // there's no cycle lost to checking for a new cache line.
+ DPRINTF(FE, "Need to get new cache line\n");
+ fetchCacheLineNextCycle = true;
+ break;
+ }
+
+ processInst(inst);
+
+ if (status == SerializeBlocked) {
+ break;
+ }
+
+ // Possibly push into a time buffer that estimates the front end
+ // latency
+ instBuffer.push_back(inst);
+ ++instBufferSize;
+ ++num_inst;
+
+#if FULL_SYSTEM
+ if (inst->isQuiesce()) {
+ warn("%lli: Quiesce instruction encountered, halting fetch!", curTick);
+ status = QuiescePending;
+ break;
+ }
+#endif
+
+ if (inst->predTaken()) {
+ // Start over with tick?
+ break;
+ } else if (freeRegs <= 0) {
+ DPRINTF(FE, "Ran out of free registers to rename to!\n");
+ status = RenameBlocked;
+ break;
+ } else if (serializeNext) {
+ break;
+ }
+ }
+
+ fetchNisnDist.sample(num_inst);
+ checkBE();
+
+ DPRINTF(FE, "Num insts processed: %i, Inst Buffer size: %i, Free "
+ "Regs %i\n", num_inst, instBufferSize, freeRegs);
+}
+
+template <class Impl>
+Fault
+FrontEnd<Impl>::fetchCacheLine()
+{
+ // Read a cache line, based on the current PC.
+#if FULL_SYSTEM
+ // Flag to say whether or not address is physical addr.
+ unsigned flags = cpu->inPalMode(PC) ? PHYSICAL : 0;
+#else
+ unsigned flags = 0;
+#endif // FULL_SYSTEM
+ Fault fault = NoFault;
+
+ if (interruptPending && flags == 0) {
+ return fault;
+ }
+
+ // Align the fetch PC so it's at the start of a cache block.
+ Addr fetch_PC = icacheBlockAlignPC(PC);
+
+ DPRINTF(FE, "Fetching cache line starting at %#x.\n", fetch_PC);
+
+ // Setup the memReq to do a read of the first isntruction's address.
+ // Set the appropriate read size and flags as well.
+ memReq = new MemReq();
+
+ memReq->asid = 0;
+ memReq->thread_num = 0;
+ memReq->data = new uint8_t[64];
+ memReq->xc = xc;
+ memReq->cmd = Read;
+ memReq->reset(fetch_PC, cacheBlkSize, flags);
+
+ // Translate the instruction request.
+ fault = cpu->translateInstReq(memReq);
+
+ // Now do the timing access to see whether or not the instruction
+ // exists within the cache.
+ if (icacheInterface && fault == NoFault) {
+#if FULL_SYSTEM
+ if (cpu->system->memctrl->badaddr(memReq->paddr) ||
+ memReq->flags & UNCACHEABLE) {
+ DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a "
+ "misspeculating path!",
+ memReq->paddr);
+ return TheISA::genMachineCheckFault();
+ }
+#endif
+
+ memReq->completionEvent = NULL;
+
+ memReq->time = curTick;
+ fault = cpu->mem->read(memReq, cacheData);
+
+ MemAccessResult res = icacheInterface->access(memReq);
+
+ // If the cache missed then schedule an event to wake
+ // up this stage once the cache miss completes.
+ if (icacheInterface->doEvents() && res != MA_HIT) {
+ memReq->completionEvent = new ICacheCompletionEvent(memReq, this);
+
+ status = IcacheMissStall;
+
+ cacheBlkValid = false;
+
+ DPRINTF(FE, "Cache miss.\n");
+ } else {
+ DPRINTF(FE, "Cache hit.\n");
+
+ cacheBlkValid = true;
+
+// memcpy(cacheData, memReq->data, memReq->size);
+ }
+ }
+
+ // Note that this will set the cache block PC a bit earlier than it should
+ // be set.
+ cacheBlkPC = fetch_PC;
+
+ ++fetchedCacheLines;
+
+ DPRINTF(FE, "Done fetching cache line.\n");
+
+ return fault;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::processInst(DynInstPtr &inst)
+{
+ if (processBarriers(inst)) {
+ return;
+ }
+
+ Addr inst_PC = inst->readPC();
+
+ if (!inst->isControl()) {
+ inst->setPredTarg(inst->readNextPC());
+ } else {
+ fetchedBranches++;
+ if (branchPred.predict(inst, inst_PC, inst->threadNumber)) {
+ predictedBranches++;
+ }
+ }
+
+ Addr next_PC = inst->readPredTarg();
+
+ DPRINTF(FE, "[sn:%lli] Predicted and processed inst PC %#x, next PC "
+ "%#x\n", inst->seqNum, inst_PC, next_PC);
+
+// inst->setNextPC(next_PC);
+
+ // Not sure where I should set this
+ PC = next_PC;
+
+ renameInst(inst);
+}
+
+template <class Impl>
+bool
+FrontEnd<Impl>::processBarriers(DynInstPtr &inst)
+{
+ if (serializeNext) {
+ inst->setSerializeBefore();
+ serializeNext = false;
+ } else if (!inst->isSerializing() &&
+ !inst->isIprAccess() &&
+ !inst->isStoreConditional()) {
+ return false;
+ }
+
+ if ((inst->isIprAccess() || inst->isSerializeBefore()) &&
+ !inst->isSerializeHandled()) {
+ DPRINTF(FE, "Serialize before instruction encountered.\n");
+
+ if (!inst->isTempSerializeBefore()) {
+ dispatchedSerializing++;
+ inst->setSerializeHandled();
+ } else {
+ dispatchedTempSerializing++;
+ }
+
+ // Change status over to SerializeBlocked so that other stages know
+ // what this is blocked on.
+ status = SerializeBlocked;
+
+ barrierInst = inst;
+ return true;
+ } else if ((inst->isStoreConditional() || inst->isSerializeAfter())
+ && !inst->isSerializeHandled()) {
+ DPRINTF(FE, "Serialize after instruction encountered.\n");
+
+ inst->setSerializeHandled();
+
+ dispatchedSerializing++;
+
+ serializeNext = true;
+ return false;
+ }
+ return false;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::handleFault(Fault &fault)
+{
+ DPRINTF(FE, "Fault at fetch, telling commit\n");
+
+ // We're blocked on the back end until it handles this fault.
+ status = TrapPending;
+
+ // Get a sequence number.
+ InstSeqNum inst_seq = getAndIncrementInstSeq();
+ // We will use a nop in order to carry the fault.
+ ExtMachInst ext_inst = TheISA::NoopMachInst;
+
+ // Create a new DynInst from the dummy nop.
+ DynInstPtr instruction = new DynInst(ext_inst, PC,
+ PC+sizeof(MachInst),
+ inst_seq, cpu);
+ instruction->setPredTarg(instruction->readNextPC());
+// instruction->setThread(tid);
+
+// instruction->setASID(tid);
+
+ instruction->setState(thread);
+
+ instruction->traceData = NULL;
+
+ instruction->fault = fault;
+ instruction->setCanIssue();
+ instBuffer.push_back(instruction);
+ ++instBufferSize;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
+ const bool is_branch, const bool branch_taken)
+{
+ DPRINTF(FE, "Squashing from [sn:%lli], setting PC to %#x\n",
+ squash_num, next_PC);
+
+ if (fetchFault != NoFault)
+ fetchFault = NoFault;
+
+ while (!instBuffer.empty() &&
+ instBuffer.back()->seqNum > squash_num) {
+ DynInstPtr inst = instBuffer.back();
+
+ DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n",
+ inst->seqNum, inst->readPC());
+
+ inst->clearDependents();
+
+ instBuffer.pop_back();
+ --instBufferSize;
+
+ freeRegs+= inst->numDestRegs();
+ }
+
+ // Copy over rename table from the back end.
+ renameTable.copyFrom(backEnd->renameTable);
+
+ PC = next_PC;
+
+ // Update BP with proper information.
+ if (is_branch) {
+ branchPred.squash(squash_num, next_PC, branch_taken, 0);
+ } else {
+ branchPred.squash(squash_num, 0);
+ }
+
+ // Clear the icache miss if it's outstanding.
+ if (status == IcacheMissStall && icacheInterface) {
+ DPRINTF(FE, "Squashing outstanding Icache miss.\n");
+ memReq = NULL;
+ }
+
+ if (status == SerializeBlocked) {
+ assert(barrierInst->seqNum > squash_num);
+ barrierInst = NULL;
+ }
+
+ // Unless this squash originated from the front end, we're probably
+ // in running mode now.
+ // Actually might want to make this latency dependent.
+ status = Running;
+ fetchCacheLineNextCycle = true;
+}
+
+template <class Impl>
+typename Impl::DynInstPtr
+FrontEnd<Impl>::getInst()
+{
+ if (instBufferSize == 0) {
+ return NULL;
+ }
+
+ DynInstPtr inst = instBuffer.front();
+
+ instBuffer.pop_front();
+
+ --instBufferSize;
+
+ dispatchCountStat++;
+
+ return inst;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::processCacheCompletion(MemReqPtr &req)
+{
+ DPRINTF(FE, "Processing cache completion\n");
+
+ // Do something here.
+ if (status != IcacheMissStall ||
+ req != memReq ||
+ switchedOut) {
+ DPRINTF(FE, "Previous fetch was squashed.\n");
+ fetchIcacheSquashes++;
+ return;
+ }
+
+ status = IcacheMissComplete;
+
+/* if (checkStall(tid)) {
+ fetchStatus[tid] = Blocked;
+ } else {
+ fetchStatus[tid] = IcacheMissComplete;
+ }
+*/
+// memcpy(cacheData, memReq->data, memReq->size);
+
+ // Reset the completion event to NULL.
+// memReq->completionEvent = NULL;
+ memReq = NULL;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::addFreeRegs(int num_freed)
+{
+ if (status == RenameBlocked && freeRegs + num_freed > 0) {
+ status = Running;
+ }
+
+ DPRINTF(FE, "Adding %i freed registers\n", num_freed);
+
+ freeRegs+= num_freed;
+
+// assert(freeRegs <= numPhysRegs);
+ if (freeRegs > numPhysRegs)
+ freeRegs = numPhysRegs;
+}
+
+template <class Impl>
+bool
+FrontEnd<Impl>::updateStatus()
+{
+ bool serialize_block = !backEnd->robEmpty() || instBufferSize;
+ bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked();
+ bool ret_val = false;
+
+ if (status == SerializeBlocked && !serialize_block) {
+ status = SerializeComplete;
+ ret_val = true;
+ }
+
+ if (status == BEBlocked && !be_block) {
+ if (barrierInst) {
+ status = SerializeBlocked;
+ } else {
+ status = Running;
+ }
+ ret_val = true;
+ }
+ return ret_val;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::checkBE()
+{
+ bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked();
+ if (be_block) {
+ if (status == Running || status == Idle) {
+ status = BEBlocked;
+ }
+ }
+}
+
+template <class Impl>
+typename Impl::DynInstPtr
+FrontEnd<Impl>::getInstFromCacheline()
+{
+ if (status == SerializeComplete) {
+ DynInstPtr inst = barrierInst;
+ status = Running;
+ barrierInst = NULL;
+ inst->clearSerializeBefore();
+ return inst;
+ }
+
+ InstSeqNum inst_seq;
+ MachInst inst;
+ // @todo: Fix this magic number used here to handle word offset (and
+ // getting rid of PAL bit)
+ unsigned offset = (PC & cacheBlkMask) & ~3;
+
+ // PC of inst is not in this cache block
+ if (PC >= (cacheBlkPC + cacheBlkSize) || PC < cacheBlkPC || !cacheBlkValid) {
+ return NULL;
+ }
+
+ //////////////////////////
+ // Fetch one instruction
+ //////////////////////////
+
+ // Get a sequence number.
+ inst_seq = getAndIncrementInstSeq();
+
+ // Make sure this is a valid index.
+ assert(offset <= cacheBlkSize - sizeof(MachInst));
+
+ // Get the instruction from the array of the cache line.
+ inst = htog(*reinterpret_cast<MachInst *>(&cacheData[offset]));
+
+ ExtMachInst decode_inst = TheISA::makeExtMI(inst, PC);
+
+ // Create a new DynInst from the instruction fetched.
+ DynInstPtr instruction = new DynInst(decode_inst, PC, PC+sizeof(MachInst),
+ inst_seq, cpu);
+
+ instruction->setState(thread);
+
+ DPRINTF(FE, "Instruction [sn:%lli] created, with PC %#x\n%s\n",
+ inst_seq, instruction->readPC(),
+ instruction->staticInst->disassemble(PC));
+
+ instruction->traceData =
+ Trace::getInstRecord(curTick, xc, cpu,
+ instruction->staticInst,
+ instruction->readPC(), 0);
+
+ // Increment stat of fetched instructions.
+ ++fetchedInsts;
+
+ return instruction;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::renameInst(DynInstPtr &inst)
+{
+ DynInstPtr src_inst = NULL;
+ int num_src_regs = inst->numSrcRegs();
+ if (num_src_regs == 0) {
+ inst->setCanIssue();
+ } else {
+ for (int i = 0; i < num_src_regs; ++i) {
+ src_inst = renameTable[inst->srcRegIdx(i)];
+
+ inst->setSrcInst(src_inst, i);
+
+ DPRINTF(FE, "[sn:%lli]: Src reg %i is inst [sn:%lli]\n",
+ inst->seqNum, (int)inst->srcRegIdx(i), src_inst->seqNum);
+
+ if (src_inst->isResultReady()) {
+ DPRINTF(FE, "Reg ready.\n");
+ inst->markSrcRegReady(i);
+ } else {
+ DPRINTF(FE, "Adding to dependent list.\n");
+ src_inst->addDependent(inst);
+ }
+ }
+ }
+
+ for (int i = 0; i < inst->numDestRegs(); ++i) {
+ RegIndex idx = inst->destRegIdx(i);
+
+ DPRINTF(FE, "Dest reg %i is now inst [sn:%lli], was previously "
+ "[sn:%lli]\n",
+ (int)inst->destRegIdx(i), inst->seqNum,
+ renameTable[idx]->seqNum);
+
+ inst->setPrevDestInst(renameTable[idx], i);
+
+ renameTable[idx] = inst;
+ --freeRegs;
+ }
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::wakeFromQuiesce()
+{
+ DPRINTF(FE, "Waking up from quiesce\n");
+ // Hopefully this is safe
+ status = Running;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::switchOut()
+{
+ switchedOut = true;
+ cpu->signalSwitched();
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::doSwitchOut()
+{
+ memReq = NULL;
+ squash(0, 0);
+ instBuffer.clear();
+ instBufferSize = 0;
+ status = Idle;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::takeOverFrom(ExecContext *old_xc)
+{
+ assert(freeRegs == numPhysRegs);
+ fetchCacheLineNextCycle = true;
+
+ cacheBlkValid = false;
+
+#if !FULL_SYSTEM
+// pTable = params->pTable;
+#endif
+ fetchFault = NoFault;
+ serializeNext = false;
+ barrierInst = NULL;
+ status = Running;
+ switchedOut = false;
+ interruptPending = false;
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::dumpInsts()
+{
+ cprintf("instBuffer size: %i\n", instBuffer.size());
+
+ InstBuffIt buff_it = instBuffer.begin();
+
+ for (int num = 0; buff_it != instBuffer.end(); num++) {
+ cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n"
+ "Squashed:%i\n\n",
+ num, (*buff_it)->readPC(), (*buff_it)->threadNumber,
+ (*buff_it)->seqNum, (*buff_it)->isIssued(),
+ (*buff_it)->isSquashed());
+ buff_it++;
+ }
+}
+
+template <class Impl>
+FrontEnd<Impl>::ICacheCompletionEvent::ICacheCompletionEvent(MemReqPtr &_req, FrontEnd *fe)
+ : Event(&mainEventQueue, Delayed_Writeback_Pri), req(_req), frontEnd(fe)
+{
+ this->setFlags(Event::AutoDelete);
+}
+
+template <class Impl>
+void
+FrontEnd<Impl>::ICacheCompletionEvent::process()
+{
+ frontEnd->processCacheCompletion(req);
+}
+
+template <class Impl>
+const char *
+FrontEnd<Impl>::ICacheCompletionEvent::description()
+{
+ return "ICache completion event";
+}
--- /dev/null
+
+#include "cpu/ozone/inorder_back_end_impl.hh"
+#include "cpu/ozone/simple_impl.hh"
+
+template class InorderBackEnd<SimpleImpl>;
--- /dev/null
+
+#ifndef __CPU_OZONE_INORDER_BACK_END_HH__
+#define __CPU_OZONE_INORDER_BACK_END_HH__
+
+#include <list>
+
+#include "arch/faults.hh"
+#include "base/timebuf.hh"
+#include "cpu/exec_context.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/ozone/rename_table.hh"
+#include "cpu/ozone/thread_state.hh"
+#include "mem/request.hh"
+#include "sim/eventq.hh"
+
+template <class Impl>
+class InorderBackEnd
+{
+ public:
+ typedef typename Impl::Params Params;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::FullCPU FullCPU;
+ typedef typename Impl::FrontEnd FrontEnd;
+
+ typedef typename FullCPU::OzoneXC OzoneXC;
+ typedef typename Impl::FullCPU::CommStruct CommStruct;
+
+ InorderBackEnd(Params *params);
+
+ std::string name() const;
+
+ void setCPU(FullCPU *cpu_ptr)
+ { cpu = cpu_ptr; }
+
+ void setFrontEnd(FrontEnd *front_end_ptr)
+ { frontEnd = front_end_ptr; }
+
+ void setCommBuffer(TimeBuffer<CommStruct> *_comm)
+ { comm = _comm; }
+
+ void setXC(ExecContext *xc_ptr);
+
+ void setThreadState(OzoneThreadState<Impl> *thread_ptr);
+
+ void regStats() { }
+
+#if FULL_SYSTEM
+ void checkInterrupts();
+#endif
+
+ void tick();
+ void executeInsts();
+ void squash(const InstSeqNum &squash_num, const Addr &next_PC);
+
+ void squashFromXC();
+ void generateXCEvent() { }
+
+ bool robEmpty() { return instList.empty(); }
+
+ bool isFull() { return false; }
+ bool isBlocked() { return status == DcacheMissStoreStall ||
+ status == DcacheMissLoadStall ||
+ interruptBlocked; }
+
+ void fetchFault(Fault &fault);
+
+ void dumpInsts();
+
+ private:
+ void handleFault();
+
+ void setSquashInfoFromXC();
+
+ bool squashPending;
+ InstSeqNum squashSeqNum;
+ Addr squashNextPC;
+
+ Fault faultFromFetch;
+
+ bool interruptBlocked;
+
+ public:
+ template <class T>
+ Fault read(Addr addr, T &data, unsigned flags);
+
+ template <class T>
+ Fault read(RequestPtr req, T &data, int load_idx);
+
+ template <class T>
+ Fault write(T data, Addr addr, unsigned flags, uint64_t *res);
+
+ template <class T>
+ Fault write(RequestPtr req, T &data, int store_idx);
+
+ Addr readCommitPC() { return commitPC; }
+
+ Addr commitPC;
+
+ void switchOut() { panic("Not implemented!"); }
+ void doSwitchOut() { panic("Not implemented!"); }
+ void takeOverFrom(ExecContext *old_xc = NULL) { panic("Not implemented!"); }
+
+ public:
+ FullCPU *cpu;
+
+ FrontEnd *frontEnd;
+
+ ExecContext *xc;
+
+ OzoneThreadState<Impl> *thread;
+
+ RenameTable<Impl> renameTable;
+
+ protected:
+ enum Status {
+ Running,
+ Idle,
+ DcacheMissLoadStall,
+ DcacheMissStoreStall,
+ DcacheMissComplete,
+ Blocked
+ };
+
+ Status status;
+
+ class DCacheCompletionEvent : public Event
+ {
+ private:
+ InorderBackEnd *be;
+
+ public:
+ DCacheCompletionEvent(InorderBackEnd *_be);
+
+ virtual void process();
+ virtual const char *description();
+
+ DynInstPtr inst;
+ };
+
+ friend class DCacheCompletionEvent;
+
+ DCacheCompletionEvent cacheCompletionEvent;
+
+// MemInterface *dcacheInterface;
+
+ RequestPtr memReq;
+
+ private:
+ typedef typename std::list<DynInstPtr>::iterator InstListIt;
+
+ std::list<DynInstPtr> instList;
+
+ // General back end width. Used if the more specific isn't given.
+ int width;
+
+ int latency;
+
+ int squashLatency;
+
+ TimeBuffer<int> numInstsToWB;
+ TimeBuffer<int>::wire instsAdded;
+ TimeBuffer<int>::wire instsToExecute;
+
+ TimeBuffer<CommStruct> *comm;
+ // number of cycles stalled for D-cache misses
+ Stats::Scalar<> dcacheStallCycles;
+ Counter lastDcacheStall;
+};
+
+template <class Impl>
+template <class T>
+Fault
+InorderBackEnd<Impl>::read(Addr addr, T &data, unsigned flags)
+{
+ memReq->reset(addr, sizeof(T), flags);
+
+ // translate to physical address
+ Fault fault = cpu->translateDataReadReq(memReq);
+
+ // if we have a cache, do cache access too
+ if (fault == NoFault && dcacheInterface) {
+ memReq->cmd = Read;
+ memReq->completionEvent = NULL;
+ memReq->time = curTick;
+ memReq->flags &= ~INST_READ;
+ MemAccessResult result = dcacheInterface->access(memReq);
+
+ // Ugly hack to get an event scheduled *only* if the access is
+ // a miss. We really should add first-class support for this
+ // at some point.
+ if (result != MA_HIT) {
+ // Fix this hack for keeping funcExeInst correct with loads that
+ // are executed twice.
+ memReq->completionEvent = &cacheCompletionEvent;
+ lastDcacheStall = curTick;
+// unscheduleTickEvent();
+ status = DcacheMissLoadStall;
+ DPRINTF(IBE, "Dcache miss stall!\n");
+ } else {
+ // do functional access
+ DPRINTF(IBE, "Dcache hit!\n");
+ }
+ }
+/*
+ if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
+ recordEvent("Uncached Read");
+*/
+ return fault;
+}
+#if 0
+template <class Impl>
+template <class T>
+Fault
+InorderBackEnd<Impl>::read(MemReqPtr &req, T &data)
+{
+#if FULL_SYSTEM && defined(TARGET_ALPHA)
+ if (req->flags & LOCKED) {
+ req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr);
+ req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true);
+ }
+#endif
+
+ Fault error;
+ error = thread->mem->read(req, data);
+ data = LittleEndianGuest::gtoh(data);
+ return error;
+}
+#endif
+
+template <class Impl>
+template <class T>
+Fault
+InorderBackEnd<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
+{
+ memReq->reset(addr, sizeof(T), flags);
+
+ // translate to physical address
+ Fault fault = cpu->translateDataWriteReq(memReq);
+
+ if (fault == NoFault && dcacheInterface) {
+ memReq->cmd = Write;
+// memcpy(memReq->data,(uint8_t *)&data,memReq->size);
+ memReq->completionEvent = NULL;
+ memReq->time = curTick;
+ memReq->flags &= ~INST_READ;
+ MemAccessResult result = dcacheInterface->access(memReq);
+
+ // Ugly hack to get an event scheduled *only* if the access is
+ // a miss. We really should add first-class support for this
+ // at some point.
+ if (result != MA_HIT) {
+ memReq->completionEvent = &cacheCompletionEvent;
+ lastDcacheStall = curTick;
+// unscheduleTickEvent();
+ status = DcacheMissStoreStall;
+ DPRINTF(IBE, "Dcache miss stall!\n");
+ } else {
+ DPRINTF(IBE, "Dcache hit!\n");
+ }
+ }
+
+ if (res && (fault == NoFault))
+ *res = memReq->result;
+/*
+ if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
+ recordEvent("Uncached Write");
+*/
+ return fault;
+}
+#if 0
+template <class Impl>
+template <class T>
+Fault
+InorderBackEnd<Impl>::write(MemReqPtr &req, T &data)
+{
+#if FULL_SYSTEM && defined(TARGET_ALPHA)
+ ExecContext *xc;
+
+ // If this is a store conditional, act appropriately
+ if (req->flags & LOCKED) {
+ xc = req->xc;
+
+ if (req->flags & UNCACHEABLE) {
+ // Don't update result register (see stq_c in isa_desc)
+ req->result = 2;
+ xc->setStCondFailures(0);//Needed? [RGD]
+ } else {
+ bool lock_flag = xc->readMiscReg(TheISA::Lock_Flag_DepTag);
+ Addr lock_addr = xc->readMiscReg(TheISA::Lock_Addr_DepTag);
+ req->result = lock_flag;
+ if (!lock_flag ||
+ ((lock_addr & ~0xf) != (req->paddr & ~0xf))) {
+ xc->setMiscReg(TheISA::Lock_Flag_DepTag, false);
+ xc->setStCondFailures(xc->readStCondFailures() + 1);
+ if (((xc->readStCondFailures()) % 100000) == 0) {
+ std::cerr << "Warning: "
+ << xc->readStCondFailures()
+ << " consecutive store conditional failures "
+ << "on cpu " << req->xc->readCpuId()
+ << std::endl;
+ }
+ return NoFault;
+ }
+ else xc->setStCondFailures(0);
+ }
+ }
+
+ // Need to clear any locked flags on other proccessors for
+ // this address. Only do this for succsful Store Conditionals
+ // and all other stores (WH64?). Unsuccessful Store
+ // Conditionals would have returned above, and wouldn't fall
+ // through.
+ for (int i = 0; i < cpu->system->execContexts.size(); i++){
+ xc = cpu->system->execContexts[i];
+ if ((xc->readMiscReg(TheISA::Lock_Addr_DepTag) & ~0xf) ==
+ (req->paddr & ~0xf)) {
+ xc->setMiscReg(TheISA::Lock_Flag_DepTag, false);
+ }
+ }
+
+#endif
+ return thread->mem->write(req, (T)LittleEndianGuest::htog(data));
+}
+#endif
+
+template <class Impl>
+template <class T>
+Fault
+InorderBackEnd<Impl>::read(MemReqPtr &req, T &data, int load_idx)
+{
+// panic("Unimplemented!");
+// memReq->reset(addr, sizeof(T), flags);
+
+ // translate to physical address
+// Fault fault = cpu->translateDataReadReq(req);
+ req->cmd = Read;
+ req->completionEvent = NULL;
+ req->time = curTick;
+ assert(!req->data);
+ req->data = new uint8_t[64];
+ req->flags &= ~INST_READ;
+ Fault fault = cpu->read(req, data);
+ memcpy(req->data, &data, sizeof(T));
+
+ // if we have a cache, do cache access too
+ if (dcacheInterface) {
+ MemAccessResult result = dcacheInterface->access(req);
+
+ // Ugly hack to get an event scheduled *only* if the access is
+ // a miss. We really should add first-class support for this
+ // at some point.
+ if (result != MA_HIT) {
+ req->completionEvent = &cacheCompletionEvent;
+ lastDcacheStall = curTick;
+// unscheduleTickEvent();
+ status = DcacheMissLoadStall;
+ DPRINTF(IBE, "Dcache miss load stall!\n");
+ } else {
+ DPRINTF(IBE, "Dcache hit!\n");
+
+ }
+ }
+
+/*
+ if (!dcacheInterface && (req->flags & UNCACHEABLE))
+ recordEvent("Uncached Read");
+*/
+ return NoFault;
+}
+
+template <class Impl>
+template <class T>
+Fault
+InorderBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx)
+{
+// req->reset(addr, sizeof(T), flags);
+
+ // translate to physical address
+// Fault fault = cpu->translateDataWriteReq(req);
+
+ req->cmd = Write;
+ req->completionEvent = NULL;
+ req->time = curTick;
+ assert(!req->data);
+ req->data = new uint8_t[64];
+ memcpy(req->data, (uint8_t *)&data, req->size);
+
+ switch(req->size) {
+ case 1:
+ cpu->write(req, (uint8_t &)data);
+ break;
+ case 2:
+ cpu->write(req, (uint16_t &)data);
+ break;
+ case 4:
+ cpu->write(req, (uint32_t &)data);
+ break;
+ case 8:
+ cpu->write(req, (uint64_t &)data);
+ break;
+ default:
+ panic("Unexpected store size!\n");
+ }
+
+ if (dcacheInterface) {
+ req->cmd = Write;
+ req->data = new uint8_t[64];
+ memcpy(req->data,(uint8_t *)&data,req->size);
+ req->completionEvent = NULL;
+ req->time = curTick;
+ req->flags &= ~INST_READ;
+ MemAccessResult result = dcacheInterface->access(req);
+
+ // Ugly hack to get an event scheduled *only* if the access is
+ // a miss. We really should add first-class support for this
+ // at some point.
+ if (result != MA_HIT) {
+ req->completionEvent = &cacheCompletionEvent;
+ lastDcacheStall = curTick;
+// unscheduleTickEvent();
+ status = DcacheMissStoreStall;
+ DPRINTF(IBE, "Dcache miss store stall!\n");
+ } else {
+ DPRINTF(IBE, "Dcache hit!\n");
+
+ }
+ }
+/*
+ if (req->flags & LOCKED) {
+ if (req->flags & UNCACHEABLE) {
+ // Don't update result register (see stq_c in isa_desc)
+ req->result = 2;
+ } else {
+ req->result = 1;
+ }
+ }
+*/
+/*
+ if (res && (fault == NoFault))
+ *res = req->result;
+ */
+/*
+ if (!dcacheInterface && (req->flags & UNCACHEABLE))
+ recordEvent("Uncached Write");
+*/
+ return NoFault;
+}
+
+#endif // __CPU_OZONE_INORDER_BACK_END_HH__
--- /dev/null
+
+#include "arch/faults.hh"
+#include "arch/isa_traits.hh"
+#include "cpu/ozone/inorder_back_end.hh"
+#include "cpu/ozone/thread_state.hh"
+
+using namespace TheISA;
+
+template <class Impl>
+InorderBackEnd<Impl>::InorderBackEnd(Params *params)
+ : squashPending(false),
+ squashSeqNum(0),
+ squashNextPC(0),
+ faultFromFetch(NoFault),
+ interruptBlocked(false),
+ cacheCompletionEvent(this),
+ dcacheInterface(params->dcacheInterface),
+ width(params->backEndWidth),
+ latency(params->backEndLatency),
+ squashLatency(params->backEndSquashLatency),
+ numInstsToWB(0, latency + 1)
+{
+ instsAdded = numInstsToWB.getWire(latency);
+ instsToExecute = numInstsToWB.getWire(0);
+
+ memReq = new MemReq;
+ memReq->data = new uint8_t[64];
+ status = Running;
+}
+
+template <class Impl>
+std::string
+InorderBackEnd<Impl>::name() const
+{
+ return cpu->name() + ".inorderbackend";
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::setXC(ExecContext *xc_ptr)
+{
+ xc = xc_ptr;
+ memReq->xc = xc;
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::setThreadState(OzoneThreadState<Impl> *thread_ptr)
+{
+ thread = thread_ptr;
+ thread->setFuncExeInst(0);
+}
+
+#if FULL_SYSTEM
+template <class Impl>
+void
+InorderBackEnd<Impl>::checkInterrupts()
+{
+ //Check if there are any outstanding interrupts
+ //Handle the interrupts
+ int ipl = 0;
+ int summary = 0;
+
+ cpu->checkInterrupts = false;
+
+ if (thread->readMiscReg(IPR_ASTRR))
+ panic("asynchronous traps not implemented\n");
+
+ if (thread->readMiscReg(IPR_SIRR)) {
+ for (int i = INTLEVEL_SOFTWARE_MIN;
+ i < INTLEVEL_SOFTWARE_MAX; i++) {
+ if (thread->readMiscReg(IPR_SIRR) & (ULL(1) << i)) {
+ // See table 4-19 of the 21164 hardware reference
+ ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1;
+ summary |= (ULL(1) << i);
+ }
+ }
+ }
+
+ uint64_t interrupts = cpu->intr_status();
+
+ if (interrupts) {
+ for (int i = INTLEVEL_EXTERNAL_MIN;
+ i < INTLEVEL_EXTERNAL_MAX; i++) {
+ if (interrupts & (ULL(1) << i)) {
+ // See table 4-19 of the 21164 hardware reference
+ ipl = i;
+ summary |= (ULL(1) << i);
+ }
+ }
+ }
+
+ if (ipl && ipl > thread->readMiscReg(IPR_IPLR)) {
+ thread->inSyscall = true;
+
+ thread->setMiscReg(IPR_ISR, summary);
+ thread->setMiscReg(IPR_INTID, ipl);
+ Fault(new InterruptFault)->invoke(xc);
+ DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
+ thread->readMiscReg(IPR_IPLR), ipl, summary);
+
+ // May need to go 1 inst prior
+ squashPending = true;
+
+ thread->inSyscall = false;
+
+ setSquashInfoFromXC();
+ }
+}
+#endif
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::tick()
+{
+ // Squash due to an external source
+ // Not sure if this or an interrupt has higher priority
+ if (squashPending) {
+ squash(squashSeqNum, squashNextPC);
+ return;
+ }
+
+ // if (interrupt) then set thread PC, stall front end, record that
+ // I'm waiting for it to drain. (for now just squash)
+#if FULL_SYSTEM
+ if (interruptBlocked ||
+ (cpu->checkInterrupts &&
+ cpu->check_interrupts() &&
+ !cpu->inPalMode())) {
+ if (!robEmpty()) {
+ interruptBlocked = true;
+ } else if (robEmpty() && cpu->inPalMode()) {
+ // Will need to let the front end continue a bit until
+ // we're out of pal mode. Hopefully we never get into an
+ // infinite loop...
+ interruptBlocked = false;
+ } else {
+ interruptBlocked = false;
+ checkInterrupts();
+ return;
+ }
+ }
+#endif
+
+ if (status != DcacheMissLoadStall &&
+ status != DcacheMissStoreStall) {
+ for (int i = 0; i < width && (*instsAdded) < width; ++i) {
+ DynInstPtr inst = frontEnd->getInst();
+
+ if (!inst)
+ break;
+
+ instList.push_back(inst);
+
+ (*instsAdded)++;
+ }
+
+#if FULL_SYSTEM
+ if (faultFromFetch && robEmpty() && frontEnd->isEmpty()) {
+ handleFault();
+ } else {
+ executeInsts();
+ }
+#else
+ executeInsts();
+#endif
+ }
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::executeInsts()
+{
+ bool completed_last_inst = true;
+ int insts_to_execute = *instsToExecute;
+ int freed_regs = 0;
+
+ while (insts_to_execute > 0) {
+ assert(!instList.empty());
+ DynInstPtr inst = instList.front();
+
+ commitPC = inst->readPC();
+
+ thread->setPC(commitPC);
+ thread->setNextPC(inst->readNextPC());
+
+#if FULL_SYSTEM
+ int count = 0;
+ Addr oldpc;
+ do {
+ if (count == 0)
+ assert(!thread->inSyscall && !thread->trapPending);
+ oldpc = thread->readPC();
+ cpu->system->pcEventQueue.service(
+ thread->getXCProxy());
+ count++;
+ } while (oldpc != thread->readPC());
+ if (count > 1) {
+ DPRINTF(IBE, "PC skip function event, stopping commit\n");
+ completed_last_inst = false;
+ squashPending = true;
+ break;
+ }
+#endif
+
+ Fault inst_fault = NoFault;
+
+ if (status == DcacheMissComplete) {
+ DPRINTF(IBE, "Completing inst [sn:%lli]\n", inst->seqNum);
+ status = Running;
+ } else if (inst->isMemRef() && status != DcacheMissComplete &&
+ (!inst->isDataPrefetch() && !inst->isInstPrefetch())) {
+ DPRINTF(IBE, "Initiating mem op inst [sn:%lli] PC: %#x\n",
+ inst->seqNum, inst->readPC());
+
+ cacheCompletionEvent.inst = inst;
+ inst_fault = inst->initiateAcc();
+ if (inst_fault == NoFault &&
+ status != DcacheMissLoadStall &&
+ status != DcacheMissStoreStall) {
+ inst_fault = inst->completeAcc();
+ }
+ ++thread->funcExeInst;
+ } else {
+ DPRINTF(IBE, "Executing inst [sn:%lli] PC: %#x\n",
+ inst->seqNum, inst->readPC());
+ inst_fault = inst->execute();
+ ++thread->funcExeInst;
+ }
+
+ // Will need to be able to break this loop in case the load
+ // misses. Split access/complete ops would be useful here
+ // with writeback events.
+ if (status == DcacheMissLoadStall) {
+ *instsToExecute = insts_to_execute;
+
+ completed_last_inst = false;
+ break;
+ } else if (status == DcacheMissStoreStall) {
+ // Figure out how to fix this hack. Probably have DcacheMissLoad
+ // vs DcacheMissStore.
+ *instsToExecute = insts_to_execute;
+ completed_last_inst = false;
+/*
+ instList.pop_front();
+ --insts_to_execute;
+ if (inst->traceData) {
+ inst->traceData->finalize();
+ }
+*/
+
+ // Don't really need to stop for a store stall as long as
+ // the memory system is able to handle store forwarding
+ // and such. Breaking out might help avoid the cache
+ // interface becoming blocked.
+ break;
+ }
+
+ inst->setExecuted();
+ inst->setCompleted();
+ inst->setCanCommit();
+
+ instList.pop_front();
+
+ --insts_to_execute;
+ --(*instsToExecute);
+
+ if (inst->traceData) {
+ inst->traceData->finalize();
+ inst->traceData = NULL;
+ }
+
+ if (inst_fault != NoFault) {
+#if FULL_SYSTEM
+ DPRINTF(IBE, "Inst [sn:%lli] PC %#x has a fault\n",
+ inst->seqNum, inst->readPC());
+
+ assert(!thread->inSyscall);
+
+ thread->inSyscall = true;
+
+ // Hack for now; DTB will sometimes need the machine instruction
+ // for when faults happen. So we will set it here, prior to the
+ // DTB possibly needing it for this translation.
+ thread->setInst(
+ static_cast<TheISA::MachInst>(inst->staticInst->machInst));
+
+ // Consider holding onto the trap and waiting until the trap event
+ // happens for this to be executed.
+ inst_fault->invoke(xc);
+
+ // Exit state update mode to avoid accidental updating.
+ thread->inSyscall = false;
+
+ squashPending = true;
+
+ // Generate trap squash event.
+// generateTrapEvent(tid);
+ completed_last_inst = false;
+ break;
+#else // !FULL_SYSTEM
+ panic("fault (%d) detected @ PC %08p", inst_fault,
+ inst->PC);
+#endif // FULL_SYSTEM
+ }
+
+ for (int i = 0; i < inst->numDestRegs(); ++i) {
+ renameTable[inst->destRegIdx(i)] = inst;
+ thread->renameTable[inst->destRegIdx(i)] = inst;
+ ++freed_regs;
+ }
+
+ inst->clearDependents();
+
+ comm->access(0)->doneSeqNum = inst->seqNum;
+
+ if (inst->mispredicted()) {
+ squash(inst->seqNum, inst->readNextPC());
+
+ thread->setNextPC(inst->readNextPC());
+
+ break;
+ } else if (squashPending) {
+ // Something external happened that caused the CPU to squash.
+ // Break out of commit and handle the squash next cycle.
+ break;
+ }
+ // If it didn't mispredict, then it executed fine. Send back its
+ // registers and BP info? What about insts that may still have
+ // latency, like loads? Probably can send back the information after
+ // it is completed.
+
+ // keep an instruction count
+ cpu->numInst++;
+ thread->numInsts++;
+ }
+
+ frontEnd->addFreeRegs(freed_regs);
+
+ assert(insts_to_execute >= 0);
+
+ // Should only advance this if I have executed all instructions.
+ if (insts_to_execute == 0) {
+ numInstsToWB.advance();
+ }
+
+ // Should I set the PC to the next PC here? What do I set next PC to?
+ if (completed_last_inst) {
+ thread->setPC(thread->readNextPC());
+ thread->setNextPC(thread->readPC() + sizeof(MachInst));
+ }
+
+ if (squashPending) {
+ setSquashInfoFromXC();
+ }
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::handleFault()
+{
+ DPRINTF(Commit, "Handling fault from fetch\n");
+
+ assert(!thread->inSyscall);
+
+ thread->inSyscall = true;
+
+ // Consider holding onto the trap and waiting until the trap event
+ // happens for this to be executed.
+ faultFromFetch->invoke(xc);
+
+ // Exit state update mode to avoid accidental updating.
+ thread->inSyscall = false;
+
+ squashPending = true;
+
+ setSquashInfoFromXC();
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC)
+{
+ DPRINTF(IBE, "Squashing from [sn:%lli], setting PC to %#x\n",
+ squash_num, next_PC);
+
+ InstListIt squash_it = --(instList.end());
+
+ int freed_regs = 0;
+
+ while (!instList.empty() && (*squash_it)->seqNum > squash_num) {
+ DynInstPtr inst = *squash_it;
+
+ DPRINTF(IBE, "Squashing instruction PC %#x, [sn:%lli].\n",
+ inst->readPC(),
+ inst->seqNum);
+
+ // May cause problems with misc regs
+ freed_regs+= inst->numDestRegs();
+ inst->clearDependents();
+ squash_it--;
+ instList.pop_back();
+ }
+
+ frontEnd->addFreeRegs(freed_regs);
+
+ for (int i = 0; i < latency+1; ++i) {
+ numInstsToWB.advance();
+ }
+
+ squashPending = false;
+
+ // Probably want to make sure that this squash is the one that set the
+ // thread into inSyscall mode.
+ thread->inSyscall = false;
+
+ // Tell front end to squash, reset PC to new one.
+ frontEnd->squash(squash_num, next_PC);
+
+ faultFromFetch = NULL;
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::squashFromXC()
+{
+ // Record that I need to squash
+ squashPending = true;
+
+ thread->inSyscall = true;
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::setSquashInfoFromXC()
+{
+ // Need to handle the case of the instList being empty. In that case
+ // probably any number works, except maybe with stores in the store buffer.
+ squashSeqNum = instList.empty() ? 0 : instList.front()->seqNum - 1;
+
+ squashNextPC = thread->PC;
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::fetchFault(Fault &fault)
+{
+ faultFromFetch = fault;
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::dumpInsts()
+{
+ int num = 0;
+ int valid_num = 0;
+
+ InstListIt inst_list_it = instList.begin();
+
+ cprintf("Inst list size: %i\n", instList.size());
+
+ while (inst_list_it != instList.end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+ if (!(*inst_list_it)->isSquashed()) {
+ if (!(*inst_list_it)->isIssued()) {
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ } else if ((*inst_list_it)->isMemRef() &&
+ !(*inst_list_it)->memOpDone) {
+ // Loads that have not been marked as executed still count
+ // towards the total instructions.
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ }
+ }
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Issued:%i\nSquashed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+
+ if ((*inst_list_it)->isMemRef()) {
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ }
+
+ cprintf("\n");
+
+ inst_list_it++;
+ ++num;
+ }
+}
+
+template <class Impl>
+InorderBackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent(
+ InorderBackEnd *_be)
+ : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
+{
+// this->setFlags(Event::AutoDelete);
+}
+
+template <class Impl>
+void
+InorderBackEnd<Impl>::DCacheCompletionEvent::process()
+{
+ inst->completeAcc();
+ be->status = DcacheMissComplete;
+}
+
+template <class Impl>
+const char *
+InorderBackEnd<Impl>::DCacheCompletionEvent::description()
+{
+ return "DCache completion event";
+}
--- /dev/null
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/ozone/dyn_inst.hh"
+#include "cpu/ozone/ozone_impl.hh"
+#include "cpu/ozone/simple_impl.hh"
+#include "cpu/ozone/inst_queue_impl.hh"
+
+// Force instantiation of InstructionQueue.
+template class InstQueue<SimpleImpl>;
+template class InstQueue<OzoneImpl>;
--- /dev/null
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OZONE_INST_QUEUE_HH__
+#define __CPU_OZONE_INST_QUEUE_HH__
+
+#include <list>
+#include <map>
+#include <queue>
+#include <vector>
+
+#include "base/statistics.hh"
+#include "base/timebuf.hh"
+#include "cpu/inst_seq.hh"
+#include "sim/host.hh"
+
+class FUPool;
+class MemInterface;
+
+/**
+ * A standard instruction queue class. It holds ready instructions, in
+ * order, in seperate priority queues to facilitate the scheduling of
+ * instructions. The IQ uses a separate linked list to track dependencies.
+ * Similar to the rename map and the free list, it expects that
+ * floating point registers have their indices start after the integer
+ * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer
+ * and 96-191 are fp). This remains true even for both logical and
+ * physical register indices. The IQ depends on the memory dependence unit to
+ * track when memory operations are ready in terms of ordering; register
+ * dependencies are tracked normally. Right now the IQ also handles the
+ * execution timing; this is mainly to allow back-to-back scheduling without
+ * requiring IEW to be able to peek into the IQ. At the end of the execution
+ * latency, the instruction is put into the queue to execute, where it will
+ * have the execute() function called on it.
+ * @todo: Make IQ able to handle multiple FU pools.
+ */
+template <class Impl>
+class InstQueue
+{
+ public:
+ //Typedefs from the Impl.
+ typedef typename Impl::FullCPU FullCPU;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::Params Params;
+ typedef typename Impl::IssueStruct IssueStruct;
+/*
+ typedef typename Impl::CPUPol::IEW IEW;
+ typedef typename Impl::CPUPol::MemDepUnit MemDepUnit;
+ typedef typename Impl::CPUPol::IssueStruct IssueStruct;
+ typedef typename Impl::CPUPol::TimeStruct TimeStruct;
+*/
+ // Typedef of iterator through the list of instructions.
+ typedef typename std::list<DynInstPtr>::iterator ListIt;
+
+ friend class Impl::FullCPU;
+#if 0
+ /** FU completion event class. */
+ class FUCompletion : public Event {
+ private:
+ /** Executing instruction. */
+ DynInstPtr inst;
+
+ /** Index of the FU used for executing. */
+ int fuIdx;
+
+ /** Pointer back to the instruction queue. */
+ InstQueue<Impl> *iqPtr;
+
+ public:
+ /** Construct a FU completion event. */
+ FUCompletion(DynInstPtr &_inst, int fu_idx,
+ InstQueue<Impl> *iq_ptr);
+
+ virtual void process();
+ virtual const char *description();
+ };
+#endif
+ /** Constructs an IQ. */
+ InstQueue(Params *params);
+
+ /** Destructs the IQ. */
+ ~InstQueue();
+
+ /** Returns the name of the IQ. */
+ std::string name() const;
+
+ /** Registers statistics. */
+ void regStats();
+
+ /** Sets CPU pointer. */
+ void setCPU(FullCPU *_cpu) { cpu = _cpu; }
+#if 0
+ /** Sets active threads list. */
+ void setActiveThreads(list<unsigned> *at_ptr);
+
+ /** Sets the IEW pointer. */
+ void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; }
+#endif
+ /** Sets the timer buffer between issue and execute. */
+ void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue);
+#if 0
+ /** Sets the global time buffer. */
+ void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
+
+ /** Number of entries needed for given amount of threads. */
+ int entryAmount(int num_threads);
+
+ /** Resets max entries for all threads. */
+ void resetEntries();
+#endif
+ /** Returns total number of free entries. */
+ unsigned numFreeEntries();
+
+ /** Returns number of free entries for a thread. */
+ unsigned numFreeEntries(unsigned tid);
+
+ /** Returns whether or not the IQ is full. */
+ bool isFull();
+
+ /** Returns whether or not the IQ is full for a specific thread. */
+ bool isFull(unsigned tid);
+
+ /** Returns if there are any ready instructions in the IQ. */
+ bool hasReadyInsts();
+
+ /** Inserts a new instruction into the IQ. */
+ void insert(DynInstPtr &new_inst);
+
+ /** Inserts a new, non-speculative instruction into the IQ. */
+ void insertNonSpec(DynInstPtr &new_inst);
+#if 0
+ /**
+ * Advances the tail of the IQ, used if an instruction is not added to the
+ * IQ for scheduling.
+ * @todo: Rename this function.
+ */
+ void advanceTail(DynInstPtr &inst);
+
+ /** Process FU completion event. */
+ void processFUCompletion(DynInstPtr &inst, int fu_idx);
+#endif
+ /**
+ * Schedules ready instructions, adding the ready ones (oldest first) to
+ * the queue to execute.
+ */
+ void scheduleReadyInsts();
+
+ /** Schedules a single specific non-speculative instruction. */
+ void scheduleNonSpec(const InstSeqNum &inst);
+
+ /**
+ * Commits all instructions up to and including the given sequence number,
+ * for a specific thread.
+ */
+ void commit(const InstSeqNum &inst, unsigned tid = 0);
+
+ /** Wakes all dependents of a completed instruction. */
+ void wakeDependents(DynInstPtr &completed_inst);
+
+ /** Adds a ready memory instruction to the ready list. */
+ void addReadyMemInst(DynInstPtr &ready_inst);
+#if 0
+ /**
+ * Reschedules a memory instruction. It will be ready to issue once
+ * replayMemInst() is called.
+ */
+ void rescheduleMemInst(DynInstPtr &resched_inst);
+
+ /** Replays a memory instruction. It must be rescheduled first. */
+ void replayMemInst(DynInstPtr &replay_inst);
+#endif
+ /** Completes a memory operation. */
+ void completeMemInst(DynInstPtr &completed_inst);
+#if 0
+ /** Indicates an ordering violation between a store and a load. */
+ void violation(DynInstPtr &store, DynInstPtr &faulting_load);
+#endif
+ /**
+ * Squashes instructions for a thread. Squashing information is obtained
+ * from the time buffer.
+ */
+ void squash(unsigned tid); // Probably want the ISN
+
+ /** Returns the number of used entries for a thread. */
+ unsigned getCount(unsigned tid) { return count[tid]; };
+
+ /** Updates the number of free entries. */
+ void updateFreeEntries(int num) { freeEntries += num; }
+
+ /** Debug function to print all instructions. */
+ void printInsts();
+
+ private:
+ /** Does the actual squashing. */
+ void doSquash(unsigned tid);
+
+ /////////////////////////
+ // Various pointers
+ /////////////////////////
+
+ /** Pointer to the CPU. */
+ FullCPU *cpu;
+
+ /** Cache interface. */
+ MemInterface *dcacheInterface;
+#if 0
+ /** Pointer to IEW stage. */
+ IEW *iewStage;
+
+ /** The memory dependence unit, which tracks/predicts memory dependences
+ * between instructions.
+ */
+ MemDepUnit memDepUnit[Impl::MaxThreads];
+#endif
+ /** The queue to the execute stage. Issued instructions will be written
+ * into it.
+ */
+ TimeBuffer<IssueStruct> *issueToExecuteQueue;
+#if 0
+ /** The backwards time buffer. */
+ TimeBuffer<TimeStruct> *timeBuffer;
+
+ /** Wire to read information from timebuffer. */
+ typename TimeBuffer<TimeStruct>::wire fromCommit;
+
+ /** Function unit pool. */
+ FUPool *fuPool;
+#endif
+ //////////////////////////////////////
+ // Instruction lists, ready queues, and ordering
+ //////////////////////////////////////
+
+ /** List of all the instructions in the IQ (some of which may be issued). */
+ std::list<DynInstPtr> instList[Impl::MaxThreads];
+
+ /**
+ * Struct for comparing entries to be added to the priority queue. This
+ * gives reverse ordering to the instructions in terms of sequence
+ * numbers: the instructions with smaller sequence numbers (and hence
+ * are older) will be at the top of the priority queue.
+ */
+ struct pqCompare {
+ bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
+ {
+ return lhs->seqNum > rhs->seqNum;
+ }
+ };
+
+ /**
+ * Struct for an IQ entry. It includes the instruction and an iterator
+ * to the instruction's spot in the IQ.
+ */
+ struct IQEntry {
+ DynInstPtr inst;
+ ListIt iqIt;
+ };
+
+ typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare>
+ ReadyInstQueue;
+
+ typedef std::map<DynInstPtr, pqCompare> ReadyInstMap;
+ typedef typename std::map<DynInstPtr, pqCompare>::iterator ReadyMapIt;
+
+ /** List of ready instructions.
+ */
+ ReadyInstQueue readyInsts;
+
+ /** List of non-speculative instructions that will be scheduled
+ * once the IQ gets a signal from commit. While it's redundant to
+ * have the key be a part of the value (the sequence number is stored
+ * inside of DynInst), when these instructions are woken up only
+ * the sequence number will be available. Thus it is most efficient to be
+ * able to search by the sequence number alone.
+ */
+ std::map<InstSeqNum, DynInstPtr> nonSpecInsts;
+
+ typedef typename std::map<InstSeqNum, DynInstPtr>::iterator NonSpecMapIt;
+#if 0
+ /** Entry for the list age ordering by op class. */
+ struct ListOrderEntry {
+ OpClass queueType;
+ InstSeqNum oldestInst;
+ };
+
+ /** List that contains the age order of the oldest instruction of each
+ * ready queue. Used to select the oldest instruction available
+ * among op classes.
+ */
+ std::list<ListOrderEntry> listOrder;
+
+ typedef typename std::list<ListOrderEntry>::iterator ListOrderIt;
+
+ /** Tracks if each ready queue is on the age order list. */
+ bool queueOnList[Num_OpClasses];
+
+ /** Iterators of each ready queue. Points to their spot in the age order
+ * list.
+ */
+ ListOrderIt readyIt[Num_OpClasses];
+
+ /** Add an op class to the age order list. */
+ void addToOrderList(OpClass op_class);
+
+ /**
+ * Called when the oldest instruction has been removed from a ready queue;
+ * this places that ready queue into the proper spot in the age order list.
+ */
+ void moveToYoungerInst(ListOrderIt age_order_it);
+#endif
+ //////////////////////////////////////
+ // Various parameters
+ //////////////////////////////////////
+#if 0
+ /** IQ Resource Sharing Policy */
+ enum IQPolicy {
+ Dynamic,
+ Partitioned,
+ Threshold
+ };
+
+ /** IQ sharing policy for SMT. */
+ IQPolicy iqPolicy;
+#endif
+ /** Number of Total Threads*/
+ unsigned numThreads;
+#if 0
+ /** Pointer to list of active threads. */
+ list<unsigned> *activeThreads;
+#endif
+ /** Per Thread IQ count */
+ unsigned count[Impl::MaxThreads];
+
+ /** Max IQ Entries Per Thread */
+ unsigned maxEntries[Impl::MaxThreads];
+
+ /** Number of free IQ entries left. */
+ unsigned freeEntries;
+
+ /** The number of entries in the instruction queue. */
+ unsigned numEntries;
+
+ /** The total number of instructions that can be issued in one cycle. */
+ unsigned totalWidth;
+#if 0
+ /** The number of physical registers in the CPU. */
+ unsigned numPhysRegs;
+
+ /** The number of physical integer registers in the CPU. */
+ unsigned numPhysIntRegs;
+
+ /** The number of floating point registers in the CPU. */
+ unsigned numPhysFloatRegs;
+#endif
+ /** Delay between commit stage and the IQ.
+ * @todo: Make there be a distinction between the delays within IEW.
+ */
+ unsigned commitToIEWDelay;
+
+ //////////////////////////////////
+ // Variables needed for squashing
+ //////////////////////////////////
+
+ /** The sequence number of the squashed instruction. */
+ InstSeqNum squashedSeqNum[Impl::MaxThreads];
+
+ /** Iterator that points to the last instruction that has been squashed.
+ * This will not be valid unless the IQ is in the process of squashing.
+ */
+ ListIt squashIt[Impl::MaxThreads];
+#if 0
+ ///////////////////////////////////
+ // Dependency graph stuff
+ ///////////////////////////////////
+
+ class DependencyEntry
+ {
+ public:
+ DependencyEntry()
+ : inst(NULL), next(NULL)
+ { }
+
+ DynInstPtr inst;
+ //Might want to include data about what arch. register the
+ //dependence is waiting on.
+ DependencyEntry *next;
+
+ //This function, and perhaps this whole class, stand out a little
+ //bit as they don't fit a classification well. I want access
+ //to the underlying structure of the linked list, yet at
+ //the same time it feels like this should be something abstracted
+ //away. So for now it will sit here, within the IQ, until
+ //a better implementation is decided upon.
+ // This function probably shouldn't be within the entry...
+ void insert(DynInstPtr &new_inst);
+
+ void remove(DynInstPtr &inst_to_remove);
+
+ // Debug variable, remove when done testing.
+ static unsigned mem_alloc_counter;
+ };
+
+ /** Array of linked lists. Each linked list is a list of all the
+ * instructions that depend upon a given register. The actual
+ * register's index is used to index into the graph; ie all
+ * instructions in flight that are dependent upon r34 will be
+ * in the linked list of dependGraph[34].
+ */
+ DependencyEntry *dependGraph;
+
+ /** A cache of the recently woken registers. It is 1 if the register
+ * has been woken up recently, and 0 if the register has been added
+ * to the dependency graph and has not yet received its value. It
+ * is basically a secondary scoreboard, and should pretty much mirror
+ * the scoreboard that exists in the rename map.
+ */
+ vector<bool> regScoreboard;
+
+ /** Adds an instruction to the dependency graph, as a producer. */
+ bool addToDependents(DynInstPtr &new_inst);
+
+ /** Adds an instruction to the dependency graph, as a consumer. */
+ void createDependency(DynInstPtr &new_inst);
+#endif
+ /** Moves an instruction to the ready queue if it is ready. */
+ void addIfReady(DynInstPtr &inst);
+
+ /** Debugging function to count how many entries are in the IQ. It does
+ * a linear walk through the instructions, so do not call this function
+ * during normal execution.
+ */
+ int countInsts();
+#if 0
+ /** Debugging function to dump out the dependency graph.
+ */
+ void dumpDependGraph();
+#endif
+ /** Debugging function to dump all the list sizes, as well as print
+ * out the list of nonspeculative instructions. Should not be used
+ * in any other capacity, but it has no harmful sideaffects.
+ */
+ void dumpLists();
+
+ /** Debugging function to dump out all instructions that are in the
+ * IQ.
+ */
+ void dumpInsts();
+
+ /** Stat for number of instructions added. */
+ Stats::Scalar<> iqInstsAdded;
+ /** Stat for number of non-speculative instructions added. */
+ Stats::Scalar<> iqNonSpecInstsAdded;
+// Stats::Scalar<> iqIntInstsAdded;
+ /** Stat for number of integer instructions issued. */
+ Stats::Scalar<> iqIntInstsIssued;
+// Stats::Scalar<> iqFloatInstsAdded;
+ /** Stat for number of floating point instructions issued. */
+ Stats::Scalar<> iqFloatInstsIssued;
+// Stats::Scalar<> iqBranchInstsAdded;
+ /** Stat for number of branch instructions issued. */
+ Stats::Scalar<> iqBranchInstsIssued;
+// Stats::Scalar<> iqMemInstsAdded;
+ /** Stat for number of memory instructions issued. */
+ Stats::Scalar<> iqMemInstsIssued;
+// Stats::Scalar<> iqMiscInstsAdded;
+ /** Stat for number of miscellaneous instructions issued. */
+ Stats::Scalar<> iqMiscInstsIssued;
+ /** Stat for number of squashed instructions that were ready to issue. */
+ Stats::Scalar<> iqSquashedInstsIssued;
+ /** Stat for number of squashed instructions examined when squashing. */
+ Stats::Scalar<> iqSquashedInstsExamined;
+ /** Stat for number of squashed instruction operands examined when
+ * squashing.
+ */
+ Stats::Scalar<> iqSquashedOperandsExamined;
+ /** Stat for number of non-speculative instructions removed due to a squash.
+ */
+ Stats::Scalar<> iqSquashedNonSpecRemoved;
+
+};
+
+#endif //__CPU_OZONE_INST_QUEUE_HH__
--- /dev/null
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// Todo:
+// Current ordering allows for 0 cycle added-to-scheduled. Could maybe fake
+// it; either do in reverse order, or have added instructions put into a
+// different ready queue that, in scheduleRreadyInsts(), gets put onto the
+// normal ready queue. This would however give only a one cycle delay,
+// but probably is more flexible to actually add in a delay parameter than
+// just running it backwards.
+
+#include <vector>
+
+#include "sim/root.hh"
+
+#include "cpu/ozone/inst_queue.hh"
+#if 0
+template <class Impl>
+InstQueue<Impl>::FUCompletion::FUCompletion(DynInstPtr &_inst,
+ int fu_idx,
+ InstQueue<Impl> *iq_ptr)
+ : Event(&mainEventQueue, Stat_Event_Pri),
+ inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr)
+{
+ this->setFlags(Event::AutoDelete);
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::FUCompletion::process()
+{
+ iqPtr->processFUCompletion(inst, fuIdx);
+}
+
+
+template <class Impl>
+const char *
+InstQueue<Impl>::FUCompletion::description()
+{
+ return "Functional unit completion event";
+}
+#endif
+template <class Impl>
+InstQueue<Impl>::InstQueue(Params *params)
+ : dcacheInterface(params->dcacheInterface),
+// fuPool(params->fuPool),
+ numEntries(params->numIQEntries),
+ totalWidth(params->issueWidth),
+// numPhysIntRegs(params->numPhysIntRegs),
+// numPhysFloatRegs(params->numPhysFloatRegs),
+ commitToIEWDelay(params->commitToIEWDelay)
+{
+// assert(fuPool);
+
+// numThreads = params->numberOfThreads;
+ numThreads = 1;
+
+ //Initialize thread IQ counts
+ for (int i = 0; i <numThreads; i++) {
+ count[i] = 0;
+ }
+
+ // Initialize the number of free IQ entries.
+ freeEntries = numEntries;
+
+ // Set the number of physical registers as the number of int + float
+// numPhysRegs = numPhysIntRegs + numPhysFloatRegs;
+
+// DPRINTF(IQ, "There are %i physical registers.\n", numPhysRegs);
+
+ //Create an entry for each physical register within the
+ //dependency graph.
+// dependGraph = new DependencyEntry[numPhysRegs];
+
+ // Resize the register scoreboard.
+// regScoreboard.resize(numPhysRegs);
+/*
+ //Initialize Mem Dependence Units
+ for (int i = 0; i < numThreads; i++) {
+ memDepUnit[i].init(params,i);
+ memDepUnit[i].setIQ(this);
+ }
+
+ // Initialize all the head pointers to point to NULL, and all the
+ // entries as unready.
+ // Note that in actuality, the registers corresponding to the logical
+ // registers start off as ready. However this doesn't matter for the
+ // IQ as the instruction should have been correctly told if those
+ // registers are ready in rename. Thus it can all be initialized as
+ // unready.
+ for (int i = 0; i < numPhysRegs; ++i) {
+ dependGraph[i].next = NULL;
+ dependGraph[i].inst = NULL;
+ regScoreboard[i] = false;
+ }
+*/
+ for (int i = 0; i < numThreads; ++i) {
+ squashedSeqNum[i] = 0;
+ }
+/*
+ for (int i = 0; i < Num_OpClasses; ++i) {
+ queueOnList[i] = false;
+ readyIt[i] = listOrder.end();
+ }
+
+ string policy = params->smtIQPolicy;
+
+ //Convert string to lowercase
+ std::transform(policy.begin(), policy.end(), policy.begin(),
+ (int(*)(int)) tolower);
+
+ //Figure out resource sharing policy
+ if (policy == "dynamic") {
+ iqPolicy = Dynamic;
+
+ //Set Max Entries to Total ROB Capacity
+ for (int i = 0; i < numThreads; i++) {
+ maxEntries[i] = numEntries;
+ }
+
+ } else if (policy == "partitioned") {
+ iqPolicy = Partitioned;
+
+ //@todo:make work if part_amt doesnt divide evenly.
+ int part_amt = numEntries / numThreads;
+
+ //Divide ROB up evenly
+ for (int i = 0; i < numThreads; i++) {
+ maxEntries[i] = part_amt;
+ }
+
+ DPRINTF(Fetch, "IQ sharing policy set to Partitioned:"
+ "%i entries per thread.\n",part_amt);
+
+ } else if (policy == "threshold") {
+ iqPolicy = Threshold;
+
+ double threshold = (double)params->smtIQThreshold / 100;
+
+ int thresholdIQ = (int)((double)threshold * numEntries);
+
+ //Divide up by threshold amount
+ for (int i = 0; i < numThreads; i++) {
+ maxEntries[i] = thresholdIQ;
+ }
+
+ DPRINTF(Fetch, "IQ sharing policy set to Threshold:"
+ "%i entries per thread.\n",thresholdIQ);
+ } else {
+ assert(0 && "Invalid IQ Sharing Policy.Options Are:{Dynamic,"
+ "Partitioned, Threshold}");
+ }
+*/
+}
+
+template <class Impl>
+InstQueue<Impl>::~InstQueue()
+{
+ // Clear the dependency graph
+/*
+ DependencyEntry *curr;
+ DependencyEntry *prev;
+
+ for (int i = 0; i < numPhysRegs; ++i) {
+ curr = dependGraph[i].next;
+
+ while (curr) {
+ DependencyEntry::mem_alloc_counter--;
+
+ prev = curr;
+ curr = prev->next;
+ prev->inst = NULL;
+
+ delete prev;
+ }
+
+ if (dependGraph[i].inst) {
+ dependGraph[i].inst = NULL;
+ }
+
+ dependGraph[i].next = NULL;
+ }
+
+ assert(DependencyEntry::mem_alloc_counter == 0);
+
+ delete [] dependGraph;
+*/
+}
+
+template <class Impl>
+std::string
+InstQueue<Impl>::name() const
+{
+ return cpu->name() + ".iq";
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::regStats()
+{
+ iqInstsAdded
+ .name(name() + ".iqInstsAdded")
+ .desc("Number of instructions added to the IQ (excludes non-spec)")
+ .prereq(iqInstsAdded);
+
+ iqNonSpecInstsAdded
+ .name(name() + ".iqNonSpecInstsAdded")
+ .desc("Number of non-speculative instructions added to the IQ")
+ .prereq(iqNonSpecInstsAdded);
+
+// iqIntInstsAdded;
+
+ iqIntInstsIssued
+ .name(name() + ".iqIntInstsIssued")
+ .desc("Number of integer instructions issued")
+ .prereq(iqIntInstsIssued);
+
+// iqFloatInstsAdded;
+
+ iqFloatInstsIssued
+ .name(name() + ".iqFloatInstsIssued")
+ .desc("Number of float instructions issued")
+ .prereq(iqFloatInstsIssued);
+
+// iqBranchInstsAdded;
+
+ iqBranchInstsIssued
+ .name(name() + ".iqBranchInstsIssued")
+ .desc("Number of branch instructions issued")
+ .prereq(iqBranchInstsIssued);
+
+// iqMemInstsAdded;
+
+ iqMemInstsIssued
+ .name(name() + ".iqMemInstsIssued")
+ .desc("Number of memory instructions issued")
+ .prereq(iqMemInstsIssued);
+
+// iqMiscInstsAdded;
+
+ iqMiscInstsIssued
+ .name(name() + ".iqMiscInstsIssued")
+ .desc("Number of miscellaneous instructions issued")
+ .prereq(iqMiscInstsIssued);
+
+ iqSquashedInstsIssued
+ .name(name() + ".iqSquashedInstsIssued")
+ .desc("Number of squashed instructions issued")
+ .prereq(iqSquashedInstsIssued);
+
+ iqSquashedInstsExamined
+ .name(name() + ".iqSquashedInstsExamined")
+ .desc("Number of squashed instructions iterated over during squash;"
+ " mainly for profiling")
+ .prereq(iqSquashedInstsExamined);
+
+ iqSquashedOperandsExamined
+ .name(name() + ".iqSquashedOperandsExamined")
+ .desc("Number of squashed operands that are examined and possibly "
+ "removed from graph")
+ .prereq(iqSquashedOperandsExamined);
+
+ iqSquashedNonSpecRemoved
+ .name(name() + ".iqSquashedNonSpecRemoved")
+ .desc("Number of squashed non-spec instructions that were removed")
+ .prereq(iqSquashedNonSpecRemoved);
+/*
+ for ( int i=0; i < numThreads; i++) {
+ // Tell mem dependence unit to reg stats as well.
+ memDepUnit[i].regStats();
+ }
+*/
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::setActiveThreads(list<unsigned> *at_ptr)
+{
+ DPRINTF(IQ, "Setting active threads list pointer.\n");
+ activeThreads = at_ptr;
+}
+*/
+template <class Impl>
+void
+InstQueue<Impl>::setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2e_ptr)
+{
+ DPRINTF(IQ, "Set the issue to execute queue.\n");
+ issueToExecuteQueue = i2e_ptr;
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
+{
+ DPRINTF(IQ, "Set the time buffer.\n");
+ timeBuffer = tb_ptr;
+
+ fromCommit = timeBuffer->getWire(-commitToIEWDelay);
+}
+
+template <class Impl>
+int
+InstQueue<Impl>::entryAmount(int num_threads)
+{
+ if (iqPolicy == Partitioned) {
+ return numEntries / num_threads;
+ } else {
+ return 0;
+ }
+}
+
+
+template <class Impl>
+void
+InstQueue<Impl>::resetEntries()
+{
+ if (iqPolicy != Dynamic || numThreads > 1) {
+ int active_threads = (*activeThreads).size();
+
+ list<unsigned>::iterator threads = (*activeThreads).begin();
+ list<unsigned>::iterator list_end = (*activeThreads).end();
+
+ while (threads != list_end) {
+ if (iqPolicy == Partitioned) {
+ maxEntries[*threads++] = numEntries / active_threads;
+ } else if(iqPolicy == Threshold && active_threads == 1) {
+ maxEntries[*threads++] = numEntries;
+ }
+ }
+ }
+}
+*/
+template <class Impl>
+unsigned
+InstQueue<Impl>::numFreeEntries()
+{
+ return freeEntries;
+}
+
+template <class Impl>
+unsigned
+InstQueue<Impl>::numFreeEntries(unsigned tid)
+{
+ return maxEntries[tid] - count[tid];
+}
+
+// Might want to do something more complex if it knows how many instructions
+// will be issued this cycle.
+template <class Impl>
+bool
+InstQueue<Impl>::isFull()
+{
+ if (freeEntries == 0) {
+ return(true);
+ } else {
+ return(false);
+ }
+}
+
+template <class Impl>
+bool
+InstQueue<Impl>::isFull(unsigned tid)
+{
+ if (numFreeEntries(tid) == 0) {
+ return(true);
+ } else {
+ return(false);
+ }
+}
+
+template <class Impl>
+bool
+InstQueue<Impl>::hasReadyInsts()
+{
+/*
+ if (!listOrder.empty()) {
+ return true;
+ }
+
+ for (int i = 0; i < Num_OpClasses; ++i) {
+ if (!readyInsts[i].empty()) {
+ return true;
+ }
+ }
+
+ return false;
+*/
+ return readyInsts.empty();
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::insert(DynInstPtr &new_inst)
+{
+ // Make sure the instruction is valid
+ assert(new_inst);
+
+ DPRINTF(IQ, "Adding instruction PC %#x to the IQ.\n",
+ new_inst->readPC());
+
+ // Check if there are any free entries. Panic if there are none.
+ // Might want to have this return a fault in the future instead of
+ // panicing.
+ assert(freeEntries != 0);
+
+ instList[new_inst->threadNumber].push_back(new_inst);
+
+ // Decrease the number of free entries.
+ --freeEntries;
+
+ //Mark Instruction as in IQ
+// new_inst->setInIQ();
+/*
+ // Look through its source registers (physical regs), and mark any
+ // dependencies.
+ addToDependents(new_inst);
+
+ // Have this instruction set itself as the producer of its destination
+ // register(s).
+ createDependency(new_inst);
+*/
+ // If it's a memory instruction, add it to the memory dependency
+ // unit.
+// if (new_inst->isMemRef()) {
+// memDepUnit[new_inst->threadNumber].insert(new_inst);
+// } else {
+ // If the instruction is ready then add it to the ready list.
+ addIfReady(new_inst);
+// }
+
+ ++iqInstsAdded;
+
+
+ //Update Thread IQ Count
+ count[new_inst->threadNumber]++;
+
+ assert(freeEntries == (numEntries - countInsts()));
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::insertNonSpec(DynInstPtr &new_inst)
+{
+ nonSpecInsts[new_inst->seqNum] = new_inst;
+
+ // @todo: Clean up this code; can do it by setting inst as unable
+ // to issue, then calling normal insert on the inst.
+
+ // Make sure the instruction is valid
+ assert(new_inst);
+
+ DPRINTF(IQ, "Adding instruction PC %#x to the IQ.\n",
+ new_inst->readPC());
+
+ // Check if there are any free entries. Panic if there are none.
+ // Might want to have this return a fault in the future instead of
+ // panicing.
+ assert(freeEntries != 0);
+
+ instList[new_inst->threadNumber].push_back(new_inst);
+
+ // Decrease the number of free entries.
+ --freeEntries;
+
+ //Mark Instruction as in IQ
+// new_inst->setInIQ();
+/*
+ // Have this instruction set itself as the producer of its destination
+ // register(s).
+ createDependency(new_inst);
+
+ // If it's a memory instruction, add it to the memory dependency
+ // unit.
+ if (new_inst->isMemRef()) {
+ memDepUnit[new_inst->threadNumber].insertNonSpec(new_inst);
+ }
+*/
+ ++iqNonSpecInstsAdded;
+
+ //Update Thread IQ Count
+ count[new_inst->threadNumber]++;
+
+ assert(freeEntries == (numEntries - countInsts()));
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::advanceTail(DynInstPtr &inst)
+{
+ // Have this instruction set itself as the producer of its destination
+ // register(s).
+ createDependency(inst);
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::addToOrderList(OpClass op_class)
+{
+ assert(!readyInsts[op_class].empty());
+
+ ListOrderEntry queue_entry;
+
+ queue_entry.queueType = op_class;
+
+ queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
+
+ ListOrderIt list_it = listOrder.begin();
+ ListOrderIt list_end_it = listOrder.end();
+
+ while (list_it != list_end_it) {
+ if ((*list_it).oldestInst > queue_entry.oldestInst) {
+ break;
+ }
+
+ list_it++;
+ }
+
+ readyIt[op_class] = listOrder.insert(list_it, queue_entry);
+ queueOnList[op_class] = true;
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::moveToYoungerInst(ListOrderIt list_order_it)
+{
+ // Get iterator of next item on the list
+ // Delete the original iterator
+ // Determine if the next item is either the end of the list or younger
+ // than the new instruction. If so, then add in a new iterator right here.
+ // If not, then move along.
+ ListOrderEntry queue_entry;
+ OpClass op_class = (*list_order_it).queueType;
+ ListOrderIt next_it = list_order_it;
+
+ ++next_it;
+
+ queue_entry.queueType = op_class;
+ queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
+
+ while (next_it != listOrder.end() &&
+ (*next_it).oldestInst < queue_entry.oldestInst) {
+ ++next_it;
+ }
+
+ readyIt[op_class] = listOrder.insert(next_it, queue_entry);
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
+{
+ // The CPU could have been sleeping until this op completed (*extremely*
+ // long latency op). Wake it if it was. This may be overkill.
+ iewStage->wakeCPU();
+
+ fuPool->freeUnit(fu_idx);
+
+ int &size = issueToExecuteQueue->access(0)->size;
+
+ issueToExecuteQueue->access(0)->insts[size++] = inst;
+}
+*/
+// @todo: Figure out a better way to remove the squashed items from the
+// lists. Checking the top item of each list to see if it's squashed
+// wastes time and forces jumps.
+template <class Impl>
+void
+InstQueue<Impl>::scheduleReadyInsts()
+{
+ DPRINTF(IQ, "Attempting to schedule ready instructions from "
+ "the IQ.\n");
+
+// IssueStruct *i2e_info = issueToExecuteQueue->access(0);
+/*
+ // Will need to reorder the list if either a queue is not on the list,
+ // or it has an older instruction than last time.
+ for (int i = 0; i < Num_OpClasses; ++i) {
+ if (!readyInsts[i].empty()) {
+ if (!queueOnList[i]) {
+ addToOrderList(OpClass(i));
+ } else if (readyInsts[i].top()->seqNum <
+ (*readyIt[i]).oldestInst) {
+ listOrder.erase(readyIt[i]);
+ addToOrderList(OpClass(i));
+ }
+ }
+ }
+
+ // Have iterator to head of the list
+ // While I haven't exceeded bandwidth or reached the end of the list,
+ // Try to get a FU that can do what this op needs.
+ // If successful, change the oldestInst to the new top of the list, put
+ // the queue in the proper place in the list.
+ // Increment the iterator.
+ // This will avoid trying to schedule a certain op class if there are no
+ // FUs that handle it.
+ ListOrderIt order_it = listOrder.begin();
+ ListOrderIt order_end_it = listOrder.end();
+ int total_issued = 0;
+ int exec_queue_slot = i2e_info->size;
+
+ while (exec_queue_slot < totalWidth && order_it != order_end_it) {
+ OpClass op_class = (*order_it).queueType;
+
+ assert(!readyInsts[op_class].empty());
+
+ DynInstPtr issuing_inst = readyInsts[op_class].top();
+
+ assert(issuing_inst->seqNum == (*order_it).oldestInst);
+
+ if (issuing_inst->isSquashed()) {
+ readyInsts[op_class].pop();
+
+ if (!readyInsts[op_class].empty()) {
+ moveToYoungerInst(order_it);
+ } else {
+ readyIt[op_class] = listOrder.end();
+ queueOnList[op_class] = false;
+ }
+
+ listOrder.erase(order_it++);
+
+ ++iqSquashedInstsIssued;
+
+ continue;
+ }
+
+ int idx = fuPool->getUnit(op_class);
+
+ if (idx != -1) {
+ int op_latency = fuPool->getOpLatency(op_class);
+
+ if (op_latency == 1) {
+ i2e_info->insts[exec_queue_slot++] = issuing_inst;
+ i2e_info->size++;
+
+ // Add the FU onto the list of FU's to be freed next cycle.
+ fuPool->freeUnit(idx);
+ } else {
+ int issue_latency = fuPool->getIssueLatency(op_class);
+
+ if (issue_latency > 1) {
+ // Generate completion event for the FU
+ FUCompletion *execution = new FUCompletion(issuing_inst,
+ idx, this);
+
+ execution->schedule(curTick + issue_latency - 1);
+ } else {
+ i2e_info->insts[exec_queue_slot++] = issuing_inst;
+ i2e_info->size++;
+
+ // Add the FU onto the list of FU's to be freed next cycle.
+ fuPool->freeUnit(idx);
+ }
+ }
+
+ DPRINTF(IQ, "Thread %i: Issuing instruction PC %#x "
+ "[sn:%lli]\n",
+ issuing_inst->threadNumber, issuing_inst->readPC(),
+ issuing_inst->seqNum);
+
+ readyInsts[op_class].pop();
+
+ if (!readyInsts[op_class].empty()) {
+ moveToYoungerInst(order_it);
+ } else {
+ readyIt[op_class] = listOrder.end();
+ queueOnList[op_class] = false;
+ }
+
+ issuing_inst->setIssued();
+ ++total_issued;
+
+ if (!issuing_inst->isMemRef()) {
+ // Memory instructions can not be freed from the IQ until they
+ // complete.
+ ++freeEntries;
+ count[issuing_inst->threadNumber]--;
+ issuing_inst->removeInIQ();
+ } else {
+ memDepUnit[issuing_inst->threadNumber].issue(issuing_inst);
+ }
+
+ listOrder.erase(order_it++);
+ } else {
+ ++order_it;
+ }
+ }
+
+ if (total_issued) {
+ cpu->activityThisCycle();
+ } else {
+ DPRINTF(IQ, "Not able to schedule any instructions.\n");
+ }
+*/
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst)
+{
+ DPRINTF(IQ, "Marking nonspeculative instruction with sequence "
+ "number %i as ready to execute.\n", inst);
+
+ NonSpecMapIt inst_it = nonSpecInsts.find(inst);
+
+ assert(inst_it != nonSpecInsts.end());
+
+// unsigned tid = (*inst_it).second->threadNumber;
+
+ // Mark this instruction as ready to issue.
+ (*inst_it).second->setCanIssue();
+
+ // Now schedule the instruction.
+// if (!(*inst_it).second->isMemRef()) {
+ addIfReady((*inst_it).second);
+// } else {
+// memDepUnit[tid].nonSpecInstReady((*inst_it).second);
+// }
+
+ nonSpecInsts.erase(inst_it);
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::commit(const InstSeqNum &inst, unsigned tid)
+{
+ /*Need to go through each thread??*/
+ DPRINTF(IQ, "[tid:%i]: Committing instructions older than [sn:%i]\n",
+ tid,inst);
+
+ ListIt iq_it = instList[tid].begin();
+
+ while (iq_it != instList[tid].end() &&
+ (*iq_it)->seqNum <= inst) {
+ ++iq_it;
+ instList[tid].pop_front();
+ }
+
+ assert(freeEntries == (numEntries - countInsts()));
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
+{
+ DPRINTF(IQ, "Waking dependents of completed instruction.\n");
+ // Look at the physical destination register of the DynInst
+ // and look it up on the dependency graph. Then mark as ready
+ // any instructions within the instruction queue.
+/*
+ DependencyEntry *curr;
+ DependencyEntry *prev;
+*/
+ // Tell the memory dependence unit to wake any dependents on this
+ // instruction if it is a memory instruction. Also complete the memory
+ // instruction at this point since we know it executed fine.
+ // @todo: Might want to rename "completeMemInst" to
+ // something that indicates that it won't need to be replayed, and call
+ // this earlier. Might not be a big deal.
+ if (completed_inst->isMemRef()) {
+// memDepUnit[completed_inst->threadNumber].wakeDependents(completed_inst);
+ completeMemInst(completed_inst);
+ }
+ completed_inst->wakeDependents();
+/*
+ for (int dest_reg_idx = 0;
+ dest_reg_idx < completed_inst->numDestRegs();
+ dest_reg_idx++)
+ {
+ PhysRegIndex dest_reg =
+ completed_inst->renamedDestRegIdx(dest_reg_idx);
+
+ // Special case of uniq or control registers. They are not
+ // handled by the IQ and thus have no dependency graph entry.
+ // @todo Figure out a cleaner way to handle this.
+ if (dest_reg >= numPhysRegs) {
+ continue;
+ }
+
+ DPRINTF(IQ, "Waking any dependents on register %i.\n",
+ (int) dest_reg);
+
+ //Maybe abstract this part into a function.
+ //Go through the dependency chain, marking the registers as ready
+ //within the waiting instructions.
+
+ curr = dependGraph[dest_reg].next;
+
+ while (curr) {
+ DPRINTF(IQ, "Waking up a dependent instruction, PC%#x.\n",
+ curr->inst->readPC());
+
+ // Might want to give more information to the instruction
+ // so that it knows which of its source registers is ready.
+ // However that would mean that the dependency graph entries
+ // would need to hold the src_reg_idx.
+ curr->inst->markSrcRegReady();
+
+ addIfReady(curr->inst);
+
+ DependencyEntry::mem_alloc_counter--;
+
+ prev = curr;
+ curr = prev->next;
+ prev->inst = NULL;
+
+ delete prev;
+ }
+
+ // Reset the head node now that all of its dependents have been woken
+ // up.
+ dependGraph[dest_reg].next = NULL;
+ dependGraph[dest_reg].inst = NULL;
+
+ // Mark the scoreboard as having that register ready.
+ regScoreboard[dest_reg] = true;
+ }
+*/
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::addReadyMemInst(DynInstPtr &ready_inst)
+{
+ OpClass op_class = ready_inst->opClass();
+
+ readyInsts.push(ready_inst);
+
+ DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
+ "the ready list, PC %#x opclass:%i [sn:%lli].\n",
+ ready_inst->readPC(), op_class, ready_inst->seqNum);
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::rescheduleMemInst(DynInstPtr &resched_inst)
+{
+ memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::replayMemInst(DynInstPtr &replay_inst)
+{
+ memDepUnit[replay_inst->threadNumber].replay(replay_inst);
+}
+*/
+template <class Impl>
+void
+InstQueue<Impl>::completeMemInst(DynInstPtr &completed_inst)
+{
+ int tid = completed_inst->threadNumber;
+
+ DPRINTF(IQ, "Completing mem instruction PC:%#x [sn:%lli]\n",
+ completed_inst->readPC(), completed_inst->seqNum);
+
+ ++freeEntries;
+
+// completed_inst->memOpDone = true;
+
+// memDepUnit[tid].completed(completed_inst);
+
+ count[tid]--;
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::violation(DynInstPtr &store,
+ DynInstPtr &faulting_load)
+{
+ memDepUnit[store->threadNumber].violation(store, faulting_load);
+}
+*/
+template <class Impl>
+void
+InstQueue<Impl>::squash(unsigned tid)
+{
+ DPRINTF(IQ, "[tid:%i]: Starting to squash instructions in "
+ "the IQ.\n", tid);
+
+ // Read instruction sequence number of last instruction out of the
+ // time buffer.
+// squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum;
+
+ // Setup the squash iterator to point to the tail.
+ squashIt[tid] = instList[tid].end();
+ --squashIt[tid];
+
+ // Call doSquash if there are insts in the IQ
+ if (count[tid] > 0) {
+ doSquash(tid);
+ }
+
+ // Also tell the memory dependence unit to squash.
+// memDepUnit[tid].squash(squashedSeqNum[tid], tid);
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::doSquash(unsigned tid)
+{
+ // Make sure the squashed sequence number is valid.
+ assert(squashedSeqNum[tid] != 0);
+
+ DPRINTF(IQ, "[tid:%i]: Squashing until sequence number %i!\n",
+ tid, squashedSeqNum[tid]);
+
+ // Squash any instructions younger than the squashed sequence number
+ // given.
+ while (squashIt[tid] != instList[tid].end() &&
+ (*squashIt[tid])->seqNum > squashedSeqNum[tid]) {
+
+ DynInstPtr squashed_inst = (*squashIt[tid]);
+
+ // Only handle the instruction if it actually is in the IQ and
+ // hasn't already been squashed in the IQ.
+ if (squashed_inst->threadNumber != tid ||
+ squashed_inst->isSquashedInIQ()) {
+ --squashIt[tid];
+ continue;
+ }
+
+ if (!squashed_inst->isIssued() ||
+ (squashed_inst->isMemRef()/* &&
+ !squashed_inst->memOpDone*/)) {
+
+ // Remove the instruction from the dependency list.
+ if (!squashed_inst->isNonSpeculative()) {
+/*
+ for (int src_reg_idx = 0;
+ src_reg_idx < squashed_inst->numSrcRegs();
+ src_reg_idx++)
+ {
+ PhysRegIndex src_reg =
+ squashed_inst->renamedSrcRegIdx(src_reg_idx);
+
+ // Only remove it from the dependency graph if it was
+ // placed there in the first place.
+ // HACK: This assumes that instructions woken up from the
+ // dependency chain aren't informed that a specific src
+ // register has become ready. This may not always be true
+ // in the future.
+ // Instead of doing a linked list traversal, we can just
+ // remove these squashed instructions either at issue time,
+ // or when the register is overwritten. The only downside
+ // to this is it leaves more room for error.
+
+ if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) &&
+ src_reg < numPhysRegs) {
+ dependGraph[src_reg].remove(squashed_inst);
+ }
+
+
+ ++iqSquashedOperandsExamined;
+ }
+*/
+ // Might want to remove producers as well.
+ } else {
+ nonSpecInsts[squashed_inst->seqNum] = NULL;
+
+ nonSpecInsts.erase(squashed_inst->seqNum);
+
+ ++iqSquashedNonSpecRemoved;
+ }
+
+ // Might want to also clear out the head of the dependency graph.
+
+ // Mark it as squashed within the IQ.
+ squashed_inst->setSquashedInIQ();
+
+ // @todo: Remove this hack where several statuses are set so the
+ // inst will flow through the rest of the pipeline.
+ squashed_inst->setIssued();
+ squashed_inst->setCanCommit();
+// squashed_inst->removeInIQ();
+
+ //Update Thread IQ Count
+ count[squashed_inst->threadNumber]--;
+
+ ++freeEntries;
+
+ if (numThreads > 1) {
+ DPRINTF(IQ, "[tid:%i]: Instruction PC %#x squashed.\n",
+ tid, squashed_inst->readPC());
+ } else {
+ DPRINTF(IQ, "Instruction PC %#x squashed.\n",
+ squashed_inst->readPC());
+ }
+ }
+
+ --squashIt[tid];
+ ++iqSquashedInstsExamined;
+ }
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::DependencyEntry::insert(DynInstPtr &new_inst)
+{
+ //Add this new, dependent instruction at the head of the dependency
+ //chain.
+
+ // First create the entry that will be added to the head of the
+ // dependency chain.
+ DependencyEntry *new_entry = new DependencyEntry;
+ new_entry->next = this->next;
+ new_entry->inst = new_inst;
+
+ // Then actually add it to the chain.
+ this->next = new_entry;
+
+ ++mem_alloc_counter;
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove)
+{
+ DependencyEntry *prev = this;
+ DependencyEntry *curr = this->next;
+
+ // Make sure curr isn't NULL. Because this instruction is being
+ // removed from a dependency list, it must have been placed there at
+ // an earlier time. The dependency chain should not be empty,
+ // unless the instruction dependent upon it is already ready.
+ if (curr == NULL) {
+ return;
+ }
+
+ // Find the instruction to remove within the dependency linked list.
+ while (curr->inst != inst_to_remove) {
+ prev = curr;
+ curr = curr->next;
+
+ assert(curr != NULL);
+ }
+
+ // Now remove this instruction from the list.
+ prev->next = curr->next;
+
+ --mem_alloc_counter;
+
+ // Could push this off to the destructor of DependencyEntry
+ curr->inst = NULL;
+
+ delete curr;
+}
+
+template <class Impl>
+bool
+InstQueue<Impl>::addToDependents(DynInstPtr &new_inst)
+{
+ // Loop through the instruction's source registers, adding
+ // them to the dependency list if they are not ready.
+ int8_t total_src_regs = new_inst->numSrcRegs();
+ bool return_val = false;
+
+ for (int src_reg_idx = 0;
+ src_reg_idx < total_src_regs;
+ src_reg_idx++)
+ {
+ // Only add it to the dependency graph if it's not ready.
+ if (!new_inst->isReadySrcRegIdx(src_reg_idx)) {
+ PhysRegIndex src_reg = new_inst->renamedSrcRegIdx(src_reg_idx);
+
+ // Check the IQ's scoreboard to make sure the register
+ // hasn't become ready while the instruction was in flight
+ // between stages. Only if it really isn't ready should
+ // it be added to the dependency graph.
+ if (src_reg >= numPhysRegs) {
+ continue;
+ } else if (regScoreboard[src_reg] == false) {
+ DPRINTF(IQ, "Instruction PC %#x has src reg %i that "
+ "is being added to the dependency chain.\n",
+ new_inst->readPC(), src_reg);
+
+ dependGraph[src_reg].insert(new_inst);
+
+ // Change the return value to indicate that something
+ // was added to the dependency graph.
+ return_val = true;
+ } else {
+ DPRINTF(IQ, "Instruction PC %#x has src reg %i that "
+ "became ready before it reached the IQ.\n",
+ new_inst->readPC(), src_reg);
+ // Mark a register ready within the instruction.
+ new_inst->markSrcRegReady();
+ }
+ }
+ }
+
+ return return_val;
+}
+
+template <class Impl>
+void
+InstQueue<Impl>::createDependency(DynInstPtr &new_inst)
+{
+ //Actually nothing really needs to be marked when an
+ //instruction becomes the producer of a register's value,
+ //but for convenience a ptr to the producing instruction will
+ //be placed in the head node of the dependency links.
+ int8_t total_dest_regs = new_inst->numDestRegs();
+
+ for (int dest_reg_idx = 0;
+ dest_reg_idx < total_dest_regs;
+ dest_reg_idx++)
+ {
+ PhysRegIndex dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx);
+
+ // Instructions that use the misc regs will have a reg number
+ // higher than the normal physical registers. In this case these
+ // registers are not renamed, and there is no need to track
+ // dependencies as these instructions must be executed at commit.
+ if (dest_reg >= numPhysRegs) {
+ continue;
+ }
+
+ if (dependGraph[dest_reg].next) {
+ dumpDependGraph();
+ panic("Dependency graph %i not empty!", dest_reg);
+ }
+
+ dependGraph[dest_reg].inst = new_inst;
+
+ // Mark the scoreboard to say it's not yet ready.
+ regScoreboard[dest_reg] = false;
+ }
+}
+*/
+template <class Impl>
+void
+InstQueue<Impl>::addIfReady(DynInstPtr &inst)
+{
+ //If the instruction now has all of its source registers
+ // available, then add it to the list of ready instructions.
+ if (inst->readyToIssue()) {
+
+ //Add the instruction to the proper ready list.
+ if (inst->isMemRef()) {
+
+ DPRINTF(IQ, "Checking if memory instruction can issue.\n");
+
+ // Message to the mem dependence unit that this instruction has
+ // its registers ready.
+
+// memDepUnit[inst->threadNumber].regsReady(inst);
+
+ return;
+ }
+
+ OpClass op_class = inst->opClass();
+
+ DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
+ "the ready list, PC %#x opclass:%i [sn:%lli].\n",
+ inst->readPC(), op_class, inst->seqNum);
+
+ readyInsts.push(inst);
+ }
+}
+
+template <class Impl>
+int
+InstQueue<Impl>::countInsts()
+{
+ //ksewell:This works but definitely could use a cleaner write
+ //with a more intuitive way of counting. Right now it's
+ //just brute force ....
+
+#if 0
+ int total_insts = 0;
+
+ for (int i = 0; i < numThreads; ++i) {
+ ListIt count_it = instList[i].begin();
+
+ while (count_it != instList[i].end()) {
+ if (!(*count_it)->isSquashed() && !(*count_it)->isSquashedInIQ()) {
+ if (!(*count_it)->isIssued()) {
+ ++total_insts;
+ } else if ((*count_it)->isMemRef() &&
+ !(*count_it)->memOpDone) {
+ // Loads that have not been marked as executed still count
+ // towards the total instructions.
+ ++total_insts;
+ }
+ }
+
+ ++count_it;
+ }
+ }
+
+ return total_insts;
+#else
+ return numEntries - freeEntries;
+#endif
+}
+/*
+template <class Impl>
+void
+InstQueue<Impl>::dumpDependGraph()
+{
+ DependencyEntry *curr;
+
+ for (int i = 0; i < numPhysRegs; ++i)
+ {
+ curr = &dependGraph[i];
+
+ if (curr->inst) {
+ cprintf("dependGraph[%i]: producer: %#x [sn:%lli] consumer: ",
+ i, curr->inst->readPC(), curr->inst->seqNum);
+ } else {
+ cprintf("dependGraph[%i]: No producer. consumer: ", i);
+ }
+
+ while (curr->next != NULL) {
+ curr = curr->next;
+
+ cprintf("%#x [sn:%lli] ",
+ curr->inst->readPC(), curr->inst->seqNum);
+ }
+
+ cprintf("\n");
+ }
+}
+*/
+template <class Impl>
+void
+InstQueue<Impl>::dumpLists()
+{
+ for (int i = 0; i < Num_OpClasses; ++i) {
+ cprintf("Ready list %i size: %i\n", i, readyInsts.size());
+
+ cprintf("\n");
+ }
+
+ cprintf("Non speculative list size: %i\n", nonSpecInsts.size());
+
+ NonSpecMapIt non_spec_it = nonSpecInsts.begin();
+ NonSpecMapIt non_spec_end_it = nonSpecInsts.end();
+
+ cprintf("Non speculative list: ");
+
+ while (non_spec_it != non_spec_end_it) {
+ cprintf("%#x [sn:%lli]", (*non_spec_it).second->readPC(),
+ (*non_spec_it).second->seqNum);
+ ++non_spec_it;
+ }
+
+ cprintf("\n");
+/*
+ ListOrderIt list_order_it = listOrder.begin();
+ ListOrderIt list_order_end_it = listOrder.end();
+ int i = 1;
+
+ cprintf("List order: ");
+
+ while (list_order_it != list_order_end_it) {
+ cprintf("%i OpClass:%i [sn:%lli] ", i, (*list_order_it).queueType,
+ (*list_order_it).oldestInst);
+
+ ++list_order_it;
+ ++i;
+ }
+*/
+ cprintf("\n");
+}
+
+
+template <class Impl>
+void
+InstQueue<Impl>::dumpInsts()
+{
+ for (int i = 0; i < numThreads; ++i) {
+// int num = 0;
+// int valid_num = 0;
+/*
+ ListIt inst_list_it = instList[i].begin();
+
+ while (inst_list_it != instList[i].end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+ if (!(*inst_list_it)->isSquashed()) {
+ if (!(*inst_list_it)->isIssued()) {
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ } else if ((*inst_list_it)->isMemRef() &&
+ !(*inst_list_it)->memOpDone) {
+ // Loads that have not been marked as executed still count
+ // towards the total instructions.
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ }
+ }
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Issued:%i\nSquashed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+
+ if ((*inst_list_it)->isMemRef()) {
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ }
+
+ cprintf("\n");
+
+ inst_list_it++;
+ ++num;
+ }
+*/
+ }
+}
--- /dev/null
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/ozone/ozone_impl.hh"
+#include "cpu/ozone/lsq_unit_impl.hh"
+
+// Force the instantiation of LDSTQ for all the implementations we care about.
+template class OzoneLSQ<OzoneImpl>;
+
--- /dev/null
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OZONE_LSQ_UNIT_HH__
+#define __CPU_OZONE_LSQ_UNIT_HH__
+
+#include <map>
+#include <queue>
+#include <algorithm>
+
+#include "arch/faults.hh"
+#include "arch/isa_traits.hh"
+#include "config/full_system.hh"
+#include "base/hashmap.hh"
+#include "cpu/inst_seq.hh"
+#include "mem/mem_interface.hh"
+//#include "mem/page_table.hh"
+#include "sim/sim_object.hh"
+
+class PageTable;
+
+/**
+ * Class that implements the actual LQ and SQ for each specific thread.
+ * Both are circular queues; load entries are freed upon committing, while
+ * store entries are freed once they writeback. The LSQUnit tracks if there
+ * are memory ordering violations, and also detects partial load to store
+ * forwarding cases (a store only has part of a load's data) that requires
+ * the load to wait until the store writes back. In the former case it
+ * holds onto the instruction until the dependence unit looks at it, and
+ * in the latter it stalls the LSQ until the store writes back. At that
+ * point the load is replayed.
+ */
+template <class Impl>
+class OzoneLSQ {
+ public:
+ typedef typename Impl::Params Params;
+ typedef typename Impl::FullCPU FullCPU;
+ typedef typename Impl::BackEnd BackEnd;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::IssueStruct IssueStruct;
+
+ typedef TheISA::IntReg IntReg;
+
+ typedef typename std::map<InstSeqNum, DynInstPtr>::iterator LdMapIt;
+
+ private:
+ class StoreCompletionEvent : public Event {
+ public:
+ /** Constructs a store completion event. */
+ StoreCompletionEvent(int store_idx, Event *wb_event, OzoneLSQ *lsq_ptr);
+
+ /** Processes the store completion event. */
+ void process();
+
+ /** Returns the description of this event. */
+ const char *description();
+
+ private:
+ /** The store index of the store being written back. */
+ int storeIdx;
+ /** The writeback event for the store. Needed for store
+ * conditionals.
+ */
+ Event *wbEvent;
+ /** The pointer to the LSQ unit that issued the store. */
+ OzoneLSQ<Impl> *lsqPtr;
+ };
+
+ friend class StoreCompletionEvent;
+
+ public:
+ /** Constructs an LSQ unit. init() must be called prior to use. */
+ OzoneLSQ();
+
+ /** Initializes the LSQ unit with the specified number of entries. */
+ void init(Params *params, unsigned maxLQEntries,
+ unsigned maxSQEntries, unsigned id);
+
+ /** Returns the name of the LSQ unit. */
+ std::string name() const;
+
+ /** Sets the CPU pointer. */
+ void setCPU(FullCPU *cpu_ptr)
+ { cpu = cpu_ptr; }
+
+ /** Sets the back-end stage pointer. */
+ void setBE(BackEnd *be_ptr)
+ { be = be_ptr; }
+
+ /** Sets the page table pointer. */
+ void setPageTable(PageTable *pt_ptr);
+
+ /** Ticks the LSQ unit, which in this case only resets the number of
+ * used cache ports.
+ * @todo: Move the number of used ports up to the LSQ level so it can
+ * be shared by all LSQ units.
+ */
+ void tick() { usedPorts = 0; }
+
+ /** Inserts an instruction. */
+ void insert(DynInstPtr &inst);
+ /** Inserts a load instruction. */
+ void insertLoad(DynInstPtr &load_inst);
+ /** Inserts a store instruction. */
+ void insertStore(DynInstPtr &store_inst);
+
+ /** Executes a load instruction. */
+ Fault executeLoad(DynInstPtr &inst);
+
+ Fault executeLoad(int lq_idx);
+ /** Executes a store instruction. */
+ Fault executeStore(DynInstPtr &inst);
+
+ /** Commits the head load. */
+ void commitLoad();
+ /** Commits a specific load, given by the sequence number. */
+ void commitLoad(InstSeqNum &inst);
+ /** Commits loads older than a specific sequence number. */
+ void commitLoads(InstSeqNum &youngest_inst);
+
+ /** Commits stores older than a specific sequence number. */
+ void commitStores(InstSeqNum &youngest_inst);
+
+ /** Writes back stores. */
+ void writebackStores();
+
+ // @todo: Include stats in the LSQ unit.
+ //void regStats();
+
+ /** Clears all the entries in the LQ. */
+ void clearLQ();
+
+ /** Clears all the entries in the SQ. */
+ void clearSQ();
+
+ /** Resizes the LQ to a given size. */
+ void resizeLQ(unsigned size);
+
+ /** Resizes the SQ to a given size. */
+ void resizeSQ(unsigned size);
+
+ /** Squashes all instructions younger than a specific sequence number. */
+ void squash(const InstSeqNum &squashed_num);
+
+ /** Returns if there is a memory ordering violation. Value is reset upon
+ * call to getMemDepViolator().
+ */
+ bool violation() { return memDepViolator; }
+
+ /** Returns the memory ordering violator. */
+ DynInstPtr getMemDepViolator();
+
+ /** Returns if a load became blocked due to the memory system. It clears
+ * the bool's value upon this being called.
+ */
+ inline bool loadBlocked();
+
+ /** Returns the number of free entries (min of free LQ and SQ entries). */
+ unsigned numFreeEntries();
+
+ /** Returns the number of loads ready to execute. */
+ int numLoadsReady();
+
+ /** Returns the number of loads in the LQ. */
+ int numLoads() { return loads; }
+
+ /** Returns the number of stores in the SQ. */
+ int numStores() { return stores; }
+
+ /** Returns if either the LQ or SQ is full. */
+ bool isFull() { return lqFull() || sqFull(); }
+
+ /** Returns if the LQ is full. */
+ bool lqFull() { return loads >= (LQEntries - 1); }
+
+ /** Returns if the SQ is full. */
+ bool sqFull() { return stores >= (SQEntries - 1); }
+
+ /** Debugging function to dump instructions in the LSQ. */
+ void dumpInsts();
+
+ /** Returns the number of instructions in the LSQ. */
+ unsigned getCount() { return loads + stores; }
+
+ /** Returns if there are any stores to writeback. */
+ bool hasStoresToWB() { return storesToWB; }
+
+ /** Returns the number of stores to writeback. */
+ int numStoresToWB() { return storesToWB; }
+
+ /** Returns if the LSQ unit will writeback on this cycle. */
+ bool willWB() { return storeQueue[storeWBIdx].canWB &&
+ !storeQueue[storeWBIdx].completed &&
+ !dcacheInterface->isBlocked(); }
+
+ private:
+ /** Completes the store at the specified index. */
+ void completeStore(int store_idx);
+
+ /** Increments the given store index (circular queue). */
+ inline void incrStIdx(int &store_idx);
+ /** Decrements the given store index (circular queue). */
+ inline void decrStIdx(int &store_idx);
+ /** Increments the given load index (circular queue). */
+ inline void incrLdIdx(int &load_idx);
+ /** Decrements the given load index (circular queue). */
+ inline void decrLdIdx(int &load_idx);
+
+ private:
+ /** Pointer to the CPU. */
+ FullCPU *cpu;
+
+ /** Pointer to the back-end stage. */
+ BackEnd *be;
+
+ /** Pointer to the D-cache. */
+ MemInterface *dcacheInterface;
+
+ /** Pointer to the page table. */
+ PageTable *pTable;
+
+ public:
+ struct SQEntry {
+ /** Constructs an empty store queue entry. */
+ SQEntry()
+ : inst(NULL), req(NULL), size(0), data(0),
+ canWB(0), committed(0), completed(0)
+ { }
+
+ /** Constructs a store queue entry for a given instruction. */
+ SQEntry(DynInstPtr &_inst)
+ : inst(_inst), req(NULL), size(0), data(0),
+ canWB(0), committed(0), completed(0)
+ { }
+
+ /** The store instruction. */
+ DynInstPtr inst;
+ /** The memory request for the store. */
+ MemReqPtr req;
+ /** The size of the store. */
+ int size;
+ /** The store data. */
+ IntReg data;
+ /** Whether or not the store can writeback. */
+ bool canWB;
+ /** Whether or not the store is committed. */
+ bool committed;
+ /** Whether or not the store is completed. */
+ bool completed;
+ };
+
+ enum Status {
+ Running,
+ Idle,
+ DcacheMissStall,
+ DcacheMissSwitch
+ };
+
+ private:
+ /** The OzoneLSQ thread id. */
+ unsigned lsqID;
+
+ /** The status of the LSQ unit. */
+ Status _status;
+
+ /** The store queue. */
+ std::vector<SQEntry> storeQueue;
+
+ /** The load queue. */
+ std::vector<DynInstPtr> loadQueue;
+
+ // Consider making these 16 bits
+ /** The number of LQ entries. */
+ unsigned LQEntries;
+ /** The number of SQ entries. */
+ unsigned SQEntries;
+
+ /** The number of load instructions in the LQ. */
+ int loads;
+ /** The number of store instructions in the SQ (excludes those waiting to
+ * writeback).
+ */
+ int stores;
+ /** The number of store instructions in the SQ waiting to writeback. */
+ int storesToWB;
+
+ /** The index of the head instruction in the LQ. */
+ int loadHead;
+ /** The index of the tail instruction in the LQ. */
+ int loadTail;
+
+ /** The index of the head instruction in the SQ. */
+ int storeHead;
+ /** The index of the first instruction that is ready to be written back,
+ * and has not yet been written back.
+ */
+ int storeWBIdx;
+ /** The index of the tail instruction in the SQ. */
+ int storeTail;
+
+ /// @todo Consider moving to a more advanced model with write vs read ports
+ /** The number of cache ports available each cycle. */
+ int cachePorts;
+
+ /** The number of used cache ports in this cycle. */
+ int usedPorts;
+
+ //list<InstSeqNum> mshrSeqNums;
+
+ //Stats::Scalar<> dcacheStallCycles;
+ Counter lastDcacheStall;
+
+ /** Wire to read information from the issue stage time queue. */
+ typename TimeBuffer<IssueStruct>::wire fromIssue;
+
+ // Make these per thread?
+ /** Whether or not the LSQ is stalled. */
+ bool stalled;
+ /** The store that causes the stall due to partial store to load
+ * forwarding.
+ */
+ InstSeqNum stallingStoreIsn;
+ /** The index of the above store. */
+ int stallingLoadIdx;
+
+ /** Whether or not a load is blocked due to the memory system. It is
+ * cleared when this value is checked via loadBlocked().
+ */
+ bool isLoadBlocked;
+
+ /** The oldest faulting load instruction. */
+ DynInstPtr loadFaultInst;
+ /** The oldest faulting store instruction. */
+ DynInstPtr storeFaultInst;
+
+ /** The oldest load that caused a memory ordering violation. */
+ DynInstPtr memDepViolator;
+
+ // Will also need how many read/write ports the Dcache has. Or keep track
+ // of that in stage that is one level up, and only call executeLoad/Store
+ // the appropriate number of times.
+
+ public:
+ /** Executes the load at the given index. */
+ template <class T>
+ Fault read(MemReqPtr &req, T &data, int load_idx);
+
+ /** Executes the store at the given index. */
+ template <class T>
+ Fault write(MemReqPtr &req, T &data, int store_idx);
+
+ /** Returns the index of the head load instruction. */
+ int getLoadHead() { return loadHead; }
+ /** Returns the sequence number of the head load instruction. */
+ InstSeqNum getLoadHeadSeqNum()
+ {
+ if (loadQueue[loadHead]) {
+ return loadQueue[loadHead]->seqNum;
+ } else {
+ return 0;
+ }
+
+ }
+
+ /** Returns the index of the head store instruction. */
+ int getStoreHead() { return storeHead; }
+ /** Returns the sequence number of the head store instruction. */
+ InstSeqNum getStoreHeadSeqNum()
+ {
+ if (storeQueue[storeHead].inst) {
+ return storeQueue[storeHead].inst->seqNum;
+ } else {
+ return 0;
+ }
+
+ }
+
+ /** Returns whether or not the LSQ unit is stalled. */
+ bool isStalled() { return stalled; }
+};
+
+template <class Impl>
+template <class T>
+Fault
+OzoneLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
+{
+ //Depending on issue2execute delay a squashed load could
+ //execute if it is found to be squashed in the same
+ //cycle it is scheduled to execute
+ assert(loadQueue[load_idx]);
+
+ if (loadQueue[load_idx]->isExecuted()) {
+ panic("Should not reach this point with split ops!");
+
+ memcpy(&data,req->data,req->size);
+
+ return NoFault;
+ }
+
+ // Make sure this isn't an uncacheable access
+ // A bit of a hackish way to get uncached accesses to work only if they're
+ // at the head of the LSQ and are ready to commit (at the head of the ROB
+ // too).
+ // @todo: Fix uncached accesses.
+ if (req->flags & UNCACHEABLE &&
+ (load_idx != loadHead || !loadQueue[load_idx]->readyToCommit())) {
+
+ return TheISA::genMachineCheckFault();
+ }
+
+ // Check the SQ for any previous stores that might lead to forwarding
+ int store_idx = loadQueue[load_idx]->sqIdx;
+
+ int store_size = 0;
+
+ DPRINTF(OzoneLSQ, "Read called, load idx: %i, store idx: %i, "
+ "storeHead: %i addr: %#x\n",
+ load_idx, store_idx, storeHead, req->paddr);
+
+ while (store_idx != -1) {
+ // End once we've reached the top of the LSQ
+ if (store_idx == storeWBIdx) {
+ break;
+ }
+
+ // Move the index to one younger
+ if (--store_idx < 0)
+ store_idx += SQEntries;
+
+ assert(storeQueue[store_idx].inst);
+
+ store_size = storeQueue[store_idx].size;
+
+ if (store_size == 0)
+ continue;
+
+ // Check if the store data is within the lower and upper bounds of
+ // addresses that the request needs.
+ bool store_has_lower_limit =
+ req->vaddr >= storeQueue[store_idx].inst->effAddr;
+ bool store_has_upper_limit =
+ (req->vaddr + req->size) <= (storeQueue[store_idx].inst->effAddr +
+ store_size);
+ bool lower_load_has_store_part =
+ req->vaddr < (storeQueue[store_idx].inst->effAddr +
+ store_size);
+ bool upper_load_has_store_part =
+ (req->vaddr + req->size) > storeQueue[store_idx].inst->effAddr;
+
+ // If the store's data has all of the data needed, we can forward.
+ if (store_has_lower_limit && store_has_upper_limit) {
+
+ int shift_amt = req->vaddr & (store_size - 1);
+ // Assumes byte addressing
+ shift_amt = shift_amt << 3;
+
+ // Cast this to type T?
+ data = storeQueue[store_idx].data >> shift_amt;
+
+ req->cmd = Read;
+ assert(!req->completionEvent);
+ req->completionEvent = NULL;
+ req->time = curTick;
+ assert(!req->data);
+ req->data = new uint8_t[64];
+
+ memcpy(req->data, &data, req->size);
+
+ DPRINTF(OzoneLSQ, "Forwarding from store idx %i to load to "
+ "addr %#x, data %#x\n",
+ store_idx, req->vaddr, *(req->data));
+
+ typename BackEnd::LdWritebackEvent *wb =
+ new typename BackEnd::LdWritebackEvent(loadQueue[load_idx],
+ be);
+
+ // We'll say this has a 1 cycle load-store forwarding latency
+ // for now.
+ // FIXME - Need to make this a parameter.
+ wb->schedule(curTick);
+
+ // Should keep track of stat for forwarded data
+ return NoFault;
+ } else if ((store_has_lower_limit && lower_load_has_store_part) ||
+ (store_has_upper_limit && upper_load_has_store_part) ||
+ (lower_load_has_store_part && upper_load_has_store_part)) {
+ // This is the partial store-load forwarding case where a store
+ // has only part of the load's data.
+
+ // If it's already been written back, then don't worry about
+ // stalling on it.
+ if (storeQueue[store_idx].completed) {
+ continue;
+ }
+
+ // Must stall load and force it to retry, so long as it's the oldest
+ // load that needs to do so.
+ if (!stalled ||
+ (stalled &&
+ loadQueue[load_idx]->seqNum <
+ loadQueue[stallingLoadIdx]->seqNum)) {
+ stalled = true;
+ stallingStoreIsn = storeQueue[store_idx].inst->seqNum;
+ stallingLoadIdx = load_idx;
+ }
+
+ // Tell IQ/mem dep unit that this instruction will need to be
+ // rescheduled eventually
+ be->rescheduleMemInst(loadQueue[load_idx]);
+
+ DPRINTF(OzoneLSQ, "Load-store forwarding mis-match. "
+ "Store idx %i to load addr %#x\n",
+ store_idx, req->vaddr);
+
+ return NoFault;
+ }
+ }
+
+
+ // If there's no forwarding case, then go access memory
+ DynInstPtr inst = loadQueue[load_idx];
+
+ ++usedPorts;
+
+ // if we have a cache, do cache access too
+ if (dcacheInterface) {
+ if (dcacheInterface->isBlocked()) {
+ isLoadBlocked = true;
+ // No fault occurred, even though the interface is blocked.
+ return NoFault;
+ }
+
+ DPRINTF(OzoneLSQ, "D-cache: PC:%#x reading from paddr:%#x "
+ "vaddr:%#x flags:%i\n",
+ inst->readPC(), req->paddr, req->vaddr, req->flags);
+
+ // Setup MemReq pointer
+ req->cmd = Read;
+ req->completionEvent = NULL;
+ req->time = curTick;
+ assert(!req->data);
+ req->data = new uint8_t[64];
+
+ assert(!req->completionEvent);
+ typedef typename BackEnd::LdWritebackEvent LdWritebackEvent;
+
+ LdWritebackEvent *wb = new LdWritebackEvent(loadQueue[load_idx], be);
+
+ req->completionEvent = wb;
+
+ // Do Cache Access
+ MemAccessResult result = dcacheInterface->access(req);
+
+ // Ugly hack to get an event scheduled *only* if the access is
+ // a miss. We really should add first-class support for this
+ // at some point.
+ // @todo: Probably should support having no events
+ if (result != MA_HIT) {
+ DPRINTF(OzoneLSQ, "D-cache miss!\n");
+ DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
+ inst->seqNum);
+
+ lastDcacheStall = curTick;
+
+ _status = DcacheMissStall;
+
+ wb->setDcacheMiss();
+
+ } else {
+// DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
+// inst->seqNum);
+
+ DPRINTF(OzoneLSQ, "D-cache hit!\n");
+ }
+ } else {
+ fatal("Must use D-cache with new memory system");
+ }
+
+ return NoFault;
+}
+
+template <class Impl>
+template <class T>
+Fault
+OzoneLSQ<Impl>::write(MemReqPtr &req, T &data, int store_idx)
+{
+ assert(storeQueue[store_idx].inst);
+
+ DPRINTF(OzoneLSQ, "Doing write to store idx %i, addr %#x data %#x"
+ " | storeHead:%i [sn:%i]\n",
+ store_idx, req->paddr, data, storeHead,
+ storeQueue[store_idx].inst->seqNum);
+
+ storeQueue[store_idx].req = req;
+ storeQueue[store_idx].size = sizeof(T);
+ storeQueue[store_idx].data = data;
+
+ // This function only writes the data to the store queue, so no fault
+ // can happen here.
+ return NoFault;
+}
+
+template <class Impl>
+inline bool
+OzoneLSQ<Impl>::loadBlocked()
+{
+ bool ret_val = isLoadBlocked;
+ isLoadBlocked = false;
+ return ret_val;
+}
+
+#endif // __CPU_OZONE_LSQ_UNIT_HH__
--- /dev/null
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arch/isa_traits.hh"
+#include "base/str.hh"
+#include "cpu/ozone/lsq_unit.hh"
+
+template <class Impl>
+OzoneLSQ<Impl>::StoreCompletionEvent::StoreCompletionEvent(int store_idx,
+ Event *wb_event,
+ OzoneLSQ<Impl> *lsq_ptr)
+ : Event(&mainEventQueue),
+ storeIdx(store_idx),
+ wbEvent(wb_event),
+ lsqPtr(lsq_ptr)
+{
+ this->setFlags(Event::AutoDelete);
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::StoreCompletionEvent::process()
+{
+ DPRINTF(OzoneLSQ, "Cache miss complete for store idx:%i\n", storeIdx);
+
+ //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
+
+// lsqPtr->cpu->wakeCPU();
+ if (wbEvent)
+ wbEvent->process();
+ lsqPtr->completeStore(storeIdx);
+}
+
+template <class Impl>
+const char *
+OzoneLSQ<Impl>::StoreCompletionEvent::description()
+{
+ return "LSQ store completion event";
+}
+
+template <class Impl>
+OzoneLSQ<Impl>::OzoneLSQ()
+ : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false)
+{
+}
+
+template<class Impl>
+void
+OzoneLSQ<Impl>::init(Params *params, unsigned maxLQEntries,
+ unsigned maxSQEntries, unsigned id)
+
+{
+ DPRINTF(OzoneLSQ, "Creating OzoneLSQ%i object.\n",id);
+
+ lsqID = id;
+
+ LQEntries = maxLQEntries;
+ SQEntries = maxSQEntries;
+
+ loadQueue.resize(LQEntries);
+ storeQueue.resize(SQEntries);
+
+
+ // May want to initialize these entries to NULL
+
+ loadHead = loadTail = 0;
+
+ storeHead = storeWBIdx = storeTail = 0;
+
+ usedPorts = 0;
+ cachePorts = params->cachePorts;
+
+ dcacheInterface = params->dcacheInterface;
+
+ loadFaultInst = storeFaultInst = memDepViolator = NULL;
+}
+
+template<class Impl>
+std::string
+OzoneLSQ<Impl>::name() const
+{
+ return "lsqunit";
+}
+
+template<class Impl>
+void
+OzoneLSQ<Impl>::clearLQ()
+{
+ loadQueue.clear();
+}
+
+template<class Impl>
+void
+OzoneLSQ<Impl>::clearSQ()
+{
+ storeQueue.clear();
+}
+
+template<class Impl>
+void
+OzoneLSQ<Impl>::setPageTable(PageTable *pt_ptr)
+{
+ DPRINTF(OzoneLSQ, "Setting the page table pointer.\n");
+ pTable = pt_ptr;
+}
+
+template<class Impl>
+void
+OzoneLSQ<Impl>::resizeLQ(unsigned size)
+{
+ assert( size >= LQEntries);
+
+ if (size > LQEntries) {
+ while (size > loadQueue.size()) {
+ DynInstPtr dummy;
+ loadQueue.push_back(dummy);
+ LQEntries++;
+ }
+ } else {
+ LQEntries = size;
+ }
+
+}
+
+template<class Impl>
+void
+OzoneLSQ<Impl>::resizeSQ(unsigned size)
+{
+ if (size > SQEntries) {
+ while (size > storeQueue.size()) {
+ SQEntry dummy;
+ storeQueue.push_back(dummy);
+ SQEntries++;
+ }
+ } else {
+ SQEntries = size;
+ }
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::insert(DynInstPtr &inst)
+{
+ // Make sure we really have a memory reference.
+ assert(inst->isMemRef());
+
+ // Make sure it's one of the two classes of memory references.
+ assert(inst->isLoad() || inst->isStore());
+
+ if (inst->isLoad()) {
+ insertLoad(inst);
+ } else {
+ insertStore(inst);
+ }
+
+// inst->setInLSQ();
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::insertLoad(DynInstPtr &load_inst)
+{
+ assert((loadTail + 1) % LQEntries != loadHead && loads < LQEntries);
+
+ DPRINTF(OzoneLSQ, "Inserting load PC %#x, idx:%i [sn:%lli]\n",
+ load_inst->readPC(), loadTail, load_inst->seqNum);
+
+ load_inst->lqIdx = loadTail;
+
+ if (stores == 0) {
+ load_inst->sqIdx = -1;
+ } else {
+ load_inst->sqIdx = storeTail;
+ }
+
+ loadQueue[loadTail] = load_inst;
+
+ incrLdIdx(loadTail);
+
+ ++loads;
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::insertStore(DynInstPtr &store_inst)
+{
+ // Make sure it is not full before inserting an instruction.
+ assert((storeTail + 1) % SQEntries != storeHead);
+ assert(stores < SQEntries);
+
+ DPRINTF(OzoneLSQ, "Inserting store PC %#x, idx:%i [sn:%lli]\n",
+ store_inst->readPC(), storeTail, store_inst->seqNum);
+
+ store_inst->sqIdx = storeTail;
+ store_inst->lqIdx = loadTail;
+
+ storeQueue[storeTail] = SQEntry(store_inst);
+
+ incrStIdx(storeTail);
+
+ ++stores;
+
+}
+
+template <class Impl>
+typename Impl::DynInstPtr
+OzoneLSQ<Impl>::getMemDepViolator()
+{
+ DynInstPtr temp = memDepViolator;
+
+ memDepViolator = NULL;
+
+ return temp;
+}
+
+template <class Impl>
+unsigned
+OzoneLSQ<Impl>::numFreeEntries()
+{
+ unsigned free_lq_entries = LQEntries - loads;
+ unsigned free_sq_entries = SQEntries - stores;
+
+ // Both the LQ and SQ entries have an extra dummy entry to differentiate
+ // empty/full conditions. Subtract 1 from the free entries.
+ if (free_lq_entries < free_sq_entries) {
+ return free_lq_entries - 1;
+ } else {
+ return free_sq_entries - 1;
+ }
+}
+
+template <class Impl>
+int
+OzoneLSQ<Impl>::numLoadsReady()
+{
+ int load_idx = loadHead;
+ int retval = 0;
+
+ while (load_idx != loadTail) {
+ assert(loadQueue[load_idx]);
+
+ if (loadQueue[load_idx]->readyToIssue()) {
+ ++retval;
+ }
+ }
+
+ return retval;
+}
+
+#if 0
+template <class Impl>
+Fault
+OzoneLSQ<Impl>::executeLoad()
+{
+ Fault load_fault = NoFault;
+ DynInstPtr load_inst;
+
+ assert(readyLoads.size() != 0);
+
+ // Execute a ready load.
+ LdMapIt ready_it = readyLoads.begin();
+
+ load_inst = (*ready_it).second;
+
+ // Execute the instruction, which is held in the data portion of the
+ // iterator.
+ load_fault = load_inst->execute();
+
+ // If it executed successfully, then switch it over to the executed
+ // loads list.
+ if (load_fault == NoFault) {
+ executedLoads[load_inst->seqNum] = load_inst;
+
+ readyLoads.erase(ready_it);
+ } else {
+ loadFaultInst = load_inst;
+ }
+
+ return load_fault;
+}
+#endif
+
+template <class Impl>
+Fault
+OzoneLSQ<Impl>::executeLoad(DynInstPtr &inst)
+{
+ // Execute a specific load.
+ Fault load_fault = NoFault;
+
+ DPRINTF(OzoneLSQ, "Executing load PC %#x, [sn:%lli]\n",
+ inst->readPC(),inst->seqNum);
+
+ // Make sure it's really in the list.
+ // Normally it should always be in the list. However,
+ /* due to a syscall it may not be the list.
+#ifdef DEBUG
+ int i = loadHead;
+ while (1) {
+ if (i == loadTail && !find(inst)) {
+ assert(0 && "Load not in the queue!");
+ } else if (loadQueue[i] == inst) {
+ break;
+ }
+
+ i = i + 1;
+ if (i >= LQEntries) {
+ i = 0;
+ }
+ }
+#endif // DEBUG*/
+
+ load_fault = inst->initiateAcc();
+
+ // Might want to make sure that I'm not overwriting a previously faulting
+ // instruction that hasn't been checked yet.
+ // Actually probably want the oldest faulting load
+ if (load_fault != NoFault) {
+ // Maybe just set it as can commit here, although that might cause
+ // some other problems with sending traps to the ROB too quickly.
+// iewStage->instToCommit(inst);
+// iewStage->activityThisCycle();
+ }
+
+ return load_fault;
+}
+
+template <class Impl>
+Fault
+OzoneLSQ<Impl>::executeLoad(int lq_idx)
+{
+ // Very hackish. Not sure the best way to check that this
+ // instruction is at the head of the ROB. I should have some sort
+ // of extra information here so that I'm not overloading the
+ // canCommit signal for 15 different things.
+ loadQueue[lq_idx]->setCanCommit();
+ Fault ret_fault = executeLoad(loadQueue[lq_idx]);
+ loadQueue[lq_idx]->clearCanCommit();
+ return ret_fault;
+}
+
+template <class Impl>
+Fault
+OzoneLSQ<Impl>::executeStore(DynInstPtr &store_inst)
+{
+ // Make sure that a store exists.
+ assert(stores != 0);
+
+ int store_idx = store_inst->sqIdx;
+
+ DPRINTF(OzoneLSQ, "Executing store PC %#x [sn:%lli]\n",
+ store_inst->readPC(), store_inst->seqNum);
+
+ // Check the recently completed loads to see if any match this store's
+ // address. If so, then we have a memory ordering violation.
+ int load_idx = store_inst->lqIdx;
+
+ Fault store_fault = store_inst->initiateAcc();
+
+ // Store size should now be available. Use it to get proper offset for
+ // addr comparisons.
+ int size = storeQueue[store_idx].size;
+
+ if (size == 0) {
+ DPRINTF(OzoneLSQ,"Fault on Store PC %#x, [sn:%lli],Size = 0\n",
+ store_inst->readPC(),store_inst->seqNum);
+
+ return store_fault;
+ }
+
+ assert(store_fault == NoFault);
+
+ if (!storeFaultInst) {
+ if (store_fault != NoFault) {
+ panic("Fault in a store instruction!");
+ storeFaultInst = store_inst;
+ } else if (store_inst->isNonSpeculative()) {
+ // Nonspeculative accesses (namely store conditionals)
+ // need to set themselves as able to writeback if we
+ // haven't had a fault by here.
+ storeQueue[store_idx].canWB = true;
+
+ ++storesToWB;
+ }
+ }
+
+ if (!memDepViolator) {
+ while (load_idx != loadTail) {
+ // Actually should only check loads that have actually executed
+ // Might be safe because effAddr is set to InvalAddr when the
+ // dyn inst is created.
+
+ // Must actually check all addrs in the proper size range
+ // Which is more correct than needs to be. What if for now we just
+ // assume all loads are quad-word loads, and do the addr based
+ // on that.
+ // @todo: Fix this, magic number being used here
+ if ((loadQueue[load_idx]->effAddr >> 8) ==
+ (store_inst->effAddr >> 8)) {
+ // A load incorrectly passed this store. Squash and refetch.
+ // For now return a fault to show that it was unsuccessful.
+ memDepViolator = loadQueue[load_idx];
+
+ return TheISA::genMachineCheckFault();
+ }
+
+ incrLdIdx(load_idx);
+ }
+
+ // If we've reached this point, there was no violation.
+ memDepViolator = NULL;
+ }
+
+ return store_fault;
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::commitLoad()
+{
+ assert(loadQueue[loadHead]);
+
+ DPRINTF(OzoneLSQ, "[sn:%lli] Committing head load instruction, PC %#x\n",
+ loadQueue[loadHead]->seqNum, loadQueue[loadHead]->readPC());
+
+
+ loadQueue[loadHead] = NULL;
+
+ incrLdIdx(loadHead);
+
+ --loads;
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::commitLoad(InstSeqNum &inst)
+{
+ // Hopefully I don't use this function too much
+ panic("Don't use this function!");
+
+ int i = loadHead;
+ while (1) {
+ if (i == loadTail) {
+ assert(0 && "Load not in the queue!");
+ } else if (loadQueue[i]->seqNum == inst) {
+ break;
+ }
+
+ ++i;
+ if (i >= LQEntries) {
+ i = 0;
+ }
+ }
+
+// loadQueue[i]->removeInLSQ();
+ loadQueue[i] = NULL;
+ --loads;
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::commitLoads(InstSeqNum &youngest_inst)
+{
+ assert(loads == 0 || loadQueue[loadHead]);
+
+ while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) {
+ commitLoad();
+ }
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::commitStores(InstSeqNum &youngest_inst)
+{
+ assert(stores == 0 || storeQueue[storeHead].inst);
+
+ int store_idx = storeHead;
+
+ while (store_idx != storeTail) {
+ assert(storeQueue[store_idx].inst);
+ if (!storeQueue[store_idx].canWB) {
+ if (storeQueue[store_idx].inst->seqNum > youngest_inst) {
+ break;
+ }
+ DPRINTF(OzoneLSQ, "Marking store as able to write back, PC "
+ "%#x [sn:%lli]\n",
+ storeQueue[store_idx].inst->readPC(),
+ storeQueue[store_idx].inst->seqNum);
+
+ storeQueue[store_idx].canWB = true;
+
+// --stores;
+ ++storesToWB;
+ }
+
+ incrStIdx(store_idx);
+ }
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::writebackStores()
+{
+ while (storesToWB > 0 &&
+ storeWBIdx != storeTail &&
+ storeQueue[storeWBIdx].inst &&
+ storeQueue[storeWBIdx].canWB &&
+ usedPorts < cachePorts) {
+
+ if (storeQueue[storeWBIdx].size == 0) {
+ completeStore(storeWBIdx);
+
+ incrStIdx(storeWBIdx);
+
+ continue;
+ }
+
+ if (dcacheInterface && dcacheInterface->isBlocked()) {
+ DPRINTF(OzoneLSQ, "Unable to write back any more stores, cache"
+ " is blocked!\n");
+ break;
+ }
+
+ ++usedPorts;
+
+ if (storeQueue[storeWBIdx].inst->isDataPrefetch()) {
+ incrStIdx(storeWBIdx);
+
+ continue;
+ }
+
+ assert(storeQueue[storeWBIdx].req);
+ assert(!storeQueue[storeWBIdx].committed);
+
+ MemReqPtr req = storeQueue[storeWBIdx].req;
+ storeQueue[storeWBIdx].committed = true;
+
+// Fault fault = cpu->translateDataReadReq(req);
+ req->cmd = Write;
+ req->completionEvent = NULL;
+ req->time = curTick;
+ assert(!req->data);
+ req->data = new uint8_t[64];
+ memcpy(req->data, (uint8_t *)&storeQueue[storeWBIdx].data, req->size);
+
+ DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x "
+ "to Addr:%#x, data:%#x [sn:%lli]\n",
+ storeWBIdx,storeQueue[storeWBIdx].inst->readPC(),
+ req->paddr, *(req->data),
+ storeQueue[storeWBIdx].inst->seqNum);
+
+// if (fault != NoFault) {
+ //What should we do if there is a fault???
+ //for now panic
+// panic("Page Table Fault!!!!!\n");
+// }
+
+ if (dcacheInterface) {
+ MemAccessResult result = dcacheInterface->access(req);
+
+ //@todo temp fix for LL/SC (works fine for 1 CPU)
+ if (req->flags & LOCKED) {
+ req->result=1;
+ panic("LL/SC! oh no no support!!!");
+ }
+
+ if (isStalled() &&
+ storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) {
+ DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
+ "load idx:%i\n",
+ stallingStoreIsn, stallingLoadIdx);
+ stalled = false;
+ stallingStoreIsn = 0;
+ be->replayMemInst(loadQueue[stallingLoadIdx]);
+ }
+
+ if (result != MA_HIT && dcacheInterface->doEvents()) {
+ Event *wb = NULL;
+/*
+ typename IEW::LdWritebackEvent *wb = NULL;
+ if (req->flags & LOCKED) {
+ // Stx_C does not generate a system port transaction.
+ req->result=0;
+ wb = new typename IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst,
+ iewStage);
+ }
+*/
+ DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n");
+
+// DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
+// storeQueue[storeWBIdx].inst->seqNum);
+
+ // Will stores need their own kind of writeback events?
+ // Do stores even need writeback events?
+ assert(!req->completionEvent);
+ req->completionEvent = new
+ StoreCompletionEvent(storeWBIdx, wb, this);
+
+ lastDcacheStall = curTick;
+
+ _status = DcacheMissStall;
+
+ //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum);
+
+ //DPRINTF(OzoneLSQ, "Added MSHR. count = %i\n",mshrSeqNums.size());
+
+ // Increment stat here or something
+ } else {
+ DPRINTF(OzoneLSQ,"D-Cache: Write Hit on idx:%i !\n",
+ storeWBIdx);
+
+// DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
+// storeQueue[storeWBIdx].inst->seqNum);
+
+ if (req->flags & LOCKED) {
+ // Stx_C does not generate a system port transaction.
+ req->result=1;
+ typename BackEnd::LdWritebackEvent *wb =
+ new typename BackEnd::LdWritebackEvent(storeQueue[storeWBIdx].inst,
+ be);
+ wb->schedule(curTick);
+ }
+
+ completeStore(storeWBIdx);
+ }
+
+ incrStIdx(storeWBIdx);
+ } else {
+ panic("Must HAVE DCACHE!!!!!\n");
+ }
+ }
+
+ // Not sure this should set it to 0.
+ usedPorts = 0;
+
+ assert(stores >= 0 && storesToWB >= 0);
+}
+
+/*template <class Impl>
+void
+OzoneLSQ<Impl>::removeMSHR(InstSeqNum seqNum)
+{
+ list<InstSeqNum>::iterator mshr_it = find(mshrSeqNums.begin(),
+ mshrSeqNums.end(),
+ seqNum);
+
+ if (mshr_it != mshrSeqNums.end()) {
+ mshrSeqNums.erase(mshr_it);
+ DPRINTF(OzoneLSQ, "Removing MSHR. count = %i\n",mshrSeqNums.size());
+ }
+}*/
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::squash(const InstSeqNum &squashed_num)
+{
+ DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!"
+ "(Loads:%i Stores:%i)\n",squashed_num,loads,stores);
+
+ int load_idx = loadTail;
+ decrLdIdx(load_idx);
+
+ while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) {
+
+ // Clear the smart pointer to make sure it is decremented.
+ DPRINTF(OzoneLSQ,"Load Instruction PC %#x squashed, "
+ "[sn:%lli]\n",
+ loadQueue[load_idx]->readPC(),
+ loadQueue[load_idx]->seqNum);
+
+ if (isStalled() && load_idx == stallingLoadIdx) {
+ stalled = false;
+ stallingStoreIsn = 0;
+ stallingLoadIdx = 0;
+ }
+
+// loadQueue[load_idx]->squashed = true;
+ loadQueue[load_idx] = NULL;
+ --loads;
+
+ // Inefficient!
+ loadTail = load_idx;
+
+ decrLdIdx(load_idx);
+ }
+
+ int store_idx = storeTail;
+ decrStIdx(store_idx);
+
+ while (stores != 0 && storeQueue[store_idx].inst->seqNum > squashed_num) {
+
+ // Clear the smart pointer to make sure it is decremented.
+ DPRINTF(OzoneLSQ,"Store Instruction PC %#x squashed, "
+ "idx:%i [sn:%lli]\n",
+ storeQueue[store_idx].inst->readPC(),
+ store_idx, storeQueue[store_idx].inst->seqNum);
+
+ // I don't think this can happen. It should have been cleared by the
+ // stalling load.
+ if (isStalled() &&
+ storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
+ panic("Is stalled should have been cleared by stalling load!\n");
+ stalled = false;
+ stallingStoreIsn = 0;
+ }
+
+// storeQueue[store_idx].inst->squashed = true;
+ storeQueue[store_idx].inst = NULL;
+ storeQueue[store_idx].canWB = 0;
+
+ if (storeQueue[store_idx].req) {
+ assert(!storeQueue[store_idx].req->completionEvent);
+ }
+ storeQueue[store_idx].req = NULL;
+ --stores;
+
+ // Inefficient!
+ storeTail = store_idx;
+
+ decrStIdx(store_idx);
+ }
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::dumpInsts()
+{
+ cprintf("Load store queue: Dumping instructions.\n");
+ cprintf("Load queue size: %i\n", loads);
+ cprintf("Load queue: ");
+
+ int load_idx = loadHead;
+
+ while (load_idx != loadTail && loadQueue[load_idx]) {
+ cprintf("[sn:%lli] %#x ", loadQueue[load_idx]->seqNum,
+ loadQueue[load_idx]->readPC());
+
+ incrLdIdx(load_idx);
+ }
+
+ cprintf("\nStore queue size: %i\n", stores);
+ cprintf("Store queue: ");
+
+ int store_idx = storeHead;
+
+ while (store_idx != storeTail && storeQueue[store_idx].inst) {
+ cprintf("[sn:%lli] %#x ", storeQueue[store_idx].inst->seqNum,
+ storeQueue[store_idx].inst->readPC());
+
+ incrStIdx(store_idx);
+ }
+
+ cprintf("\n");
+}
+
+template <class Impl>
+void
+OzoneLSQ<Impl>::completeStore(int store_idx)
+{
+ assert(storeQueue[store_idx].inst);
+ storeQueue[store_idx].completed = true;
+ --storesToWB;
+ // A bit conservative because a store completion may not free up entries,
+ // but hopefully avoids two store completions in one cycle from making
+ // the CPU tick twice.
+// cpu->activityThisCycle();
+
+ if (store_idx == storeHead) {
+ do {
+ incrStIdx(storeHead);
+
+ --stores;
+ } while (storeQueue[storeHead].completed &&
+ storeHead != storeTail);
+
+// be->updateLSQNextCycle = true;
+ }
+
+ DPRINTF(OzoneLSQ, "Store head idx:%i\n", storeHead);
+
+ if (isStalled() &&
+ storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
+ DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
+ "load idx:%i\n",
+ stallingStoreIsn, stallingLoadIdx);
+ stalled = false;
+ stallingStoreIsn = 0;
+ be->replayMemInst(loadQueue[stallingLoadIdx]);
+ }
+}
+
+template <class Impl>
+inline void
+OzoneLSQ<Impl>::incrStIdx(int &store_idx)
+{
+ if (++store_idx >= SQEntries)
+ store_idx = 0;
+}
+
+template <class Impl>
+inline void
+OzoneLSQ<Impl>::decrStIdx(int &store_idx)
+{
+ if (--store_idx < 0)
+ store_idx += SQEntries;
+}
+
+template <class Impl>
+inline void
+OzoneLSQ<Impl>::incrLdIdx(int &load_idx)
+{
+ if (++load_idx >= LQEntries)
+ load_idx = 0;
+}
+
+template <class Impl>
+inline void
+OzoneLSQ<Impl>::decrLdIdx(int &load_idx)
+{
+ if (--load_idx < 0)
+ load_idx += LQEntries;
+}
--- /dev/null
+
+#include "cpu/ozone/lw_back_end_impl.hh"
+#include "cpu/ozone/ozone_impl.hh"
+
+template class LWBackEnd<OzoneImpl>;
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OZONE_LW_BACK_END_HH__
+#define __CPU_OZONE_LW_BACK_END_HH__
+
+#include <list>
+#include <queue>
+#include <set>
+#include <string>
+
+#include "arch/faults.hh"
+#include "base/timebuf.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/ozone/rename_table.hh"
+#include "cpu/ozone/thread_state.hh"
+#include "mem/request.hh"
+#include "sim/eventq.hh"
+
+template <class>
+class Checker;
+class ExecContext;
+
+template <class Impl>
+class OzoneThreadState;
+
+template <class Impl>
+class LWBackEnd
+{
+ public:
+ typedef OzoneThreadState<Impl> Thread;
+
+ typedef typename Impl::Params Params;
+ typedef typename Impl::DynInst DynInst;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::FullCPU FullCPU;
+ typedef typename Impl::FrontEnd FrontEnd;
+ typedef typename Impl::FullCPU::CommStruct CommStruct;
+
+ struct SizeStruct {
+ int size;
+ };
+
+ typedef SizeStruct DispatchToIssue;
+ typedef SizeStruct IssueToExec;
+ typedef SizeStruct ExecToCommit;
+ typedef SizeStruct Writeback;
+
+ TimeBuffer<DispatchToIssue> d2i;
+ typename TimeBuffer<DispatchToIssue>::wire instsToDispatch;
+ TimeBuffer<IssueToExec> i2e;
+ typename TimeBuffer<IssueToExec>::wire instsToExecute;
+ TimeBuffer<ExecToCommit> e2c;
+ TimeBuffer<Writeback> numInstsToWB;
+
+ TimeBuffer<CommStruct> *comm;
+ typename TimeBuffer<CommStruct>::wire toIEW;
+ typename TimeBuffer<CommStruct>::wire fromCommit;
+
+ class TrapEvent : public Event {
+ private:
+ LWBackEnd<Impl> *be;
+
+ public:
+ TrapEvent(LWBackEnd<Impl> *_be);
+
+ void process();
+ const char *description();
+ };
+
+ /** LdWriteback event for a load completion. */
+ class LdWritebackEvent : public Event {
+ private:
+ /** Instruction that is writing back data to the register file. */
+ DynInstPtr inst;
+ /** Pointer to IEW stage. */
+ LWBackEnd *be;
+
+ bool dcacheMiss;
+
+ public:
+ /** Constructs a load writeback event. */
+ LdWritebackEvent(DynInstPtr &_inst, LWBackEnd *be);
+
+ /** Processes writeback event. */
+ virtual void process();
+ /** Returns the description of the writeback event. */
+ virtual const char *description();
+
+ void setDcacheMiss() { dcacheMiss = true; be->addDcacheMiss(inst); }
+ };
+
+ LWBackEnd(Params *params);
+
+ std::string name() const;
+
+ void regStats();
+
+ void setCPU(FullCPU *cpu_ptr);
+
+ void setFrontEnd(FrontEnd *front_end_ptr)
+ { frontEnd = front_end_ptr; }
+
+ void setXC(ExecContext *xc_ptr)
+ { xc = xc_ptr; }
+
+ void setThreadState(Thread *thread_ptr)
+ { thread = thread_ptr; }
+
+ void setCommBuffer(TimeBuffer<CommStruct> *_comm);
+
+ void tick();
+ void squash();
+ void generateXCEvent() { xcSquash = true; }
+ void squashFromXC();
+ void squashFromTrap();
+ void checkInterrupts();
+ bool trapSquash;
+ bool xcSquash;
+
+ template <class T>
+ Fault read(RequestPtr req, T &data, int load_idx);
+
+ template <class T>
+ Fault write(RequestPtr req, T &data, int store_idx);
+
+ Addr readCommitPC() { return commitPC; }
+
+ Addr commitPC;
+
+ Tick lastCommitCycle;
+
+ bool robEmpty() { return instList.empty(); }
+
+ bool isFull() { return numInsts >= numROBEntries; }
+ bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }
+
+ void fetchFault(Fault &fault);
+
+ int wakeDependents(DynInstPtr &inst, bool memory_deps = false);
+
+ /** Tells memory dependence unit that a memory instruction needs to be
+ * rescheduled. It will re-execute once replayMemInst() is called.
+ */
+ void rescheduleMemInst(DynInstPtr &inst);
+
+ /** Re-executes all rescheduled memory instructions. */
+ void replayMemInst(DynInstPtr &inst);
+
+ /** Completes memory instruction. */
+ void completeMemInst(DynInstPtr &inst) { }
+
+ void addDcacheMiss(DynInstPtr &inst)
+ {
+ waitingMemOps.insert(inst->seqNum);
+ numWaitingMemOps++;
+ DPRINTF(BE, "Adding a Dcache miss mem op [sn:%lli], total %i\n",
+ inst->seqNum, numWaitingMemOps);
+ }
+
+ void removeDcacheMiss(DynInstPtr &inst)
+ {
+ assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end());
+ waitingMemOps.erase(inst->seqNum);
+ numWaitingMemOps--;
+ DPRINTF(BE, "Removing a Dcache miss mem op [sn:%lli], total %i\n",
+ inst->seqNum, numWaitingMemOps);
+ }
+
+ void addWaitingMemOp(DynInstPtr &inst)
+ {
+ waitingMemOps.insert(inst->seqNum);
+ numWaitingMemOps++;
+ DPRINTF(BE, "Adding a waiting mem op [sn:%lli], total %i\n",
+ inst->seqNum, numWaitingMemOps);
+ }
+
+ void removeWaitingMemOp(DynInstPtr &inst)
+ {
+ assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end());
+ waitingMemOps.erase(inst->seqNum);
+ numWaitingMemOps--;
+ DPRINTF(BE, "Removing a waiting mem op [sn:%lli], total %i\n",
+ inst->seqNum, numWaitingMemOps);
+ }
+
+ void instToCommit(DynInstPtr &inst);
+
+ void switchOut();
+ void doSwitchOut();
+ void takeOverFrom(ExecContext *old_xc = NULL);
+
+ bool isSwitchedOut() { return switchedOut; }
+
+ private:
+ void generateTrapEvent(Tick latency = 0);
+ void handleFault(Fault &fault, Tick latency = 0);
+ void updateStructures();
+ void dispatchInsts();
+ void dispatchStall();
+ void checkDispatchStatus();
+ void executeInsts();
+ void commitInsts();
+ void addToLSQ(DynInstPtr &inst);
+ void writebackInsts();
+ bool commitInst(int inst_num);
+ void squash(const InstSeqNum &sn);
+ void squashDueToBranch(DynInstPtr &inst);
+ void squashDueToMemViolation(DynInstPtr &inst);
+ void squashDueToMemBlocked(DynInstPtr &inst);
+ void updateExeInstStats(DynInstPtr &inst);
+ void updateComInstStats(DynInstPtr &inst);
+
+ public:
+ FullCPU *cpu;
+
+ FrontEnd *frontEnd;
+
+ ExecContext *xc;
+
+ Thread *thread;
+
+ enum Status {
+ Running,
+ Idle,
+ DcacheMissStall,
+ DcacheMissComplete,
+ Blocked,
+ TrapPending
+ };
+
+ Status status;
+
+ Status dispatchStatus;
+
+ Status commitStatus;
+
+ Counter funcExeInst;
+
+ private:
+ typedef typename Impl::LdstQueue LdstQueue;
+
+ LdstQueue LSQ;
+ public:
+ RenameTable<Impl> commitRenameTable;
+
+ RenameTable<Impl> renameTable;
+ private:
+ class DCacheCompletionEvent : public Event
+ {
+ private:
+ LWBackEnd *be;
+
+ public:
+ DCacheCompletionEvent(LWBackEnd *_be);
+
+ virtual void process();
+ virtual const char *description();
+ };
+
+ friend class DCacheCompletionEvent;
+
+ DCacheCompletionEvent cacheCompletionEvent;
+
+ MemInterface *dcacheInterface;
+
+ // General back end width. Used if the more specific isn't given.
+ int width;
+
+ // Dispatch width.
+ int dispatchWidth;
+ int numDispatchEntries;
+ int dispatchSize;
+
+ int waitingInsts;
+
+ int issueWidth;
+
+ // Writeback width
+ int wbWidth;
+
+ // Commit width
+ int commitWidth;
+
+ /** Index into queue of instructions being written back. */
+ unsigned wbNumInst;
+
+ /** Cycle number within the queue of instructions being written
+ * back. Used in case there are too many instructions writing
+ * back at the current cycle and writesbacks need to be scheduled
+ * for the future. See comments in instToCommit().
+ */
+ unsigned wbCycle;
+
+ int numROBEntries;
+ int numInsts;
+
+ std::set<InstSeqNum> waitingMemOps;
+ typedef std::set<InstSeqNum>::iterator MemIt;
+ int numWaitingMemOps;
+ unsigned maxOutstandingMemOps;
+
+ bool squashPending;
+ InstSeqNum squashSeqNum;
+ Addr squashNextPC;
+
+ Fault faultFromFetch;
+ bool fetchHasFault;
+
+ bool switchedOut;
+ bool switchPending;
+
+ DynInstPtr memBarrier;
+
+ private:
+ struct pqCompare {
+ bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
+ {
+ return lhs->seqNum > rhs->seqNum;
+ }
+ };
+
+ typedef typename std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> ReadyInstQueue;
+ ReadyInstQueue exeList;
+
+ typedef typename std::list<DynInstPtr>::iterator InstListIt;
+
+ std::list<DynInstPtr> instList;
+ std::list<DynInstPtr> waitingList;
+ std::list<DynInstPtr> replayList;
+ std::list<DynInstPtr> writeback;
+
+ int latency;
+
+ int squashLatency;
+
+ bool exactFullStall;
+
+ // number of cycles stalled for D-cache misses
+/* Stats::Scalar<> dcacheStallCycles;
+ Counter lastDcacheStall;
+*/
+ Stats::Vector<> rob_cap_events;
+ Stats::Vector<> rob_cap_inst_count;
+ Stats::Vector<> iq_cap_events;
+ Stats::Vector<> iq_cap_inst_count;
+ // total number of instructions executed
+ Stats::Vector<> exe_inst;
+ Stats::Vector<> exe_swp;
+ Stats::Vector<> exe_nop;
+ Stats::Vector<> exe_refs;
+ Stats::Vector<> exe_loads;
+ Stats::Vector<> exe_branches;
+
+ Stats::Vector<> issued_ops;
+
+ // total number of loads forwaded from LSQ stores
+ Stats::Vector<> lsq_forw_loads;
+
+ // total number of loads ignored due to invalid addresses
+ Stats::Vector<> inv_addr_loads;
+
+ // total number of software prefetches ignored due to invalid addresses
+ Stats::Vector<> inv_addr_swpfs;
+ // ready loads blocked due to memory disambiguation
+ Stats::Vector<> lsq_blocked_loads;
+
+ Stats::Scalar<> lsqInversion;
+
+ Stats::Vector<> n_issued_dist;
+ Stats::VectorDistribution<> issue_delay_dist;
+
+ Stats::VectorDistribution<> queue_res_dist;
+/*
+ Stats::Vector<> stat_fu_busy;
+ Stats::Vector2d<> stat_fuBusy;
+ Stats::Vector<> dist_unissued;
+ Stats::Vector2d<> stat_issued_inst_type;
+
+ Stats::Formula misspec_cnt;
+ Stats::Formula misspec_ipc;
+ Stats::Formula issue_rate;
+ Stats::Formula issue_stores;
+ Stats::Formula issue_op_rate;
+ Stats::Formula fu_busy_rate;
+ Stats::Formula commit_stores;
+ Stats::Formula commit_ipc;
+ Stats::Formula commit_ipb;
+ Stats::Formula lsq_inv_rate;
+*/
+ Stats::Vector<> writeback_count;
+ Stats::Vector<> producer_inst;
+ Stats::Vector<> consumer_inst;
+ Stats::Vector<> wb_penalized;
+
+ Stats::Formula wb_rate;
+ Stats::Formula wb_fanout;
+ Stats::Formula wb_penalized_rate;
+
+ // total number of instructions committed
+ Stats::Vector<> stat_com_inst;
+ Stats::Vector<> stat_com_swp;
+ Stats::Vector<> stat_com_refs;
+ Stats::Vector<> stat_com_loads;
+ Stats::Vector<> stat_com_membars;
+ Stats::Vector<> stat_com_branches;
+
+ Stats::Distribution<> n_committed_dist;
+
+ Stats::Scalar<> commit_eligible_samples;
+ Stats::Vector<> commit_eligible;
+
+ Stats::Vector<> squashedInsts;
+ Stats::Vector<> ROBSquashedInsts;
+
+ Stats::Scalar<> ROB_fcount;
+ Stats::Formula ROB_full_rate;
+
+ Stats::Vector<> ROB_count; // cumulative ROB occupancy
+ Stats::Formula ROB_occ_rate;
+ Stats::VectorDistribution<> ROB_occ_dist;
+ public:
+ void dumpInsts();
+
+ Checker<DynInstPtr> *checker;
+};
+
+template <class Impl>
+template <class T>
+Fault
+LWBackEnd<Impl>::read(RequestPtr req, T &data, int load_idx)
+{
+ return LSQ.read(req, data, load_idx);
+}
+
+template <class Impl>
+template <class T>
+Fault
+LWBackEnd<Impl>::write(RequestPtr req, T &data, int store_idx)
+{
+ return LSQ.write(req, data, store_idx);
+}
+
+#endif // __CPU_OZONE_LW_BACK_END_HH__
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/checker/cpu.hh"
+#include "cpu/ozone/lw_back_end.hh"
+#include "encumbered/cpu/full/op_class.hh"
+
+template <class Impl>
+void
+LWBackEnd<Impl>::generateTrapEvent(Tick latency)
+{
+ DPRINTF(BE, "Generating trap event\n");
+
+ TrapEvent *trap = new TrapEvent(this);
+
+ trap->schedule(curTick + cpu->cycles(latency));
+
+ thread->trapPending = true;
+}
+
+template <class Impl>
+int
+LWBackEnd<Impl>::wakeDependents(DynInstPtr &inst, bool memory_deps)
+{
+ assert(!inst->isSquashed());
+ std::vector<DynInstPtr> &dependents = memory_deps ? inst->getMemDeps() :
+ inst->getDependents();
+ int num_outputs = dependents.size();
+
+ DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum);
+
+ for (int i = 0; i < num_outputs; i++) {
+ DynInstPtr dep_inst = dependents[i];
+ if (!memory_deps) {
+ dep_inst->markSrcRegReady();
+ } else {
+ if (!dep_inst->isSquashed())
+ dep_inst->markMemInstReady(inst.get());
+ }
+
+ DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum);
+
+ if (dep_inst->readyToIssue() && dep_inst->isInROB() &&
+ !dep_inst->isNonSpeculative() && !dep_inst->isStoreConditional() &&
+ dep_inst->memDepReady() && !dep_inst->isMemBarrier() &&
+ !dep_inst->isWriteBarrier()) {
+ DPRINTF(BE, "Adding instruction to exeList [sn:%lli]\n",
+ dep_inst->seqNum);
+ exeList.push(dep_inst);
+ if (dep_inst->iqItValid) {
+ DPRINTF(BE, "Removing instruction from waiting list\n");
+ waitingList.erase(dep_inst->iqIt);
+ waitingInsts--;
+ dep_inst->iqItValid = false;
+ assert(waitingInsts >= 0);
+ }
+ if (dep_inst->isMemRef()) {
+ removeWaitingMemOp(dep_inst);
+ DPRINTF(BE, "Issued a waiting mem op [sn:%lli]\n",
+ dep_inst->seqNum);
+ }
+ }
+ }
+ return num_outputs;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::rescheduleMemInst(DynInstPtr &inst)
+{
+ replayList.push_front(inst);
+}
+
+template <class Impl>
+LWBackEnd<Impl>::TrapEvent::TrapEvent(LWBackEnd<Impl> *_be)
+ : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
+{
+ this->setFlags(Event::AutoDelete);
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::TrapEvent::process()
+{
+ be->trapSquash = true;
+}
+
+template <class Impl>
+const char *
+LWBackEnd<Impl>::TrapEvent::description()
+{
+ return "Trap event";
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::replayMemInst(DynInstPtr &inst)
+{
+ bool found_inst = false;
+ while (!replayList.empty()) {
+ exeList.push(replayList.front());
+ if (replayList.front() == inst) {
+ found_inst = true;
+ }
+ replayList.pop_front();
+ }
+ assert(found_inst);
+}
+
+template<class Impl>
+LWBackEnd<Impl>::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst,
+ LWBackEnd<Impl> *_be)
+ : Event(&mainEventQueue), inst(_inst), be(_be), dcacheMiss(false)
+{
+ this->setFlags(Event::AutoDelete);
+}
+
+template<class Impl>
+void
+LWBackEnd<Impl>::LdWritebackEvent::process()
+{
+ DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum);
+// DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
+
+ //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
+
+// iewStage->wakeCPU();
+
+ if (be->isSwitchedOut())
+ return;
+
+ if (dcacheMiss) {
+ be->removeDcacheMiss(inst);
+ }
+
+ if (inst->isSquashed()) {
+ inst = NULL;
+ return;
+ }
+
+ if (!inst->isExecuted()) {
+ inst->setExecuted();
+
+ // Execute again to copy data to proper place.
+ inst->completeAcc();
+ }
+
+ // Need to insert instruction into queue to commit
+ be->instToCommit(inst);
+
+ //wroteToTimeBuffer = true;
+// iewStage->activityThisCycle();
+
+ inst = NULL;
+}
+
+template<class Impl>
+const char *
+LWBackEnd<Impl>::LdWritebackEvent::description()
+{
+ return "Load writeback event";
+}
+
+
+template <class Impl>
+LWBackEnd<Impl>::DCacheCompletionEvent::DCacheCompletionEvent(LWBackEnd *_be)
+ : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
+{
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::DCacheCompletionEvent::process()
+{
+}
+
+template <class Impl>
+const char *
+LWBackEnd<Impl>::DCacheCompletionEvent::description()
+{
+ return "Cache completion event";
+}
+
+template <class Impl>
+LWBackEnd<Impl>::LWBackEnd(Params *params)
+ : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
+ trapSquash(false), xcSquash(false), cacheCompletionEvent(this),
+ dcacheInterface(params->dcacheInterface), width(params->backEndWidth),
+ exactFullStall(true)
+{
+ numROBEntries = params->numROBEntries;
+ numInsts = 0;
+ numDispatchEntries = 32;
+ maxOutstandingMemOps = params->maxOutstandingMemOps;
+ numWaitingMemOps = 0;
+ waitingInsts = 0;
+ switchedOut = false;
+ switchPending = false;
+
+ LSQ.setBE(this);
+
+ // Setup IQ and LSQ with their parameters here.
+ instsToDispatch = d2i.getWire(-1);
+
+ instsToExecute = i2e.getWire(-1);
+
+ dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width;
+ issueWidth = params->issueWidth ? params->issueWidth : width;
+ wbWidth = params->wbWidth ? params->wbWidth : width;
+ commitWidth = params->commitWidth ? params->commitWidth : width;
+
+ LSQ.init(params, params->LQEntries, params->SQEntries, 0);
+
+ dispatchStatus = Running;
+}
+
+template <class Impl>
+std::string
+LWBackEnd<Impl>::name() const
+{
+ return cpu->name() + ".backend";
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::regStats()
+{
+ using namespace Stats;
+ rob_cap_events
+ .init(cpu->number_of_threads)
+ .name(name() + ".ROB:cap_events")
+ .desc("number of cycles where ROB cap was active")
+ .flags(total)
+ ;
+
+ rob_cap_inst_count
+ .init(cpu->number_of_threads)
+ .name(name() + ".ROB:cap_inst")
+ .desc("number of instructions held up by ROB cap")
+ .flags(total)
+ ;
+
+ iq_cap_events
+ .init(cpu->number_of_threads)
+ .name(name() +".IQ:cap_events" )
+ .desc("number of cycles where IQ cap was active")
+ .flags(total)
+ ;
+
+ iq_cap_inst_count
+ .init(cpu->number_of_threads)
+ .name(name() + ".IQ:cap_inst")
+ .desc("number of instructions held up by IQ cap")
+ .flags(total)
+ ;
+
+
+ exe_inst
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:count")
+ .desc("number of insts issued")
+ .flags(total)
+ ;
+
+ exe_swp
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:swp")
+ .desc("number of swp insts issued")
+ .flags(total)
+ ;
+
+ exe_nop
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:nop")
+ .desc("number of nop insts issued")
+ .flags(total)
+ ;
+
+ exe_refs
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:refs")
+ .desc("number of memory reference insts issued")
+ .flags(total)
+ ;
+
+ exe_loads
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:loads")
+ .desc("number of load insts issued")
+ .flags(total)
+ ;
+
+ exe_branches
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:branches")
+ .desc("Number of branches issued")
+ .flags(total)
+ ;
+
+ issued_ops
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:op_count")
+ .desc("number of insts issued")
+ .flags(total)
+ ;
+
+/*
+ for (int i=0; i<Num_OpClasses; ++i) {
+ stringstream subname;
+ subname << opClassStrings[i] << "_delay";
+ issue_delay_dist.subname(i, subname.str());
+ }
+*/
+ //
+ // Other stats
+ //
+ lsq_forw_loads
+ .init(cpu->number_of_threads)
+ .name(name() + ".LSQ:forw_loads")
+ .desc("number of loads forwarded via LSQ")
+ .flags(total)
+ ;
+
+ inv_addr_loads
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:addr_loads")
+ .desc("number of invalid-address loads")
+ .flags(total)
+ ;
+
+ inv_addr_swpfs
+ .init(cpu->number_of_threads)
+ .name(name() + ".ISSUE:addr_swpfs")
+ .desc("number of invalid-address SW prefetches")
+ .flags(total)
+ ;
+
+ lsq_blocked_loads
+ .init(cpu->number_of_threads)
+ .name(name() + ".LSQ:blocked_loads")
+ .desc("number of ready loads not issued due to memory disambiguation")
+ .flags(total)
+ ;
+
+ lsqInversion
+ .name(name() + ".ISSUE:lsq_invert")
+ .desc("Number of times LSQ instruction issued early")
+ ;
+
+ n_issued_dist
+ .init(issueWidth + 1)
+ .name(name() + ".ISSUE:issued_per_cycle")
+ .desc("Number of insts issued each cycle")
+ .flags(total | pdf | dist)
+ ;
+ issue_delay_dist
+ .init(Num_OpClasses,0,99,2)
+ .name(name() + ".ISSUE:")
+ .desc("cycles from operands ready to issue")
+ .flags(pdf | cdf)
+ ;
+
+ queue_res_dist
+ .init(Num_OpClasses, 0, 99, 2)
+ .name(name() + ".IQ:residence:")
+ .desc("cycles from dispatch to issue")
+ .flags(total | pdf | cdf )
+ ;
+ for (int i = 0; i < Num_OpClasses; ++i) {
+ queue_res_dist.subname(i, opClassStrings[i]);
+ }
+
+ writeback_count
+ .init(cpu->number_of_threads)
+ .name(name() + ".WB:count")
+ .desc("cumulative count of insts written-back")
+ .flags(total)
+ ;
+
+ producer_inst
+ .init(cpu->number_of_threads)
+ .name(name() + ".WB:producers")
+ .desc("num instructions producing a value")
+ .flags(total)
+ ;
+
+ consumer_inst
+ .init(cpu->number_of_threads)
+ .name(name() + ".WB:consumers")
+ .desc("num instructions consuming a value")
+ .flags(total)
+ ;
+
+ wb_penalized
+ .init(cpu->number_of_threads)
+ .name(name() + ".WB:penalized")
+ .desc("number of instrctions required to write to 'other' IQ")
+ .flags(total)
+ ;
+
+
+ wb_penalized_rate
+ .name(name() + ".WB:penalized_rate")
+ .desc ("fraction of instructions written-back that wrote to 'other' IQ")
+ .flags(total)
+ ;
+
+ wb_penalized_rate = wb_penalized / writeback_count;
+
+ wb_fanout
+ .name(name() + ".WB:fanout")
+ .desc("average fanout of values written-back")
+ .flags(total)
+ ;
+
+ wb_fanout = producer_inst / consumer_inst;
+
+ wb_rate
+ .name(name() + ".WB:rate")
+ .desc("insts written-back per cycle")
+ .flags(total)
+ ;
+ wb_rate = writeback_count / cpu->numCycles;
+
+ stat_com_inst
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:count")
+ .desc("Number of instructions committed")
+ .flags(total)
+ ;
+
+ stat_com_swp
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:swp_count")
+ .desc("Number of s/w prefetches committed")
+ .flags(total)
+ ;
+
+ stat_com_refs
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:refs")
+ .desc("Number of memory references committed")
+ .flags(total)
+ ;
+
+ stat_com_loads
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:loads")
+ .desc("Number of loads committed")
+ .flags(total)
+ ;
+
+ stat_com_membars
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:membars")
+ .desc("Number of memory barriers committed")
+ .flags(total)
+ ;
+
+ stat_com_branches
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:branches")
+ .desc("Number of branches committed")
+ .flags(total)
+ ;
+ n_committed_dist
+ .init(0,commitWidth,1)
+ .name(name() + ".COM:committed_per_cycle")
+ .desc("Number of insts commited each cycle")
+ .flags(pdf)
+ ;
+
+ //
+ // Commit-Eligible instructions...
+ //
+ // -> The number of instructions eligible to commit in those
+ // cycles where we reached our commit BW limit (less the number
+ // actually committed)
+ //
+ // -> The average value is computed over ALL CYCLES... not just
+ // the BW limited cycles
+ //
+ // -> The standard deviation is computed only over cycles where
+ // we reached the BW limit
+ //
+ commit_eligible
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:bw_limited")
+ .desc("number of insts not committed due to BW limits")
+ .flags(total)
+ ;
+
+ commit_eligible_samples
+ .name(name() + ".COM:bw_lim_events")
+ .desc("number cycles where commit BW limit reached")
+ ;
+
+ squashedInsts
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:squashed_insts")
+ .desc("Number of instructions removed from inst list")
+ ;
+
+ ROBSquashedInsts
+ .init(cpu->number_of_threads)
+ .name(name() + ".COM:rob_squashed_insts")
+ .desc("Number of instructions removed from inst list when they reached the head of the ROB")
+ ;
+
+ ROB_fcount
+ .name(name() + ".ROB:full_count")
+ .desc("number of cycles where ROB was full")
+ ;
+
+ ROB_count
+ .init(cpu->number_of_threads)
+ .name(name() + ".ROB:occupancy")
+ .desc(name() + ".ROB occupancy (cumulative)")
+ .flags(total)
+ ;
+
+ ROB_full_rate
+ .name(name() + ".ROB:full_rate")
+ .desc("ROB full per cycle")
+ ;
+ ROB_full_rate = ROB_fcount / cpu->numCycles;
+
+ ROB_occ_rate
+ .name(name() + ".ROB:occ_rate")
+ .desc("ROB occupancy rate")
+ .flags(total)
+ ;
+ ROB_occ_rate = ROB_count / cpu->numCycles;
+
+ ROB_occ_dist
+ .init(cpu->number_of_threads,0,numROBEntries,2)
+ .name(name() + ".ROB:occ_dist")
+ .desc("ROB Occupancy per cycle")
+ .flags(total | cdf)
+ ;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::setCPU(FullCPU *cpu_ptr)
+{
+ cpu = cpu_ptr;
+ LSQ.setCPU(cpu_ptr);
+ checker = cpu->checker;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
+{
+ comm = _comm;
+ toIEW = comm->getWire(0);
+ fromCommit = comm->getWire(-1);
+}
+
+#if FULL_SYSTEM
+template <class Impl>
+void
+LWBackEnd<Impl>::checkInterrupts()
+{
+ if (cpu->checkInterrupts &&
+ cpu->check_interrupts() &&
+ !cpu->inPalMode(thread->readPC()) &&
+ !trapSquash &&
+ !xcSquash) {
+ frontEnd->interruptPending = true;
+ if (robEmpty() && !LSQ.hasStoresToWB()) {
+ // Will need to squash all instructions currently in flight and have
+ // the interrupt handler restart at the last non-committed inst.
+ // Most of that can be handled through the trap() function. The
+ // processInterrupts() function really just checks for interrupts
+ // and then calls trap() if there is an interrupt present.
+
+ // Not sure which thread should be the one to interrupt. For now
+ // always do thread 0.
+ assert(!thread->inSyscall);
+ thread->inSyscall = true;
+
+ // CPU will handle implementation of the interrupt.
+ cpu->processInterrupts();
+
+ // Now squash or record that I need to squash this cycle.
+ commitStatus = TrapPending;
+
+ // Exit state update mode to avoid accidental updating.
+ thread->inSyscall = false;
+
+ // Generate trap squash event.
+ generateTrapEvent();
+
+ DPRINTF(BE, "Interrupt detected.\n");
+ } else {
+ DPRINTF(BE, "Interrupt must wait for ROB to drain.\n");
+ }
+ }
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::handleFault(Fault &fault, Tick latency)
+{
+ DPRINTF(BE, "Handling fault!\n");
+
+ assert(!thread->inSyscall);
+
+ thread->inSyscall = true;
+
+ // Consider holding onto the trap and waiting until the trap event
+ // happens for this to be executed.
+ fault->invoke(thread->getXCProxy());
+
+ // Exit state update mode to avoid accidental updating.
+ thread->inSyscall = false;
+
+ commitStatus = TrapPending;
+
+ // Generate trap squash event.
+ generateTrapEvent(latency);
+}
+#endif
+
+template <class Impl>
+void
+LWBackEnd<Impl>::tick()
+{
+ DPRINTF(BE, "Ticking back end\n");
+
+ if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) {
+ cpu->signalSwitched();
+ return;
+ }
+
+ ROB_count[0]+= numInsts;
+
+ wbCycle = 0;
+
+ // Read in any done instruction information and update the IQ or LSQ.
+ updateStructures();
+
+#if FULL_SYSTEM
+ checkInterrupts();
+
+ if (trapSquash) {
+ assert(!xcSquash);
+ squashFromTrap();
+ } else if (xcSquash) {
+ squashFromXC();
+ }
+#endif
+
+ if (dispatchStatus != Blocked) {
+ dispatchInsts();
+ } else {
+ checkDispatchStatus();
+ }
+
+ if (commitStatus != TrapPending) {
+ executeInsts();
+
+ commitInsts();
+ }
+
+ LSQ.writebackStores();
+
+ DPRINTF(BE, "Waiting insts: %i, mem ops: %i, ROB entries in use: %i, "
+ "LSQ loads: %i, LSQ stores: %i\n",
+ waitingInsts, numWaitingMemOps, numInsts,
+ LSQ.numLoads(), LSQ.numStores());
+
+#ifdef DEBUG
+ assert(numInsts == instList.size());
+ assert(waitingInsts == waitingList.size());
+ assert(numWaitingMemOps == waitingMemOps.size());
+ assert(!switchedOut);
+#endif
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::updateStructures()
+{
+ if (fromCommit->doneSeqNum) {
+ LSQ.commitLoads(fromCommit->doneSeqNum);
+ LSQ.commitStores(fromCommit->doneSeqNum);
+ }
+
+ if (fromCommit->nonSpecSeqNum) {
+ if (fromCommit->uncached) {
+// LSQ.executeLoad(fromCommit->lqIdx);
+ } else {
+// IQ.scheduleNonSpec(
+// fromCommit->nonSpecSeqNum);
+ }
+ }
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::addToLSQ(DynInstPtr &inst)
+{
+ // Do anything LSQ specific here?
+ LSQ.insert(inst);
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::dispatchInsts()
+{
+ DPRINTF(BE, "Trying to dispatch instructions.\n");
+
+ while (numInsts < numROBEntries &&
+ numWaitingMemOps < maxOutstandingMemOps) {
+ // Get instruction from front of time buffer
+ DynInstPtr inst = frontEnd->getInst();
+ if (!inst) {
+ break;
+ } else if (inst->isSquashed()) {
+ continue;
+ }
+
+ ++numInsts;
+ instList.push_front(inst);
+
+ inst->setInROB();
+
+ DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n",
+ inst->seqNum, inst->readPC());
+
+ for (int i = 0; i < inst->numDestRegs(); ++i)
+ renameTable[inst->destRegIdx(i)] = inst;
+
+ if (inst->isMemBarrier() || inst->isWriteBarrier()) {
+ if (memBarrier) {
+ DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
+ "barrier [sn:%lli].\n",
+ inst->seqNum, memBarrier->seqNum);
+ memBarrier->addMemDependent(inst);
+ inst->addSrcMemInst(memBarrier);
+ }
+ memBarrier = inst;
+ inst->setCanCommit();
+ } else if (inst->readyToIssue() &&
+ !inst->isNonSpeculative() &&
+ !inst->isStoreConditional()) {
+ if (inst->isMemRef()) {
+
+ LSQ.insert(inst);
+ if (memBarrier) {
+ DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
+ "barrier [sn:%lli].\n",
+ inst->seqNum, memBarrier->seqNum);
+ memBarrier->addMemDependent(inst);
+ inst->addSrcMemInst(memBarrier);
+ addWaitingMemOp(inst);
+
+ waitingList.push_front(inst);
+ inst->iqIt = waitingList.begin();
+ inst->iqItValid = true;
+ waitingInsts++;
+ } else {
+ DPRINTF(BE, "Instruction [sn:%lli] ready, addding to "
+ "exeList.\n",
+ inst->seqNum);
+ exeList.push(inst);
+ }
+ } else if (inst->isNop()) {
+ DPRINTF(BE, "Nop encountered [sn:%lli], skipping exeList.\n",
+ inst->seqNum);
+ inst->setIssued();
+ inst->setExecuted();
+ inst->setCanCommit();
+ } else {
+ DPRINTF(BE, "Instruction [sn:%lli] ready, addding to "
+ "exeList.\n",
+ inst->seqNum);
+ exeList.push(inst);
+ }
+ } else {
+ if (inst->isNonSpeculative() || inst->isStoreConditional()) {
+ inst->setCanCommit();
+ DPRINTF(BE, "Adding non speculative instruction\n");
+ }
+
+ if (inst->isMemRef()) {
+ addWaitingMemOp(inst);
+ LSQ.insert(inst);
+ if (memBarrier) {
+ memBarrier->addMemDependent(inst);
+ inst->addSrcMemInst(memBarrier);
+
+ DPRINTF(BE, "Instruction [sn:%lli] is waiting on "
+ "barrier [sn:%lli].\n",
+ inst->seqNum, memBarrier->seqNum);
+ }
+ }
+
+ DPRINTF(BE, "Instruction [sn:%lli] not ready, addding to "
+ "waitingList.\n",
+ inst->seqNum);
+ waitingList.push_front(inst);
+ inst->iqIt = waitingList.begin();
+ inst->iqItValid = true;
+ waitingInsts++;
+ }
+ }
+
+ // Check if IQ or LSQ is full. If so we'll need to break and stop
+ // removing instructions. Also update the number of insts to remove
+ // from the queue. Check here if we don't care about exact stall
+ // conditions.
+/*
+ bool stall = false;
+ if (IQ.isFull()) {
+ DPRINTF(BE, "IQ is full!\n");
+ stall = true;
+ } else if (LSQ.isFull()) {
+ DPRINTF(BE, "LSQ is full!\n");
+ stall = true;
+ } else if (isFull()) {
+ DPRINTF(BE, "ROB is full!\n");
+ stall = true;
+ ROB_fcount++;
+ }
+ if (stall) {
+ d2i.advance();
+ dispatchStall();
+ return;
+ }
+*/
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::dispatchStall()
+{
+ dispatchStatus = Blocked;
+ if (!cpu->decoupledFrontEnd) {
+ // Tell front end to stall here through a timebuffer, or just tell
+ // it directly.
+ }
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::checkDispatchStatus()
+{
+ DPRINTF(BE, "Checking dispatch status\n");
+ assert(dispatchStatus == Blocked);
+ if (!LSQ.isFull() && !isFull()) {
+ DPRINTF(BE, "Dispatch no longer blocked\n");
+ dispatchStatus = Running;
+ dispatchInsts();
+ }
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::executeInsts()
+{
+ DPRINTF(BE, "Trying to execute instructions\n");
+
+ int num_executed = 0;
+ while (!exeList.empty() && num_executed < issueWidth) {
+ DynInstPtr inst = exeList.top();
+
+ DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n",
+ inst->seqNum, inst->readPC());
+
+ // Check if the instruction is squashed; if so then skip it
+ // and don't count it towards the FU usage.
+ if (inst->isSquashed()) {
+ DPRINTF(BE, "Execute: Instruction was squashed.\n");
+
+ // Not sure how to handle this plus the method of sending # of
+ // instructions to use. Probably will just have to count it
+ // towards the bandwidth usage, but not the FU usage.
+ ++num_executed;
+
+ // Consider this instruction executed so that commit can go
+ // ahead and retire the instruction.
+ inst->setExecuted();
+
+ // Not sure if I should set this here or just let commit try to
+ // commit any squashed instructions. I like the latter a bit more.
+ inst->setCanCommit();
+
+// ++iewExecSquashedInsts;
+ exeList.pop();
+
+ continue;
+ }
+
+ Fault fault = NoFault;
+
+ // Execute instruction.
+ // Note that if the instruction faults, it will be handled
+ // at the commit stage.
+ if (inst->isMemRef() &&
+ (!inst->isDataPrefetch() && !inst->isInstPrefetch())) {
+ if (dcacheInterface->isBlocked()) {
+ // Should I move the instruction aside?
+ DPRINTF(BE, "Execute: dcache is blocked\n");
+ break;
+ }
+ DPRINTF(BE, "Execute: Initiating access for memory "
+ "reference.\n");
+
+ if (inst->isLoad()) {
+ LSQ.executeLoad(inst);
+ } else if (inst->isStore()) {
+ LSQ.executeStore(inst);
+ if (inst->req && !(inst->req->flags & LOCKED)) {
+ inst->setExecuted();
+
+ instToCommit(inst);
+ }
+ } else {
+ panic("Unknown mem type!");
+ }
+ } else {
+ inst->execute();
+
+ inst->setExecuted();
+
+ instToCommit(inst);
+ }
+
+ updateExeInstStats(inst);
+
+ ++funcExeInst;
+ ++num_executed;
+
+ exeList.pop();
+
+ if (inst->mispredicted()) {
+ squashDueToBranch(inst);
+ break;
+ } else if (LSQ.violation()) {
+ // Get the DynInst that caused the violation. Note that this
+ // clears the violation signal.
+ DynInstPtr violator;
+ violator = LSQ.getMemDepViolator();
+
+ DPRINTF(BE, "LDSTQ detected a violation. Violator PC: "
+ "%#x, inst PC: %#x. Addr is: %#x.\n",
+ violator->readPC(), inst->readPC(), inst->physEffAddr);
+
+ // Squash.
+ squashDueToMemViolation(inst);
+ }
+ }
+
+ issued_ops[0]+= num_executed;
+ n_issued_dist[num_executed]++;
+}
+
+template<class Impl>
+void
+LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
+{
+
+ DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
+ inst->seqNum, inst->readPC());
+
+ if (!inst->isSquashed()) {
+ DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
+ inst->seqNum, inst->readPC());
+
+ inst->setCanCommit();
+
+ if (inst->isExecuted()) {
+ inst->setResultReady();
+ int dependents = wakeDependents(inst);
+ if (dependents) {
+ producer_inst[0]++;
+ consumer_inst[0]+= dependents;
+ }
+ }
+ }
+
+ writeback_count[0]++;
+}
+#if 0
+template <class Impl>
+void
+LWBackEnd<Impl>::writebackInsts()
+{
+ int wb_width = wbWidth;
+ // Using this method I'm not quite sure how to prevent an
+ // instruction from waking its own dependents multiple times,
+ // without the guarantee that commit always has enough bandwidth
+ // to accept all instructions being written back. This guarantee
+ // might not be too unrealistic.
+ InstListIt wb_inst_it = writeback.begin();
+ InstListIt wb_end_it = writeback.end();
+ int inst_num = 0;
+ int consumer_insts = 0;
+
+ for (; inst_num < wb_width &&
+ wb_inst_it != wb_end_it; inst_num++) {
+ DynInstPtr inst = (*wb_inst_it);
+
+ // Some instructions will be sent to commit without having
+ // executed because they need commit to handle them.
+ // E.g. Uncached loads have not actually executed when they
+ // are first sent to commit. Instead commit must tell the LSQ
+ // when it's ready to execute the uncached load.
+ if (!inst->isSquashed()) {
+ DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
+ inst->seqNum, inst->readPC());
+
+ inst->setCanCommit();
+ inst->setResultReady();
+
+ if (inst->isExecuted()) {
+ int dependents = wakeDependents(inst);
+ if (dependents) {
+ producer_inst[0]++;
+ consumer_insts+= dependents;
+ }
+ }
+ }
+
+ writeback.erase(wb_inst_it++);
+ }
+ LSQ.writebackStores();
+ consumer_inst[0]+= consumer_insts;
+ writeback_count[0]+= inst_num;
+}
+#endif
+template <class Impl>
+bool
+LWBackEnd<Impl>::commitInst(int inst_num)
+{
+ // Read instruction from the head of the ROB
+ DynInstPtr inst = instList.back();
+
+ // Make sure instruction is valid
+ assert(inst);
+
+ if (!inst->readyToCommit())
+ return false;
+
+ DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n",
+ inst->seqNum, inst->readPC());
+
+ thread->setPC(inst->readPC());
+ thread->setNextPC(inst->readNextPC());
+ inst->reachedCommit = true;
+
+ // If the instruction is not executed yet, then it is a non-speculative
+ // or store inst. Signal backwards that it should be executed.
+ if (!inst->isExecuted()) {
+ if (inst->isNonSpeculative() ||
+ inst->isStoreConditional() ||
+ inst->isMemBarrier() ||
+ inst->isWriteBarrier()) {
+#if !FULL_SYSTEM
+ // Hack to make sure syscalls aren't executed until all stores
+ // write back their data. This direct communication shouldn't
+ // be used for anything other than this.
+ if (inst_num > 0 || LSQ.hasStoresToWB())
+#else
+ if ((inst->isMemBarrier() || inst->isWriteBarrier() ||
+ inst->isQuiesce()) &&
+ LSQ.hasStoresToWB())
+#endif
+ {
+ DPRINTF(BE, "Waiting for all stores to writeback.\n");
+ return false;
+ }
+
+ DPRINTF(BE, "Encountered a store or non-speculative "
+ "instruction at the head of the ROB, PC %#x.\n",
+ inst->readPC());
+
+ if (inst->isMemBarrier() || inst->isWriteBarrier()) {
+ DPRINTF(BE, "Waking dependents on barrier [sn:%lli]\n",
+ inst->seqNum);
+ assert(memBarrier);
+ wakeDependents(inst, true);
+ if (memBarrier == inst)
+ memBarrier = NULL;
+ inst->clearMemDependents();
+ }
+
+ // Send back the non-speculative instruction's sequence number.
+ if (inst->iqItValid) {
+ DPRINTF(BE, "Removing instruction from waiting list\n");
+ waitingList.erase(inst->iqIt);
+ inst->iqItValid = false;
+ waitingInsts--;
+ assert(waitingInsts >= 0);
+ if (inst->isStore())
+ removeWaitingMemOp(inst);
+ }
+
+ exeList.push(inst);
+
+ // Change the instruction so it won't try to commit again until
+ // it is executed.
+ inst->clearCanCommit();
+
+// ++commitNonSpecStalls;
+
+ return false;
+ } else if (inst->isLoad()) {
+ DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n",
+ inst->seqNum, inst->readPC());
+
+ // Send back the non-speculative instruction's sequence
+ // number. Maybe just tell the lsq to re-execute the load.
+
+ // Send back the non-speculative instruction's sequence number.
+ if (inst->iqItValid) {
+ DPRINTF(BE, "Removing instruction from waiting list\n");
+ waitingList.erase(inst->iqIt);
+ inst->iqItValid = false;
+ waitingInsts--;
+ assert(waitingInsts >= 0);
+ removeWaitingMemOp(inst);
+ }
+ replayMemInst(inst);
+
+ inst->clearCanCommit();
+
+ return false;
+ } else {
+ panic("Trying to commit un-executed instruction "
+ "of unknown type!\n");
+ }
+ }
+
+ // Not handled for now.
+ assert(!inst->isThreadSync());
+ assert(inst->memDepReady());
+ // Stores will mark themselves as totally completed as they need
+ // to wait to writeback to memory. @todo: Hack...attempt to fix
+ // having the checker be forced to wait until a store completes in
+ // order to check all of the instructions. If the store at the
+ // head of the check list misses, but a later store hits, then
+ // loads in the checker may see the younger store values instead
+ // of the store they should see. Either the checker needs its own
+ // memory (annoying to update), its own store buffer (how to tell
+ // which value is correct?), or something else...
+ if (!inst->isStore()) {
+ inst->setCompleted();
+ }
+ // Check if the instruction caused a fault. If so, trap.
+ Fault inst_fault = inst->getFault();
+
+ // Use checker prior to updating anything due to traps or PC
+ // based events.
+ if (checker) {
+ checker->tick(inst);
+ }
+
+ if (inst_fault != NoFault) {
+ DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n",
+ inst->seqNum, inst->readPC());
+
+ // Instruction is completed as it has a fault.
+ inst->setCompleted();
+
+ if (LSQ.hasStoresToWB()) {
+ DPRINTF(BE, "Stores still in flight, will wait until drained.\n");
+ return false;
+ } else if (inst_num != 0) {
+ DPRINTF(BE, "Will wait until instruction is head of commit group.\n");
+ return false;
+ } else if (checker && inst->isStore()) {
+ checker->tick(inst);
+ }
+
+ thread->setInst(
+ static_cast<TheISA::MachInst>(inst->staticInst->machInst));
+#if FULL_SYSTEM
+ handleFault(inst_fault);
+ return false;
+#else // !FULL_SYSTEM
+ panic("fault (%d) detected @ PC %08p", inst_fault,
+ inst->PC);
+#endif // FULL_SYSTEM
+ }
+
+ int freed_regs = 0;
+
+ for (int i = 0; i < inst->numDestRegs(); ++i) {
+ DPRINTF(BE, "Commit rename map setting reg %i to [sn:%lli]\n",
+ (int)inst->destRegIdx(i), inst->seqNum);
+ thread->renameTable[inst->destRegIdx(i)] = inst;
+ ++freed_regs;
+ }
+
+ if (inst->traceData) {
+ inst->traceData->setFetchSeq(inst->seqNum);
+ inst->traceData->setCPSeq(thread->numInst);
+ inst->traceData->finalize();
+ inst->traceData = NULL;
+ }
+
+ inst->clearDependents();
+
+ frontEnd->addFreeRegs(freed_regs);
+
+ instList.pop_back();
+
+ --numInsts;
+ ++thread->funcExeInst;
+ // Maybe move this to where the fault is handled; if the fault is
+ // handled, don't try to set this myself as the fault will set it.
+ // If not, then I set thread->PC = thread->nextPC and
+ // thread->nextPC = thread->nextPC + 4.
+ thread->setPC(thread->readNextPC());
+ thread->setNextPC(thread->readNextPC() + sizeof(TheISA::MachInst));
+ updateComInstStats(inst);
+
+ // Write the done sequence number here.
+ toIEW->doneSeqNum = inst->seqNum;
+ lastCommitCycle = curTick;
+
+#if FULL_SYSTEM
+ int count = 0;
+ Addr oldpc;
+ do {
+ if (count == 0)
+ assert(!thread->inSyscall && !thread->trapPending);
+ oldpc = thread->readPC();
+ cpu->system->pcEventQueue.service(
+ thread->getXCProxy());
+ count++;
+ } while (oldpc != thread->readPC());
+ if (count > 1) {
+ DPRINTF(BE, "PC skip function event, stopping commit\n");
+ xcSquash = true;
+ return false;
+ }
+#endif
+ return true;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::commitInsts()
+{
+ // Not sure this should be a loop or not.
+ int inst_num = 0;
+ while (!instList.empty() && inst_num < commitWidth) {
+ if (instList.back()->isSquashed()) {
+ instList.back()->clearDependents();
+ instList.pop_back();
+ --numInsts;
+ ROBSquashedInsts[instList.back()->threadNumber]++;
+ continue;
+ }
+
+ if (!commitInst(inst_num++)) {
+ DPRINTF(BE, "Can't commit, Instruction [sn:%lli] PC "
+ "%#x is head of ROB and not ready\n",
+ instList.back()->seqNum, instList.back()->readPC());
+ --inst_num;
+ break;
+ }
+ }
+ n_committed_dist.sample(inst_num);
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::squash(const InstSeqNum &sn)
+{
+ LSQ.squash(sn);
+
+ int freed_regs = 0;
+ InstListIt waiting_list_end = waitingList.end();
+ InstListIt insts_it = waitingList.begin();
+
+ while (insts_it != waiting_list_end && (*insts_it)->seqNum > sn)
+ {
+ if ((*insts_it)->isSquashed()) {
+ ++insts_it;
+ continue;
+ }
+ DPRINTF(BE, "Squashing instruction on waitingList PC %#x, [sn:%lli].\n",
+ (*insts_it)->readPC(),
+ (*insts_it)->seqNum);
+
+ if ((*insts_it)->isMemRef()) {
+ DPRINTF(BE, "Squashing a waiting mem op [sn:%lli]\n",
+ (*insts_it)->seqNum);
+ removeWaitingMemOp((*insts_it));
+ }
+
+ waitingList.erase(insts_it++);
+ waitingInsts--;
+ }
+ assert(waitingInsts >= 0);
+
+ insts_it = instList.begin();
+
+ while (!instList.empty() && (*insts_it)->seqNum > sn)
+ {
+ if ((*insts_it)->isSquashed()) {
+ ++insts_it;
+ continue;
+ }
+ DPRINTF(BE, "Squashing instruction on inst list PC %#x, [sn:%lli].\n",
+ (*insts_it)->readPC(),
+ (*insts_it)->seqNum);
+
+ // Mark the instruction as squashed, and ready to commit so that
+ // it can drain out of the pipeline.
+ (*insts_it)->setSquashed();
+
+ (*insts_it)->setCanCommit();
+
+ (*insts_it)->removeInROB();
+
+ for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
+ DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
+ DPRINTF(BE, "Commit rename map setting reg %i to [sn:%lli]\n",
+ (int)(*insts_it)->destRegIdx(i), prev_dest->seqNum);
+ renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
+ ++freed_regs;
+ }
+
+ (*insts_it)->clearDependents();
+
+ squashedInsts[(*insts_it)->threadNumber]++;
+
+ instList.erase(insts_it++);
+ --numInsts;
+ }
+
+ insts_it = waitingList.begin();
+ while (!waitingList.empty() && insts_it != waitingList.end()) {
+ if ((*insts_it)->seqNum < sn) {
+ ++insts_it;
+ continue;
+ }
+ assert((*insts_it)->isSquashed());
+
+ waitingList.erase(insts_it++);
+ waitingInsts--;
+ }
+
+ while (memBarrier && memBarrier->seqNum > sn) {
+ DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously "
+ "squashed)\n", memBarrier->seqNum);
+ memBarrier->clearMemDependents();
+ if (memBarrier->memDepReady()) {
+ DPRINTF(BE, "No previous barrier\n");
+ memBarrier = NULL;
+ } else {
+ std::list<DynInstPtr> &srcs = memBarrier->getMemSrcs();
+ memBarrier = srcs.front();
+ srcs.pop_front();
+ assert(srcs.empty());
+ DPRINTF(BE, "Previous barrier: [sn:%lli]\n",
+ memBarrier->seqNum);
+ }
+ }
+
+ frontEnd->addFreeRegs(freed_regs);
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::squashFromXC()
+{
+ InstSeqNum squashed_inst = robEmpty() ? 0 : instList.back()->seqNum - 1;
+ squash(squashed_inst);
+ frontEnd->squash(squashed_inst, thread->readPC(),
+ false, false);
+ frontEnd->interruptPending = false;
+
+ thread->trapPending = false;
+ thread->inSyscall = false;
+ xcSquash = false;
+ commitStatus = Running;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::squashFromTrap()
+{
+ InstSeqNum squashed_inst = robEmpty() ? 0 : instList.back()->seqNum - 1;
+ squash(squashed_inst);
+ frontEnd->squash(squashed_inst, thread->readPC(),
+ false, false);
+ frontEnd->interruptPending = false;
+
+ thread->trapPending = false;
+ thread->inSyscall = false;
+ trapSquash = false;
+ commitStatus = Running;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::squashDueToBranch(DynInstPtr &inst)
+{
+ // Update the branch predictor state I guess
+ DPRINTF(BE, "Squashing due to branch [sn:%lli], will restart at PC %#x\n",
+ inst->seqNum, inst->readNextPC());
+ squash(inst->seqNum);
+ frontEnd->squash(inst->seqNum, inst->readNextPC(),
+ true, inst->mispredicted());
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::squashDueToMemViolation(DynInstPtr &inst)
+{
+ // Update the branch predictor state I guess
+ DPRINTF(BE, "Squashing due to violation [sn:%lli], will restart at PC %#x\n",
+ inst->seqNum, inst->readNextPC());
+ squash(inst->seqNum);
+ frontEnd->squash(inst->seqNum, inst->readNextPC(),
+ false, inst->mispredicted());
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
+{
+ DPRINTF(IEW, "Memory blocked, squashing load and younger insts, "
+ "PC: %#x [sn:%i].\n", inst->readPC(), inst->seqNum);
+
+ squash(inst->seqNum - 1);
+ frontEnd->squash(inst->seqNum - 1, inst->readPC());
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::fetchFault(Fault &fault)
+{
+ faultFromFetch = fault;
+ fetchHasFault = true;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::switchOut()
+{
+ switchPending = true;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::doSwitchOut()
+{
+ switchedOut = true;
+ switchPending = false;
+ // Need to get rid of all committed, non-speculative state and write it
+ // to memory/XC. In this case this is stores that have committed and not
+ // yet written back.
+ assert(robEmpty());
+ assert(!LSQ.hasStoresToWB());
+
+ LSQ.switchOut();
+
+ squash(0);
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::takeOverFrom(ExecContext *old_xc)
+{
+ switchedOut = false;
+ xcSquash = false;
+ trapSquash = false;
+
+ numInsts = 0;
+ numWaitingMemOps = 0;
+ waitingMemOps.clear();
+ waitingInsts = 0;
+ switchedOut = false;
+ dispatchStatus = Running;
+ commitStatus = Running;
+ LSQ.takeOverFrom(old_xc);
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
+{
+ int thread_number = inst->threadNumber;
+
+ //
+ // Pick off the software prefetches
+ //
+#ifdef TARGET_ALPHA
+ if (inst->isDataPrefetch())
+ exe_swp[thread_number]++;
+ else
+ exe_inst[thread_number]++;
+#else
+ exe_inst[thread_number]++;
+#endif
+
+ //
+ // Control operations
+ //
+ if (inst->isControl())
+ exe_branches[thread_number]++;
+
+ //
+ // Memory operations
+ //
+ if (inst->isMemRef()) {
+ exe_refs[thread_number]++;
+
+ if (inst->isLoad())
+ exe_loads[thread_number]++;
+ }
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
+{
+ unsigned tid = inst->threadNumber;
+
+ // keep an instruction count
+ thread->numInst++;
+ thread->numInsts++;
+
+ cpu->numInst++;
+ //
+ // Pick off the software prefetches
+ //
+#ifdef TARGET_ALPHA
+ if (inst->isDataPrefetch()) {
+ stat_com_swp[tid]++;
+ } else {
+ stat_com_inst[tid]++;
+ }
+#else
+ stat_com_inst[tid]++;
+#endif
+
+ //
+ // Control Instructions
+ //
+ if (inst->isControl())
+ stat_com_branches[tid]++;
+
+ //
+ // Memory references
+ //
+ if (inst->isMemRef()) {
+ stat_com_refs[tid]++;
+
+ if (inst->isLoad()) {
+ stat_com_loads[tid]++;
+ }
+ }
+
+ if (inst->isMemBarrier()) {
+ stat_com_membars[tid]++;
+ }
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::dumpInsts()
+{
+ int num = 0;
+ int valid_num = 0;
+
+ InstListIt inst_list_it = --(instList.end());
+
+ cprintf("ExeList size: %i\n", exeList.size());
+
+ cprintf("Inst list size: %i\n", instList.size());
+
+ while (inst_list_it != instList.end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+ if (!(*inst_list_it)->isSquashed()) {
+ if (!(*inst_list_it)->isIssued()) {
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ } else if ((*inst_list_it)->isMemRef() &&
+ !(*inst_list_it)->memOpDone) {
+ // Loads that have not been marked as executed still count
+ // towards the total instructions.
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ }
+ }
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Issued:%i\nSquashed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+
+ if ((*inst_list_it)->isMemRef()) {
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ }
+
+ cprintf("\n");
+
+ inst_list_it--;
+ ++num;
+ }
+
+ cprintf("Waiting list size: %i\n", waitingList.size());
+
+ inst_list_it = --(waitingList.end());
+
+ while (inst_list_it != waitingList.end())
+ {
+ cprintf("Instruction:%i\n",
+ num);
+ if (!(*inst_list_it)->isSquashed()) {
+ if (!(*inst_list_it)->isIssued()) {
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ } else if ((*inst_list_it)->isMemRef() &&
+ !(*inst_list_it)->memOpDone) {
+ // Loads that have not been marked as executed still count
+ // towards the total instructions.
+ ++valid_num;
+ cprintf("Count:%i\n", valid_num);
+ }
+ }
+
+ cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+ "Issued:%i\nSquashed:%i\n",
+ (*inst_list_it)->readPC(),
+ (*inst_list_it)->seqNum,
+ (*inst_list_it)->threadNumber,
+ (*inst_list_it)->isIssued(),
+ (*inst_list_it)->isSquashed());
+
+ if ((*inst_list_it)->isMemRef()) {
+ cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+ }
+
+ cprintf("\n");
+
+ inst_list_it--;
+ ++num;
+ }
+
+ cprintf("waitingMemOps list size: %i\n", waitingMemOps.size());
+
+ MemIt waiting_it = waitingMemOps.begin();
+
+ while (waiting_it != waitingMemOps.end())
+ {
+ cprintf("[sn:%lli] ", (*waiting_it));
+ waiting_it++;
+ ++num;
+ }
+ cprintf("\n");
+}
--- /dev/null
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/ozone/ozone_impl.hh"
+#include "cpu/ozone/lw_lsq_impl.hh"
+
+// Force the instantiation of LDSTQ for all the implementations we care about.
+template class OzoneLWLSQ<OzoneImpl>;
+
--- /dev/null
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OZONE_LW_LSQ_HH__
+#define __CPU_OZONE_LW_LSQ_HH__
+
+#include <list>
+#include <map>
+#include <queue>
+#include <algorithm>
+
+#include "arch/faults.hh"
+#include "arch/isa_traits.hh"
+#include "config/full_system.hh"
+#include "base/hashmap.hh"
+#include "cpu/inst_seq.hh"
+#include "mem/packet.hh"
+#include "mem/port.hh"
+//#include "mem/page_table.hh"
+#include "sim/debug.hh"
+#include "sim/sim_object.hh"
+
+//class PageTable;
+
+/**
+ * Class that implements the actual LQ and SQ for each specific thread.
+ * Both are circular queues; load entries are freed upon committing, while
+ * store entries are freed once they writeback. The LSQUnit tracks if there
+ * are memory ordering violations, and also detects partial load to store
+ * forwarding cases (a store only has part of a load's data) that requires
+ * the load to wait until the store writes back. In the former case it
+ * holds onto the instruction until the dependence unit looks at it, and
+ * in the latter it stalls the LSQ until the store writes back. At that
+ * point the load is replayed.
+ */
+template <class Impl>
+class OzoneLWLSQ {
+ public:
+ typedef typename Impl::Params Params;
+ typedef typename Impl::FullCPU FullCPU;
+ typedef typename Impl::BackEnd BackEnd;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+ typedef typename Impl::IssueStruct IssueStruct;
+
+ typedef TheISA::IntReg IntReg;
+
+ typedef typename std::map<InstSeqNum, DynInstPtr>::iterator LdMapIt;
+
+ private:
+ class StoreCompletionEvent : public Event {
+ public:
+ /** Constructs a store completion event. */
+ StoreCompletionEvent(DynInstPtr &inst, BackEnd *be,
+ Event *wb_event, OzoneLWLSQ *lsq_ptr);
+
+ /** Processes the store completion event. */
+ void process();
+
+ /** Returns the description of this event. */
+ const char *description();
+
+ private:
+ /** The store index of the store being written back. */
+ DynInstPtr inst;
+
+ BackEnd *be;
+ /** The writeback event for the store. Needed for store
+ * conditionals.
+ */
+ public:
+ Event *wbEvent;
+ bool miss;
+ private:
+ /** The pointer to the LSQ unit that issued the store. */
+ OzoneLWLSQ<Impl> *lsqPtr;
+ };
+
+ public:
+ /** Constructs an LSQ unit. init() must be called prior to use. */
+ OzoneLWLSQ();
+
+ /** Initializes the LSQ unit with the specified number of entries. */
+ void init(Params *params, unsigned maxLQEntries,
+ unsigned maxSQEntries, unsigned id);
+
+ /** Returns the name of the LSQ unit. */
+ std::string name() const;
+
+ /** Sets the CPU pointer. */
+ void setCPU(FullCPU *cpu_ptr)
+ { cpu = cpu_ptr; }
+
+ /** Sets the back-end stage pointer. */
+ void setBE(BackEnd *be_ptr)
+ { be = be_ptr; }
+
+ /** Sets the page table pointer. */
+// void setPageTable(PageTable *pt_ptr);
+
+ /** Ticks the LSQ unit, which in this case only resets the number of
+ * used cache ports.
+ * @todo: Move the number of used ports up to the LSQ level so it can
+ * be shared by all LSQ units.
+ */
+ void tick() { usedPorts = 0; }
+
+ /** Inserts an instruction. */
+ void insert(DynInstPtr &inst);
+ /** Inserts a load instruction. */
+ void insertLoad(DynInstPtr &load_inst);
+ /** Inserts a store instruction. */
+ void insertStore(DynInstPtr &store_inst);
+
+ /** Executes a load instruction. */
+ Fault executeLoad(DynInstPtr &inst);
+
+ /** Executes a store instruction. */
+ Fault executeStore(DynInstPtr &inst);
+
+ /** Commits the head load. */
+ void commitLoad();
+ /** Commits loads older than a specific sequence number. */
+ void commitLoads(InstSeqNum &youngest_inst);
+
+ /** Commits stores older than a specific sequence number. */
+ void commitStores(InstSeqNum &youngest_inst);
+
+ /** Writes back stores. */
+ void writebackStores();
+
+ // @todo: Include stats in the LSQ unit.
+ //void regStats();
+
+ /** Clears all the entries in the LQ. */
+ void clearLQ();
+
+ /** Clears all the entries in the SQ. */
+ void clearSQ();
+
+ /** Resizes the LQ to a given size. */
+ void resizeLQ(unsigned size);
+
+ /** Resizes the SQ to a given size. */
+ void resizeSQ(unsigned size);
+
+ /** Squashes all instructions younger than a specific sequence number. */
+ void squash(const InstSeqNum &squashed_num);
+
+ /** Returns if there is a memory ordering violation. Value is reset upon
+ * call to getMemDepViolator().
+ */
+ bool violation() { return memDepViolator; }
+
+ /** Returns the memory ordering violator. */
+ DynInstPtr getMemDepViolator();
+
+ /** Returns if a load became blocked due to the memory system. It clears
+ * the bool's value upon this being called.
+ */
+ bool loadBlocked()
+ { return isLoadBlocked; }
+
+ void clearLoadBlocked()
+ { isLoadBlocked = false; }
+
+ bool isLoadBlockedHandled()
+ { return loadBlockedHandled; }
+
+ void setLoadBlockedHandled()
+ { loadBlockedHandled = true; }
+
+ /** Returns the number of free entries (min of free LQ and SQ entries). */
+ unsigned numFreeEntries();
+
+ /** Returns the number of loads ready to execute. */
+ int numLoadsReady();
+
+ /** Returns the number of loads in the LQ. */
+ int numLoads() { return loads; }
+
+ /** Returns the number of stores in the SQ. */
+ int numStores() { return stores; }
+
+ /** Returns if either the LQ or SQ is full. */
+ bool isFull() { return lqFull() || sqFull(); }
+
+ /** Returns if the LQ is full. */
+ bool lqFull() { return loads >= (LQEntries - 1); }
+
+ /** Returns if the SQ is full. */
+ bool sqFull() { return stores >= (SQEntries - 1); }
+
+ /** Debugging function to dump instructions in the LSQ. */
+ void dumpInsts();
+
+ /** Returns the number of instructions in the LSQ. */
+ unsigned getCount() { return loads + stores; }
+
+ /** Returns if there are any stores to writeback. */
+ bool hasStoresToWB() { return storesToWB; }
+
+ /** Returns the number of stores to writeback. */
+ int numStoresToWB() { return storesToWB; }
+
+ /** Returns if the LSQ unit will writeback on this cycle. */
+ bool willWB() { return storeQueue.back().canWB &&
+ !storeQueue.back().completed/* &&
+ !dcacheInterface->isBlocked()*/; }
+
+ void switchOut();
+
+ void takeOverFrom(ExecContext *old_xc = NULL);
+
+ bool isSwitchedOut() { return switchedOut; }
+
+ bool switchedOut;
+
+ private:
+ /** Completes the store at the specified index. */
+ void completeStore(int store_idx);
+
+ private:
+ /** Pointer to the CPU. */
+ FullCPU *cpu;
+
+ /** Pointer to the back-end stage. */
+ BackEnd *be;
+
+ MemObject *mem;
+
+ class DcachePort : public Port
+ {
+ protected:
+ FullCPU *cpu;
+
+ public:
+ DcachePort(const std::string &_name, FullCPU *_cpu)
+ : Port(_name), cpu(_cpu)
+ { }
+
+ protected:
+ virtual Tick recvAtomic(PacketPtr pkt);
+
+ virtual void recvFunctional(PacketPtr pkt);
+
+ virtual void recvStatusChange(Status status);
+
+ virtual void getDeviceAddressRanges(AddrRangeList &resp,
+ AddrRangeList &snoop)
+ { resp.clear(); snoop.clear(); }
+
+ virtual bool recvTiming(PacketPtr pkt);
+
+ virtual void recvRetry();
+ };
+
+ /** Pointer to the D-cache. */
+ DcachePort dcachePort;
+
+ /** Pointer to the page table. */
+// PageTable *pTable;
+
+ public:
+ struct SQEntry {
+ /** Constructs an empty store queue entry. */
+ SQEntry()
+ : inst(NULL), req(NULL), size(0), data(0),
+ canWB(0), committed(0), completed(0), lqIt(NULL)
+ { }
+
+ /** Constructs a store queue entry for a given instruction. */
+ SQEntry(DynInstPtr &_inst)
+ : inst(_inst), req(NULL), size(0), data(0),
+ canWB(0), committed(0), completed(0), lqIt(NULL)
+ { }
+
+ /** The store instruction. */
+ DynInstPtr inst;
+ /** The memory request for the store. */
+ RequestPtr req;
+ /** The size of the store. */
+ int size;
+ /** The store data. */
+ IntReg data;
+ /** Whether or not the store can writeback. */
+ bool canWB;
+ /** Whether or not the store is committed. */
+ bool committed;
+ /** Whether or not the store is completed. */
+ bool completed;
+
+ typename std::list<DynInstPtr>::iterator lqIt;
+ };
+
+ enum Status {
+ Running,
+ Idle,
+ DcacheMissStall,
+ DcacheMissSwitch
+ };
+
+ private:
+ /** The OzoneLWLSQ thread id. */
+ unsigned lsqID;
+
+ /** The status of the LSQ unit. */
+ Status _status;
+
+ /** The store queue. */
+ std::list<SQEntry> storeQueue;
+ /** The load queue. */
+ std::list<DynInstPtr> loadQueue;
+
+ typedef typename std::list<SQEntry>::iterator SQIt;
+ typedef typename std::list<DynInstPtr>::iterator LQIt;
+
+
+ struct HashFn {
+ size_t operator() (const int a) const
+ {
+ unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF;
+
+ return hash;
+ }
+ };
+
+ m5::hash_map<int, SQIt, HashFn> SQItHash;
+ std::queue<int> SQIndices;
+ m5::hash_map<int, LQIt, HashFn> LQItHash;
+ std::queue<int> LQIndices;
+
+ typedef typename m5::hash_map<int, LQIt, HashFn>::iterator LQHashIt;
+ typedef typename m5::hash_map<int, SQIt, HashFn>::iterator SQHashIt;
+ // Consider making these 16 bits
+ /** The number of LQ entries. */
+ unsigned LQEntries;
+ /** The number of SQ entries. */
+ unsigned SQEntries;
+
+ /** The number of load instructions in the LQ. */
+ int loads;
+ /** The number of store instructions in the SQ (excludes those waiting to
+ * writeback).
+ */
+ int stores;
+
+ int storesToWB;
+
+ /// @todo Consider moving to a more advanced model with write vs read ports
+ /** The number of cache ports available each cycle. */
+ int cachePorts;
+
+ /** The number of used cache ports in this cycle. */
+ int usedPorts;
+
+ //list<InstSeqNum> mshrSeqNums;
+
+ //Stats::Scalar<> dcacheStallCycles;
+ Counter lastDcacheStall;
+
+ // Make these per thread?
+ /** Whether or not the LSQ is stalled. */
+ bool stalled;
+ /** The store that causes the stall due to partial store to load
+ * forwarding.
+ */
+ InstSeqNum stallingStoreIsn;
+ /** The index of the above store. */
+ LQIt stallingLoad;
+
+ /** Whether or not a load is blocked due to the memory system. It is
+ * cleared when this value is checked via loadBlocked().
+ */
+ bool isLoadBlocked;
+
+ bool loadBlockedHandled;
+
+ InstSeqNum blockedLoadSeqNum;
+
+ /** The oldest faulting load instruction. */
+ DynInstPtr loadFaultInst;
+ /** The oldest faulting store instruction. */
+ DynInstPtr storeFaultInst;
+
+ /** The oldest load that caused a memory ordering violation. */
+ DynInstPtr memDepViolator;
+
+ // Will also need how many read/write ports the Dcache has. Or keep track
+ // of that in stage that is one level up, and only call executeLoad/Store
+ // the appropriate number of times.
+
+ public:
+ /** Executes the load at the given index. */
+ template <class T>
+ Fault read(RequestPtr req, T &data, int load_idx);
+
+ /** Executes the store at the given index. */
+ template <class T>
+ Fault write(RequestPtr req, T &data, int store_idx);
+
+ /** Returns the sequence number of the head load instruction. */
+ InstSeqNum getLoadHeadSeqNum()
+ {
+ if (!loadQueue.empty()) {
+ return loadQueue.back()->seqNum;
+ } else {
+ return 0;
+ }
+
+ }
+
+ /** Returns the sequence number of the head store instruction. */
+ InstSeqNum getStoreHeadSeqNum()
+ {
+ if (!storeQueue.empty()) {
+ return storeQueue.back().inst->seqNum;
+ } else {
+ return 0;
+ }
+
+ }
+
+ /** Returns whether or not the LSQ unit is stalled. */
+ bool isStalled() { return stalled; }
+};
+
+template <class Impl>
+template <class T>
+Fault
+OzoneLWLSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
+{
+ //Depending on issue2execute delay a squashed load could
+ //execute if it is found to be squashed in the same
+ //cycle it is scheduled to execute
+ typename m5::hash_map<int, LQIt, HashFn>::iterator
+ lq_hash_it = LQItHash.find(load_idx);
+ assert(lq_hash_it != LQItHash.end());
+ DynInstPtr inst = (*(*lq_hash_it).second);
+
+ // Make sure this isn't an uncacheable access
+ // A bit of a hackish way to get uncached accesses to work only if they're
+ // at the head of the LSQ and are ready to commit (at the head of the ROB
+ // too).
+ // @todo: Fix uncached accesses.
+ if (req->getFlags() & UNCACHEABLE &&
+ (inst != loadQueue.back() || !inst->reachedCommit)) {
+ DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of "
+ "commit/LSQ!\n",
+ inst->seqNum);
+ be->rescheduleMemInst(inst);
+ return TheISA::genMachineCheckFault();
+ }
+
+ // Check the SQ for any previous stores that might lead to forwarding
+ SQIt sq_it = storeQueue.begin();
+ int store_size = 0;
+
+ DPRINTF(OzoneLSQ, "Read called, load idx: %i addr: %#x\n",
+ load_idx, req->getPaddr());
+
+ while (sq_it != storeQueue.end() && (*sq_it).inst->seqNum > inst->seqNum)
+ ++sq_it;
+
+ while (1) {
+ // End once we've reached the top of the LSQ
+ if (sq_it == storeQueue.end()) {
+ break;
+ }
+
+ assert((*sq_it).inst);
+
+ store_size = (*sq_it).size;
+
+ if (store_size == 0) {
+ sq_it++;
+ continue;
+ }
+
+ // Check if the store data is within the lower and upper bounds of
+ // addresses that the request needs.
+ bool store_has_lower_limit =
+ req->getVaddr() >= (*sq_it).inst->effAddr;
+ bool store_has_upper_limit =
+ (req->getVaddr() + req->getSize()) <= ((*sq_it).inst->effAddr +
+ store_size);
+ bool lower_load_has_store_part =
+ req->getVaddr() < ((*sq_it).inst->effAddr +
+ store_size);
+ bool upper_load_has_store_part =
+ (req->getVaddr() + req->getSize()) > (*sq_it).inst->effAddr;
+
+ // If the store's data has all of the data needed, we can forward.
+ if (store_has_lower_limit && store_has_upper_limit) {
+ int shift_amt = req->getVaddr() & (store_size - 1);
+ // Assumes byte addressing
+ shift_amt = shift_amt << 3;
+
+ // Cast this to type T?
+ data = (*sq_it).data >> shift_amt;
+
+ assert(!inst->memData);
+ inst->memData = new uint8_t[64];
+
+ memcpy(inst->memData, &data, req->getSize());
+
+ DPRINTF(OzoneLSQ, "Forwarding from store [sn:%lli] to load to "
+ "[sn:%lli] addr %#x, data %#x\n",
+ (*sq_it).inst->seqNum, inst->seqNum, req->vaddr, *(inst->memData));
+/*
+ typename BackEnd::LdWritebackEvent *wb =
+ new typename BackEnd::LdWritebackEvent(inst,
+ be);
+
+ // We'll say this has a 1 cycle load-store forwarding latency
+ // for now.
+ // FIXME - Need to make this a parameter.
+ wb->schedule(curTick);
+*/
+ // Should keep track of stat for forwarded data
+ return NoFault;
+ } else if ((store_has_lower_limit && lower_load_has_store_part) ||
+ (store_has_upper_limit && upper_load_has_store_part) ||
+ (lower_load_has_store_part && upper_load_has_store_part)) {
+ // This is the partial store-load forwarding case where a store
+ // has only part of the load's data.
+
+ // If it's already been written back, then don't worry about
+ // stalling on it.
+ if ((*sq_it).completed) {
+ sq_it++;
+ break;
+ }
+
+ // Must stall load and force it to retry, so long as it's the oldest
+ // load that needs to do so.
+ if (!stalled ||
+ (stalled &&
+ inst->seqNum <
+ (*stallingLoad)->seqNum)) {
+ stalled = true;
+ stallingStoreIsn = (*sq_it).inst->seqNum;
+ stallingLoad = (*lq_hash_it).second;
+ }
+
+ // Tell IQ/mem dep unit that this instruction will need to be
+ // rescheduled eventually
+ be->rescheduleMemInst(inst);
+
+ DPRINTF(OzoneLSQ, "Load-store forwarding mis-match. "
+ "Store [sn:%lli] to load addr %#x\n",
+ (*sq_it).inst->seqNum, req->vaddr);
+
+ return NoFault;
+ }
+ sq_it++;
+ }
+
+ // If there's no forwarding case, then go access memory
+ DPRINTF(OzoneLSQ, "Doing functional access for inst PC %#x\n",
+ inst->readPC());
+
+ assert(!inst->memData);
+ inst->memData = new uint8_t[64];
+
+ ++usedPorts;
+
+ DPRINTF(OzoneLSQ, "Doing timing access for inst PC %#x\n",
+ inst->readPC());
+
+ PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast);
+ data_pkt->dataStatic(inst->memData);
+
+ // if we have a cache, do cache access too
+ if (!dcachePort.sendTiming(data_pkt)) {
+ // There's an older load that's already going to squash.
+ if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum)
+ return NoFault;
+
+ // Record that the load was blocked due to memory. This
+ // load will squash all instructions after it, be
+ // refetched, and re-executed.
+ isLoadBlocked = true;
+ loadBlockedHandled = false;
+ blockedLoadSeqNum = inst->seqNum;
+ // No fault occurred, even though the interface is blocked.
+ return NoFault;
+ }
+
+ if (data_pkt->result != Packet::Success) {
+ DPRINTF(OzoneLSQ, "OzoneLSQ: D-cache miss!\n");
+ DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
+ inst->seqNum);
+ } else {
+ DPRINTF(OzoneLSQ, "OzoneLSQ: D-cache hit!\n");
+ DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
+ inst->seqNum);
+ }
+
+ return NoFault;
+}
+
+template <class Impl>
+template <class T>
+Fault
+OzoneLWLSQ<Impl>::write(RequestPtr req, T &data, int store_idx)
+{
+ SQHashIt sq_hash_it = SQItHash.find(store_idx);
+ assert(sq_hash_it != SQItHash.end());
+
+ SQIt sq_it = (*sq_hash_it).second;
+ assert((*sq_it).inst);
+
+ DPRINTF(OzoneLSQ, "Doing write to store idx %i, addr %#x data %#x"
+ " | [sn:%lli]\n",
+ store_idx, req->getPaddr(), data, (*sq_it).inst->seqNum);
+
+ (*sq_it).req = req;
+ (*sq_it).size = sizeof(T);
+ (*sq_it).data = data;
+/*
+ assert(!req->data);
+ req->data = new uint8_t[64];
+ memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size);
+*/
+
+ // This function only writes the data to the store queue, so no fault
+ // can happen here.
+ return NoFault;
+}
+
+#endif // __CPU_OZONE_LW_LSQ_HH__
--- /dev/null
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arch/isa_traits.hh"
+#include "base/str.hh"
+#include "cpu/ozone/lw_lsq.hh"
+#include "cpu/checker/cpu.hh"
+
+template <class Impl>
+OzoneLWLSQ<Impl>::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst,
+ BackEnd *_be,
+ Event *wb_event,
+ OzoneLWLSQ<Impl> *lsq_ptr)
+ : Event(&mainEventQueue),
+ inst(_inst),
+ be(_be),
+ wbEvent(wb_event),
+ miss(false),
+ lsqPtr(lsq_ptr)
+{
+ this->setFlags(Event::AutoDelete);
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::StoreCompletionEvent::process()
+{
+ DPRINTF(OzoneLSQ, "Cache miss complete for store [sn:%lli]\n",
+ inst->seqNum);
+
+ //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
+
+// lsqPtr->cpu->wakeCPU();
+ if (lsqPtr->isSwitchedOut()) {
+ if (wbEvent)
+ delete wbEvent;
+
+ return;
+ }
+
+ if (wbEvent) {
+ wbEvent->process();
+ delete wbEvent;
+ }
+
+ lsqPtr->completeStore(inst->sqIdx);
+ if (miss)
+ be->removeDcacheMiss(inst);
+}
+
+template <class Impl>
+const char *
+OzoneLWLSQ<Impl>::StoreCompletionEvent::description()
+{
+ return "LSQ store completion event";
+}
+
+template <class Impl>
+OzoneLWLSQ<Impl>::OzoneLWLSQ()
+ : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false),
+ loadBlockedHandled(false)
+{
+}
+
+template<class Impl>
+void
+OzoneLWLSQ<Impl>::init(Params *params, unsigned maxLQEntries,
+ unsigned maxSQEntries, unsigned id)
+{
+ DPRINTF(OzoneLSQ, "Creating OzoneLWLSQ%i object.\n",id);
+
+ lsqID = id;
+
+ LQEntries = maxLQEntries;
+ SQEntries = maxSQEntries;
+
+ for (int i = 0; i < LQEntries * 2; i++) {
+ LQIndices.push(i);
+ SQIndices.push(i);
+ }
+
+ usedPorts = 0;
+ cachePorts = params->cachePorts;
+
+ dcacheInterface = params->dcacheInterface;
+
+ loadFaultInst = storeFaultInst = memDepViolator = NULL;
+
+ blockedLoadSeqNum = 0;
+}
+
+template<class Impl>
+std::string
+OzoneLWLSQ<Impl>::name() const
+{
+ return "lsqunit";
+}
+
+template<class Impl>
+void
+OzoneLWLSQ<Impl>::clearLQ()
+{
+ loadQueue.clear();
+}
+
+template<class Impl>
+void
+OzoneLWLSQ<Impl>::clearSQ()
+{
+ storeQueue.clear();
+}
+/*
+template<class Impl>
+void
+OzoneLWLSQ<Impl>::setPageTable(PageTable *pt_ptr)
+{
+ DPRINTF(OzoneLSQ, "Setting the page table pointer.\n");
+ pTable = pt_ptr;
+}
+*/
+template<class Impl>
+void
+OzoneLWLSQ<Impl>::resizeLQ(unsigned size)
+{
+ assert( size >= LQEntries);
+
+ if (size > LQEntries) {
+ while (size > loadQueue.size()) {
+ DynInstPtr dummy;
+ loadQueue.push_back(dummy);
+ LQEntries++;
+ }
+ } else {
+ LQEntries = size;
+ }
+
+}
+
+template<class Impl>
+void
+OzoneLWLSQ<Impl>::resizeSQ(unsigned size)
+{
+ if (size > SQEntries) {
+ while (size > storeQueue.size()) {
+ SQEntry dummy;
+ storeQueue.push_back(dummy);
+ SQEntries++;
+ }
+ } else {
+ SQEntries = size;
+ }
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::insert(DynInstPtr &inst)
+{
+ // Make sure we really have a memory reference.
+ assert(inst->isMemRef());
+
+ // Make sure it's one of the two classes of memory references.
+ assert(inst->isLoad() || inst->isStore());
+
+ if (inst->isLoad()) {
+ insertLoad(inst);
+ } else {
+ insertStore(inst);
+ }
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::insertLoad(DynInstPtr &load_inst)
+{
+ assert(loads < LQEntries * 2);
+ assert(!LQIndices.empty());
+ int load_index = LQIndices.front();
+ LQIndices.pop();
+
+ DPRINTF(OzoneLSQ, "Inserting load PC %#x, idx:%i [sn:%lli]\n",
+ load_inst->readPC(), load_index, load_inst->seqNum);
+
+ load_inst->lqIdx = load_index;
+
+ loadQueue.push_front(load_inst);
+ LQItHash[load_index] = loadQueue.begin();
+
+ ++loads;
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::insertStore(DynInstPtr &store_inst)
+{
+ // Make sure it is not full before inserting an instruction.
+ assert(stores - storesToWB < SQEntries);
+
+ assert(!SQIndices.empty());
+ int store_index = SQIndices.front();
+ SQIndices.pop();
+
+ DPRINTF(OzoneLSQ, "Inserting store PC %#x, idx:%i [sn:%lli]\n",
+ store_inst->readPC(), store_index, store_inst->seqNum);
+
+ store_inst->sqIdx = store_index;
+ SQEntry entry(store_inst);
+ if (loadQueue.empty()) {
+ entry.lqIt = loadQueue.end();
+ } else {
+ entry.lqIt = loadQueue.begin();
+ }
+ storeQueue.push_front(entry);
+
+ SQItHash[store_index] = storeQueue.begin();
+
+ ++stores;
+}
+
+template <class Impl>
+typename Impl::DynInstPtr
+OzoneLWLSQ<Impl>::getMemDepViolator()
+{
+ DynInstPtr temp = memDepViolator;
+
+ memDepViolator = NULL;
+
+ return temp;
+}
+
+template <class Impl>
+unsigned
+OzoneLWLSQ<Impl>::numFreeEntries()
+{
+ unsigned free_lq_entries = LQEntries - loads;
+ unsigned free_sq_entries = SQEntries - stores;
+
+ // Both the LQ and SQ entries have an extra dummy entry to differentiate
+ // empty/full conditions. Subtract 1 from the free entries.
+ if (free_lq_entries < free_sq_entries) {
+ return free_lq_entries - 1;
+ } else {
+ return free_sq_entries - 1;
+ }
+}
+
+template <class Impl>
+int
+OzoneLWLSQ<Impl>::numLoadsReady()
+{
+ int retval = 0;
+ LQIt lq_it = loadQueue.begin();
+ LQIt end_it = loadQueue.end();
+
+ while (lq_it != end_it) {
+ if ((*lq_it)->readyToIssue()) {
+ ++retval;
+ }
+ }
+
+ return retval;
+}
+
+template <class Impl>
+Fault
+OzoneLWLSQ<Impl>::executeLoad(DynInstPtr &inst)
+{
+ // Execute a specific load.
+ Fault load_fault = NoFault;
+
+ DPRINTF(OzoneLSQ, "Executing load PC %#x, [sn:%lli]\n",
+ inst->readPC(),inst->seqNum);
+
+ // Make sure it's really in the list.
+ // Normally it should always be in the list. However,
+ /* due to a syscall it may not be the list.
+#ifdef DEBUG
+ int i = loadHead;
+ while (1) {
+ if (i == loadTail && !find(inst)) {
+ assert(0 && "Load not in the queue!");
+ } else if (loadQueue[i] == inst) {
+ break;
+ }
+
+ i = i + 1;
+ if (i >= LQEntries) {
+ i = 0;
+ }
+ }
+#endif // DEBUG*/
+
+ load_fault = inst->initiateAcc();
+
+ // Might want to make sure that I'm not overwriting a previously faulting
+ // instruction that hasn't been checked yet.
+ // Actually probably want the oldest faulting load
+ if (load_fault != NoFault) {
+ DPRINTF(OzoneLSQ, "Load [sn:%lli] has a fault\n", inst->seqNum);
+ // Maybe just set it as can commit here, although that might cause
+ // some other problems with sending traps to the ROB too quickly.
+ be->instToCommit(inst);
+// iewStage->activityThisCycle();
+ }
+
+ return load_fault;
+}
+
+template <class Impl>
+Fault
+OzoneLWLSQ<Impl>::executeStore(DynInstPtr &store_inst)
+{
+ // Make sure that a store exists.
+ assert(stores != 0);
+
+ int store_idx = store_inst->sqIdx;
+ SQHashIt sq_hash_it = SQItHash.find(store_idx);
+ assert(sq_hash_it != SQItHash.end());
+ DPRINTF(OzoneLSQ, "Executing store PC %#x [sn:%lli]\n",
+ store_inst->readPC(), store_inst->seqNum);
+
+ SQIt sq_it = (*sq_hash_it).second;
+
+ Fault store_fault = store_inst->initiateAcc();
+
+ // Store size should now be available. Use it to get proper offset for
+ // addr comparisons.
+ int size = (*sq_it).size;
+
+ if (size == 0) {
+ DPRINTF(OzoneLSQ,"Fault on Store PC %#x, [sn:%lli],Size = 0\n",
+ store_inst->readPC(),store_inst->seqNum);
+
+ return store_fault;
+ }
+
+ assert(store_fault == NoFault);
+
+ if (!storeFaultInst) {
+ if (store_fault != NoFault) {
+ panic("Fault in a store instruction!");
+ storeFaultInst = store_inst;
+ } else if (store_inst->isStoreConditional()) {
+ // Store conditionals need to set themselves as able to
+ // writeback if we haven't had a fault by here.
+ (*sq_it).canWB = true;
+
+ ++storesToWB;
+ DPRINTF(OzoneLSQ, "Nonspeculative store! storesToWB:%i\n",
+ storesToWB);
+ }
+ }
+
+ LQIt lq_it = --(loadQueue.end());
+
+ if (!memDepViolator) {
+ while (lq_it != loadQueue.end()) {
+ if ((*lq_it)->seqNum < store_inst->seqNum) {
+ lq_it--;
+ continue;
+ }
+ // Actually should only check loads that have actually executed
+ // Might be safe because effAddr is set to InvalAddr when the
+ // dyn inst is created.
+
+ // Must actually check all addrs in the proper size range
+ // Which is more correct than needs to be. What if for now we just
+ // assume all loads are quad-word loads, and do the addr based
+ // on that.
+ // @todo: Fix this, magic number being used here
+ if (((*lq_it)->effAddr >> 8) ==
+ (store_inst->effAddr >> 8)) {
+ // A load incorrectly passed this store. Squash and refetch.
+ // For now return a fault to show that it was unsuccessful.
+ memDepViolator = (*lq_it);
+
+ return TheISA::genMachineCheckFault();
+ }
+
+ lq_it--;
+ }
+
+ // If we've reached this point, there was no violation.
+ memDepViolator = NULL;
+ }
+
+ return store_fault;
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::commitLoad()
+{
+ assert(!loadQueue.empty());
+
+ DPRINTF(OzoneLSQ, "[sn:%lli] Committing head load instruction, PC %#x\n",
+ loadQueue.back()->seqNum, loadQueue.back()->readPC());
+
+ LQIndices.push(loadQueue.back()->lqIdx);
+ LQItHash.erase(loadQueue.back()->lqIdx);
+
+ loadQueue.pop_back();
+
+ --loads;
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::commitLoads(InstSeqNum &youngest_inst)
+{
+ assert(loads == 0 || !loadQueue.empty());
+
+ while (loads != 0 &&
+ loadQueue.back()->seqNum <= youngest_inst) {
+ commitLoad();
+ }
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::commitStores(InstSeqNum &youngest_inst)
+{
+ assert(stores == 0 || !storeQueue.empty());
+
+ SQIt sq_it = --(storeQueue.end());
+ while (!storeQueue.empty() && sq_it != storeQueue.end()) {
+ assert((*sq_it).inst);
+ if (!(*sq_it).canWB) {
+ if ((*sq_it).inst->seqNum > youngest_inst) {
+ break;
+ }
+ ++storesToWB;
+
+ DPRINTF(OzoneLSQ, "Marking store as able to write back, PC "
+ "%#x [sn:%lli], storesToWB:%i\n",
+ (*sq_it).inst->readPC(),
+ (*sq_it).inst->seqNum,
+ storesToWB);
+
+ (*sq_it).canWB = true;
+ }
+
+ sq_it--;
+ }
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::writebackStores()
+{
+ SQIt sq_it = --(storeQueue.end());
+ while (storesToWB > 0 &&
+ sq_it != storeQueue.end() &&
+ (*sq_it).inst &&
+ (*sq_it).canWB &&
+ usedPorts < cachePorts) {
+
+ DynInstPtr inst = (*sq_it).inst;
+
+ if ((*sq_it).size == 0 && !(*sq_it).completed) {
+ sq_it--;
+ completeStore(inst->sqIdx);
+
+ continue;
+ }
+
+ if (inst->isDataPrefetch() || (*sq_it).committed) {
+ sq_it--;
+ continue;
+ }
+
+ if (dcacheInterface && dcacheInterface->isBlocked()) {
+ DPRINTF(OzoneLSQ, "Unable to write back any more stores, cache"
+ " is blocked!\n");
+ break;
+ }
+
+ ++usedPorts;
+
+ assert((*sq_it).req);
+ assert(!(*sq_it).committed);
+
+ (*sq_it).committed = true;
+
+ MemReqPtr req = (*sq_it).req;
+
+ req->cmd = Write;
+ req->completionEvent = NULL;
+ req->time = curTick;
+
+ switch((*sq_it).size) {
+ case 1:
+ cpu->write(req, (uint8_t &)(*sq_it).data);
+ break;
+ case 2:
+ cpu->write(req, (uint16_t &)(*sq_it).data);
+ break;
+ case 4:
+ cpu->write(req, (uint32_t &)(*sq_it).data);
+ break;
+ case 8:
+ cpu->write(req, (uint64_t &)(*sq_it).data);
+ break;
+ default:
+ panic("Unexpected store size!\n");
+ }
+ if (!(req->flags & LOCKED)) {
+ (*sq_it).inst->setCompleted();
+ if (cpu->checker) {
+ cpu->checker->tick((*sq_it).inst);
+ }
+ }
+
+ DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x "
+ "to Addr:%#x, data:%#x [sn:%lli]\n",
+ inst->sqIdx,inst->readPC(),
+ req->paddr, *(req->data),
+ inst->seqNum);
+
+ if (dcacheInterface) {
+ assert(!req->completionEvent);
+ StoreCompletionEvent *store_event = new
+ StoreCompletionEvent(inst, be, NULL, this);
+ req->completionEvent = store_event;
+
+ MemAccessResult result = dcacheInterface->access(req);
+
+ if (isStalled() &&
+ inst->seqNum == stallingStoreIsn) {
+ DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
+ "load [sn:%lli]\n",
+ stallingStoreIsn, (*stallingLoad)->seqNum);
+ stalled = false;
+ stallingStoreIsn = 0;
+ be->replayMemInst((*stallingLoad));
+ }
+
+ if (result != MA_HIT && dcacheInterface->doEvents()) {
+ store_event->miss = true;
+ typename BackEnd::LdWritebackEvent *wb = NULL;
+ if (req->flags & LOCKED) {
+ wb = new typename BackEnd::LdWritebackEvent(inst,
+ be);
+ store_event->wbEvent = wb;
+ }
+
+ DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n");
+
+// DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
+// inst->seqNum);
+
+ be->addDcacheMiss(inst);
+
+ lastDcacheStall = curTick;
+
+ _status = DcacheMissStall;
+
+ // Increment stat here or something
+
+ sq_it--;
+ } else {
+ DPRINTF(OzoneLSQ,"D-Cache: Write Hit on idx:%i !\n",
+ inst->sqIdx);
+
+// DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
+// inst->seqNum);
+
+ if (req->flags & LOCKED) {
+ // Stx_C does not generate a system port
+ // transaction in the 21264, but that might be
+ // hard to accomplish in this model.
+
+ typename BackEnd::LdWritebackEvent *wb =
+ new typename BackEnd::LdWritebackEvent(inst,
+ be);
+ store_event->wbEvent = wb;
+ }
+ sq_it--;
+ }
+ } else {
+ panic("Must HAVE DCACHE!!!!!\n");
+ }
+ }
+
+ // Not sure this should set it to 0.
+ usedPorts = 0;
+
+ assert(stores >= 0 && storesToWB >= 0);
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::squash(const InstSeqNum &squashed_num)
+{
+ DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!"
+ "(Loads:%i Stores:%i)\n",squashed_num,loads,stores);
+
+
+ LQIt lq_it = loadQueue.begin();
+
+ while (loads != 0 && (*lq_it)->seqNum > squashed_num) {
+ assert(!loadQueue.empty());
+ // Clear the smart pointer to make sure it is decremented.
+ DPRINTF(OzoneLSQ,"Load Instruction PC %#x squashed, "
+ "[sn:%lli]\n",
+ (*lq_it)->readPC(),
+ (*lq_it)->seqNum);
+
+ if (isStalled() && lq_it == stallingLoad) {
+ stalled = false;
+ stallingStoreIsn = 0;
+ stallingLoad = NULL;
+ }
+
+ --loads;
+
+ // Inefficient!
+ LQHashIt lq_hash_it = LQItHash.find((*lq_it)->lqIdx);
+ assert(lq_hash_it != LQItHash.end());
+ LQItHash.erase(lq_hash_it);
+ LQIndices.push((*lq_it)->lqIdx);
+ loadQueue.erase(lq_it++);
+ }
+
+ if (isLoadBlocked) {
+ if (squashed_num < blockedLoadSeqNum) {
+ isLoadBlocked = false;
+ loadBlockedHandled = false;
+ blockedLoadSeqNum = 0;
+ }
+ }
+
+ SQIt sq_it = storeQueue.begin();
+
+ while (stores != 0 && (*sq_it).inst->seqNum > squashed_num) {
+ assert(!storeQueue.empty());
+
+ if ((*sq_it).canWB) {
+ break;
+ }
+
+ // Clear the smart pointer to make sure it is decremented.
+ DPRINTF(OzoneLSQ,"Store Instruction PC %#x idx:%i squashed [sn:%lli]\n",
+ (*sq_it).inst->readPC(), (*sq_it).inst->sqIdx,
+ (*sq_it).inst->seqNum);
+
+ // I don't think this can happen. It should have been cleared by the
+ // stalling load.
+ if (isStalled() &&
+ (*sq_it).inst->seqNum == stallingStoreIsn) {
+ panic("Is stalled should have been cleared by stalling load!\n");
+ stalled = false;
+ stallingStoreIsn = 0;
+ }
+
+ SQHashIt sq_hash_it = SQItHash.find((*sq_it).inst->sqIdx);
+ assert(sq_hash_it != SQItHash.end());
+ SQItHash.erase(sq_hash_it);
+ SQIndices.push((*sq_it).inst->sqIdx);
+ (*sq_it).inst = NULL;
+ (*sq_it).canWB = 0;
+
+ if ((*sq_it).req) {
+ assert(!(*sq_it).req->completionEvent);
+ }
+ (*sq_it).req = NULL;
+ --stores;
+ storeQueue.erase(sq_it++);
+ }
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::dumpInsts()
+{
+ cprintf("Load store queue: Dumping instructions.\n");
+ cprintf("Load queue size: %i\n", loads);
+ cprintf("Load queue: ");
+
+ LQIt lq_it = --(loadQueue.end());
+
+ while (lq_it != loadQueue.end() && (*lq_it)) {
+ cprintf("[sn:%lli] %#x ", (*lq_it)->seqNum,
+ (*lq_it)->readPC());
+
+ lq_it--;
+ }
+
+ cprintf("\nStore queue size: %i\n", stores);
+ cprintf("Store queue: ");
+
+ SQIt sq_it = --(storeQueue.end());
+
+ while (sq_it != storeQueue.end() && (*sq_it).inst) {
+ cprintf("[sn:%lli]\nPC:%#x\nSize:%i\nCommitted:%i\nCompleted:%i\ncanWB:%i\n",
+ (*sq_it).inst->seqNum,
+ (*sq_it).inst->readPC(),
+ (*sq_it).size,
+ (*sq_it).committed,
+ (*sq_it).completed,
+ (*sq_it).canWB);
+
+ sq_it--;
+ }
+
+ cprintf("\n");
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::completeStore(int store_idx)
+{
+ SQHashIt sq_hash_it = SQItHash.find(store_idx);
+ assert(sq_hash_it != SQItHash.end());
+ SQIt sq_it = (*sq_hash_it).second;
+
+ assert((*sq_it).inst);
+ (*sq_it).completed = true;
+ DynInstPtr inst = (*sq_it).inst;
+
+ --storesToWB;
+
+ if (isStalled() &&
+ inst->seqNum == stallingStoreIsn) {
+ DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
+ "load [sn:%lli]\n",
+ stallingStoreIsn, (*stallingLoad)->seqNum);
+ stalled = false;
+ stallingStoreIsn = 0;
+ be->replayMemInst((*stallingLoad));
+ }
+
+ DPRINTF(OzoneLSQ, "Completing store idx:%i [sn:%lli], storesToWB:%i\n",
+ inst->sqIdx, inst->seqNum, storesToWB);
+
+ assert(!storeQueue.empty());
+ SQItHash.erase(sq_hash_it);
+ SQIndices.push(inst->sqIdx);
+ storeQueue.erase(sq_it);
+ --stores;
+
+ inst->setCompleted();
+ if (cpu->checker) {
+ cpu->checker->tick(inst);
+ }
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::switchOut()
+{
+ assert(storesToWB == 0);
+ switchedOut = true;
+ SQIt sq_it = --(storeQueue.end());
+ while (storesToWB > 0 &&
+ sq_it != storeQueue.end() &&
+ (*sq_it).inst &&
+ (*sq_it).canWB) {
+
+ DynInstPtr inst = (*sq_it).inst;
+
+ if ((*sq_it).size == 0 && !(*sq_it).completed) {
+ sq_it--;
+ continue;
+ }
+
+ // Store conditionals don't complete until *after* they have written
+ // back. If it's here and not yet sent to memory, then don't bother
+ // as it's not part of committed state.
+ if (inst->isDataPrefetch() || (*sq_it).committed) {
+ sq_it--;
+ continue;
+ } else if ((*sq_it).req->flags & LOCKED) {
+ sq_it--;
+ assert(!(*sq_it).canWB ||
+ ((*sq_it).canWB && (*sq_it).req->flags & LOCKED));
+ continue;
+ }
+
+ assert((*sq_it).req);
+ assert(!(*sq_it).committed);
+
+ MemReqPtr req = (*sq_it).req;
+ (*sq_it).committed = true;
+
+ req->cmd = Write;
+ req->completionEvent = NULL;
+ req->time = curTick;
+ assert(!req->data);
+ req->data = new uint8_t[64];
+ memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size);
+
+ DPRINTF(OzoneLSQ, "Switching out : Writing back store idx:%i PC:%#x "
+ "to Addr:%#x, data:%#x directly to memory [sn:%lli]\n",
+ inst->sqIdx,inst->readPC(),
+ req->paddr, *(req->data),
+ inst->seqNum);
+
+ switch((*sq_it).size) {
+ case 1:
+ cpu->write(req, (uint8_t &)(*sq_it).data);
+ break;
+ case 2:
+ cpu->write(req, (uint16_t &)(*sq_it).data);
+ break;
+ case 4:
+ cpu->write(req, (uint32_t &)(*sq_it).data);
+ break;
+ case 8:
+ cpu->write(req, (uint64_t &)(*sq_it).data);
+ break;
+ default:
+ panic("Unexpected store size!\n");
+ }
+ }
+
+ // Clear the queue to free up resources
+ storeQueue.clear();
+ loadQueue.clear();
+ loads = stores = storesToWB = 0;
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::takeOverFrom(ExecContext *old_xc)
+{
+ // Clear out any old state. May be redundant if this is the first time
+ // the CPU is being used.
+ stalled = false;
+ isLoadBlocked = false;
+ loadBlockedHandled = false;
+ switchedOut = false;
+
+ // Could do simple checks here to see if indices are on twice
+ while (!LQIndices.empty())
+ LQIndices.pop();
+ while (!SQIndices.empty())
+ SQIndices.pop();
+
+ for (int i = 0; i < LQEntries * 2; i++) {
+ LQIndices.push(i);
+ SQIndices.push(i);
+ }
+
+ usedPorts = 0;
+
+ loadFaultInst = storeFaultInst = memDepViolator = NULL;
+
+ blockedLoadSeqNum = 0;
+}
--- /dev/null
+
+#ifndef __CPU_OZONE_NULL_PREDICTOR_HH__
+#define __CPU_OZONE_NULL_PREDICTOR_HH__
+
+#include "arch/isa_traits.hh"
+#include "cpu/inst_seq.hh"
+
+template <class Impl>
+class NullPredictor
+{
+ public:
+ typedef typename Impl::Params Params;
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
+ NullPredictor(Params *p) { }
+
+ struct BPredInfo {
+ BPredInfo()
+ : PC(0), nextPC(0)
+ { }
+
+ BPredInfo(const Addr &pc, const Addr &next_pc)
+ : PC(pc), nextPC(next_pc)
+ { }
+
+ Addr PC;
+ Addr nextPC;
+ };
+
+ BPredInfo lookup(Addr &PC) { return BPredInfo(PC, PC+4); }
+
+ void undo(BPredInfo &bp_info) { return; }
+
+ /**
+ * Predicts whether or not the instruction is a taken branch, and the
+ * target of the branch if it is taken.
+ * @param inst The branch instruction.
+ * @param PC The predicted PC is passed back through this parameter.
+ * @param tid The thread id.
+ * @return Returns if the branch is taken or not.
+ */
+ bool predict(DynInstPtr &inst, Addr &PC, unsigned tid)
+ { return false; }
+
+ /**
+ * Tells the branch predictor to commit any updates until the given
+ * sequence number.
+ * @param done_sn The sequence number to commit any older updates up until.
+ * @param tid The thread id.
+ */
+ void update(const InstSeqNum &done_sn, unsigned tid) { }
+
+ /**
+ * Squashes all outstanding updates until a given sequence number.
+ * @param squashed_sn The sequence number to squash any younger updates up
+ * until.
+ * @param tid The thread id.
+ */
+ void squash(const InstSeqNum &squashed_sn, unsigned tid) { }
+
+ /**
+ * Squashes all outstanding updates until a given sequence number, and
+ * corrects that sn's update with the proper address and taken/not taken.
+ * @param squashed_sn The sequence number to squash any younger updates up
+ * until.
+ * @param corr_target The correct branch target.
+ * @param actually_taken The correct branch direction.
+ * @param tid The thread id.
+ */
+ void squash(const InstSeqNum &squashed_sn, const Addr &corr_target,
+ bool actually_taken, unsigned tid)
+ { }
+
+};
+
+#endif // __CPU_OZONE_NULL_PREDICTOR_HH__
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OZONE_OZONE_IMPL_HH__
+#define __CPU_OZONE_OZONE_IMPL_HH__
+
+#include "arch/alpha/isa_traits.hh"
+#include "cpu/o3/bpred_unit.hh"
+#include "cpu/ozone/front_end.hh"
+#include "cpu/ozone/inst_queue.hh"
+#include "cpu/ozone/lw_lsq.hh"
+#include "cpu/ozone/lw_back_end.hh"
+#include "cpu/ozone/null_predictor.hh"
+#include "cpu/ozone/dyn_inst.hh"
+#include "cpu/ozone/simple_params.hh"
+
+template <class Impl>
+class OzoneCPU;
+
+template <class Impl>
+class OzoneDynInst;
+
+struct OzoneImpl {
+ typedef SimpleParams Params;
+ typedef OzoneCPU<OzoneImpl> OzoneCPU;
+ typedef OzoneCPU FullCPU;
+
+ // Would like to put these into their own area.
+// typedef NullPredictor BranchPred;
+ typedef TwobitBPredUnit<OzoneImpl> BranchPred;
+ typedef FrontEnd<OzoneImpl> FrontEnd;
+ // Will need IQ, LSQ eventually
+ typedef LWBackEnd<OzoneImpl> BackEnd;
+
+ typedef InstQueue<OzoneImpl> InstQueue;
+ typedef OzoneLWLSQ<OzoneImpl> LdstQueue;
+
+ typedef OzoneDynInst<OzoneImpl> DynInst;
+ typedef RefCountingPtr<DynInst> DynInstPtr;
+
+ typedef uint64_t IssueStruct;
+
+ enum {
+ MaxThreads = 1
+ };
+};
+
+#endif // __CPU_OZONE_OZONE_IMPL_HH__
--- /dev/null
+
+#include "cpu/ozone/rename_table_impl.hh"
+#include "cpu/ozone/ozone_impl.hh"
+#include "cpu/ozone/simple_impl.hh"
+
+template class RenameTable<OzoneImpl>;
+template class RenameTable<SimpleImpl>;
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OZONE_RENAME_TABLE_HH__
+#define __CPU_OZONE_RENAME_TABLE_HH__
+
+#include "arch/isa_traits.hh"
+
+/** Rename table that holds the rename of each architectural register to
+ * producing DynInst. Needs to support copying from one table to another.
+ */
+
+template <class Impl>
+class RenameTable {
+ public:
+ typedef typename Impl::DynInstPtr DynInstPtr;
+
+ RenameTable();
+
+ void copyFrom(const RenameTable<Impl> &table_to_copy);
+
+ DynInstPtr &operator [] (int index)
+ { return table[index]; }
+
+ DynInstPtr table[TheISA::TotalNumRegs];
+};
+
+#endif // __CPU_OZONE_RENAME_TABLE_HH__
--- /dev/null
+
+#include <cstdlib> // Not really sure what to include to get NULL
+#include "cpu/ozone/rename_table.hh"
+
+template <class Impl>
+RenameTable<Impl>::RenameTable()
+{
+ // Actually should set these to dummy dyn insts that have the initial value
+ // and force their values to be initialized. This keeps everything the
+ // same.
+ for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
+ table[i] = NULL;
+ }
+}
+
+template <class Impl>
+void
+RenameTable<Impl>::copyFrom(const RenameTable<Impl> &table_to_copy)
+{
+ for (int i = 0; i < TheISA::TotalNumRegs; ++i) {
+ table[i] = table_to_copy.table[i];
+ }
+}
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OZONE_SIMPLE_IMPL_HH__
+#define __CPU_OZONE_SIMPLE_IMPL_HH__
+
+#include "arch/isa_traits.hh"
+#include "cpu/o3/bpred_unit.hh"
+#include "cpu/ozone/cpu.hh"
+#include "cpu/ozone/front_end.hh"
+#include "cpu/ozone/inorder_back_end.hh"
+#include "cpu/ozone/null_predictor.hh"
+#include "cpu/ozone/dyn_inst.hh"
+#include "cpu/ozone/simple_params.hh"
+
+//template <class Impl>
+//class OzoneCPU;
+
+template <class Impl>
+class OzoneDynInst;
+
+struct SimpleImpl {
+ typedef SimpleParams Params;
+ typedef OzoneCPU<SimpleImpl> OzoneCPU;
+ typedef OzoneCPU FullCPU;
+
+ // Would like to put these into their own area.
+// typedef NullPredictor BranchPred;
+ typedef TwobitBPredUnit<SimpleImpl> BranchPred;
+ typedef FrontEnd<SimpleImpl> FrontEnd;
+ // Will need IQ, LSQ eventually
+ typedef InorderBackEnd<SimpleImpl> BackEnd;
+
+ typedef OzoneDynInst<SimpleImpl> DynInst;
+ typedef RefCountingPtr<DynInst> DynInstPtr;
+
+ typedef uint64_t IssueStruct;
+
+ enum {
+ MaxThreads = 1
+ };
+};
+
+#endif // __CPU_OZONE_SIMPLE_IMPL_HH__
--- /dev/null
+
+
+#ifndef __CPU_OZONE_SIMPLE_PARAMS_HH__
+#define __CPU_OZONE_SIMPLE_PARAMS_HH__
+
+#include "cpu/ozone/cpu.hh"
+
+//Forward declarations
+class AlphaDTB;
+class AlphaITB;
+class FUPool;
+class FunctionalMemory;
+class MemInterface;
+class PageTable;
+class Process;
+class System;
+
+/**
+ * This file defines the parameters that will be used for the OzoneCPU.
+ * This must be defined externally so that the Impl can have a params class
+ * defined that it can pass to all of the individual stages.
+ */
+
+class SimpleParams : public BaseCPU::Params
+{
+ public:
+
+#if FULL_SYSTEM
+ AlphaITB *itb; AlphaDTB *dtb;
+#else
+ std::vector<Process *> workload;
+// Process *process;
+#endif // FULL_SYSTEM
+
+ //Page Table
+ PageTable *pTable;
+
+ FunctionalMemory *mem;
+
+ //
+ // Caches
+ //
+ MemInterface *icacheInterface;
+ MemInterface *dcacheInterface;
+
+ unsigned cachePorts;
+ unsigned width;
+ unsigned frontEndWidth;
+ unsigned backEndWidth;
+ unsigned backEndSquashLatency;
+ unsigned backEndLatency;
+ unsigned maxInstBufferSize;
+ unsigned numPhysicalRegs;
+ unsigned maxOutstandingMemOps;
+ //
+ // Fetch
+ //
+ unsigned decodeToFetchDelay;
+ unsigned renameToFetchDelay;
+ unsigned iewToFetchDelay;
+ unsigned commitToFetchDelay;
+ unsigned fetchWidth;
+
+ //
+ // Decode
+ //
+ unsigned renameToDecodeDelay;
+ unsigned iewToDecodeDelay;
+ unsigned commitToDecodeDelay;
+ unsigned fetchToDecodeDelay;
+ unsigned decodeWidth;
+
+ //
+ // Rename
+ //
+ unsigned iewToRenameDelay;
+ unsigned commitToRenameDelay;
+ unsigned decodeToRenameDelay;
+ unsigned renameWidth;
+
+ //
+ // IEW
+ //
+ unsigned commitToIEWDelay;
+ unsigned renameToIEWDelay;
+ unsigned issueToExecuteDelay;
+ unsigned issueWidth;
+ unsigned executeWidth;
+ unsigned executeIntWidth;
+ unsigned executeFloatWidth;
+ unsigned executeBranchWidth;
+ unsigned executeMemoryWidth;
+ FUPool *fuPool;
+
+ //
+ // Commit
+ //
+ unsigned iewToCommitDelay;
+ unsigned renameToROBDelay;
+ unsigned commitWidth;
+ unsigned squashWidth;
+
+ //
+ // Branch predictor (BP & BTB)
+ //
+ unsigned localPredictorSize;
+ unsigned localCtrBits;
+ unsigned localHistoryTableSize;
+ unsigned localHistoryBits;
+ unsigned globalPredictorSize;
+ unsigned globalCtrBits;
+ unsigned globalHistoryBits;
+ unsigned choicePredictorSize;
+ unsigned choiceCtrBits;
+
+ unsigned BTBEntries;
+ unsigned BTBTagSize;
+
+ unsigned RASSize;
+
+ //
+ // Load store queue
+ //
+ unsigned LQEntries;
+ unsigned SQEntries;
+
+ //
+ // Memory dependence
+ //
+ unsigned SSITSize;
+ unsigned LFSTSize;
+
+ //
+ // Miscellaneous
+ //
+ unsigned numPhysIntRegs;
+ unsigned numPhysFloatRegs;
+ unsigned numIQEntries;
+ unsigned numROBEntries;
+
+ bool decoupledFrontEnd;
+ int dispatchWidth;
+ int wbWidth;
+
+ //SMT Parameters
+ unsigned smtNumFetchingThreads;
+
+ std::string smtFetchPolicy;
+
+ std::string smtIQPolicy;
+ unsigned smtIQThreshold;
+
+ std::string smtLSQPolicy;
+ unsigned smtLSQThreshold;
+
+ std::string smtCommitPolicy;
+
+ std::string smtROBPolicy;
+ unsigned smtROBThreshold;
+
+ // Probably can get this from somewhere.
+ unsigned instShiftAmt;
+};
+
+#endif // __CPU_OZONE_SIMPLE_PARAMS_HH__
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OZONE_THREAD_STATE_HH__
+#define __CPU_OZONE_THREAD_STATE_HH__
+
+#include "arch/faults.hh"
+#include "arch/isa_traits.hh"
+#include "cpu/exec_context.hh"
+#include "cpu/thread_state.hh"
+#include "sim/process.hh"
+
+class Event;
+//class Process;
+
+#if FULL_SYSTEM
+class EndQuiesceEvent;
+class FunctionProfile;
+class ProfileNode;
+#else
+class Process;
+class FunctionalMemory;
+#endif
+
+// Maybe this ozone thread state should only really have committed state?
+// I need to think about why I'm using this and what it's useful for. Clearly
+// has benefits for SMT; basically serves same use as CPUExecContext.
+// Makes the ExecContext proxy easier. Gives organization/central access point
+// to state of a thread that can be accessed normally (i.e. not in-flight
+// stuff within a OoO processor). Does this need an XC proxy within it?
+template <class Impl>
+struct OzoneThreadState : public ThreadState {
+ typedef typename ExecContext::Status Status;
+ typedef typename Impl::FullCPU FullCPU;
+ typedef TheISA::MiscReg MiscReg;
+
+#if FULL_SYSTEM
+ OzoneThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem)
+ : ThreadState(-1, _thread_num, _mem),
+ inSyscall(0), trapPending(0)
+ {
+ memset(®s, 0, sizeof(TheISA::RegFile));
+ }
+#else
+ OzoneThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid)
+ : ThreadState(-1, _thread_num, NULL, _process, _asid),
+ cpu(_cpu), inSyscall(0), trapPending(0)
+ {
+ memset(®s, 0, sizeof(TheISA::RegFile));
+ }
+
+ OzoneThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem,
+ int _asid)
+ : ThreadState(-1, _thread_num, _mem, NULL, _asid),
+ cpu(_cpu), inSyscall(0), trapPending(0)
+ {
+ memset(®s, 0, sizeof(TheISA::RegFile));
+ }
+#endif
+
+ Status _status;
+
+ Status status() const { return _status; }
+
+ void setStatus(Status new_status) { _status = new_status; }
+
+ RenameTable<Impl> renameTable;
+ Addr PC;
+ Addr nextPC;
+
+ // Current instruction
+ TheISA::MachInst inst;
+
+ TheISA::RegFile regs;
+
+ typename Impl::FullCPU *cpu;
+
+ bool inSyscall;
+
+ bool trapPending;
+
+ ExecContext *xcProxy;
+
+ ExecContext *getXCProxy() { return xcProxy; }
+
+#if !FULL_SYSTEM
+ Fault translateInstReq(Request *req)
+ {
+ return process->pTable->translate(req);
+ }
+ Fault translateDataReadReq(Request *req)
+ {
+ return process->pTable->translate(req);
+ }
+ Fault translateDataWriteReq(Request *req)
+ {
+ return process->pTable->translate(req);
+ }
+#else
+ Fault translateInstReq(Request *req)
+ {
+ return cpu->itb->translate(req);
+ }
+
+ Fault translateDataReadReq(Request *req)
+ {
+ return cpu->dtb->translate(req, false);
+ }
+
+ Fault translateDataWriteReq(Request *req)
+ {
+ return cpu->dtb->translate(req, true);
+ }
+#endif
+
+ MiscReg readMiscReg(int misc_reg)
+ {
+ return regs.readMiscReg(misc_reg);
+ }
+
+ MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
+ {
+ return regs.readMiscRegWithEffect(misc_reg, fault, xcProxy);
+ }
+
+ Fault setMiscReg(int misc_reg, const MiscReg &val)
+ {
+ return regs.setMiscReg(misc_reg, val);
+ }
+
+ Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val)
+ {
+ return regs.setMiscRegWithEffect(misc_reg, val, xcProxy);
+ }
+
+ uint64_t readPC()
+ { return PC; }
+
+ void setPC(uint64_t val)
+ { PC = val; }
+
+ uint64_t readNextPC()
+ { return nextPC; }
+
+ void setNextPC(uint64_t val)
+ { nextPC = val; }
+
+ bool misspeculating() { return false; }
+
+ void setInst(TheISA::MachInst _inst) { inst = _inst; }
+
+ Counter readFuncExeInst() { return funcExeInst; }
+
+ void setFuncExeInst(Counter new_val) { funcExeInst = new_val; }
+};
+
+#endif // __CPU_OZONE_THREAD_STATE_HH__
--- /dev/null
+
+#include "cpu/exec_context.hh"
+#include "cpu/quiesce_event.hh"
+
+EndQuiesceEvent::EndQuiesceEvent(ExecContext *_xc)
+ : Event(&mainEventQueue), xc(_xc)
+{
+}
+
+void
+EndQuiesceEvent::process()
+{
+ xc->activate();
+}
+
+const char*
+EndQuiesceEvent::description()
+{
+ return "End Quiesce Event.";
+}
--- /dev/null
+#ifndef __CPU_QUIESCE_EVENT_HH__
+#define __CPU_QUIESCE_EVENT_HH__
+
+#include "sim/eventq.hh"
+
+class ExecContext;
+
+/** Event for timing out quiesce instruction */
+struct EndQuiesceEvent : public Event
+{
+ /** A pointer to the execution context that is quiesced */
+ ExecContext *xc;
+
+ EndQuiesceEvent(ExecContext *_xc);
+
+ /** Event process to occur at interrupt*/
+ virtual void process();
+
+ /** Event description */
+ virtual const char *description();
+};
+
+#endif // __CPU_QUIESCE_EVENT_HH__
--- /dev/null
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_THREAD_STATE_HH__
+#define __CPU_THREAD_STATE_HH__
+
+#include "cpu/exec_context.hh"
+
+#if FULL_SYSTEM
+class EndQuiesceEvent;
+class FunctionProfile;
+class ProfileNode;
+namespace Kernel {
+ class Statistics;
+};
+#else
+class FunctionalMemory;
+class Process;
+#endif
+
+/**
+ * Struct for holding general thread state that is needed across CPU
+ * models. This includes things such as pointers to the process,
+ * memory, quiesce events, and certain stats. This can be expanded
+ * to hold more thread-specific stats within it.
+ */
+struct ThreadState {
+#if FULL_SYSTEM
+ ThreadState(int _cpuId, int _tid, FunctionalMemory *_mem)
+ : cpuId(_cpuId), tid(_tid), mem(_mem), lastActivate(0), lastSuspend(0),
+ profile(NULL), profileNode(NULL), profilePC(0), quiesceEvent(NULL)
+#else
+ ThreadState(int _cpuId, int _tid, FunctionalMemory *_mem,
+ Process *_process, short _asid)
+ : cpuId(_cpuId), tid(_tid), mem(_mem), process(_process), asid(_asid)
+#endif
+ {
+ funcExeInst = 0;
+ storeCondFailures = 0;
+ }
+
+ ExecContext::Status status;
+
+ int cpuId;
+
+ // Index of hardware thread context on the CPU that this represents.
+ int tid;
+
+ Counter numInst;
+ Stats::Scalar<> numInsts;
+ Stats::Scalar<> numMemRefs;
+
+ // number of simulated loads
+ Counter numLoad;
+ Counter startNumLoad;
+
+ FunctionalMemory *mem; // functional storage for process address space
+
+#if FULL_SYSTEM
+ Tick lastActivate;
+ Tick lastSuspend;
+
+ FunctionProfile *profile;
+ ProfileNode *profileNode;
+ Addr profilePC;
+
+ EndQuiesceEvent *quiesceEvent;
+
+ Kernel::Statistics *kernelStats;
+#else
+ Process *process;
+
+ // Address space ID. Note that this is used for TIMING cache
+ // simulation only; all functional memory accesses should use
+ // one of the FunctionalMemory pointers above.
+ short asid;
+
+#endif
+
+ /**
+ * Temporary storage to pass the source address from copy_load to
+ * copy_store.
+ * @todo Remove this temporary when we have a better way to do it.
+ */
+ Addr copySrcAddr;
+ /**
+ * Temp storage for the physical source address of a copy.
+ * @todo Remove this temporary when we have a better way to do it.
+ */
+ Addr copySrcPhysAddr;
+
+ /*
+ * number of executed instructions, for matching with syscall trace
+ * points in EIO files.
+ */
+ Counter funcExeInst;
+
+ //
+ // Count failed store conditionals so we can warn of apparent
+ // application deadlock situations.
+ unsigned storeCondFailures;
+};
+
+#endif // __CPU_THREAD_STATE_HH__
: validCpuAndThreadNums(false)
{ setPhys(_paddr, _size, _flags); }
+ Request(int _asid, Addr _vaddr, int _size, int _flags, Addr _pc,
+ int _cpuNum, int _threadNum)
+ {
+ setThreadContext(_cpuNum, _threadNum);
+ setVirt(_asid, _vaddr, _size, _flags, _pc);
+ }
+
/**
* Set up CPU and thread numbers. */
void setThreadContext(int _cpuNum, int _threadNum)
--- /dev/null
+from m5 import *
+from FullCPU import OpType
+from FullCPU import OpDesc
+from FullCPU import FUDesc
+
+class FUPool(SimObject):
+ type = 'FUPool'
+ FUList = VectorParam.FUDesc("list of FU's for this pool")
--- /dev/null
+from m5 import *
+from BaseCPU import BaseCPU
+
+class DerivOzoneCPU(BaseCPU):
+ type = 'DerivOzoneCPU'
+
+ numThreads = Param.Unsigned("number of HW thread contexts")
+
+ if not build_env['FULL_SYSTEM']:
+ mem = Param.FunctionalMemory(NULL, "memory")
+
+ checker = Param.BaseCPU("Checker CPU")
+
+ width = Param.Unsigned("Width")
+ frontEndWidth = Param.Unsigned("Front end width")
+ backEndWidth = Param.Unsigned("Back end width")
+ backEndSquashLatency = Param.Unsigned("Back end squash latency")
+ backEndLatency = Param.Unsigned("Back end latency")
+ maxInstBufferSize = Param.Unsigned("Maximum instruction buffer size")
+ maxOutstandingMemOps = Param.Unsigned("Maximum number of outstanding memory operations")
+ decodeToFetchDelay = Param.Unsigned("Decode to fetch delay")
+ renameToFetchDelay = Param.Unsigned("Rename to fetch delay")
+ iewToFetchDelay = Param.Unsigned("Issue/Execute/Writeback to fetch "
+ "delay")
+ commitToFetchDelay = Param.Unsigned("Commit to fetch delay")
+ fetchWidth = Param.Unsigned("Fetch width")
+
+ renameToDecodeDelay = Param.Unsigned("Rename to decode delay")
+ iewToDecodeDelay = Param.Unsigned("Issue/Execute/Writeback to decode "
+ "delay")
+ commitToDecodeDelay = Param.Unsigned("Commit to decode delay")
+ fetchToDecodeDelay = Param.Unsigned("Fetch to decode delay")
+ decodeWidth = Param.Unsigned("Decode width")
+
+ iewToRenameDelay = Param.Unsigned("Issue/Execute/Writeback to rename "
+ "delay")
+ commitToRenameDelay = Param.Unsigned("Commit to rename delay")
+ decodeToRenameDelay = Param.Unsigned("Decode to rename delay")
+ renameWidth = Param.Unsigned("Rename width")
+
+ commitToIEWDelay = Param.Unsigned("Commit to "
+ "Issue/Execute/Writeback delay")
+ renameToIEWDelay = Param.Unsigned("Rename to "
+ "Issue/Execute/Writeback delay")
+ issueToExecuteDelay = Param.Unsigned("Issue to execute delay (internal "
+ "to the IEW stage)")
+ issueWidth = Param.Unsigned("Issue width")
+ executeWidth = Param.Unsigned("Execute width")
+ executeIntWidth = Param.Unsigned("Integer execute width")
+ executeFloatWidth = Param.Unsigned("Floating point execute width")
+ executeBranchWidth = Param.Unsigned("Branch execute width")
+ executeMemoryWidth = Param.Unsigned("Memory execute width")
+
+ iewToCommitDelay = Param.Unsigned("Issue/Execute/Writeback to commit "
+ "delay")
+ renameToROBDelay = Param.Unsigned("Rename to reorder buffer delay")
+ commitWidth = Param.Unsigned("Commit width")
+ squashWidth = Param.Unsigned("Squash width")
+
+ localPredictorSize = Param.Unsigned("Size of local predictor")
+ localCtrBits = Param.Unsigned("Bits per counter")
+ localHistoryTableSize = Param.Unsigned("Size of local history table")
+ localHistoryBits = Param.Unsigned("Bits for the local history")
+ globalPredictorSize = Param.Unsigned("Size of global predictor")
+ globalCtrBits = Param.Unsigned("Bits per counter")
+ globalHistoryBits = Param.Unsigned("Bits of history")
+ choicePredictorSize = Param.Unsigned("Size of choice predictor")
+ choiceCtrBits = Param.Unsigned("Bits of choice counters")
+
+ BTBEntries = Param.Unsigned("Number of BTB entries")
+ BTBTagSize = Param.Unsigned("Size of the BTB tags, in bits")
+
+ RASSize = Param.Unsigned("RAS size")
+
+ LQEntries = Param.Unsigned("Number of load queue entries")
+ SQEntries = Param.Unsigned("Number of store queue entries")
+ LFSTSize = Param.Unsigned("Last fetched store table size")
+ SSITSize = Param.Unsigned("Store set ID table size")
+
+ numPhysIntRegs = Param.Unsigned("Number of physical integer registers")
+ numPhysFloatRegs = Param.Unsigned("Number of physical floating point "
+ "registers")
+ numIQEntries = Param.Unsigned("Number of instruction queue entries")
+ numROBEntries = Param.Unsigned("Number of reorder buffer entries")
+
+ instShiftAmt = Param.Unsigned("Number of bits to shift instructions by")
+
+ function_trace = Param.Bool(False, "Enable function trace")
+ function_trace_start = Param.Tick(0, "Cycle to start function trace")
--- /dev/null
+from m5 import *
+from BaseCPU import BaseCPU
+
+class SimpleOzoneCPU(BaseCPU):
+ type = 'SimpleOzoneCPU'
+
+ numThreads = Param.Unsigned("number of HW thread contexts")
+
+ if not build_env['FULL_SYSTEM']:
+ mem = Param.FunctionalMemory(NULL, "memory")
+
+ width = Param.Unsigned("Width")
+ frontEndWidth = Param.Unsigned("Front end width")
+ backEndWidth = Param.Unsigned("Back end width")
+ backEndSquashLatency = Param.Unsigned("Back end squash latency")
+ backEndLatency = Param.Unsigned("Back end latency")
+ maxInstBufferSize = Param.Unsigned("Maximum instruction buffer size")
+ decodeToFetchDelay = Param.Unsigned("Decode to fetch delay")
+ renameToFetchDelay = Param.Unsigned("Rename to fetch delay")
+ iewToFetchDelay = Param.Unsigned("Issue/Execute/Writeback to fetch "
+ "delay")
+ commitToFetchDelay = Param.Unsigned("Commit to fetch delay")
+ fetchWidth = Param.Unsigned("Fetch width")
+
+ renameToDecodeDelay = Param.Unsigned("Rename to decode delay")
+ iewToDecodeDelay = Param.Unsigned("Issue/Execute/Writeback to decode "
+ "delay")
+ commitToDecodeDelay = Param.Unsigned("Commit to decode delay")
+ fetchToDecodeDelay = Param.Unsigned("Fetch to decode delay")
+ decodeWidth = Param.Unsigned("Decode width")
+
+ iewToRenameDelay = Param.Unsigned("Issue/Execute/Writeback to rename "
+ "delay")
+ commitToRenameDelay = Param.Unsigned("Commit to rename delay")
+ decodeToRenameDelay = Param.Unsigned("Decode to rename delay")
+ renameWidth = Param.Unsigned("Rename width")
+
+ commitToIEWDelay = Param.Unsigned("Commit to "
+ "Issue/Execute/Writeback delay")
+ renameToIEWDelay = Param.Unsigned("Rename to "
+ "Issue/Execute/Writeback delay")
+ issueToExecuteDelay = Param.Unsigned("Issue to execute delay (internal "
+ "to the IEW stage)")
+ issueWidth = Param.Unsigned("Issue width")
+ executeWidth = Param.Unsigned("Execute width")
+ executeIntWidth = Param.Unsigned("Integer execute width")
+ executeFloatWidth = Param.Unsigned("Floating point execute width")
+ executeBranchWidth = Param.Unsigned("Branch execute width")
+ executeMemoryWidth = Param.Unsigned("Memory execute width")
+
+ iewToCommitDelay = Param.Unsigned("Issue/Execute/Writeback to commit "
+ "delay")
+ renameToROBDelay = Param.Unsigned("Rename to reorder buffer delay")
+ commitWidth = Param.Unsigned("Commit width")
+ squashWidth = Param.Unsigned("Squash width")
+
+ localPredictorSize = Param.Unsigned("Size of local predictor")
+ localCtrBits = Param.Unsigned("Bits per counter")
+ localHistoryTableSize = Param.Unsigned("Size of local history table")
+ localHistoryBits = Param.Unsigned("Bits for the local history")
+ globalPredictorSize = Param.Unsigned("Size of global predictor")
+ globalCtrBits = Param.Unsigned("Bits per counter")
+ globalHistoryBits = Param.Unsigned("Bits of history")
+ choicePredictorSize = Param.Unsigned("Size of choice predictor")
+ choiceCtrBits = Param.Unsigned("Bits of choice counters")
+
+ BTBEntries = Param.Unsigned("Number of BTB entries")
+ BTBTagSize = Param.Unsigned("Size of the BTB tags, in bits")
+
+ RASSize = Param.Unsigned("RAS size")
+
+ LQEntries = Param.Unsigned("Number of load queue entries")
+ SQEntries = Param.Unsigned("Number of store queue entries")
+ LFSTSize = Param.Unsigned("Last fetched store table size")
+ SSITSize = Param.Unsigned("Store set ID table size")
+
+ numPhysIntRegs = Param.Unsigned("Number of physical integer registers")
+ numPhysFloatRegs = Param.Unsigned("Number of physical floating point "
+ "registers")
+ numIQEntries = Param.Unsigned("Number of instruction queue entries")
+ numROBEntries = Param.Unsigned("Number of reorder buffer entries")
+
+ instShiftAmt = Param.Unsigned("Number of bits to shift instructions by")
+
+ function_trace = Param.Bool(False, "Enable function trace")
+ function_trace_start = Param.Tick(0, "Cycle to start function trace")