From: Kevin Lim Date: Fri, 2 Jun 2006 22:15:20 +0000 (-0400) Subject: Fixes to get compiling to work. This is mainly fixing up some includes; changing... X-Git-Tag: m5_2.0_beta1~36^2~111 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=7940c10ace28d5b93a61d4d278e6647e0c497149;p=gem5.git Fixes to get compiling to work. This is mainly fixing up some includes; changing functions within the XCs; changing MemReqPtrs to Requests or Packets where appropriate. Currently the O3 and Ozone CPUs do not work in the new memory system; I still need to fix up the ports to work and handle responses properly. This check-in is so that the merge between m5 and newmem is no longer outstanding. src/SConscript: Need to include FU Pool for new CPU model. I'll try to figure out a cleaner way to handle this in the future. src/base/traceflags.py: Include new trace flags, fix up merge mess-up. src/cpu/SConscript: Include the base_dyn_inst.cc as one of the sources. Don't compile the Ozone CPU for now. src/cpu/base.cc: Remove an extra } from the merge. src/cpu/base_dyn_inst.cc: Fixes to get compiling to work. Don't instantiate the OzoneCPU for now. 
src/cpu/base_dyn_inst.hh: src/cpu/o3/2bit_local_pred.cc: src/cpu/o3/alpha_cpu_builder.cc: src/cpu/o3/alpha_cpu_impl.hh: src/cpu/o3/alpha_dyn_inst.hh: src/cpu/o3/alpha_params.hh: src/cpu/o3/bpred_unit.cc: src/cpu/o3/btb.hh: src/cpu/o3/commit.hh: src/cpu/o3/commit_impl.hh: src/cpu/o3/cpu.cc: src/cpu/o3/cpu.hh: src/cpu/o3/fetch.hh: src/cpu/o3/fetch_impl.hh: src/cpu/o3/free_list.hh: src/cpu/o3/iew.hh: src/cpu/o3/iew_impl.hh: src/cpu/o3/inst_queue.hh: src/cpu/o3/inst_queue_impl.hh: src/cpu/o3/regfile.hh: src/cpu/o3/sat_counter.hh: src/cpu/op_class.hh: src/cpu/ozone/cpu.hh: src/cpu/checker/cpu.cc: src/cpu/checker/cpu.hh: src/cpu/checker/exec_context.hh: src/cpu/checker/o3_cpu_builder.cc: src/cpu/ozone/cpu_impl.hh: src/mem/request.hh: src/cpu/o3/fu_pool.hh: src/cpu/o3/lsq.hh: src/cpu/o3/lsq_unit.hh: src/cpu/o3/lsq_unit_impl.hh: src/cpu/o3/thread_state.hh: src/cpu/ozone/back_end.hh: src/cpu/ozone/dyn_inst.cc: src/cpu/ozone/dyn_inst.hh: src/cpu/ozone/front_end.hh: src/cpu/ozone/inorder_back_end.hh: src/cpu/ozone/lw_back_end.hh: src/cpu/ozone/lw_lsq.hh: src/cpu/ozone/ozone_impl.hh: src/cpu/ozone/thread_state.hh: Fixes to get compiling to work. src/cpu/o3/alpha_cpu.hh: Fixes to get compiling to work. Float reg accessors have changed, as well as MemReqPtrs to RequestPtrs. src/cpu/o3/alpha_dyn_inst_impl.hh: Fixes to get compiling to work. Pass in the packet to the completeAcc function. Fix up syscall function. 
--HG-- rename : cpu/activity.cc => src/cpu/activity.cc rename : cpu/activity.hh => src/cpu/activity.hh rename : cpu/checker/cpu.cc => src/cpu/checker/cpu.cc rename : cpu/checker/cpu.hh => src/cpu/checker/cpu.hh rename : cpu/checker/cpu_builder.cc => src/cpu/checker/cpu_builder.cc rename : cpu/checker/exec_context.hh => src/cpu/checker/exec_context.hh rename : cpu/checker/o3_cpu_builder.cc => src/cpu/checker/o3_cpu_builder.cc rename : cpu/o3/dep_graph.hh => src/cpu/o3/dep_graph.hh rename : cpu/o3/fu_pool.cc => src/cpu/o3/fu_pool.cc rename : cpu/o3/fu_pool.hh => src/cpu/o3/fu_pool.hh rename : cpu/o3/lsq.cc => src/cpu/o3/lsq.cc rename : cpu/o3/lsq.hh => src/cpu/o3/lsq.hh rename : cpu/o3/lsq_impl.hh => src/cpu/o3/lsq_impl.hh rename : cpu/o3/lsq_unit.cc => src/cpu/o3/lsq_unit.cc rename : cpu/o3/lsq_unit.hh => src/cpu/o3/lsq_unit.hh rename : cpu/o3/lsq_unit_impl.hh => src/cpu/o3/lsq_unit_impl.hh rename : cpu/o3/scoreboard.cc => src/cpu/o3/scoreboard.cc rename : cpu/o3/scoreboard.hh => src/cpu/o3/scoreboard.hh rename : cpu/o3/thread_state.hh => src/cpu/o3/thread_state.hh rename : cpu/ozone/back_end.cc => src/cpu/ozone/back_end.cc rename : cpu/ozone/back_end.hh => src/cpu/ozone/back_end.hh rename : cpu/ozone/back_end_impl.hh => src/cpu/ozone/back_end_impl.hh rename : cpu/ozone/cpu_builder.cc => src/cpu/ozone/cpu_builder.cc rename : cpu/ozone/dyn_inst.cc => src/cpu/ozone/dyn_inst.cc rename : cpu/ozone/dyn_inst.hh => src/cpu/ozone/dyn_inst.hh rename : cpu/ozone/dyn_inst_impl.hh => src/cpu/ozone/dyn_inst_impl.hh rename : cpu/ozone/front_end.cc => src/cpu/ozone/front_end.cc rename : cpu/ozone/front_end.hh => src/cpu/ozone/front_end.hh rename : cpu/ozone/front_end_impl.hh => src/cpu/ozone/front_end_impl.hh rename : cpu/ozone/inorder_back_end.cc => src/cpu/ozone/inorder_back_end.cc rename : cpu/ozone/inorder_back_end.hh => src/cpu/ozone/inorder_back_end.hh rename : cpu/ozone/inorder_back_end_impl.hh => src/cpu/ozone/inorder_back_end_impl.hh rename : cpu/ozone/inst_queue.cc => 
src/cpu/ozone/inst_queue.cc rename : cpu/ozone/inst_queue.hh => src/cpu/ozone/inst_queue.hh rename : cpu/ozone/inst_queue_impl.hh => src/cpu/ozone/inst_queue_impl.hh rename : cpu/ozone/lsq_unit.cc => src/cpu/ozone/lsq_unit.cc rename : cpu/ozone/lsq_unit.hh => src/cpu/ozone/lsq_unit.hh rename : cpu/ozone/lsq_unit_impl.hh => src/cpu/ozone/lsq_unit_impl.hh rename : cpu/ozone/lw_back_end.cc => src/cpu/ozone/lw_back_end.cc rename : cpu/ozone/lw_back_end.hh => src/cpu/ozone/lw_back_end.hh rename : cpu/ozone/lw_back_end_impl.hh => src/cpu/ozone/lw_back_end_impl.hh rename : cpu/ozone/lw_lsq.cc => src/cpu/ozone/lw_lsq.cc rename : cpu/ozone/lw_lsq.hh => src/cpu/ozone/lw_lsq.hh rename : cpu/ozone/lw_lsq_impl.hh => src/cpu/ozone/lw_lsq_impl.hh rename : cpu/ozone/null_predictor.hh => src/cpu/ozone/null_predictor.hh rename : cpu/ozone/ozone_impl.hh => src/cpu/ozone/ozone_impl.hh rename : cpu/ozone/rename_table.cc => src/cpu/ozone/rename_table.cc rename : cpu/ozone/rename_table.hh => src/cpu/ozone/rename_table.hh rename : cpu/ozone/rename_table_impl.hh => src/cpu/ozone/rename_table_impl.hh rename : cpu/ozone/simple_impl.hh => src/cpu/ozone/simple_impl.hh rename : cpu/ozone/simple_params.hh => src/cpu/ozone/simple_params.hh rename : cpu/ozone/thread_state.hh => src/cpu/ozone/thread_state.hh rename : cpu/quiesce_event.cc => src/cpu/quiesce_event.cc rename : cpu/quiesce_event.hh => src/cpu/quiesce_event.hh rename : cpu/thread_state.hh => src/cpu/thread_state.hh rename : python/m5/objects/FUPool.py => src/python/m5/objects/FUPool.py rename : python/m5/objects/OzoneCPU.py => src/python/m5/objects/OzoneCPU.py rename : python/m5/objects/SimpleOzoneCPU.py => src/python/m5/objects/SimpleOzoneCPU.py extra : convert_revision : ca7f0fbf65ee1a70d482fb4eda9a1840c7f9b8f8 --- diff --git a/cpu/activity.cc b/cpu/activity.cc deleted file mode 100644 index 6dcb6e341..000000000 --- a/cpu/activity.cc +++ /dev/null @@ -1,122 +0,0 @@ - -#include "base/timebuf.hh" -#include "cpu/activity.hh" - 
-ActivityRecorder::ActivityRecorder(int num_stages, int longest_latency, - int activity) - : activityBuffer(longest_latency, 0), longestLatency(longest_latency), - activityCount(activity), numStages(num_stages) -{ - stageActive = new bool[numStages]; - memset(stageActive, 0, numStages); -} - -void -ActivityRecorder::activity() -{ - if (activityBuffer[0]) { - return; - } - - activityBuffer[0] = true; - - ++activityCount; - - DPRINTF(Activity, "Activity: %i\n", activityCount); -} - -void -ActivityRecorder::advance() -{ - if (activityBuffer[-longestLatency]) { - --activityCount; - - assert(activityCount >= 0); - - DPRINTF(Activity, "Activity: %i\n", activityCount); - - if (activityCount == 0) { - DPRINTF(Activity, "No activity left!\n"); - } - } - - activityBuffer.advance(); -} - -void -ActivityRecorder::activateStage(const int idx) -{ - if (!stageActive[idx]) { - ++activityCount; - - stageActive[idx] = true; - - DPRINTF(Activity, "Activity: %i\n", activityCount); - } else { - DPRINTF(Activity, "Stage %i already active.\n", idx); - } - -// assert(activityCount < longestLatency + numStages + 1); -} - -void -ActivityRecorder::deactivateStage(const int idx) -{ - if (stageActive[idx]) { - --activityCount; - - stageActive[idx] = false; - - DPRINTF(Activity, "Activity: %i\n", activityCount); - } else { - DPRINTF(Activity, "Stage %i already inactive.\n", idx); - } - - assert(activityCount >= 0); -} - -void -ActivityRecorder::reset() -{ - activityCount = 0; - memset(stageActive, 0, numStages); - for (int i = 0; i < longestLatency + 1; ++i) - activityBuffer.advance(); -} - -void -ActivityRecorder::dump() -{ - for (int i = 0; i <= longestLatency; ++i) { - cprintf("[Idx:%i %i] ", i, activityBuffer[-i]); - } - - cprintf("\n"); - - for (int i = 0; i < numStages; ++i) { - cprintf("[Stage:%i %i]\n", i, stageActive[i]); - } - - cprintf("\n"); - - cprintf("Activity count: %i\n", activityCount); -} - -void -ActivityRecorder::validate() -{ - int count = 0; - for (int i = 0; i <= 
longestLatency; ++i) { - if (activityBuffer[-i]) { - count++; - } - } - - for (int i = 0; i < numStages; ++i) { - if (stageActive[i]) { - count++; - } - } - - assert(count == activityCount); -} diff --git a/cpu/activity.hh b/cpu/activity.hh deleted file mode 100644 index 2d53dc4bb..000000000 --- a/cpu/activity.hh +++ /dev/null @@ -1,67 +0,0 @@ - -#ifndef __CPU_ACTIVITY_HH__ -#define __CPU_ACTIVITY_HH__ - -#include "base/timebuf.hh" -#include "base/trace.hh" - -class ActivityRecorder { - public: - ActivityRecorder(int num_stages, int longest_latency, int count); - - /** Records that there is activity this cycle. */ - void activity(); - /** Advances the activity buffer, decrementing the activityCount if active - * communication just left the time buffer, and descheduling the CPU if - * there is no activity. - */ - void advance(); - /** Marks a stage as active. */ - void activateStage(const int idx); - /** Deactivates a stage. */ - void deactivateStage(const int idx); - - int getActivityCount() { return activityCount; } - - void setActivityCount(int count) - { activityCount = count; } - - bool active() { return activityCount; } - - void reset(); - - void dump(); - - void validate(); - - private: - /** Time buffer that tracks if any cycles has active communication - * in them. It should be as long as the longest communication - * latency in the system. Each time any time buffer is written, - * the activity buffer should also be written to. The - * activityBuffer is advanced along with all the other time - * buffers, so it should have a 1 somewhere in it only if there - * is active communication in a time buffer. - */ - TimeBuffer activityBuffer; - - int longestLatency; - - /** Tracks how many stages and cycles of time buffer have - * activity. Stages increment this count when they switch to - * active, and decrement it when they switch to - * inactive. Whenever a cycle that previously had no information - * is written in the time buffer, this is incremented. 
When a - * cycle that had information exits the time buffer due to age, - * this count is decremented. When the count is 0, there is no - * activity in the CPU, and it can be descheduled. - */ - int activityCount; - - int numStages; - - /** Records which stages are active/inactive. */ - bool *stageActive; -}; - -#endif // __CPU_ACTIVITY_HH__ diff --git a/cpu/checker/cpu.cc b/cpu/checker/cpu.cc deleted file mode 100644 index 41ff6e769..000000000 --- a/cpu/checker/cpu.cc +++ /dev/null @@ -1,757 +0,0 @@ -/* - * Copyright (c) 2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include - -#include "base/refcnt.hh" -#include "cpu/base.hh" -#include "cpu/base_dyn_inst.hh" -#include "cpu/checker/cpu.hh" -#include "cpu/cpu_exec_context.hh" -#include "cpu/exec_context.hh" -#include "cpu/static_inst.hh" -#include "sim/byteswap.hh" -#include "sim/sim_object.hh" -#include "sim/stats.hh" - -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" - -#include "cpu/ozone/dyn_inst.hh" -#include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/simple_impl.hh" - -#if FULL_SYSTEM -#include "sim/system.hh" -#include "arch/vtophys.hh" -#endif // FULL_SYSTEM - -using namespace std; -//The CheckerCPU does alpha only -using namespace AlphaISA; - -void -CheckerCPU::init() -{ -} - -CheckerCPU::CheckerCPU(Params *p) - : BaseCPU(p), cpuXC(NULL), xcProxy(NULL) -{ - memReq = new MemReq(); - memReq->xc = xcProxy; - memReq->asid = 0; - memReq->data = new uint8_t[64]; - - numInst = 0; - startNumInst = 0; - numLoad = 0; - startNumLoad = 0; - youngestSN = 0; - - changedPC = willChangePC = changedNextPC = false; - - exitOnError = p->exitOnError; -#if FULL_SYSTEM - itb = p->itb; - dtb = p->dtb; - systemPtr = NULL; - memPtr = NULL; -#endif -} - -CheckerCPU::~CheckerCPU() -{ -} - -void -CheckerCPU::setMemory(FunctionalMemory *mem) -{ - memPtr = mem; -#if !FULL_SYSTEM - cpuXC = new CPUExecContext(this, /* thread_num */ 0, mem, - /* asid */ 0); - - cpuXC->setStatus(ExecContext::Suspended); - xcProxy = 
cpuXC->getProxy(); - execContexts.push_back(xcProxy); -#else - if (systemPtr) { - cpuXC = new CPUExecContext(this, 0, systemPtr, itb, dtb, memPtr, false); - - cpuXC->setStatus(ExecContext::Suspended); - xcProxy = cpuXC->getProxy(); - execContexts.push_back(xcProxy); - memReq->xc = xcProxy; - delete cpuXC->kernelStats; - cpuXC->kernelStats = NULL; - } -#endif -} - -#if FULL_SYSTEM -void -CheckerCPU::setSystem(System *system) -{ - systemPtr = system; - - if (memPtr) { - cpuXC = new CPUExecContext(this, 0, systemPtr, itb, dtb, memPtr, false); - - cpuXC->setStatus(ExecContext::Suspended); - xcProxy = cpuXC->getProxy(); - execContexts.push_back(xcProxy); - memReq->xc = xcProxy; - delete cpuXC->kernelStats; - cpuXC->kernelStats = NULL; - } -} -#endif - -void -CheckerCPU::serialize(ostream &os) -{ -/* - BaseCPU::serialize(os); - SERIALIZE_SCALAR(inst); - nameOut(os, csprintf("%s.xc", name())); - cpuXC->serialize(os); - cacheCompletionEvent.serialize(os); -*/ -} - -void -CheckerCPU::unserialize(Checkpoint *cp, const string &section) -{ -/* - BaseCPU::unserialize(cp, section); - UNSERIALIZE_SCALAR(inst); - cpuXC->unserialize(cp, csprintf("%s.xc", section)); -*/ -} - -Fault -CheckerCPU::copySrcTranslate(Addr src) -{ - panic("Unimplemented!"); -} - -Fault -CheckerCPU::copy(Addr dest) -{ - panic("Unimplemented!"); -} - -template -Fault -CheckerCPU::read(Addr addr, T &data, unsigned flags) -{ - memReq->reset(addr, sizeof(T), flags); - - // translate to physical address - translateDataReadReq(memReq); - - memReq->cmd = Read; - memReq->completionEvent = NULL; - memReq->time = curTick; - memReq->flags &= ~INST_READ; - - if (!(memReq->flags & UNCACHEABLE)) { - // Access memory to see if we have the same data - cpuXC->read(memReq, data); - } else { - // Assume the data is correct if it's an uncached access - memcpy(&data, &unverifiedResult.integer, sizeof(T)); - } - - return NoFault; -} - -#ifndef DOXYGEN_SHOULD_SKIP_THIS - -template -Fault -CheckerCPU::read(Addr addr, uint64_t &data, 
unsigned flags); - -template -Fault -CheckerCPU::read(Addr addr, uint32_t &data, unsigned flags); - -template -Fault -CheckerCPU::read(Addr addr, uint16_t &data, unsigned flags); - -template -Fault -CheckerCPU::read(Addr addr, uint8_t &data, unsigned flags); - -#endif //DOXYGEN_SHOULD_SKIP_THIS - -template<> -Fault -CheckerCPU::read(Addr addr, double &data, unsigned flags) -{ - return read(addr, *(uint64_t*)&data, flags); -} - -template<> -Fault -CheckerCPU::read(Addr addr, float &data, unsigned flags) -{ - return read(addr, *(uint32_t*)&data, flags); -} - -template<> -Fault -CheckerCPU::read(Addr addr, int32_t &data, unsigned flags) -{ - return read(addr, (uint32_t&)data, flags); -} - -template -Fault -CheckerCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) -{ - memReq->reset(addr, sizeof(T), flags); - - // translate to physical address - cpuXC->translateDataWriteReq(memReq); - - // Can compare the write data and result only if it's cacheable, - // not a store conditional, or is a store conditional that - // succeeded. - // @todo: Verify that actual memory matches up with these values. - // Right now it only verifies that the instruction data is the - // same as what was in the request that got sent to memory; there - // is no verification that it is the same as what is in memory. - // This is because the LSQ would have to be snooped in the CPU to - // verify this data. - if (unverifiedReq && - !(unverifiedReq->flags & UNCACHEABLE) && - (!(unverifiedReq->flags & LOCKED) || - ((unverifiedReq->flags & LOCKED) && - unverifiedReq->result == 1))) { -#if 0 - memReq->cmd = Read; - memReq->completionEvent = NULL; - memReq->time = curTick; - memReq->flags &= ~INST_READ; - cpuXC->read(memReq, inst_data); -#endif - T inst_data; - memcpy(&inst_data, unverifiedReq->data, sizeof(T)); - - if (data != inst_data) { - warn("%lli: Store value does not match value in memory! 
" - "Instruction: %#x, memory: %#x", - curTick, inst_data, data); - handleError(); - } - } - - // Assume the result was the same as the one passed in. This checker - // doesn't check if the SC should succeed or fail, it just checks the - // value. - if (res) - *res = unverifiedReq->result; - - return NoFault; -} - - -#ifndef DOXYGEN_SHOULD_SKIP_THIS -template -Fault -CheckerCPU::write(uint64_t data, Addr addr, unsigned flags, uint64_t *res); - -template -Fault -CheckerCPU::write(uint32_t data, Addr addr, unsigned flags, uint64_t *res); - -template -Fault -CheckerCPU::write(uint16_t data, Addr addr, unsigned flags, uint64_t *res); - -template -Fault -CheckerCPU::write(uint8_t data, Addr addr, unsigned flags, uint64_t *res); - -#endif //DOXYGEN_SHOULD_SKIP_THIS - -template<> -Fault -CheckerCPU::write(double data, Addr addr, unsigned flags, uint64_t *res) -{ - return write(*(uint64_t*)&data, addr, flags, res); -} - -template<> -Fault -CheckerCPU::write(float data, Addr addr, unsigned flags, uint64_t *res) -{ - return write(*(uint32_t*)&data, addr, flags, res); -} - -template<> -Fault -CheckerCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res) -{ - return write((uint32_t)data, addr, flags, res); -} - - -#if FULL_SYSTEM -Addr -CheckerCPU::dbg_vtophys(Addr addr) -{ - return vtophys(xcProxy, addr); -} -#endif // FULL_SYSTEM - -bool -CheckerCPU::translateInstReq(MemReqPtr &req) -{ -#if FULL_SYSTEM - return (cpuXC->translateInstReq(req) == NoFault); -#else - cpuXC->translateInstReq(req); - return true; -#endif -} - -void -CheckerCPU::translateDataReadReq(MemReqPtr &req) -{ - cpuXC->translateDataReadReq(req); - - if (req->vaddr != unverifiedReq->vaddr) { - warn("%lli: Request virtual addresses do not match! Inst: %#x, " - "checker: %#x", - curTick, unverifiedReq->vaddr, req->vaddr); - handleError(); - } - req->paddr = unverifiedReq->paddr; - - if (checkFlags(req)) { - warn("%lli: Request flags do not match! 
Inst: %#x, checker: %#x", - curTick, unverifiedReq->flags, req->flags); - handleError(); - } -} - -void -CheckerCPU::translateDataWriteReq(MemReqPtr &req) -{ - cpuXC->translateDataWriteReq(req); - - if (req->vaddr != unverifiedReq->vaddr) { - warn("%lli: Request virtual addresses do not match! Inst: %#x, " - "checker: %#x", - curTick, unverifiedReq->vaddr, req->vaddr); - handleError(); - } - req->paddr = unverifiedReq->paddr; - - if (checkFlags(req)) { - warn("%lli: Request flags do not match! Inst: %#x, checker: %#x", - curTick, unverifiedReq->flags, req->flags); - handleError(); - } -} - -bool -CheckerCPU::checkFlags(MemReqPtr &req) -{ - // Remove any dynamic flags that don't have to do with the request itself. - unsigned flags = unverifiedReq->flags; - unsigned mask = LOCKED | PHYSICAL | VPTE | ALTMODE | UNCACHEABLE | NO_FAULT; - flags = flags & (mask); - if (flags == req->flags) { - return false; - } else { - return true; - } -} - -template -void -Checker::tick(DynInstPtr &completed_inst) -{ - DynInstPtr inst; - - // Either check this instruction, or add it to a list of - // instructions waiting to be checked. Instructions must be - // checked in program order, so if a store has committed yet not - // completed, there may be some instructions that are waiting - // behind it that have completed and must be checked. 
- if (!instList.empty()) { - if (youngestSN < completed_inst->seqNum) { - DPRINTF(Checker, "Adding instruction [sn:%lli] PC:%#x to list.\n", - completed_inst->seqNum, completed_inst->readPC()); - instList.push_back(completed_inst); - youngestSN = completed_inst->seqNum; - } - - if (!instList.front()->isCompleted()) { - return; - } else { - inst = instList.front(); - instList.pop_front(); - } - } else { - if (!completed_inst->isCompleted()) { - if (youngestSN < completed_inst->seqNum) { - DPRINTF(Checker, "Adding instruction [sn:%lli] PC:%#x to list.\n", - completed_inst->seqNum, completed_inst->readPC()); - instList.push_back(completed_inst); - youngestSN = completed_inst->seqNum; - } - return; - } else { - if (youngestSN < completed_inst->seqNum) { - inst = completed_inst; - youngestSN = completed_inst->seqNum; - } else { - return; - } - } - } - - // Try to check all instructions that are completed, ending if we - // run out of instructions to check or if an instruction is not - // yet completed. - while (1) { - DPRINTF(Checker, "Processing instruction [sn:%lli] PC:%#x.\n", - inst->seqNum, inst->readPC()); - unverifiedResult.integer = inst->readIntResult(); - unverifiedReq = inst->req; - numCycles++; - - Fault fault = NoFault; - - // maintain $r0 semantics - cpuXC->setIntReg(ZeroReg, 0); -#ifdef TARGET_ALPHA - cpuXC->setFloatRegDouble(ZeroReg, 0.0); -#endif // TARGET_ALPHA - - // Check if any recent PC changes match up with anything we - // expect to happen. This is mostly to check if traps or - // PC-based events have occurred in both the checker and CPU. 
- if (changedPC) { - DPRINTF(Checker, "Changed PC recently to %#x\n", - cpuXC->readPC()); - if (willChangePC) { - if (newPC == cpuXC->readPC()) { - DPRINTF(Checker, "Changed PC matches expected PC\n"); - } else { - warn("%lli: Changed PC does not match expected PC, " - "changed: %#x, expected: %#x", - curTick, cpuXC->readPC(), newPC); - handleError(); - } - willChangePC = false; - } - changedPC = false; - } - if (changedNextPC) { - DPRINTF(Checker, "Changed NextPC recently to %#x\n", - cpuXC->readNextPC()); - changedNextPC = false; - } - - // Try to fetch the instruction - -#if FULL_SYSTEM -#define IFETCH_FLAGS(pc) ((pc) & 1) ? PHYSICAL : 0 -#else -#define IFETCH_FLAGS(pc) 0 -#endif - - // set up memory request for instruction fetch - memReq->cmd = Read; - memReq->reset(cpuXC->readPC() & ~3, sizeof(uint32_t), - IFETCH_FLAGS(cpuXC->readPC())); - - bool succeeded = translateInstReq(memReq); - - if (!succeeded) { - if (inst->getFault() == NoFault) { - // In this case the instruction was not a dummy - // instruction carrying an ITB fault. In the single - // threaded case the ITB should still be able to - // translate this instruction; in the SMT case it's - // possible that its ITB entry was kicked out. - warn("%lli: Instruction PC %#x was not found in the ITB!", - curTick, cpuXC->readPC()); - handleError(); - - // go to the next instruction - cpuXC->setPC(cpuXC->readNextPC()); - cpuXC->setNextPC(cpuXC->readNextPC() + sizeof(MachInst)); - - return; - } else { - // The instruction is carrying an ITB fault. Handle - // the fault and see if our results match the CPU on - // the next tick(). - fault = inst->getFault(); - } - } - - if (fault == NoFault) { - cpuXC->mem->read(memReq, machInst); - - // keep an instruction count - numInst++; - - // decode the instruction - machInst = gtoh(machInst); - // Checks that the instruction matches what we expected it to be. - // Checks both the machine instruction and the PC. 
- validateInst(inst); - - curStaticInst = StaticInst::decode(makeExtMI(machInst, - cpuXC->readPC())); - -#if FULL_SYSTEM - cpuXC->setInst(machInst); -#endif // FULL_SYSTEM - - fault = inst->getFault(); - } - - // Either the instruction was a fault and we should process the fault, - // or we should just go ahead execute the instruction. This assumes - // that the instruction is properly marked as a fault. - if (fault == NoFault) { - - cpuXC->func_exe_inst++; - - fault = curStaticInst->execute(this, NULL); - - // Checks to make sure instrution results are correct. - validateExecution(inst); - - if (curStaticInst->isLoad()) { - ++numLoad; - } - } - - if (fault != NoFault) { -#if FULL_SYSTEM - fault->invoke(xcProxy); - willChangePC = true; - newPC = cpuXC->readPC(); - DPRINTF(Checker, "Fault, PC is now %#x\n", newPC); -#else // !FULL_SYSTEM - fatal("fault (%d) detected @ PC 0x%08p", fault, cpuXC->readPC()); -#endif // FULL_SYSTEM - } else { -#if THE_ISA != MIPS_ISA - // go to the next instruction - cpuXC->setPC(cpuXC->readNextPC()); - cpuXC->setNextPC(cpuXC->readNextPC() + sizeof(MachInst)); -#else - // go to the next instruction - cpuXC->setPC(cpuXC->readNextPC()); - cpuXC->setNextPC(cpuXC->readNextNPC()); - cpuXC->setNextNPC(cpuXC->readNextNPC() + sizeof(MachInst)); -#endif - - } - -#if FULL_SYSTEM - // @todo: Determine if these should happen only if the - // instruction hasn't faulted. In the SimpleCPU case this may - // not be true, but in the O3 or Ozone case this may be true. - Addr oldpc; - int count = 0; - do { - oldpc = cpuXC->readPC(); - system->pcEventQueue.service(xcProxy); - count++; - } while (oldpc != cpuXC->readPC()); - if (count > 1) { - willChangePC = true; - newPC = cpuXC->readPC(); - DPRINTF(Checker, "PC Event, PC is now %#x\n", newPC); - } -#endif - - // @todo: Optionally can check all registers. (Or just those - // that have been modified). 
- validateState(); - - // Continue verifying instructions if there's another completed - // instruction waiting to be verified. - if (instList.empty()) { - break; - } else if (instList.front()->isCompleted()) { - inst = instList.front(); - instList.pop_front(); - } else { - break; - } - } -} - -template -void -Checker::switchOut(Sampler *s) -{ - instList.clear(); -} - -template -void -Checker::takeOverFrom(BaseCPU *oldCPU) -{ -} - -template -void -Checker::validateInst(DynInstPtr &inst) -{ - if (inst->readPC() != cpuXC->readPC()) { - warn("%lli: PCs do not match! Inst: %#x, checker: %#x", - curTick, inst->readPC(), cpuXC->readPC()); - if (changedPC) { - warn("%lli: Changed PCs recently, may not be an error", - curTick); - } else { - handleError(); - } - } - - MachInst mi = static_cast(inst->staticInst->machInst); - - if (mi != machInst) { - warn("%lli: Binary instructions do not match! Inst: %#x, " - "checker: %#x", - curTick, mi, machInst); - handleError(); - } -} - -template -void -Checker::validateExecution(DynInstPtr &inst) -{ - if (inst->numDestRegs()) { - // @todo: Support more destination registers. - if (inst->isUnverifiable()) { - // Unverifiable instructions assume they were executed - // properly by the CPU. Grab the result from the - // instruction and write it to the register. - RegIndex idx = inst->destRegIdx(0); - if (idx < TheISA::FP_Base_DepTag) { - cpuXC->setIntReg(idx, inst->readIntResult()); - } else if (idx < TheISA::Fpcr_DepTag) { - cpuXC->setFloatRegInt(idx, inst->readIntResult()); - } else { - cpuXC->setMiscReg(idx, inst->readIntResult()); - } - } else if (result.integer != inst->readIntResult()) { - warn("%lli: Instruction results do not match! (Results may not " - "actually be integers) Inst: %#x, checker: %#x", - curTick, inst->readIntResult(), result.integer); - handleError(); - } - } - - if (inst->readNextPC() != cpuXC->readNextPC()) { - warn("%lli: Instruction next PCs do not match! 
Inst: %#x, " - "checker: %#x", - curTick, inst->readNextPC(), cpuXC->readNextPC()); - handleError(); - } - - // Checking side effect registers can be difficult if they are not - // checked simultaneously with the execution of the instruction. - // This is because other valid instructions may have modified - // these registers in the meantime, and their values are not - // stored within the DynInst. - while (!miscRegIdxs.empty()) { - int misc_reg_idx = miscRegIdxs.front(); - miscRegIdxs.pop(); - - if (inst->xcBase()->readMiscReg(misc_reg_idx) != - cpuXC->readMiscReg(misc_reg_idx)) { - warn("%lli: Misc reg idx %i (side effect) does not match! " - "Inst: %#x, checker: %#x", - curTick, misc_reg_idx, - inst->xcBase()->readMiscReg(misc_reg_idx), - cpuXC->readMiscReg(misc_reg_idx)); - handleError(); - } - } -} - -template -void -Checker::validateState() -{ -} - -template -void -Checker::dumpInsts() -{ - int num = 0; - - InstListIt inst_list_it = --(instList.end()); - - cprintf("Inst list size: %i\n", instList.size()); - - while (inst_list_it != instList.end()) - { - cprintf("Instruction:%i\n", - num); - - cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" - "Completed:%i\n", - (*inst_list_it)->readPC(), - (*inst_list_it)->seqNum, - (*inst_list_it)->threadNumber, - (*inst_list_it)->isCompleted()); - - cprintf("\n"); - - inst_list_it--; - ++num; - } - -} - -template -class Checker > >; - -template -class Checker > >; diff --git a/cpu/checker/cpu.hh b/cpu/checker/cpu.hh deleted file mode 100644 index 37fe59d95..000000000 --- a/cpu/checker/cpu.hh +++ /dev/null @@ -1,326 +0,0 @@ -/* - * Copyright (c) 2006 The Regents of The University of Michigan - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __CPU_CHECKER_CPU_HH__ -#define __CPU_CHECKER_CPU_HH__ - -#include -#include -#include - -#include "base/statistics.hh" -#include "config/full_system.hh" -#include "cpu/base.hh" -#include "cpu/base_dyn_inst.hh" -#include "cpu/cpu_exec_context.hh" -#include "cpu/pc_event.hh" -#include "cpu/static_inst.hh" -#include "sim/eventq.hh" - -// forward declarations -#if FULL_SYSTEM -class Processor; -class AlphaITB; -class AlphaDTB; -class PhysicalMemory; - -class RemoteGDB; -class GDBListener; - -#else - -class Process; - -#endif // FULL_SYSTEM -template -class BaseDynInst; -class ExecContext; -class MemInterface; -class Checkpoint; -class Sampler; - -class CheckerCPU : public BaseCPU -{ - protected: - typedef TheISA::MachInst MachInst; - typedef TheISA::MiscReg MiscReg; - public: - // main simulation loop (one cycle) - virtual void init(); - - struct Params : public BaseCPU::Params - { -#if FULL_SYSTEM - AlphaITB *itb; - AlphaDTB *dtb; - FunctionalMemory *mem; -#else - Process *process; -#endif - bool exitOnError; - }; - - public: - CheckerCPU(Params *p); - virtual ~CheckerCPU(); - - void setMemory(FunctionalMemory *mem); - - FunctionalMemory *memPtr; - -#if FULL_SYSTEM - void setSystem(System *system); - - System *systemPtr; -#endif - public: - // execution context - CPUExecContext *cpuXC; - - ExecContext *xcProxy; - - AlphaITB *itb; - AlphaDTB *dtb; - -#if FULL_SYSTEM - Addr dbg_vtophys(Addr addr); -#endif - - union Result { - uint64_t integer; - float fp; - double dbl; - }; - - Result result; - - // current instruction - MachInst machInst; - - // Refcounted pointer to the one memory request. 
- MemReqPtr memReq; - - StaticInstPtr curStaticInst; - - // number of simulated instructions - Counter numInst; - Counter startNumInst; - - std::queue miscRegIdxs; - - virtual Counter totalInstructions() const - { - return numInst - startNumInst; - } - - // number of simulated loads - Counter numLoad; - Counter startNumLoad; - - virtual void serialize(std::ostream &os); - virtual void unserialize(Checkpoint *cp, const std::string §ion); - - template - Fault read(Addr addr, T &data, unsigned flags); - - template - Fault write(T data, Addr addr, unsigned flags, uint64_t *res); - - // These functions are only used in CPU models that split - // effective address computation from the actual memory access. - void setEA(Addr EA) { panic("SimpleCPU::setEA() not implemented\n"); } - Addr getEA() { panic("SimpleCPU::getEA() not implemented\n"); } - - void prefetch(Addr addr, unsigned flags) - { - // need to do this... - } - - void writeHint(Addr addr, int size, unsigned flags) - { - // need to do this... - } - - Fault copySrcTranslate(Addr src); - - Fault copy(Addr dest); - - // The register accessor methods provide the index of the - // instruction's operand (e.g., 0 or 1), not the architectural - // register index, to simplify the implementation of register - // renaming. We find the architectural register index by indexing - // into the instruction's own operand index table. Note that a - // raw pointer to the StaticInst is provided instead of a - // ref-counted StaticInstPtr to redice overhead. This is fine as - // long as these methods don't copy the pointer into any long-term - // storage (which is pretty hard to imagine they would have reason - // to do). 
- - uint64_t readIntReg(const StaticInst *si, int idx) - { - return cpuXC->readIntReg(si->srcRegIdx(idx)); - } - - float readFloatRegSingle(const StaticInst *si, int idx) - { - int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; - return cpuXC->readFloatRegSingle(reg_idx); - } - - double readFloatRegDouble(const StaticInst *si, int idx) - { - int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; - return cpuXC->readFloatRegDouble(reg_idx); - } - - uint64_t readFloatRegInt(const StaticInst *si, int idx) - { - int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; - return cpuXC->readFloatRegInt(reg_idx); - } - - void setIntReg(const StaticInst *si, int idx, uint64_t val) - { - cpuXC->setIntReg(si->destRegIdx(idx), val); - result.integer = val; - } - - void setFloatRegSingle(const StaticInst *si, int idx, float val) - { - int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; - cpuXC->setFloatRegSingle(reg_idx, val); - result.fp = val; - } - - void setFloatRegDouble(const StaticInst *si, int idx, double val) - { - int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; - cpuXC->setFloatRegDouble(reg_idx, val); - result.dbl = val; - } - - void setFloatRegInt(const StaticInst *si, int idx, uint64_t val) - { - int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; - cpuXC->setFloatRegInt(reg_idx, val); - result.integer = val; - } - - uint64_t readPC() { return cpuXC->readPC(); } - void setNextPC(uint64_t val) { - cpuXC->setNextPC(val); - } - - MiscReg readMiscReg(int misc_reg) - { - return cpuXC->readMiscReg(misc_reg); - } - - MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) - { - return cpuXC->readMiscRegWithEffect(misc_reg, fault); - } - - Fault setMiscReg(int misc_reg, const MiscReg &val) - { - result.integer = val; - miscRegIdxs.push(misc_reg); - return cpuXC->setMiscReg(misc_reg, val); - } - - Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) - { - miscRegIdxs.push(misc_reg); - return 
cpuXC->setMiscRegWithEffect(misc_reg, val); - } - - void recordPCChange(uint64_t val) { changedPC = true; } - void recordNextPCChange(uint64_t val) { changedNextPC = true; } - - bool translateInstReq(MemReqPtr &req); - void translateDataWriteReq(MemReqPtr &req); - void translateDataReadReq(MemReqPtr &req); - -#if FULL_SYSTEM - Fault hwrei() { return cpuXC->hwrei(); } - int readIntrFlag() { return cpuXC->readIntrFlag(); } - void setIntrFlag(int val) { cpuXC->setIntrFlag(val); } - bool inPalMode() { return cpuXC->inPalMode(); } - void ev5_trap(Fault fault) { fault->invoke(xcProxy); } - bool simPalCheck(int palFunc) { return cpuXC->simPalCheck(palFunc); } -#else - // Assume that the normal CPU's call to syscall was successful. - // The checker's state would have already been updated by the syscall. - void syscall() { } -#endif - - void handleError() - { - if (exitOnError) - panic("Checker found error!"); - } - bool checkFlags(MemReqPtr &req); - - ExecContext *xcBase() { return xcProxy; } - CPUExecContext *cpuXCBase() { return cpuXC; } - - Result unverifiedResult; - MemReqPtr unverifiedReq; - - bool changedPC; - bool willChangePC; - uint64_t newPC; - bool changedNextPC; - bool exitOnError; - - InstSeqNum youngestSN; -}; - -template -class Checker : public CheckerCPU -{ - public: - Checker(Params *p) - : CheckerCPU(p) - { } - - void switchOut(Sampler *s); - void takeOverFrom(BaseCPU *oldCPU); - - void tick(DynInstPtr &inst); - - void validateInst(DynInstPtr &inst); - void validateExecution(DynInstPtr &inst); - void validateState(); - - std::list instList; - typedef typename std::list::iterator InstListIt; - void dumpInsts(); -}; - -#endif // __CPU_CHECKER_CPU_HH__ diff --git a/cpu/checker/cpu_builder.cc b/cpu/checker/cpu_builder.cc deleted file mode 100644 index 397ccab14..000000000 --- a/cpu/checker/cpu_builder.cc +++ /dev/null @@ -1,126 +0,0 @@ - -#include - -#include "cpu/checker/cpu.hh" -#include "cpu/inst_seq.hh" -#include "cpu/ozone/dyn_inst.hh" -#include 
"cpu/ozone/ozone_impl.hh" -#include "mem/base_mem.hh" -#include "sim/builder.hh" -#include "sim/process.hh" -#include "sim/sim_object.hh" - -class OzoneChecker : public Checker > > -{ - public: - OzoneChecker(Params *p) - : Checker > >(p) - { } -}; - -//////////////////////////////////////////////////////////////////////// -// -// CheckerCPU Simulation Object -// -BEGIN_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker) - - Param max_insts_any_thread; - Param max_insts_all_threads; - Param max_loads_any_thread; - Param max_loads_all_threads; - -#if FULL_SYSTEM - SimObjectParam itb; - SimObjectParam dtb; - SimObjectParam mem; - SimObjectParam system; - Param cpu_id; - Param profile; -#else - SimObjectParam workload; -#endif // FULL_SYSTEM - Param clock; - SimObjectParam icache; - SimObjectParam dcache; - - Param defer_registration; - Param exitOnError; - Param function_trace; - Param function_trace_start; - -END_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker) - -BEGIN_INIT_SIM_OBJECT_PARAMS(OzoneChecker) - - INIT_PARAM(max_insts_any_thread, - "terminate when any thread reaches this inst count"), - INIT_PARAM(max_insts_all_threads, - "terminate when all threads have reached this inst count"), - INIT_PARAM(max_loads_any_thread, - "terminate when any thread reaches this load count"), - INIT_PARAM(max_loads_all_threads, - "terminate when all threads have reached this load count"), - -#if FULL_SYSTEM - INIT_PARAM(itb, "Instruction TLB"), - INIT_PARAM(dtb, "Data TLB"), - INIT_PARAM(mem, "memory"), - INIT_PARAM(system, "system object"), - INIT_PARAM(cpu_id, "processor ID"), - INIT_PARAM(profile, ""), -#else - INIT_PARAM(workload, "processes to run"), -#endif // FULL_SYSTEM - - INIT_PARAM(clock, "clock speed"), - INIT_PARAM(icache, "L1 instruction cache object"), - INIT_PARAM(dcache, "L1 data cache object"), - - INIT_PARAM(defer_registration, "defer system registration (for sampling)"), - INIT_PARAM(exitOnError, "exit on error"), - INIT_PARAM(function_trace, "Enable function trace"), - 
INIT_PARAM(function_trace_start, "Cycle to start function trace") - -END_INIT_SIM_OBJECT_PARAMS(OzoneChecker) - - -CREATE_SIM_OBJECT(OzoneChecker) -{ - OzoneChecker::Params *params = new OzoneChecker::Params(); - params->name = getInstanceName(); - params->numberOfThreads = 1; - params->max_insts_any_thread = 0; - params->max_insts_all_threads = 0; - params->max_loads_any_thread = 0; - params->max_loads_all_threads = 0; - params->exitOnError = exitOnError; - params->deferRegistration = defer_registration; - params->functionTrace = function_trace; - params->functionTraceStart = function_trace_start; - params->clock = clock; - // Hack to touch all parameters. Consider not deriving Checker - // from BaseCPU..it's not really a CPU in the end. - Counter temp; - temp = max_insts_any_thread; - temp = max_insts_all_threads; - temp = max_loads_any_thread; - temp = max_loads_all_threads; - BaseMem *cache = icache; - cache = dcache; - -#if FULL_SYSTEM - params->itb = itb; - params->dtb = dtb; - params->mem = mem; - params->system = system; - params->cpu_id = cpu_id; - params->profile = profile; -#else - params->process = workload; -#endif - - OzoneChecker *cpu = new OzoneChecker(params); - return cpu; -} - -REGISTER_SIM_OBJECT("OzoneChecker", OzoneChecker) diff --git a/cpu/checker/exec_context.hh b/cpu/checker/exec_context.hh deleted file mode 100644 index 38784867d..000000000 --- a/cpu/checker/exec_context.hh +++ /dev/null @@ -1,259 +0,0 @@ -/* - * Copyright (c) 2006 The Regents of The University of Michigan - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __CPU_CHECKER_EXEC_CONTEXT_HH__ -#define __CPU_CHECKER_EXEC_CONTEXT_HH__ - -#include "cpu/checker/cpu.hh" -#include "cpu/cpu_exec_context.hh" -#include "cpu/exec_context.hh" - -class EndQuiesceEvent; -namespace Kernel { - class Statistics; -}; - -template -class CheckerExecContext : public ExecContext -{ - public: - CheckerExecContext(XC *actual_xc, - CheckerCPU *checker_cpu) - : actualXC(actual_xc), checkerXC(checker_cpu->cpuXC), - checkerCPU(checker_cpu) - { } - - private: - XC *actualXC; - CPUExecContext *checkerXC; - CheckerCPU *checkerCPU; - - public: - - BaseCPU *getCpuPtr() { return actualXC->getCpuPtr(); } - - void setCpuId(int id) - { - actualXC->setCpuId(id); - checkerXC->setCpuId(id); - } - - int readCpuId() { return actualXC->readCpuId(); } - - FunctionalMemory *getMemPtr() { return actualXC->getMemPtr(); } - -#if FULL_SYSTEM - System *getSystemPtr() { return actualXC->getSystemPtr(); } - - PhysicalMemory *getPhysMemPtr() { return actualXC->getPhysMemPtr(); } - - AlphaITB *getITBPtr() { return actualXC->getITBPtr(); } - - AlphaDTB *getDTBPtr() { return actualXC->getDTBPtr(); } - - Kernel::Statistics *getKernelStats() { return actualXC->getKernelStats(); } -#else - Process *getProcessPtr() { return actualXC->getProcessPtr(); } -#endif - - Status status() const { return actualXC->status(); } - - void setStatus(Status new_status) - { - actualXC->setStatus(new_status); - checkerXC->setStatus(new_status); - } - - /// Set the status to Active. Optional delay indicates number of - /// cycles to wait before beginning execution. - void activate(int delay = 1) { actualXC->activate(delay); } - - /// Set the status to Suspended. - void suspend() { actualXC->suspend(); } - - /// Set the status to Unallocated. - void deallocate() { actualXC->deallocate(); } - - /// Set the status to Halted. 
- void halt() { actualXC->halt(); } - -#if FULL_SYSTEM - void dumpFuncProfile() { actualXC->dumpFuncProfile(); } -#endif - - void takeOverFrom(ExecContext *oldContext) - { - actualXC->takeOverFrom(oldContext); - checkerXC->takeOverFrom(oldContext); - } - - void regStats(const std::string &name) { actualXC->regStats(name); } - - void serialize(std::ostream &os) { actualXC->serialize(os); } - void unserialize(Checkpoint *cp, const std::string §ion) - { actualXC->unserialize(cp, section); } - -#if FULL_SYSTEM - EndQuiesceEvent *getQuiesceEvent() { return actualXC->getQuiesceEvent(); } - - Tick readLastActivate() { return actualXC->readLastActivate(); } - Tick readLastSuspend() { return actualXC->readLastSuspend(); } - - void profileClear() { return actualXC->profileClear(); } - void profileSample() { return actualXC->profileSample(); } -#endif - - int getThreadNum() { return actualXC->getThreadNum(); } - - // @todo: Do I need this? - MachInst getInst() { return actualXC->getInst(); } - - // @todo: Do I need this? - void copyArchRegs(ExecContext *xc) - { - actualXC->copyArchRegs(xc); - checkerXC->copyArchRegs(xc); - } - - void clearArchRegs() - { - actualXC->clearArchRegs(); - checkerXC->clearArchRegs(); - } - - // - // New accessors for new decoder. 
- // - uint64_t readIntReg(int reg_idx) - { return actualXC->readIntReg(reg_idx); } - - float readFloatRegSingle(int reg_idx) - { return actualXC->readFloatRegSingle(reg_idx); } - - double readFloatRegDouble(int reg_idx) - { return actualXC->readFloatRegDouble(reg_idx); } - - uint64_t readFloatRegInt(int reg_idx) - { return actualXC->readFloatRegInt(reg_idx); } - - void setIntReg(int reg_idx, uint64_t val) - { - actualXC->setIntReg(reg_idx, val); - checkerXC->setIntReg(reg_idx, val); - } - - void setFloatRegSingle(int reg_idx, float val) - { - actualXC->setFloatRegSingle(reg_idx, val); - checkerXC->setFloatRegSingle(reg_idx, val); - } - - void setFloatRegDouble(int reg_idx, double val) - { - actualXC->setFloatRegDouble(reg_idx, val); - checkerXC->setFloatRegSingle(reg_idx, val); - } - - void setFloatRegInt(int reg_idx, uint64_t val) - { - actualXC->setFloatRegInt(reg_idx, val); - checkerXC->setFloatRegInt(reg_idx, val); - } - - uint64_t readPC() { return actualXC->readPC(); } - - void setPC(uint64_t val) - { - actualXC->setPC(val); - checkerXC->setPC(val); - checkerCPU->recordPCChange(val); - } - - uint64_t readNextPC() { return actualXC->readNextPC(); } - - void setNextPC(uint64_t val) - { - actualXC->setNextPC(val); - checkerXC->setNextPC(val); - checkerCPU->recordNextPCChange(val); - } - - MiscReg readMiscReg(int misc_reg) - { return actualXC->readMiscReg(misc_reg); } - - MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) - { return actualXC->readMiscRegWithEffect(misc_reg, fault); } - - Fault setMiscReg(int misc_reg, const MiscReg &val) - { - checkerXC->setMiscReg(misc_reg, val); - return actualXC->setMiscReg(misc_reg, val); - } - - Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) - { - checkerXC->setMiscRegWithEffect(misc_reg, val); - return actualXC->setMiscRegWithEffect(misc_reg, val); - } - - unsigned readStCondFailures() - { return actualXC->readStCondFailures(); } - - void setStCondFailures(unsigned sc_failures) - { - 
checkerXC->setStCondFailures(sc_failures); - actualXC->setStCondFailures(sc_failures); - } -#if FULL_SYSTEM - bool inPalMode() { return actualXC->inPalMode(); } -#endif - - // @todo: Fix this! - bool misspeculating() { return actualXC->misspeculating(); } - -#if !FULL_SYSTEM - IntReg getSyscallArg(int i) { return actualXC->getSyscallArg(i); } - - // used to shift args for indirect syscall - void setSyscallArg(int i, IntReg val) - { - checkerXC->setSyscallArg(i, val); - actualXC->setSyscallArg(i, val); - } - - void setSyscallReturn(SyscallReturn return_value) - { - checkerXC->setSyscallReturn(return_value); - actualXC->setSyscallReturn(return_value); - } - - Counter readFuncExeInst() { return actualXC->readFuncExeInst(); } -#endif -}; - -#endif // __CPU_CHECKER_EXEC_CONTEXT_HH__ diff --git a/cpu/checker/o3_cpu_builder.cc b/cpu/checker/o3_cpu_builder.cc deleted file mode 100644 index 125bfa398..000000000 --- a/cpu/checker/o3_cpu_builder.cc +++ /dev/null @@ -1,126 +0,0 @@ - -#include - -#include "cpu/checker/cpu.hh" -#include "cpu/inst_seq.hh" -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_impl.hh" -#include "mem/base_mem.hh" -#include "sim/builder.hh" -#include "sim/process.hh" -#include "sim/sim_object.hh" - -class O3Checker : public Checker > > -{ - public: - O3Checker(Params *p) - : Checker > >(p) - { } -}; - -//////////////////////////////////////////////////////////////////////// -// -// CheckerCPU Simulation Object -// -BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker) - - Param max_insts_any_thread; - Param max_insts_all_threads; - Param max_loads_any_thread; - Param max_loads_all_threads; - -#if FULL_SYSTEM - SimObjectParam itb; - SimObjectParam dtb; - SimObjectParam mem; - SimObjectParam system; - Param cpu_id; - Param profile; -#else - SimObjectParam workload; -#endif // FULL_SYSTEM - Param clock; - SimObjectParam icache; - SimObjectParam dcache; - - Param defer_registration; - Param exitOnError; - Param function_trace; - Param function_trace_start; 
- -END_DECLARE_SIM_OBJECT_PARAMS(O3Checker) - -BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker) - - INIT_PARAM(max_insts_any_thread, - "terminate when any thread reaches this inst count"), - INIT_PARAM(max_insts_all_threads, - "terminate when all threads have reached this inst count"), - INIT_PARAM(max_loads_any_thread, - "terminate when any thread reaches this load count"), - INIT_PARAM(max_loads_all_threads, - "terminate when all threads have reached this load count"), - -#if FULL_SYSTEM - INIT_PARAM(itb, "Instruction TLB"), - INIT_PARAM(dtb, "Data TLB"), - INIT_PARAM(mem, "memory"), - INIT_PARAM(system, "system object"), - INIT_PARAM(cpu_id, "processor ID"), - INIT_PARAM(profile, ""), -#else - INIT_PARAM(workload, "processes to run"), -#endif // FULL_SYSTEM - - INIT_PARAM(clock, "clock speed"), - INIT_PARAM(icache, "L1 instruction cache object"), - INIT_PARAM(dcache, "L1 data cache object"), - - INIT_PARAM(defer_registration, "defer system registration (for sampling)"), - INIT_PARAM(exitOnError, "exit on error"), - INIT_PARAM(function_trace, "Enable function trace"), - INIT_PARAM(function_trace_start, "Cycle to start function trace") - -END_INIT_SIM_OBJECT_PARAMS(O3Checker) - - -CREATE_SIM_OBJECT(O3Checker) -{ - O3Checker::Params *params = new O3Checker::Params(); - params->name = getInstanceName(); - params->numberOfThreads = 1; - params->max_insts_any_thread = 0; - params->max_insts_all_threads = 0; - params->max_loads_any_thread = 0; - params->max_loads_all_threads = 0; - params->exitOnError = exitOnError; - params->deferRegistration = defer_registration; - params->functionTrace = function_trace; - params->functionTraceStart = function_trace_start; - params->clock = clock; - // Hack to touch all parameters. Consider not deriving Checker - // from BaseCPU..it's not really a CPU in the end. 
- Counter temp; - temp = max_insts_any_thread; - temp = max_insts_all_threads; - temp = max_loads_any_thread; - temp = max_loads_all_threads; - BaseMem *cache = icache; - cache = dcache; - -#if FULL_SYSTEM - params->itb = itb; - params->dtb = dtb; - params->mem = mem; - params->system = system; - params->cpu_id = cpu_id; - params->profile = profile; -#else - params->process = workload; -#endif - - O3Checker *cpu = new O3Checker(params); - return cpu; -} - -REGISTER_SIM_OBJECT("O3Checker", O3Checker) diff --git a/cpu/o3/dep_graph.hh b/cpu/o3/dep_graph.hh deleted file mode 100644 index f8ae38da4..000000000 --- a/cpu/o3/dep_graph.hh +++ /dev/null @@ -1,213 +0,0 @@ - -#ifndef __CPU_O3_DEP_GRAPH_HH__ -#define __CPU_O3_DEP_GRAPH_HH__ - -#include "cpu/o3/comm.hh" - -template -class DependencyEntry -{ - public: - DependencyEntry() - : inst(NULL), next(NULL) - { } - - DynInstPtr inst; - //Might want to include data about what arch. register the - //dependence is waiting on. - DependencyEntry *next; -}; - -template -class DependencyGraph -{ - public: - typedef DependencyEntry DepEntry; - - DependencyGraph() - : numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0) - { } - - void resize(int num_entries); - - void reset(); - - void insert(PhysRegIndex idx, DynInstPtr &new_inst); - - void setInst(PhysRegIndex idx, DynInstPtr &new_inst) - { dependGraph[idx].inst = new_inst; } - - void clearInst(PhysRegIndex idx) - { dependGraph[idx].inst = NULL; } - - void remove(PhysRegIndex idx, DynInstPtr &inst_to_remove); - - DynInstPtr pop(PhysRegIndex idx); - - bool empty(PhysRegIndex idx) { return !dependGraph[idx].next; } - - /** Debugging function to dump out the dependency graph. - */ - void dump(); - - private: - /** Array of linked lists. Each linked list is a list of all the - * instructions that depend upon a given register. 
The actual - * register's index is used to index into the graph; ie all - * instructions in flight that are dependent upon r34 will be - * in the linked list of dependGraph[34]. - */ - DepEntry *dependGraph; - - int numEntries; - - // Debug variable, remove when done testing. - unsigned memAllocCounter; - - public: - uint64_t nodesTraversed; - uint64_t nodesRemoved; -}; - -template -void -DependencyGraph::resize(int num_entries) -{ - numEntries = num_entries; - dependGraph = new DepEntry[numEntries]; -} - -template -void -DependencyGraph::reset() -{ - // Clear the dependency graph - DepEntry *curr; - DepEntry *prev; - - for (int i = 0; i < numEntries; ++i) { - curr = dependGraph[i].next; - - while (curr) { - memAllocCounter--; - - prev = curr; - curr = prev->next; - prev->inst = NULL; - - delete prev; - } - - if (dependGraph[i].inst) { - dependGraph[i].inst = NULL; - } - - dependGraph[i].next = NULL; - } -} - -template -void -DependencyGraph::insert(PhysRegIndex idx, DynInstPtr &new_inst) -{ - //Add this new, dependent instruction at the head of the dependency - //chain. - - // First create the entry that will be added to the head of the - // dependency chain. - DepEntry *new_entry = new DepEntry; - new_entry->next = dependGraph[idx].next; - new_entry->inst = new_inst; - - // Then actually add it to the chain. - dependGraph[idx].next = new_entry; - - ++memAllocCounter; -} - - -template -void -DependencyGraph::remove(PhysRegIndex idx, - DynInstPtr &inst_to_remove) -{ - DepEntry *prev = &dependGraph[idx]; - DepEntry *curr = dependGraph[idx].next; - - // Make sure curr isn't NULL. Because this instruction is being - // removed from a dependency list, it must have been placed there at - // an earlier time. The dependency chain should not be empty, - // unless the instruction dependent upon it is already ready. - if (curr == NULL) { - return; - } - - nodesRemoved++; - - // Find the instruction to remove within the dependency linked list. 
- while (curr->inst != inst_to_remove) { - prev = curr; - curr = curr->next; - nodesTraversed++; - - assert(curr != NULL); - } - - // Now remove this instruction from the list. - prev->next = curr->next; - - --memAllocCounter; - - // Could push this off to the destructor of DependencyEntry - curr->inst = NULL; - - delete curr; -} - -template -DynInstPtr -DependencyGraph::pop(PhysRegIndex idx) -{ - DepEntry *node; - node = dependGraph[idx].next; - DynInstPtr inst = NULL; - if (node) { - inst = node->inst; - dependGraph[idx].next = node->next; - node->inst = NULL; - memAllocCounter--; - delete node; - } - return inst; -} - -template -void -DependencyGraph::dump() -{ - DepEntry *curr; - - for (int i = 0; i < numEntries; ++i) - { - curr = &dependGraph[i]; - - if (curr->inst) { - cprintf("dependGraph[%i]: producer: %#x [sn:%lli] consumer: ", - i, curr->inst->readPC(), curr->inst->seqNum); - } else { - cprintf("dependGraph[%i]: No producer. consumer: ", i); - } - - while (curr->next != NULL) { - curr = curr->next; - - cprintf("%#x [sn:%lli] ", - curr->inst->readPC(), curr->inst->seqNum); - } - - cprintf("\n"); - } - cprintf("memAllocCounter: %i\n", memAllocCounter); -} - -#endif // __CPU_O3_DEP_GRAPH_HH__ diff --git a/cpu/o3/fu_pool.cc b/cpu/o3/fu_pool.cc deleted file mode 100644 index fb2b5c00d..000000000 --- a/cpu/o3/fu_pool.cc +++ /dev/null @@ -1,295 +0,0 @@ -/* - * Copyright (c) 2002-2005 The Regents of The University of Michigan - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include - -#include "cpu/o3/fu_pool.hh" -#include "encumbered/cpu/full/fu_pool.hh" -#include "sim/builder.hh" - -using namespace std; - -//////////////////////////////////////////////////////////////////////////// -// -// A pool of function units -// - -inline void -FUPool::FUIdxQueue::addFU(int fu_idx) -{ - funcUnitsIdx.push_back(fu_idx); - ++size; -} - -inline int -FUPool::FUIdxQueue::getFU() -{ - int retval = funcUnitsIdx[idx++]; - - if (idx == size) - idx = 0; - - return retval; -} - -FUPool::~FUPool() -{ - fuListIterator i = funcUnits.begin(); - fuListIterator end = funcUnits.end(); - for (; i != end; ++i) - delete *i; -} - - -// Constructor -FUPool::FUPool(string name, vector paramList) - : SimObject(name) -{ - numFU = 0; - - funcUnits.clear(); - - for (int i = 0; i < Num_OpClasses; ++i) { - maxOpLatencies[i] = 0; - maxIssueLatencies[i] = 0; - } - - // - // Iterate through the list of FUDescData structures - // - for (FUDDiterator i = paramList.begin(); i != paramList.end(); ++i) { - - // - // Don't bother with this if we're not going to create any FU's - // - if ((*i)->number) { - // - // Create the FuncUnit object from this structure - // - add the capabilities listed in the FU's operation - // description - // - // We create the first unit, then duplicate it as needed - // - FuncUnit *fu = new FuncUnit; - - OPDDiterator j = (*i)->opDescList.begin(); - OPDDiterator end = (*i)->opDescList.end(); - for (; j != end; ++j) { - // indicate that this pool has this capability - capabilityList.set((*j)->opClass); - - // Add each of the FU's that will have this capability to the - // appropriate queue. 
- for (int k = 0; k < (*i)->number; ++k) - fuPerCapList[(*j)->opClass].addFU(numFU + k); - - // indicate that this FU has the capability - fu->addCapability((*j)->opClass, (*j)->opLat, (*j)->issueLat); - - if ((*j)->opLat > maxOpLatencies[(*j)->opClass]) - maxOpLatencies[(*j)->opClass] = (*j)->opLat; - - if ((*j)->issueLat > maxIssueLatencies[(*j)->opClass]) - maxIssueLatencies[(*j)->opClass] = (*j)->issueLat; - } - - numFU++; - - // Add the appropriate number of copies of this FU to the list - ostringstream s; - - s << (*i)->name() << "(0)"; - fu->name = s.str(); - funcUnits.push_back(fu); - - for (int c = 1; c < (*i)->number; ++c) { - ostringstream s; - numFU++; - FuncUnit *fu2 = new FuncUnit(*fu); - - s << (*i)->name() << "(" << c << ")"; - fu2->name = s.str(); - funcUnits.push_back(fu2); - } - } - } - - unitBusy.resize(numFU); - - for (int i = 0; i < numFU; i++) { - unitBusy[i] = false; - } -} - -void -FUPool::annotateMemoryUnits(unsigned hit_latency) -{ - maxOpLatencies[MemReadOp] = hit_latency; - - fuListIterator i = funcUnits.begin(); - fuListIterator iend = funcUnits.end(); - for (; i != iend; ++i) { - if ((*i)->provides(MemReadOp)) - (*i)->opLatency(MemReadOp) = hit_latency; - - if ((*i)->provides(MemWriteOp)) - (*i)->opLatency(MemWriteOp) = hit_latency; - } -} - -int -FUPool::getUnit(OpClass capability) -{ - // If this pool doesn't have the specified capability, - // return this information to the caller - if (!capabilityList[capability]) - return -2; - - int fu_idx = fuPerCapList[capability].getFU(); - int start_idx = fu_idx; - - // Iterate through the circular queue if needed, stopping if we've reached - // the first element again. 
- while (unitBusy[fu_idx]) { - fu_idx = fuPerCapList[capability].getFU(); - if (fu_idx == start_idx) { - // No FU available - return -1; - } - } - - unitBusy[fu_idx] = true; - - return fu_idx; -} - -void -FUPool::freeUnitNextCycle(int fu_idx) -{ - assert(unitBusy[fu_idx]); - unitsToBeFreed.push_back(fu_idx); -} - -void -FUPool::processFreeUnits() -{ - while (!unitsToBeFreed.empty()) { - int fu_idx = unitsToBeFreed.back(); - unitsToBeFreed.pop_back(); - - assert(unitBusy[fu_idx]); - - unitBusy[fu_idx] = false; - } -} - -void -FUPool::dump() -{ - cout << "Function Unit Pool (" << name() << ")\n"; - cout << "======================================\n"; - cout << "Free List:\n"; - - for (int i = 0; i < numFU; ++i) { - if (unitBusy[i]) { - continue; - } - - cout << " [" << i << "] : "; - - cout << funcUnits[i]->name << " "; - - cout << "\n"; - } - - cout << "======================================\n"; - cout << "Busy List:\n"; - for (int i = 0; i < numFU; ++i) { - if (!unitBusy[i]) { - continue; - } - - cout << " [" << i << "] : "; - - cout << funcUnits[i]->name << " "; - - cout << "\n"; - } -} - -void -FUPool::switchOut() -{ -} - -void -FUPool::takeOverFrom() -{ - for (int i = 0; i < numFU; i++) { - unitBusy[i] = false; - } - unitsToBeFreed.clear(); -} - -// - -//////////////////////////////////////////////////////////////////////////// -// -// The SimObjects we use to get the FU information into the simulator -// -//////////////////////////////////////////////////////////////////////////// - -// -// FUPool - Contails a list of FUDesc objects to make available -// - -// -// The FuPool object -// - -BEGIN_DECLARE_SIM_OBJECT_PARAMS(FUPool) - - SimObjectVectorParam FUList; - -END_DECLARE_SIM_OBJECT_PARAMS(FUPool) - - -BEGIN_INIT_SIM_OBJECT_PARAMS(FUPool) - - INIT_PARAM(FUList, "list of FU's for this pool") - -END_INIT_SIM_OBJECT_PARAMS(FUPool) - - -CREATE_SIM_OBJECT(FUPool) -{ - return new FUPool(getInstanceName(), FUList); -} - -REGISTER_SIM_OBJECT("FUPool", FUPool) - diff 
--git a/cpu/o3/fu_pool.hh b/cpu/o3/fu_pool.hh deleted file mode 100644 index da6fdc802..000000000 --- a/cpu/o3/fu_pool.hh +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (c) 2002-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __CPU_O3_FU_POOL_HH__ -#define __CPU_O3_FU_POOL_HH__ - -#include -#include -#include -#include - -#include "base/sched_list.hh" -#include "encumbered/cpu/full/op_class.hh" -#include "sim/sim_object.hh" - -class FUDesc; -class FuncUnit; - -/** - * Pool of FU's, specific to the new CPU model. The old FU pool had lists of - * free units and busy units, and whenever a FU was needed it would iterate - * through the free units to find a FU that provided the capability. This pool - * has lists of units specific to each of the capabilities, and whenever a FU - * is needed, it iterates through that list to find a free unit. The previous - * FU pool would have to be ticked each cycle to update which units became - * free. This FU pool lets the IEW stage handle freeing units, which frees - * them as their scheduled execution events complete. This limits units in this - * model to either have identical issue and op latencies, or 1 cycle issue - * latencies. - */ -class FUPool : public SimObject -{ - private: - /** Maximum op execution latencies, per op class. */ - unsigned maxOpLatencies[Num_OpClasses]; - /** Maximum issue latencies, per op class. */ - unsigned maxIssueLatencies[Num_OpClasses]; - - /** Bitvector listing capabilities of this FU pool. */ - std::bitset capabilityList; - - /** Bitvector listing which FUs are busy. */ - std::vector unitBusy; - - /** List of units to be freed at the end of this cycle. */ - std::vector unitsToBeFreed; - - /** - * Class that implements a circular queue to hold FU indices. The hope is - * that FUs that have been just used will be moved to the end of the queue - * by iterating through it, thus leaving free units at the head of the - * queue. - */ - class FUIdxQueue { - public: - /** Constructs a circular queue of FU indices. */ - FUIdxQueue() - : idx(0), size(0) - { } - - /** Adds a FU to the queue. 
*/ - inline void addFU(int fu_idx); - - /** Returns the index of the FU at the head of the queue, and changes - * the index to the next element. - */ - inline int getFU(); - - private: - /** Circular queue index. */ - int idx; - - /** Size of the queue. */ - int size; - - /** Queue of FU indices. */ - std::vector funcUnitsIdx; - }; - - /** Per op class queues of FUs that provide that capability. */ - FUIdxQueue fuPerCapList[Num_OpClasses]; - - /** Number of FUs. */ - int numFU; - - /** Functional units. */ - std::vector funcUnits; - - typedef std::vector::iterator fuListIterator; - - public: - - /** Constructs a FU pool. */ - FUPool(std::string name, std::vector l); - ~FUPool(); - - /** Annotates units that provide memory operations. Included only because - * old FU pool provided this function. - */ - void annotateMemoryUnits(unsigned hit_latency); - - /** - * Gets a FU providing the requested capability. Will mark the unit as busy, - * but leaves the freeing of the unit up to the IEW stage. - * @param capability The capability requested. - * @return Returns -2 if the FU pool does not have the capability, -1 if - * there is no free FU, and the FU's index otherwise. - */ - int getUnit(OpClass capability); - - /** Frees a FU at the end of this cycle. */ - void freeUnitNextCycle(int fu_idx); - - /** Frees all FUs on the list. */ - void processFreeUnits(); - - /** Returns the total number of FUs. */ - int size() { return numFU; } - - /** Debugging function used to dump FU information. */ - void dump(); - - /** Returns the operation execution latency of the given capability. */ - unsigned getOpLatency(OpClass capability) { - return maxOpLatencies[capability]; - } - - /** Returns the issue latency of the given capability. 
*/ - unsigned getIssueLatency(OpClass capability) { - return maxIssueLatencies[capability]; - } - - void switchOut(); - void takeOverFrom(); -}; - -#endif // __CPU_O3_FU_POOL_HH__ diff --git a/cpu/o3/lsq.cc b/cpu/o3/lsq.cc deleted file mode 100644 index 8991ab8f8..000000000 --- a/cpu/o3/lsq.cc +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" -#include "cpu/o3/lsq_impl.hh" - -// Force the instantiation of LDSTQ for all the implementations we care about. -template class LSQ; - diff --git a/cpu/o3/lsq.hh b/cpu/o3/lsq.hh deleted file mode 100644 index a1eeccbe7..000000000 --- a/cpu/o3/lsq.hh +++ /dev/null @@ -1,324 +0,0 @@ -/* - * Copyright (c) 2004-2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __CPU_O3_LSQ_HH__ -#define __CPU_O3_LSQ_HH__ - -#include -#include - -#include "config/full_system.hh" -#include "cpu/inst_seq.hh" -//#include "cpu/o3/cpu_policy.hh" -#include "cpu/o3/lsq_unit.hh" -#include "mem/mem_interface.hh" -//#include "mem/page_table.hh" -#include "sim/sim_object.hh" - -template -class LSQ { - public: - typedef typename Impl::Params Params; - typedef typename Impl::FullCPU FullCPU; - typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::CPUPol::IEW IEW; - typedef typename Impl::CPUPol::LSQUnit LSQUnit; - - enum LSQPolicy { - Dynamic, - Partitioned, - Threshold - }; - - /** Constructs an LSQ with the given parameters. */ - LSQ(Params *params); - - /** Returns the name of the LSQ. */ - std::string name() const; - - /** Sets the pointer to the list of active threads. */ - void setActiveThreads(std::list *at_ptr); - /** Sets the CPU pointer. */ - void setCPU(FullCPU *cpu_ptr); - /** Sets the IEW stage pointer. */ - void setIEW(IEW *iew_ptr); - /** Sets the page table pointer. */ -// void setPageTable(PageTable *pt_ptr); - - void switchOut(); - void takeOverFrom(); - - /** Number of entries needed for the given amount of threads.*/ - int entryAmount(int num_threads); - void removeEntries(unsigned tid); - /** Reset the max entries for each thread. */ - void resetEntries(); - /** Resize the max entries for a thread. */ - void resizeEntries(unsigned size, unsigned tid); - - /** Ticks the LSQ. */ - void tick(); - /** Ticks a specific LSQ Unit. */ - void tick(unsigned tid) - { thread[tid].tick(); } - - /** Inserts a load into the LSQ. */ - void insertLoad(DynInstPtr &load_inst); - /** Inserts a store into the LSQ. */ - void insertStore(DynInstPtr &store_inst); - - /** Executes a load. */ - Fault executeLoad(DynInstPtr &inst); - - Fault executeLoad(int lq_idx, unsigned tid) - { return thread[tid].executeLoad(lq_idx); } - - /** Executes a store. 
*/ - Fault executeStore(DynInstPtr &inst); - - /** - * Commits loads up until the given sequence number for a specific thread. - */ - void commitLoads(InstSeqNum &youngest_inst, unsigned tid) - { thread[tid].commitLoads(youngest_inst); } - - /** - * Commits stores up until the given sequence number for a specific thread. - */ - void commitStores(InstSeqNum &youngest_inst, unsigned tid) - { thread[tid].commitStores(youngest_inst); } - - /** - * Attempts to write back stores until all cache ports are used or the - * interface becomes blocked. - */ - void writebackStores(); - /** Same as above, but only for one thread. */ - void writebackStores(unsigned tid); - - /** - * Squash instructions from a thread until the specified sequence number. - */ - void squash(const InstSeqNum &squashed_num, unsigned tid) - { thread[tid].squash(squashed_num); } - - /** Returns whether or not there was a memory ordering violation. */ - bool violation(); - /** - * Returns whether or not there was a memory ordering violation for a - * specific thread. - */ - bool violation(unsigned tid) - { return thread[tid].violation(); } - - /** Returns if a load is blocked due to the memory system for a specific - * thread. - */ - bool loadBlocked(unsigned tid) - { return thread[tid].loadBlocked(); } - - bool isLoadBlockedHandled(unsigned tid) - { return thread[tid].isLoadBlockedHandled(); } - - void setLoadBlockedHandled(unsigned tid) - { thread[tid].setLoadBlockedHandled(); } - - /** Gets the instruction that caused the memory ordering violation. */ - DynInstPtr getMemDepViolator(unsigned tid) - { return thread[tid].getMemDepViolator(); } - - /** Returns the head index of the load queue for a specific thread. */ - int getLoadHead(unsigned tid) - { return thread[tid].getLoadHead(); } - - /** Returns the sequence number of the head of the load queue. */ - InstSeqNum getLoadHeadSeqNum(unsigned tid) - { - return thread[tid].getLoadHeadSeqNum(); - } - - /** Returns the head index of the store queue. 
*/ - int getStoreHead(unsigned tid) - { return thread[tid].getStoreHead(); } - - /** Returns the sequence number of the head of the store queue. */ - InstSeqNum getStoreHeadSeqNum(unsigned tid) - { - return thread[tid].getStoreHeadSeqNum(); - } - - /** Returns the number of instructions in all of the queues. */ - int getCount(); - /** Returns the number of instructions in the queues of one thread. */ - int getCount(unsigned tid) - { return thread[tid].getCount(); } - - /** Returns the total number of loads in the load queue. */ - int numLoads(); - /** Returns the total number of loads for a single thread. */ - int numLoads(unsigned tid) - { return thread[tid].numLoads(); } - - /** Returns the total number of stores in the store queue. */ - int numStores(); - /** Returns the total number of stores for a single thread. */ - int numStores(unsigned tid) - { return thread[tid].numStores(); } - - /** Returns the total number of loads that are ready. */ - int numLoadsReady(); - /** Returns the number of loads that are ready for a single thread. */ - int numLoadsReady(unsigned tid) - { return thread[tid].numLoadsReady(); } - - /** Returns the number of free entries. */ - unsigned numFreeEntries(); - /** Returns the number of free entries for a specific thread. */ - unsigned numFreeEntries(unsigned tid); - - /** Returns if the LSQ is full (either LQ or SQ is full). */ - bool isFull(); - /** - * Returns if the LSQ is full for a specific thread (either LQ or SQ is - * full). - */ - bool isFull(unsigned tid); - - /** Returns if any of the LQs are full. */ - bool lqFull(); - /** Returns if the LQ of a given thread is full. */ - bool lqFull(unsigned tid); - - /** Returns if any of the SQs are full. */ - bool sqFull(); - /** Returns if the SQ of a given thread is full. */ - bool sqFull(unsigned tid); - - /** - * Returns if the LSQ is stalled due to a memory operation that must be - * replayed. 
- */ - bool isStalled(); - /** - * Returns if the LSQ of a specific thread is stalled due to a memory - * operation that must be replayed. - */ - bool isStalled(unsigned tid); - - /** Returns whether or not there are any stores to write back to memory. */ - bool hasStoresToWB(); - - /** Returns whether or not a specific thread has any stores to write back - * to memory. - */ - bool hasStoresToWB(unsigned tid) - { return thread[tid].hasStoresToWB(); } - - /** Returns the number of stores a specific thread has to write back. */ - int numStoresToWB(unsigned tid) - { return thread[tid].numStoresToWB(); } - - /** Returns if the LSQ will write back to memory this cycle. */ - bool willWB(); - /** Returns if the LSQ of a specific thread will write back to memory this - * cycle. - */ - bool willWB(unsigned tid) - { return thread[tid].willWB(); } - - /** Debugging function to print out all instructions. */ - void dumpInsts(); - /** Debugging function to print out instructions from a specific thread. */ - void dumpInsts(unsigned tid) - { thread[tid].dumpInsts(); } - - /** Executes a read operation, using the load specified at the load index. */ - template - Fault read(MemReqPtr &req, T &data, int load_idx); - - /** Executes a store operation, using the store specified at the store - * index. - */ - template - Fault write(MemReqPtr &req, T &data, int store_idx); - - private: - /** The LSQ policy for SMT mode. */ - LSQPolicy lsqPolicy; - - /** The LSQ units for individual threads. */ - LSQUnit thread[Impl::MaxThreads]; - - /** The CPU pointer. */ - FullCPU *cpu; - - /** The IEW stage pointer. */ - IEW *iewStage; - - /** The pointer to the page table. */ -// PageTable *pTable; - - /** List of Active Threads in System. */ - std::list *activeThreads; - - /** Total Size of LQ Entries. */ - unsigned LQEntries; - /** Total Size of SQ Entries. */ - unsigned SQEntries; - - /** Max LQ Size - Used to Enforce Sharing Policies. 
*/ - unsigned maxLQEntries; - - /** Max SQ Size - Used to Enforce Sharing Policies. */ - unsigned maxSQEntries; - - /** Number of Threads. */ - unsigned numThreads; -}; - -template -template -Fault -LSQ::read(MemReqPtr &req, T &data, int load_idx) -{ - unsigned tid = req->thread_num; - - return thread[tid].read(req, data, load_idx); -} - -template -template -Fault -LSQ::write(MemReqPtr &req, T &data, int store_idx) -{ - unsigned tid = req->thread_num; - - return thread[tid].write(req, data, store_idx); -} - -#endif // __CPU_O3_LSQ_HH__ diff --git a/cpu/o3/lsq_impl.hh b/cpu/o3/lsq_impl.hh deleted file mode 100644 index a6ad27522..000000000 --- a/cpu/o3/lsq_impl.hh +++ /dev/null @@ -1,538 +0,0 @@ -/* - * Copyright (c) 2004-2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include - -#include "cpu/o3/lsq.hh" - -using namespace std; - -template -LSQ::LSQ(Params *params) - : LQEntries(params->LQEntries), SQEntries(params->SQEntries), - numThreads(params->numberOfThreads) -{ - DPRINTF(LSQ, "Creating LSQ object.\n"); - - //**********************************************/ - //************ Handle SMT Parameters ***********/ - //**********************************************/ - string policy = params->smtLSQPolicy; - - //Convert string to lowercase - std::transform(policy.begin(), policy.end(), policy.begin(), - (int(*)(int)) tolower); - - //Figure out fetch policy - if (policy == "dynamic") { - lsqPolicy = Dynamic; - - maxLQEntries = LQEntries; - maxSQEntries = SQEntries; - - DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n"); - - } else if (policy == "partitioned") { - lsqPolicy = Partitioned; - - //@todo:make work if part_amt doesnt divide evenly. 
- maxLQEntries = LQEntries / numThreads; - maxSQEntries = SQEntries / numThreads; - - DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: " - "%i entries per LQ | %i entries per SQ", - maxLQEntries,maxSQEntries); - - } else if (policy == "threshold") { - lsqPolicy = Threshold; - - assert(params->smtLSQThreshold > LQEntries); - assert(params->smtLSQThreshold > SQEntries); - - //Divide up by threshold amount - //@todo: Should threads check the max and the total - //amount of the LSQ - maxLQEntries = params->smtLSQThreshold; - maxSQEntries = params->smtLSQThreshold; - - DPRINTF(LSQ, "LSQ sharing policy set to Threshold: " - "%i entries per LQ | %i entries per SQ", - maxLQEntries,maxSQEntries); - - } else { - assert(0 && "Invalid LSQ Sharing Policy.Options Are:{Dynamic," - "Partitioned, Threshold}"); - } - - //Initialize LSQs - for (int tid=0; tid < numThreads; tid++) { - thread[tid].init(params, maxLQEntries, maxSQEntries, tid); - } -} - - -template -std::string -LSQ::name() const -{ - return iewStage->name() + ".lsq"; -} - -template -void -LSQ::setActiveThreads(list *at_ptr) -{ - activeThreads = at_ptr; - assert(activeThreads != 0); -} - -template -void -LSQ::setCPU(FullCPU *cpu_ptr) -{ - cpu = cpu_ptr; - - for (int tid=0; tid < numThreads; tid++) { - thread[tid].setCPU(cpu_ptr); - } -} - -template -void -LSQ::setIEW(IEW *iew_ptr) -{ - iewStage = iew_ptr; - - for (int tid=0; tid < numThreads; tid++) { - thread[tid].setIEW(iew_ptr); - } -} - -#if 0 -template -void -LSQ::setPageTable(PageTable *pt_ptr) -{ - for (int tid=0; tid < numThreads; tid++) { - thread[tid].setPageTable(pt_ptr); - } -} -#endif - -template -void -LSQ::switchOut() -{ - for (int tid = 0; tid < numThreads; tid++) { - thread[tid].switchOut(); - } -} - -template -void -LSQ::takeOverFrom() -{ - for (int tid = 0; tid < numThreads; tid++) { - thread[tid].takeOverFrom(); - } -} - -template -int -LSQ::entryAmount(int num_threads) -{ - if (lsqPolicy == Partitioned) { - return LQEntries / num_threads; - } 
else { - return 0; - } -} - -template -void -LSQ::resetEntries() -{ - if (lsqPolicy != Dynamic || numThreads > 1) { - int active_threads = (*activeThreads).size(); - - list::iterator threads = (*activeThreads).begin(); - list::iterator list_end = (*activeThreads).end(); - - int maxEntries; - - if (lsqPolicy == Partitioned) { - maxEntries = LQEntries / active_threads; - } else if (lsqPolicy == Threshold && active_threads == 1) { - maxEntries = LQEntries; - } else { - maxEntries = LQEntries; - } - - while (threads != list_end) { - resizeEntries(maxEntries,*threads++); - } - } -} - -template -void -LSQ::removeEntries(unsigned tid) -{ - thread[tid].clearLQ(); - thread[tid].clearSQ(); -} - -template -void -LSQ::resizeEntries(unsigned size,unsigned tid) -{ - thread[tid].resizeLQ(size); - thread[tid].resizeSQ(size); -} - -template -void -LSQ::tick() -{ - list::iterator active_threads = (*activeThreads).begin(); - - while (active_threads != (*activeThreads).end()) { - unsigned tid = *active_threads++; - - thread[tid].tick(); - } -} - -template -void -LSQ::insertLoad(DynInstPtr &load_inst) -{ - unsigned tid = load_inst->threadNumber; - - thread[tid].insertLoad(load_inst); -} - -template -void -LSQ::insertStore(DynInstPtr &store_inst) -{ - unsigned tid = store_inst->threadNumber; - - thread[tid].insertStore(store_inst); -} - -template -Fault -LSQ::executeLoad(DynInstPtr &inst) -{ - unsigned tid = inst->threadNumber; - - return thread[tid].executeLoad(inst); -} - -template -Fault -LSQ::executeStore(DynInstPtr &inst) -{ - unsigned tid = inst->threadNumber; - - return thread[tid].executeStore(inst); -} - -template -void -LSQ::writebackStores() -{ - list::iterator active_threads = (*activeThreads).begin(); - - while (active_threads != (*activeThreads).end()) { - unsigned tid = *active_threads++; - - if (numStoresToWB(tid) > 0) { - DPRINTF(Writeback,"[tid:%i] Writing back stores. 
%i stores " - "available for Writeback.\n", tid, numStoresToWB(tid)); - } - - thread[tid].writebackStores(); - } -} - -template -bool -LSQ::violation() -{ - /* Answers: Does Anybody Have a Violation?*/ - list::iterator active_threads = (*activeThreads).begin(); - - while (active_threads != (*activeThreads).end()) { - unsigned tid = *active_threads++; - if (thread[tid].violation()) - return true; - } - - return false; -} - -template -int -LSQ::getCount() -{ - unsigned total = 0; - - list::iterator active_threads = (*activeThreads).begin(); - - while (active_threads != (*activeThreads).end()) { - unsigned tid = *active_threads++; - total += getCount(tid); - } - - return total; -} - -template -int -LSQ::numLoads() -{ - unsigned total = 0; - - list::iterator active_threads = (*activeThreads).begin(); - - while (active_threads != (*activeThreads).end()) { - unsigned tid = *active_threads++; - total += numLoads(tid); - } - - return total; -} - -template -int -LSQ::numStores() -{ - unsigned total = 0; - - list::iterator active_threads = (*activeThreads).begin(); - - while (active_threads != (*activeThreads).end()) { - unsigned tid = *active_threads++; - total += thread[tid].numStores(); - } - - return total; -} - -template -int -LSQ::numLoadsReady() -{ - unsigned total = 0; - - list::iterator active_threads = (*activeThreads).begin(); - - while (active_threads != (*activeThreads).end()) { - unsigned tid = *active_threads++; - total += thread[tid].numLoadsReady(); - } - - return total; -} - -template -unsigned -LSQ::numFreeEntries() -{ - unsigned total = 0; - - list::iterator active_threads = (*activeThreads).begin(); - - while (active_threads != (*activeThreads).end()) { - unsigned tid = *active_threads++; - total += thread[tid].numFreeEntries(); - } - - return total; -} - -template -unsigned -LSQ::numFreeEntries(unsigned tid) -{ - //if( lsqPolicy == Dynamic ) - //return numFreeEntries(); - //else - return thread[tid].numFreeEntries(); -} - -template -bool -LSQ::isFull() 
-{ - list::iterator active_threads = (*activeThreads).begin(); - - while (active_threads != (*activeThreads).end()) { - unsigned tid = *active_threads++; - if (! (thread[tid].lqFull() || thread[tid].sqFull()) ) - return false; - } - - return true; -} - -template -bool -LSQ::isFull(unsigned tid) -{ - //@todo: Change to Calculate All Entries for - //Dynamic Policy - if( lsqPolicy == Dynamic ) - return isFull(); - else - return thread[tid].lqFull() || thread[tid].sqFull(); -} - -template -bool -LSQ::lqFull() -{ - list::iterator active_threads = (*activeThreads).begin(); - - while (active_threads != (*activeThreads).end()) { - unsigned tid = *active_threads++; - if (!thread[tid].lqFull()) - return false; - } - - return true; -} - -template -bool -LSQ::lqFull(unsigned tid) -{ - //@todo: Change to Calculate All Entries for - //Dynamic Policy - if( lsqPolicy == Dynamic ) - return lqFull(); - else - return thread[tid].lqFull(); -} - -template -bool -LSQ::sqFull() -{ - list::iterator active_threads = (*activeThreads).begin(); - - while (active_threads != (*activeThreads).end()) { - unsigned tid = *active_threads++; - if (!sqFull(tid)) - return false; - } - - return true; -} - -template -bool -LSQ::sqFull(unsigned tid) -{ - //@todo: Change to Calculate All Entries for - //Dynamic Policy - if( lsqPolicy == Dynamic ) - return sqFull(); - else - return thread[tid].sqFull(); -} - -template -bool -LSQ::isStalled() -{ - list::iterator active_threads = (*activeThreads).begin(); - - while (active_threads != (*activeThreads).end()) { - unsigned tid = *active_threads++; - if (!thread[tid].isStalled()) - return false; - } - - return true; -} - -template -bool -LSQ::isStalled(unsigned tid) -{ - if( lsqPolicy == Dynamic ) - return isStalled(); - else - return thread[tid].isStalled(); -} - -template -bool -LSQ::hasStoresToWB() -{ - list::iterator active_threads = (*activeThreads).begin(); - - while (active_threads != (*activeThreads).end()) { - unsigned tid = *active_threads++; - if 
(!hasStoresToWB(tid)) - return false; - } - - return true; -} - -template -bool -LSQ::willWB() -{ - list::iterator active_threads = (*activeThreads).begin(); - - while (active_threads != (*activeThreads).end()) { - unsigned tid = *active_threads++; - if (!willWB(tid)) - return false; - } - - return true; -} - -template -void -LSQ::dumpInsts() -{ - list::iterator active_threads = (*activeThreads).begin(); - - while (active_threads != (*activeThreads).end()) { - unsigned tid = *active_threads++; - thread[tid].dumpInsts(); - } -} diff --git a/cpu/o3/lsq_unit.cc b/cpu/o3/lsq_unit.cc deleted file mode 100644 index dd29007bc..000000000 --- a/cpu/o3/lsq_unit.cc +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "cpu/o3/alpha_dyn_inst.hh" -#include "cpu/o3/alpha_cpu.hh" -#include "cpu/o3/alpha_impl.hh" -#include "cpu/o3/lsq_unit_impl.hh" - -// Force the instantiation of LDSTQ for all the implementations we care about. -template class LSQUnit; - diff --git a/cpu/o3/lsq_unit.hh b/cpu/o3/lsq_unit.hh deleted file mode 100644 index 942b4583d..000000000 --- a/cpu/o3/lsq_unit.hh +++ /dev/null @@ -1,632 +0,0 @@ -/* - * Copyright (c) 2004-2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __CPU_O3_LSQ_UNIT_HH__ -#define __CPU_O3_LSQ_UNIT_HH__ - -#include -#include -#include - -#include "arch/faults.hh" -#include "config/full_system.hh" -#include "base/hashmap.hh" -#include "cpu/inst_seq.hh" -#include "mem/mem_interface.hh" -//#include "mem/page_table.hh" -//#include "sim/debug.hh" -//#include "sim/sim_object.hh" - -/** - * Class that implements the actual LQ and SQ for each specific - * thread. Both are circular queues; load entries are freed upon - * committing, while store entries are freed once they writeback. The - * LSQUnit tracks if there are memory ordering violations, and also - * detects partial load to store forwarding cases (a store only has - * part of a load's data) that requires the load to wait until the - * store writes back. In the former case it holds onto the instruction - * until the dependence unit looks at it, and in the latter it stalls - * the LSQ until the store writes back. At that point the load is - * replayed. 
- */ -template -class LSQUnit { - protected: - typedef TheISA::IntReg IntReg; - public: - typedef typename Impl::Params Params; - typedef typename Impl::FullCPU FullCPU; - typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::CPUPol::IEW IEW; - typedef typename Impl::CPUPol::IssueStruct IssueStruct; - - private: - class StoreCompletionEvent : public Event { - public: - /** Constructs a store completion event. */ - StoreCompletionEvent(int store_idx, Event *wb_event, LSQUnit *lsq_ptr); - - /** Processes the store completion event. */ - void process(); - - /** Returns the description of this event. */ - const char *description(); - - /** The writeback event for the store. Needed for store - * conditionals. - */ - Event *wbEvent; - - private: - /** The store index of the store being written back. */ - int storeIdx; - private: - /** The pointer to the LSQ unit that issued the store. */ - LSQUnit *lsqPtr; - }; - - public: - /** Constructs an LSQ unit. init() must be called prior to use. */ - LSQUnit(); - - /** Initializes the LSQ unit with the specified number of entries. */ - void init(Params *params, unsigned maxLQEntries, - unsigned maxSQEntries, unsigned id); - - /** Returns the name of the LSQ unit. */ - std::string name() const; - - /** Sets the CPU pointer. */ - void setCPU(FullCPU *cpu_ptr) - { cpu = cpu_ptr; } - - /** Sets the IEW stage pointer. */ - void setIEW(IEW *iew_ptr) - { iewStage = iew_ptr; } - - /** Sets the page table pointer. */ -// void setPageTable(PageTable *pt_ptr); - - void switchOut(); - - void takeOverFrom(); - - bool isSwitchedOut() { return switchedOut; } - - /** Ticks the LSQ unit, which in this case only resets the number of - * used cache ports. - * @todo: Move the number of used ports up to the LSQ level so it can - * be shared by all LSQ units. - */ - void tick() { usedPorts = 0; } - - /** Inserts an instruction. */ - void insert(DynInstPtr &inst); - /** Inserts a load instruction. 
*/ - void insertLoad(DynInstPtr &load_inst); - /** Inserts a store instruction. */ - void insertStore(DynInstPtr &store_inst); - - /** Executes a load instruction. */ - Fault executeLoad(DynInstPtr &inst); - - Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; } - /** Executes a store instruction. */ - Fault executeStore(DynInstPtr &inst); - - /** Commits the head load. */ - void commitLoad(); - /** Commits loads older than a specific sequence number. */ - void commitLoads(InstSeqNum &youngest_inst); - - /** Commits stores older than a specific sequence number. */ - void commitStores(InstSeqNum &youngest_inst); - - /** Writes back stores. */ - void writebackStores(); - - // @todo: Include stats in the LSQ unit. - //void regStats(); - - /** Clears all the entries in the LQ. */ - void clearLQ(); - - /** Clears all the entries in the SQ. */ - void clearSQ(); - - /** Resizes the LQ to a given size. */ - void resizeLQ(unsigned size); - - /** Resizes the SQ to a given size. */ - void resizeSQ(unsigned size); - - /** Squashes all instructions younger than a specific sequence number. */ - void squash(const InstSeqNum &squashed_num); - - /** Returns if there is a memory ordering violation. Value is reset upon - * call to getMemDepViolator(). - */ - bool violation() { return memDepViolator; } - - /** Returns the memory ordering violator. */ - DynInstPtr getMemDepViolator(); - - /** Returns if a load became blocked due to the memory system. */ - bool loadBlocked() - { return isLoadBlocked; } - - void clearLoadBlocked() - { isLoadBlocked = false; } - - bool isLoadBlockedHandled() - { return loadBlockedHandled; } - - void setLoadBlockedHandled() - { loadBlockedHandled = true; } - - /** Returns the number of free entries (min of free LQ and SQ entries). */ - unsigned numFreeEntries(); - - /** Returns the number of loads ready to execute. */ - int numLoadsReady(); - - /** Returns the number of loads in the LQ. 
*/ - int numLoads() { return loads; } - - /** Returns the number of stores in the SQ. */ - int numStores() { return stores; } - - /** Returns if either the LQ or SQ is full. */ - bool isFull() { return lqFull() || sqFull(); } - - /** Returns if the LQ is full. */ - bool lqFull() { return loads >= (LQEntries - 1); } - - /** Returns if the SQ is full. */ - bool sqFull() { return stores >= (SQEntries - 1); } - - /** Returns the number of instructions in the LSQ. */ - unsigned getCount() { return loads + stores; } - - /** Returns if there are any stores to writeback. */ - bool hasStoresToWB() { return storesToWB; } - - /** Returns the number of stores to writeback. */ - int numStoresToWB() { return storesToWB; } - - /** Returns if the LSQ unit will writeback on this cycle. */ - bool willWB() { return storeQueue[storeWBIdx].canWB && - !storeQueue[storeWBIdx].completed && - !dcacheInterface->isBlocked(); } - - private: - /** Completes the store at the specified index. */ - void completeStore(int store_idx); - - /** Increments the given store index (circular queue). */ - inline void incrStIdx(int &store_idx); - /** Decrements the given store index (circular queue). */ - inline void decrStIdx(int &store_idx); - /** Increments the given load index (circular queue). */ - inline void incrLdIdx(int &load_idx); - /** Decrements the given load index (circular queue). */ - inline void decrLdIdx(int &load_idx); - - public: - /** Debugging function to dump instructions in the LSQ. */ - void dumpInsts(); - - private: - /** Pointer to the CPU. */ - FullCPU *cpu; - - /** Pointer to the IEW stage. */ - IEW *iewStage; - - /** Pointer to the D-cache. */ - MemInterface *dcacheInterface; - - /** Pointer to the page table. */ -// PageTable *pTable; - - public: - struct SQEntry { - /** Constructs an empty store queue entry. */ - SQEntry() - : inst(NULL), req(NULL), size(0), data(0), - canWB(0), committed(0), completed(0) - { } - - /** Constructs a store queue entry for a given instruction. 
*/ - SQEntry(DynInstPtr &_inst) - : inst(_inst), req(NULL), size(0), data(0), - canWB(0), committed(0), completed(0) - { } - - /** The store instruction. */ - DynInstPtr inst; - /** The memory request for the store. */ - MemReqPtr req; - /** The size of the store. */ - int size; - /** The store data. */ - IntReg data; - /** Whether or not the store can writeback. */ - bool canWB; - /** Whether or not the store is committed. */ - bool committed; - /** Whether or not the store is completed. */ - bool completed; - }; - - private: - /** The LSQUnit thread id. */ - unsigned lsqID; - - /** The store queue. */ - std::vector storeQueue; - - /** The load queue. */ - std::vector loadQueue; - - /** The number of LQ entries, plus a sentinel entry (circular queue). - * @todo: Consider having var that records the true number of LQ entries. - */ - unsigned LQEntries; - /** The number of SQ entries, plus a sentinel entry (circular queue). - * @todo: Consider having var that records the true number of SQ entries. - */ - unsigned SQEntries; - - /** The number of load instructions in the LQ. */ - int loads; - /** The number of store instructions in the SQ. */ - int stores; - /** The number of store instructions in the SQ waiting to writeback. */ - int storesToWB; - - /** The index of the head instruction in the LQ. */ - int loadHead; - /** The index of the tail instruction in the LQ. */ - int loadTail; - - /** The index of the head instruction in the SQ. */ - int storeHead; - /** The index of the first instruction that may be ready to be - * written back, and has not yet been written back. - */ - int storeWBIdx; - /** The index of the tail instruction in the SQ. */ - int storeTail; - - /// @todo Consider moving to a more advanced model with write vs read ports - /** The number of cache ports available each cycle. */ - int cachePorts; - - /** The number of used cache ports in this cycle. 
*/ - int usedPorts; - - bool switchedOut; - - //list mshrSeqNums; - - /** Wire to read information from the issue stage time queue. */ - typename TimeBuffer::wire fromIssue; - - /** Whether or not the LSQ is stalled. */ - bool stalled; - /** The store that causes the stall due to partial store to load - * forwarding. - */ - InstSeqNum stallingStoreIsn; - /** The index of the above store. */ - int stallingLoadIdx; - - /** Whether or not a load is blocked due to the memory system. */ - bool isLoadBlocked; - - bool loadBlockedHandled; - - InstSeqNum blockedLoadSeqNum; - - /** The oldest load that caused a memory ordering violation. */ - DynInstPtr memDepViolator; - - // Will also need how many read/write ports the Dcache has. Or keep track - // of that in stage that is one level up, and only call executeLoad/Store - // the appropriate number of times. -/* - // total number of loads forwaded from LSQ stores - Stats::Vector<> lsq_forw_loads; - - // total number of loads ignored due to invalid addresses - Stats::Vector<> inv_addr_loads; - - // total number of software prefetches ignored due to invalid addresses - Stats::Vector<> inv_addr_swpfs; - - // total non-speculative bogus addresses seen (debug var) - Counter sim_invalid_addrs; - Stats::Vector<> fu_busy; //cumulative fu busy - - // ready loads blocked due to memory disambiguation - Stats::Vector<> lsq_blocked_loads; - - Stats::Scalar<> lsqInversion; -*/ - public: - /** Executes the load at the given index. */ - template - Fault read(MemReqPtr &req, T &data, int load_idx); - - /** Executes the store at the given index. */ - template - Fault write(MemReqPtr &req, T &data, int store_idx); - - /** Returns the index of the head load instruction. */ - int getLoadHead() { return loadHead; } - /** Returns the sequence number of the head load instruction. 
*/ - InstSeqNum getLoadHeadSeqNum() - { - if (loadQueue[loadHead]) { - return loadQueue[loadHead]->seqNum; - } else { - return 0; - } - - } - - /** Returns the index of the head store instruction. */ - int getStoreHead() { return storeHead; } - /** Returns the sequence number of the head store instruction. */ - InstSeqNum getStoreHeadSeqNum() - { - if (storeQueue[storeHead].inst) { - return storeQueue[storeHead].inst->seqNum; - } else { - return 0; - } - - } - - /** Returns whether or not the LSQ unit is stalled. */ - bool isStalled() { return stalled; } -}; - -template -template -Fault -LSQUnit::read(MemReqPtr &req, T &data, int load_idx) -{ - assert(loadQueue[load_idx]); - - assert(!loadQueue[load_idx]->isExecuted()); - - // Make sure this isn't an uncacheable access - // A bit of a hackish way to get uncached accesses to work only if they're - // at the head of the LSQ and are ready to commit (at the head of the ROB - // too). - if (req->flags & UNCACHEABLE && - (load_idx != loadHead || !loadQueue[load_idx]->reachedCommit)) { - iewStage->rescheduleMemInst(loadQueue[load_idx]); - return TheISA::genMachineCheckFault(); - } - - // Check the SQ for any previous stores that might lead to forwarding - int store_idx = loadQueue[load_idx]->sqIdx; - - int store_size = 0; - - DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, " - "storeHead: %i addr: %#x\n", - load_idx, store_idx, storeHead, req->paddr); - -#if 0 - if (req->flags & LOCKED) { - cpu->lockAddr = req->paddr; - cpu->lockFlag = true; - } -#endif - req->cmd = Read; - assert(!req->completionEvent); - req->completionEvent = NULL; - req->time = curTick; - - while (store_idx != -1) { - // End once we've reached the top of the LSQ - if (store_idx == storeWBIdx) { - break; - } - - // Move the index to one younger - if (--store_idx < 0) - store_idx += SQEntries; - - assert(storeQueue[store_idx].inst); - - store_size = storeQueue[store_idx].size; - - if (store_size == 0) - continue; - - // Check if the store 
data is within the lower and upper bounds of - // addresses that the request needs. - bool store_has_lower_limit = - req->vaddr >= storeQueue[store_idx].inst->effAddr; - bool store_has_upper_limit = - (req->vaddr + req->size) <= (storeQueue[store_idx].inst->effAddr + - store_size); - bool lower_load_has_store_part = - req->vaddr < (storeQueue[store_idx].inst->effAddr + - store_size); - bool upper_load_has_store_part = - (req->vaddr + req->size) > storeQueue[store_idx].inst->effAddr; - - // If the store's data has all of the data needed, we can forward. - if (store_has_lower_limit && store_has_upper_limit) { - // Get shift amount for offset into the store's data. - int shift_amt = req->vaddr & (store_size - 1); - // @todo: Magic number, assumes byte addressing - shift_amt = shift_amt << 3; - - // Cast this to type T? - data = storeQueue[store_idx].data >> shift_amt; - - assert(!req->data); - req->data = new uint8_t[64]; - - memcpy(req->data, &data, req->size); - - DPRINTF(LSQUnit, "Forwarding from store idx %i to load to " - "addr %#x, data %#x\n", - store_idx, req->vaddr, *(req->data)); - - typename IEW::LdWritebackEvent *wb = - new typename IEW::LdWritebackEvent(loadQueue[load_idx], - iewStage); - - // We'll say this has a 1 cycle load-store forwarding latency - // for now. - // @todo: Need to make this a parameter. - wb->schedule(curTick); - - // Should keep track of stat for forwarded data - return NoFault; - } else if ((store_has_lower_limit && lower_load_has_store_part) || - (store_has_upper_limit && upper_load_has_store_part) || - (lower_load_has_store_part && upper_load_has_store_part)) { - // This is the partial store-load forwarding case where a store - // has only part of the load's data. - - // If it's already been written back, then don't worry about - // stalling on it. - if (storeQueue[store_idx].completed) { - continue; - } - - // Must stall load and force it to retry, so long as it's the oldest - // load that needs to do so. 
- if (!stalled || - (stalled && - loadQueue[load_idx]->seqNum < - loadQueue[stallingLoadIdx]->seqNum)) { - stalled = true; - stallingStoreIsn = storeQueue[store_idx].inst->seqNum; - stallingLoadIdx = load_idx; - } - - // Tell IQ/mem dep unit that this instruction will need to be - // rescheduled eventually - iewStage->rescheduleMemInst(loadQueue[load_idx]); - - // Do not generate a writeback event as this instruction is not - // complete. - DPRINTF(LSQUnit, "Load-store forwarding mis-match. " - "Store idx %i to load addr %#x\n", - store_idx, req->vaddr); - - return NoFault; - } - } - - // If there's no forwarding case, then go access memory - DynInstPtr inst = loadQueue[load_idx]; - - DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n", - loadQueue[load_idx]->seqNum, loadQueue[load_idx]->readPC()); - - assert(!req->data); - req->data = new uint8_t[64]; - Fault fault = cpu->read(req, data); - memcpy(req->data, &data, sizeof(T)); - - ++usedPorts; - - // if we have a cache, do cache access too - if (fault == NoFault && dcacheInterface) { - if (dcacheInterface->isBlocked()) { - // There's an older load that's already going to squash. - if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum) - return NoFault; - - // Record that the load was blocked due to memory. This - // load will squash all instructions after it, be - // refetched, and re-executed. - isLoadBlocked = true; - loadBlockedHandled = false; - blockedLoadSeqNum = inst->seqNum; - // No fault occurred, even though the interface is blocked. 
- return NoFault; - } - - DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n", - loadQueue[load_idx]->readPC()); - - assert(!req->completionEvent); - req->completionEvent = - new typename IEW::LdWritebackEvent(loadQueue[load_idx], iewStage); - MemAccessResult result = dcacheInterface->access(req); - - assert(dcacheInterface->doEvents()); - - if (result != MA_HIT) { - DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n"); - DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", - inst->seqNum); - } else { - DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n"); - DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", - inst->seqNum); - } - } - - return fault; -} - -template -template -Fault -LSQUnit::write(MemReqPtr &req, T &data, int store_idx) -{ - assert(storeQueue[store_idx].inst); - - DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x" - " | storeHead:%i [sn:%i]\n", - store_idx, req->paddr, data, storeHead, - storeQueue[store_idx].inst->seqNum); - - storeQueue[store_idx].req = req; - storeQueue[store_idx].size = sizeof(T); - storeQueue[store_idx].data = data; - - // This function only writes the data to the store queue, so no fault - // can happen here. - return NoFault; -} - -#endif // __CPU_O3_LSQ_UNIT_HH__ diff --git a/cpu/o3/lsq_unit_impl.hh b/cpu/o3/lsq_unit_impl.hh deleted file mode 100644 index 7974ddaad..000000000 --- a/cpu/o3/lsq_unit_impl.hh +++ /dev/null @@ -1,873 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "cpu/checker/cpu.hh" -#include "cpu/o3/lsq_unit.hh" -#include "base/str.hh" - -template -LSQUnit::StoreCompletionEvent::StoreCompletionEvent(int store_idx, - Event *wb_event, - LSQUnit *lsq_ptr) - : Event(&mainEventQueue), - wbEvent(wb_event), - storeIdx(store_idx), - lsqPtr(lsq_ptr) -{ - this->setFlags(Event::AutoDelete); -} - -template -void -LSQUnit::StoreCompletionEvent::process() -{ - DPRINTF(LSQ, "Cache miss complete for store idx:%i\n", storeIdx); - DPRINTF(Activity, "Activity: st writeback event idx:%i\n", storeIdx); - - //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum); - - if (lsqPtr->isSwitchedOut()) - return; - - lsqPtr->cpu->wakeCPU(); - if (wbEvent) - wbEvent->process(); - lsqPtr->completeStore(storeIdx); -} - -template -const char * -LSQUnit::StoreCompletionEvent::description() -{ - return "LSQ store completion event"; -} - -template -LSQUnit::LSQUnit() - : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false), - loadBlockedHandled(false) -{ -} - -template -void -LSQUnit::init(Params *params, unsigned maxLQEntries, - unsigned maxSQEntries, unsigned id) - -{ - DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id); - - switchedOut = false; - - lsqID = id; - - // Add 1 for the sentinel entry (they are circular queues). - LQEntries = maxLQEntries + 1; - SQEntries = maxSQEntries + 1; - - loadQueue.resize(LQEntries); - storeQueue.resize(SQEntries); - - loadHead = loadTail = 0; - - storeHead = storeWBIdx = storeTail = 0; - - usedPorts = 0; - cachePorts = params->cachePorts; - - dcacheInterface = params->dcacheInterface; - - memDepViolator = NULL; - - blockedLoadSeqNum = 0; -} - -template -std::string -LSQUnit::name() const -{ - if (Impl::MaxThreads == 1) { - return iewStage->name() + ".lsq"; - } else { - return iewStage->name() + ".lsq.thread." 
+ to_string(lsqID); - } -} - -template -void -LSQUnit::clearLQ() -{ - loadQueue.clear(); -} - -template -void -LSQUnit::clearSQ() -{ - storeQueue.clear(); -} - -#if 0 -template -void -LSQUnit::setPageTable(PageTable *pt_ptr) -{ - DPRINTF(LSQUnit, "Setting the page table pointer.\n"); - pTable = pt_ptr; -} -#endif - -template -void -LSQUnit::switchOut() -{ - switchedOut = true; - for (int i = 0; i < loadQueue.size(); ++i) - loadQueue[i] = NULL; - - assert(storesToWB == 0); - - while (storesToWB > 0 && - storeWBIdx != storeTail && - storeQueue[storeWBIdx].inst && - storeQueue[storeWBIdx].canWB) { - - if (storeQueue[storeWBIdx].size == 0 || - storeQueue[storeWBIdx].inst->isDataPrefetch() || - storeQueue[storeWBIdx].committed || - storeQueue[storeWBIdx].req->flags & LOCKED) { - incrStIdx(storeWBIdx); - - continue; - } - - assert(storeQueue[storeWBIdx].req); - assert(!storeQueue[storeWBIdx].committed); - - MemReqPtr req = storeQueue[storeWBIdx].req; - storeQueue[storeWBIdx].committed = true; - - req->cmd = Write; - req->completionEvent = NULL; - req->time = curTick; - assert(!req->data); - req->data = new uint8_t[64]; - memcpy(req->data, (uint8_t *)&storeQueue[storeWBIdx].data, req->size); - - DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x " - "to Addr:%#x, data:%#x [sn:%lli]\n", - storeWBIdx,storeQueue[storeWBIdx].inst->readPC(), - req->paddr, *(req->data), - storeQueue[storeWBIdx].inst->seqNum); - - switch(storeQueue[storeWBIdx].size) { - case 1: - cpu->write(req, (uint8_t &)storeQueue[storeWBIdx].data); - break; - case 2: - cpu->write(req, (uint16_t &)storeQueue[storeWBIdx].data); - break; - case 4: - cpu->write(req, (uint32_t &)storeQueue[storeWBIdx].data); - break; - case 8: - cpu->write(req, (uint64_t &)storeQueue[storeWBIdx].data); - break; - default: - panic("Unexpected store size!\n"); - } - incrStIdx(storeWBIdx); - } -} - -template -void -LSQUnit::takeOverFrom() -{ - switchedOut = false; - loads = stores = storesToWB = 0; - - loadHead = loadTail = 
0; - - storeHead = storeWBIdx = storeTail = 0; - - usedPorts = 0; - - memDepViolator = NULL; - - blockedLoadSeqNum = 0; - - stalled = false; - isLoadBlocked = false; - loadBlockedHandled = false; -} - -template -void -LSQUnit::resizeLQ(unsigned size) -{ - unsigned size_plus_sentinel = size + 1; - assert(size_plus_sentinel >= LQEntries); - - if (size_plus_sentinel > LQEntries) { - while (size_plus_sentinel > loadQueue.size()) { - DynInstPtr dummy; - loadQueue.push_back(dummy); - LQEntries++; - } - } else { - LQEntries = size_plus_sentinel; - } - -} - -template -void -LSQUnit::resizeSQ(unsigned size) -{ - unsigned size_plus_sentinel = size + 1; - if (size_plus_sentinel > SQEntries) { - while (size_plus_sentinel > storeQueue.size()) { - SQEntry dummy; - storeQueue.push_back(dummy); - SQEntries++; - } - } else { - SQEntries = size_plus_sentinel; - } -} - -template -void -LSQUnit::insert(DynInstPtr &inst) -{ - assert(inst->isMemRef()); - - assert(inst->isLoad() || inst->isStore()); - - if (inst->isLoad()) { - insertLoad(inst); - } else { - insertStore(inst); - } - - inst->setInLSQ(); -} - -template -void -LSQUnit::insertLoad(DynInstPtr &load_inst) -{ - assert((loadTail + 1) % LQEntries != loadHead); - assert(loads < LQEntries); - - DPRINTF(LSQUnit, "Inserting load PC %#x, idx:%i [sn:%lli]\n", - load_inst->readPC(), loadTail, load_inst->seqNum); - - load_inst->lqIdx = loadTail; - - if (stores == 0) { - load_inst->sqIdx = -1; - } else { - load_inst->sqIdx = storeTail; - } - - loadQueue[loadTail] = load_inst; - - incrLdIdx(loadTail); - - ++loads; -} - -template -void -LSQUnit::insertStore(DynInstPtr &store_inst) -{ - // Make sure it is not full before inserting an instruction. 
- assert((storeTail + 1) % SQEntries != storeHead); - assert(stores < SQEntries); - - DPRINTF(LSQUnit, "Inserting store PC %#x, idx:%i [sn:%lli]\n", - store_inst->readPC(), storeTail, store_inst->seqNum); - - store_inst->sqIdx = storeTail; - store_inst->lqIdx = loadTail; - - storeQueue[storeTail] = SQEntry(store_inst); - - incrStIdx(storeTail); - - ++stores; -} - -template -typename Impl::DynInstPtr -LSQUnit::getMemDepViolator() -{ - DynInstPtr temp = memDepViolator; - - memDepViolator = NULL; - - return temp; -} - -template -unsigned -LSQUnit::numFreeEntries() -{ - unsigned free_lq_entries = LQEntries - loads; - unsigned free_sq_entries = SQEntries - stores; - - // Both the LQ and SQ entries have an extra dummy entry to differentiate - // empty/full conditions. Subtract 1 from the free entries. - if (free_lq_entries < free_sq_entries) { - return free_lq_entries - 1; - } else { - return free_sq_entries - 1; - } -} - -template -int -LSQUnit::numLoadsReady() -{ - int load_idx = loadHead; - int retval = 0; - - while (load_idx != loadTail) { - assert(loadQueue[load_idx]); - - if (loadQueue[load_idx]->readyToIssue()) { - ++retval; - } - } - - return retval; -} - -template -Fault -LSQUnit::executeLoad(DynInstPtr &inst) -{ - // Execute a specific load. - Fault load_fault = NoFault; - - DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n", - inst->readPC(),inst->seqNum); - -// load_fault = inst->initiateAcc(); - load_fault = inst->execute(); - - // If the instruction faulted, then we need to send it along to commit - // without the instruction completing. - if (load_fault != NoFault) { - // Send this instruction to commit, also make sure iew stage - // realizes there is activity. - iewStage->instToCommit(inst); - iewStage->activityThisCycle(); - } - - return load_fault; -} - -template -Fault -LSQUnit::executeStore(DynInstPtr &store_inst) -{ - using namespace TheISA; - // Make sure that a store exists. 
- assert(stores != 0); - - int store_idx = store_inst->sqIdx; - - DPRINTF(LSQUnit, "Executing store PC %#x [sn:%lli]\n", - store_inst->readPC(), store_inst->seqNum); - - // Check the recently completed loads to see if any match this store's - // address. If so, then we have a memory ordering violation. - int load_idx = store_inst->lqIdx; - - Fault store_fault = store_inst->initiateAcc(); -// Fault store_fault = store_inst->execute(); - - if (storeQueue[store_idx].size == 0) { - DPRINTF(LSQUnit,"Fault on Store PC %#x, [sn:%lli],Size = 0\n", - store_inst->readPC(),store_inst->seqNum); - - return store_fault; - } - - assert(store_fault == NoFault); - - if (store_inst->isStoreConditional()) { - // Store conditionals need to set themselves as able to - // writeback if we haven't had a fault by here. - storeQueue[store_idx].canWB = true; - - ++storesToWB; - } - - if (!memDepViolator) { - while (load_idx != loadTail) { - // Really only need to check loads that have actually executed - // It's safe to check all loads because effAddr is set to - // InvalAddr when the dyn inst is created. - - // @todo: For now this is extra conservative, detecting a - // violation if the addresses match assuming all accesses - // are quad word accesses. - - // @todo: Fix this, magic number being used here - if ((loadQueue[load_idx]->effAddr >> 8) == - (store_inst->effAddr >> 8)) { - // A load incorrectly passed this store. Squash and refetch. - // For now return a fault to show that it was unsuccessful. - memDepViolator = loadQueue[load_idx]; - - return genMachineCheckFault(); - } - - incrLdIdx(load_idx); - } - - // If we've reached this point, there was no violation. 
- memDepViolator = NULL; - } - - return store_fault; -} - -template -void -LSQUnit::commitLoad() -{ - assert(loadQueue[loadHead]); - - DPRINTF(LSQUnit, "Committing head load instruction, PC %#x\n", - loadQueue[loadHead]->readPC()); - - - loadQueue[loadHead] = NULL; - - incrLdIdx(loadHead); - - --loads; -} - -template -void -LSQUnit::commitLoads(InstSeqNum &youngest_inst) -{ - assert(loads == 0 || loadQueue[loadHead]); - - while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) { - commitLoad(); - } -} - -template -void -LSQUnit::commitStores(InstSeqNum &youngest_inst) -{ - assert(stores == 0 || storeQueue[storeHead].inst); - - int store_idx = storeHead; - - while (store_idx != storeTail) { - assert(storeQueue[store_idx].inst); - // Mark any stores that are now committed and have not yet - // been marked as able to write back. - if (!storeQueue[store_idx].canWB) { - if (storeQueue[store_idx].inst->seqNum > youngest_inst) { - break; - } - DPRINTF(LSQUnit, "Marking store as able to write back, PC " - "%#x [sn:%lli]\n", - storeQueue[store_idx].inst->readPC(), - storeQueue[store_idx].inst->seqNum); - - storeQueue[store_idx].canWB = true; - - ++storesToWB; - } - - incrStIdx(store_idx); - } -} - -template -void -LSQUnit::writebackStores() -{ - while (storesToWB > 0 && - storeWBIdx != storeTail && - storeQueue[storeWBIdx].inst && - storeQueue[storeWBIdx].canWB && - usedPorts < cachePorts) { - - // Store didn't write any data so no need to write it back to - // memory. 
- if (storeQueue[storeWBIdx].size == 0) { - completeStore(storeWBIdx); - - incrStIdx(storeWBIdx); - - continue; - } - - if (dcacheInterface && dcacheInterface->isBlocked()) { - DPRINTF(LSQUnit, "Unable to write back any more stores, cache" - " is blocked!\n"); - break; - } - - ++usedPorts; - - if (storeQueue[storeWBIdx].inst->isDataPrefetch()) { - incrStIdx(storeWBIdx); - - continue; - } - - assert(storeQueue[storeWBIdx].req); - assert(!storeQueue[storeWBIdx].committed); - - MemReqPtr req = storeQueue[storeWBIdx].req; - storeQueue[storeWBIdx].committed = true; - - req->cmd = Write; - req->completionEvent = NULL; - req->time = curTick; - assert(!req->data); - req->data = new uint8_t[64]; - memcpy(req->data, (uint8_t *)&storeQueue[storeWBIdx].data, req->size); - - DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x " - "to Addr:%#x, data:%#x [sn:%lli]\n", - storeWBIdx,storeQueue[storeWBIdx].inst->readPC(), - req->paddr, *(req->data), - storeQueue[storeWBIdx].inst->seqNum); - - switch(storeQueue[storeWBIdx].size) { - case 1: - cpu->write(req, (uint8_t &)storeQueue[storeWBIdx].data); - break; - case 2: - cpu->write(req, (uint16_t &)storeQueue[storeWBIdx].data); - break; - case 4: - cpu->write(req, (uint32_t &)storeQueue[storeWBIdx].data); - break; - case 8: - cpu->write(req, (uint64_t &)storeQueue[storeWBIdx].data); - break; - default: - panic("Unexpected store size!\n"); - } - - // Stores other than store conditionals are completed at this - // time. Mark them as completed and, if we have a checker, - // tell it that the instruction is completed. - // @todo: Figure out what time I can say stores are complete in - // the timing memory. 
- if (!(req->flags & LOCKED)) { - storeQueue[storeWBIdx].inst->setCompleted(); - if (cpu->checker) { - cpu->checker->tick(storeQueue[storeWBIdx].inst); - } - } - - if (dcacheInterface) { - assert(!req->completionEvent); - StoreCompletionEvent *store_event = new - StoreCompletionEvent(storeWBIdx, NULL, this); - req->completionEvent = store_event; - - MemAccessResult result = dcacheInterface->access(req); - - if (isStalled() && - storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) { - DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] " - "load idx:%i\n", - stallingStoreIsn, stallingLoadIdx); - stalled = false; - stallingStoreIsn = 0; - iewStage->replayMemInst(loadQueue[stallingLoadIdx]); - } - - typename IEW::LdWritebackEvent *wb = NULL; - if (req->flags & LOCKED) { - // Stx_C should not generate a system port transaction - // if it misses in the cache, but that might be hard - // to accomplish without explicit cache support. - wb = new typename - IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst, - iewStage); - store_event->wbEvent = wb; - } - - if (result != MA_HIT && dcacheInterface->doEvents()) { - DPRINTF(LSQUnit,"D-Cache Write Miss on idx:%i!\n", - storeWBIdx); - - DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n", - storeQueue[storeWBIdx].inst->seqNum); - - //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum); - - //DPRINTF(LSQUnit, "Added MSHR. count = %i\n",mshrSeqNums.size()); - - // @todo: Increment stat here. - } else { - DPRINTF(LSQUnit,"D-Cache: Write Hit on idx:%i !\n", - storeWBIdx); - - DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", - storeQueue[storeWBIdx].inst->seqNum); - } - - incrStIdx(storeWBIdx); - } else { - panic("Must HAVE DCACHE!!!!!\n"); - } - } - - // Not sure this should set it to 0. 
- usedPorts = 0; - - assert(stores >= 0 && storesToWB >= 0); -} - -/*template -void -LSQUnit::removeMSHR(InstSeqNum seqNum) -{ - list::iterator mshr_it = find(mshrSeqNums.begin(), - mshrSeqNums.end(), - seqNum); - - if (mshr_it != mshrSeqNums.end()) { - mshrSeqNums.erase(mshr_it); - DPRINTF(LSQUnit, "Removing MSHR. count = %i\n",mshrSeqNums.size()); - } -}*/ - -template -void -LSQUnit::squash(const InstSeqNum &squashed_num) -{ - DPRINTF(LSQUnit, "Squashing until [sn:%lli]!" - "(Loads:%i Stores:%i)\n", squashed_num, loads, stores); - - int load_idx = loadTail; - decrLdIdx(load_idx); - - while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) { - DPRINTF(LSQUnit,"Load Instruction PC %#x squashed, " - "[sn:%lli]\n", - loadQueue[load_idx]->readPC(), - loadQueue[load_idx]->seqNum); - - if (isStalled() && load_idx == stallingLoadIdx) { - stalled = false; - stallingStoreIsn = 0; - stallingLoadIdx = 0; - } - - // Clear the smart pointer to make sure it is decremented. - loadQueue[load_idx]->squashed = true; - loadQueue[load_idx] = NULL; - --loads; - - // Inefficient! - loadTail = load_idx; - - decrLdIdx(load_idx); - } - - if (isLoadBlocked) { - if (squashed_num < blockedLoadSeqNum) { - isLoadBlocked = false; - loadBlockedHandled = false; - blockedLoadSeqNum = 0; - } - } - - int store_idx = storeTail; - decrStIdx(store_idx); - - while (stores != 0 && - storeQueue[store_idx].inst->seqNum > squashed_num) { - // Instructions marked as can WB are already committed. - if (storeQueue[store_idx].canWB) { - break; - } - - DPRINTF(LSQUnit,"Store Instruction PC %#x squashed, " - "idx:%i [sn:%lli]\n", - storeQueue[store_idx].inst->readPC(), - store_idx, storeQueue[store_idx].inst->seqNum); - - // I don't think this can happen. It should have been cleared - // by the stalling load. 
- if (isStalled() && - storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { - panic("Is stalled should have been cleared by stalling load!\n"); - stalled = false; - stallingStoreIsn = 0; - } - - // Clear the smart pointer to make sure it is decremented. - storeQueue[store_idx].inst->squashed = true; - storeQueue[store_idx].inst = NULL; - storeQueue[store_idx].canWB = 0; - - if (storeQueue[store_idx].req) { - // There should not be a completion event if the store has - // not yet committed. - assert(!storeQueue[store_idx].req->completionEvent); - } - - storeQueue[store_idx].req = NULL; - --stores; - - // Inefficient! - storeTail = store_idx; - - decrStIdx(store_idx); - } -} - -template -void -LSQUnit::completeStore(int store_idx) -{ - assert(storeQueue[store_idx].inst); - storeQueue[store_idx].completed = true; - --storesToWB; - // A bit conservative because a store completion may not free up entries, - // but hopefully avoids two store completions in one cycle from making - // the CPU tick twice. - cpu->activityThisCycle(); - - if (store_idx == storeHead) { - do { - incrStIdx(storeHead); - - --stores; - } while (storeQueue[storeHead].completed && - storeHead != storeTail); - - iewStage->updateLSQNextCycle = true; - } - - DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head " - "idx:%i\n", - storeQueue[store_idx].inst->seqNum, store_idx, storeHead); - - if (isStalled() && - storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { - DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] " - "load idx:%i\n", - stallingStoreIsn, stallingLoadIdx); - stalled = false; - stallingStoreIsn = 0; - iewStage->replayMemInst(loadQueue[stallingLoadIdx]); - } - - storeQueue[store_idx].inst->setCompleted(); - - // Tell the checker we've completed this instruction. Some stores - // may get reported twice to the checker, but the checker can - // handle that case. 
- if (cpu->checker) { - cpu->checker->tick(storeQueue[store_idx].inst); - } -} - -template -inline void -LSQUnit::incrStIdx(int &store_idx) -{ - if (++store_idx >= SQEntries) - store_idx = 0; -} - -template -inline void -LSQUnit::decrStIdx(int &store_idx) -{ - if (--store_idx < 0) - store_idx += SQEntries; -} - -template -inline void -LSQUnit::incrLdIdx(int &load_idx) -{ - if (++load_idx >= LQEntries) - load_idx = 0; -} - -template -inline void -LSQUnit::decrLdIdx(int &load_idx) -{ - if (--load_idx < 0) - load_idx += LQEntries; -} - -template -void -LSQUnit::dumpInsts() -{ - cprintf("Load store queue: Dumping instructions.\n"); - cprintf("Load queue size: %i\n", loads); - cprintf("Load queue: "); - - int load_idx = loadHead; - - while (load_idx != loadTail && loadQueue[load_idx]) { - cprintf("%#x ", loadQueue[load_idx]->readPC()); - - incrLdIdx(load_idx); - } - - cprintf("Store queue size: %i\n", stores); - cprintf("Store queue: "); - - int store_idx = storeHead; - - while (store_idx != storeTail && storeQueue[store_idx].inst) { - cprintf("%#x ", storeQueue[store_idx].inst->readPC()); - - incrStIdx(store_idx); - } - - cprintf("\n"); -} diff --git a/cpu/o3/scoreboard.cc b/cpu/o3/scoreboard.cc deleted file mode 100644 index b0e433620..000000000 --- a/cpu/o3/scoreboard.cc +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "cpu/o3/scoreboard.hh" - -Scoreboard::Scoreboard(unsigned activeThreads, - unsigned _numLogicalIntRegs, - unsigned _numPhysicalIntRegs, - unsigned _numLogicalFloatRegs, - unsigned _numPhysicalFloatRegs, - unsigned _numMiscRegs, - unsigned _zeroRegIdx) - : numLogicalIntRegs(_numLogicalIntRegs), - numPhysicalIntRegs(_numPhysicalIntRegs), - numLogicalFloatRegs(_numLogicalFloatRegs), - numPhysicalFloatRegs(_numPhysicalFloatRegs), - numMiscRegs(_numMiscRegs), - zeroRegIdx(_zeroRegIdx) -{ - //Get Register Sizes - numLogicalRegs = numLogicalIntRegs + numLogicalFloatRegs; - numPhysicalRegs = numPhysicalIntRegs + numPhysicalFloatRegs; - - //Resize scoreboard appropriately - regScoreBoard.resize(numPhysicalRegs + (numMiscRegs * activeThreads)); - - //Initialize values - for (int i=0; i < numLogicalIntRegs * activeThreads; i++) { - regScoreBoard[i] = 1; - } - - for (int i= numPhysicalIntRegs; - i < numPhysicalIntRegs + (numLogicalFloatRegs * activeThreads); - i++) { - regScoreBoard[i] = 1; - } - - for (int i = numPhysicalRegs; - i < numPhysicalRegs + (numMiscRegs * activeThreads); - i++) { - regScoreBoard[i] = 1; - } -} - -std::string -Scoreboard::name() const -{ - return "cpu.scoreboard"; -} - -bool -Scoreboard::getReg(PhysRegIndex phys_reg) -{ - // Always ready if int or fp zero reg. - if (phys_reg == zeroRegIdx || - phys_reg == (zeroRegIdx + numPhysicalIntRegs)) { - return 1; - } - - return regScoreBoard[phys_reg]; -} - -void -Scoreboard::setReg(PhysRegIndex phys_reg) -{ - DPRINTF(Scoreboard, "Setting reg %i as ready\n", phys_reg); - - regScoreBoard[phys_reg] = 1; -} - -void -Scoreboard::unsetReg(PhysRegIndex ready_reg) -{ - if (ready_reg == zeroRegIdx || - ready_reg == (zeroRegIdx + numPhysicalIntRegs)) { - // Don't do anything if int or fp zero reg. 
- return; - } - - regScoreBoard[ready_reg] = 0; -} diff --git a/cpu/o3/scoreboard.hh b/cpu/o3/scoreboard.hh deleted file mode 100644 index 77f2cf157..000000000 --- a/cpu/o3/scoreboard.hh +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __CPU_O3_SCOREBOARD_HH__ -#define __CPU_O3_SCOREBOARD_HH__ - -#include -#include -#include -#include "arch/alpha/isa_traits.hh" -#include "base/trace.hh" -#include "base/traceflags.hh" -#include "cpu/o3/comm.hh" - -/** - * Implements a simple scoreboard to track which registers are ready. - * This class assumes that the fp registers start, index wise, right after - * the integer registers. The misc. registers start, index wise, right after - * the fp registers. - * @todo: Fix up handling of the zero register in case the decoder does not - * automatically make insts that write the zero register into nops. - */ -class Scoreboard -{ - public: - /** Constructs a scoreboard. - * @param activeThreads The number of active threads. - * @param _numLogicalIntRegs Number of logical integer registers. - * @param _numPhysicalIntRegs Number of physical integer registers. - * @param _numLogicalFloatRegs Number of logical fp registers. - * @param _numPhysicalFloatRegs Number of physical fp registers. - * @param _numMiscRegs Number of miscellaneous registers. - * @param _zeroRegIdx Index of the zero register. - */ - Scoreboard(unsigned activeThreads, - unsigned _numLogicalIntRegs, - unsigned _numPhysicalIntRegs, - unsigned _numLogicalFloatRegs, - unsigned _numPhysicalFloatRegs, - unsigned _numMiscRegs, - unsigned _zeroRegIdx); - - /** Destructor. */ - ~Scoreboard() {} - - /** Returns the name of the scoreboard. */ - std::string name() const; - - /** Checks if the register is ready. */ - bool getReg(PhysRegIndex ready_reg); - - /** Sets the register as ready. */ - void setReg(PhysRegIndex phys_reg); - - /** Sets the register as not ready. */ - void unsetReg(PhysRegIndex ready_reg); - - private: - /** Scoreboard of physical integer registers, saying whether or not they - * are ready. - */ - std::vector regScoreBoard; - - /** Number of logical integer registers. */ - int numLogicalIntRegs; - - /** Number of physical integer registers. 
*/ - int numPhysicalIntRegs; - - /** Number of logical floating point registers. */ - int numLogicalFloatRegs; - - /** Number of physical floating point registers. */ - int numPhysicalFloatRegs; - - /** Number of miscellaneous registers. */ - int numMiscRegs; - - /** Number of logical integer + float registers. */ - int numLogicalRegs; - - /** Number of physical integer + float registers. */ - int numPhysicalRegs; - - /** The logical index of the zero register. */ - int zeroRegIdx; -}; - -#endif diff --git a/cpu/o3/thread_state.hh b/cpu/o3/thread_state.hh deleted file mode 100644 index 2c9788e4b..000000000 --- a/cpu/o3/thread_state.hh +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __CPU_O3_THREAD_STATE_HH__ -#define __CPU_O3_THREAD_STATE_HH__ - -#include "arch/faults.hh" -#include "arch/isa_traits.hh" -#include "cpu/exec_context.hh" -#include "cpu/thread_state.hh" - -class Event; -class Process; - -#if FULL_SYSTEM -class EndQuiesceEvent; -class FunctionProfile; -class ProfileNode; -#else -class FunctionalMemory; -class Process; -#endif - -/** - * Class that has various thread state, such as the status, the - * current instruction being processed, whether or not the thread has - * a trap pending or is being externally updated, the ExecContext - * proxy pointer, etc. It also handles anything related to a specific - * thread's process, such as syscalls and checking valid addresses. 
- */ -template -struct O3ThreadState : public ThreadState { - typedef ExecContext::Status Status; - typedef typename Impl::FullCPU FullCPU; - - Status _status; - - // Current instruction - TheISA::MachInst inst; - private: - FullCPU *cpu; - public: - - bool inSyscall; - - bool trapPending; - -#if FULL_SYSTEM - O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem) - : ThreadState(-1, _thread_num, _mem), - inSyscall(0), trapPending(0) - { } -#else - O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid) - : ThreadState(-1, _thread_num, _process->getMemory(), _process, _asid), - cpu(_cpu), inSyscall(0), trapPending(0) - { } - - O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem, - int _asid) - : ThreadState(-1, _thread_num, _mem, NULL, _asid), - cpu(_cpu), inSyscall(0), trapPending(0) - { } -#endif - - ExecContext *xcProxy; - - ExecContext *getXCProxy() { return xcProxy; } - - Status status() const { return _status; } - - void setStatus(Status new_status) { _status = new_status; } - -#if !FULL_SYSTEM - bool validInstAddr(Addr addr) - { return process->validInstAddr(addr); } - - bool validDataAddr(Addr addr) - { return process->validDataAddr(addr); } -#endif - - bool misspeculating() { return false; } - - void setInst(TheISA::MachInst _inst) { inst = _inst; } - - Counter readFuncExeInst() { return funcExeInst; } - - void setFuncExeInst(Counter new_val) { funcExeInst = new_val; } - -#if !FULL_SYSTEM - void syscall() { process->syscall(xcProxy); } -#endif -}; - -#endif // __CPU_O3_THREAD_STATE_HH__ diff --git a/cpu/ozone/back_end.cc b/cpu/ozone/back_end.cc deleted file mode 100644 index cb014e4cc..000000000 --- a/cpu/ozone/back_end.cc +++ /dev/null @@ -1,5 +0,0 @@ - -#include "cpu/ozone/back_end_impl.hh" -#include "cpu/ozone/ozone_impl.hh" - -//template class BackEnd; diff --git a/cpu/ozone/back_end.hh b/cpu/ozone/back_end.hh deleted file mode 100644 index 14b011ab8..000000000 --- a/cpu/ozone/back_end.hh +++ 
/dev/null @@ -1,516 +0,0 @@ - -#ifndef __CPU_OZONE_BACK_END_HH__ -#define __CPU_OZONE_BACK_END_HH__ - -#include -#include -#include - -#include "arch/faults.hh" -#include "base/timebuf.hh" -#include "cpu/inst_seq.hh" -#include "cpu/ozone/rename_table.hh" -#include "cpu/ozone/thread_state.hh" -#include "mem/functional/functional.hh" -#include "mem/mem_interface.hh" -#include "mem/mem_req.hh" -#include "sim/eventq.hh" - -class ExecContext; - -template -class OzoneThreadState; - -template -class BackEnd -{ - public: - typedef OzoneThreadState Thread; - - typedef typename Impl::Params Params; - typedef typename Impl::DynInst DynInst; - typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::FullCPU FullCPU; - typedef typename Impl::FrontEnd FrontEnd; - typedef typename Impl::FullCPU::CommStruct CommStruct; - - struct SizeStruct { - int size; - }; - - typedef SizeStruct DispatchToIssue; - typedef SizeStruct IssueToExec; - typedef SizeStruct ExecToCommit; - typedef SizeStruct Writeback; - - TimeBuffer d2i; - typename TimeBuffer::wire instsToDispatch; - TimeBuffer i2e; - typename TimeBuffer::wire instsToExecute; - TimeBuffer e2c; - TimeBuffer numInstsToWB; - - TimeBuffer *comm; - typename TimeBuffer::wire toIEW; - typename TimeBuffer::wire fromCommit; - - class InstQueue { - enum queue { - NonSpec, - IQ, - ToBeScheduled, - ReadyList, - ReplayList - }; - struct pqCompare { - bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const - { - return lhs->seqNum > rhs->seqNum; - } - }; - public: - InstQueue(Params *params); - - std::string name() const; - - void regStats(); - - void setIssueExecQueue(TimeBuffer *i2e_queue); - - void setBE(BackEnd *_be) { be = _be; } - - void insert(DynInstPtr &inst); - - void scheduleReadyInsts(); - - void scheduleNonSpec(const InstSeqNum &sn); - - DynInstPtr getReadyInst(); - - void commit(const InstSeqNum &sn) {} - - void squash(const InstSeqNum &sn); - - int wakeDependents(DynInstPtr &inst); - - /** Tells memory 
dependence unit that a memory instruction needs to be - * rescheduled. It will re-execute once replayMemInst() is called. - */ - void rescheduleMemInst(DynInstPtr &inst); - - /** Re-executes all rescheduled memory instructions. */ - void replayMemInst(DynInstPtr &inst); - - /** Completes memory instruction. */ - void completeMemInst(DynInstPtr &inst); - - void violation(DynInstPtr &inst, DynInstPtr &violation) { } - - bool isFull() { return numInsts >= size; } - - void dumpInsts(); - - private: - bool find(queue q, typename std::list::iterator it); - BackEnd *be; - TimeBuffer *i2e; - typename TimeBuffer::wire numIssued; - typedef typename std::list InstList; - typedef typename std::list::iterator InstListIt; - typedef typename std::priority_queue, pqCompare> ReadyInstQueue; - // Not sure I need the IQ list; it just needs to be a count. - InstList iq; - InstList toBeScheduled; - InstList readyList; - InstList nonSpec; - InstList replayList; - ReadyInstQueue readyQueue; - public: - int size; - int numInsts; - int width; - - Stats::VectorDistribution<> occ_dist; - - Stats::Vector<> inst_count; - Stats::Vector<> peak_inst_count; - Stats::Scalar<> empty_count; - Stats::Scalar<> current_count; - Stats::Scalar<> fullCount; - - Stats::Formula occ_rate; - Stats::Formula avg_residency; - Stats::Formula empty_rate; - Stats::Formula full_rate; - }; - - /** LdWriteback event for a load completion. */ - class LdWritebackEvent : public Event { - private: - /** Instruction that is writing back data to the register file. */ - DynInstPtr inst; - /** Pointer to IEW stage. */ - BackEnd *be; - - public: - /** Constructs a load writeback event. */ - LdWritebackEvent(DynInstPtr &_inst, BackEnd *be); - - /** Processes writeback event. */ - virtual void process(); - /** Returns the description of the writeback event. 
*/ - virtual const char *description(); - }; - - BackEnd(Params *params); - - std::string name() const; - - void regStats(); - - void setCPU(FullCPU *cpu_ptr) - { cpu = cpu_ptr; } - - void setFrontEnd(FrontEnd *front_end_ptr) - { frontEnd = front_end_ptr; } - - void setXC(ExecContext *xc_ptr) - { xc = xc_ptr; } - - void setThreadState(Thread *thread_ptr) - { thread = thread_ptr; } - - void setCommBuffer(TimeBuffer *_comm); - - void tick(); - void squash(); - void squashFromXC(); - bool xcSquash; - - template - Fault read(MemReqPtr &req, T &data, int load_idx); - - template - Fault write(MemReqPtr &req, T &data, int store_idx); - - Addr readCommitPC() { return commitPC; } - - Addr commitPC; - - bool robEmpty() { return instList.empty(); } - - bool isFull() { return numInsts >= numROBEntries; } - bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; } - - /** Tells memory dependence unit that a memory instruction needs to be - * rescheduled. It will re-execute once replayMemInst() is called. - */ - void rescheduleMemInst(DynInstPtr &inst) - { IQ.rescheduleMemInst(inst); } - - /** Re-executes all rescheduled memory instructions. */ - void replayMemInst(DynInstPtr &inst) - { IQ.replayMemInst(inst); } - - /** Completes memory instruction. 
*/ - void completeMemInst(DynInstPtr &inst) - { IQ.completeMemInst(inst); } - - void fetchFault(Fault &fault); - - private: - void updateStructures(); - void dispatchInsts(); - void dispatchStall(); - void checkDispatchStatus(); - void scheduleReadyInsts(); - void executeInsts(); - void commitInsts(); - void addToIQ(DynInstPtr &inst); - void addToLSQ(DynInstPtr &inst); - void instToCommit(DynInstPtr &inst); - void writebackInsts(); - bool commitInst(int inst_num); - void squash(const InstSeqNum &sn); - void squashDueToBranch(DynInstPtr &inst); - void squashDueToMemBlocked(DynInstPtr &inst); - void updateExeInstStats(DynInstPtr &inst); - void updateComInstStats(DynInstPtr &inst); - - public: - FullCPU *cpu; - - FrontEnd *frontEnd; - - ExecContext *xc; - - Thread *thread; - - enum Status { - Running, - Idle, - DcacheMissStall, - DcacheMissComplete, - Blocked - }; - - Status status; - - Status dispatchStatus; - - Counter funcExeInst; - - private: -// typedef typename Impl::InstQueue InstQueue; - - InstQueue IQ; - - typedef typename Impl::LdstQueue LdstQueue; - - LdstQueue LSQ; - public: - RenameTable commitRenameTable; - - RenameTable renameTable; - private: - class DCacheCompletionEvent : public Event - { - private: - BackEnd *be; - - public: - DCacheCompletionEvent(BackEnd *_be); - - virtual void process(); - virtual const char *description(); - }; - - friend class DCacheCompletionEvent; - - DCacheCompletionEvent cacheCompletionEvent; - - MemInterface *dcacheInterface; - - MemReqPtr memReq; - - // General back end width. Used if the more specific isn't given. - int width; - - // Dispatch width. - int dispatchWidth; - int numDispatchEntries; - int dispatchSize; - - int issueWidth; - - // Writeback width - int wbWidth; - - // Commit width - int commitWidth; - - /** Index into queue of instructions being written back. */ - unsigned wbNumInst; - - /** Cycle number within the queue of instructions being written - * back. 
Used in case there are too many instructions writing - * back at the current cycle and writesbacks need to be scheduled - * for the future. See comments in instToCommit(). - */ - unsigned wbCycle; - - int numROBEntries; - int numInsts; - - bool squashPending; - InstSeqNum squashSeqNum; - Addr squashNextPC; - - Fault faultFromFetch; - - private: - typedef typename std::list::iterator InstListIt; - - std::list instList; - std::list dispatch; - std::list writeback; - - int latency; - - int squashLatency; - - bool exactFullStall; - - bool fetchRedirect[Impl::MaxThreads]; - - // number of cycles stalled for D-cache misses -/* Stats::Scalar<> dcacheStallCycles; - Counter lastDcacheStall; -*/ - Stats::Vector<> rob_cap_events; - Stats::Vector<> rob_cap_inst_count; - Stats::Vector<> iq_cap_events; - Stats::Vector<> iq_cap_inst_count; - // total number of instructions executed - Stats::Vector<> exe_inst; - Stats::Vector<> exe_swp; - Stats::Vector<> exe_nop; - Stats::Vector<> exe_refs; - Stats::Vector<> exe_loads; - Stats::Vector<> exe_branches; - - Stats::Vector<> issued_ops; - - // total number of loads forwaded from LSQ stores - Stats::Vector<> lsq_forw_loads; - - // total number of loads ignored due to invalid addresses - Stats::Vector<> inv_addr_loads; - - // total number of software prefetches ignored due to invalid addresses - Stats::Vector<> inv_addr_swpfs; - // ready loads blocked due to memory disambiguation - Stats::Vector<> lsq_blocked_loads; - - Stats::Scalar<> lsqInversion; - - Stats::Vector<> n_issued_dist; - Stats::VectorDistribution<> issue_delay_dist; - - Stats::VectorDistribution<> queue_res_dist; -/* - Stats::Vector<> stat_fu_busy; - Stats::Vector2d<> stat_fuBusy; - Stats::Vector<> dist_unissued; - Stats::Vector2d<> stat_issued_inst_type; - - Stats::Formula misspec_cnt; - Stats::Formula misspec_ipc; - Stats::Formula issue_rate; - Stats::Formula issue_stores; - Stats::Formula issue_op_rate; - Stats::Formula fu_busy_rate; - Stats::Formula commit_stores; - 
Stats::Formula commit_ipc; - Stats::Formula commit_ipb; - Stats::Formula lsq_inv_rate; -*/ - Stats::Vector<> writeback_count; - Stats::Vector<> producer_inst; - Stats::Vector<> consumer_inst; - Stats::Vector<> wb_penalized; - - Stats::Formula wb_rate; - Stats::Formula wb_fanout; - Stats::Formula wb_penalized_rate; - - // total number of instructions committed - Stats::Vector<> stat_com_inst; - Stats::Vector<> stat_com_swp; - Stats::Vector<> stat_com_refs; - Stats::Vector<> stat_com_loads; - Stats::Vector<> stat_com_membars; - Stats::Vector<> stat_com_branches; - - Stats::Distribution<> n_committed_dist; - - Stats::Scalar<> commit_eligible_samples; - Stats::Vector<> commit_eligible; - - Stats::Scalar<> ROB_fcount; - Stats::Formula ROB_full_rate; - - Stats::Vector<> ROB_count; // cumulative ROB occupancy - Stats::Formula ROB_occ_rate; - Stats::VectorDistribution<> ROB_occ_dist; - public: - void dumpInsts(); -}; - -template -template -Fault -BackEnd::read(MemReqPtr &req, T &data, int load_idx) -{ -/* memReq->reset(addr, sizeof(T), flags); - - // translate to physical address - Fault fault = cpu->translateDataReadReq(memReq); - - // if we have a cache, do cache access too - if (fault == NoFault && dcacheInterface) { - memReq->cmd = Read; - memReq->completionEvent = NULL; - memReq->time = curTick; - memReq->flags &= ~INST_READ; - MemAccessResult result = dcacheInterface->access(memReq); - - // Ugly hack to get an event scheduled *only* if the access is - // a miss. We really should add first-class support for this - // at some point. - if (result != MA_HIT && dcacheInterface->doEvents()) { - // Fix this hack for keeping funcExeInst correct with loads that - // are executed twice. 
- --funcExeInst; - - memReq->completionEvent = &cacheCompletionEvent; - lastDcacheStall = curTick; -// unscheduleTickEvent(); -// status = DcacheMissStall; - DPRINTF(OzoneCPU, "Dcache miss stall!\n"); - } else { - // do functional access - fault = thread->mem->read(memReq, data); - - } - } -*/ -/* - if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) - recordEvent("Uncached Read"); -*/ - return LSQ.read(req, data, load_idx); -} - -template -template -Fault -BackEnd::write(MemReqPtr &req, T &data, int store_idx) -{ -/* - memReq->reset(addr, sizeof(T), flags); - - // translate to physical address - Fault fault = cpu->translateDataWriteReq(memReq); - - if (fault == NoFault && dcacheInterface) { - memReq->cmd = Write; - memcpy(memReq->data,(uint8_t *)&data,memReq->size); - memReq->completionEvent = NULL; - memReq->time = curTick; - memReq->flags &= ~INST_READ; - MemAccessResult result = dcacheInterface->access(memReq); - - // Ugly hack to get an event scheduled *only* if the access is - // a miss. We really should add first-class support for this - // at some point. 
- if (result != MA_HIT && dcacheInterface->doEvents()) { - memReq->completionEvent = &cacheCompletionEvent; - lastDcacheStall = curTick; -// unscheduleTickEvent(); -// status = DcacheMissStall; - DPRINTF(OzoneCPU, "Dcache miss stall!\n"); - } - } - - if (res && (fault == NoFault)) - *res = memReq->result; - */ -/* - if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) - recordEvent("Uncached Write"); -*/ - return LSQ.write(req, data, store_idx); -} - -#endif // __CPU_OZONE_BACK_END_HH__ diff --git a/cpu/ozone/back_end_impl.hh b/cpu/ozone/back_end_impl.hh deleted file mode 100644 index 36770d65c..000000000 --- a/cpu/ozone/back_end_impl.hh +++ /dev/null @@ -1,1904 +0,0 @@ - -#include "encumbered/cpu/full/op_class.hh" -#include "cpu/ozone/back_end.hh" - -template -BackEnd::InstQueue::InstQueue(Params *params) - : size(params->numIQEntries), numInsts(0), width(params->issueWidth) -{ -} - -template -std::string -BackEnd::InstQueue::name() const -{ - return be->name() + ".iq"; -} - -template -void -BackEnd::InstQueue::regStats() -{ - using namespace Stats; - - occ_dist - .init(1, 0, size, 2) - .name(name() + "occ_dist") - .desc("IQ Occupancy per cycle") - .flags(total | cdf) - ; - - inst_count - .init(1) - .name(name() + "cum_num_insts") - .desc("Total occupancy") - .flags(total) - ; - - peak_inst_count - .init(1) - .name(name() + "peak_occupancy") - .desc("Peak IQ occupancy") - .flags(total) - ; - - current_count - .name(name() + "current_count") - .desc("Occupancy this cycle") - ; - - empty_count - .name(name() + "empty_count") - .desc("Number of empty cycles") - ; - - fullCount - .name(name() + "full_count") - .desc("Number of full cycles") - ; - - - occ_rate - .name(name() + "occ_rate") - .desc("Average occupancy") - .flags(total) - ; - occ_rate = inst_count / be->cpu->numCycles; - - avg_residency - .name(name() + "avg_residency") - .desc("Average IQ residency") - .flags(total) - ; - avg_residency = occ_rate / be->cpu->numCycles; - - empty_rate - .name(name() + 
"empty_rate") - .desc("Fraction of cycles empty") - ; - empty_rate = 100 * empty_count / be->cpu->numCycles; - - full_rate - .name(name() + "full_rate") - .desc("Fraction of cycles full") - ; - full_rate = 100 * fullCount / be->cpu->numCycles; -} - -template -void -BackEnd::InstQueue::setIssueExecQueue(TimeBuffer *i2e_queue) -{ - i2e = i2e_queue; - numIssued = i2e->getWire(0); -} - -template -void -BackEnd::InstQueue::insert(DynInstPtr &inst) -{ - numInsts++; - inst_count[0]++; - if (!inst->isNonSpeculative()) { - DPRINTF(BE, "Instruction [sn:%lli] added to IQ\n", inst->seqNum); - if (inst->readyToIssue()) { - toBeScheduled.push_front(inst); - inst->iqIt = toBeScheduled.begin(); - inst->iqItValid = true; - } else { - iq.push_front(inst); - inst->iqIt = iq.begin(); - inst->iqItValid = true; - } - } else { - DPRINTF(BE, "Nonspeculative instruction [sn:%lli] added to IQ\n", inst->seqNum); - nonSpec.push_front(inst); - inst->iqIt = nonSpec.begin(); - inst->iqItValid = true; - } -} - -template -void -BackEnd::InstQueue::scheduleReadyInsts() -{ - int scheduled = numIssued->size; - InstListIt iq_it = --toBeScheduled.end(); - InstListIt iq_end_it = toBeScheduled.end(); - - while (iq_it != iq_end_it && scheduled < width) { -// if ((*iq_it)->readyToIssue()) { - DPRINTF(BE, "Instruction [sn:%lli] PC:%#x is ready\n", - (*iq_it)->seqNum, (*iq_it)->readPC()); - readyQueue.push(*iq_it); - readyList.push_front(*iq_it); - - (*iq_it)->iqIt = readyList.begin(); - - toBeScheduled.erase(iq_it--); - - ++scheduled; -// } else { -// iq_it++; -// } - } - - numIssued->size+= scheduled; -} - -template -void -BackEnd::InstQueue::scheduleNonSpec(const InstSeqNum &sn) -{ -/* - InstListIt non_spec_it = nonSpec.begin(); - InstListIt non_spec_end_it = nonSpec.end(); - - while ((*non_spec_it)->seqNum != sn) { - non_spec_it++; - assert(non_spec_it != non_spec_end_it); - } -*/ - DynInstPtr inst = nonSpec.back(); - - DPRINTF(BE, "Nonspeculative instruction [sn:%lli] scheduled\n", inst->seqNum); - - 
assert(inst->seqNum == sn); - - assert(find(NonSpec, inst->iqIt)); - nonSpec.erase(inst->iqIt); - readyList.push_front(inst); - inst->iqIt = readyList.begin(); - readyQueue.push(inst); - numIssued->size++; -} - -template -typename Impl::DynInstPtr -BackEnd::InstQueue::getReadyInst() -{ - assert(!readyList.empty()); - - DynInstPtr inst = readyQueue.top(); - readyQueue.pop(); - assert(find(ReadyList, inst->iqIt)); - readyList.erase(inst->iqIt); - inst->iqItValid = false; -// if (!inst->isMemRef()) - --numInsts; - return inst; -} - -template -void -BackEnd::InstQueue::squash(const InstSeqNum &sn) -{ - InstListIt iq_it = iq.begin(); - InstListIt iq_end_it = iq.end(); - - while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) { - DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum); - (*iq_it)->iqItValid = false; - iq.erase(iq_it++); - --numInsts; - } - - iq_it = nonSpec.begin(); - iq_end_it = nonSpec.end(); - - while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) { - DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum); - (*iq_it)->iqItValid = false; - nonSpec.erase(iq_it++); - --numInsts; - } - - iq_it = replayList.begin(); - iq_end_it = replayList.end(); - - while (iq_it != iq_end_it) { - if ((*iq_it)->seqNum > sn) { - DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum); - (*iq_it)->iqItValid = false; - replayList.erase(iq_it++); - --numInsts; - } else { - iq_it++; - } - } - - assert(numInsts >= 0); -/* - InstListIt ready_it = readyList.begin(); - InstListIt ready_end_it = readyList.end(); - - while (ready_it != ready_end_it) { - if ((*ready_it)->seqNum > sn) { - readyList.erase(ready_it++); - } else { - ready_it++; - } - } -*/ -} - -template -int -BackEnd::InstQueue::wakeDependents(DynInstPtr &inst) -{ - assert(!inst->isSquashed()); - std::vector &dependents = inst->getDependents(); - int num_outputs = dependents.size(); - - DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum); - - for 
(int i = 0; i < num_outputs; i++) { - DynInstPtr dep_inst = dependents[i]; - dep_inst->markSrcRegReady(); - DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum); - - if (dep_inst->readyToIssue() && dep_inst->iqItValid) { - if (dep_inst->isNonSpeculative()) { - assert(find(NonSpec, dep_inst->iqIt)); - nonSpec.erase(dep_inst->iqIt); - } else { - assert(find(IQ, dep_inst->iqIt)); - iq.erase(dep_inst->iqIt); - } - - toBeScheduled.push_front(dep_inst); - dep_inst->iqIt = toBeScheduled.begin(); - } - } - return num_outputs; -} - -template -void -BackEnd::InstQueue::rescheduleMemInst(DynInstPtr &inst) -{ - DPRINTF(BE, "Rescheduling memory instruction [sn:%lli]\n", inst->seqNum); - assert(!inst->iqItValid); - replayList.push_front(inst); - inst->iqIt = replayList.begin(); - inst->iqItValid = true; - ++numInsts; -} - -template -void -BackEnd::InstQueue::replayMemInst(DynInstPtr &inst) -{ - DPRINTF(BE, "Replaying memory instruction [sn:%lli]\n", inst->seqNum); - assert(find(ReplayList, inst->iqIt)); - InstListIt iq_it = --replayList.end(); - InstListIt iq_end_it = replayList.end(); - while (iq_it != iq_end_it) { - DynInstPtr rescheduled_inst = (*iq_it); - - DPRINTF(BE, "Memory instruction [sn:%lli] also replayed\n", inst->seqNum); - replayList.erase(iq_it--); - toBeScheduled.push_front(rescheduled_inst); - rescheduled_inst->iqIt = toBeScheduled.begin(); - } -} - -template -void -BackEnd::InstQueue::completeMemInst(DynInstPtr &inst) -{ - panic("Not implemented."); -} - -template -bool -BackEnd::InstQueue::find(queue q, InstListIt it) -{ - InstListIt iq_it, iq_end_it; - switch(q) { - case NonSpec: - iq_it = nonSpec.begin(); - iq_end_it = nonSpec.end(); - break; - case IQ: - iq_it = iq.begin(); - iq_end_it = iq.end(); - break; - case ToBeScheduled: - iq_it = toBeScheduled.begin(); - iq_end_it = toBeScheduled.end(); - break; - case ReadyList: - iq_it = readyList.begin(); - iq_end_it = readyList.end(); - break; - case ReplayList: - iq_it = 
replayList.begin(); - iq_end_it = replayList.end(); - } - - while (iq_it != it && iq_it != iq_end_it) { - iq_it++; - } - if (iq_it == it) { - return true; - } else { - return false; - } -} - -template -void -BackEnd::InstQueue::dumpInsts() -{ - cprintf("IQ size: %i\n", iq.size()); - - InstListIt inst_list_it = --iq.end(); - - int num = 0; - int valid_num = 0; - while (inst_list_it != iq.end()) - { - cprintf("Instruction:%i\n", - num); - if (!(*inst_list_it)->isSquashed()) { - if (!(*inst_list_it)->isIssued()) { - ++valid_num; - cprintf("Count:%i\n", valid_num); - } else if ((*inst_list_it)->isMemRef() && - !(*inst_list_it)->memOpDone) { - // Loads that have not been marked as executed still count - // towards the total instructions. - ++valid_num; - cprintf("Count:%i\n", valid_num); - } - } - - cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" - "Issued:%i\nSquashed:%i\n", - (*inst_list_it)->readPC(), - (*inst_list_it)->seqNum, - (*inst_list_it)->threadNumber, - (*inst_list_it)->isIssued(), - (*inst_list_it)->isSquashed()); - - if ((*inst_list_it)->isMemRef()) { - cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); - } - - cprintf("\n"); - - inst_list_it--; - ++num; - } - - cprintf("nonSpec size: %i\n", nonSpec.size()); - - inst_list_it = --nonSpec.end(); - - while (inst_list_it != nonSpec.end()) - { - cprintf("Instruction:%i\n", - num); - if (!(*inst_list_it)->isSquashed()) { - if (!(*inst_list_it)->isIssued()) { - ++valid_num; - cprintf("Count:%i\n", valid_num); - } else if ((*inst_list_it)->isMemRef() && - !(*inst_list_it)->memOpDone) { - // Loads that have not been marked as executed still count - // towards the total instructions. 
- ++valid_num; - cprintf("Count:%i\n", valid_num); - } - } - - cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" - "Issued:%i\nSquashed:%i\n", - (*inst_list_it)->readPC(), - (*inst_list_it)->seqNum, - (*inst_list_it)->threadNumber, - (*inst_list_it)->isIssued(), - (*inst_list_it)->isSquashed()); - - if ((*inst_list_it)->isMemRef()) { - cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); - } - - cprintf("\n"); - - inst_list_it--; - ++num; - } - - cprintf("toBeScheduled size: %i\n", toBeScheduled.size()); - - inst_list_it = --toBeScheduled.end(); - - while (inst_list_it != toBeScheduled.end()) - { - cprintf("Instruction:%i\n", - num); - if (!(*inst_list_it)->isSquashed()) { - if (!(*inst_list_it)->isIssued()) { - ++valid_num; - cprintf("Count:%i\n", valid_num); - } else if ((*inst_list_it)->isMemRef() && - !(*inst_list_it)->memOpDone) { - // Loads that have not been marked as executed still count - // towards the total instructions. - ++valid_num; - cprintf("Count:%i\n", valid_num); - } - } - - cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" - "Issued:%i\nSquashed:%i\n", - (*inst_list_it)->readPC(), - (*inst_list_it)->seqNum, - (*inst_list_it)->threadNumber, - (*inst_list_it)->isIssued(), - (*inst_list_it)->isSquashed()); - - if ((*inst_list_it)->isMemRef()) { - cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); - } - - cprintf("\n"); - - inst_list_it--; - ++num; - } - - cprintf("readyList size: %i\n", readyList.size()); - - inst_list_it = --readyList.end(); - - while (inst_list_it != readyList.end()) - { - cprintf("Instruction:%i\n", - num); - if (!(*inst_list_it)->isSquashed()) { - if (!(*inst_list_it)->isIssued()) { - ++valid_num; - cprintf("Count:%i\n", valid_num); - } else if ((*inst_list_it)->isMemRef() && - !(*inst_list_it)->memOpDone) { - // Loads that have not been marked as executed still count - // towards the total instructions. 
- ++valid_num; - cprintf("Count:%i\n", valid_num); - } - } - - cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" - "Issued:%i\nSquashed:%i\n", - (*inst_list_it)->readPC(), - (*inst_list_it)->seqNum, - (*inst_list_it)->threadNumber, - (*inst_list_it)->isIssued(), - (*inst_list_it)->isSquashed()); - - if ((*inst_list_it)->isMemRef()) { - cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); - } - - cprintf("\n"); - - inst_list_it--; - ++num; - } -} - -template -BackEnd::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst, - BackEnd *_be) - : Event(&mainEventQueue), inst(_inst), be(_be) -{ - this->setFlags(Event::AutoDelete); -} - -template -void -BackEnd::LdWritebackEvent::process() -{ - DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum); -// DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum); - - //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); - -// iewStage->wakeCPU(); - - if (inst->isSquashed()) { - inst = NULL; - return; - } - - if (!inst->isExecuted()) { - inst->setExecuted(); - - // Execute again to copy data to proper place. 
- inst->completeAcc(); - } - - // Need to insert instruction into queue to commit - be->instToCommit(inst); - - //wroteToTimeBuffer = true; -// iewStage->activityThisCycle(); - - inst = NULL; -} - -template -const char * -BackEnd::LdWritebackEvent::description() -{ - return "Load writeback event"; -} - - -template -BackEnd::DCacheCompletionEvent::DCacheCompletionEvent(BackEnd *_be) - : Event(&mainEventQueue, CPU_Tick_Pri), be(_be) -{ -} - -template -void -BackEnd::DCacheCompletionEvent::process() -{ -} - -template -const char * -BackEnd::DCacheCompletionEvent::description() -{ - return "Cache completion event"; -} - -template -BackEnd::BackEnd(Params *params) - : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5), - xcSquash(false), IQ(params), - cacheCompletionEvent(this), width(params->backEndWidth), - exactFullStall(true) -{ - numROBEntries = params->numROBEntries; - numInsts = 0; - numDispatchEntries = 32; - IQ.setBE(this); - LSQ.setBE(this); - - // Setup IQ and LSQ with their parameters here. - instsToDispatch = d2i.getWire(-1); - - instsToExecute = i2e.getWire(-1); - - IQ.setIssueExecQueue(&i2e); - - dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width; - issueWidth = params->issueWidth ? params->issueWidth : width; - wbWidth = params->wbWidth ? params->wbWidth : width; - commitWidth = params->commitWidth ? 
params->commitWidth : width; - - LSQ.init(params, params->LQEntries, params->SQEntries, 0); - - dispatchStatus = Running; -} - -template -std::string -BackEnd::name() const -{ - return cpu->name() + ".backend"; -} - -template -void -BackEnd::regStats() -{ - using namespace Stats; - rob_cap_events - .init(cpu->number_of_threads) - .name(name() + ".ROB:cap_events") - .desc("number of cycles where ROB cap was active") - .flags(total) - ; - - rob_cap_inst_count - .init(cpu->number_of_threads) - .name(name() + ".ROB:cap_inst") - .desc("number of instructions held up by ROB cap") - .flags(total) - ; - - iq_cap_events - .init(cpu->number_of_threads) - .name(name() +".IQ:cap_events" ) - .desc("number of cycles where IQ cap was active") - .flags(total) - ; - - iq_cap_inst_count - .init(cpu->number_of_threads) - .name(name() + ".IQ:cap_inst") - .desc("number of instructions held up by IQ cap") - .flags(total) - ; - - - exe_inst - .init(cpu->number_of_threads) - .name(name() + ".ISSUE:count") - .desc("number of insts issued") - .flags(total) - ; - - exe_swp - .init(cpu->number_of_threads) - .name(name() + ".ISSUE:swp") - .desc("number of swp insts issued") - .flags(total) - ; - - exe_nop - .init(cpu->number_of_threads) - .name(name() + ".ISSUE:nop") - .desc("number of nop insts issued") - .flags(total) - ; - - exe_refs - .init(cpu->number_of_threads) - .name(name() + ".ISSUE:refs") - .desc("number of memory reference insts issued") - .flags(total) - ; - - exe_loads - .init(cpu->number_of_threads) - .name(name() + ".ISSUE:loads") - .desc("number of load insts issued") - .flags(total) - ; - - exe_branches - .init(cpu->number_of_threads) - .name(name() + ".ISSUE:branches") - .desc("Number of branches issued") - .flags(total) - ; - - issued_ops - .init(cpu->number_of_threads) - .name(name() + ".ISSUE:op_count") - .desc("number of insts issued") - .flags(total) - ; - -/* - for (int i=0; inumber_of_threads) - .name(name() + ".LSQ:forw_loads") - .desc("number of loads forwarded via 
LSQ") - .flags(total) - ; - - inv_addr_loads - .init(cpu->number_of_threads) - .name(name() + ".ISSUE:addr_loads") - .desc("number of invalid-address loads") - .flags(total) - ; - - inv_addr_swpfs - .init(cpu->number_of_threads) - .name(name() + ".ISSUE:addr_swpfs") - .desc("number of invalid-address SW prefetches") - .flags(total) - ; - - lsq_blocked_loads - .init(cpu->number_of_threads) - .name(name() + ".LSQ:blocked_loads") - .desc("number of ready loads not issued due to memory disambiguation") - .flags(total) - ; - - lsqInversion - .name(name() + ".ISSUE:lsq_invert") - .desc("Number of times LSQ instruction issued early") - ; - - n_issued_dist - .init(issueWidth + 1) - .name(name() + ".ISSUE:issued_per_cycle") - .desc("Number of insts issued each cycle") - .flags(total | pdf | dist) - ; - issue_delay_dist - .init(Num_OpClasses,0,99,2) - .name(name() + ".ISSUE:") - .desc("cycles from operands ready to issue") - .flags(pdf | cdf) - ; - - queue_res_dist - .init(Num_OpClasses, 0, 99, 2) - .name(name() + ".IQ:residence:") - .desc("cycles from dispatch to issue") - .flags(total | pdf | cdf ) - ; - for (int i = 0; i < Num_OpClasses; ++i) { - queue_res_dist.subname(i, opClassStrings[i]); - } - - writeback_count - .init(cpu->number_of_threads) - .name(name() + ".WB:count") - .desc("cumulative count of insts written-back") - .flags(total) - ; - - producer_inst - .init(cpu->number_of_threads) - .name(name() + ".WB:producers") - .desc("num instructions producing a value") - .flags(total) - ; - - consumer_inst - .init(cpu->number_of_threads) - .name(name() + ".WB:consumers") - .desc("num instructions consuming a value") - .flags(total) - ; - - wb_penalized - .init(cpu->number_of_threads) - .name(name() + ".WB:penalized") - .desc("number of instrctions required to write to 'other' IQ") - .flags(total) - ; - - - wb_penalized_rate - .name(name() + ".WB:penalized_rate") - .desc ("fraction of instructions written-back that wrote to 'other' IQ") - .flags(total) - ; - - 
wb_penalized_rate = wb_penalized / writeback_count; - - wb_fanout - .name(name() + ".WB:fanout") - .desc("average fanout of values written-back") - .flags(total) - ; - - wb_fanout = producer_inst / consumer_inst; - - wb_rate - .name(name() + ".WB:rate") - .desc("insts written-back per cycle") - .flags(total) - ; - wb_rate = writeback_count / cpu->numCycles; - - stat_com_inst - .init(cpu->number_of_threads) - .name(name() + ".COM:count") - .desc("Number of instructions committed") - .flags(total) - ; - - stat_com_swp - .init(cpu->number_of_threads) - .name(name() + ".COM:swp_count") - .desc("Number of s/w prefetches committed") - .flags(total) - ; - - stat_com_refs - .init(cpu->number_of_threads) - .name(name() + ".COM:refs") - .desc("Number of memory references committed") - .flags(total) - ; - - stat_com_loads - .init(cpu->number_of_threads) - .name(name() + ".COM:loads") - .desc("Number of loads committed") - .flags(total) - ; - - stat_com_membars - .init(cpu->number_of_threads) - .name(name() + ".COM:membars") - .desc("Number of memory barriers committed") - .flags(total) - ; - - stat_com_branches - .init(cpu->number_of_threads) - .name(name() + ".COM:branches") - .desc("Number of branches committed") - .flags(total) - ; - n_committed_dist - .init(0,commitWidth,1) - .name(name() + ".COM:committed_per_cycle") - .desc("Number of insts commited each cycle") - .flags(pdf) - ; - - // - // Commit-Eligible instructions... - // - // -> The number of instructions eligible to commit in those - // cycles where we reached our commit BW limit (less the number - // actually committed) - // - // -> The average value is computed over ALL CYCLES... 
not just - // the BW limited cycles - // - // -> The standard deviation is computed only over cycles where - // we reached the BW limit - // - commit_eligible - .init(cpu->number_of_threads) - .name(name() + ".COM:bw_limited") - .desc("number of insts not committed due to BW limits") - .flags(total) - ; - - commit_eligible_samples - .name(name() + ".COM:bw_lim_events") - .desc("number cycles where commit BW limit reached") - ; - - ROB_fcount - .name(name() + ".ROB:full_count") - .desc("number of cycles where ROB was full") - ; - - ROB_count - .init(cpu->number_of_threads) - .name(name() + ".ROB:occupancy") - .desc(name() + ".ROB occupancy (cumulative)") - .flags(total) - ; - - ROB_full_rate - .name(name() + ".ROB:full_rate") - .desc("ROB full per cycle") - ; - ROB_full_rate = ROB_fcount / cpu->numCycles; - - ROB_occ_rate - .name(name() + ".ROB:occ_rate") - .desc("ROB occupancy rate") - .flags(total) - ; - ROB_occ_rate = ROB_count / cpu->numCycles; - - ROB_occ_dist - .init(cpu->number_of_threads,0,numROBEntries,2) - .name(name() + ".ROB:occ_dist") - .desc("ROB Occupancy per cycle") - .flags(total | cdf) - ; - - IQ.regStats(); -} - -template -void -BackEnd::setCommBuffer(TimeBuffer *_comm) -{ - comm = _comm; - toIEW = comm->getWire(0); - fromCommit = comm->getWire(-1); -} - -template -void -BackEnd::tick() -{ - DPRINTF(BE, "Ticking back end\n"); - - ROB_count[0]+= numInsts; - - wbCycle = 0; - - if (xcSquash) { - squashFromXC(); - } - - // Read in any done instruction information and update the IQ or LSQ. 
- updateStructures(); - - if (dispatchStatus != Blocked) { - d2i.advance(); - dispatchInsts(); - } else { - checkDispatchStatus(); - } - - i2e.advance(); - scheduleReadyInsts(); - - e2c.advance(); - executeInsts(); - - numInstsToWB.advance(); - writebackInsts(); - - commitInsts(); - - DPRINTF(BE, "IQ entries in use: %i, ROB entries in use: %i, LSQ loads: %i, LSQ stores: %i\n", - IQ.numInsts, numInsts, LSQ.numLoads(), LSQ.numStores()); - - assert(numInsts == instList.size()); -} - -template -void -BackEnd::updateStructures() -{ - if (fromCommit->doneSeqNum) { - IQ.commit(fromCommit->doneSeqNum); - LSQ.commitLoads(fromCommit->doneSeqNum); - LSQ.commitStores(fromCommit->doneSeqNum); - } - - if (fromCommit->nonSpecSeqNum) { - if (fromCommit->uncached) { - LSQ.executeLoad(fromCommit->lqIdx); - } else { - IQ.scheduleNonSpec( - fromCommit->nonSpecSeqNum); - } - } -} - -template -void -BackEnd::addToIQ(DynInstPtr &inst) -{ - // Do anything IQ specific here? - IQ.insert(inst); -} - -template -void -BackEnd::addToLSQ(DynInstPtr &inst) -{ - // Do anything LSQ specific here? - LSQ.insert(inst); -} - -template -void -BackEnd::dispatchInsts() -{ - DPRINTF(BE, "Trying to dispatch instructions.\n"); - - // Pull instructions out of the front end. - int disp_width = dispatchWidth ? dispatchWidth : width; - - // Could model dispatching time, but in general 1 cycle is probably - // good enough. - - if (dispatchSize < numDispatchEntries) { - for (int i = 0; i < disp_width; i++) { - // Get instructions - DynInstPtr inst = frontEnd->getInst(); - - if (!inst) { - // No more instructions to get - break; - } - - DPRINTF(BE, "Processing instruction [sn:%lli] PC:%#x\n", - inst->seqNum, inst->readPC()); - - for (int i = 0; i < inst->numDestRegs(); ++i) - renameTable[inst->destRegIdx(i)] = inst; - - // Add to queue to be dispatched. 
- dispatch.push_back(inst); - - d2i[0].size++; - ++dispatchSize; - } - } - - assert(dispatch.size() < 64); - - for (int i = 0; i < instsToDispatch->size; ++i) { - assert(!dispatch.empty()); - // Get instruction from front of time buffer - DynInstPtr inst = dispatch.front(); - dispatch.pop_front(); - --dispatchSize; - - if (inst->isSquashed()) - continue; - - ++numInsts; - instList.push_back(inst); - - DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n", - inst->seqNum, inst->readPC()); - - addToIQ(inst); - - if (inst->isMemRef()) { - addToLSQ(inst); - } - - if (inst->isNonSpeculative()) { - inst->setCanCommit(); - } - - // Check if IQ or LSQ is full. If so we'll need to break and stop - // removing instructions. Also update the number of insts to remove - // from the queue. - if (exactFullStall) { - bool stall = false; - if (IQ.isFull()) { - DPRINTF(BE, "IQ is full!\n"); - stall = true; - } else if (LSQ.isFull()) { - DPRINTF(BE, "LSQ is full!\n"); - stall = true; - } else if (isFull()) { - DPRINTF(BE, "ROB is full!\n"); - stall = true; - ROB_fcount++; - } - if (stall) { - instsToDispatch->size-= i+1; - dispatchStall(); - return; - } - } - } - - // Check if IQ or LSQ is full. If so we'll need to break and stop - // removing instructions. Also update the number of insts to remove - // from the queue. Check here if we don't care about exact stall - // conditions. - - bool stall = false; - if (IQ.isFull()) { - DPRINTF(BE, "IQ is full!\n"); - stall = true; - } else if (LSQ.isFull()) { - DPRINTF(BE, "LSQ is full!\n"); - stall = true; - } else if (isFull()) { - DPRINTF(BE, "ROB is full!\n"); - stall = true; - ROB_fcount++; - } - if (stall) { - d2i.advance(); - dispatchStall(); - return; - } -} - -template -void -BackEnd::dispatchStall() -{ - dispatchStatus = Blocked; - if (!cpu->decoupledFrontEnd) { - // Tell front end to stall here through a timebuffer, or just tell - // it directly. 
- } -} - -template -void -BackEnd::checkDispatchStatus() -{ - DPRINTF(BE, "Checking dispatch status\n"); - assert(dispatchStatus == Blocked); - if (!IQ.isFull() && !LSQ.isFull() && !isFull()) { - DPRINTF(BE, "Dispatch no longer blocked\n"); - dispatchStatus = Running; - dispatchInsts(); - } -} - -template -void -BackEnd::scheduleReadyInsts() -{ - // Tell IQ to put any ready instructions into the instruction list. - // Probably want to have a list of DynInstPtrs returned here. Then I - // can choose to either put them into a time buffer to simulate - // IQ scheduling time, or hand them directly off to the next stage. - // Do you ever want to directly hand it off to the next stage? - DPRINTF(BE, "Trying to schedule ready instructions\n"); - IQ.scheduleReadyInsts(); -} - -template -void -BackEnd::executeInsts() -{ - int insts_to_execute = instsToExecute->size; - - issued_ops[0]+= insts_to_execute; - n_issued_dist[insts_to_execute]++; - - DPRINTF(BE, "Trying to execute %i instructions\n", insts_to_execute); - - fetchRedirect[0] = false; - - while (insts_to_execute > 0) { - // Get ready instruction from the IQ (or queue coming out of IQ) - // Execute the ready instruction. - // Wakeup any dependents if it's done. - DynInstPtr inst = IQ.getReadyInst(); - - DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n", - inst->seqNum, inst->readPC()); - - ++funcExeInst; - - // Check if the instruction is squashed; if so then skip it - // and don't count it towards the FU usage. - if (inst->isSquashed()) { - DPRINTF(BE, "Execute: Instruction was squashed.\n"); - - // Not sure how to handle this plus the method of sending # of - // instructions to use. Probably will just have to count it - // towards the bandwidth usage, but not the FU usage. - --insts_to_execute; - - // Consider this instruction executed so that commit can go - // ahead and retire the instruction. 
- inst->setExecuted(); - - // Not sure if I should set this here or just let commit try to - // commit any squashed instructions. I like the latter a bit more. - inst->setCanCommit(); - -// ++iewExecSquashedInsts; - - continue; - } - - Fault fault = NoFault; - - // Execute instruction. - // Note that if the instruction faults, it will be handled - // at the commit stage. - if (inst->isMemRef() && - (!inst->isDataPrefetch() && !inst->isInstPrefetch())) { - DPRINTF(BE, "Execute: Initiating access for memory " - "reference.\n"); - - // Tell the LDSTQ to execute this instruction (if it is a load). - if (inst->isLoad()) { - // Loads will mark themselves as executed, and their writeback - // event adds the instruction to the queue to commit - fault = LSQ.executeLoad(inst); - -// ++iewExecLoadInsts; - } else if (inst->isStore()) { - LSQ.executeStore(inst); - -// ++iewExecStoreInsts; - - if (!(inst->req->flags & LOCKED)) { - inst->setExecuted(); - - instToCommit(inst); - } - // Store conditionals will mark themselves as executed, and - // their writeback event will add the instruction to the queue - // to commit. - } else { - panic("Unexpected memory type!\n"); - } - - } else { - inst->execute(); - -// ++iewExecutedInsts; - - inst->setExecuted(); - - instToCommit(inst); - } - - updateExeInstStats(inst); - - // Probably should have some sort of function for this. - // More general question of how to handle squashes? Have some sort of - // squash unit that controls it? Probably... - // Check if branch was correct. This check happens after the - // instruction is added to the queue because even if the branch - // is mispredicted, the branch instruction itself is still valid. - // Only handle this if there hasn't already been something that - // redirects fetch in this group of instructions. - - // This probably needs to prioritize the redirects if a different - // scheduler is used. 
Currently the scheduler schedules the oldest - // instruction first, so the branch resolution order will be correct. - unsigned tid = inst->threadNumber; - - if (!fetchRedirect[tid]) { - - if (inst->mispredicted()) { - fetchRedirect[tid] = true; - - DPRINTF(BE, "Execute: Branch mispredict detected.\n"); - DPRINTF(BE, "Execute: Redirecting fetch to PC: %#x.\n", - inst->nextPC); - - // If incorrect, then signal the ROB that it must be squashed. - squashDueToBranch(inst); - - if (inst->predTaken()) { -// predictedTakenIncorrect++; - } else { -// predictedNotTakenIncorrect++; - } - } else if (LSQ.violation()) { - fetchRedirect[tid] = true; - - // Get the DynInst that caused the violation. Note that this - // clears the violation signal. - DynInstPtr violator; - violator = LSQ.getMemDepViolator(); - - DPRINTF(BE, "LDSTQ detected a violation. Violator PC: " - "%#x, inst PC: %#x. Addr is: %#x.\n", - violator->readPC(), inst->readPC(), inst->physEffAddr); - - // Tell the instruction queue that a violation has occured. -// IQ.violation(inst, violator); - - // Squash. -// squashDueToMemOrder(inst,tid); - squashDueToBranch(inst); - -// ++memOrderViolationEvents; - } else if (LSQ.loadBlocked()) { - fetchRedirect[tid] = true; - - DPRINTF(BE, "Load operation couldn't execute because the " - "memory system is blocked. PC: %#x [sn:%lli]\n", - inst->readPC(), inst->seqNum); - - squashDueToMemBlocked(inst); - } - } - -// instList.pop_front(); - - --insts_to_execute; - - // keep an instruction count - thread->numInst++; - thread->numInsts++; - } - - assert(insts_to_execute >= 0); -} - -template -void -BackEnd::instToCommit(DynInstPtr &inst) -{ - int wb_width = wbWidth; - // First check the time slot that this instruction will write - // to. If there are free write ports at the time, then go ahead - // and write the instruction to that time. If there are not, - // keep looking back to see where's the first time there's a - // free slot. What happens if you run out of free spaces? 
- // For now naively assume that all instructions take one cycle. - // Otherwise would have to look into the time buffer based on the - // latency of the instruction. - - DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n", - inst->seqNum, inst->readPC()); - - while (numInstsToWB[wbCycle].size >= wb_width) { - ++wbCycle; - - assert(wbCycle < 5); - } - - // Add finished instruction to queue to commit. - writeback.push_back(inst); - numInstsToWB[wbCycle].size++; - - if (wbCycle) - wb_penalized[0]++; -} - -template -void -BackEnd::writebackInsts() -{ - int wb_width = wbWidth; - // Using this method I'm not quite sure how to prevent an - // instruction from waking its own dependents multiple times, - // without the guarantee that commit always has enough bandwidth - // to accept all instructions being written back. This guarantee - // might not be too unrealistic. - InstListIt wb_inst_it = writeback.begin(); - InstListIt wb_end_it = writeback.end(); - int inst_num = 0; - int consumer_insts = 0; - - for (; inst_num < wb_width && - wb_inst_it != wb_end_it; inst_num++) { - DynInstPtr inst = (*wb_inst_it); - - // Some instructions will be sent to commit without having - // executed because they need commit to handle them. - // E.g. Uncached loads have not actually executed when they - // are first sent to commit. Instead commit must tell the LSQ - // when it's ready to execute the uncached load. 
- if (!inst->isSquashed()) { - DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n", - inst->seqNum, inst->readPC()); - - inst->setCanCommit(); - inst->setResultReady(); - - if (inst->isExecuted()) { - int dependents = IQ.wakeDependents(inst); - if (dependents) { - producer_inst[0]++; - consumer_insts+= dependents; - } - } - } - - writeback.erase(wb_inst_it++); - } - LSQ.writebackStores(); - consumer_inst[0]+= consumer_insts; - writeback_count[0]+= inst_num; -} - -template -bool -BackEnd::commitInst(int inst_num) -{ - // Read instruction from the head of the ROB - DynInstPtr inst = instList.front(); - - // Make sure instruction is valid - assert(inst); - - if (!inst->readyToCommit()) - return false; - - DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n", - inst->seqNum, inst->readPC()); - - // If the instruction is not executed yet, then it is a non-speculative - // or store inst. Signal backwards that it should be executed. - if (!inst->isExecuted()) { - // Keep this number correct. We have not yet actually executed - // and committed this instruction. -// thread->funcExeInst--; - - if (inst->isNonSpeculative()) { -#if !FULL_SYSTEM - // Hack to make sure syscalls aren't executed until all stores - // write back their data. This direct communication shouldn't - // be used for anything other than this. - if (inst_num > 0 || LSQ.hasStoresToWB()) { - DPRINTF(BE, "Waiting for all stores to writeback.\n"); - return false; - } -#endif - - DPRINTF(BE, "Encountered a store or non-speculative " - "instruction at the head of the ROB, PC %#x.\n", - inst->readPC()); - - // Send back the non-speculative instruction's sequence number. - toIEW->nonSpecSeqNum = inst->seqNum; - - // Change the instruction so it won't try to commit again until - // it is executed. 
- inst->clearCanCommit(); - -// ++commitNonSpecStalls; - - return false; - } else if (inst->isLoad()) { - DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n", - inst->seqNum, inst->readPC()); - - // Send back the non-speculative instruction's sequence - // number. Maybe just tell the lsq to re-execute the load. - toIEW->nonSpecSeqNum = inst->seqNum; - toIEW->uncached = true; - toIEW->lqIdx = inst->lqIdx; - - inst->clearCanCommit(); - - return false; - } else { - panic("Trying to commit un-executed instruction " - "of unknown type!\n"); - } - } - - // Now check if it's one of the special trap or barrier or - // serializing instructions. - if (inst->isThreadSync()) - { - // Not handled for now. - panic("Barrier instructions are not handled yet.\n"); - } - - // Check if the instruction caused a fault. If so, trap. - Fault inst_fault = inst->getFault(); - - if (inst_fault != NoFault) { - if (!inst->isNop()) { -#if FULL_SYSTEM - DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n", - inst->seqNum, inst->readPC()); - -// assert(!thread->inSyscall); - -// thread->inSyscall = true; - - // Consider holding onto the trap and waiting until the trap event - // happens for this to be executed. - inst_fault->invoke(thread->getXCProxy()); - - // Exit state update mode to avoid accidental updating. -// thread->inSyscall = false; - -// commitStatus = TrapPending; - - // Generate trap squash event. 
-// generateTrapEvent(); - - return false; -#else // !FULL_SYSTEM - panic("fault (%d) detected @ PC %08p", inst_fault, - inst->PC); -#endif // FULL_SYSTEM - } - } - - if (inst->isControl()) { -// ++commitCommittedBranches; - } - - int freed_regs = 0; - - for (int i = 0; i < inst->numDestRegs(); ++i) { - DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n", - (int)inst->destRegIdx(i), inst->seqNum); - thread->renameTable[inst->destRegIdx(i)] = inst; - ++freed_regs; - } - - if (inst->traceData) { - inst->traceData->finalize(); - inst->traceData = NULL; - } - - inst->clearDependents(); - - frontEnd->addFreeRegs(freed_regs); - - instList.pop_front(); - - --numInsts; - cpu->numInst++; - thread->numInsts++; - ++thread->funcExeInst; - thread->PC = inst->readNextPC(); - updateComInstStats(inst); - - // Write the done sequence number here. - toIEW->doneSeqNum = inst->seqNum; - -#if FULL_SYSTEM - int count = 0; - Addr oldpc; - do { - if (count == 0) - assert(!thread->inSyscall && !thread->trapPending); - oldpc = thread->readPC(); - cpu->system->pcEventQueue.service( - thread->getXCProxy()); - count++; - } while (oldpc != thread->readPC()); - if (count > 1) { - DPRINTF(BE, "PC skip function event, stopping commit\n"); -// completed_last_inst = false; -// squashPending = true; - return false; - } -#endif - return true; -} - -template -void -BackEnd::commitInsts() -{ - int commit_width = commitWidth ? commitWidth : width; - - // Not sure this should be a loop or not. 
- int inst_num = 0; - while (!instList.empty() && inst_num < commit_width) { - if (instList.front()->isSquashed()) { - panic("No squashed insts should still be on the list!"); - instList.front()->clearDependents(); - instList.pop_front(); - continue; - } - - if (!commitInst(inst_num++)) { - break; - } - } - n_committed_dist.sample(inst_num); -} - -template -void -BackEnd::squash(const InstSeqNum &sn) -{ - IQ.squash(sn); - LSQ.squash(sn); - - int freed_regs = 0; - InstListIt dispatch_end = dispatch.end(); - InstListIt insts_it = dispatch.end(); - insts_it--; - - while (insts_it != dispatch_end && (*insts_it)->seqNum > sn) - { - if ((*insts_it)->isSquashed()) { - --insts_it; - continue; - } - DPRINTF(BE, "Squashing instruction on dispatch list PC %#x, [sn:%lli].\n", - (*insts_it)->readPC(), - (*insts_it)->seqNum); - - // Mark the instruction as squashed, and ready to commit so that - // it can drain out of the pipeline. - (*insts_it)->setSquashed(); - - (*insts_it)->setCanCommit(); - - // Be careful with IPRs and such here - for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) { - DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i); - DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n", - (int)(*insts_it)->destRegIdx(i), prev_dest); - renameTable[(*insts_it)->destRegIdx(i)] = prev_dest; - ++freed_regs; - } - - (*insts_it)->clearDependents(); - - --insts_it; - } - - insts_it = instList.end(); - insts_it--; - - while (!instList.empty() && (*insts_it)->seqNum > sn) - { - if ((*insts_it)->isSquashed()) { - --insts_it; - continue; - } - DPRINTF(BE, "Squashing instruction on inst list PC %#x, [sn:%lli].\n", - (*insts_it)->readPC(), - (*insts_it)->seqNum); - - // Mark the instruction as squashed, and ready to commit so that - // it can drain out of the pipeline. 
- (*insts_it)->setSquashed(); - - (*insts_it)->setCanCommit(); - - for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) { - DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i); - DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n", - (int)(*insts_it)->destRegIdx(i), prev_dest); - renameTable[(*insts_it)->destRegIdx(i)] = prev_dest; - ++freed_regs; - } - - (*insts_it)->clearDependents(); - - instList.erase(insts_it--); - --numInsts; - } - - frontEnd->addFreeRegs(freed_regs); -} - -template -void -BackEnd::squashFromXC() -{ - xcSquash = true; -} - -template -void -BackEnd::squashDueToBranch(DynInstPtr &inst) -{ - // Update the branch predictor state I guess - squash(inst->seqNum); - frontEnd->squash(inst->seqNum, inst->readNextPC(), - true, inst->mispredicted()); -} - -template -void -BackEnd::squashDueToMemBlocked(DynInstPtr &inst) -{ - DPRINTF(IEW, "Memory blocked, squashing load and younger insts, " - "PC: %#x [sn:%i].\n", inst->readPC(), inst->seqNum); - - squash(inst->seqNum - 1); - frontEnd->squash(inst->seqNum - 1, inst->readPC()); -} - -template -void -BackEnd::fetchFault(Fault &fault) -{ - faultFromFetch = fault; -} - -template -void -BackEnd::updateExeInstStats(DynInstPtr &inst) -{ - int thread_number = inst->threadNumber; - - // - // Pick off the software prefetches - // -#ifdef TARGET_ALPHA - if (inst->isDataPrefetch()) - exe_swp[thread_number]++; - else - exe_inst[thread_number]++; -#else - exe_inst[thread_number]++; -#endif - - // - // Control operations - // - if (inst->isControl()) - exe_branches[thread_number]++; - - // - // Memory operations - // - if (inst->isMemRef()) { - exe_refs[thread_number]++; - - if (inst->isLoad()) - exe_loads[thread_number]++; - } -} - -template -void -BackEnd::updateComInstStats(DynInstPtr &inst) -{ - unsigned thread = inst->threadNumber; - - // - // Pick off the software prefetches - // -#ifdef TARGET_ALPHA - if (inst->isDataPrefetch()) { - stat_com_swp[thread]++; - } else { - stat_com_inst[thread]++; - 
} -#else - stat_com_inst[thread]++; -#endif - - // - // Control Instructions - // - if (inst->isControl()) - stat_com_branches[thread]++; - - // - // Memory references - // - if (inst->isMemRef()) { - stat_com_refs[thread]++; - - if (inst->isLoad()) { - stat_com_loads[thread]++; - } - } - - if (inst->isMemBarrier()) { - stat_com_membars[thread]++; - } -} - -template -void -BackEnd::dumpInsts() -{ - int num = 0; - int valid_num = 0; - - InstListIt inst_list_it = instList.begin(); - - cprintf("Inst list size: %i\n", instList.size()); - - while (inst_list_it != instList.end()) - { - cprintf("Instruction:%i\n", - num); - if (!(*inst_list_it)->isSquashed()) { - if (!(*inst_list_it)->isIssued()) { - ++valid_num; - cprintf("Count:%i\n", valid_num); - } else if ((*inst_list_it)->isMemRef() && - !(*inst_list_it)->memOpDone) { - // Loads that have not been marked as executed still count - // towards the total instructions. - ++valid_num; - cprintf("Count:%i\n", valid_num); - } - } - - cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" - "Issued:%i\nSquashed:%i\n", - (*inst_list_it)->readPC(), - (*inst_list_it)->seqNum, - (*inst_list_it)->threadNumber, - (*inst_list_it)->isIssued(), - (*inst_list_it)->isSquashed()); - - if ((*inst_list_it)->isMemRef()) { - cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); - } - - cprintf("\n"); - - inst_list_it++; - ++num; - } - - cprintf("Dispatch list size: %i\n", dispatch.size()); - - inst_list_it = dispatch.begin(); - - while (inst_list_it != dispatch.end()) - { - cprintf("Instruction:%i\n", - num); - if (!(*inst_list_it)->isSquashed()) { - if (!(*inst_list_it)->isIssued()) { - ++valid_num; - cprintf("Count:%i\n", valid_num); - } else if ((*inst_list_it)->isMemRef() && - !(*inst_list_it)->memOpDone) { - // Loads that have not been marked as executed still count - // towards the total instructions. 
- ++valid_num; - cprintf("Count:%i\n", valid_num); - } - } - - cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" - "Issued:%i\nSquashed:%i\n", - (*inst_list_it)->readPC(), - (*inst_list_it)->seqNum, - (*inst_list_it)->threadNumber, - (*inst_list_it)->isIssued(), - (*inst_list_it)->isSquashed()); - - if ((*inst_list_it)->isMemRef()) { - cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); - } - - cprintf("\n"); - - inst_list_it++; - ++num; - } - - cprintf("Writeback list size: %i\n", writeback.size()); - - inst_list_it = writeback.begin(); - - while (inst_list_it != writeback.end()) - { - cprintf("Instruction:%i\n", - num); - if (!(*inst_list_it)->isSquashed()) { - if (!(*inst_list_it)->isIssued()) { - ++valid_num; - cprintf("Count:%i\n", valid_num); - } else if ((*inst_list_it)->isMemRef() && - !(*inst_list_it)->memOpDone) { - // Loads that have not been marked as executed still count - // towards the total instructions. - ++valid_num; - cprintf("Count:%i\n", valid_num); - } - } - - cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" - "Issued:%i\nSquashed:%i\n", - (*inst_list_it)->readPC(), - (*inst_list_it)->seqNum, - (*inst_list_it)->threadNumber, - (*inst_list_it)->isIssued(), - (*inst_list_it)->isSquashed()); - - if ((*inst_list_it)->isMemRef()) { - cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); - } - - cprintf("\n"); - - inst_list_it++; - ++num; - } -} diff --git a/cpu/ozone/cpu_builder.cc b/cpu/ozone/cpu_builder.cc deleted file mode 100644 index 64aa49c71..000000000 --- a/cpu/ozone/cpu_builder.cc +++ /dev/null @@ -1,830 +0,0 @@ - -#include - -#include "cpu/checker/cpu.hh" -#include "cpu/inst_seq.hh" -#include "cpu/ozone/cpu.hh" -#include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/simple_impl.hh" -#include "cpu/ozone/simple_params.hh" -#include "mem/cache/base_cache.hh" -#include "sim/builder.hh" -#include "sim/process.hh" -#include "sim/sim_object.hh" - -class DerivOzoneCPU : public OzoneCPU -{ - public: - DerivOzoneCPU(SimpleParams *p) - : OzoneCPU(p) - { } -}; - 
-class SimpleOzoneCPU : public OzoneCPU -{ - public: - SimpleOzoneCPU(SimpleParams *p) - : OzoneCPU(p) - { } -}; - - -//////////////////////////////////////////////////////////////////////// -// -// OzoneCPU Simulation Object -// - -BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivOzoneCPU) - - Param clock; - Param numThreads; - -#if FULL_SYSTEM -SimObjectParam system; -Param cpu_id; -SimObjectParam itb; -SimObjectParam dtb; -#else -SimObjectVectorParam workload; -//SimObjectParam page_table; -#endif // FULL_SYSTEM - -SimObjectParam mem; - -SimObjectParam checker; - -Param max_insts_any_thread; -Param max_insts_all_threads; -Param max_loads_any_thread; -Param max_loads_all_threads; - -SimObjectParam icache; -SimObjectParam dcache; - -Param cachePorts; -Param width; -Param frontEndWidth; -Param backEndWidth; -Param backEndSquashLatency; -Param backEndLatency; -Param maxInstBufferSize; -Param numPhysicalRegs; -Param maxOutstandingMemOps; - -Param decodeToFetchDelay; -Param renameToFetchDelay; -Param iewToFetchDelay; -Param commitToFetchDelay; -Param fetchWidth; - -Param renameToDecodeDelay; -Param iewToDecodeDelay; -Param commitToDecodeDelay; -Param fetchToDecodeDelay; -Param decodeWidth; - -Param iewToRenameDelay; -Param commitToRenameDelay; -Param decodeToRenameDelay; -Param renameWidth; - -Param commitToIEWDelay; -Param renameToIEWDelay; -Param issueToExecuteDelay; -Param issueWidth; -Param executeWidth; -Param executeIntWidth; -Param executeFloatWidth; -Param executeBranchWidth; -Param executeMemoryWidth; - -Param iewToCommitDelay; -Param renameToROBDelay; -Param commitWidth; -Param squashWidth; - -Param localPredictorSize; -Param localCtrBits; -Param localHistoryTableSize; -Param localHistoryBits; -Param globalPredictorSize; -Param globalCtrBits; -Param globalHistoryBits; -Param choicePredictorSize; -Param choiceCtrBits; - -Param BTBEntries; -Param BTBTagSize; - -Param RASSize; - -Param LQEntries; -Param SQEntries; -Param LFSTSize; -Param SSITSize; - -Param numPhysIntRegs; 
-Param numPhysFloatRegs; -Param numIQEntries; -Param numROBEntries; - -Param decoupledFrontEnd; -Param dispatchWidth; -Param wbWidth; - -Param smtNumFetchingThreads; -Param smtFetchPolicy; -Param smtLSQPolicy; -Param smtLSQThreshold; -Param smtIQPolicy; -Param smtIQThreshold; -Param smtROBPolicy; -Param smtROBThreshold; -Param smtCommitPolicy; - -Param instShiftAmt; - -Param defer_registration; - -Param function_trace; -Param function_trace_start; - -END_DECLARE_SIM_OBJECT_PARAMS(DerivOzoneCPU) - -BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU) - - INIT_PARAM(clock, "clock speed"), - INIT_PARAM(numThreads, "number of HW thread contexts"), - -#if FULL_SYSTEM - INIT_PARAM(system, "System object"), - INIT_PARAM(cpu_id, "processor ID"), - INIT_PARAM(itb, "Instruction translation buffer"), - INIT_PARAM(dtb, "Data translation buffer"), -#else - INIT_PARAM(workload, "Processes to run"), -// INIT_PARAM(page_table, "Page table"), -#endif // FULL_SYSTEM - - INIT_PARAM_DFLT(mem, "Memory", NULL), - - INIT_PARAM_DFLT(checker, "Checker CPU", NULL), - - INIT_PARAM_DFLT(max_insts_any_thread, - "Terminate when any thread reaches this inst count", - 0), - INIT_PARAM_DFLT(max_insts_all_threads, - "Terminate when all threads have reached" - "this inst count", - 0), - INIT_PARAM_DFLT(max_loads_any_thread, - "Terminate when any thread reaches this load count", - 0), - INIT_PARAM_DFLT(max_loads_all_threads, - "Terminate when all threads have reached this load" - "count", - 0), - - INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), - INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), - - INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), - INIT_PARAM_DFLT(width, "Width", 1), - INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1), - INIT_PARAM_DFLT(backEndWidth, "Back end width", 1), - INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1), - INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1), - INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16), - 
INIT_PARAM(numPhysicalRegs, "Number of physical registers"), - INIT_PARAM_DFLT(maxOutstandingMemOps, "Maximum outstanding memory operations", 4), - - INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), - INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), - INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" - "delay"), - INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), - INIT_PARAM(fetchWidth, "Fetch width"), - INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), - INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" - "delay"), - INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), - INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), - INIT_PARAM(decodeWidth, "Decode width"), - - INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename" - "delay"), - INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), - INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), - INIT_PARAM(renameWidth, "Rename width"), - - INIT_PARAM(commitToIEWDelay, "Commit to " - "Issue/Execute/Writeback delay"), - INIT_PARAM(renameToIEWDelay, "Rename to " - "Issue/Execute/Writeback delay"), - INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" - "to the IEW stage)"), - INIT_PARAM(issueWidth, "Issue width"), - INIT_PARAM(executeWidth, "Execute width"), - INIT_PARAM(executeIntWidth, "Integer execute width"), - INIT_PARAM(executeFloatWidth, "Floating point execute width"), - INIT_PARAM(executeBranchWidth, "Branch execute width"), - INIT_PARAM(executeMemoryWidth, "Memory execute width"), - - INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " - "delay"), - INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), - INIT_PARAM(commitWidth, "Commit width"), - INIT_PARAM(squashWidth, "Squash width"), - - INIT_PARAM(localPredictorSize, "Size of local predictor"), - INIT_PARAM(localCtrBits, "Bits per counter"), - INIT_PARAM(localHistoryTableSize, "Size of local history table"), - 
INIT_PARAM(localHistoryBits, "Bits for the local history"), - INIT_PARAM(globalPredictorSize, "Size of global predictor"), - INIT_PARAM(globalCtrBits, "Bits per counter"), - INIT_PARAM(globalHistoryBits, "Bits of history"), - INIT_PARAM(choicePredictorSize, "Size of choice predictor"), - INIT_PARAM(choiceCtrBits, "Bits of choice counters"), - - INIT_PARAM(BTBEntries, "Number of BTB entries"), - INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), - - INIT_PARAM(RASSize, "RAS size"), - - INIT_PARAM(LQEntries, "Number of load queue entries"), - INIT_PARAM(SQEntries, "Number of store queue entries"), - INIT_PARAM(LFSTSize, "Last fetched store table size"), - INIT_PARAM(SSITSize, "Store set ID table size"), - - INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), - INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " - "registers"), - INIT_PARAM(numIQEntries, "Number of instruction queue entries"), - INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), - - INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true), - INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0), - INIT_PARAM_DFLT(wbWidth, "Writeback width", 0), - - INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1), - INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"), - INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"), - INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100), - INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"), - INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100), - INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"), - INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100), - INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"), - - INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), - INIT_PARAM(defer_registration, "defer system registration (for sampling)"), - - INIT_PARAM(function_trace, "Enable function 
trace"), - INIT_PARAM(function_trace_start, "Cycle to start function trace") - -END_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU) - -CREATE_SIM_OBJECT(DerivOzoneCPU) -{ - DerivOzoneCPU *cpu; - -#if FULL_SYSTEM - // Full-system only supports a single thread for the moment. - int actual_num_threads = 1; -#else - // In non-full-system mode, we infer the number of threads from - // the workload if it's not explicitly specified. - int actual_num_threads = - numThreads.isValid() ? numThreads : workload.size(); - - if (workload.size() == 0) { - fatal("Must specify at least one workload!"); - } - -#endif - - SimpleParams *params = new SimpleParams; - - params->clock = clock; - - params->name = getInstanceName(); - params->numberOfThreads = actual_num_threads; - -#if FULL_SYSTEM - params->system = system; - params->cpu_id = cpu_id; - params->itb = itb; - params->dtb = dtb; -#else - params->workload = workload; -// params->pTable = page_table; -#endif // FULL_SYSTEM - - params->mem = mem; - params->checker = checker; - params->max_insts_any_thread = max_insts_any_thread; - params->max_insts_all_threads = max_insts_all_threads; - params->max_loads_any_thread = max_loads_any_thread; - params->max_loads_all_threads = max_loads_all_threads; - - // - // Caches - // - params->icacheInterface = icache ? icache->getInterface() : NULL; - params->dcacheInterface = dcache ? 
dcache->getInterface() : NULL; - params->cachePorts = cachePorts; - - params->width = width; - params->frontEndWidth = frontEndWidth; - params->backEndWidth = backEndWidth; - params->backEndSquashLatency = backEndSquashLatency; - params->backEndLatency = backEndLatency; - params->maxInstBufferSize = maxInstBufferSize; - params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs; - params->maxOutstandingMemOps = maxOutstandingMemOps; - - params->decodeToFetchDelay = decodeToFetchDelay; - params->renameToFetchDelay = renameToFetchDelay; - params->iewToFetchDelay = iewToFetchDelay; - params->commitToFetchDelay = commitToFetchDelay; - params->fetchWidth = fetchWidth; - - params->renameToDecodeDelay = renameToDecodeDelay; - params->iewToDecodeDelay = iewToDecodeDelay; - params->commitToDecodeDelay = commitToDecodeDelay; - params->fetchToDecodeDelay = fetchToDecodeDelay; - params->decodeWidth = decodeWidth; - - params->iewToRenameDelay = iewToRenameDelay; - params->commitToRenameDelay = commitToRenameDelay; - params->decodeToRenameDelay = decodeToRenameDelay; - params->renameWidth = renameWidth; - - params->commitToIEWDelay = commitToIEWDelay; - params->renameToIEWDelay = renameToIEWDelay; - params->issueToExecuteDelay = issueToExecuteDelay; - params->issueWidth = issueWidth; - params->executeWidth = executeWidth; - params->executeIntWidth = executeIntWidth; - params->executeFloatWidth = executeFloatWidth; - params->executeBranchWidth = executeBranchWidth; - params->executeMemoryWidth = executeMemoryWidth; - - params->iewToCommitDelay = iewToCommitDelay; - params->renameToROBDelay = renameToROBDelay; - params->commitWidth = commitWidth; - params->squashWidth = squashWidth; - - - params->localPredictorSize = localPredictorSize; - params->localCtrBits = localCtrBits; - params->localHistoryTableSize = localHistoryTableSize; - params->localHistoryBits = localHistoryBits; - params->globalPredictorSize = globalPredictorSize; - params->globalCtrBits = globalCtrBits; - 
params->globalHistoryBits = globalHistoryBits; - params->choicePredictorSize = choicePredictorSize; - params->choiceCtrBits = choiceCtrBits; - - params->BTBEntries = BTBEntries; - params->BTBTagSize = BTBTagSize; - - params->RASSize = RASSize; - - params->LQEntries = LQEntries; - params->SQEntries = SQEntries; - - params->SSITSize = SSITSize; - params->LFSTSize = LFSTSize; - - params->numPhysIntRegs = numPhysIntRegs; - params->numPhysFloatRegs = numPhysFloatRegs; - params->numIQEntries = numIQEntries; - params->numROBEntries = numROBEntries; - - params->decoupledFrontEnd = decoupledFrontEnd; - params->dispatchWidth = dispatchWidth; - params->wbWidth = wbWidth; - - params->smtNumFetchingThreads = smtNumFetchingThreads; - params->smtFetchPolicy = smtFetchPolicy; - params->smtIQPolicy = smtIQPolicy; - params->smtLSQPolicy = smtLSQPolicy; - params->smtLSQThreshold = smtLSQThreshold; - params->smtROBPolicy = smtROBPolicy; - params->smtROBThreshold = smtROBThreshold; - params->smtCommitPolicy = smtCommitPolicy; - - params->instShiftAmt = 2; - - params->deferRegistration = defer_registration; - - params->functionTrace = function_trace; - params->functionTraceStart = function_trace_start; - - cpu = new DerivOzoneCPU(params); - - return cpu; -} - -REGISTER_SIM_OBJECT("DerivOzoneCPU", DerivOzoneCPU) - - - -//////////////////////////////////////////////////////////////////////// -// -// OzoneCPU Simulation Object -// - -BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU) - - Param clock; - Param numThreads; - -#if FULL_SYSTEM -SimObjectParam system; -Param cpu_id; -SimObjectParam itb; -SimObjectParam dtb; -#else -SimObjectVectorParam workload; -//SimObjectParam page_table; -#endif // FULL_SYSTEM - -SimObjectParam mem; - -SimObjectParam checker; - -Param max_insts_any_thread; -Param max_insts_all_threads; -Param max_loads_any_thread; -Param max_loads_all_threads; - -SimObjectParam icache; -SimObjectParam dcache; - -Param cachePorts; -Param width; -Param frontEndWidth; -Param 
backEndWidth; -Param backEndSquashLatency; -Param backEndLatency; -Param maxInstBufferSize; -Param numPhysicalRegs; - -Param decodeToFetchDelay; -Param renameToFetchDelay; -Param iewToFetchDelay; -Param commitToFetchDelay; -Param fetchWidth; - -Param renameToDecodeDelay; -Param iewToDecodeDelay; -Param commitToDecodeDelay; -Param fetchToDecodeDelay; -Param decodeWidth; - -Param iewToRenameDelay; -Param commitToRenameDelay; -Param decodeToRenameDelay; -Param renameWidth; - -Param commitToIEWDelay; -Param renameToIEWDelay; -Param issueToExecuteDelay; -Param issueWidth; -Param executeWidth; -Param executeIntWidth; -Param executeFloatWidth; -Param executeBranchWidth; -Param executeMemoryWidth; - -Param iewToCommitDelay; -Param renameToROBDelay; -Param commitWidth; -Param squashWidth; - -Param localPredictorSize; -Param localCtrBits; -Param localHistoryTableSize; -Param localHistoryBits; -Param globalPredictorSize; -Param globalCtrBits; -Param globalHistoryBits; -Param choicePredictorSize; -Param choiceCtrBits; - -Param BTBEntries; -Param BTBTagSize; - -Param RASSize; - -Param LQEntries; -Param SQEntries; -Param LFSTSize; -Param SSITSize; - -Param numPhysIntRegs; -Param numPhysFloatRegs; -Param numIQEntries; -Param numROBEntries; - -Param decoupledFrontEnd; -Param dispatchWidth; -Param wbWidth; - -Param smtNumFetchingThreads; -Param smtFetchPolicy; -Param smtLSQPolicy; -Param smtLSQThreshold; -Param smtIQPolicy; -Param smtIQThreshold; -Param smtROBPolicy; -Param smtROBThreshold; -Param smtCommitPolicy; - -Param instShiftAmt; - -Param defer_registration; - -Param function_trace; -Param function_trace_start; - -END_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU) - -BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) - - INIT_PARAM(clock, "clock speed"), - INIT_PARAM(numThreads, "number of HW thread contexts"), - -#if FULL_SYSTEM - INIT_PARAM(system, "System object"), - INIT_PARAM(cpu_id, "processor ID"), - INIT_PARAM(itb, "Instruction translation buffer"), - INIT_PARAM(dtb, "Data 
translation buffer"), -#else - INIT_PARAM(workload, "Processes to run"), -// INIT_PARAM(page_table, "Page table"), -#endif // FULL_SYSTEM - - INIT_PARAM_DFLT(mem, "Memory", NULL), - - INIT_PARAM_DFLT(checker, "Checker CPU", NULL), - - INIT_PARAM_DFLT(max_insts_any_thread, - "Terminate when any thread reaches this inst count", - 0), - INIT_PARAM_DFLT(max_insts_all_threads, - "Terminate when all threads have reached" - "this inst count", - 0), - INIT_PARAM_DFLT(max_loads_any_thread, - "Terminate when any thread reaches this load count", - 0), - INIT_PARAM_DFLT(max_loads_all_threads, - "Terminate when all threads have reached this load" - "count", - 0), - - INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), - INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), - - INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), - INIT_PARAM_DFLT(width, "Width", 1), - INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1), - INIT_PARAM_DFLT(backEndWidth, "Back end width", 1), - INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1), - INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1), - INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16), - INIT_PARAM(numPhysicalRegs, "Number of physical registers"), - - INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), - INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), - INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" - "delay"), - INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), - INIT_PARAM(fetchWidth, "Fetch width"), - INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), - INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" - "delay"), - INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), - INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), - INIT_PARAM(decodeWidth, "Decode width"), - - INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename" - "delay"), - INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), - 
INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), - INIT_PARAM(renameWidth, "Rename width"), - - INIT_PARAM(commitToIEWDelay, "Commit to " - "Issue/Execute/Writeback delay"), - INIT_PARAM(renameToIEWDelay, "Rename to " - "Issue/Execute/Writeback delay"), - INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" - "to the IEW stage)"), - INIT_PARAM(issueWidth, "Issue width"), - INIT_PARAM(executeWidth, "Execute width"), - INIT_PARAM(executeIntWidth, "Integer execute width"), - INIT_PARAM(executeFloatWidth, "Floating point execute width"), - INIT_PARAM(executeBranchWidth, "Branch execute width"), - INIT_PARAM(executeMemoryWidth, "Memory execute width"), - - INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " - "delay"), - INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), - INIT_PARAM(commitWidth, "Commit width"), - INIT_PARAM(squashWidth, "Squash width"), - - INIT_PARAM(localPredictorSize, "Size of local predictor"), - INIT_PARAM(localCtrBits, "Bits per counter"), - INIT_PARAM(localHistoryTableSize, "Size of local history table"), - INIT_PARAM(localHistoryBits, "Bits for the local history"), - INIT_PARAM(globalPredictorSize, "Size of global predictor"), - INIT_PARAM(globalCtrBits, "Bits per counter"), - INIT_PARAM(globalHistoryBits, "Bits of history"), - INIT_PARAM(choicePredictorSize, "Size of choice predictor"), - INIT_PARAM(choiceCtrBits, "Bits of choice counters"), - - INIT_PARAM(BTBEntries, "Number of BTB entries"), - INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), - - INIT_PARAM(RASSize, "RAS size"), - - INIT_PARAM(LQEntries, "Number of load queue entries"), - INIT_PARAM(SQEntries, "Number of store queue entries"), - INIT_PARAM(LFSTSize, "Last fetched store table size"), - INIT_PARAM(SSITSize, "Store set ID table size"), - - INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), - INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " - "registers"), - INIT_PARAM(numIQEntries, 
"Number of instruction queue entries"), - INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), - - INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true), - INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0), - INIT_PARAM_DFLT(wbWidth, "Writeback width", 0), - - INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1), - INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"), - INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"), - INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100), - INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"), - INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100), - INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"), - INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100), - INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"), - - INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), - INIT_PARAM(defer_registration, "defer system registration (for sampling)"), - - INIT_PARAM(function_trace, "Enable function trace"), - INIT_PARAM(function_trace_start, "Cycle to start function trace") - -END_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) - -CREATE_SIM_OBJECT(SimpleOzoneCPU) -{ - SimpleOzoneCPU *cpu; - -#if FULL_SYSTEM - // Full-system only supports a single thread for the moment. - int actual_num_threads = 1; -#else - // In non-full-system mode, we infer the number of threads from - // the workload if it's not explicitly specified. - int actual_num_threads = - numThreads.isValid() ? 
numThreads : workload.size(); - - if (workload.size() == 0) { - fatal("Must specify at least one workload!"); - } - -#endif - - SimpleParams *params = new SimpleParams; - - params->clock = clock; - - params->name = getInstanceName(); - params->numberOfThreads = actual_num_threads; - -#if FULL_SYSTEM - params->system = system; - params->cpu_id = cpu_id; - params->itb = itb; - params->dtb = dtb; -#else - params->workload = workload; -// params->pTable = page_table; -#endif // FULL_SYSTEM - - params->mem = mem; - params->checker = checker; - params->max_insts_any_thread = max_insts_any_thread; - params->max_insts_all_threads = max_insts_all_threads; - params->max_loads_any_thread = max_loads_any_thread; - params->max_loads_all_threads = max_loads_all_threads; - - // - // Caches - // - params->icacheInterface = icache ? icache->getInterface() : NULL; - params->dcacheInterface = dcache ? dcache->getInterface() : NULL; - params->cachePorts = cachePorts; - - params->width = width; - params->frontEndWidth = frontEndWidth; - params->backEndWidth = backEndWidth; - params->backEndSquashLatency = backEndSquashLatency; - params->backEndLatency = backEndLatency; - params->maxInstBufferSize = maxInstBufferSize; - params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs; - - params->decodeToFetchDelay = decodeToFetchDelay; - params->renameToFetchDelay = renameToFetchDelay; - params->iewToFetchDelay = iewToFetchDelay; - params->commitToFetchDelay = commitToFetchDelay; - params->fetchWidth = fetchWidth; - - params->renameToDecodeDelay = renameToDecodeDelay; - params->iewToDecodeDelay = iewToDecodeDelay; - params->commitToDecodeDelay = commitToDecodeDelay; - params->fetchToDecodeDelay = fetchToDecodeDelay; - params->decodeWidth = decodeWidth; - - params->iewToRenameDelay = iewToRenameDelay; - params->commitToRenameDelay = commitToRenameDelay; - params->decodeToRenameDelay = decodeToRenameDelay; - params->renameWidth = renameWidth; - - params->commitToIEWDelay = commitToIEWDelay; - 
params->renameToIEWDelay = renameToIEWDelay; - params->issueToExecuteDelay = issueToExecuteDelay; - params->issueWidth = issueWidth; - params->executeWidth = executeWidth; - params->executeIntWidth = executeIntWidth; - params->executeFloatWidth = executeFloatWidth; - params->executeBranchWidth = executeBranchWidth; - params->executeMemoryWidth = executeMemoryWidth; - - params->iewToCommitDelay = iewToCommitDelay; - params->renameToROBDelay = renameToROBDelay; - params->commitWidth = commitWidth; - params->squashWidth = squashWidth; - - - params->localPredictorSize = localPredictorSize; - params->localCtrBits = localCtrBits; - params->localHistoryTableSize = localHistoryTableSize; - params->localHistoryBits = localHistoryBits; - params->globalPredictorSize = globalPredictorSize; - params->globalCtrBits = globalCtrBits; - params->globalHistoryBits = globalHistoryBits; - params->choicePredictorSize = choicePredictorSize; - params->choiceCtrBits = choiceCtrBits; - - params->BTBEntries = BTBEntries; - params->BTBTagSize = BTBTagSize; - - params->RASSize = RASSize; - - params->LQEntries = LQEntries; - params->SQEntries = SQEntries; - - params->SSITSize = SSITSize; - params->LFSTSize = LFSTSize; - - params->numPhysIntRegs = numPhysIntRegs; - params->numPhysFloatRegs = numPhysFloatRegs; - params->numIQEntries = numIQEntries; - params->numROBEntries = numROBEntries; - - params->decoupledFrontEnd = decoupledFrontEnd; - params->dispatchWidth = dispatchWidth; - params->wbWidth = wbWidth; - - params->smtNumFetchingThreads = smtNumFetchingThreads; - params->smtFetchPolicy = smtFetchPolicy; - params->smtIQPolicy = smtIQPolicy; - params->smtLSQPolicy = smtLSQPolicy; - params->smtLSQThreshold = smtLSQThreshold; - params->smtROBPolicy = smtROBPolicy; - params->smtROBThreshold = smtROBThreshold; - params->smtCommitPolicy = smtCommitPolicy; - - params->instShiftAmt = 2; - - params->deferRegistration = defer_registration; - - params->functionTrace = function_trace; - 
params->functionTraceStart = function_trace_start; - - cpu = new SimpleOzoneCPU(params); - - return cpu; -} - -REGISTER_SIM_OBJECT("SimpleOzoneCPU", SimpleOzoneCPU) - diff --git a/cpu/ozone/dyn_inst.cc b/cpu/ozone/dyn_inst.cc deleted file mode 100644 index 3bf8b03ca..000000000 --- a/cpu/ozone/dyn_inst.cc +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "cpu/ozone/dyn_inst_impl.hh" -#include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/simple_impl.hh" - -template class OzoneDynInst; -template class OzoneDynInst; - diff --git a/cpu/ozone/dyn_inst.hh b/cpu/ozone/dyn_inst.hh deleted file mode 100644 index 5d48bb361..000000000 --- a/cpu/ozone/dyn_inst.hh +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright (c) 2005-2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __CPU_OZONE_DYN_INST_HH__ -#define __CPU_OZONE_DYN_INST_HH__ - -#include "arch/isa_traits.hh" -#include "config/full_system.hh" -#include "cpu/base_dyn_inst.hh" -#include "cpu/ozone/cpu.hh" // MUST include this -#include "cpu/inst_seq.hh" -#include "cpu/ozone/simple_impl.hh" // Would be nice to not have to include this -#include "cpu/ozone/ozone_impl.hh" - -#include -#include - -template -class OzoneDynInst : public BaseDynInst -{ - public: - // Typedefs - typedef typename Impl::FullCPU FullCPU; - - typedef typename FullCPU::ImplState ImplState; - - // Typedef for DynInstPtr. This is really just a RefCountingPtr. - typedef typename Impl::DynInstPtr DynInstPtr; - - typedef TheISA::ExtMachInst ExtMachInst; - typedef TheISA::MachInst MachInst; - typedef TheISA::MiscReg MiscReg; - typedef typename std::list::iterator ListIt; - - // Note that this is duplicated from the BaseDynInst class; I'm - // simply not sure the enum would carry through so I could use it - // in array declarations in this class. 
- enum { - MaxInstSrcRegs = TheISA::MaxInstSrcRegs, - MaxInstDestRegs = TheISA::MaxInstDestRegs - }; - - OzoneDynInst(FullCPU *cpu); - - OzoneDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, - InstSeqNum seq_num, FullCPU *cpu); - - OzoneDynInst(StaticInstPtr inst); - - ~OzoneDynInst(); - - void setSrcInst(DynInstPtr &newSrcInst, int regIdx) - { srcInsts[regIdx] = newSrcInst; } - - bool srcInstReady(int regIdx); - - void setPrevDestInst(DynInstPtr &oldDestInst, int regIdx) - { prevDestInst[regIdx] = oldDestInst; } - - DynInstPtr &getPrevDestInst(int regIdx) - { return prevDestInst[regIdx]; } - - void addDependent(DynInstPtr &dependent_inst); - - std::vector &getDependents() { return dependents; } - std::vector &getMemDeps() { return memDependents; } - std::list &getMemSrcs() { return srcMemInsts; } - - void wakeDependents(); - - void wakeMemDependents(); - - void addMemDependent(DynInstPtr &inst) { memDependents.push_back(inst); } - - void addSrcMemInst(DynInstPtr &inst) { srcMemInsts.push_back(inst); } - - void markMemInstReady(OzoneDynInst *inst); - - // For now I will remove instructions from the list when they wake - // up. In the future, you only really need a counter. - bool memDepReady() { return srcMemInsts.empty(); } - - private: - void initInstPtrs(); - - std::vector dependents; - - std::vector memDependents; - - std::list srcMemInsts; - - /** The instruction that produces the value of the source - * registers. These may be NULL if the value has already been - * read from the source instruction. - */ - DynInstPtr srcInsts[MaxInstSrcRegs]; - - /** - * Previous rename instruction for this destination. - */ - DynInstPtr prevDestInst[MaxInstSrcRegs]; - - public: - - Fault initiateAcc(); - - Fault completeAcc(); - - // The register accessor methods provide the index of the - // instruction's operand (e.g., 0 or 1), not the architectural - // register index, to simplify the implementation of register - // renaming. 
We find the architectural register index by indexing - // into the instruction's own operand index table. Note that a - // raw pointer to the StaticInst is provided instead of a - // ref-counted StaticInstPtr to redice overhead. This is fine as - // long as these methods don't copy the pointer into any long-term - // storage (which is pretty hard to imagine they would have reason - // to do). - - uint64_t readIntReg(const StaticInst *si, int idx) - { - return srcInsts[idx]->readIntResult(); - } - - float readFloatRegSingle(const StaticInst *si, int idx) - { - return srcInsts[idx]->readFloatResult(); - } - - double readFloatRegDouble(const StaticInst *si, int idx) - { - return srcInsts[idx]->readDoubleResult(); - } - - uint64_t readFloatRegInt(const StaticInst *si, int idx) - { - return srcInsts[idx]->readIntResult(); - } - - /** @todo: Make results into arrays so they can handle multiple dest - * registers. - */ - void setIntReg(const StaticInst *si, int idx, uint64_t val) - { - BaseDynInst::setIntReg(si, idx, val); - } - - void setFloatRegSingle(const StaticInst *si, int idx, float val) - { - BaseDynInst::setFloatRegSingle(si, idx, val); - } - - void setFloatRegDouble(const StaticInst *si, int idx, double val) - { - BaseDynInst::setFloatRegDouble(si, idx, val); - } - - void setFloatRegInt(const StaticInst *si, int idx, uint64_t val) - { - BaseDynInst::setFloatRegInt(si, idx, val); - } - - void setIntResult(uint64_t result) { this->instResult.integer = result; } - void setDoubleResult(double result) { this->instResult.dbl = result; } - - bool srcsReady(); - bool eaSrcsReady(); - - Fault execute(); - - Fault executeEAComp() - { return NoFault; } - - Fault executeMemAcc() - { return this->staticInst->memAccInst()->execute(this, this->traceData); } - - void clearDependents(); - - void clearMemDependents(); - - public: - // ISA stuff - MiscReg readMiscReg(int misc_reg); - - MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault); - - Fault setMiscReg(int misc_reg, 
const MiscReg &val); - - Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); - -#if FULL_SYSTEM - Fault hwrei(); - int readIntrFlag(); - void setIntrFlag(int val); - bool inPalMode(); - void trap(Fault fault); - bool simPalCheck(int palFunc); -#else - void syscall(); -#endif - - ListIt iqIt; - bool iqItValid; -}; - -#endif // __CPU_OZONE_DYN_INST_HH__ diff --git a/cpu/ozone/dyn_inst_impl.hh b/cpu/ozone/dyn_inst_impl.hh deleted file mode 100644 index f891ec515..000000000 --- a/cpu/ozone/dyn_inst_impl.hh +++ /dev/null @@ -1,315 +0,0 @@ -/* - * Copyright (c) 2005-2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "arch/faults.hh" -#include "arch/isa_traits.hh" -#include "config/full_system.hh" -#include "cpu/ozone/dyn_inst.hh" -#include "kern/kernel_stats.hh" - -using namespace TheISA; - -template -OzoneDynInst::OzoneDynInst(FullCPU *cpu) - : BaseDynInst(0, 0, 0, 0, cpu) -{ - this->setResultReady(); - - initInstPtrs(); -} - -template -OzoneDynInst::OzoneDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, - InstSeqNum seq_num, FullCPU *cpu) - : BaseDynInst(inst, PC, Pred_PC, seq_num, cpu) -{ - initInstPtrs(); -} - -template -OzoneDynInst::OzoneDynInst(StaticInstPtr _staticInst) - : BaseDynInst(_staticInst) -{ - initInstPtrs(); -} - -template -OzoneDynInst::~OzoneDynInst() -{ - DPRINTF(BE, "[sn:%lli] destructor called\n", this->seqNum); - for (int i = 0; i < this->numSrcRegs(); ++i) { - srcInsts[i] = NULL; - } - - for (int i = 0; i < this->numDestRegs(); ++i) { - prevDestInst[i] = NULL; - } - - dependents.clear(); -} - -template -Fault -OzoneDynInst::execute() -{ - // @todo: Pretty convoluted way to avoid squashing from happening when using - // the XC during an instruction's execution (specifically for instructions - // that have sideeffects that use the XC). Fix this. 
- bool in_syscall = this->thread->inSyscall; - this->thread->inSyscall = true; - - this->fault = this->staticInst->execute(this, this->traceData); - - this->thread->inSyscall = in_syscall; - - return this->fault; -} - -template -Fault -OzoneDynInst::initiateAcc() -{ - // @todo: Pretty convoluted way to avoid squashing from happening when using - // the XC during an instruction's execution (specifically for instructions - // that have sideeffects that use the XC). Fix this. - bool in_syscall = this->thread->inSyscall; - this->thread->inSyscall = true; - - this->fault = this->staticInst->initiateAcc(this, this->traceData); - - this->thread->inSyscall = in_syscall; - - return this->fault; -} - -template -Fault -OzoneDynInst::completeAcc() -{ - if (this->isLoad()) { - this->fault = this->staticInst->completeAcc(this->req->data, - this, - this->traceData); - } else if (this->isStore()) { - this->fault = this->staticInst->completeAcc((uint8_t*)&this->req->result, - this, - this->traceData); - } else { - panic("Unknown type!"); - } - - return this->fault; -} - -template -bool -OzoneDynInst::srcInstReady(int regIdx) -{ - return srcInsts[regIdx]->isResultReady(); -} - -template -void -OzoneDynInst::addDependent(DynInstPtr &dependent_inst) -{ - dependents.push_back(dependent_inst); -} - -template -void -OzoneDynInst::wakeDependents() -{ - for (int i = 0; i < dependents.size(); ++i) { - dependents[i]->markSrcRegReady(); - } -} - -template -void -OzoneDynInst::wakeMemDependents() -{ - for (int i = 0; i < memDependents.size(); ++i) { - memDependents[i]->markMemInstReady(this); - } -} - -template -void -OzoneDynInst::markMemInstReady(OzoneDynInst *inst) -{ - ListIt mem_it = srcMemInsts.begin(); - while ((*mem_it) != inst && mem_it != srcMemInsts.end()) { - mem_it++; - } - assert(mem_it != srcMemInsts.end()); - - srcMemInsts.erase(mem_it); -} - -template -void -OzoneDynInst::initInstPtrs() -{ - for (int i = 0; i < MaxInstSrcRegs; ++i) { - srcInsts[i] = NULL; - } - iqItValid = 
false; -} - -template -bool -OzoneDynInst::srcsReady() -{ - for (int i = 0; i < this->numSrcRegs(); ++i) { - if (!srcInsts[i]->isResultReady()) - return false; - } - - return true; -} - -template -bool -OzoneDynInst::eaSrcsReady() -{ - for (int i = 1; i < this->numSrcRegs(); ++i) { - if (!srcInsts[i]->isResultReady()) - return false; - } - - return true; -} - -template -void -OzoneDynInst::clearDependents() -{ - dependents.clear(); - for (int i = 0; i < this->numSrcRegs(); ++i) { - srcInsts[i] = NULL; - } - for (int i = 0; i < this->numDestRegs(); ++i) { - prevDestInst[i] = NULL; - } -} - -template -void -OzoneDynInst::clearMemDependents() -{ - memDependents.clear(); -} - -template -MiscReg -OzoneDynInst::readMiscReg(int misc_reg) -{ - return this->thread->readMiscReg(misc_reg); -} - -template -MiscReg -OzoneDynInst::readMiscRegWithEffect(int misc_reg, Fault &fault) -{ - return this->thread->readMiscRegWithEffect(misc_reg, fault); -} - -template -Fault -OzoneDynInst::setMiscReg(int misc_reg, const MiscReg &val) -{ - this->setIntResult(val); - return this->thread->setMiscReg(misc_reg, val); -} - -template -Fault -OzoneDynInst::setMiscRegWithEffect(int misc_reg, const MiscReg &val) -{ - return this->thread->setMiscRegWithEffect(misc_reg, val); -} - -#if FULL_SYSTEM - -template -Fault -OzoneDynInst::hwrei() -{ - if (!this->cpu->inPalMode(this->readPC())) - return new AlphaISA::UnimplementedOpcodeFault; - - this->setNextPC(this->thread->readMiscReg(AlphaISA::IPR_EXC_ADDR)); - - this->cpu->hwrei(); - - // FIXME: XXX check for interrupts? 
XXX - return NoFault; -} - -template -int -OzoneDynInst::readIntrFlag() -{ -return this->cpu->readIntrFlag(); -} - -template -void -OzoneDynInst::setIntrFlag(int val) -{ - this->cpu->setIntrFlag(val); -} - -template -bool -OzoneDynInst::inPalMode() -{ - return this->cpu->inPalMode(); -} - -template -void -OzoneDynInst::trap(Fault fault) -{ - fault->invoke(this->thread->getXCProxy()); -} - -template -bool -OzoneDynInst::simPalCheck(int palFunc) -{ - return this->cpu->simPalCheck(palFunc); -} -#else -template -void -OzoneDynInst::syscall() -{ - this->cpu->syscall(); -} -#endif diff --git a/cpu/ozone/front_end.cc b/cpu/ozone/front_end.cc deleted file mode 100644 index a974d43cb..000000000 --- a/cpu/ozone/front_end.cc +++ /dev/null @@ -1,7 +0,0 @@ - -#include "cpu/ozone/front_end_impl.hh" -#include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/simple_impl.hh" - -template class FrontEnd; -template class FrontEnd; diff --git a/cpu/ozone/front_end.hh b/cpu/ozone/front_end.hh deleted file mode 100644 index dd382491f..000000000 --- a/cpu/ozone/front_end.hh +++ /dev/null @@ -1,284 +0,0 @@ -/* - * Copyright (c) 2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __CPU_OZONE_FRONT_END_HH__ -#define __CPU_OZONE_FRONT_END_HH__ - -#include - -#include "cpu/inst_seq.hh" -#include "cpu/o3/bpred_unit.hh" -#include "cpu/ozone/rename_table.hh" -#include "mem/mem_req.hh" -#include "sim/eventq.hh" -#include "sim/stats.hh" - -class ExecContext; -class MemInterface; -template -class OzoneThreadState; -class PageTable; -template -class TimeBuffer; - -template -class FrontEnd -{ - public: - typedef typename Impl::Params Params; - typedef typename Impl::DynInst DynInst; - typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::FullCPU FullCPU; - typedef typename Impl::BackEnd BackEnd; - - typedef typename Impl::FullCPU::OzoneXC OzoneXC; - typedef typename Impl::FullCPU::CommStruct CommStruct; - - FrontEnd(Params *params); - - std::string name() const; - - void setCPU(FullCPU *cpu_ptr) - { cpu = cpu_ptr; } - - void setBackEnd(BackEnd *back_end_ptr) - { backEnd = back_end_ptr; } - - void setCommBuffer(TimeBuffer *_comm); - - void setXC(ExecContext *xc_ptr); - - void setThreadState(OzoneThreadState *thread_ptr) - { thread = thread_ptr; } - - void regStats(); - - void tick(); - Fault fetchCacheLine(); - void 
processInst(DynInstPtr &inst); - void squash(const InstSeqNum &squash_num, const Addr &next_PC, - const bool is_branch = false, const bool branch_taken = false); - DynInstPtr getInst(); - - void processCacheCompletion(MemReqPtr &req); - - void addFreeRegs(int num_freed); - - bool isEmpty() { return instBuffer.empty(); } - - void switchOut(); - - void doSwitchOut(); - - void takeOverFrom(ExecContext *old_xc = NULL); - - bool isSwitchedOut() { return switchedOut; } - - bool switchedOut; - - private: - bool updateStatus(); - - void checkBE(); - DynInstPtr getInstFromCacheline(); - void renameInst(DynInstPtr &inst); - // Returns true if we need to stop the front end this cycle - bool processBarriers(DynInstPtr &inst); - - void handleFault(Fault &fault); - public: - Fault getFault() { return fetchFault; } - private: - Fault fetchFault; - - // Align an address (typically a PC) to the start of an I-cache block. - // We fold in the PISA 64- to 32-bit conversion here as well. - Addr icacheBlockAlignPC(Addr addr) - { - addr = TheISA::realPCToFetchPC(addr); - return (addr & ~(cacheBlkMask)); - } - - InstSeqNum getAndIncrementInstSeq() - { return cpu->globalSeqNum++; } - - public: - FullCPU *cpu; - - BackEnd *backEnd; - - ExecContext *xc; - - OzoneThreadState *thread; - - enum Status { - Running, - Idle, - IcacheMissStall, - IcacheMissComplete, - SerializeBlocked, - SerializeComplete, - RenameBlocked, - QuiescePending, - TrapPending, - BEBlocked - }; - - Status status; - - private: - TimeBuffer *comm; - typename TimeBuffer::wire fromCommit; - - typedef typename Impl::BranchPred BranchPred; - - BranchPred branchPred; - - class ICacheCompletionEvent : public Event - { - private: - MemReqPtr req; - FrontEnd *frontEnd; - - public: - ICacheCompletionEvent(MemReqPtr &_req, FrontEnd *_fe); - - virtual void process(); - virtual const char *description(); - }; - - MemInterface *icacheInterface; - -#if !FULL_SYSTEM - PageTable *pTable; -#endif - - MemReqPtr memReq; - - /** Mask to get a 
cache block's address. */ - Addr cacheBlkMask; - - unsigned cacheBlkSize; - - Addr cacheBlkPC; - - /** The cache line being fetched. */ - uint8_t *cacheData; - - bool fetchCacheLineNextCycle; - - bool cacheBlkValid; - - public: - RenameTable renameTable; - - private: - Addr PC; - Addr nextPC; - - public: - void setPC(Addr val) { PC = val; } - void setNextPC(Addr val) { nextPC = val; } - - void wakeFromQuiesce(); - - void dumpInsts(); - - private: - typedef typename std::deque InstBuff; - typedef typename InstBuff::iterator InstBuffIt; - - InstBuff instBuffer; - - int instBufferSize; - - int maxInstBufferSize; - - int width; - - int freeRegs; - - int numPhysRegs; - - bool serializeNext; - - DynInstPtr barrierInst; - - public: - bool interruptPending; - private: - // number of idle cycles -/* - Stats::Average<> notIdleFraction; - Stats::Formula idleFraction; -*/ - // @todo: Consider making these vectors and tracking on a per thread basis. - /** Stat for total number of cycles stalled due to an icache miss. */ - Stats::Scalar<> icacheStallCycles; - /** Stat for total number of fetched instructions. */ - Stats::Scalar<> fetchedInsts; - Stats::Scalar<> fetchedBranches; - /** Stat for total number of predicted branches. */ - Stats::Scalar<> predictedBranches; - /** Stat for total number of cycles spent fetching. */ - Stats::Scalar<> fetchCycles; - - Stats::Scalar<> fetchIdleCycles; - /** Stat for total number of cycles spent squashing. */ - Stats::Scalar<> fetchSquashCycles; - /** Stat for total number of cycles spent blocked due to other stages in - * the pipeline. - */ - Stats::Scalar<> fetchBlockedCycles; - /** Stat for total number of fetched cache lines. */ - Stats::Scalar<> fetchedCacheLines; - - Stats::Scalar<> fetchIcacheSquashes; - /** Distribution of number of instructions fetched each cycle. 
*/ - Stats::Distribution<> fetchNisnDist; -// Stats::Vector<> qfull_iq_occupancy; -// Stats::VectorDistribution<> qfull_iq_occ_dist_; - Stats::Formula idleRate; - Stats::Formula branchRate; - Stats::Formula fetchRate; - Stats::Scalar<> IFQCount; // cumulative IFQ occupancy - Stats::Formula IFQOccupancy; - Stats::Formula IFQLatency; - Stats::Scalar<> IFQFcount; // cumulative IFQ full count - Stats::Formula IFQFullRate; - - Stats::Scalar<> dispatchCountStat; - Stats::Scalar<> dispatchedSerializing; - Stats::Scalar<> dispatchedTempSerializing; - Stats::Scalar<> dispatchSerializeStallCycles; - Stats::Formula dispatchRate; - Stats::Formula regIntFull; - Stats::Formula regFpFull; -}; - -#endif // __CPU_OZONE_FRONT_END_HH__ diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh deleted file mode 100644 index ffbcf3340..000000000 --- a/cpu/ozone/front_end_impl.hh +++ /dev/null @@ -1,920 +0,0 @@ -/* - * Copyright (c) 2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "arch/faults.hh" -#include "arch/isa_traits.hh" -#include "base/statistics.hh" -#include "cpu/exec_context.hh" -#include "cpu/exetrace.hh" -#include "cpu/ozone/front_end.hh" -#include "mem/mem_interface.hh" -#include "sim/byte_swap.hh" - -using namespace TheISA; - -template -FrontEnd::FrontEnd(Params *params) - : branchPred(params), - icacheInterface(params->icacheInterface), - instBufferSize(0), - maxInstBufferSize(params->maxInstBufferSize), - width(params->frontEndWidth), - freeRegs(params->numPhysicalRegs), - numPhysRegs(params->numPhysicalRegs), - serializeNext(false), - interruptPending(false) -{ - switchedOut = false; - - status = Idle; - - memReq = NULL; - // Size of cache block. - cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; - - assert(isPowerOf2(cacheBlkSize)); - - // Create mask to get rid of offset bits. - cacheBlkMask = (cacheBlkSize - 1); - - // Create space to store a cache line. - cacheData = new uint8_t[cacheBlkSize]; - - fetchCacheLineNextCycle = true; - - cacheBlkValid = false; - -#if !FULL_SYSTEM -// pTable = params->pTable; -#endif - fetchFault = NoFault; -} - -template -std::string -FrontEnd::name() const -{ - return cpu->name() + ".frontend"; -} - -template -void -FrontEnd::setCommBuffer(TimeBuffer *_comm) -{ - comm = _comm; - // @todo: Hardcoded for now. Allow this to be set by a latency. 
- fromCommit = comm->getWire(-1); -} - -template -void -FrontEnd::setXC(ExecContext *xc_ptr) -{ - xc = xc_ptr; -} - -template -void -FrontEnd::regStats() -{ - icacheStallCycles - .name(name() + ".icacheStallCycles") - .desc("Number of cycles fetch is stalled on an Icache miss") - .prereq(icacheStallCycles); - - fetchedInsts - .name(name() + ".fetchedInsts") - .desc("Number of instructions fetch has processed") - .prereq(fetchedInsts); - - fetchedBranches - .name(name() + ".fetchedBranches") - .desc("Number of fetched branches") - .prereq(fetchedBranches); - - predictedBranches - .name(name() + ".predictedBranches") - .desc("Number of branches that fetch has predicted taken") - .prereq(predictedBranches); - - fetchCycles - .name(name() + ".fetchCycles") - .desc("Number of cycles fetch has run and was not squashing or" - " blocked") - .prereq(fetchCycles); - - fetchIdleCycles - .name(name() + ".fetchIdleCycles") - .desc("Number of cycles fetch was idle") - .prereq(fetchIdleCycles); - - fetchSquashCycles - .name(name() + ".fetchSquashCycles") - .desc("Number of cycles fetch has spent squashing") - .prereq(fetchSquashCycles); - - fetchBlockedCycles - .name(name() + ".fetchBlockedCycles") - .desc("Number of cycles fetch has spent blocked") - .prereq(fetchBlockedCycles); - - fetchedCacheLines - .name(name() + ".fetchedCacheLines") - .desc("Number of cache lines fetched") - .prereq(fetchedCacheLines); - - fetchIcacheSquashes - .name(name() + ".fetchIcacheSquashes") - .desc("Number of outstanding Icache misses that were squashed") - .prereq(fetchIcacheSquashes); - - fetchNisnDist - .init(/* base value */ 0, - /* last value */ width, - /* bucket size */ 1) - .name(name() + ".rateDist") - .desc("Number of instructions fetched each cycle (Total)") - .flags(Stats::pdf); - - idleRate - .name(name() + ".idleRate") - .desc("Percent of cycles fetch was idle") - .prereq(idleRate); - idleRate = fetchIdleCycles * 100 / cpu->numCycles; - - branchRate - .name(name() + ".branchRate") - 
.desc("Number of branch fetches per cycle") - .flags(Stats::total); - branchRate = fetchedBranches / cpu->numCycles; - - fetchRate - .name(name() + ".rate") - .desc("Number of inst fetches per cycle") - .flags(Stats::total); - fetchRate = fetchedInsts / cpu->numCycles; - - IFQCount - .name(name() + ".IFQ:count") - .desc("cumulative IFQ occupancy") - ; - - IFQFcount - .name(name() + ".IFQ:fullCount") - .desc("cumulative IFQ full count") - .flags(Stats::total) - ; - - IFQOccupancy - .name(name() + ".IFQ:occupancy") - .desc("avg IFQ occupancy (inst's)") - ; - IFQOccupancy = IFQCount / cpu->numCycles; - - IFQLatency - .name(name() + ".IFQ:latency") - .desc("avg IFQ occupant latency (cycle's)") - .flags(Stats::total) - ; - - IFQFullRate - .name(name() + ".IFQ:fullRate") - .desc("fraction of time (cycles) IFQ was full") - .flags(Stats::total); - ; - IFQFullRate = IFQFcount * Stats::constant(100) / cpu->numCycles; - - dispatchCountStat - .name(name() + ".DIS:count") - .desc("cumulative count of dispatched insts") - .flags(Stats::total) - ; - - dispatchedSerializing - .name(name() + ".DIS:serializingInsts") - .desc("count of serializing insts dispatched") - .flags(Stats::total) - ; - - dispatchedTempSerializing - .name(name() + ".DIS:tempSerializingInsts") - .desc("count of temporary serializing insts dispatched") - .flags(Stats::total) - ; - - dispatchSerializeStallCycles - .name(name() + ".DIS:serializeStallCycles") - .desc("count of cycles dispatch stalled for serializing inst") - .flags(Stats::total) - ; - - dispatchRate - .name(name() + ".DIS:rate") - .desc("dispatched insts per cycle") - .flags(Stats::total) - ; - dispatchRate = dispatchCountStat / cpu->numCycles; - - regIntFull - .name(name() + ".REG:int:full") - .desc("number of cycles where there were no INT registers") - ; - - regFpFull - .name(name() + ".REG:fp:full") - .desc("number of cycles where there were no FP registers") - ; - IFQLatency = IFQOccupancy / dispatchRate; - - branchPred.regStats(); -} - 
-template -void -FrontEnd::tick() -{ - if (switchedOut) - return; - - // @todo: Maybe I want to just have direct communication... - if (fromCommit->doneSeqNum) { - branchPred.update(fromCommit->doneSeqNum, 0); - } - - IFQCount += instBufferSize; - IFQFcount += instBufferSize == maxInstBufferSize; - - // Fetch cache line - if (status == IcacheMissComplete) { - cacheBlkValid = true; - - status = Running; - if (barrierInst) - status = SerializeBlocked; - if (freeRegs <= 0) - status = RenameBlocked; - checkBE(); - } else if (status == IcacheMissStall) { - DPRINTF(FE, "Still in Icache miss stall.\n"); - icacheStallCycles++; - return; - } - - if (status == RenameBlocked || status == SerializeBlocked || - status == TrapPending || status == BEBlocked) { - // Will cause a one cycle bubble between changing state and - // restarting. - DPRINTF(FE, "In blocked status.\n"); - - fetchBlockedCycles++; - - if (status == SerializeBlocked) { - dispatchSerializeStallCycles++; - } - updateStatus(); - return; - } else if (status == QuiescePending) { - DPRINTF(FE, "Waiting for quiesce to execute or get squashed.\n"); - return; - } else if (status != IcacheMissComplete) { - if (fetchCacheLineNextCycle) { - Fault fault = fetchCacheLine(); - if (fault != NoFault) { - handleFault(fault); - fetchFault = fault; - return; - } - fetchCacheLineNextCycle = false; - } - // If miss, stall until it returns. - if (status == IcacheMissStall) { - // Tell CPU to not tick me for now. - return; - } - } - - fetchCycles++; - - int num_inst = 0; - - // Otherwise loop and process instructions. - // One way to hack infinite width is to set width and maxInstBufferSize - // both really high. Inelegant, but probably will work. - while (num_inst < width && - instBufferSize < maxInstBufferSize) { - // Get instruction from cache line. - DynInstPtr inst = getInstFromCacheline(); - - if (!inst) { - // PC is no longer in the cache line, end fetch. 
- // Might want to check this at the end of the cycle so that - // there's no cycle lost to checking for a new cache line. - DPRINTF(FE, "Need to get new cache line\n"); - fetchCacheLineNextCycle = true; - break; - } - - processInst(inst); - - if (status == SerializeBlocked) { - break; - } - - // Possibly push into a time buffer that estimates the front end - // latency - instBuffer.push_back(inst); - ++instBufferSize; - ++num_inst; - -#if FULL_SYSTEM - if (inst->isQuiesce()) { - warn("%lli: Quiesce instruction encountered, halting fetch!", curTick); - status = QuiescePending; - break; - } -#endif - - if (inst->predTaken()) { - // Start over with tick? - break; - } else if (freeRegs <= 0) { - DPRINTF(FE, "Ran out of free registers to rename to!\n"); - status = RenameBlocked; - break; - } else if (serializeNext) { - break; - } - } - - fetchNisnDist.sample(num_inst); - checkBE(); - - DPRINTF(FE, "Num insts processed: %i, Inst Buffer size: %i, Free " - "Regs %i\n", num_inst, instBufferSize, freeRegs); -} - -template -Fault -FrontEnd::fetchCacheLine() -{ - // Read a cache line, based on the current PC. -#if FULL_SYSTEM - // Flag to say whether or not address is physical addr. - unsigned flags = cpu->inPalMode(PC) ? PHYSICAL : 0; -#else - unsigned flags = 0; -#endif // FULL_SYSTEM - Fault fault = NoFault; - - if (interruptPending && flags == 0) { - return fault; - } - - // Align the fetch PC so it's at the start of a cache block. - Addr fetch_PC = icacheBlockAlignPC(PC); - - DPRINTF(FE, "Fetching cache line starting at %#x.\n", fetch_PC); - - // Setup the memReq to do a read of the first isntruction's address. - // Set the appropriate read size and flags as well. - memReq = new MemReq(); - - memReq->asid = 0; - memReq->thread_num = 0; - memReq->data = new uint8_t[64]; - memReq->xc = xc; - memReq->cmd = Read; - memReq->reset(fetch_PC, cacheBlkSize, flags); - - // Translate the instruction request. 
- fault = cpu->translateInstReq(memReq); - - // Now do the timing access to see whether or not the instruction - // exists within the cache. - if (icacheInterface && fault == NoFault) { -#if FULL_SYSTEM - if (cpu->system->memctrl->badaddr(memReq->paddr) || - memReq->flags & UNCACHEABLE) { - DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a " - "misspeculating path!", - memReq->paddr); - return TheISA::genMachineCheckFault(); - } -#endif - - memReq->completionEvent = NULL; - - memReq->time = curTick; - fault = cpu->mem->read(memReq, cacheData); - - MemAccessResult res = icacheInterface->access(memReq); - - // If the cache missed then schedule an event to wake - // up this stage once the cache miss completes. - if (icacheInterface->doEvents() && res != MA_HIT) { - memReq->completionEvent = new ICacheCompletionEvent(memReq, this); - - status = IcacheMissStall; - - cacheBlkValid = false; - - DPRINTF(FE, "Cache miss.\n"); - } else { - DPRINTF(FE, "Cache hit.\n"); - - cacheBlkValid = true; - -// memcpy(cacheData, memReq->data, memReq->size); - } - } - - // Note that this will set the cache block PC a bit earlier than it should - // be set. 
- cacheBlkPC = fetch_PC; - - ++fetchedCacheLines; - - DPRINTF(FE, "Done fetching cache line.\n"); - - return fault; -} - -template -void -FrontEnd::processInst(DynInstPtr &inst) -{ - if (processBarriers(inst)) { - return; - } - - Addr inst_PC = inst->readPC(); - - if (!inst->isControl()) { - inst->setPredTarg(inst->readNextPC()); - } else { - fetchedBranches++; - if (branchPred.predict(inst, inst_PC, inst->threadNumber)) { - predictedBranches++; - } - } - - Addr next_PC = inst->readPredTarg(); - - DPRINTF(FE, "[sn:%lli] Predicted and processed inst PC %#x, next PC " - "%#x\n", inst->seqNum, inst_PC, next_PC); - -// inst->setNextPC(next_PC); - - // Not sure where I should set this - PC = next_PC; - - renameInst(inst); -} - -template -bool -FrontEnd::processBarriers(DynInstPtr &inst) -{ - if (serializeNext) { - inst->setSerializeBefore(); - serializeNext = false; - } else if (!inst->isSerializing() && - !inst->isIprAccess() && - !inst->isStoreConditional()) { - return false; - } - - if ((inst->isIprAccess() || inst->isSerializeBefore()) && - !inst->isSerializeHandled()) { - DPRINTF(FE, "Serialize before instruction encountered.\n"); - - if (!inst->isTempSerializeBefore()) { - dispatchedSerializing++; - inst->setSerializeHandled(); - } else { - dispatchedTempSerializing++; - } - - // Change status over to SerializeBlocked so that other stages know - // what this is blocked on. - status = SerializeBlocked; - - barrierInst = inst; - return true; - } else if ((inst->isStoreConditional() || inst->isSerializeAfter()) - && !inst->isSerializeHandled()) { - DPRINTF(FE, "Serialize after instruction encountered.\n"); - - inst->setSerializeHandled(); - - dispatchedSerializing++; - - serializeNext = true; - return false; - } - return false; -} - -template -void -FrontEnd::handleFault(Fault &fault) -{ - DPRINTF(FE, "Fault at fetch, telling commit\n"); - - // We're blocked on the back end until it handles this fault. - status = TrapPending; - - // Get a sequence number. 
- InstSeqNum inst_seq = getAndIncrementInstSeq(); - // We will use a nop in order to carry the fault. - ExtMachInst ext_inst = TheISA::NoopMachInst; - - // Create a new DynInst from the dummy nop. - DynInstPtr instruction = new DynInst(ext_inst, PC, - PC+sizeof(MachInst), - inst_seq, cpu); - instruction->setPredTarg(instruction->readNextPC()); -// instruction->setThread(tid); - -// instruction->setASID(tid); - - instruction->setState(thread); - - instruction->traceData = NULL; - - instruction->fault = fault; - instruction->setCanIssue(); - instBuffer.push_back(instruction); - ++instBufferSize; -} - -template -void -FrontEnd::squash(const InstSeqNum &squash_num, const Addr &next_PC, - const bool is_branch, const bool branch_taken) -{ - DPRINTF(FE, "Squashing from [sn:%lli], setting PC to %#x\n", - squash_num, next_PC); - - if (fetchFault != NoFault) - fetchFault = NoFault; - - while (!instBuffer.empty() && - instBuffer.back()->seqNum > squash_num) { - DynInstPtr inst = instBuffer.back(); - - DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n", - inst->seqNum, inst->readPC()); - - inst->clearDependents(); - - instBuffer.pop_back(); - --instBufferSize; - - freeRegs+= inst->numDestRegs(); - } - - // Copy over rename table from the back end. - renameTable.copyFrom(backEnd->renameTable); - - PC = next_PC; - - // Update BP with proper information. - if (is_branch) { - branchPred.squash(squash_num, next_PC, branch_taken, 0); - } else { - branchPred.squash(squash_num, 0); - } - - // Clear the icache miss if it's outstanding. - if (status == IcacheMissStall && icacheInterface) { - DPRINTF(FE, "Squashing outstanding Icache miss.\n"); - memReq = NULL; - } - - if (status == SerializeBlocked) { - assert(barrierInst->seqNum > squash_num); - barrierInst = NULL; - } - - // Unless this squash originated from the front end, we're probably - // in running mode now. - // Actually might want to make this latency dependent. 
- status = Running; - fetchCacheLineNextCycle = true; -} - -template -typename Impl::DynInstPtr -FrontEnd::getInst() -{ - if (instBufferSize == 0) { - return NULL; - } - - DynInstPtr inst = instBuffer.front(); - - instBuffer.pop_front(); - - --instBufferSize; - - dispatchCountStat++; - - return inst; -} - -template -void -FrontEnd::processCacheCompletion(MemReqPtr &req) -{ - DPRINTF(FE, "Processing cache completion\n"); - - // Do something here. - if (status != IcacheMissStall || - req != memReq || - switchedOut) { - DPRINTF(FE, "Previous fetch was squashed.\n"); - fetchIcacheSquashes++; - return; - } - - status = IcacheMissComplete; - -/* if (checkStall(tid)) { - fetchStatus[tid] = Blocked; - } else { - fetchStatus[tid] = IcacheMissComplete; - } -*/ -// memcpy(cacheData, memReq->data, memReq->size); - - // Reset the completion event to NULL. -// memReq->completionEvent = NULL; - memReq = NULL; -} - -template -void -FrontEnd::addFreeRegs(int num_freed) -{ - if (status == RenameBlocked && freeRegs + num_freed > 0) { - status = Running; - } - - DPRINTF(FE, "Adding %i freed registers\n", num_freed); - - freeRegs+= num_freed; - -// assert(freeRegs <= numPhysRegs); - if (freeRegs > numPhysRegs) - freeRegs = numPhysRegs; -} - -template -bool -FrontEnd::updateStatus() -{ - bool serialize_block = !backEnd->robEmpty() || instBufferSize; - bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked(); - bool ret_val = false; - - if (status == SerializeBlocked && !serialize_block) { - status = SerializeComplete; - ret_val = true; - } - - if (status == BEBlocked && !be_block) { - if (barrierInst) { - status = SerializeBlocked; - } else { - status = Running; - } - ret_val = true; - } - return ret_val; -} - -template -void -FrontEnd::checkBE() -{ - bool be_block = cpu->decoupledFrontEnd ? 
false : backEnd->isBlocked(); - if (be_block) { - if (status == Running || status == Idle) { - status = BEBlocked; - } - } -} - -template -typename Impl::DynInstPtr -FrontEnd::getInstFromCacheline() -{ - if (status == SerializeComplete) { - DynInstPtr inst = barrierInst; - status = Running; - barrierInst = NULL; - inst->clearSerializeBefore(); - return inst; - } - - InstSeqNum inst_seq; - MachInst inst; - // @todo: Fix this magic number used here to handle word offset (and - // getting rid of PAL bit) - unsigned offset = (PC & cacheBlkMask) & ~3; - - // PC of inst is not in this cache block - if (PC >= (cacheBlkPC + cacheBlkSize) || PC < cacheBlkPC || !cacheBlkValid) { - return NULL; - } - - ////////////////////////// - // Fetch one instruction - ////////////////////////// - - // Get a sequence number. - inst_seq = getAndIncrementInstSeq(); - - // Make sure this is a valid index. - assert(offset <= cacheBlkSize - sizeof(MachInst)); - - // Get the instruction from the array of the cache line. - inst = htog(*reinterpret_cast(&cacheData[offset])); - - ExtMachInst decode_inst = TheISA::makeExtMI(inst, PC); - - // Create a new DynInst from the instruction fetched. - DynInstPtr instruction = new DynInst(decode_inst, PC, PC+sizeof(MachInst), - inst_seq, cpu); - - instruction->setState(thread); - - DPRINTF(FE, "Instruction [sn:%lli] created, with PC %#x\n%s\n", - inst_seq, instruction->readPC(), - instruction->staticInst->disassemble(PC)); - - instruction->traceData = - Trace::getInstRecord(curTick, xc, cpu, - instruction->staticInst, - instruction->readPC(), 0); - - // Increment stat of fetched instructions. 
- ++fetchedInsts; - - return instruction; -} - -template -void -FrontEnd::renameInst(DynInstPtr &inst) -{ - DynInstPtr src_inst = NULL; - int num_src_regs = inst->numSrcRegs(); - if (num_src_regs == 0) { - inst->setCanIssue(); - } else { - for (int i = 0; i < num_src_regs; ++i) { - src_inst = renameTable[inst->srcRegIdx(i)]; - - inst->setSrcInst(src_inst, i); - - DPRINTF(FE, "[sn:%lli]: Src reg %i is inst [sn:%lli]\n", - inst->seqNum, (int)inst->srcRegIdx(i), src_inst->seqNum); - - if (src_inst->isResultReady()) { - DPRINTF(FE, "Reg ready.\n"); - inst->markSrcRegReady(i); - } else { - DPRINTF(FE, "Adding to dependent list.\n"); - src_inst->addDependent(inst); - } - } - } - - for (int i = 0; i < inst->numDestRegs(); ++i) { - RegIndex idx = inst->destRegIdx(i); - - DPRINTF(FE, "Dest reg %i is now inst [sn:%lli], was previously " - "[sn:%lli]\n", - (int)inst->destRegIdx(i), inst->seqNum, - renameTable[idx]->seqNum); - - inst->setPrevDestInst(renameTable[idx], i); - - renameTable[idx] = inst; - --freeRegs; - } -} - -template -void -FrontEnd::wakeFromQuiesce() -{ - DPRINTF(FE, "Waking up from quiesce\n"); - // Hopefully this is safe - status = Running; -} - -template -void -FrontEnd::switchOut() -{ - switchedOut = true; - cpu->signalSwitched(); -} - -template -void -FrontEnd::doSwitchOut() -{ - memReq = NULL; - squash(0, 0); - instBuffer.clear(); - instBufferSize = 0; - status = Idle; -} - -template -void -FrontEnd::takeOverFrom(ExecContext *old_xc) -{ - assert(freeRegs == numPhysRegs); - fetchCacheLineNextCycle = true; - - cacheBlkValid = false; - -#if !FULL_SYSTEM -// pTable = params->pTable; -#endif - fetchFault = NoFault; - serializeNext = false; - barrierInst = NULL; - status = Running; - switchedOut = false; - interruptPending = false; -} - -template -void -FrontEnd::dumpInsts() -{ - cprintf("instBuffer size: %i\n", instBuffer.size()); - - InstBuffIt buff_it = instBuffer.begin(); - - for (int num = 0; buff_it != instBuffer.end(); num++) { - 
cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n" - "Squashed:%i\n\n", - num, (*buff_it)->readPC(), (*buff_it)->threadNumber, - (*buff_it)->seqNum, (*buff_it)->isIssued(), - (*buff_it)->isSquashed()); - buff_it++; - } -} - -template -FrontEnd::ICacheCompletionEvent::ICacheCompletionEvent(MemReqPtr &_req, FrontEnd *fe) - : Event(&mainEventQueue, Delayed_Writeback_Pri), req(_req), frontEnd(fe) -{ - this->setFlags(Event::AutoDelete); -} - -template -void -FrontEnd::ICacheCompletionEvent::process() -{ - frontEnd->processCacheCompletion(req); -} - -template -const char * -FrontEnd::ICacheCompletionEvent::description() -{ - return "ICache completion event"; -} diff --git a/cpu/ozone/inorder_back_end.cc b/cpu/ozone/inorder_back_end.cc deleted file mode 100644 index 14db610d2..000000000 --- a/cpu/ozone/inorder_back_end.cc +++ /dev/null @@ -1,5 +0,0 @@ - -#include "cpu/ozone/inorder_back_end_impl.hh" -#include "cpu/ozone/simple_impl.hh" - -template class InorderBackEnd; diff --git a/cpu/ozone/inorder_back_end.hh b/cpu/ozone/inorder_back_end.hh deleted file mode 100644 index 4039d8384..000000000 --- a/cpu/ozone/inorder_back_end.hh +++ /dev/null @@ -1,450 +0,0 @@ - -#ifndef __CPU_OZONE_INORDER_BACK_END_HH__ -#define __CPU_OZONE_INORDER_BACK_END_HH__ - -#include - -#include "arch/faults.hh" -#include "base/timebuf.hh" -#include "cpu/exec_context.hh" -#include "cpu/inst_seq.hh" -#include "cpu/ozone/rename_table.hh" -#include "cpu/ozone/thread_state.hh" -#include "mem/mem_interface.hh" -#include "mem/mem_req.hh" -#include "sim/eventq.hh" - -template -class InorderBackEnd -{ - public: - typedef typename Impl::Params Params; - typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::FullCPU FullCPU; - typedef typename Impl::FrontEnd FrontEnd; - - typedef typename FullCPU::OzoneXC OzoneXC; - typedef typename Impl::FullCPU::CommStruct CommStruct; - - InorderBackEnd(Params *params); - - std::string name() const; - - void setCPU(FullCPU *cpu_ptr) - { cpu 
= cpu_ptr; } - - void setFrontEnd(FrontEnd *front_end_ptr) - { frontEnd = front_end_ptr; } - - void setCommBuffer(TimeBuffer *_comm) - { comm = _comm; } - - void setXC(ExecContext *xc_ptr); - - void setThreadState(OzoneThreadState *thread_ptr); - - void regStats() { } - -#if FULL_SYSTEM - void checkInterrupts(); -#endif - - void tick(); - void executeInsts(); - void squash(const InstSeqNum &squash_num, const Addr &next_PC); - - void squashFromXC(); - void generateXCEvent() { } - - bool robEmpty() { return instList.empty(); } - - bool isFull() { return false; } - bool isBlocked() { return status == DcacheMissStoreStall || - status == DcacheMissLoadStall || - interruptBlocked; } - - void fetchFault(Fault &fault); - - void dumpInsts(); - - private: - void handleFault(); - - void setSquashInfoFromXC(); - - bool squashPending; - InstSeqNum squashSeqNum; - Addr squashNextPC; - - Fault faultFromFetch; - - bool interruptBlocked; - - public: - template - Fault read(Addr addr, T &data, unsigned flags); - - template - Fault read(MemReqPtr &req, T &data, int load_idx); - - template - Fault write(T data, Addr addr, unsigned flags, uint64_t *res); - - template - Fault write(MemReqPtr &req, T &data, int store_idx); - - Addr readCommitPC() { return commitPC; } - - Addr commitPC; - - void switchOut() { panic("Not implemented!"); } - void doSwitchOut() { panic("Not implemented!"); } - void takeOverFrom(ExecContext *old_xc = NULL) { panic("Not implemented!"); } - - public: - FullCPU *cpu; - - FrontEnd *frontEnd; - - ExecContext *xc; - - OzoneThreadState *thread; - - RenameTable renameTable; - - protected: - enum Status { - Running, - Idle, - DcacheMissLoadStall, - DcacheMissStoreStall, - DcacheMissComplete, - Blocked - }; - - Status status; - - class DCacheCompletionEvent : public Event - { - private: - InorderBackEnd *be; - - public: - DCacheCompletionEvent(InorderBackEnd *_be); - - virtual void process(); - virtual const char *description(); - - DynInstPtr inst; - }; - - friend 
class DCacheCompletionEvent; - - DCacheCompletionEvent cacheCompletionEvent; - - MemInterface *dcacheInterface; - - MemReqPtr memReq; - - private: - typedef typename std::list::iterator InstListIt; - - std::list instList; - - // General back end width. Used if the more specific isn't given. - int width; - - int latency; - - int squashLatency; - - TimeBuffer numInstsToWB; - TimeBuffer::wire instsAdded; - TimeBuffer::wire instsToExecute; - - TimeBuffer *comm; - // number of cycles stalled for D-cache misses - Stats::Scalar<> dcacheStallCycles; - Counter lastDcacheStall; -}; - -template -template -Fault -InorderBackEnd::read(Addr addr, T &data, unsigned flags) -{ - memReq->reset(addr, sizeof(T), flags); - - // translate to physical address - Fault fault = cpu->translateDataReadReq(memReq); - - // if we have a cache, do cache access too - if (fault == NoFault && dcacheInterface) { - memReq->cmd = Read; - memReq->completionEvent = NULL; - memReq->time = curTick; - memReq->flags &= ~INST_READ; - MemAccessResult result = dcacheInterface->access(memReq); - - // Ugly hack to get an event scheduled *only* if the access is - // a miss. We really should add first-class support for this - // at some point. - if (result != MA_HIT) { - // Fix this hack for keeping funcExeInst correct with loads that - // are executed twice. 
- memReq->completionEvent = &cacheCompletionEvent; - lastDcacheStall = curTick; -// unscheduleTickEvent(); - status = DcacheMissLoadStall; - DPRINTF(IBE, "Dcache miss stall!\n"); - } else { - // do functional access - DPRINTF(IBE, "Dcache hit!\n"); - } - } -/* - if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) - recordEvent("Uncached Read"); -*/ - return fault; -} -#if 0 -template -template -Fault -InorderBackEnd::read(MemReqPtr &req, T &data) -{ -#if FULL_SYSTEM && defined(TARGET_ALPHA) - if (req->flags & LOCKED) { - req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr); - req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true); - } -#endif - - Fault error; - error = thread->mem->read(req, data); - data = LittleEndianGuest::gtoh(data); - return error; -} -#endif - -template -template -Fault -InorderBackEnd::write(T data, Addr addr, unsigned flags, uint64_t *res) -{ - memReq->reset(addr, sizeof(T), flags); - - // translate to physical address - Fault fault = cpu->translateDataWriteReq(memReq); - - if (fault == NoFault && dcacheInterface) { - memReq->cmd = Write; -// memcpy(memReq->data,(uint8_t *)&data,memReq->size); - memReq->completionEvent = NULL; - memReq->time = curTick; - memReq->flags &= ~INST_READ; - MemAccessResult result = dcacheInterface->access(memReq); - - // Ugly hack to get an event scheduled *only* if the access is - // a miss. We really should add first-class support for this - // at some point. 
- if (result != MA_HIT) { - memReq->completionEvent = &cacheCompletionEvent; - lastDcacheStall = curTick; -// unscheduleTickEvent(); - status = DcacheMissStoreStall; - DPRINTF(IBE, "Dcache miss stall!\n"); - } else { - DPRINTF(IBE, "Dcache hit!\n"); - } - } - - if (res && (fault == NoFault)) - *res = memReq->result; -/* - if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) - recordEvent("Uncached Write"); -*/ - return fault; -} -#if 0 -template -template -Fault -InorderBackEnd::write(MemReqPtr &req, T &data) -{ -#if FULL_SYSTEM && defined(TARGET_ALPHA) - ExecContext *xc; - - // If this is a store conditional, act appropriately - if (req->flags & LOCKED) { - xc = req->xc; - - if (req->flags & UNCACHEABLE) { - // Don't update result register (see stq_c in isa_desc) - req->result = 2; - xc->setStCondFailures(0);//Needed? [RGD] - } else { - bool lock_flag = xc->readMiscReg(TheISA::Lock_Flag_DepTag); - Addr lock_addr = xc->readMiscReg(TheISA::Lock_Addr_DepTag); - req->result = lock_flag; - if (!lock_flag || - ((lock_addr & ~0xf) != (req->paddr & ~0xf))) { - xc->setMiscReg(TheISA::Lock_Flag_DepTag, false); - xc->setStCondFailures(xc->readStCondFailures() + 1); - if (((xc->readStCondFailures()) % 100000) == 0) { - std::cerr << "Warning: " - << xc->readStCondFailures() - << " consecutive store conditional failures " - << "on cpu " << req->xc->readCpuId() - << std::endl; - } - return NoFault; - } - else xc->setStCondFailures(0); - } - } - - // Need to clear any locked flags on other proccessors for - // this address. Only do this for succsful Store Conditionals - // and all other stores (WH64?). Unsuccessful Store - // Conditionals would have returned above, and wouldn't fall - // through. 
- for (int i = 0; i < cpu->system->execContexts.size(); i++){ - xc = cpu->system->execContexts[i]; - if ((xc->readMiscReg(TheISA::Lock_Addr_DepTag) & ~0xf) == - (req->paddr & ~0xf)) { - xc->setMiscReg(TheISA::Lock_Flag_DepTag, false); - } - } - -#endif - return thread->mem->write(req, (T)LittleEndianGuest::htog(data)); -} -#endif - -template -template -Fault -InorderBackEnd::read(MemReqPtr &req, T &data, int load_idx) -{ -// panic("Unimplemented!"); -// memReq->reset(addr, sizeof(T), flags); - - // translate to physical address -// Fault fault = cpu->translateDataReadReq(req); - req->cmd = Read; - req->completionEvent = NULL; - req->time = curTick; - assert(!req->data); - req->data = new uint8_t[64]; - req->flags &= ~INST_READ; - Fault fault = cpu->read(req, data); - memcpy(req->data, &data, sizeof(T)); - - // if we have a cache, do cache access too - if (dcacheInterface) { - MemAccessResult result = dcacheInterface->access(req); - - // Ugly hack to get an event scheduled *only* if the access is - // a miss. We really should add first-class support for this - // at some point. 
- if (result != MA_HIT) { - req->completionEvent = &cacheCompletionEvent; - lastDcacheStall = curTick; -// unscheduleTickEvent(); - status = DcacheMissLoadStall; - DPRINTF(IBE, "Dcache miss load stall!\n"); - } else { - DPRINTF(IBE, "Dcache hit!\n"); - - } - } - -/* - if (!dcacheInterface && (req->flags & UNCACHEABLE)) - recordEvent("Uncached Read"); -*/ - return NoFault; -} - -template -template -Fault -InorderBackEnd::write(MemReqPtr &req, T &data, int store_idx) -{ -// req->reset(addr, sizeof(T), flags); - - // translate to physical address -// Fault fault = cpu->translateDataWriteReq(req); - - req->cmd = Write; - req->completionEvent = NULL; - req->time = curTick; - assert(!req->data); - req->data = new uint8_t[64]; - memcpy(req->data, (uint8_t *)&data, req->size); - - switch(req->size) { - case 1: - cpu->write(req, (uint8_t &)data); - break; - case 2: - cpu->write(req, (uint16_t &)data); - break; - case 4: - cpu->write(req, (uint32_t &)data); - break; - case 8: - cpu->write(req, (uint64_t &)data); - break; - default: - panic("Unexpected store size!\n"); - } - - if (dcacheInterface) { - req->cmd = Write; - req->data = new uint8_t[64]; - memcpy(req->data,(uint8_t *)&data,req->size); - req->completionEvent = NULL; - req->time = curTick; - req->flags &= ~INST_READ; - MemAccessResult result = dcacheInterface->access(req); - - // Ugly hack to get an event scheduled *only* if the access is - // a miss. We really should add first-class support for this - // at some point. 
- if (result != MA_HIT) { - req->completionEvent = &cacheCompletionEvent; - lastDcacheStall = curTick; -// unscheduleTickEvent(); - status = DcacheMissStoreStall; - DPRINTF(IBE, "Dcache miss store stall!\n"); - } else { - DPRINTF(IBE, "Dcache hit!\n"); - - } - } -/* - if (req->flags & LOCKED) { - if (req->flags & UNCACHEABLE) { - // Don't update result register (see stq_c in isa_desc) - req->result = 2; - } else { - req->result = 1; - } - } -*/ -/* - if (res && (fault == NoFault)) - *res = req->result; - */ -/* - if (!dcacheInterface && (req->flags & UNCACHEABLE)) - recordEvent("Uncached Write"); -*/ - return NoFault; -} - -#endif // __CPU_OZONE_INORDER_BACK_END_HH__ diff --git a/cpu/ozone/inorder_back_end_impl.hh b/cpu/ozone/inorder_back_end_impl.hh deleted file mode 100644 index 5a378ec76..000000000 --- a/cpu/ozone/inorder_back_end_impl.hh +++ /dev/null @@ -1,519 +0,0 @@ - -#include "arch/faults.hh" -#include "arch/isa_traits.hh" -#include "cpu/ozone/inorder_back_end.hh" -#include "cpu/ozone/thread_state.hh" - -using namespace TheISA; - -template -InorderBackEnd::InorderBackEnd(Params *params) - : squashPending(false), - squashSeqNum(0), - squashNextPC(0), - faultFromFetch(NoFault), - interruptBlocked(false), - cacheCompletionEvent(this), - dcacheInterface(params->dcacheInterface), - width(params->backEndWidth), - latency(params->backEndLatency), - squashLatency(params->backEndSquashLatency), - numInstsToWB(0, latency + 1) -{ - instsAdded = numInstsToWB.getWire(latency); - instsToExecute = numInstsToWB.getWire(0); - - memReq = new MemReq; - memReq->data = new uint8_t[64]; - status = Running; -} - -template -std::string -InorderBackEnd::name() const -{ - return cpu->name() + ".inorderbackend"; -} - -template -void -InorderBackEnd::setXC(ExecContext *xc_ptr) -{ - xc = xc_ptr; - memReq->xc = xc; -} - -template -void -InorderBackEnd::setThreadState(OzoneThreadState *thread_ptr) -{ - thread = thread_ptr; - thread->setFuncExeInst(0); -} - -#if FULL_SYSTEM -template 
-void -InorderBackEnd::checkInterrupts() -{ - //Check if there are any outstanding interrupts - //Handle the interrupts - int ipl = 0; - int summary = 0; - - cpu->checkInterrupts = false; - - if (thread->readMiscReg(IPR_ASTRR)) - panic("asynchronous traps not implemented\n"); - - if (thread->readMiscReg(IPR_SIRR)) { - for (int i = INTLEVEL_SOFTWARE_MIN; - i < INTLEVEL_SOFTWARE_MAX; i++) { - if (thread->readMiscReg(IPR_SIRR) & (ULL(1) << i)) { - // See table 4-19 of the 21164 hardware reference - ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1; - summary |= (ULL(1) << i); - } - } - } - - uint64_t interrupts = cpu->intr_status(); - - if (interrupts) { - for (int i = INTLEVEL_EXTERNAL_MIN; - i < INTLEVEL_EXTERNAL_MAX; i++) { - if (interrupts & (ULL(1) << i)) { - // See table 4-19 of the 21164 hardware reference - ipl = i; - summary |= (ULL(1) << i); - } - } - } - - if (ipl && ipl > thread->readMiscReg(IPR_IPLR)) { - thread->inSyscall = true; - - thread->setMiscReg(IPR_ISR, summary); - thread->setMiscReg(IPR_INTID, ipl); - Fault(new InterruptFault)->invoke(xc); - DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", - thread->readMiscReg(IPR_IPLR), ipl, summary); - - // May need to go 1 inst prior - squashPending = true; - - thread->inSyscall = false; - - setSquashInfoFromXC(); - } -} -#endif - -template -void -InorderBackEnd::tick() -{ - // Squash due to an external source - // Not sure if this or an interrupt has higher priority - if (squashPending) { - squash(squashSeqNum, squashNextPC); - return; - } - - // if (interrupt) then set thread PC, stall front end, record that - // I'm waiting for it to drain. (for now just squash) -#if FULL_SYSTEM - if (interruptBlocked || - (cpu->checkInterrupts && - cpu->check_interrupts() && - !cpu->inPalMode())) { - if (!robEmpty()) { - interruptBlocked = true; - } else if (robEmpty() && cpu->inPalMode()) { - // Will need to let the front end continue a bit until - // we're out of pal mode. 
Hopefully we never get into an - // infinite loop... - interruptBlocked = false; - } else { - interruptBlocked = false; - checkInterrupts(); - return; - } - } -#endif - - if (status != DcacheMissLoadStall && - status != DcacheMissStoreStall) { - for (int i = 0; i < width && (*instsAdded) < width; ++i) { - DynInstPtr inst = frontEnd->getInst(); - - if (!inst) - break; - - instList.push_back(inst); - - (*instsAdded)++; - } - -#if FULL_SYSTEM - if (faultFromFetch && robEmpty() && frontEnd->isEmpty()) { - handleFault(); - } else { - executeInsts(); - } -#else - executeInsts(); -#endif - } -} - -template -void -InorderBackEnd::executeInsts() -{ - bool completed_last_inst = true; - int insts_to_execute = *instsToExecute; - int freed_regs = 0; - - while (insts_to_execute > 0) { - assert(!instList.empty()); - DynInstPtr inst = instList.front(); - - commitPC = inst->readPC(); - - thread->setPC(commitPC); - thread->setNextPC(inst->readNextPC()); - -#if FULL_SYSTEM - int count = 0; - Addr oldpc; - do { - if (count == 0) - assert(!thread->inSyscall && !thread->trapPending); - oldpc = thread->readPC(); - cpu->system->pcEventQueue.service( - thread->getXCProxy()); - count++; - } while (oldpc != thread->readPC()); - if (count > 1) { - DPRINTF(IBE, "PC skip function event, stopping commit\n"); - completed_last_inst = false; - squashPending = true; - break; - } -#endif - - Fault inst_fault = NoFault; - - if (status == DcacheMissComplete) { - DPRINTF(IBE, "Completing inst [sn:%lli]\n", inst->seqNum); - status = Running; - } else if (inst->isMemRef() && status != DcacheMissComplete && - (!inst->isDataPrefetch() && !inst->isInstPrefetch())) { - DPRINTF(IBE, "Initiating mem op inst [sn:%lli] PC: %#x\n", - inst->seqNum, inst->readPC()); - - cacheCompletionEvent.inst = inst; - inst_fault = inst->initiateAcc(); - if (inst_fault == NoFault && - status != DcacheMissLoadStall && - status != DcacheMissStoreStall) { - inst_fault = inst->completeAcc(); - } - ++thread->funcExeInst; - } else { - 
DPRINTF(IBE, "Executing inst [sn:%lli] PC: %#x\n", - inst->seqNum, inst->readPC()); - inst_fault = inst->execute(); - ++thread->funcExeInst; - } - - // Will need to be able to break this loop in case the load - // misses. Split access/complete ops would be useful here - // with writeback events. - if (status == DcacheMissLoadStall) { - *instsToExecute = insts_to_execute; - - completed_last_inst = false; - break; - } else if (status == DcacheMissStoreStall) { - // Figure out how to fix this hack. Probably have DcacheMissLoad - // vs DcacheMissStore. - *instsToExecute = insts_to_execute; - completed_last_inst = false; -/* - instList.pop_front(); - --insts_to_execute; - if (inst->traceData) { - inst->traceData->finalize(); - } -*/ - - // Don't really need to stop for a store stall as long as - // the memory system is able to handle store forwarding - // and such. Breaking out might help avoid the cache - // interface becoming blocked. - break; - } - - inst->setExecuted(); - inst->setCompleted(); - inst->setCanCommit(); - - instList.pop_front(); - - --insts_to_execute; - --(*instsToExecute); - - if (inst->traceData) { - inst->traceData->finalize(); - inst->traceData = NULL; - } - - if (inst_fault != NoFault) { -#if FULL_SYSTEM - DPRINTF(IBE, "Inst [sn:%lli] PC %#x has a fault\n", - inst->seqNum, inst->readPC()); - - assert(!thread->inSyscall); - - thread->inSyscall = true; - - // Hack for now; DTB will sometimes need the machine instruction - // for when faults happen. So we will set it here, prior to the - // DTB possibly needing it for this translation. - thread->setInst( - static_cast(inst->staticInst->machInst)); - - // Consider holding onto the trap and waiting until the trap event - // happens for this to be executed. - inst_fault->invoke(xc); - - // Exit state update mode to avoid accidental updating. - thread->inSyscall = false; - - squashPending = true; - - // Generate trap squash event. 
-// generateTrapEvent(tid); - completed_last_inst = false; - break; -#else // !FULL_SYSTEM - panic("fault (%d) detected @ PC %08p", inst_fault, - inst->PC); -#endif // FULL_SYSTEM - } - - for (int i = 0; i < inst->numDestRegs(); ++i) { - renameTable[inst->destRegIdx(i)] = inst; - thread->renameTable[inst->destRegIdx(i)] = inst; - ++freed_regs; - } - - inst->clearDependents(); - - comm->access(0)->doneSeqNum = inst->seqNum; - - if (inst->mispredicted()) { - squash(inst->seqNum, inst->readNextPC()); - - thread->setNextPC(inst->readNextPC()); - - break; - } else if (squashPending) { - // Something external happened that caused the CPU to squash. - // Break out of commit and handle the squash next cycle. - break; - } - // If it didn't mispredict, then it executed fine. Send back its - // registers and BP info? What about insts that may still have - // latency, like loads? Probably can send back the information after - // it is completed. - - // keep an instruction count - cpu->numInst++; - thread->numInsts++; - } - - frontEnd->addFreeRegs(freed_regs); - - assert(insts_to_execute >= 0); - - // Should only advance this if I have executed all instructions. - if (insts_to_execute == 0) { - numInstsToWB.advance(); - } - - // Should I set the PC to the next PC here? What do I set next PC to? - if (completed_last_inst) { - thread->setPC(thread->readNextPC()); - thread->setNextPC(thread->readPC() + sizeof(MachInst)); - } - - if (squashPending) { - setSquashInfoFromXC(); - } -} - -template -void -InorderBackEnd::handleFault() -{ - DPRINTF(Commit, "Handling fault from fetch\n"); - - assert(!thread->inSyscall); - - thread->inSyscall = true; - - // Consider holding onto the trap and waiting until the trap event - // happens for this to be executed. - faultFromFetch->invoke(xc); - - // Exit state update mode to avoid accidental updating. 
- thread->inSyscall = false; - - squashPending = true; - - setSquashInfoFromXC(); -} - -template -void -InorderBackEnd::squash(const InstSeqNum &squash_num, const Addr &next_PC) -{ - DPRINTF(IBE, "Squashing from [sn:%lli], setting PC to %#x\n", - squash_num, next_PC); - - InstListIt squash_it = --(instList.end()); - - int freed_regs = 0; - - while (!instList.empty() && (*squash_it)->seqNum > squash_num) { - DynInstPtr inst = *squash_it; - - DPRINTF(IBE, "Squashing instruction PC %#x, [sn:%lli].\n", - inst->readPC(), - inst->seqNum); - - // May cause problems with misc regs - freed_regs+= inst->numDestRegs(); - inst->clearDependents(); - squash_it--; - instList.pop_back(); - } - - frontEnd->addFreeRegs(freed_regs); - - for (int i = 0; i < latency+1; ++i) { - numInstsToWB.advance(); - } - - squashPending = false; - - // Probably want to make sure that this squash is the one that set the - // thread into inSyscall mode. - thread->inSyscall = false; - - // Tell front end to squash, reset PC to new one. - frontEnd->squash(squash_num, next_PC); - - faultFromFetch = NULL; -} - -template -void -InorderBackEnd::squashFromXC() -{ - // Record that I need to squash - squashPending = true; - - thread->inSyscall = true; -} - -template -void -InorderBackEnd::setSquashInfoFromXC() -{ - // Need to handle the case of the instList being empty. In that case - // probably any number works, except maybe with stores in the store buffer. - squashSeqNum = instList.empty() ? 
0 : instList.front()->seqNum - 1; - - squashNextPC = thread->PC; -} - -template -void -InorderBackEnd::fetchFault(Fault &fault) -{ - faultFromFetch = fault; -} - -template -void -InorderBackEnd::dumpInsts() -{ - int num = 0; - int valid_num = 0; - - InstListIt inst_list_it = instList.begin(); - - cprintf("Inst list size: %i\n", instList.size()); - - while (inst_list_it != instList.end()) - { - cprintf("Instruction:%i\n", - num); - if (!(*inst_list_it)->isSquashed()) { - if (!(*inst_list_it)->isIssued()) { - ++valid_num; - cprintf("Count:%i\n", valid_num); - } else if ((*inst_list_it)->isMemRef() && - !(*inst_list_it)->memOpDone) { - // Loads that have not been marked as executed still count - // towards the total instructions. - ++valid_num; - cprintf("Count:%i\n", valid_num); - } - } - - cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" - "Issued:%i\nSquashed:%i\n", - (*inst_list_it)->readPC(), - (*inst_list_it)->seqNum, - (*inst_list_it)->threadNumber, - (*inst_list_it)->isIssued(), - (*inst_list_it)->isSquashed()); - - if ((*inst_list_it)->isMemRef()) { - cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); - } - - cprintf("\n"); - - inst_list_it++; - ++num; - } -} - -template -InorderBackEnd::DCacheCompletionEvent::DCacheCompletionEvent( - InorderBackEnd *_be) - : Event(&mainEventQueue, CPU_Tick_Pri), be(_be) -{ -// this->setFlags(Event::AutoDelete); -} - -template -void -InorderBackEnd::DCacheCompletionEvent::process() -{ - inst->completeAcc(); - be->status = DcacheMissComplete; -} - -template -const char * -InorderBackEnd::DCacheCompletionEvent::description() -{ - return "DCache completion event"; -} diff --git a/cpu/ozone/inst_queue.cc b/cpu/ozone/inst_queue.cc deleted file mode 100644 index 9c61602d9..000000000 --- a/cpu/ozone/inst_queue.cc +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "cpu/ozone/dyn_inst.hh" -#include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/simple_impl.hh" -#include "cpu/ozone/inst_queue_impl.hh" - -// Force instantiation of InstructionQueue. 
-template class InstQueue; -template class InstQueue; diff --git a/cpu/ozone/inst_queue.hh b/cpu/ozone/inst_queue.hh deleted file mode 100644 index 2cbbb7987..000000000 --- a/cpu/ozone/inst_queue.hh +++ /dev/null @@ -1,506 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __CPU_OZONE_INST_QUEUE_HH__ -#define __CPU_OZONE_INST_QUEUE_HH__ - -#include -#include -#include -#include - -#include "base/statistics.hh" -#include "base/timebuf.hh" -#include "cpu/inst_seq.hh" -#include "sim/host.hh" - -class FUPool; -class MemInterface; - -/** - * A standard instruction queue class. It holds ready instructions, in - * order, in seperate priority queues to facilitate the scheduling of - * instructions. The IQ uses a separate linked list to track dependencies. - * Similar to the rename map and the free list, it expects that - * floating point registers have their indices start after the integer - * registers (ie with 96 int and 96 fp registers, regs 0-95 are integer - * and 96-191 are fp). This remains true even for both logical and - * physical register indices. The IQ depends on the memory dependence unit to - * track when memory operations are ready in terms of ordering; register - * dependencies are tracked normally. Right now the IQ also handles the - * execution timing; this is mainly to allow back-to-back scheduling without - * requiring IEW to be able to peek into the IQ. At the end of the execution - * latency, the instruction is put into the queue to execute, where it will - * have the execute() function called on it. - * @todo: Make IQ able to handle multiple FU pools. - */ -template -class InstQueue -{ - public: - //Typedefs from the Impl. - typedef typename Impl::FullCPU FullCPU; - typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::Params Params; - typedef typename Impl::IssueStruct IssueStruct; -/* - typedef typename Impl::CPUPol::IEW IEW; - typedef typename Impl::CPUPol::MemDepUnit MemDepUnit; - typedef typename Impl::CPUPol::IssueStruct IssueStruct; - typedef typename Impl::CPUPol::TimeStruct TimeStruct; -*/ - // Typedef of iterator through the list of instructions. - typedef typename std::list::iterator ListIt; - - friend class Impl::FullCPU; -#if 0 - /** FU completion event class. 
*/ - class FUCompletion : public Event { - private: - /** Executing instruction. */ - DynInstPtr inst; - - /** Index of the FU used for executing. */ - int fuIdx; - - /** Pointer back to the instruction queue. */ - InstQueue *iqPtr; - - public: - /** Construct a FU completion event. */ - FUCompletion(DynInstPtr &_inst, int fu_idx, - InstQueue *iq_ptr); - - virtual void process(); - virtual const char *description(); - }; -#endif - /** Constructs an IQ. */ - InstQueue(Params *params); - - /** Destructs the IQ. */ - ~InstQueue(); - - /** Returns the name of the IQ. */ - std::string name() const; - - /** Registers statistics. */ - void regStats(); - - /** Sets CPU pointer. */ - void setCPU(FullCPU *_cpu) { cpu = _cpu; } -#if 0 - /** Sets active threads list. */ - void setActiveThreads(list *at_ptr); - - /** Sets the IEW pointer. */ - void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; } -#endif - /** Sets the timer buffer between issue and execute. */ - void setIssueToExecuteQueue(TimeBuffer *i2eQueue); -#if 0 - /** Sets the global time buffer. */ - void setTimeBuffer(TimeBuffer *tb_ptr); - - /** Number of entries needed for given amount of threads. */ - int entryAmount(int num_threads); - - /** Resets max entries for all threads. */ - void resetEntries(); -#endif - /** Returns total number of free entries. */ - unsigned numFreeEntries(); - - /** Returns number of free entries for a thread. */ - unsigned numFreeEntries(unsigned tid); - - /** Returns whether or not the IQ is full. */ - bool isFull(); - - /** Returns whether or not the IQ is full for a specific thread. */ - bool isFull(unsigned tid); - - /** Returns if there are any ready instructions in the IQ. */ - bool hasReadyInsts(); - - /** Inserts a new instruction into the IQ. */ - void insert(DynInstPtr &new_inst); - - /** Inserts a new, non-speculative instruction into the IQ. 
*/ - void insertNonSpec(DynInstPtr &new_inst); -#if 0 - /** - * Advances the tail of the IQ, used if an instruction is not added to the - * IQ for scheduling. - * @todo: Rename this function. - */ - void advanceTail(DynInstPtr &inst); - - /** Process FU completion event. */ - void processFUCompletion(DynInstPtr &inst, int fu_idx); -#endif - /** - * Schedules ready instructions, adding the ready ones (oldest first) to - * the queue to execute. - */ - void scheduleReadyInsts(); - - /** Schedules a single specific non-speculative instruction. */ - void scheduleNonSpec(const InstSeqNum &inst); - - /** - * Commits all instructions up to and including the given sequence number, - * for a specific thread. - */ - void commit(const InstSeqNum &inst, unsigned tid = 0); - - /** Wakes all dependents of a completed instruction. */ - void wakeDependents(DynInstPtr &completed_inst); - - /** Adds a ready memory instruction to the ready list. */ - void addReadyMemInst(DynInstPtr &ready_inst); -#if 0 - /** - * Reschedules a memory instruction. It will be ready to issue once - * replayMemInst() is called. - */ - void rescheduleMemInst(DynInstPtr &resched_inst); - - /** Replays a memory instruction. It must be rescheduled first. */ - void replayMemInst(DynInstPtr &replay_inst); -#endif - /** Completes a memory operation. */ - void completeMemInst(DynInstPtr &completed_inst); -#if 0 - /** Indicates an ordering violation between a store and a load. */ - void violation(DynInstPtr &store, DynInstPtr &faulting_load); -#endif - /** - * Squashes instructions for a thread. Squashing information is obtained - * from the time buffer. - */ - void squash(unsigned tid); // Probably want the ISN - - /** Returns the number of used entries for a thread. */ - unsigned getCount(unsigned tid) { return count[tid]; }; - - /** Updates the number of free entries. */ - void updateFreeEntries(int num) { freeEntries += num; } - - /** Debug function to print all instructions. 
*/ - void printInsts(); - - private: - /** Does the actual squashing. */ - void doSquash(unsigned tid); - - ///////////////////////// - // Various pointers - ///////////////////////// - - /** Pointer to the CPU. */ - FullCPU *cpu; - - /** Cache interface. */ - MemInterface *dcacheInterface; -#if 0 - /** Pointer to IEW stage. */ - IEW *iewStage; - - /** The memory dependence unit, which tracks/predicts memory dependences - * between instructions. - */ - MemDepUnit memDepUnit[Impl::MaxThreads]; -#endif - /** The queue to the execute stage. Issued instructions will be written - * into it. - */ - TimeBuffer *issueToExecuteQueue; -#if 0 - /** The backwards time buffer. */ - TimeBuffer *timeBuffer; - - /** Wire to read information from timebuffer. */ - typename TimeBuffer::wire fromCommit; - - /** Function unit pool. */ - FUPool *fuPool; -#endif - ////////////////////////////////////// - // Instruction lists, ready queues, and ordering - ////////////////////////////////////// - - /** List of all the instructions in the IQ (some of which may be issued). */ - std::list instList[Impl::MaxThreads]; - - /** - * Struct for comparing entries to be added to the priority queue. This - * gives reverse ordering to the instructions in terms of sequence - * numbers: the instructions with smaller sequence numbers (and hence - * are older) will be at the top of the priority queue. - */ - struct pqCompare { - bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const - { - return lhs->seqNum > rhs->seqNum; - } - }; - - /** - * Struct for an IQ entry. It includes the instruction and an iterator - * to the instruction's spot in the IQ. - */ - struct IQEntry { - DynInstPtr inst; - ListIt iqIt; - }; - - typedef std::priority_queue, pqCompare> - ReadyInstQueue; - - typedef std::map ReadyInstMap; - typedef typename std::map::iterator ReadyMapIt; - - /** List of ready instructions. 
- */ - ReadyInstQueue readyInsts; - - /** List of non-speculative instructions that will be scheduled - * once the IQ gets a signal from commit. While it's redundant to - * have the key be a part of the value (the sequence number is stored - * inside of DynInst), when these instructions are woken up only - * the sequence number will be available. Thus it is most efficient to be - * able to search by the sequence number alone. - */ - std::map nonSpecInsts; - - typedef typename std::map::iterator NonSpecMapIt; -#if 0 - /** Entry for the list age ordering by op class. */ - struct ListOrderEntry { - OpClass queueType; - InstSeqNum oldestInst; - }; - - /** List that contains the age order of the oldest instruction of each - * ready queue. Used to select the oldest instruction available - * among op classes. - */ - std::list listOrder; - - typedef typename std::list::iterator ListOrderIt; - - /** Tracks if each ready queue is on the age order list. */ - bool queueOnList[Num_OpClasses]; - - /** Iterators of each ready queue. Points to their spot in the age order - * list. - */ - ListOrderIt readyIt[Num_OpClasses]; - - /** Add an op class to the age order list. */ - void addToOrderList(OpClass op_class); - - /** - * Called when the oldest instruction has been removed from a ready queue; - * this places that ready queue into the proper spot in the age order list. - */ - void moveToYoungerInst(ListOrderIt age_order_it); -#endif - ////////////////////////////////////// - // Various parameters - ////////////////////////////////////// -#if 0 - /** IQ Resource Sharing Policy */ - enum IQPolicy { - Dynamic, - Partitioned, - Threshold - }; - - /** IQ sharing policy for SMT. */ - IQPolicy iqPolicy; -#endif - /** Number of Total Threads*/ - unsigned numThreads; -#if 0 - /** Pointer to list of active threads. 
*/ - list *activeThreads; -#endif - /** Per Thread IQ count */ - unsigned count[Impl::MaxThreads]; - - /** Max IQ Entries Per Thread */ - unsigned maxEntries[Impl::MaxThreads]; - - /** Number of free IQ entries left. */ - unsigned freeEntries; - - /** The number of entries in the instruction queue. */ - unsigned numEntries; - - /** The total number of instructions that can be issued in one cycle. */ - unsigned totalWidth; -#if 0 - /** The number of physical registers in the CPU. */ - unsigned numPhysRegs; - - /** The number of physical integer registers in the CPU. */ - unsigned numPhysIntRegs; - - /** The number of floating point registers in the CPU. */ - unsigned numPhysFloatRegs; -#endif - /** Delay between commit stage and the IQ. - * @todo: Make there be a distinction between the delays within IEW. - */ - unsigned commitToIEWDelay; - - ////////////////////////////////// - // Variables needed for squashing - ////////////////////////////////// - - /** The sequence number of the squashed instruction. */ - InstSeqNum squashedSeqNum[Impl::MaxThreads]; - - /** Iterator that points to the last instruction that has been squashed. - * This will not be valid unless the IQ is in the process of squashing. - */ - ListIt squashIt[Impl::MaxThreads]; -#if 0 - /////////////////////////////////// - // Dependency graph stuff - /////////////////////////////////// - - class DependencyEntry - { - public: - DependencyEntry() - : inst(NULL), next(NULL) - { } - - DynInstPtr inst; - //Might want to include data about what arch. register the - //dependence is waiting on. - DependencyEntry *next; - - //This function, and perhaps this whole class, stand out a little - //bit as they don't fit a classification well. I want access - //to the underlying structure of the linked list, yet at - //the same time it feels like this should be something abstracted - //away. So for now it will sit here, within the IQ, until - //a better implementation is decided upon. 
- // This function probably shouldn't be within the entry... - void insert(DynInstPtr &new_inst); - - void remove(DynInstPtr &inst_to_remove); - - // Debug variable, remove when done testing. - static unsigned mem_alloc_counter; - }; - - /** Array of linked lists. Each linked list is a list of all the - * instructions that depend upon a given register. The actual - * register's index is used to index into the graph; ie all - * instructions in flight that are dependent upon r34 will be - * in the linked list of dependGraph[34]. - */ - DependencyEntry *dependGraph; - - /** A cache of the recently woken registers. It is 1 if the register - * has been woken up recently, and 0 if the register has been added - * to the dependency graph and has not yet received its value. It - * is basically a secondary scoreboard, and should pretty much mirror - * the scoreboard that exists in the rename map. - */ - vector regScoreboard; - - /** Adds an instruction to the dependency graph, as a producer. */ - bool addToDependents(DynInstPtr &new_inst); - - /** Adds an instruction to the dependency graph, as a consumer. */ - void createDependency(DynInstPtr &new_inst); -#endif - /** Moves an instruction to the ready queue if it is ready. */ - void addIfReady(DynInstPtr &inst); - - /** Debugging function to count how many entries are in the IQ. It does - * a linear walk through the instructions, so do not call this function - * during normal execution. - */ - int countInsts(); -#if 0 - /** Debugging function to dump out the dependency graph. - */ - void dumpDependGraph(); -#endif - /** Debugging function to dump all the list sizes, as well as print - * out the list of nonspeculative instructions. Should not be used - * in any other capacity, but it has no harmful sideaffects. - */ - void dumpLists(); - - /** Debugging function to dump out all instructions that are in the - * IQ. - */ - void dumpInsts(); - - /** Stat for number of instructions added. 
*/ - Stats::Scalar<> iqInstsAdded; - /** Stat for number of non-speculative instructions added. */ - Stats::Scalar<> iqNonSpecInstsAdded; -// Stats::Scalar<> iqIntInstsAdded; - /** Stat for number of integer instructions issued. */ - Stats::Scalar<> iqIntInstsIssued; -// Stats::Scalar<> iqFloatInstsAdded; - /** Stat for number of floating point instructions issued. */ - Stats::Scalar<> iqFloatInstsIssued; -// Stats::Scalar<> iqBranchInstsAdded; - /** Stat for number of branch instructions issued. */ - Stats::Scalar<> iqBranchInstsIssued; -// Stats::Scalar<> iqMemInstsAdded; - /** Stat for number of memory instructions issued. */ - Stats::Scalar<> iqMemInstsIssued; -// Stats::Scalar<> iqMiscInstsAdded; - /** Stat for number of miscellaneous instructions issued. */ - Stats::Scalar<> iqMiscInstsIssued; - /** Stat for number of squashed instructions that were ready to issue. */ - Stats::Scalar<> iqSquashedInstsIssued; - /** Stat for number of squashed instructions examined when squashing. */ - Stats::Scalar<> iqSquashedInstsExamined; - /** Stat for number of squashed instruction operands examined when - * squashing. - */ - Stats::Scalar<> iqSquashedOperandsExamined; - /** Stat for number of non-speculative instructions removed due to a squash. - */ - Stats::Scalar<> iqSquashedNonSpecRemoved; - -}; - -#endif //__CPU_OZONE_INST_QUEUE_HH__ diff --git a/cpu/ozone/inst_queue_impl.hh b/cpu/ozone/inst_queue_impl.hh deleted file mode 100644 index 0523c68d6..000000000 --- a/cpu/ozone/inst_queue_impl.hh +++ /dev/null @@ -1,1341 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -// Todo: -// Current ordering allows for 0 cycle added-to-scheduled. Could maybe fake -// it; either do in reverse order, or have added instructions put into a -// different ready queue that, in scheduleRreadyInsts(), gets put onto the -// normal ready queue. This would however give only a one cycle delay, -// but probably is more flexible to actually add in a delay parameter than -// just running it backwards. 
- -#include - -#include "sim/root.hh" - -#include "cpu/ozone/inst_queue.hh" -#if 0 -template -InstQueue::FUCompletion::FUCompletion(DynInstPtr &_inst, - int fu_idx, - InstQueue *iq_ptr) - : Event(&mainEventQueue, Stat_Event_Pri), - inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr) -{ - this->setFlags(Event::AutoDelete); -} - -template -void -InstQueue::FUCompletion::process() -{ - iqPtr->processFUCompletion(inst, fuIdx); -} - - -template -const char * -InstQueue::FUCompletion::description() -{ - return "Functional unit completion event"; -} -#endif -template -InstQueue::InstQueue(Params *params) - : dcacheInterface(params->dcacheInterface), -// fuPool(params->fuPool), - numEntries(params->numIQEntries), - totalWidth(params->issueWidth), -// numPhysIntRegs(params->numPhysIntRegs), -// numPhysFloatRegs(params->numPhysFloatRegs), - commitToIEWDelay(params->commitToIEWDelay) -{ -// assert(fuPool); - -// numThreads = params->numberOfThreads; - numThreads = 1; - - //Initialize thread IQ counts - for (int i = 0; i smtIQPolicy; - - //Convert string to lowercase - std::transform(policy.begin(), policy.end(), policy.begin(), - (int(*)(int)) tolower); - - //Figure out resource sharing policy - if (policy == "dynamic") { - iqPolicy = Dynamic; - - //Set Max Entries to Total ROB Capacity - for (int i = 0; i < numThreads; i++) { - maxEntries[i] = numEntries; - } - - } else if (policy == "partitioned") { - iqPolicy = Partitioned; - - //@todo:make work if part_amt doesnt divide evenly. 
- int part_amt = numEntries / numThreads; - - //Divide ROB up evenly - for (int i = 0; i < numThreads; i++) { - maxEntries[i] = part_amt; - } - - DPRINTF(Fetch, "IQ sharing policy set to Partitioned:" - "%i entries per thread.\n",part_amt); - - } else if (policy == "threshold") { - iqPolicy = Threshold; - - double threshold = (double)params->smtIQThreshold / 100; - - int thresholdIQ = (int)((double)threshold * numEntries); - - //Divide up by threshold amount - for (int i = 0; i < numThreads; i++) { - maxEntries[i] = thresholdIQ; - } - - DPRINTF(Fetch, "IQ sharing policy set to Threshold:" - "%i entries per thread.\n",thresholdIQ); - } else { - assert(0 && "Invalid IQ Sharing Policy.Options Are:{Dynamic," - "Partitioned, Threshold}"); - } -*/ -} - -template -InstQueue::~InstQueue() -{ - // Clear the dependency graph -/* - DependencyEntry *curr; - DependencyEntry *prev; - - for (int i = 0; i < numPhysRegs; ++i) { - curr = dependGraph[i].next; - - while (curr) { - DependencyEntry::mem_alloc_counter--; - - prev = curr; - curr = prev->next; - prev->inst = NULL; - - delete prev; - } - - if (dependGraph[i].inst) { - dependGraph[i].inst = NULL; - } - - dependGraph[i].next = NULL; - } - - assert(DependencyEntry::mem_alloc_counter == 0); - - delete [] dependGraph; -*/ -} - -template -std::string -InstQueue::name() const -{ - return cpu->name() + ".iq"; -} - -template -void -InstQueue::regStats() -{ - iqInstsAdded - .name(name() + ".iqInstsAdded") - .desc("Number of instructions added to the IQ (excludes non-spec)") - .prereq(iqInstsAdded); - - iqNonSpecInstsAdded - .name(name() + ".iqNonSpecInstsAdded") - .desc("Number of non-speculative instructions added to the IQ") - .prereq(iqNonSpecInstsAdded); - -// iqIntInstsAdded; - - iqIntInstsIssued - .name(name() + ".iqIntInstsIssued") - .desc("Number of integer instructions issued") - .prereq(iqIntInstsIssued); - -// iqFloatInstsAdded; - - iqFloatInstsIssued - .name(name() + ".iqFloatInstsIssued") - .desc("Number of float 
instructions issued") - .prereq(iqFloatInstsIssued); - -// iqBranchInstsAdded; - - iqBranchInstsIssued - .name(name() + ".iqBranchInstsIssued") - .desc("Number of branch instructions issued") - .prereq(iqBranchInstsIssued); - -// iqMemInstsAdded; - - iqMemInstsIssued - .name(name() + ".iqMemInstsIssued") - .desc("Number of memory instructions issued") - .prereq(iqMemInstsIssued); - -// iqMiscInstsAdded; - - iqMiscInstsIssued - .name(name() + ".iqMiscInstsIssued") - .desc("Number of miscellaneous instructions issued") - .prereq(iqMiscInstsIssued); - - iqSquashedInstsIssued - .name(name() + ".iqSquashedInstsIssued") - .desc("Number of squashed instructions issued") - .prereq(iqSquashedInstsIssued); - - iqSquashedInstsExamined - .name(name() + ".iqSquashedInstsExamined") - .desc("Number of squashed instructions iterated over during squash;" - " mainly for profiling") - .prereq(iqSquashedInstsExamined); - - iqSquashedOperandsExamined - .name(name() + ".iqSquashedOperandsExamined") - .desc("Number of squashed operands that are examined and possibly " - "removed from graph") - .prereq(iqSquashedOperandsExamined); - - iqSquashedNonSpecRemoved - .name(name() + ".iqSquashedNonSpecRemoved") - .desc("Number of squashed non-spec instructions that were removed") - .prereq(iqSquashedNonSpecRemoved); -/* - for ( int i=0; i < numThreads; i++) { - // Tell mem dependence unit to reg stats as well. 
- memDepUnit[i].regStats(); - } -*/ -} -/* -template -void -InstQueue::setActiveThreads(list *at_ptr) -{ - DPRINTF(IQ, "Setting active threads list pointer.\n"); - activeThreads = at_ptr; -} -*/ -template -void -InstQueue::setIssueToExecuteQueue(TimeBuffer *i2e_ptr) -{ - DPRINTF(IQ, "Set the issue to execute queue.\n"); - issueToExecuteQueue = i2e_ptr; -} -/* -template -void -InstQueue::setTimeBuffer(TimeBuffer *tb_ptr) -{ - DPRINTF(IQ, "Set the time buffer.\n"); - timeBuffer = tb_ptr; - - fromCommit = timeBuffer->getWire(-commitToIEWDelay); -} - -template -int -InstQueue::entryAmount(int num_threads) -{ - if (iqPolicy == Partitioned) { - return numEntries / num_threads; - } else { - return 0; - } -} - - -template -void -InstQueue::resetEntries() -{ - if (iqPolicy != Dynamic || numThreads > 1) { - int active_threads = (*activeThreads).size(); - - list::iterator threads = (*activeThreads).begin(); - list::iterator list_end = (*activeThreads).end(); - - while (threads != list_end) { - if (iqPolicy == Partitioned) { - maxEntries[*threads++] = numEntries / active_threads; - } else if(iqPolicy == Threshold && active_threads == 1) { - maxEntries[*threads++] = numEntries; - } - } - } -} -*/ -template -unsigned -InstQueue::numFreeEntries() -{ - return freeEntries; -} - -template -unsigned -InstQueue::numFreeEntries(unsigned tid) -{ - return maxEntries[tid] - count[tid]; -} - -// Might want to do something more complex if it knows how many instructions -// will be issued this cycle. 
-template -bool -InstQueue::isFull() -{ - if (freeEntries == 0) { - return(true); - } else { - return(false); - } -} - -template -bool -InstQueue::isFull(unsigned tid) -{ - if (numFreeEntries(tid) == 0) { - return(true); - } else { - return(false); - } -} - -template -bool -InstQueue::hasReadyInsts() -{ -/* - if (!listOrder.empty()) { - return true; - } - - for (int i = 0; i < Num_OpClasses; ++i) { - if (!readyInsts[i].empty()) { - return true; - } - } - - return false; -*/ - return readyInsts.empty(); -} - -template -void -InstQueue::insert(DynInstPtr &new_inst) -{ - // Make sure the instruction is valid - assert(new_inst); - - DPRINTF(IQ, "Adding instruction PC %#x to the IQ.\n", - new_inst->readPC()); - - // Check if there are any free entries. Panic if there are none. - // Might want to have this return a fault in the future instead of - // panicing. - assert(freeEntries != 0); - - instList[new_inst->threadNumber].push_back(new_inst); - - // Decrease the number of free entries. - --freeEntries; - - //Mark Instruction as in IQ -// new_inst->setInIQ(); -/* - // Look through its source registers (physical regs), and mark any - // dependencies. - addToDependents(new_inst); - - // Have this instruction set itself as the producer of its destination - // register(s). - createDependency(new_inst); -*/ - // If it's a memory instruction, add it to the memory dependency - // unit. -// if (new_inst->isMemRef()) { -// memDepUnit[new_inst->threadNumber].insert(new_inst); -// } else { - // If the instruction is ready then add it to the ready list. - addIfReady(new_inst); -// } - - ++iqInstsAdded; - - - //Update Thread IQ Count - count[new_inst->threadNumber]++; - - assert(freeEntries == (numEntries - countInsts())); -} - -template -void -InstQueue::insertNonSpec(DynInstPtr &new_inst) -{ - nonSpecInsts[new_inst->seqNum] = new_inst; - - // @todo: Clean up this code; can do it by setting inst as unable - // to issue, then calling normal insert on the inst. 
- - // Make sure the instruction is valid - assert(new_inst); - - DPRINTF(IQ, "Adding instruction PC %#x to the IQ.\n", - new_inst->readPC()); - - // Check if there are any free entries. Panic if there are none. - // Might want to have this return a fault in the future instead of - // panicing. - assert(freeEntries != 0); - - instList[new_inst->threadNumber].push_back(new_inst); - - // Decrease the number of free entries. - --freeEntries; - - //Mark Instruction as in IQ -// new_inst->setInIQ(); -/* - // Have this instruction set itself as the producer of its destination - // register(s). - createDependency(new_inst); - - // If it's a memory instruction, add it to the memory dependency - // unit. - if (new_inst->isMemRef()) { - memDepUnit[new_inst->threadNumber].insertNonSpec(new_inst); - } -*/ - ++iqNonSpecInstsAdded; - - //Update Thread IQ Count - count[new_inst->threadNumber]++; - - assert(freeEntries == (numEntries - countInsts())); -} -/* -template -void -InstQueue::advanceTail(DynInstPtr &inst) -{ - // Have this instruction set itself as the producer of its destination - // register(s). - createDependency(inst); -} - -template -void -InstQueue::addToOrderList(OpClass op_class) -{ - assert(!readyInsts[op_class].empty()); - - ListOrderEntry queue_entry; - - queue_entry.queueType = op_class; - - queue_entry.oldestInst = readyInsts[op_class].top()->seqNum; - - ListOrderIt list_it = listOrder.begin(); - ListOrderIt list_end_it = listOrder.end(); - - while (list_it != list_end_it) { - if ((*list_it).oldestInst > queue_entry.oldestInst) { - break; - } - - list_it++; - } - - readyIt[op_class] = listOrder.insert(list_it, queue_entry); - queueOnList[op_class] = true; -} - -template -void -InstQueue::moveToYoungerInst(ListOrderIt list_order_it) -{ - // Get iterator of next item on the list - // Delete the original iterator - // Determine if the next item is either the end of the list or younger - // than the new instruction. If so, then add in a new iterator right here. 
- // If not, then move along. - ListOrderEntry queue_entry; - OpClass op_class = (*list_order_it).queueType; - ListOrderIt next_it = list_order_it; - - ++next_it; - - queue_entry.queueType = op_class; - queue_entry.oldestInst = readyInsts[op_class].top()->seqNum; - - while (next_it != listOrder.end() && - (*next_it).oldestInst < queue_entry.oldestInst) { - ++next_it; - } - - readyIt[op_class] = listOrder.insert(next_it, queue_entry); -} - -template -void -InstQueue::processFUCompletion(DynInstPtr &inst, int fu_idx) -{ - // The CPU could have been sleeping until this op completed (*extremely* - // long latency op). Wake it if it was. This may be overkill. - iewStage->wakeCPU(); - - fuPool->freeUnit(fu_idx); - - int &size = issueToExecuteQueue->access(0)->size; - - issueToExecuteQueue->access(0)->insts[size++] = inst; -} -*/ -// @todo: Figure out a better way to remove the squashed items from the -// lists. Checking the top item of each list to see if it's squashed -// wastes time and forces jumps. -template -void -InstQueue::scheduleReadyInsts() -{ - DPRINTF(IQ, "Attempting to schedule ready instructions from " - "the IQ.\n"); - -// IssueStruct *i2e_info = issueToExecuteQueue->access(0); -/* - // Will need to reorder the list if either a queue is not on the list, - // or it has an older instruction than last time. - for (int i = 0; i < Num_OpClasses; ++i) { - if (!readyInsts[i].empty()) { - if (!queueOnList[i]) { - addToOrderList(OpClass(i)); - } else if (readyInsts[i].top()->seqNum < - (*readyIt[i]).oldestInst) { - listOrder.erase(readyIt[i]); - addToOrderList(OpClass(i)); - } - } - } - - // Have iterator to head of the list - // While I haven't exceeded bandwidth or reached the end of the list, - // Try to get a FU that can do what this op needs. - // If successful, change the oldestInst to the new top of the list, put - // the queue in the proper place in the list. - // Increment the iterator. 
- // This will avoid trying to schedule a certain op class if there are no - // FUs that handle it. - ListOrderIt order_it = listOrder.begin(); - ListOrderIt order_end_it = listOrder.end(); - int total_issued = 0; - int exec_queue_slot = i2e_info->size; - - while (exec_queue_slot < totalWidth && order_it != order_end_it) { - OpClass op_class = (*order_it).queueType; - - assert(!readyInsts[op_class].empty()); - - DynInstPtr issuing_inst = readyInsts[op_class].top(); - - assert(issuing_inst->seqNum == (*order_it).oldestInst); - - if (issuing_inst->isSquashed()) { - readyInsts[op_class].pop(); - - if (!readyInsts[op_class].empty()) { - moveToYoungerInst(order_it); - } else { - readyIt[op_class] = listOrder.end(); - queueOnList[op_class] = false; - } - - listOrder.erase(order_it++); - - ++iqSquashedInstsIssued; - - continue; - } - - int idx = fuPool->getUnit(op_class); - - if (idx != -1) { - int op_latency = fuPool->getOpLatency(op_class); - - if (op_latency == 1) { - i2e_info->insts[exec_queue_slot++] = issuing_inst; - i2e_info->size++; - - // Add the FU onto the list of FU's to be freed next cycle. - fuPool->freeUnit(idx); - } else { - int issue_latency = fuPool->getIssueLatency(op_class); - - if (issue_latency > 1) { - // Generate completion event for the FU - FUCompletion *execution = new FUCompletion(issuing_inst, - idx, this); - - execution->schedule(curTick + issue_latency - 1); - } else { - i2e_info->insts[exec_queue_slot++] = issuing_inst; - i2e_info->size++; - - // Add the FU onto the list of FU's to be freed next cycle. 
- fuPool->freeUnit(idx); - } - } - - DPRINTF(IQ, "Thread %i: Issuing instruction PC %#x " - "[sn:%lli]\n", - issuing_inst->threadNumber, issuing_inst->readPC(), - issuing_inst->seqNum); - - readyInsts[op_class].pop(); - - if (!readyInsts[op_class].empty()) { - moveToYoungerInst(order_it); - } else { - readyIt[op_class] = listOrder.end(); - queueOnList[op_class] = false; - } - - issuing_inst->setIssued(); - ++total_issued; - - if (!issuing_inst->isMemRef()) { - // Memory instructions can not be freed from the IQ until they - // complete. - ++freeEntries; - count[issuing_inst->threadNumber]--; - issuing_inst->removeInIQ(); - } else { - memDepUnit[issuing_inst->threadNumber].issue(issuing_inst); - } - - listOrder.erase(order_it++); - } else { - ++order_it; - } - } - - if (total_issued) { - cpu->activityThisCycle(); - } else { - DPRINTF(IQ, "Not able to schedule any instructions.\n"); - } -*/ -} - -template -void -InstQueue::scheduleNonSpec(const InstSeqNum &inst) -{ - DPRINTF(IQ, "Marking nonspeculative instruction with sequence " - "number %i as ready to execute.\n", inst); - - NonSpecMapIt inst_it = nonSpecInsts.find(inst); - - assert(inst_it != nonSpecInsts.end()); - -// unsigned tid = (*inst_it).second->threadNumber; - - // Mark this instruction as ready to issue. - (*inst_it).second->setCanIssue(); - - // Now schedule the instruction. 
-// if (!(*inst_it).second->isMemRef()) { - addIfReady((*inst_it).second); -// } else { -// memDepUnit[tid].nonSpecInstReady((*inst_it).second); -// } - - nonSpecInsts.erase(inst_it); -} - -template -void -InstQueue::commit(const InstSeqNum &inst, unsigned tid) -{ - /*Need to go through each thread??*/ - DPRINTF(IQ, "[tid:%i]: Committing instructions older than [sn:%i]\n", - tid,inst); - - ListIt iq_it = instList[tid].begin(); - - while (iq_it != instList[tid].end() && - (*iq_it)->seqNum <= inst) { - ++iq_it; - instList[tid].pop_front(); - } - - assert(freeEntries == (numEntries - countInsts())); -} - -template -void -InstQueue::wakeDependents(DynInstPtr &completed_inst) -{ - DPRINTF(IQ, "Waking dependents of completed instruction.\n"); - // Look at the physical destination register of the DynInst - // and look it up on the dependency graph. Then mark as ready - // any instructions within the instruction queue. -/* - DependencyEntry *curr; - DependencyEntry *prev; -*/ - // Tell the memory dependence unit to wake any dependents on this - // instruction if it is a memory instruction. Also complete the memory - // instruction at this point since we know it executed fine. - // @todo: Might want to rename "completeMemInst" to - // something that indicates that it won't need to be replayed, and call - // this earlier. Might not be a big deal. - if (completed_inst->isMemRef()) { -// memDepUnit[completed_inst->threadNumber].wakeDependents(completed_inst); - completeMemInst(completed_inst); - } - completed_inst->wakeDependents(); -/* - for (int dest_reg_idx = 0; - dest_reg_idx < completed_inst->numDestRegs(); - dest_reg_idx++) - { - PhysRegIndex dest_reg = - completed_inst->renamedDestRegIdx(dest_reg_idx); - - // Special case of uniq or control registers. They are not - // handled by the IQ and thus have no dependency graph entry. - // @todo Figure out a cleaner way to handle this. 
- if (dest_reg >= numPhysRegs) { - continue; - } - - DPRINTF(IQ, "Waking any dependents on register %i.\n", - (int) dest_reg); - - //Maybe abstract this part into a function. - //Go through the dependency chain, marking the registers as ready - //within the waiting instructions. - - curr = dependGraph[dest_reg].next; - - while (curr) { - DPRINTF(IQ, "Waking up a dependent instruction, PC%#x.\n", - curr->inst->readPC()); - - // Might want to give more information to the instruction - // so that it knows which of its source registers is ready. - // However that would mean that the dependency graph entries - // would need to hold the src_reg_idx. - curr->inst->markSrcRegReady(); - - addIfReady(curr->inst); - - DependencyEntry::mem_alloc_counter--; - - prev = curr; - curr = prev->next; - prev->inst = NULL; - - delete prev; - } - - // Reset the head node now that all of its dependents have been woken - // up. - dependGraph[dest_reg].next = NULL; - dependGraph[dest_reg].inst = NULL; - - // Mark the scoreboard as having that register ready. 
- regScoreboard[dest_reg] = true; - } -*/ -} - -template -void -InstQueue::addReadyMemInst(DynInstPtr &ready_inst) -{ - OpClass op_class = ready_inst->opClass(); - - readyInsts.push(ready_inst); - - DPRINTF(IQ, "Instruction is ready to issue, putting it onto " - "the ready list, PC %#x opclass:%i [sn:%lli].\n", - ready_inst->readPC(), op_class, ready_inst->seqNum); -} -/* -template -void -InstQueue::rescheduleMemInst(DynInstPtr &resched_inst) -{ - memDepUnit[resched_inst->threadNumber].reschedule(resched_inst); -} - -template -void -InstQueue::replayMemInst(DynInstPtr &replay_inst) -{ - memDepUnit[replay_inst->threadNumber].replay(replay_inst); -} -*/ -template -void -InstQueue::completeMemInst(DynInstPtr &completed_inst) -{ - int tid = completed_inst->threadNumber; - - DPRINTF(IQ, "Completing mem instruction PC:%#x [sn:%lli]\n", - completed_inst->readPC(), completed_inst->seqNum); - - ++freeEntries; - -// completed_inst->memOpDone = true; - -// memDepUnit[tid].completed(completed_inst); - - count[tid]--; -} -/* -template -void -InstQueue::violation(DynInstPtr &store, - DynInstPtr &faulting_load) -{ - memDepUnit[store->threadNumber].violation(store, faulting_load); -} -*/ -template -void -InstQueue::squash(unsigned tid) -{ - DPRINTF(IQ, "[tid:%i]: Starting to squash instructions in " - "the IQ.\n", tid); - - // Read instruction sequence number of last instruction out of the - // time buffer. -// squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum; - - // Setup the squash iterator to point to the tail. - squashIt[tid] = instList[tid].end(); - --squashIt[tid]; - - // Call doSquash if there are insts in the IQ - if (count[tid] > 0) { - doSquash(tid); - } - - // Also tell the memory dependence unit to squash. -// memDepUnit[tid].squash(squashedSeqNum[tid], tid); -} - -template -void -InstQueue::doSquash(unsigned tid) -{ - // Make sure the squashed sequence number is valid. 
- assert(squashedSeqNum[tid] != 0); - - DPRINTF(IQ, "[tid:%i]: Squashing until sequence number %i!\n", - tid, squashedSeqNum[tid]); - - // Squash any instructions younger than the squashed sequence number - // given. - while (squashIt[tid] != instList[tid].end() && - (*squashIt[tid])->seqNum > squashedSeqNum[tid]) { - - DynInstPtr squashed_inst = (*squashIt[tid]); - - // Only handle the instruction if it actually is in the IQ and - // hasn't already been squashed in the IQ. - if (squashed_inst->threadNumber != tid || - squashed_inst->isSquashedInIQ()) { - --squashIt[tid]; - continue; - } - - if (!squashed_inst->isIssued() || - (squashed_inst->isMemRef()/* && - !squashed_inst->memOpDone*/)) { - - // Remove the instruction from the dependency list. - if (!squashed_inst->isNonSpeculative()) { -/* - for (int src_reg_idx = 0; - src_reg_idx < squashed_inst->numSrcRegs(); - src_reg_idx++) - { - PhysRegIndex src_reg = - squashed_inst->renamedSrcRegIdx(src_reg_idx); - - // Only remove it from the dependency graph if it was - // placed there in the first place. - // HACK: This assumes that instructions woken up from the - // dependency chain aren't informed that a specific src - // register has become ready. This may not always be true - // in the future. - // Instead of doing a linked list traversal, we can just - // remove these squashed instructions either at issue time, - // or when the register is overwritten. The only downside - // to this is it leaves more room for error. - - if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) && - src_reg < numPhysRegs) { - dependGraph[src_reg].remove(squashed_inst); - } - - - ++iqSquashedOperandsExamined; - } -*/ - // Might want to remove producers as well. - } else { - nonSpecInsts[squashed_inst->seqNum] = NULL; - - nonSpecInsts.erase(squashed_inst->seqNum); - - ++iqSquashedNonSpecRemoved; - } - - // Might want to also clear out the head of the dependency graph. - - // Mark it as squashed within the IQ. 
- squashed_inst->setSquashedInIQ(); - - // @todo: Remove this hack where several statuses are set so the - // inst will flow through the rest of the pipeline. - squashed_inst->setIssued(); - squashed_inst->setCanCommit(); -// squashed_inst->removeInIQ(); - - //Update Thread IQ Count - count[squashed_inst->threadNumber]--; - - ++freeEntries; - - if (numThreads > 1) { - DPRINTF(IQ, "[tid:%i]: Instruction PC %#x squashed.\n", - tid, squashed_inst->readPC()); - } else { - DPRINTF(IQ, "Instruction PC %#x squashed.\n", - squashed_inst->readPC()); - } - } - - --squashIt[tid]; - ++iqSquashedInstsExamined; - } -} -/* -template -void -InstQueue::DependencyEntry::insert(DynInstPtr &new_inst) -{ - //Add this new, dependent instruction at the head of the dependency - //chain. - - // First create the entry that will be added to the head of the - // dependency chain. - DependencyEntry *new_entry = new DependencyEntry; - new_entry->next = this->next; - new_entry->inst = new_inst; - - // Then actually add it to the chain. - this->next = new_entry; - - ++mem_alloc_counter; -} - -template -void -InstQueue::DependencyEntry::remove(DynInstPtr &inst_to_remove) -{ - DependencyEntry *prev = this; - DependencyEntry *curr = this->next; - - // Make sure curr isn't NULL. Because this instruction is being - // removed from a dependency list, it must have been placed there at - // an earlier time. The dependency chain should not be empty, - // unless the instruction dependent upon it is already ready. - if (curr == NULL) { - return; - } - - // Find the instruction to remove within the dependency linked list. - while (curr->inst != inst_to_remove) { - prev = curr; - curr = curr->next; - - assert(curr != NULL); - } - - // Now remove this instruction from the list. 
- prev->next = curr->next; - - --mem_alloc_counter; - - // Could push this off to the destructor of DependencyEntry - curr->inst = NULL; - - delete curr; -} - -template -bool -InstQueue::addToDependents(DynInstPtr &new_inst) -{ - // Loop through the instruction's source registers, adding - // them to the dependency list if they are not ready. - int8_t total_src_regs = new_inst->numSrcRegs(); - bool return_val = false; - - for (int src_reg_idx = 0; - src_reg_idx < total_src_regs; - src_reg_idx++) - { - // Only add it to the dependency graph if it's not ready. - if (!new_inst->isReadySrcRegIdx(src_reg_idx)) { - PhysRegIndex src_reg = new_inst->renamedSrcRegIdx(src_reg_idx); - - // Check the IQ's scoreboard to make sure the register - // hasn't become ready while the instruction was in flight - // between stages. Only if it really isn't ready should - // it be added to the dependency graph. - if (src_reg >= numPhysRegs) { - continue; - } else if (regScoreboard[src_reg] == false) { - DPRINTF(IQ, "Instruction PC %#x has src reg %i that " - "is being added to the dependency chain.\n", - new_inst->readPC(), src_reg); - - dependGraph[src_reg].insert(new_inst); - - // Change the return value to indicate that something - // was added to the dependency graph. - return_val = true; - } else { - DPRINTF(IQ, "Instruction PC %#x has src reg %i that " - "became ready before it reached the IQ.\n", - new_inst->readPC(), src_reg); - // Mark a register ready within the instruction. - new_inst->markSrcRegReady(); - } - } - } - - return return_val; -} - -template -void -InstQueue::createDependency(DynInstPtr &new_inst) -{ - //Actually nothing really needs to be marked when an - //instruction becomes the producer of a register's value, - //but for convenience a ptr to the producing instruction will - //be placed in the head node of the dependency links. 
- int8_t total_dest_regs = new_inst->numDestRegs(); - - for (int dest_reg_idx = 0; - dest_reg_idx < total_dest_regs; - dest_reg_idx++) - { - PhysRegIndex dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx); - - // Instructions that use the misc regs will have a reg number - // higher than the normal physical registers. In this case these - // registers are not renamed, and there is no need to track - // dependencies as these instructions must be executed at commit. - if (dest_reg >= numPhysRegs) { - continue; - } - - if (dependGraph[dest_reg].next) { - dumpDependGraph(); - panic("Dependency graph %i not empty!", dest_reg); - } - - dependGraph[dest_reg].inst = new_inst; - - // Mark the scoreboard to say it's not yet ready. - regScoreboard[dest_reg] = false; - } -} -*/ -template -void -InstQueue::addIfReady(DynInstPtr &inst) -{ - //If the instruction now has all of its source registers - // available, then add it to the list of ready instructions. - if (inst->readyToIssue()) { - - //Add the instruction to the proper ready list. - if (inst->isMemRef()) { - - DPRINTF(IQ, "Checking if memory instruction can issue.\n"); - - // Message to the mem dependence unit that this instruction has - // its registers ready. - -// memDepUnit[inst->threadNumber].regsReady(inst); - - return; - } - - OpClass op_class = inst->opClass(); - - DPRINTF(IQ, "Instruction is ready to issue, putting it onto " - "the ready list, PC %#x opclass:%i [sn:%lli].\n", - inst->readPC(), op_class, inst->seqNum); - - readyInsts.push(inst); - } -} - -template -int -InstQueue::countInsts() -{ - //ksewell:This works but definitely could use a cleaner write - //with a more intuitive way of counting. Right now it's - //just brute force .... 
- -#if 0 - int total_insts = 0; - - for (int i = 0; i < numThreads; ++i) { - ListIt count_it = instList[i].begin(); - - while (count_it != instList[i].end()) { - if (!(*count_it)->isSquashed() && !(*count_it)->isSquashedInIQ()) { - if (!(*count_it)->isIssued()) { - ++total_insts; - } else if ((*count_it)->isMemRef() && - !(*count_it)->memOpDone) { - // Loads that have not been marked as executed still count - // towards the total instructions. - ++total_insts; - } - } - - ++count_it; - } - } - - return total_insts; -#else - return numEntries - freeEntries; -#endif -} -/* -template -void -InstQueue::dumpDependGraph() -{ - DependencyEntry *curr; - - for (int i = 0; i < numPhysRegs; ++i) - { - curr = &dependGraph[i]; - - if (curr->inst) { - cprintf("dependGraph[%i]: producer: %#x [sn:%lli] consumer: ", - i, curr->inst->readPC(), curr->inst->seqNum); - } else { - cprintf("dependGraph[%i]: No producer. consumer: ", i); - } - - while (curr->next != NULL) { - curr = curr->next; - - cprintf("%#x [sn:%lli] ", - curr->inst->readPC(), curr->inst->seqNum); - } - - cprintf("\n"); - } -} -*/ -template -void -InstQueue::dumpLists() -{ - for (int i = 0; i < Num_OpClasses; ++i) { - cprintf("Ready list %i size: %i\n", i, readyInsts.size()); - - cprintf("\n"); - } - - cprintf("Non speculative list size: %i\n", nonSpecInsts.size()); - - NonSpecMapIt non_spec_it = nonSpecInsts.begin(); - NonSpecMapIt non_spec_end_it = nonSpecInsts.end(); - - cprintf("Non speculative list: "); - - while (non_spec_it != non_spec_end_it) { - cprintf("%#x [sn:%lli]", (*non_spec_it).second->readPC(), - (*non_spec_it).second->seqNum); - ++non_spec_it; - } - - cprintf("\n"); -/* - ListOrderIt list_order_it = listOrder.begin(); - ListOrderIt list_order_end_it = listOrder.end(); - int i = 1; - - cprintf("List order: "); - - while (list_order_it != list_order_end_it) { - cprintf("%i OpClass:%i [sn:%lli] ", i, (*list_order_it).queueType, - (*list_order_it).oldestInst); - - ++list_order_it; - ++i; - } -*/ - 
cprintf("\n"); -} - - -template -void -InstQueue::dumpInsts() -{ - for (int i = 0; i < numThreads; ++i) { -// int num = 0; -// int valid_num = 0; -/* - ListIt inst_list_it = instList[i].begin(); - - while (inst_list_it != instList[i].end()) - { - cprintf("Instruction:%i\n", - num); - if (!(*inst_list_it)->isSquashed()) { - if (!(*inst_list_it)->isIssued()) { - ++valid_num; - cprintf("Count:%i\n", valid_num); - } else if ((*inst_list_it)->isMemRef() && - !(*inst_list_it)->memOpDone) { - // Loads that have not been marked as executed still count - // towards the total instructions. - ++valid_num; - cprintf("Count:%i\n", valid_num); - } - } - - cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" - "Issued:%i\nSquashed:%i\n", - (*inst_list_it)->readPC(), - (*inst_list_it)->seqNum, - (*inst_list_it)->threadNumber, - (*inst_list_it)->isIssued(), - (*inst_list_it)->isSquashed()); - - if ((*inst_list_it)->isMemRef()) { - cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); - } - - cprintf("\n"); - - inst_list_it++; - ++num; - } -*/ - } -} diff --git a/cpu/ozone/lsq_unit.cc b/cpu/ozone/lsq_unit.cc deleted file mode 100644 index 3ac51b87d..000000000 --- a/cpu/ozone/lsq_unit.cc +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/lsq_unit_impl.hh" - -// Force the instantiation of LDSTQ for all the implementations we care about. -template class OzoneLSQ; - diff --git a/cpu/ozone/lsq_unit.hh b/cpu/ozone/lsq_unit.hh deleted file mode 100644 index 4b600af67..000000000 --- a/cpu/ozone/lsq_unit.hh +++ /dev/null @@ -1,637 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __CPU_OZONE_LSQ_UNIT_HH__ -#define __CPU_OZONE_LSQ_UNIT_HH__ - -#include -#include -#include - -#include "arch/faults.hh" -#include "arch/isa_traits.hh" -#include "config/full_system.hh" -#include "base/hashmap.hh" -#include "cpu/inst_seq.hh" -#include "mem/mem_interface.hh" -//#include "mem/page_table.hh" -#include "sim/sim_object.hh" - -class PageTable; - -/** - * Class that implements the actual LQ and SQ for each specific thread. 
- * Both are circular queues; load entries are freed upon committing, while - * store entries are freed once they writeback. The LSQUnit tracks if there - * are memory ordering violations, and also detects partial load to store - * forwarding cases (a store only has part of a load's data) that requires - * the load to wait until the store writes back. In the former case it - * holds onto the instruction until the dependence unit looks at it, and - * in the latter it stalls the LSQ until the store writes back. At that - * point the load is replayed. - */ -template -class OzoneLSQ { - public: - typedef typename Impl::Params Params; - typedef typename Impl::FullCPU FullCPU; - typedef typename Impl::BackEnd BackEnd; - typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::IssueStruct IssueStruct; - - typedef TheISA::IntReg IntReg; - - typedef typename std::map::iterator LdMapIt; - - private: - class StoreCompletionEvent : public Event { - public: - /** Constructs a store completion event. */ - StoreCompletionEvent(int store_idx, Event *wb_event, OzoneLSQ *lsq_ptr); - - /** Processes the store completion event. */ - void process(); - - /** Returns the description of this event. */ - const char *description(); - - private: - /** The store index of the store being written back. */ - int storeIdx; - /** The writeback event for the store. Needed for store - * conditionals. - */ - Event *wbEvent; - /** The pointer to the LSQ unit that issued the store. */ - OzoneLSQ *lsqPtr; - }; - - friend class StoreCompletionEvent; - - public: - /** Constructs an LSQ unit. init() must be called prior to use. */ - OzoneLSQ(); - - /** Initializes the LSQ unit with the specified number of entries. */ - void init(Params *params, unsigned maxLQEntries, - unsigned maxSQEntries, unsigned id); - - /** Returns the name of the LSQ unit. */ - std::string name() const; - - /** Sets the CPU pointer. 
*/ - void setCPU(FullCPU *cpu_ptr) - { cpu = cpu_ptr; } - - /** Sets the back-end stage pointer. */ - void setBE(BackEnd *be_ptr) - { be = be_ptr; } - - /** Sets the page table pointer. */ - void setPageTable(PageTable *pt_ptr); - - /** Ticks the LSQ unit, which in this case only resets the number of - * used cache ports. - * @todo: Move the number of used ports up to the LSQ level so it can - * be shared by all LSQ units. - */ - void tick() { usedPorts = 0; } - - /** Inserts an instruction. */ - void insert(DynInstPtr &inst); - /** Inserts a load instruction. */ - void insertLoad(DynInstPtr &load_inst); - /** Inserts a store instruction. */ - void insertStore(DynInstPtr &store_inst); - - /** Executes a load instruction. */ - Fault executeLoad(DynInstPtr &inst); - - Fault executeLoad(int lq_idx); - /** Executes a store instruction. */ - Fault executeStore(DynInstPtr &inst); - - /** Commits the head load. */ - void commitLoad(); - /** Commits a specific load, given by the sequence number. */ - void commitLoad(InstSeqNum &inst); - /** Commits loads older than a specific sequence number. */ - void commitLoads(InstSeqNum &youngest_inst); - - /** Commits stores older than a specific sequence number. */ - void commitStores(InstSeqNum &youngest_inst); - - /** Writes back stores. */ - void writebackStores(); - - // @todo: Include stats in the LSQ unit. - //void regStats(); - - /** Clears all the entries in the LQ. */ - void clearLQ(); - - /** Clears all the entries in the SQ. */ - void clearSQ(); - - /** Resizes the LQ to a given size. */ - void resizeLQ(unsigned size); - - /** Resizes the SQ to a given size. */ - void resizeSQ(unsigned size); - - /** Squashes all instructions younger than a specific sequence number. */ - void squash(const InstSeqNum &squashed_num); - - /** Returns if there is a memory ordering violation. Value is reset upon - * call to getMemDepViolator(). - */ - bool violation() { return memDepViolator; } - - /** Returns the memory ordering violator. 
*/ - DynInstPtr getMemDepViolator(); - - /** Returns if a load became blocked due to the memory system. It clears - * the bool's value upon this being called. - */ - inline bool loadBlocked(); - - /** Returns the number of free entries (min of free LQ and SQ entries). */ - unsigned numFreeEntries(); - - /** Returns the number of loads ready to execute. */ - int numLoadsReady(); - - /** Returns the number of loads in the LQ. */ - int numLoads() { return loads; } - - /** Returns the number of stores in the SQ. */ - int numStores() { return stores; } - - /** Returns if either the LQ or SQ is full. */ - bool isFull() { return lqFull() || sqFull(); } - - /** Returns if the LQ is full. */ - bool lqFull() { return loads >= (LQEntries - 1); } - - /** Returns if the SQ is full. */ - bool sqFull() { return stores >= (SQEntries - 1); } - - /** Debugging function to dump instructions in the LSQ. */ - void dumpInsts(); - - /** Returns the number of instructions in the LSQ. */ - unsigned getCount() { return loads + stores; } - - /** Returns if there are any stores to writeback. */ - bool hasStoresToWB() { return storesToWB; } - - /** Returns the number of stores to writeback. */ - int numStoresToWB() { return storesToWB; } - - /** Returns if the LSQ unit will writeback on this cycle. */ - bool willWB() { return storeQueue[storeWBIdx].canWB && - !storeQueue[storeWBIdx].completed && - !dcacheInterface->isBlocked(); } - - private: - /** Completes the store at the specified index. */ - void completeStore(int store_idx); - - /** Increments the given store index (circular queue). */ - inline void incrStIdx(int &store_idx); - /** Decrements the given store index (circular queue). */ - inline void decrStIdx(int &store_idx); - /** Increments the given load index (circular queue). */ - inline void incrLdIdx(int &load_idx); - /** Decrements the given load index (circular queue). */ - inline void decrLdIdx(int &load_idx); - - private: - /** Pointer to the CPU. 
*/ - FullCPU *cpu; - - /** Pointer to the back-end stage. */ - BackEnd *be; - - /** Pointer to the D-cache. */ - MemInterface *dcacheInterface; - - /** Pointer to the page table. */ - PageTable *pTable; - - public: - struct SQEntry { - /** Constructs an empty store queue entry. */ - SQEntry() - : inst(NULL), req(NULL), size(0), data(0), - canWB(0), committed(0), completed(0) - { } - - /** Constructs a store queue entry for a given instruction. */ - SQEntry(DynInstPtr &_inst) - : inst(_inst), req(NULL), size(0), data(0), - canWB(0), committed(0), completed(0) - { } - - /** The store instruction. */ - DynInstPtr inst; - /** The memory request for the store. */ - MemReqPtr req; - /** The size of the store. */ - int size; - /** The store data. */ - IntReg data; - /** Whether or not the store can writeback. */ - bool canWB; - /** Whether or not the store is committed. */ - bool committed; - /** Whether or not the store is completed. */ - bool completed; - }; - - enum Status { - Running, - Idle, - DcacheMissStall, - DcacheMissSwitch - }; - - private: - /** The OzoneLSQ thread id. */ - unsigned lsqID; - - /** The status of the LSQ unit. */ - Status _status; - - /** The store queue. */ - std::vector storeQueue; - - /** The load queue. */ - std::vector loadQueue; - - // Consider making these 16 bits - /** The number of LQ entries. */ - unsigned LQEntries; - /** The number of SQ entries. */ - unsigned SQEntries; - - /** The number of load instructions in the LQ. */ - int loads; - /** The number of store instructions in the SQ (excludes those waiting to - * writeback). - */ - int stores; - /** The number of store instructions in the SQ waiting to writeback. */ - int storesToWB; - - /** The index of the head instruction in the LQ. */ - int loadHead; - /** The index of the tail instruction in the LQ. */ - int loadTail; - - /** The index of the head instruction in the SQ. 
*/ - int storeHead; - /** The index of the first instruction that is ready to be written back, - * and has not yet been written back. - */ - int storeWBIdx; - /** The index of the tail instruction in the SQ. */ - int storeTail; - - /// @todo Consider moving to a more advanced model with write vs read ports - /** The number of cache ports available each cycle. */ - int cachePorts; - - /** The number of used cache ports in this cycle. */ - int usedPorts; - - //list mshrSeqNums; - - //Stats::Scalar<> dcacheStallCycles; - Counter lastDcacheStall; - - /** Wire to read information from the issue stage time queue. */ - typename TimeBuffer::wire fromIssue; - - // Make these per thread? - /** Whether or not the LSQ is stalled. */ - bool stalled; - /** The store that causes the stall due to partial store to load - * forwarding. - */ - InstSeqNum stallingStoreIsn; - /** The index of the above store. */ - int stallingLoadIdx; - - /** Whether or not a load is blocked due to the memory system. It is - * cleared when this value is checked via loadBlocked(). - */ - bool isLoadBlocked; - - /** The oldest faulting load instruction. */ - DynInstPtr loadFaultInst; - /** The oldest faulting store instruction. */ - DynInstPtr storeFaultInst; - - /** The oldest load that caused a memory ordering violation. */ - DynInstPtr memDepViolator; - - // Will also need how many read/write ports the Dcache has. Or keep track - // of that in stage that is one level up, and only call executeLoad/Store - // the appropriate number of times. - - public: - /** Executes the load at the given index. */ - template - Fault read(MemReqPtr &req, T &data, int load_idx); - - /** Executes the store at the given index. */ - template - Fault write(MemReqPtr &req, T &data, int store_idx); - - /** Returns the index of the head load instruction. */ - int getLoadHead() { return loadHead; } - /** Returns the sequence number of the head load instruction. 
*/ - InstSeqNum getLoadHeadSeqNum() - { - if (loadQueue[loadHead]) { - return loadQueue[loadHead]->seqNum; - } else { - return 0; - } - - } - - /** Returns the index of the head store instruction. */ - int getStoreHead() { return storeHead; } - /** Returns the sequence number of the head store instruction. */ - InstSeqNum getStoreHeadSeqNum() - { - if (storeQueue[storeHead].inst) { - return storeQueue[storeHead].inst->seqNum; - } else { - return 0; - } - - } - - /** Returns whether or not the LSQ unit is stalled. */ - bool isStalled() { return stalled; } -}; - -template -template -Fault -OzoneLSQ::read(MemReqPtr &req, T &data, int load_idx) -{ - //Depending on issue2execute delay a squashed load could - //execute if it is found to be squashed in the same - //cycle it is scheduled to execute - assert(loadQueue[load_idx]); - - if (loadQueue[load_idx]->isExecuted()) { - panic("Should not reach this point with split ops!"); - - memcpy(&data,req->data,req->size); - - return NoFault; - } - - // Make sure this isn't an uncacheable access - // A bit of a hackish way to get uncached accesses to work only if they're - // at the head of the LSQ and are ready to commit (at the head of the ROB - // too). - // @todo: Fix uncached accesses. 
- if (req->flags & UNCACHEABLE && - (load_idx != loadHead || !loadQueue[load_idx]->readyToCommit())) { - - return TheISA::genMachineCheckFault(); - } - - // Check the SQ for any previous stores that might lead to forwarding - int store_idx = loadQueue[load_idx]->sqIdx; - - int store_size = 0; - - DPRINTF(OzoneLSQ, "Read called, load idx: %i, store idx: %i, " - "storeHead: %i addr: %#x\n", - load_idx, store_idx, storeHead, req->paddr); - - while (store_idx != -1) { - // End once we've reached the top of the LSQ - if (store_idx == storeWBIdx) { - break; - } - - // Move the index to one younger - if (--store_idx < 0) - store_idx += SQEntries; - - assert(storeQueue[store_idx].inst); - - store_size = storeQueue[store_idx].size; - - if (store_size == 0) - continue; - - // Check if the store data is within the lower and upper bounds of - // addresses that the request needs. - bool store_has_lower_limit = - req->vaddr >= storeQueue[store_idx].inst->effAddr; - bool store_has_upper_limit = - (req->vaddr + req->size) <= (storeQueue[store_idx].inst->effAddr + - store_size); - bool lower_load_has_store_part = - req->vaddr < (storeQueue[store_idx].inst->effAddr + - store_size); - bool upper_load_has_store_part = - (req->vaddr + req->size) > storeQueue[store_idx].inst->effAddr; - - // If the store's data has all of the data needed, we can forward. - if (store_has_lower_limit && store_has_upper_limit) { - - int shift_amt = req->vaddr & (store_size - 1); - // Assumes byte addressing - shift_amt = shift_amt << 3; - - // Cast this to type T? 
- data = storeQueue[store_idx].data >> shift_amt; - - req->cmd = Read; - assert(!req->completionEvent); - req->completionEvent = NULL; - req->time = curTick; - assert(!req->data); - req->data = new uint8_t[64]; - - memcpy(req->data, &data, req->size); - - DPRINTF(OzoneLSQ, "Forwarding from store idx %i to load to " - "addr %#x, data %#x\n", - store_idx, req->vaddr, *(req->data)); - - typename BackEnd::LdWritebackEvent *wb = - new typename BackEnd::LdWritebackEvent(loadQueue[load_idx], - be); - - // We'll say this has a 1 cycle load-store forwarding latency - // for now. - // FIXME - Need to make this a parameter. - wb->schedule(curTick); - - // Should keep track of stat for forwarded data - return NoFault; - } else if ((store_has_lower_limit && lower_load_has_store_part) || - (store_has_upper_limit && upper_load_has_store_part) || - (lower_load_has_store_part && upper_load_has_store_part)) { - // This is the partial store-load forwarding case where a store - // has only part of the load's data. - - // If it's already been written back, then don't worry about - // stalling on it. - if (storeQueue[store_idx].completed) { - continue; - } - - // Must stall load and force it to retry, so long as it's the oldest - // load that needs to do so. - if (!stalled || - (stalled && - loadQueue[load_idx]->seqNum < - loadQueue[stallingLoadIdx]->seqNum)) { - stalled = true; - stallingStoreIsn = storeQueue[store_idx].inst->seqNum; - stallingLoadIdx = load_idx; - } - - // Tell IQ/mem dep unit that this instruction will need to be - // rescheduled eventually - be->rescheduleMemInst(loadQueue[load_idx]); - - DPRINTF(OzoneLSQ, "Load-store forwarding mis-match. 
" - "Store idx %i to load addr %#x\n", - store_idx, req->vaddr); - - return NoFault; - } - } - - - // If there's no forwarding case, then go access memory - DynInstPtr inst = loadQueue[load_idx]; - - ++usedPorts; - - // if we have a cache, do cache access too - if (dcacheInterface) { - if (dcacheInterface->isBlocked()) { - isLoadBlocked = true; - // No fault occurred, even though the interface is blocked. - return NoFault; - } - - DPRINTF(OzoneLSQ, "D-cache: PC:%#x reading from paddr:%#x " - "vaddr:%#x flags:%i\n", - inst->readPC(), req->paddr, req->vaddr, req->flags); - - // Setup MemReq pointer - req->cmd = Read; - req->completionEvent = NULL; - req->time = curTick; - assert(!req->data); - req->data = new uint8_t[64]; - - assert(!req->completionEvent); - typedef typename BackEnd::LdWritebackEvent LdWritebackEvent; - - LdWritebackEvent *wb = new LdWritebackEvent(loadQueue[load_idx], be); - - req->completionEvent = wb; - - // Do Cache Access - MemAccessResult result = dcacheInterface->access(req); - - // Ugly hack to get an event scheduled *only* if the access is - // a miss. We really should add first-class support for this - // at some point. 
- // @todo: Probably should support having no events - if (result != MA_HIT) { - DPRINTF(OzoneLSQ, "D-cache miss!\n"); - DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", - inst->seqNum); - - lastDcacheStall = curTick; - - _status = DcacheMissStall; - - wb->setDcacheMiss(); - - } else { -// DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", -// inst->seqNum); - - DPRINTF(OzoneLSQ, "D-cache hit!\n"); - } - } else { - fatal("Must use D-cache with new memory system"); - } - - return NoFault; -} - -template -template -Fault -OzoneLSQ::write(MemReqPtr &req, T &data, int store_idx) -{ - assert(storeQueue[store_idx].inst); - - DPRINTF(OzoneLSQ, "Doing write to store idx %i, addr %#x data %#x" - " | storeHead:%i [sn:%i]\n", - store_idx, req->paddr, data, storeHead, - storeQueue[store_idx].inst->seqNum); - - storeQueue[store_idx].req = req; - storeQueue[store_idx].size = sizeof(T); - storeQueue[store_idx].data = data; - - // This function only writes the data to the store queue, so no fault - // can happen here. - return NoFault; -} - -template -inline bool -OzoneLSQ::loadBlocked() -{ - bool ret_val = isLoadBlocked; - isLoadBlocked = false; - return ret_val; -} - -#endif // __CPU_OZONE_LSQ_UNIT_HH__ diff --git a/cpu/ozone/lsq_unit_impl.hh b/cpu/ozone/lsq_unit_impl.hh deleted file mode 100644 index 726348d76..000000000 --- a/cpu/ozone/lsq_unit_impl.hh +++ /dev/null @@ -1,846 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "arch/isa_traits.hh" -#include "base/str.hh" -#include "cpu/ozone/lsq_unit.hh" - -template -OzoneLSQ::StoreCompletionEvent::StoreCompletionEvent(int store_idx, - Event *wb_event, - OzoneLSQ *lsq_ptr) - : Event(&mainEventQueue), - storeIdx(store_idx), - wbEvent(wb_event), - lsqPtr(lsq_ptr) -{ - this->setFlags(Event::AutoDelete); -} - -template -void -OzoneLSQ::StoreCompletionEvent::process() -{ - DPRINTF(OzoneLSQ, "Cache miss complete for store idx:%i\n", storeIdx); - - //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum); - -// lsqPtr->cpu->wakeCPU(); - if (wbEvent) - wbEvent->process(); - lsqPtr->completeStore(storeIdx); -} - -template -const char * -OzoneLSQ::StoreCompletionEvent::description() -{ - return "LSQ store completion event"; -} - -template -OzoneLSQ::OzoneLSQ() - : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false) -{ -} - -template -void -OzoneLSQ::init(Params *params, unsigned maxLQEntries, - unsigned maxSQEntries, unsigned id) - -{ - DPRINTF(OzoneLSQ, "Creating OzoneLSQ%i object.\n",id); - - lsqID = id; - - LQEntries = maxLQEntries; - SQEntries = maxSQEntries; - - loadQueue.resize(LQEntries); - storeQueue.resize(SQEntries); - - - // May want to initialize these entries to NULL - - loadHead = loadTail = 0; - - storeHead = storeWBIdx = storeTail = 0; - - usedPorts = 0; - cachePorts = params->cachePorts; - - dcacheInterface = params->dcacheInterface; - - loadFaultInst = storeFaultInst = memDepViolator = NULL; -} - -template -std::string -OzoneLSQ::name() const -{ - return "lsqunit"; -} - -template -void -OzoneLSQ::clearLQ() -{ - loadQueue.clear(); -} - -template -void -OzoneLSQ::clearSQ() -{ - storeQueue.clear(); -} - -template -void -OzoneLSQ::setPageTable(PageTable *pt_ptr) -{ - DPRINTF(OzoneLSQ, "Setting the page table pointer.\n"); - pTable = pt_ptr; -} - -template -void -OzoneLSQ::resizeLQ(unsigned size) -{ - assert( size >= LQEntries); - - if (size > LQEntries) { - while (size > loadQueue.size()) 
{ - DynInstPtr dummy; - loadQueue.push_back(dummy); - LQEntries++; - } - } else { - LQEntries = size; - } - -} - -template -void -OzoneLSQ::resizeSQ(unsigned size) -{ - if (size > SQEntries) { - while (size > storeQueue.size()) { - SQEntry dummy; - storeQueue.push_back(dummy); - SQEntries++; - } - } else { - SQEntries = size; - } -} - -template -void -OzoneLSQ::insert(DynInstPtr &inst) -{ - // Make sure we really have a memory reference. - assert(inst->isMemRef()); - - // Make sure it's one of the two classes of memory references. - assert(inst->isLoad() || inst->isStore()); - - if (inst->isLoad()) { - insertLoad(inst); - } else { - insertStore(inst); - } - -// inst->setInLSQ(); -} - -template -void -OzoneLSQ::insertLoad(DynInstPtr &load_inst) -{ - assert((loadTail + 1) % LQEntries != loadHead && loads < LQEntries); - - DPRINTF(OzoneLSQ, "Inserting load PC %#x, idx:%i [sn:%lli]\n", - load_inst->readPC(), loadTail, load_inst->seqNum); - - load_inst->lqIdx = loadTail; - - if (stores == 0) { - load_inst->sqIdx = -1; - } else { - load_inst->sqIdx = storeTail; - } - - loadQueue[loadTail] = load_inst; - - incrLdIdx(loadTail); - - ++loads; -} - -template -void -OzoneLSQ::insertStore(DynInstPtr &store_inst) -{ - // Make sure it is not full before inserting an instruction. 
- assert((storeTail + 1) % SQEntries != storeHead); - assert(stores < SQEntries); - - DPRINTF(OzoneLSQ, "Inserting store PC %#x, idx:%i [sn:%lli]\n", - store_inst->readPC(), storeTail, store_inst->seqNum); - - store_inst->sqIdx = storeTail; - store_inst->lqIdx = loadTail; - - storeQueue[storeTail] = SQEntry(store_inst); - - incrStIdx(storeTail); - - ++stores; - -} - -template -typename Impl::DynInstPtr -OzoneLSQ::getMemDepViolator() -{ - DynInstPtr temp = memDepViolator; - - memDepViolator = NULL; - - return temp; -} - -template -unsigned -OzoneLSQ::numFreeEntries() -{ - unsigned free_lq_entries = LQEntries - loads; - unsigned free_sq_entries = SQEntries - stores; - - // Both the LQ and SQ entries have an extra dummy entry to differentiate - // empty/full conditions. Subtract 1 from the free entries. - if (free_lq_entries < free_sq_entries) { - return free_lq_entries - 1; - } else { - return free_sq_entries - 1; - } -} - -template -int -OzoneLSQ::numLoadsReady() -{ - int load_idx = loadHead; - int retval = 0; - - while (load_idx != loadTail) { - assert(loadQueue[load_idx]); - - if (loadQueue[load_idx]->readyToIssue()) { - ++retval; - } - } - - return retval; -} - -#if 0 -template -Fault -OzoneLSQ::executeLoad() -{ - Fault load_fault = NoFault; - DynInstPtr load_inst; - - assert(readyLoads.size() != 0); - - // Execute a ready load. - LdMapIt ready_it = readyLoads.begin(); - - load_inst = (*ready_it).second; - - // Execute the instruction, which is held in the data portion of the - // iterator. - load_fault = load_inst->execute(); - - // If it executed successfully, then switch it over to the executed - // loads list. - if (load_fault == NoFault) { - executedLoads[load_inst->seqNum] = load_inst; - - readyLoads.erase(ready_it); - } else { - loadFaultInst = load_inst; - } - - return load_fault; -} -#endif - -template -Fault -OzoneLSQ::executeLoad(DynInstPtr &inst) -{ - // Execute a specific load. 
- Fault load_fault = NoFault; - - DPRINTF(OzoneLSQ, "Executing load PC %#x, [sn:%lli]\n", - inst->readPC(),inst->seqNum); - - // Make sure it's really in the list. - // Normally it should always be in the list. However, - /* due to a syscall it may not be the list. -#ifdef DEBUG - int i = loadHead; - while (1) { - if (i == loadTail && !find(inst)) { - assert(0 && "Load not in the queue!"); - } else if (loadQueue[i] == inst) { - break; - } - - i = i + 1; - if (i >= LQEntries) { - i = 0; - } - } -#endif // DEBUG*/ - - load_fault = inst->initiateAcc(); - - // Might want to make sure that I'm not overwriting a previously faulting - // instruction that hasn't been checked yet. - // Actually probably want the oldest faulting load - if (load_fault != NoFault) { - // Maybe just set it as can commit here, although that might cause - // some other problems with sending traps to the ROB too quickly. -// iewStage->instToCommit(inst); -// iewStage->activityThisCycle(); - } - - return load_fault; -} - -template -Fault -OzoneLSQ::executeLoad(int lq_idx) -{ - // Very hackish. Not sure the best way to check that this - // instruction is at the head of the ROB. I should have some sort - // of extra information here so that I'm not overloading the - // canCommit signal for 15 different things. - loadQueue[lq_idx]->setCanCommit(); - Fault ret_fault = executeLoad(loadQueue[lq_idx]); - loadQueue[lq_idx]->clearCanCommit(); - return ret_fault; -} - -template -Fault -OzoneLSQ::executeStore(DynInstPtr &store_inst) -{ - // Make sure that a store exists. - assert(stores != 0); - - int store_idx = store_inst->sqIdx; - - DPRINTF(OzoneLSQ, "Executing store PC %#x [sn:%lli]\n", - store_inst->readPC(), store_inst->seqNum); - - // Check the recently completed loads to see if any match this store's - // address. If so, then we have a memory ordering violation. - int load_idx = store_inst->lqIdx; - - Fault store_fault = store_inst->initiateAcc(); - - // Store size should now be available. 
Use it to get proper offset for - // addr comparisons. - int size = storeQueue[store_idx].size; - - if (size == 0) { - DPRINTF(OzoneLSQ,"Fault on Store PC %#x, [sn:%lli],Size = 0\n", - store_inst->readPC(),store_inst->seqNum); - - return store_fault; - } - - assert(store_fault == NoFault); - - if (!storeFaultInst) { - if (store_fault != NoFault) { - panic("Fault in a store instruction!"); - storeFaultInst = store_inst; - } else if (store_inst->isNonSpeculative()) { - // Nonspeculative accesses (namely store conditionals) - // need to set themselves as able to writeback if we - // haven't had a fault by here. - storeQueue[store_idx].canWB = true; - - ++storesToWB; - } - } - - if (!memDepViolator) { - while (load_idx != loadTail) { - // Actually should only check loads that have actually executed - // Might be safe because effAddr is set to InvalAddr when the - // dyn inst is created. - - // Must actually check all addrs in the proper size range - // Which is more correct than needs to be. What if for now we just - // assume all loads are quad-word loads, and do the addr based - // on that. - // @todo: Fix this, magic number being used here - if ((loadQueue[load_idx]->effAddr >> 8) == - (store_inst->effAddr >> 8)) { - // A load incorrectly passed this store. Squash and refetch. - // For now return a fault to show that it was unsuccessful. - memDepViolator = loadQueue[load_idx]; - - return TheISA::genMachineCheckFault(); - } - - incrLdIdx(load_idx); - } - - // If we've reached this point, there was no violation. 
- memDepViolator = NULL; - } - - return store_fault; -} - -template -void -OzoneLSQ::commitLoad() -{ - assert(loadQueue[loadHead]); - - DPRINTF(OzoneLSQ, "[sn:%lli] Committing head load instruction, PC %#x\n", - loadQueue[loadHead]->seqNum, loadQueue[loadHead]->readPC()); - - - loadQueue[loadHead] = NULL; - - incrLdIdx(loadHead); - - --loads; -} - -template -void -OzoneLSQ::commitLoad(InstSeqNum &inst) -{ - // Hopefully I don't use this function too much - panic("Don't use this function!"); - - int i = loadHead; - while (1) { - if (i == loadTail) { - assert(0 && "Load not in the queue!"); - } else if (loadQueue[i]->seqNum == inst) { - break; - } - - ++i; - if (i >= LQEntries) { - i = 0; - } - } - -// loadQueue[i]->removeInLSQ(); - loadQueue[i] = NULL; - --loads; -} - -template -void -OzoneLSQ::commitLoads(InstSeqNum &youngest_inst) -{ - assert(loads == 0 || loadQueue[loadHead]); - - while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) { - commitLoad(); - } -} - -template -void -OzoneLSQ::commitStores(InstSeqNum &youngest_inst) -{ - assert(stores == 0 || storeQueue[storeHead].inst); - - int store_idx = storeHead; - - while (store_idx != storeTail) { - assert(storeQueue[store_idx].inst); - if (!storeQueue[store_idx].canWB) { - if (storeQueue[store_idx].inst->seqNum > youngest_inst) { - break; - } - DPRINTF(OzoneLSQ, "Marking store as able to write back, PC " - "%#x [sn:%lli]\n", - storeQueue[store_idx].inst->readPC(), - storeQueue[store_idx].inst->seqNum); - - storeQueue[store_idx].canWB = true; - -// --stores; - ++storesToWB; - } - - incrStIdx(store_idx); - } -} - -template -void -OzoneLSQ::writebackStores() -{ - while (storesToWB > 0 && - storeWBIdx != storeTail && - storeQueue[storeWBIdx].inst && - storeQueue[storeWBIdx].canWB && - usedPorts < cachePorts) { - - if (storeQueue[storeWBIdx].size == 0) { - completeStore(storeWBIdx); - - incrStIdx(storeWBIdx); - - continue; - } - - if (dcacheInterface && dcacheInterface->isBlocked()) { - DPRINTF(OzoneLSQ, 
"Unable to write back any more stores, cache" - " is blocked!\n"); - break; - } - - ++usedPorts; - - if (storeQueue[storeWBIdx].inst->isDataPrefetch()) { - incrStIdx(storeWBIdx); - - continue; - } - - assert(storeQueue[storeWBIdx].req); - assert(!storeQueue[storeWBIdx].committed); - - MemReqPtr req = storeQueue[storeWBIdx].req; - storeQueue[storeWBIdx].committed = true; - -// Fault fault = cpu->translateDataReadReq(req); - req->cmd = Write; - req->completionEvent = NULL; - req->time = curTick; - assert(!req->data); - req->data = new uint8_t[64]; - memcpy(req->data, (uint8_t *)&storeQueue[storeWBIdx].data, req->size); - - DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x " - "to Addr:%#x, data:%#x [sn:%lli]\n", - storeWBIdx,storeQueue[storeWBIdx].inst->readPC(), - req->paddr, *(req->data), - storeQueue[storeWBIdx].inst->seqNum); - -// if (fault != NoFault) { - //What should we do if there is a fault??? - //for now panic -// panic("Page Table Fault!!!!!\n"); -// } - - if (dcacheInterface) { - MemAccessResult result = dcacheInterface->access(req); - - //@todo temp fix for LL/SC (works fine for 1 CPU) - if (req->flags & LOCKED) { - req->result=1; - panic("LL/SC! oh no no support!!!"); - } - - if (isStalled() && - storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) { - DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] " - "load idx:%i\n", - stallingStoreIsn, stallingLoadIdx); - stalled = false; - stallingStoreIsn = 0; - be->replayMemInst(loadQueue[stallingLoadIdx]); - } - - if (result != MA_HIT && dcacheInterface->doEvents()) { - Event *wb = NULL; -/* - typename IEW::LdWritebackEvent *wb = NULL; - if (req->flags & LOCKED) { - // Stx_C does not generate a system port transaction. 
- req->result=0; - wb = new typename IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst, - iewStage); - } -*/ - DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n"); - -// DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n", -// storeQueue[storeWBIdx].inst->seqNum); - - // Will stores need their own kind of writeback events? - // Do stores even need writeback events? - assert(!req->completionEvent); - req->completionEvent = new - StoreCompletionEvent(storeWBIdx, wb, this); - - lastDcacheStall = curTick; - - _status = DcacheMissStall; - - //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum); - - //DPRINTF(OzoneLSQ, "Added MSHR. count = %i\n",mshrSeqNums.size()); - - // Increment stat here or something - } else { - DPRINTF(OzoneLSQ,"D-Cache: Write Hit on idx:%i !\n", - storeWBIdx); - -// DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", -// storeQueue[storeWBIdx].inst->seqNum); - - if (req->flags & LOCKED) { - // Stx_C does not generate a system port transaction. - req->result=1; - typename BackEnd::LdWritebackEvent *wb = - new typename BackEnd::LdWritebackEvent(storeQueue[storeWBIdx].inst, - be); - wb->schedule(curTick); - } - - completeStore(storeWBIdx); - } - - incrStIdx(storeWBIdx); - } else { - panic("Must HAVE DCACHE!!!!!\n"); - } - } - - // Not sure this should set it to 0. - usedPorts = 0; - - assert(stores >= 0 && storesToWB >= 0); -} - -/*template -void -OzoneLSQ::removeMSHR(InstSeqNum seqNum) -{ - list::iterator mshr_it = find(mshrSeqNums.begin(), - mshrSeqNums.end(), - seqNum); - - if (mshr_it != mshrSeqNums.end()) { - mshrSeqNums.erase(mshr_it); - DPRINTF(OzoneLSQ, "Removing MSHR. count = %i\n",mshrSeqNums.size()); - } -}*/ - -template -void -OzoneLSQ::squash(const InstSeqNum &squashed_num) -{ - DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!" 
- "(Loads:%i Stores:%i)\n",squashed_num,loads,stores); - - int load_idx = loadTail; - decrLdIdx(load_idx); - - while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) { - - // Clear the smart pointer to make sure it is decremented. - DPRINTF(OzoneLSQ,"Load Instruction PC %#x squashed, " - "[sn:%lli]\n", - loadQueue[load_idx]->readPC(), - loadQueue[load_idx]->seqNum); - - if (isStalled() && load_idx == stallingLoadIdx) { - stalled = false; - stallingStoreIsn = 0; - stallingLoadIdx = 0; - } - -// loadQueue[load_idx]->squashed = true; - loadQueue[load_idx] = NULL; - --loads; - - // Inefficient! - loadTail = load_idx; - - decrLdIdx(load_idx); - } - - int store_idx = storeTail; - decrStIdx(store_idx); - - while (stores != 0 && storeQueue[store_idx].inst->seqNum > squashed_num) { - - // Clear the smart pointer to make sure it is decremented. - DPRINTF(OzoneLSQ,"Store Instruction PC %#x squashed, " - "idx:%i [sn:%lli]\n", - storeQueue[store_idx].inst->readPC(), - store_idx, storeQueue[store_idx].inst->seqNum); - - // I don't think this can happen. It should have been cleared by the - // stalling load. - if (isStalled() && - storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { - panic("Is stalled should have been cleared by stalling load!\n"); - stalled = false; - stallingStoreIsn = 0; - } - -// storeQueue[store_idx].inst->squashed = true; - storeQueue[store_idx].inst = NULL; - storeQueue[store_idx].canWB = 0; - - if (storeQueue[store_idx].req) { - assert(!storeQueue[store_idx].req->completionEvent); - } - storeQueue[store_idx].req = NULL; - --stores; - - // Inefficient! 
- storeTail = store_idx; - - decrStIdx(store_idx); - } -} - -template -void -OzoneLSQ::dumpInsts() -{ - cprintf("Load store queue: Dumping instructions.\n"); - cprintf("Load queue size: %i\n", loads); - cprintf("Load queue: "); - - int load_idx = loadHead; - - while (load_idx != loadTail && loadQueue[load_idx]) { - cprintf("[sn:%lli] %#x ", loadQueue[load_idx]->seqNum, - loadQueue[load_idx]->readPC()); - - incrLdIdx(load_idx); - } - - cprintf("\nStore queue size: %i\n", stores); - cprintf("Store queue: "); - - int store_idx = storeHead; - - while (store_idx != storeTail && storeQueue[store_idx].inst) { - cprintf("[sn:%lli] %#x ", storeQueue[store_idx].inst->seqNum, - storeQueue[store_idx].inst->readPC()); - - incrStIdx(store_idx); - } - - cprintf("\n"); -} - -template -void -OzoneLSQ::completeStore(int store_idx) -{ - assert(storeQueue[store_idx].inst); - storeQueue[store_idx].completed = true; - --storesToWB; - // A bit conservative because a store completion may not free up entries, - // but hopefully avoids two store completions in one cycle from making - // the CPU tick twice. 
-// cpu->activityThisCycle(); - - if (store_idx == storeHead) { - do { - incrStIdx(storeHead); - - --stores; - } while (storeQueue[storeHead].completed && - storeHead != storeTail); - -// be->updateLSQNextCycle = true; - } - - DPRINTF(OzoneLSQ, "Store head idx:%i\n", storeHead); - - if (isStalled() && - storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { - DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] " - "load idx:%i\n", - stallingStoreIsn, stallingLoadIdx); - stalled = false; - stallingStoreIsn = 0; - be->replayMemInst(loadQueue[stallingLoadIdx]); - } -} - -template -inline void -OzoneLSQ::incrStIdx(int &store_idx) -{ - if (++store_idx >= SQEntries) - store_idx = 0; -} - -template -inline void -OzoneLSQ::decrStIdx(int &store_idx) -{ - if (--store_idx < 0) - store_idx += SQEntries; -} - -template -inline void -OzoneLSQ::incrLdIdx(int &load_idx) -{ - if (++load_idx >= LQEntries) - load_idx = 0; -} - -template -inline void -OzoneLSQ::decrLdIdx(int &load_idx) -{ - if (--load_idx < 0) - load_idx += LQEntries; -} diff --git a/cpu/ozone/lw_back_end.cc b/cpu/ozone/lw_back_end.cc deleted file mode 100644 index 8e9a56ef5..000000000 --- a/cpu/ozone/lw_back_end.cc +++ /dev/null @@ -1,5 +0,0 @@ - -#include "cpu/ozone/lw_back_end_impl.hh" -#include "cpu/ozone/ozone_impl.hh" - -template class LWBackEnd; diff --git a/cpu/ozone/lw_back_end.hh b/cpu/ozone/lw_back_end.hh deleted file mode 100644 index 1c03ffb73..000000000 --- a/cpu/ozone/lw_back_end.hh +++ /dev/null @@ -1,473 +0,0 @@ -/* - * Copyright (c) 2006 The Regents of The University of Michigan - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __CPU_OZONE_LW_BACK_END_HH__ -#define __CPU_OZONE_LW_BACK_END_HH__ - -#include -#include -#include -#include - -#include "arch/faults.hh" -#include "base/timebuf.hh" -#include "cpu/inst_seq.hh" -#include "cpu/ozone/rename_table.hh" -#include "cpu/ozone/thread_state.hh" -#include "mem/functional/functional.hh" -#include "mem/mem_interface.hh" -#include "mem/mem_req.hh" -#include "sim/eventq.hh" - -template -class Checker; -class ExecContext; - -template -class OzoneThreadState; - -template -class LWBackEnd -{ - public: - typedef OzoneThreadState Thread; - - typedef typename Impl::Params Params; - typedef typename Impl::DynInst DynInst; - typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::FullCPU FullCPU; - typedef typename Impl::FrontEnd FrontEnd; - typedef typename Impl::FullCPU::CommStruct CommStruct; - - struct SizeStruct { - int size; - }; - - typedef SizeStruct DispatchToIssue; - typedef SizeStruct IssueToExec; - typedef SizeStruct ExecToCommit; - typedef SizeStruct Writeback; - - TimeBuffer d2i; - typename TimeBuffer::wire instsToDispatch; - TimeBuffer i2e; - typename TimeBuffer::wire instsToExecute; - TimeBuffer e2c; - TimeBuffer numInstsToWB; - - TimeBuffer *comm; - typename TimeBuffer::wire toIEW; - typename TimeBuffer::wire fromCommit; - - class TrapEvent : public Event { - private: - LWBackEnd *be; - - public: - TrapEvent(LWBackEnd *_be); - - void process(); - const char *description(); - }; - - /** LdWriteback event for a load completion. */ - class LdWritebackEvent : public Event { - private: - /** Instruction that is writing back data to the register file. */ - DynInstPtr inst; - /** Pointer to IEW stage. */ - LWBackEnd *be; - - bool dcacheMiss; - - public: - /** Constructs a load writeback event. */ - LdWritebackEvent(DynInstPtr &_inst, LWBackEnd *be); - - /** Processes writeback event. */ - virtual void process(); - /** Returns the description of the writeback event. 
*/ - virtual const char *description(); - - void setDcacheMiss() { dcacheMiss = true; be->addDcacheMiss(inst); } - }; - - LWBackEnd(Params *params); - - std::string name() const; - - void regStats(); - - void setCPU(FullCPU *cpu_ptr); - - void setFrontEnd(FrontEnd *front_end_ptr) - { frontEnd = front_end_ptr; } - - void setXC(ExecContext *xc_ptr) - { xc = xc_ptr; } - - void setThreadState(Thread *thread_ptr) - { thread = thread_ptr; } - - void setCommBuffer(TimeBuffer *_comm); - - void tick(); - void squash(); - void generateXCEvent() { xcSquash = true; } - void squashFromXC(); - void squashFromTrap(); - void checkInterrupts(); - bool trapSquash; - bool xcSquash; - - template - Fault read(MemReqPtr &req, T &data, int load_idx); - - template - Fault write(MemReqPtr &req, T &data, int store_idx); - - Addr readCommitPC() { return commitPC; } - - Addr commitPC; - - Tick lastCommitCycle; - - bool robEmpty() { return instList.empty(); } - - bool isFull() { return numInsts >= numROBEntries; } - bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; } - - void fetchFault(Fault &fault); - - int wakeDependents(DynInstPtr &inst, bool memory_deps = false); - - /** Tells memory dependence unit that a memory instruction needs to be - * rescheduled. It will re-execute once replayMemInst() is called. - */ - void rescheduleMemInst(DynInstPtr &inst); - - /** Re-executes all rescheduled memory instructions. */ - void replayMemInst(DynInstPtr &inst); - - /** Completes memory instruction. 
*/ - void completeMemInst(DynInstPtr &inst) { } - - void addDcacheMiss(DynInstPtr &inst) - { - waitingMemOps.insert(inst->seqNum); - numWaitingMemOps++; - DPRINTF(BE, "Adding a Dcache miss mem op [sn:%lli], total %i\n", - inst->seqNum, numWaitingMemOps); - } - - void removeDcacheMiss(DynInstPtr &inst) - { - assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end()); - waitingMemOps.erase(inst->seqNum); - numWaitingMemOps--; - DPRINTF(BE, "Removing a Dcache miss mem op [sn:%lli], total %i\n", - inst->seqNum, numWaitingMemOps); - } - - void addWaitingMemOp(DynInstPtr &inst) - { - waitingMemOps.insert(inst->seqNum); - numWaitingMemOps++; - DPRINTF(BE, "Adding a waiting mem op [sn:%lli], total %i\n", - inst->seqNum, numWaitingMemOps); - } - - void removeWaitingMemOp(DynInstPtr &inst) - { - assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end()); - waitingMemOps.erase(inst->seqNum); - numWaitingMemOps--; - DPRINTF(BE, "Removing a waiting mem op [sn:%lli], total %i\n", - inst->seqNum, numWaitingMemOps); - } - - void instToCommit(DynInstPtr &inst); - - void switchOut(); - void doSwitchOut(); - void takeOverFrom(ExecContext *old_xc = NULL); - - bool isSwitchedOut() { return switchedOut; } - - private: - void generateTrapEvent(Tick latency = 0); - void handleFault(Fault &fault, Tick latency = 0); - void updateStructures(); - void dispatchInsts(); - void dispatchStall(); - void checkDispatchStatus(); - void executeInsts(); - void commitInsts(); - void addToLSQ(DynInstPtr &inst); - void writebackInsts(); - bool commitInst(int inst_num); - void squash(const InstSeqNum &sn); - void squashDueToBranch(DynInstPtr &inst); - void squashDueToMemViolation(DynInstPtr &inst); - void squashDueToMemBlocked(DynInstPtr &inst); - void updateExeInstStats(DynInstPtr &inst); - void updateComInstStats(DynInstPtr &inst); - - public: - FullCPU *cpu; - - FrontEnd *frontEnd; - - ExecContext *xc; - - Thread *thread; - - enum Status { - Running, - Idle, - DcacheMissStall, - 
DcacheMissComplete, - Blocked, - TrapPending - }; - - Status status; - - Status dispatchStatus; - - Status commitStatus; - - Counter funcExeInst; - - private: - typedef typename Impl::LdstQueue LdstQueue; - - LdstQueue LSQ; - public: - RenameTable commitRenameTable; - - RenameTable renameTable; - private: - class DCacheCompletionEvent : public Event - { - private: - LWBackEnd *be; - - public: - DCacheCompletionEvent(LWBackEnd *_be); - - virtual void process(); - virtual const char *description(); - }; - - friend class DCacheCompletionEvent; - - DCacheCompletionEvent cacheCompletionEvent; - - MemInterface *dcacheInterface; - - MemReqPtr memReq; - - // General back end width. Used if the more specific isn't given. - int width; - - // Dispatch width. - int dispatchWidth; - int numDispatchEntries; - int dispatchSize; - - int waitingInsts; - - int issueWidth; - - // Writeback width - int wbWidth; - - // Commit width - int commitWidth; - - /** Index into queue of instructions being written back. */ - unsigned wbNumInst; - - /** Cycle number within the queue of instructions being written - * back. Used in case there are too many instructions writing - * back at the current cycle and writesbacks need to be scheduled - * for the future. See comments in instToCommit(). 
- */ - unsigned wbCycle; - - int numROBEntries; - int numInsts; - - std::set waitingMemOps; - typedef std::set::iterator MemIt; - int numWaitingMemOps; - unsigned maxOutstandingMemOps; - - bool squashPending; - InstSeqNum squashSeqNum; - Addr squashNextPC; - - Fault faultFromFetch; - bool fetchHasFault; - - bool switchedOut; - bool switchPending; - - DynInstPtr memBarrier; - - private: - struct pqCompare { - bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const - { - return lhs->seqNum > rhs->seqNum; - } - }; - - typedef typename std::priority_queue, pqCompare> ReadyInstQueue; - ReadyInstQueue exeList; - - typedef typename std::list::iterator InstListIt; - - std::list instList; - std::list waitingList; - std::list replayList; - std::list writeback; - - int latency; - - int squashLatency; - - bool exactFullStall; - - // number of cycles stalled for D-cache misses -/* Stats::Scalar<> dcacheStallCycles; - Counter lastDcacheStall; -*/ - Stats::Vector<> rob_cap_events; - Stats::Vector<> rob_cap_inst_count; - Stats::Vector<> iq_cap_events; - Stats::Vector<> iq_cap_inst_count; - // total number of instructions executed - Stats::Vector<> exe_inst; - Stats::Vector<> exe_swp; - Stats::Vector<> exe_nop; - Stats::Vector<> exe_refs; - Stats::Vector<> exe_loads; - Stats::Vector<> exe_branches; - - Stats::Vector<> issued_ops; - - // total number of loads forwaded from LSQ stores - Stats::Vector<> lsq_forw_loads; - - // total number of loads ignored due to invalid addresses - Stats::Vector<> inv_addr_loads; - - // total number of software prefetches ignored due to invalid addresses - Stats::Vector<> inv_addr_swpfs; - // ready loads blocked due to memory disambiguation - Stats::Vector<> lsq_blocked_loads; - - Stats::Scalar<> lsqInversion; - - Stats::Vector<> n_issued_dist; - Stats::VectorDistribution<> issue_delay_dist; - - Stats::VectorDistribution<> queue_res_dist; -/* - Stats::Vector<> stat_fu_busy; - Stats::Vector2d<> stat_fuBusy; - Stats::Vector<> dist_unissued; 
- Stats::Vector2d<> stat_issued_inst_type; - - Stats::Formula misspec_cnt; - Stats::Formula misspec_ipc; - Stats::Formula issue_rate; - Stats::Formula issue_stores; - Stats::Formula issue_op_rate; - Stats::Formula fu_busy_rate; - Stats::Formula commit_stores; - Stats::Formula commit_ipc; - Stats::Formula commit_ipb; - Stats::Formula lsq_inv_rate; -*/ - Stats::Vector<> writeback_count; - Stats::Vector<> producer_inst; - Stats::Vector<> consumer_inst; - Stats::Vector<> wb_penalized; - - Stats::Formula wb_rate; - Stats::Formula wb_fanout; - Stats::Formula wb_penalized_rate; - - // total number of instructions committed - Stats::Vector<> stat_com_inst; - Stats::Vector<> stat_com_swp; - Stats::Vector<> stat_com_refs; - Stats::Vector<> stat_com_loads; - Stats::Vector<> stat_com_membars; - Stats::Vector<> stat_com_branches; - - Stats::Distribution<> n_committed_dist; - - Stats::Scalar<> commit_eligible_samples; - Stats::Vector<> commit_eligible; - - Stats::Vector<> squashedInsts; - Stats::Vector<> ROBSquashedInsts; - - Stats::Scalar<> ROB_fcount; - Stats::Formula ROB_full_rate; - - Stats::Vector<> ROB_count; // cumulative ROB occupancy - Stats::Formula ROB_occ_rate; - Stats::VectorDistribution<> ROB_occ_dist; - public: - void dumpInsts(); - - Checker *checker; -}; - -template -template -Fault -LWBackEnd::read(MemReqPtr &req, T &data, int load_idx) -{ - return LSQ.read(req, data, load_idx); -} - -template -template -Fault -LWBackEnd::write(MemReqPtr &req, T &data, int store_idx) -{ - return LSQ.write(req, data, store_idx); -} - -#endif // __CPU_OZONE_LW_BACK_END_HH__ diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh deleted file mode 100644 index 41b4ea24b..000000000 --- a/cpu/ozone/lw_back_end_impl.hh +++ /dev/null @@ -1,1693 +0,0 @@ -/* - * Copyright (c) 2006 The Regents of The University of Michigan - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "cpu/checker/cpu.hh" -#include "cpu/ozone/lw_back_end.hh" -#include "encumbered/cpu/full/op_class.hh" - -template -void -LWBackEnd::generateTrapEvent(Tick latency) -{ - DPRINTF(BE, "Generating trap event\n"); - - TrapEvent *trap = new TrapEvent(this); - - trap->schedule(curTick + cpu->cycles(latency)); - - thread->trapPending = true; -} - -template -int -LWBackEnd::wakeDependents(DynInstPtr &inst, bool memory_deps) -{ - assert(!inst->isSquashed()); - std::vector &dependents = memory_deps ? inst->getMemDeps() : - inst->getDependents(); - int num_outputs = dependents.size(); - - DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum); - - for (int i = 0; i < num_outputs; i++) { - DynInstPtr dep_inst = dependents[i]; - if (!memory_deps) { - dep_inst->markSrcRegReady(); - } else { - if (!dep_inst->isSquashed()) - dep_inst->markMemInstReady(inst.get()); - } - - DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum); - - if (dep_inst->readyToIssue() && dep_inst->isInROB() && - !dep_inst->isNonSpeculative() && !dep_inst->isStoreConditional() && - dep_inst->memDepReady() && !dep_inst->isMemBarrier() && - !dep_inst->isWriteBarrier()) { - DPRINTF(BE, "Adding instruction to exeList [sn:%lli]\n", - dep_inst->seqNum); - exeList.push(dep_inst); - if (dep_inst->iqItValid) { - DPRINTF(BE, "Removing instruction from waiting list\n"); - waitingList.erase(dep_inst->iqIt); - waitingInsts--; - dep_inst->iqItValid = false; - assert(waitingInsts >= 0); - } - if (dep_inst->isMemRef()) { - removeWaitingMemOp(dep_inst); - DPRINTF(BE, "Issued a waiting mem op [sn:%lli]\n", - dep_inst->seqNum); - } - } - } - return num_outputs; -} - -template -void -LWBackEnd::rescheduleMemInst(DynInstPtr &inst) -{ - replayList.push_front(inst); -} - -template -LWBackEnd::TrapEvent::TrapEvent(LWBackEnd *_be) - : Event(&mainEventQueue, CPU_Tick_Pri), be(_be) -{ - this->setFlags(Event::AutoDelete); -} - -template -void -LWBackEnd::TrapEvent::process() 
-{ - be->trapSquash = true; -} - -template -const char * -LWBackEnd::TrapEvent::description() -{ - return "Trap event"; -} - -template -void -LWBackEnd::replayMemInst(DynInstPtr &inst) -{ - bool found_inst = false; - while (!replayList.empty()) { - exeList.push(replayList.front()); - if (replayList.front() == inst) { - found_inst = true; - } - replayList.pop_front(); - } - assert(found_inst); -} - -template -LWBackEnd::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst, - LWBackEnd *_be) - : Event(&mainEventQueue), inst(_inst), be(_be), dcacheMiss(false) -{ - this->setFlags(Event::AutoDelete); -} - -template -void -LWBackEnd::LdWritebackEvent::process() -{ - DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum); -// DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum); - - //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); - -// iewStage->wakeCPU(); - - if (be->isSwitchedOut()) - return; - - if (dcacheMiss) { - be->removeDcacheMiss(inst); - } - - if (inst->isSquashed()) { - inst = NULL; - return; - } - - if (!inst->isExecuted()) { - inst->setExecuted(); - - // Execute again to copy data to proper place. 
- inst->completeAcc(); - } - - // Need to insert instruction into queue to commit - be->instToCommit(inst); - - //wroteToTimeBuffer = true; -// iewStage->activityThisCycle(); - - inst = NULL; -} - -template -const char * -LWBackEnd::LdWritebackEvent::description() -{ - return "Load writeback event"; -} - - -template -LWBackEnd::DCacheCompletionEvent::DCacheCompletionEvent(LWBackEnd *_be) - : Event(&mainEventQueue, CPU_Tick_Pri), be(_be) -{ -} - -template -void -LWBackEnd::DCacheCompletionEvent::process() -{ -} - -template -const char * -LWBackEnd::DCacheCompletionEvent::description() -{ - return "Cache completion event"; -} - -template -LWBackEnd::LWBackEnd(Params *params) - : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5), - trapSquash(false), xcSquash(false), cacheCompletionEvent(this), - dcacheInterface(params->dcacheInterface), width(params->backEndWidth), - exactFullStall(true) -{ - numROBEntries = params->numROBEntries; - numInsts = 0; - numDispatchEntries = 32; - maxOutstandingMemOps = params->maxOutstandingMemOps; - numWaitingMemOps = 0; - waitingInsts = 0; - switchedOut = false; - switchPending = false; - - LSQ.setBE(this); - - // Setup IQ and LSQ with their parameters here. - instsToDispatch = d2i.getWire(-1); - - instsToExecute = i2e.getWire(-1); - - dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width; - issueWidth = params->issueWidth ? params->issueWidth : width; - wbWidth = params->wbWidth ? params->wbWidth : width; - commitWidth = params->commitWidth ? 
params->commitWidth : width; - - LSQ.init(params, params->LQEntries, params->SQEntries, 0); - - dispatchStatus = Running; -} - -template -std::string -LWBackEnd::name() const -{ - return cpu->name() + ".backend"; -} - -template -void -LWBackEnd::regStats() -{ - using namespace Stats; - rob_cap_events - .init(cpu->number_of_threads) - .name(name() + ".ROB:cap_events") - .desc("number of cycles where ROB cap was active") - .flags(total) - ; - - rob_cap_inst_count - .init(cpu->number_of_threads) - .name(name() + ".ROB:cap_inst") - .desc("number of instructions held up by ROB cap") - .flags(total) - ; - - iq_cap_events - .init(cpu->number_of_threads) - .name(name() +".IQ:cap_events" ) - .desc("number of cycles where IQ cap was active") - .flags(total) - ; - - iq_cap_inst_count - .init(cpu->number_of_threads) - .name(name() + ".IQ:cap_inst") - .desc("number of instructions held up by IQ cap") - .flags(total) - ; - - - exe_inst - .init(cpu->number_of_threads) - .name(name() + ".ISSUE:count") - .desc("number of insts issued") - .flags(total) - ; - - exe_swp - .init(cpu->number_of_threads) - .name(name() + ".ISSUE:swp") - .desc("number of swp insts issued") - .flags(total) - ; - - exe_nop - .init(cpu->number_of_threads) - .name(name() + ".ISSUE:nop") - .desc("number of nop insts issued") - .flags(total) - ; - - exe_refs - .init(cpu->number_of_threads) - .name(name() + ".ISSUE:refs") - .desc("number of memory reference insts issued") - .flags(total) - ; - - exe_loads - .init(cpu->number_of_threads) - .name(name() + ".ISSUE:loads") - .desc("number of load insts issued") - .flags(total) - ; - - exe_branches - .init(cpu->number_of_threads) - .name(name() + ".ISSUE:branches") - .desc("Number of branches issued") - .flags(total) - ; - - issued_ops - .init(cpu->number_of_threads) - .name(name() + ".ISSUE:op_count") - .desc("number of insts issued") - .flags(total) - ; - -/* - for (int i=0; inumber_of_threads) - .name(name() + ".LSQ:forw_loads") - .desc("number of loads forwarded 
via LSQ") - .flags(total) - ; - - inv_addr_loads - .init(cpu->number_of_threads) - .name(name() + ".ISSUE:addr_loads") - .desc("number of invalid-address loads") - .flags(total) - ; - - inv_addr_swpfs - .init(cpu->number_of_threads) - .name(name() + ".ISSUE:addr_swpfs") - .desc("number of invalid-address SW prefetches") - .flags(total) - ; - - lsq_blocked_loads - .init(cpu->number_of_threads) - .name(name() + ".LSQ:blocked_loads") - .desc("number of ready loads not issued due to memory disambiguation") - .flags(total) - ; - - lsqInversion - .name(name() + ".ISSUE:lsq_invert") - .desc("Number of times LSQ instruction issued early") - ; - - n_issued_dist - .init(issueWidth + 1) - .name(name() + ".ISSUE:issued_per_cycle") - .desc("Number of insts issued each cycle") - .flags(total | pdf | dist) - ; - issue_delay_dist - .init(Num_OpClasses,0,99,2) - .name(name() + ".ISSUE:") - .desc("cycles from operands ready to issue") - .flags(pdf | cdf) - ; - - queue_res_dist - .init(Num_OpClasses, 0, 99, 2) - .name(name() + ".IQ:residence:") - .desc("cycles from dispatch to issue") - .flags(total | pdf | cdf ) - ; - for (int i = 0; i < Num_OpClasses; ++i) { - queue_res_dist.subname(i, opClassStrings[i]); - } - - writeback_count - .init(cpu->number_of_threads) - .name(name() + ".WB:count") - .desc("cumulative count of insts written-back") - .flags(total) - ; - - producer_inst - .init(cpu->number_of_threads) - .name(name() + ".WB:producers") - .desc("num instructions producing a value") - .flags(total) - ; - - consumer_inst - .init(cpu->number_of_threads) - .name(name() + ".WB:consumers") - .desc("num instructions consuming a value") - .flags(total) - ; - - wb_penalized - .init(cpu->number_of_threads) - .name(name() + ".WB:penalized") - .desc("number of instrctions required to write to 'other' IQ") - .flags(total) - ; - - - wb_penalized_rate - .name(name() + ".WB:penalized_rate") - .desc ("fraction of instructions written-back that wrote to 'other' IQ") - .flags(total) - ; - - 
wb_penalized_rate = wb_penalized / writeback_count; - - wb_fanout - .name(name() + ".WB:fanout") - .desc("average fanout of values written-back") - .flags(total) - ; - - wb_fanout = producer_inst / consumer_inst; - - wb_rate - .name(name() + ".WB:rate") - .desc("insts written-back per cycle") - .flags(total) - ; - wb_rate = writeback_count / cpu->numCycles; - - stat_com_inst - .init(cpu->number_of_threads) - .name(name() + ".COM:count") - .desc("Number of instructions committed") - .flags(total) - ; - - stat_com_swp - .init(cpu->number_of_threads) - .name(name() + ".COM:swp_count") - .desc("Number of s/w prefetches committed") - .flags(total) - ; - - stat_com_refs - .init(cpu->number_of_threads) - .name(name() + ".COM:refs") - .desc("Number of memory references committed") - .flags(total) - ; - - stat_com_loads - .init(cpu->number_of_threads) - .name(name() + ".COM:loads") - .desc("Number of loads committed") - .flags(total) - ; - - stat_com_membars - .init(cpu->number_of_threads) - .name(name() + ".COM:membars") - .desc("Number of memory barriers committed") - .flags(total) - ; - - stat_com_branches - .init(cpu->number_of_threads) - .name(name() + ".COM:branches") - .desc("Number of branches committed") - .flags(total) - ; - n_committed_dist - .init(0,commitWidth,1) - .name(name() + ".COM:committed_per_cycle") - .desc("Number of insts commited each cycle") - .flags(pdf) - ; - - // - // Commit-Eligible instructions... - // - // -> The number of instructions eligible to commit in those - // cycles where we reached our commit BW limit (less the number - // actually committed) - // - // -> The average value is computed over ALL CYCLES... 
not just - // the BW limited cycles - // - // -> The standard deviation is computed only over cycles where - // we reached the BW limit - // - commit_eligible - .init(cpu->number_of_threads) - .name(name() + ".COM:bw_limited") - .desc("number of insts not committed due to BW limits") - .flags(total) - ; - - commit_eligible_samples - .name(name() + ".COM:bw_lim_events") - .desc("number cycles where commit BW limit reached") - ; - - squashedInsts - .init(cpu->number_of_threads) - .name(name() + ".COM:squashed_insts") - .desc("Number of instructions removed from inst list") - ; - - ROBSquashedInsts - .init(cpu->number_of_threads) - .name(name() + ".COM:rob_squashed_insts") - .desc("Number of instructions removed from inst list when they reached the head of the ROB") - ; - - ROB_fcount - .name(name() + ".ROB:full_count") - .desc("number of cycles where ROB was full") - ; - - ROB_count - .init(cpu->number_of_threads) - .name(name() + ".ROB:occupancy") - .desc(name() + ".ROB occupancy (cumulative)") - .flags(total) - ; - - ROB_full_rate - .name(name() + ".ROB:full_rate") - .desc("ROB full per cycle") - ; - ROB_full_rate = ROB_fcount / cpu->numCycles; - - ROB_occ_rate - .name(name() + ".ROB:occ_rate") - .desc("ROB occupancy rate") - .flags(total) - ; - ROB_occ_rate = ROB_count / cpu->numCycles; - - ROB_occ_dist - .init(cpu->number_of_threads,0,numROBEntries,2) - .name(name() + ".ROB:occ_dist") - .desc("ROB Occupancy per cycle") - .flags(total | cdf) - ; -} - -template -void -LWBackEnd::setCPU(FullCPU *cpu_ptr) -{ - cpu = cpu_ptr; - LSQ.setCPU(cpu_ptr); - checker = cpu->checker; -} - -template -void -LWBackEnd::setCommBuffer(TimeBuffer *_comm) -{ - comm = _comm; - toIEW = comm->getWire(0); - fromCommit = comm->getWire(-1); -} - -#if FULL_SYSTEM -template -void -LWBackEnd::checkInterrupts() -{ - if (cpu->checkInterrupts && - cpu->check_interrupts() && - !cpu->inPalMode(thread->readPC()) && - !trapSquash && - !xcSquash) { - frontEnd->interruptPending = true; - if (robEmpty() 
&& !LSQ.hasStoresToWB()) { - // Will need to squash all instructions currently in flight and have - // the interrupt handler restart at the last non-committed inst. - // Most of that can be handled through the trap() function. The - // processInterrupts() function really just checks for interrupts - // and then calls trap() if there is an interrupt present. - - // Not sure which thread should be the one to interrupt. For now - // always do thread 0. - assert(!thread->inSyscall); - thread->inSyscall = true; - - // CPU will handle implementation of the interrupt. - cpu->processInterrupts(); - - // Now squash or record that I need to squash this cycle. - commitStatus = TrapPending; - - // Exit state update mode to avoid accidental updating. - thread->inSyscall = false; - - // Generate trap squash event. - generateTrapEvent(); - - DPRINTF(BE, "Interrupt detected.\n"); - } else { - DPRINTF(BE, "Interrupt must wait for ROB to drain.\n"); - } - } -} - -template -void -LWBackEnd::handleFault(Fault &fault, Tick latency) -{ - DPRINTF(BE, "Handling fault!\n"); - - assert(!thread->inSyscall); - - thread->inSyscall = true; - - // Consider holding onto the trap and waiting until the trap event - // happens for this to be executed. - fault->invoke(thread->getXCProxy()); - - // Exit state update mode to avoid accidental updating. - thread->inSyscall = false; - - commitStatus = TrapPending; - - // Generate trap squash event. - generateTrapEvent(latency); -} -#endif - -template -void -LWBackEnd::tick() -{ - DPRINTF(BE, "Ticking back end\n"); - - if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) { - cpu->signalSwitched(); - return; - } - - ROB_count[0]+= numInsts; - - wbCycle = 0; - - // Read in any done instruction information and update the IQ or LSQ. 
- updateStructures(); - -#if FULL_SYSTEM - checkInterrupts(); - - if (trapSquash) { - assert(!xcSquash); - squashFromTrap(); - } else if (xcSquash) { - squashFromXC(); - } -#endif - - if (dispatchStatus != Blocked) { - dispatchInsts(); - } else { - checkDispatchStatus(); - } - - if (commitStatus != TrapPending) { - executeInsts(); - - commitInsts(); - } - - LSQ.writebackStores(); - - DPRINTF(BE, "Waiting insts: %i, mem ops: %i, ROB entries in use: %i, " - "LSQ loads: %i, LSQ stores: %i\n", - waitingInsts, numWaitingMemOps, numInsts, - LSQ.numLoads(), LSQ.numStores()); - -#ifdef DEBUG - assert(numInsts == instList.size()); - assert(waitingInsts == waitingList.size()); - assert(numWaitingMemOps == waitingMemOps.size()); - assert(!switchedOut); -#endif -} - -template -void -LWBackEnd::updateStructures() -{ - if (fromCommit->doneSeqNum) { - LSQ.commitLoads(fromCommit->doneSeqNum); - LSQ.commitStores(fromCommit->doneSeqNum); - } - - if (fromCommit->nonSpecSeqNum) { - if (fromCommit->uncached) { -// LSQ.executeLoad(fromCommit->lqIdx); - } else { -// IQ.scheduleNonSpec( -// fromCommit->nonSpecSeqNum); - } - } -} - -template -void -LWBackEnd::addToLSQ(DynInstPtr &inst) -{ - // Do anything LSQ specific here? 
- LSQ.insert(inst); -} - -template -void -LWBackEnd::dispatchInsts() -{ - DPRINTF(BE, "Trying to dispatch instructions.\n"); - - while (numInsts < numROBEntries && - numWaitingMemOps < maxOutstandingMemOps) { - // Get instruction from front of time buffer - DynInstPtr inst = frontEnd->getInst(); - if (!inst) { - break; - } else if (inst->isSquashed()) { - continue; - } - - ++numInsts; - instList.push_front(inst); - - inst->setInROB(); - - DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n", - inst->seqNum, inst->readPC()); - - for (int i = 0; i < inst->numDestRegs(); ++i) - renameTable[inst->destRegIdx(i)] = inst; - - if (inst->isMemBarrier() || inst->isWriteBarrier()) { - if (memBarrier) { - DPRINTF(BE, "Instruction [sn:%lli] is waiting on " - "barrier [sn:%lli].\n", - inst->seqNum, memBarrier->seqNum); - memBarrier->addMemDependent(inst); - inst->addSrcMemInst(memBarrier); - } - memBarrier = inst; - inst->setCanCommit(); - } else if (inst->readyToIssue() && - !inst->isNonSpeculative() && - !inst->isStoreConditional()) { - if (inst->isMemRef()) { - - LSQ.insert(inst); - if (memBarrier) { - DPRINTF(BE, "Instruction [sn:%lli] is waiting on " - "barrier [sn:%lli].\n", - inst->seqNum, memBarrier->seqNum); - memBarrier->addMemDependent(inst); - inst->addSrcMemInst(memBarrier); - addWaitingMemOp(inst); - - waitingList.push_front(inst); - inst->iqIt = waitingList.begin(); - inst->iqItValid = true; - waitingInsts++; - } else { - DPRINTF(BE, "Instruction [sn:%lli] ready, addding to " - "exeList.\n", - inst->seqNum); - exeList.push(inst); - } - } else if (inst->isNop()) { - DPRINTF(BE, "Nop encountered [sn:%lli], skipping exeList.\n", - inst->seqNum); - inst->setIssued(); - inst->setExecuted(); - inst->setCanCommit(); - } else { - DPRINTF(BE, "Instruction [sn:%lli] ready, addding to " - "exeList.\n", - inst->seqNum); - exeList.push(inst); - } - } else { - if (inst->isNonSpeculative() || inst->isStoreConditional()) { - inst->setCanCommit(); - DPRINTF(BE, "Adding non 
speculative instruction\n"); - } - - if (inst->isMemRef()) { - addWaitingMemOp(inst); - LSQ.insert(inst); - if (memBarrier) { - memBarrier->addMemDependent(inst); - inst->addSrcMemInst(memBarrier); - - DPRINTF(BE, "Instruction [sn:%lli] is waiting on " - "barrier [sn:%lli].\n", - inst->seqNum, memBarrier->seqNum); - } - } - - DPRINTF(BE, "Instruction [sn:%lli] not ready, addding to " - "waitingList.\n", - inst->seqNum); - waitingList.push_front(inst); - inst->iqIt = waitingList.begin(); - inst->iqItValid = true; - waitingInsts++; - } - } - - // Check if IQ or LSQ is full. If so we'll need to break and stop - // removing instructions. Also update the number of insts to remove - // from the queue. Check here if we don't care about exact stall - // conditions. -/* - bool stall = false; - if (IQ.isFull()) { - DPRINTF(BE, "IQ is full!\n"); - stall = true; - } else if (LSQ.isFull()) { - DPRINTF(BE, "LSQ is full!\n"); - stall = true; - } else if (isFull()) { - DPRINTF(BE, "ROB is full!\n"); - stall = true; - ROB_fcount++; - } - if (stall) { - d2i.advance(); - dispatchStall(); - return; - } -*/ -} - -template -void -LWBackEnd::dispatchStall() -{ - dispatchStatus = Blocked; - if (!cpu->decoupledFrontEnd) { - // Tell front end to stall here through a timebuffer, or just tell - // it directly. - } -} - -template -void -LWBackEnd::checkDispatchStatus() -{ - DPRINTF(BE, "Checking dispatch status\n"); - assert(dispatchStatus == Blocked); - if (!LSQ.isFull() && !isFull()) { - DPRINTF(BE, "Dispatch no longer blocked\n"); - dispatchStatus = Running; - dispatchInsts(); - } -} - -template -void -LWBackEnd::executeInsts() -{ - DPRINTF(BE, "Trying to execute instructions\n"); - - int num_executed = 0; - while (!exeList.empty() && num_executed < issueWidth) { - DynInstPtr inst = exeList.top(); - - DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n", - inst->seqNum, inst->readPC()); - - // Check if the instruction is squashed; if so then skip it - // and don't count it towards the FU usage. 
- if (inst->isSquashed()) { - DPRINTF(BE, "Execute: Instruction was squashed.\n"); - - // Not sure how to handle this plus the method of sending # of - // instructions to use. Probably will just have to count it - // towards the bandwidth usage, but not the FU usage. - ++num_executed; - - // Consider this instruction executed so that commit can go - // ahead and retire the instruction. - inst->setExecuted(); - - // Not sure if I should set this here or just let commit try to - // commit any squashed instructions. I like the latter a bit more. - inst->setCanCommit(); - -// ++iewExecSquashedInsts; - exeList.pop(); - - continue; - } - - Fault fault = NoFault; - - // Execute instruction. - // Note that if the instruction faults, it will be handled - // at the commit stage. - if (inst->isMemRef() && - (!inst->isDataPrefetch() && !inst->isInstPrefetch())) { - if (dcacheInterface->isBlocked()) { - // Should I move the instruction aside? - DPRINTF(BE, "Execute: dcache is blocked\n"); - break; - } - DPRINTF(BE, "Execute: Initiating access for memory " - "reference.\n"); - - if (inst->isLoad()) { - LSQ.executeLoad(inst); - } else if (inst->isStore()) { - LSQ.executeStore(inst); - if (inst->req && !(inst->req->flags & LOCKED)) { - inst->setExecuted(); - - instToCommit(inst); - } - } else { - panic("Unknown mem type!"); - } - } else { - inst->execute(); - - inst->setExecuted(); - - instToCommit(inst); - } - - updateExeInstStats(inst); - - ++funcExeInst; - ++num_executed; - - exeList.pop(); - - if (inst->mispredicted()) { - squashDueToBranch(inst); - break; - } else if (LSQ.violation()) { - // Get the DynInst that caused the violation. Note that this - // clears the violation signal. - DynInstPtr violator; - violator = LSQ.getMemDepViolator(); - - DPRINTF(BE, "LDSTQ detected a violation. Violator PC: " - "%#x, inst PC: %#x. Addr is: %#x.\n", - violator->readPC(), inst->readPC(), inst->physEffAddr); - - // Squash. 
- squashDueToMemViolation(inst); - } - } - - issued_ops[0]+= num_executed; - n_issued_dist[num_executed]++; -} - -template -void -LWBackEnd::instToCommit(DynInstPtr &inst) -{ - - DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n", - inst->seqNum, inst->readPC()); - - if (!inst->isSquashed()) { - DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n", - inst->seqNum, inst->readPC()); - - inst->setCanCommit(); - - if (inst->isExecuted()) { - inst->setResultReady(); - int dependents = wakeDependents(inst); - if (dependents) { - producer_inst[0]++; - consumer_inst[0]+= dependents; - } - } - } - - writeback_count[0]++; -} -#if 0 -template -void -LWBackEnd::writebackInsts() -{ - int wb_width = wbWidth; - // Using this method I'm not quite sure how to prevent an - // instruction from waking its own dependents multiple times, - // without the guarantee that commit always has enough bandwidth - // to accept all instructions being written back. This guarantee - // might not be too unrealistic. - InstListIt wb_inst_it = writeback.begin(); - InstListIt wb_end_it = writeback.end(); - int inst_num = 0; - int consumer_insts = 0; - - for (; inst_num < wb_width && - wb_inst_it != wb_end_it; inst_num++) { - DynInstPtr inst = (*wb_inst_it); - - // Some instructions will be sent to commit without having - // executed because they need commit to handle them. - // E.g. Uncached loads have not actually executed when they - // are first sent to commit. Instead commit must tell the LSQ - // when it's ready to execute the uncached load. 
- if (!inst->isSquashed()) { - DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n", - inst->seqNum, inst->readPC()); - - inst->setCanCommit(); - inst->setResultReady(); - - if (inst->isExecuted()) { - int dependents = wakeDependents(inst); - if (dependents) { - producer_inst[0]++; - consumer_insts+= dependents; - } - } - } - - writeback.erase(wb_inst_it++); - } - LSQ.writebackStores(); - consumer_inst[0]+= consumer_insts; - writeback_count[0]+= inst_num; -} -#endif -template -bool -LWBackEnd::commitInst(int inst_num) -{ - // Read instruction from the head of the ROB - DynInstPtr inst = instList.back(); - - // Make sure instruction is valid - assert(inst); - - if (!inst->readyToCommit()) - return false; - - DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n", - inst->seqNum, inst->readPC()); - - thread->setPC(inst->readPC()); - thread->setNextPC(inst->readNextPC()); - inst->reachedCommit = true; - - // If the instruction is not executed yet, then it is a non-speculative - // or store inst. Signal backwards that it should be executed. - if (!inst->isExecuted()) { - if (inst->isNonSpeculative() || - inst->isStoreConditional() || - inst->isMemBarrier() || - inst->isWriteBarrier()) { -#if !FULL_SYSTEM - // Hack to make sure syscalls aren't executed until all stores - // write back their data. This direct communication shouldn't - // be used for anything other than this. 
- if (inst_num > 0 || LSQ.hasStoresToWB()) -#else - if ((inst->isMemBarrier() || inst->isWriteBarrier() || - inst->isQuiesce()) && - LSQ.hasStoresToWB()) -#endif - { - DPRINTF(BE, "Waiting for all stores to writeback.\n"); - return false; - } - - DPRINTF(BE, "Encountered a store or non-speculative " - "instruction at the head of the ROB, PC %#x.\n", - inst->readPC()); - - if (inst->isMemBarrier() || inst->isWriteBarrier()) { - DPRINTF(BE, "Waking dependents on barrier [sn:%lli]\n", - inst->seqNum); - assert(memBarrier); - wakeDependents(inst, true); - if (memBarrier == inst) - memBarrier = NULL; - inst->clearMemDependents(); - } - - // Send back the non-speculative instruction's sequence number. - if (inst->iqItValid) { - DPRINTF(BE, "Removing instruction from waiting list\n"); - waitingList.erase(inst->iqIt); - inst->iqItValid = false; - waitingInsts--; - assert(waitingInsts >= 0); - if (inst->isStore()) - removeWaitingMemOp(inst); - } - - exeList.push(inst); - - // Change the instruction so it won't try to commit again until - // it is executed. - inst->clearCanCommit(); - -// ++commitNonSpecStalls; - - return false; - } else if (inst->isLoad()) { - DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n", - inst->seqNum, inst->readPC()); - - // Send back the non-speculative instruction's sequence - // number. Maybe just tell the lsq to re-execute the load. - - // Send back the non-speculative instruction's sequence number. - if (inst->iqItValid) { - DPRINTF(BE, "Removing instruction from waiting list\n"); - waitingList.erase(inst->iqIt); - inst->iqItValid = false; - waitingInsts--; - assert(waitingInsts >= 0); - removeWaitingMemOp(inst); - } - replayMemInst(inst); - - inst->clearCanCommit(); - - return false; - } else { - panic("Trying to commit un-executed instruction " - "of unknown type!\n"); - } - } - - // Not handled for now. 
- assert(!inst->isThreadSync()); - assert(inst->memDepReady()); - // Stores will mark themselves as totally completed as they need - // to wait to writeback to memory. @todo: Hack...attempt to fix - // having the checker be forced to wait until a store completes in - // order to check all of the instructions. If the store at the - // head of the check list misses, but a later store hits, then - // loads in the checker may see the younger store values instead - // of the store they should see. Either the checker needs its own - // memory (annoying to update), its own store buffer (how to tell - // which value is correct?), or something else... - if (!inst->isStore()) { - inst->setCompleted(); - } - // Check if the instruction caused a fault. If so, trap. - Fault inst_fault = inst->getFault(); - - // Use checker prior to updating anything due to traps or PC - // based events. - if (checker) { - checker->tick(inst); - } - - if (inst_fault != NoFault) { - DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n", - inst->seqNum, inst->readPC()); - - // Instruction is completed as it has a fault. 
- inst->setCompleted(); - - if (LSQ.hasStoresToWB()) { - DPRINTF(BE, "Stores still in flight, will wait until drained.\n"); - return false; - } else if (inst_num != 0) { - DPRINTF(BE, "Will wait until instruction is head of commit group.\n"); - return false; - } else if (checker && inst->isStore()) { - checker->tick(inst); - } - - thread->setInst( - static_cast(inst->staticInst->machInst)); -#if FULL_SYSTEM - handleFault(inst_fault); - return false; -#else // !FULL_SYSTEM - panic("fault (%d) detected @ PC %08p", inst_fault, - inst->PC); -#endif // FULL_SYSTEM - } - - int freed_regs = 0; - - for (int i = 0; i < inst->numDestRegs(); ++i) { - DPRINTF(BE, "Commit rename map setting reg %i to [sn:%lli]\n", - (int)inst->destRegIdx(i), inst->seqNum); - thread->renameTable[inst->destRegIdx(i)] = inst; - ++freed_regs; - } - - if (inst->traceData) { - inst->traceData->setFetchSeq(inst->seqNum); - inst->traceData->setCPSeq(thread->numInst); - inst->traceData->finalize(); - inst->traceData = NULL; - } - - inst->clearDependents(); - - frontEnd->addFreeRegs(freed_regs); - - instList.pop_back(); - - --numInsts; - ++thread->funcExeInst; - // Maybe move this to where the fault is handled; if the fault is - // handled, don't try to set this myself as the fault will set it. - // If not, then I set thread->PC = thread->nextPC and - // thread->nextPC = thread->nextPC + 4. - thread->setPC(thread->readNextPC()); - thread->setNextPC(thread->readNextPC() + sizeof(TheISA::MachInst)); - updateComInstStats(inst); - - // Write the done sequence number here. 
- toIEW->doneSeqNum = inst->seqNum; - lastCommitCycle = curTick; - -#if FULL_SYSTEM - int count = 0; - Addr oldpc; - do { - if (count == 0) - assert(!thread->inSyscall && !thread->trapPending); - oldpc = thread->readPC(); - cpu->system->pcEventQueue.service( - thread->getXCProxy()); - count++; - } while (oldpc != thread->readPC()); - if (count > 1) { - DPRINTF(BE, "PC skip function event, stopping commit\n"); - xcSquash = true; - return false; - } -#endif - return true; -} - -template -void -LWBackEnd::commitInsts() -{ - // Not sure this should be a loop or not. - int inst_num = 0; - while (!instList.empty() && inst_num < commitWidth) { - if (instList.back()->isSquashed()) { - instList.back()->clearDependents(); - instList.pop_back(); - --numInsts; - ROBSquashedInsts[instList.back()->threadNumber]++; - continue; - } - - if (!commitInst(inst_num++)) { - DPRINTF(BE, "Can't commit, Instruction [sn:%lli] PC " - "%#x is head of ROB and not ready\n", - instList.back()->seqNum, instList.back()->readPC()); - --inst_num; - break; - } - } - n_committed_dist.sample(inst_num); -} - -template -void -LWBackEnd::squash(const InstSeqNum &sn) -{ - LSQ.squash(sn); - - int freed_regs = 0; - InstListIt waiting_list_end = waitingList.end(); - InstListIt insts_it = waitingList.begin(); - - while (insts_it != waiting_list_end && (*insts_it)->seqNum > sn) - { - if ((*insts_it)->isSquashed()) { - ++insts_it; - continue; - } - DPRINTF(BE, "Squashing instruction on waitingList PC %#x, [sn:%lli].\n", - (*insts_it)->readPC(), - (*insts_it)->seqNum); - - if ((*insts_it)->isMemRef()) { - DPRINTF(BE, "Squashing a waiting mem op [sn:%lli]\n", - (*insts_it)->seqNum); - removeWaitingMemOp((*insts_it)); - } - - waitingList.erase(insts_it++); - waitingInsts--; - } - assert(waitingInsts >= 0); - - insts_it = instList.begin(); - - while (!instList.empty() && (*insts_it)->seqNum > sn) - { - if ((*insts_it)->isSquashed()) { - ++insts_it; - continue; - } - DPRINTF(BE, "Squashing instruction on inst list PC 
%#x, [sn:%lli].\n", - (*insts_it)->readPC(), - (*insts_it)->seqNum); - - // Mark the instruction as squashed, and ready to commit so that - // it can drain out of the pipeline. - (*insts_it)->setSquashed(); - - (*insts_it)->setCanCommit(); - - (*insts_it)->removeInROB(); - - for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) { - DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i); - DPRINTF(BE, "Commit rename map setting reg %i to [sn:%lli]\n", - (int)(*insts_it)->destRegIdx(i), prev_dest->seqNum); - renameTable[(*insts_it)->destRegIdx(i)] = prev_dest; - ++freed_regs; - } - - (*insts_it)->clearDependents(); - - squashedInsts[(*insts_it)->threadNumber]++; - - instList.erase(insts_it++); - --numInsts; - } - - insts_it = waitingList.begin(); - while (!waitingList.empty() && insts_it != waitingList.end()) { - if ((*insts_it)->seqNum < sn) { - ++insts_it; - continue; - } - assert((*insts_it)->isSquashed()); - - waitingList.erase(insts_it++); - waitingInsts--; - } - - while (memBarrier && memBarrier->seqNum > sn) { - DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously " - "squashed)\n", memBarrier->seqNum); - memBarrier->clearMemDependents(); - if (memBarrier->memDepReady()) { - DPRINTF(BE, "No previous barrier\n"); - memBarrier = NULL; - } else { - std::list &srcs = memBarrier->getMemSrcs(); - memBarrier = srcs.front(); - srcs.pop_front(); - assert(srcs.empty()); - DPRINTF(BE, "Previous barrier: [sn:%lli]\n", - memBarrier->seqNum); - } - } - - frontEnd->addFreeRegs(freed_regs); -} - -template -void -LWBackEnd::squashFromXC() -{ - InstSeqNum squashed_inst = robEmpty() ? 0 : instList.back()->seqNum - 1; - squash(squashed_inst); - frontEnd->squash(squashed_inst, thread->readPC(), - false, false); - frontEnd->interruptPending = false; - - thread->trapPending = false; - thread->inSyscall = false; - xcSquash = false; - commitStatus = Running; -} - -template -void -LWBackEnd::squashFromTrap() -{ - InstSeqNum squashed_inst = robEmpty() ? 
0 : instList.back()->seqNum - 1; - squash(squashed_inst); - frontEnd->squash(squashed_inst, thread->readPC(), - false, false); - frontEnd->interruptPending = false; - - thread->trapPending = false; - thread->inSyscall = false; - trapSquash = false; - commitStatus = Running; -} - -template -void -LWBackEnd::squashDueToBranch(DynInstPtr &inst) -{ - // Update the branch predictor state I guess - DPRINTF(BE, "Squashing due to branch [sn:%lli], will restart at PC %#x\n", - inst->seqNum, inst->readNextPC()); - squash(inst->seqNum); - frontEnd->squash(inst->seqNum, inst->readNextPC(), - true, inst->mispredicted()); -} - -template -void -LWBackEnd::squashDueToMemViolation(DynInstPtr &inst) -{ - // Update the branch predictor state I guess - DPRINTF(BE, "Squashing due to violation [sn:%lli], will restart at PC %#x\n", - inst->seqNum, inst->readNextPC()); - squash(inst->seqNum); - frontEnd->squash(inst->seqNum, inst->readNextPC(), - false, inst->mispredicted()); -} - -template -void -LWBackEnd::squashDueToMemBlocked(DynInstPtr &inst) -{ - DPRINTF(IEW, "Memory blocked, squashing load and younger insts, " - "PC: %#x [sn:%i].\n", inst->readPC(), inst->seqNum); - - squash(inst->seqNum - 1); - frontEnd->squash(inst->seqNum - 1, inst->readPC()); -} - -template -void -LWBackEnd::fetchFault(Fault &fault) -{ - faultFromFetch = fault; - fetchHasFault = true; -} - -template -void -LWBackEnd::switchOut() -{ - switchPending = true; -} - -template -void -LWBackEnd::doSwitchOut() -{ - switchedOut = true; - switchPending = false; - // Need to get rid of all committed, non-speculative state and write it - // to memory/XC. In this case this is stores that have committed and not - // yet written back. 
- assert(robEmpty()); - assert(!LSQ.hasStoresToWB()); - - LSQ.switchOut(); - - squash(0); -} - -template -void -LWBackEnd::takeOverFrom(ExecContext *old_xc) -{ - switchedOut = false; - xcSquash = false; - trapSquash = false; - - numInsts = 0; - numWaitingMemOps = 0; - waitingMemOps.clear(); - waitingInsts = 0; - switchedOut = false; - dispatchStatus = Running; - commitStatus = Running; - LSQ.takeOverFrom(old_xc); -} - -template -void -LWBackEnd::updateExeInstStats(DynInstPtr &inst) -{ - int thread_number = inst->threadNumber; - - // - // Pick off the software prefetches - // -#ifdef TARGET_ALPHA - if (inst->isDataPrefetch()) - exe_swp[thread_number]++; - else - exe_inst[thread_number]++; -#else - exe_inst[thread_number]++; -#endif - - // - // Control operations - // - if (inst->isControl()) - exe_branches[thread_number]++; - - // - // Memory operations - // - if (inst->isMemRef()) { - exe_refs[thread_number]++; - - if (inst->isLoad()) - exe_loads[thread_number]++; - } -} - -template -void -LWBackEnd::updateComInstStats(DynInstPtr &inst) -{ - unsigned tid = inst->threadNumber; - - // keep an instruction count - thread->numInst++; - thread->numInsts++; - - cpu->numInst++; - // - // Pick off the software prefetches - // -#ifdef TARGET_ALPHA - if (inst->isDataPrefetch()) { - stat_com_swp[tid]++; - } else { - stat_com_inst[tid]++; - } -#else - stat_com_inst[tid]++; -#endif - - // - // Control Instructions - // - if (inst->isControl()) - stat_com_branches[tid]++; - - // - // Memory references - // - if (inst->isMemRef()) { - stat_com_refs[tid]++; - - if (inst->isLoad()) { - stat_com_loads[tid]++; - } - } - - if (inst->isMemBarrier()) { - stat_com_membars[tid]++; - } -} - -template -void -LWBackEnd::dumpInsts() -{ - int num = 0; - int valid_num = 0; - - InstListIt inst_list_it = --(instList.end()); - - cprintf("ExeList size: %i\n", exeList.size()); - - cprintf("Inst list size: %i\n", instList.size()); - - while (inst_list_it != instList.end()) - { - 
cprintf("Instruction:%i\n", - num); - if (!(*inst_list_it)->isSquashed()) { - if (!(*inst_list_it)->isIssued()) { - ++valid_num; - cprintf("Count:%i\n", valid_num); - } else if ((*inst_list_it)->isMemRef() && - !(*inst_list_it)->memOpDone) { - // Loads that have not been marked as executed still count - // towards the total instructions. - ++valid_num; - cprintf("Count:%i\n", valid_num); - } - } - - cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" - "Issued:%i\nSquashed:%i\n", - (*inst_list_it)->readPC(), - (*inst_list_it)->seqNum, - (*inst_list_it)->threadNumber, - (*inst_list_it)->isIssued(), - (*inst_list_it)->isSquashed()); - - if ((*inst_list_it)->isMemRef()) { - cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); - } - - cprintf("\n"); - - inst_list_it--; - ++num; - } - - cprintf("Waiting list size: %i\n", waitingList.size()); - - inst_list_it = --(waitingList.end()); - - while (inst_list_it != waitingList.end()) - { - cprintf("Instruction:%i\n", - num); - if (!(*inst_list_it)->isSquashed()) { - if (!(*inst_list_it)->isIssued()) { - ++valid_num; - cprintf("Count:%i\n", valid_num); - } else if ((*inst_list_it)->isMemRef() && - !(*inst_list_it)->memOpDone) { - // Loads that have not been marked as executed still count - // towards the total instructions. 
- ++valid_num; - cprintf("Count:%i\n", valid_num); - } - } - - cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" - "Issued:%i\nSquashed:%i\n", - (*inst_list_it)->readPC(), - (*inst_list_it)->seqNum, - (*inst_list_it)->threadNumber, - (*inst_list_it)->isIssued(), - (*inst_list_it)->isSquashed()); - - if ((*inst_list_it)->isMemRef()) { - cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); - } - - cprintf("\n"); - - inst_list_it--; - ++num; - } - - cprintf("waitingMemOps list size: %i\n", waitingMemOps.size()); - - MemIt waiting_it = waitingMemOps.begin(); - - while (waiting_it != waitingMemOps.end()) - { - cprintf("[sn:%lli] ", (*waiting_it)); - waiting_it++; - ++num; - } - cprintf("\n"); -} diff --git a/cpu/ozone/lw_lsq.cc b/cpu/ozone/lw_lsq.cc deleted file mode 100644 index 922228b09..000000000 --- a/cpu/ozone/lw_lsq.cc +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/lw_lsq_impl.hh" - -// Force the instantiation of LDSTQ for all the implementations we care about. -template class OzoneLWLSQ; - diff --git a/cpu/ozone/lw_lsq.hh b/cpu/ozone/lw_lsq.hh deleted file mode 100644 index 6fe343b42..000000000 --- a/cpu/ozone/lw_lsq.hh +++ /dev/null @@ -1,657 +0,0 @@ -/* - * Copyright (c) 2004-2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __CPU_OZONE_LW_LSQ_HH__ -#define __CPU_OZONE_LW_LSQ_HH__ - -#include -#include -#include -#include - -#include "arch/faults.hh" -#include "arch/isa_traits.hh" -#include "config/full_system.hh" -#include "base/hashmap.hh" -#include "cpu/inst_seq.hh" -#include "mem/mem_interface.hh" -//#include "mem/page_table.hh" -#include "sim/debug.hh" -#include "sim/sim_object.hh" - -//class PageTable; - -/** - * Class that implements the actual LQ and SQ for each specific thread. - * Both are circular queues; load entries are freed upon committing, while - * store entries are freed once they writeback. The LSQUnit tracks if there - * are memory ordering violations, and also detects partial load to store - * forwarding cases (a store only has part of a load's data) that requires - * the load to wait until the store writes back. In the former case it - * holds onto the instruction until the dependence unit looks at it, and - * in the latter it stalls the LSQ until the store writes back. At that - * point the load is replayed. 
- */ -template -class OzoneLWLSQ { - public: - typedef typename Impl::Params Params; - typedef typename Impl::FullCPU FullCPU; - typedef typename Impl::BackEnd BackEnd; - typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::IssueStruct IssueStruct; - - typedef TheISA::IntReg IntReg; - - typedef typename std::map::iterator LdMapIt; - - private: - class StoreCompletionEvent : public Event { - public: - /** Constructs a store completion event. */ - StoreCompletionEvent(DynInstPtr &inst, BackEnd *be, - Event *wb_event, OzoneLWLSQ *lsq_ptr); - - /** Processes the store completion event. */ - void process(); - - /** Returns the description of this event. */ - const char *description(); - - private: - /** The store index of the store being written back. */ - DynInstPtr inst; - - BackEnd *be; - /** The writeback event for the store. Needed for store - * conditionals. - */ - public: - Event *wbEvent; - bool miss; - private: - /** The pointer to the LSQ unit that issued the store. */ - OzoneLWLSQ *lsqPtr; - }; - - public: - /** Constructs an LSQ unit. init() must be called prior to use. */ - OzoneLWLSQ(); - - /** Initializes the LSQ unit with the specified number of entries. */ - void init(Params *params, unsigned maxLQEntries, - unsigned maxSQEntries, unsigned id); - - /** Returns the name of the LSQ unit. */ - std::string name() const; - - /** Sets the CPU pointer. */ - void setCPU(FullCPU *cpu_ptr) - { cpu = cpu_ptr; } - - /** Sets the back-end stage pointer. */ - void setBE(BackEnd *be_ptr) - { be = be_ptr; } - - /** Sets the page table pointer. */ -// void setPageTable(PageTable *pt_ptr); - - /** Ticks the LSQ unit, which in this case only resets the number of - * used cache ports. - * @todo: Move the number of used ports up to the LSQ level so it can - * be shared by all LSQ units. - */ - void tick() { usedPorts = 0; } - - /** Inserts an instruction. */ - void insert(DynInstPtr &inst); - /** Inserts a load instruction. 
*/ - void insertLoad(DynInstPtr &load_inst); - /** Inserts a store instruction. */ - void insertStore(DynInstPtr &store_inst); - - /** Executes a load instruction. */ - Fault executeLoad(DynInstPtr &inst); - - /** Executes a store instruction. */ - Fault executeStore(DynInstPtr &inst); - - /** Commits the head load. */ - void commitLoad(); - /** Commits loads older than a specific sequence number. */ - void commitLoads(InstSeqNum &youngest_inst); - - /** Commits stores older than a specific sequence number. */ - void commitStores(InstSeqNum &youngest_inst); - - /** Writes back stores. */ - void writebackStores(); - - // @todo: Include stats in the LSQ unit. - //void regStats(); - - /** Clears all the entries in the LQ. */ - void clearLQ(); - - /** Clears all the entries in the SQ. */ - void clearSQ(); - - /** Resizes the LQ to a given size. */ - void resizeLQ(unsigned size); - - /** Resizes the SQ to a given size. */ - void resizeSQ(unsigned size); - - /** Squashes all instructions younger than a specific sequence number. */ - void squash(const InstSeqNum &squashed_num); - - /** Returns if there is a memory ordering violation. Value is reset upon - * call to getMemDepViolator(). - */ - bool violation() { return memDepViolator; } - - /** Returns the memory ordering violator. */ - DynInstPtr getMemDepViolator(); - - /** Returns if a load became blocked due to the memory system. It clears - * the bool's value upon this being called. - */ - bool loadBlocked() - { return isLoadBlocked; } - - void clearLoadBlocked() - { isLoadBlocked = false; } - - bool isLoadBlockedHandled() - { return loadBlockedHandled; } - - void setLoadBlockedHandled() - { loadBlockedHandled = true; } - - /** Returns the number of free entries (min of free LQ and SQ entries). */ - unsigned numFreeEntries(); - - /** Returns the number of loads ready to execute. */ - int numLoadsReady(); - - /** Returns the number of loads in the LQ. 
*/ - int numLoads() { return loads; } - - /** Returns the number of stores in the SQ. */ - int numStores() { return stores; } - - /** Returns if either the LQ or SQ is full. */ - bool isFull() { return lqFull() || sqFull(); } - - /** Returns if the LQ is full. */ - bool lqFull() { return loads >= (LQEntries - 1); } - - /** Returns if the SQ is full. */ - bool sqFull() { return stores >= (SQEntries - 1); } - - /** Debugging function to dump instructions in the LSQ. */ - void dumpInsts(); - - /** Returns the number of instructions in the LSQ. */ - unsigned getCount() { return loads + stores; } - - /** Returns if there are any stores to writeback. */ - bool hasStoresToWB() { return storesToWB; } - - /** Returns the number of stores to writeback. */ - int numStoresToWB() { return storesToWB; } - - /** Returns if the LSQ unit will writeback on this cycle. */ - bool willWB() { return storeQueue.back().canWB && - !storeQueue.back().completed && - !dcacheInterface->isBlocked(); } - - void switchOut(); - - void takeOverFrom(ExecContext *old_xc = NULL); - - bool isSwitchedOut() { return switchedOut; } - - bool switchedOut; - - private: - /** Completes the store at the specified index. */ - void completeStore(int store_idx); - - private: - /** Pointer to the CPU. */ - FullCPU *cpu; - - /** Pointer to the back-end stage. */ - BackEnd *be; - - /** Pointer to the D-cache. */ - MemInterface *dcacheInterface; - - /** Pointer to the page table. */ -// PageTable *pTable; - - public: - struct SQEntry { - /** Constructs an empty store queue entry. */ - SQEntry() - : inst(NULL), req(NULL), size(0), data(0), - canWB(0), committed(0), completed(0), lqIt(NULL) - { } - - /** Constructs a store queue entry for a given instruction. */ - SQEntry(DynInstPtr &_inst) - : inst(_inst), req(NULL), size(0), data(0), - canWB(0), committed(0), completed(0), lqIt(NULL) - { } - - /** The store instruction. */ - DynInstPtr inst; - /** The memory request for the store. 
*/ - MemReqPtr req; - /** The size of the store. */ - int size; - /** The store data. */ - IntReg data; - /** Whether or not the store can writeback. */ - bool canWB; - /** Whether or not the store is committed. */ - bool committed; - /** Whether or not the store is completed. */ - bool completed; - - typename std::list::iterator lqIt; - }; - - enum Status { - Running, - Idle, - DcacheMissStall, - DcacheMissSwitch - }; - - private: - /** The OzoneLWLSQ thread id. */ - unsigned lsqID; - - /** The status of the LSQ unit. */ - Status _status; - - /** The store queue. */ - std::list storeQueue; - /** The load queue. */ - std::list loadQueue; - - typedef typename std::list::iterator SQIt; - typedef typename std::list::iterator LQIt; - - - struct HashFn { - size_t operator() (const int a) const - { - unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF; - - return hash; - } - }; - - m5::hash_map SQItHash; - std::queue SQIndices; - m5::hash_map LQItHash; - std::queue LQIndices; - - typedef typename m5::hash_map::iterator LQHashIt; - typedef typename m5::hash_map::iterator SQHashIt; - // Consider making these 16 bits - /** The number of LQ entries. */ - unsigned LQEntries; - /** The number of SQ entries. */ - unsigned SQEntries; - - /** The number of load instructions in the LQ. */ - int loads; - /** The number of store instructions in the SQ (excludes those waiting to - * writeback). - */ - int stores; - - int storesToWB; - - /// @todo Consider moving to a more advanced model with write vs read ports - /** The number of cache ports available each cycle. */ - int cachePorts; - - /** The number of used cache ports in this cycle. */ - int usedPorts; - - //list mshrSeqNums; - - //Stats::Scalar<> dcacheStallCycles; - Counter lastDcacheStall; - - // Make these per thread? - /** Whether or not the LSQ is stalled. */ - bool stalled; - /** The store that causes the stall due to partial store to load - * forwarding. 
- */ - InstSeqNum stallingStoreIsn; - /** The index of the above store. */ - LQIt stallingLoad; - - /** Whether or not a load is blocked due to the memory system. It is - * cleared when this value is checked via loadBlocked(). - */ - bool isLoadBlocked; - - bool loadBlockedHandled; - - InstSeqNum blockedLoadSeqNum; - - /** The oldest faulting load instruction. */ - DynInstPtr loadFaultInst; - /** The oldest faulting store instruction. */ - DynInstPtr storeFaultInst; - - /** The oldest load that caused a memory ordering violation. */ - DynInstPtr memDepViolator; - - // Will also need how many read/write ports the Dcache has. Or keep track - // of that in stage that is one level up, and only call executeLoad/Store - // the appropriate number of times. - - public: - /** Executes the load at the given index. */ - template - Fault read(MemReqPtr &req, T &data, int load_idx); - - /** Executes the store at the given index. */ - template - Fault write(MemReqPtr &req, T &data, int store_idx); - - /** Returns the sequence number of the head load instruction. */ - InstSeqNum getLoadHeadSeqNum() - { - if (!loadQueue.empty()) { - return loadQueue.back()->seqNum; - } else { - return 0; - } - - } - - /** Returns the sequence number of the head store instruction. */ - InstSeqNum getStoreHeadSeqNum() - { - if (!storeQueue.empty()) { - return storeQueue.back().inst->seqNum; - } else { - return 0; - } - - } - - /** Returns whether or not the LSQ unit is stalled. 
*/ - bool isStalled() { return stalled; } -}; - -template -template -Fault -OzoneLWLSQ::read(MemReqPtr &req, T &data, int load_idx) -{ - //Depending on issue2execute delay a squashed load could - //execute if it is found to be squashed in the same - //cycle it is scheduled to execute - typename m5::hash_map::iterator - lq_hash_it = LQItHash.find(load_idx); - assert(lq_hash_it != LQItHash.end()); - DynInstPtr inst = (*(*lq_hash_it).second); - - if (inst->isExecuted()) { - panic("Should not reach this point with split ops!"); - - memcpy(&data,req->data,req->size); - - return NoFault; - } - - // Make sure this isn't an uncacheable access - // A bit of a hackish way to get uncached accesses to work only if they're - // at the head of the LSQ and are ready to commit (at the head of the ROB - // too). - // @todo: Fix uncached accesses. - if (req->flags & UNCACHEABLE && - (inst != loadQueue.back() || !inst->reachedCommit)) { - DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of " - "commit/LSQ!\n", - inst->seqNum); - be->rescheduleMemInst(inst); - return TheISA::genMachineCheckFault(); - } - - // Check the SQ for any previous stores that might lead to forwarding - SQIt sq_it = storeQueue.begin(); - int store_size = 0; - - DPRINTF(OzoneLSQ, "Read called, load idx: %i addr: %#x\n", - load_idx, req->paddr); - - while (sq_it != storeQueue.end() && (*sq_it).inst->seqNum > inst->seqNum) - ++sq_it; - - while (1) { - // End once we've reached the top of the LSQ - if (sq_it == storeQueue.end()) { - break; - } - - assert((*sq_it).inst); - - store_size = (*sq_it).size; - - if (store_size == 0) { - sq_it++; - continue; - } - - // Check if the store data is within the lower and upper bounds of - // addresses that the request needs. 
- bool store_has_lower_limit = - req->vaddr >= (*sq_it).inst->effAddr; - bool store_has_upper_limit = - (req->vaddr + req->size) <= ((*sq_it).inst->effAddr + - store_size); - bool lower_load_has_store_part = - req->vaddr < ((*sq_it).inst->effAddr + - store_size); - bool upper_load_has_store_part = - (req->vaddr + req->size) > (*sq_it).inst->effAddr; - - // If the store's data has all of the data needed, we can forward. - if (store_has_lower_limit && store_has_upper_limit) { - - int shift_amt = req->vaddr & (store_size - 1); - // Assumes byte addressing - shift_amt = shift_amt << 3; - - // Cast this to type T? - data = (*sq_it).data >> shift_amt; - - req->cmd = Read; - assert(!req->completionEvent); - req->completionEvent = NULL; - req->time = curTick; - assert(!req->data); - req->data = new uint8_t[64]; - - memcpy(req->data, &data, req->size); - - DPRINTF(OzoneLSQ, "Forwarding from store [sn:%lli] to load to " - "[sn:%lli] addr %#x, data %#x\n", - (*sq_it).inst->seqNum, inst->seqNum, req->vaddr, *(req->data)); - - typename BackEnd::LdWritebackEvent *wb = - new typename BackEnd::LdWritebackEvent(inst, - be); - - // We'll say this has a 1 cycle load-store forwarding latency - // for now. - // FIXME - Need to make this a parameter. - wb->schedule(curTick); - - // Should keep track of stat for forwarded data - return NoFault; - } else if ((store_has_lower_limit && lower_load_has_store_part) || - (store_has_upper_limit && upper_load_has_store_part) || - (lower_load_has_store_part && upper_load_has_store_part)) { - // This is the partial store-load forwarding case where a store - // has only part of the load's data. - - // If it's already been written back, then don't worry about - // stalling on it. - if ((*sq_it).completed) { - sq_it++; - break; - } - - // Must stall load and force it to retry, so long as it's the oldest - // load that needs to do so. 
- if (!stalled || - (stalled && - inst->seqNum < - (*stallingLoad)->seqNum)) { - stalled = true; - stallingStoreIsn = (*sq_it).inst->seqNum; - stallingLoad = (*lq_hash_it).second; - } - - // Tell IQ/mem dep unit that this instruction will need to be - // rescheduled eventually - be->rescheduleMemInst(inst); - - DPRINTF(OzoneLSQ, "Load-store forwarding mis-match. " - "Store [sn:%lli] to load addr %#x\n", - (*sq_it).inst->seqNum, req->vaddr); - - return NoFault; - } - sq_it++; - } - - // If there's no forwarding case, then go access memory - DPRINTF(OzoneLSQ, "Doing functional access for inst PC %#x\n", - inst->readPC()); - - // Setup MemReq pointer - req->cmd = Read; - req->completionEvent = NULL; - req->time = curTick; - assert(!req->data); - req->data = new uint8_t[64]; - Fault fault = cpu->read(req, data); - memcpy(req->data, &data, sizeof(T)); - - ++usedPorts; - - // if we have a cache, do cache access too - if (dcacheInterface) { - if (dcacheInterface->isBlocked()) { - // There's an older load that's already going to squash. - if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum) - return NoFault; - - isLoadBlocked = true; - loadBlockedHandled = false; - blockedLoadSeqNum = inst->seqNum; - // No fault occurred, even though the interface is blocked. - return NoFault; - } - - DPRINTF(OzoneLSQ, "D-cache: PC:%#x reading from paddr:%#x " - "vaddr:%#x flags:%i\n", - inst->readPC(), req->paddr, req->vaddr, req->flags); - - assert(!req->completionEvent); - req->completionEvent = - new typename BackEnd::LdWritebackEvent(inst, be); - - // Do Cache Access - MemAccessResult result = dcacheInterface->access(req); - - // Ugly hack to get an event scheduled *only* if the access is - // a miss. We really should add first-class support for this - // at some point. 
- // @todo: Probably should support having no events - if (result != MA_HIT) { - DPRINTF(OzoneLSQ, "D-cache miss!\n"); - DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", - inst->seqNum); - - lastDcacheStall = curTick; - - _status = DcacheMissStall; - - } else { - DPRINTF(OzoneLSQ, "D-cache hit!\n"); - } - } else { - fatal("Must use D-cache with new memory system"); - } - - return NoFault; -} - -template -template -Fault -OzoneLWLSQ::write(MemReqPtr &req, T &data, int store_idx) -{ - SQHashIt sq_hash_it = SQItHash.find(store_idx); - assert(sq_hash_it != SQItHash.end()); - - SQIt sq_it = (*sq_hash_it).second; - assert((*sq_it).inst); - - DPRINTF(OzoneLSQ, "Doing write to store idx %i, addr %#x data %#x" - " | [sn:%lli]\n", - store_idx, req->paddr, data, (*sq_it).inst->seqNum); - - (*sq_it).req = req; - (*sq_it).size = sizeof(T); - (*sq_it).data = data; - assert(!req->data); - req->data = new uint8_t[64]; - memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size); - - // This function only writes the data to the store queue, so no fault - // can happen here. - return NoFault; -} - -#endif // __CPU_OZONE_LW_LSQ_HH__ diff --git a/cpu/ozone/lw_lsq_impl.hh b/cpu/ozone/lw_lsq_impl.hh deleted file mode 100644 index f72bbb1cc..000000000 --- a/cpu/ozone/lw_lsq_impl.hh +++ /dev/null @@ -1,874 +0,0 @@ -/* - * Copyright (c) 2004-2006 The Regents of The University of Michigan - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "arch/isa_traits.hh" -#include "base/str.hh" -#include "cpu/ozone/lw_lsq.hh" -#include "cpu/checker/cpu.hh" - -template -OzoneLWLSQ::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst, - BackEnd *_be, - Event *wb_event, - OzoneLWLSQ *lsq_ptr) - : Event(&mainEventQueue), - inst(_inst), - be(_be), - wbEvent(wb_event), - miss(false), - lsqPtr(lsq_ptr) -{ - this->setFlags(Event::AutoDelete); -} - -template -void -OzoneLWLSQ::StoreCompletionEvent::process() -{ - DPRINTF(OzoneLSQ, "Cache miss complete for store [sn:%lli]\n", - inst->seqNum); - - //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum); - -// lsqPtr->cpu->wakeCPU(); - if (lsqPtr->isSwitchedOut()) { - if (wbEvent) - delete wbEvent; - - return; - } - - if (wbEvent) { - wbEvent->process(); - delete wbEvent; - } - - lsqPtr->completeStore(inst->sqIdx); - if (miss) - be->removeDcacheMiss(inst); -} - -template -const char * -OzoneLWLSQ::StoreCompletionEvent::description() -{ - return "LSQ store completion event"; -} - -template -OzoneLWLSQ::OzoneLWLSQ() - : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false), - loadBlockedHandled(false) -{ -} - -template -void -OzoneLWLSQ::init(Params *params, unsigned maxLQEntries, - unsigned maxSQEntries, unsigned id) -{ - DPRINTF(OzoneLSQ, "Creating OzoneLWLSQ%i object.\n",id); - - lsqID = id; - - LQEntries = maxLQEntries; - SQEntries = maxSQEntries; - - for (int i = 0; i < LQEntries * 2; i++) { - LQIndices.push(i); - SQIndices.push(i); - } - - usedPorts = 0; - cachePorts = params->cachePorts; - - dcacheInterface = params->dcacheInterface; - - loadFaultInst = storeFaultInst = memDepViolator = NULL; - - blockedLoadSeqNum = 0; -} - -template -std::string -OzoneLWLSQ::name() const -{ - return "lsqunit"; -} - -template -void -OzoneLWLSQ::clearLQ() -{ - loadQueue.clear(); -} - -template -void -OzoneLWLSQ::clearSQ() -{ - storeQueue.clear(); -} -/* -template -void -OzoneLWLSQ::setPageTable(PageTable *pt_ptr) -{ - 
DPRINTF(OzoneLSQ, "Setting the page table pointer.\n"); - pTable = pt_ptr; -} -*/ -template -void -OzoneLWLSQ::resizeLQ(unsigned size) -{ - assert( size >= LQEntries); - - if (size > LQEntries) { - while (size > loadQueue.size()) { - DynInstPtr dummy; - loadQueue.push_back(dummy); - LQEntries++; - } - } else { - LQEntries = size; - } - -} - -template -void -OzoneLWLSQ::resizeSQ(unsigned size) -{ - if (size > SQEntries) { - while (size > storeQueue.size()) { - SQEntry dummy; - storeQueue.push_back(dummy); - SQEntries++; - } - } else { - SQEntries = size; - } -} - -template -void -OzoneLWLSQ::insert(DynInstPtr &inst) -{ - // Make sure we really have a memory reference. - assert(inst->isMemRef()); - - // Make sure it's one of the two classes of memory references. - assert(inst->isLoad() || inst->isStore()); - - if (inst->isLoad()) { - insertLoad(inst); - } else { - insertStore(inst); - } -} - -template -void -OzoneLWLSQ::insertLoad(DynInstPtr &load_inst) -{ - assert(loads < LQEntries * 2); - assert(!LQIndices.empty()); - int load_index = LQIndices.front(); - LQIndices.pop(); - - DPRINTF(OzoneLSQ, "Inserting load PC %#x, idx:%i [sn:%lli]\n", - load_inst->readPC(), load_index, load_inst->seqNum); - - load_inst->lqIdx = load_index; - - loadQueue.push_front(load_inst); - LQItHash[load_index] = loadQueue.begin(); - - ++loads; -} - -template -void -OzoneLWLSQ::insertStore(DynInstPtr &store_inst) -{ - // Make sure it is not full before inserting an instruction. 
- assert(stores - storesToWB < SQEntries); - - assert(!SQIndices.empty()); - int store_index = SQIndices.front(); - SQIndices.pop(); - - DPRINTF(OzoneLSQ, "Inserting store PC %#x, idx:%i [sn:%lli]\n", - store_inst->readPC(), store_index, store_inst->seqNum); - - store_inst->sqIdx = store_index; - SQEntry entry(store_inst); - if (loadQueue.empty()) { - entry.lqIt = loadQueue.end(); - } else { - entry.lqIt = loadQueue.begin(); - } - storeQueue.push_front(entry); - - SQItHash[store_index] = storeQueue.begin(); - - ++stores; -} - -template -typename Impl::DynInstPtr -OzoneLWLSQ::getMemDepViolator() -{ - DynInstPtr temp = memDepViolator; - - memDepViolator = NULL; - - return temp; -} - -template -unsigned -OzoneLWLSQ::numFreeEntries() -{ - unsigned free_lq_entries = LQEntries - loads; - unsigned free_sq_entries = SQEntries - stores; - - // Both the LQ and SQ entries have an extra dummy entry to differentiate - // empty/full conditions. Subtract 1 from the free entries. - if (free_lq_entries < free_sq_entries) { - return free_lq_entries - 1; - } else { - return free_sq_entries - 1; - } -} - -template -int -OzoneLWLSQ::numLoadsReady() -{ - int retval = 0; - LQIt lq_it = loadQueue.begin(); - LQIt end_it = loadQueue.end(); - - while (lq_it != end_it) { - if ((*lq_it)->readyToIssue()) { - ++retval; - } - } - - return retval; -} - -template -Fault -OzoneLWLSQ::executeLoad(DynInstPtr &inst) -{ - // Execute a specific load. - Fault load_fault = NoFault; - - DPRINTF(OzoneLSQ, "Executing load PC %#x, [sn:%lli]\n", - inst->readPC(),inst->seqNum); - - // Make sure it's really in the list. - // Normally it should always be in the list. However, - /* due to a syscall it may not be the list. 
-#ifdef DEBUG - int i = loadHead; - while (1) { - if (i == loadTail && !find(inst)) { - assert(0 && "Load not in the queue!"); - } else if (loadQueue[i] == inst) { - break; - } - - i = i + 1; - if (i >= LQEntries) { - i = 0; - } - } -#endif // DEBUG*/ - - load_fault = inst->initiateAcc(); - - // Might want to make sure that I'm not overwriting a previously faulting - // instruction that hasn't been checked yet. - // Actually probably want the oldest faulting load - if (load_fault != NoFault) { - DPRINTF(OzoneLSQ, "Load [sn:%lli] has a fault\n", inst->seqNum); - // Maybe just set it as can commit here, although that might cause - // some other problems with sending traps to the ROB too quickly. - be->instToCommit(inst); -// iewStage->activityThisCycle(); - } - - return load_fault; -} - -template -Fault -OzoneLWLSQ::executeStore(DynInstPtr &store_inst) -{ - // Make sure that a store exists. - assert(stores != 0); - - int store_idx = store_inst->sqIdx; - SQHashIt sq_hash_it = SQItHash.find(store_idx); - assert(sq_hash_it != SQItHash.end()); - DPRINTF(OzoneLSQ, "Executing store PC %#x [sn:%lli]\n", - store_inst->readPC(), store_inst->seqNum); - - SQIt sq_it = (*sq_hash_it).second; - - Fault store_fault = store_inst->initiateAcc(); - - // Store size should now be available. Use it to get proper offset for - // addr comparisons. - int size = (*sq_it).size; - - if (size == 0) { - DPRINTF(OzoneLSQ,"Fault on Store PC %#x, [sn:%lli],Size = 0\n", - store_inst->readPC(),store_inst->seqNum); - - return store_fault; - } - - assert(store_fault == NoFault); - - if (!storeFaultInst) { - if (store_fault != NoFault) { - panic("Fault in a store instruction!"); - storeFaultInst = store_inst; - } else if (store_inst->isStoreConditional()) { - // Store conditionals need to set themselves as able to - // writeback if we haven't had a fault by here. - (*sq_it).canWB = true; - - ++storesToWB; - DPRINTF(OzoneLSQ, "Nonspeculative store! 
storesToWB:%i\n", - storesToWB); - } - } - - LQIt lq_it = --(loadQueue.end()); - - if (!memDepViolator) { - while (lq_it != loadQueue.end()) { - if ((*lq_it)->seqNum < store_inst->seqNum) { - lq_it--; - continue; - } - // Actually should only check loads that have actually executed - // Might be safe because effAddr is set to InvalAddr when the - // dyn inst is created. - - // Must actually check all addrs in the proper size range - // Which is more correct than needs to be. What if for now we just - // assume all loads are quad-word loads, and do the addr based - // on that. - // @todo: Fix this, magic number being used here - if (((*lq_it)->effAddr >> 8) == - (store_inst->effAddr >> 8)) { - // A load incorrectly passed this store. Squash and refetch. - // For now return a fault to show that it was unsuccessful. - memDepViolator = (*lq_it); - - return TheISA::genMachineCheckFault(); - } - - lq_it--; - } - - // If we've reached this point, there was no violation. - memDepViolator = NULL; - } - - return store_fault; -} - -template -void -OzoneLWLSQ::commitLoad() -{ - assert(!loadQueue.empty()); - - DPRINTF(OzoneLSQ, "[sn:%lli] Committing head load instruction, PC %#x\n", - loadQueue.back()->seqNum, loadQueue.back()->readPC()); - - LQIndices.push(loadQueue.back()->lqIdx); - LQItHash.erase(loadQueue.back()->lqIdx); - - loadQueue.pop_back(); - - --loads; -} - -template -void -OzoneLWLSQ::commitLoads(InstSeqNum &youngest_inst) -{ - assert(loads == 0 || !loadQueue.empty()); - - while (loads != 0 && - loadQueue.back()->seqNum <= youngest_inst) { - commitLoad(); - } -} - -template -void -OzoneLWLSQ::commitStores(InstSeqNum &youngest_inst) -{ - assert(stores == 0 || !storeQueue.empty()); - - SQIt sq_it = --(storeQueue.end()); - while (!storeQueue.empty() && sq_it != storeQueue.end()) { - assert((*sq_it).inst); - if (!(*sq_it).canWB) { - if ((*sq_it).inst->seqNum > youngest_inst) { - break; - } - ++storesToWB; - - DPRINTF(OzoneLSQ, "Marking store as able to write back, PC " 
- "%#x [sn:%lli], storesToWB:%i\n", - (*sq_it).inst->readPC(), - (*sq_it).inst->seqNum, - storesToWB); - - (*sq_it).canWB = true; - } - - sq_it--; - } -} - -template -void -OzoneLWLSQ::writebackStores() -{ - SQIt sq_it = --(storeQueue.end()); - while (storesToWB > 0 && - sq_it != storeQueue.end() && - (*sq_it).inst && - (*sq_it).canWB && - usedPorts < cachePorts) { - - DynInstPtr inst = (*sq_it).inst; - - if ((*sq_it).size == 0 && !(*sq_it).completed) { - sq_it--; - completeStore(inst->sqIdx); - - continue; - } - - if (inst->isDataPrefetch() || (*sq_it).committed) { - sq_it--; - continue; - } - - if (dcacheInterface && dcacheInterface->isBlocked()) { - DPRINTF(OzoneLSQ, "Unable to write back any more stores, cache" - " is blocked!\n"); - break; - } - - ++usedPorts; - - assert((*sq_it).req); - assert(!(*sq_it).committed); - - (*sq_it).committed = true; - - MemReqPtr req = (*sq_it).req; - - req->cmd = Write; - req->completionEvent = NULL; - req->time = curTick; - - switch((*sq_it).size) { - case 1: - cpu->write(req, (uint8_t &)(*sq_it).data); - break; - case 2: - cpu->write(req, (uint16_t &)(*sq_it).data); - break; - case 4: - cpu->write(req, (uint32_t &)(*sq_it).data); - break; - case 8: - cpu->write(req, (uint64_t &)(*sq_it).data); - break; - default: - panic("Unexpected store size!\n"); - } - if (!(req->flags & LOCKED)) { - (*sq_it).inst->setCompleted(); - if (cpu->checker) { - cpu->checker->tick((*sq_it).inst); - } - } - - DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x " - "to Addr:%#x, data:%#x [sn:%lli]\n", - inst->sqIdx,inst->readPC(), - req->paddr, *(req->data), - inst->seqNum); - - if (dcacheInterface) { - assert(!req->completionEvent); - StoreCompletionEvent *store_event = new - StoreCompletionEvent(inst, be, NULL, this); - req->completionEvent = store_event; - - MemAccessResult result = dcacheInterface->access(req); - - if (isStalled() && - inst->seqNum == stallingStoreIsn) { - DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] " - "load 
[sn:%lli]\n", - stallingStoreIsn, (*stallingLoad)->seqNum); - stalled = false; - stallingStoreIsn = 0; - be->replayMemInst((*stallingLoad)); - } - - if (result != MA_HIT && dcacheInterface->doEvents()) { - store_event->miss = true; - typename BackEnd::LdWritebackEvent *wb = NULL; - if (req->flags & LOCKED) { - wb = new typename BackEnd::LdWritebackEvent(inst, - be); - store_event->wbEvent = wb; - } - - DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n"); - -// DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n", -// inst->seqNum); - - be->addDcacheMiss(inst); - - lastDcacheStall = curTick; - - _status = DcacheMissStall; - - // Increment stat here or something - - sq_it--; - } else { - DPRINTF(OzoneLSQ,"D-Cache: Write Hit on idx:%i !\n", - inst->sqIdx); - -// DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", -// inst->seqNum); - - if (req->flags & LOCKED) { - // Stx_C does not generate a system port - // transaction in the 21264, but that might be - // hard to accomplish in this model. - - typename BackEnd::LdWritebackEvent *wb = - new typename BackEnd::LdWritebackEvent(inst, - be); - store_event->wbEvent = wb; - } - sq_it--; - } - } else { - panic("Must HAVE DCACHE!!!!!\n"); - } - } - - // Not sure this should set it to 0. - usedPorts = 0; - - assert(stores >= 0 && storesToWB >= 0); -} - -template -void -OzoneLWLSQ::squash(const InstSeqNum &squashed_num) -{ - DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!" - "(Loads:%i Stores:%i)\n",squashed_num,loads,stores); - - - LQIt lq_it = loadQueue.begin(); - - while (loads != 0 && (*lq_it)->seqNum > squashed_num) { - assert(!loadQueue.empty()); - // Clear the smart pointer to make sure it is decremented. - DPRINTF(OzoneLSQ,"Load Instruction PC %#x squashed, " - "[sn:%lli]\n", - (*lq_it)->readPC(), - (*lq_it)->seqNum); - - if (isStalled() && lq_it == stallingLoad) { - stalled = false; - stallingStoreIsn = 0; - stallingLoad = NULL; - } - - --loads; - - // Inefficient! 
- LQHashIt lq_hash_it = LQItHash.find((*lq_it)->lqIdx); - assert(lq_hash_it != LQItHash.end()); - LQItHash.erase(lq_hash_it); - LQIndices.push((*lq_it)->lqIdx); - loadQueue.erase(lq_it++); - } - - if (isLoadBlocked) { - if (squashed_num < blockedLoadSeqNum) { - isLoadBlocked = false; - loadBlockedHandled = false; - blockedLoadSeqNum = 0; - } - } - - SQIt sq_it = storeQueue.begin(); - - while (stores != 0 && (*sq_it).inst->seqNum > squashed_num) { - assert(!storeQueue.empty()); - - if ((*sq_it).canWB) { - break; - } - - // Clear the smart pointer to make sure it is decremented. - DPRINTF(OzoneLSQ,"Store Instruction PC %#x idx:%i squashed [sn:%lli]\n", - (*sq_it).inst->readPC(), (*sq_it).inst->sqIdx, - (*sq_it).inst->seqNum); - - // I don't think this can happen. It should have been cleared by the - // stalling load. - if (isStalled() && - (*sq_it).inst->seqNum == stallingStoreIsn) { - panic("Is stalled should have been cleared by stalling load!\n"); - stalled = false; - stallingStoreIsn = 0; - } - - SQHashIt sq_hash_it = SQItHash.find((*sq_it).inst->sqIdx); - assert(sq_hash_it != SQItHash.end()); - SQItHash.erase(sq_hash_it); - SQIndices.push((*sq_it).inst->sqIdx); - (*sq_it).inst = NULL; - (*sq_it).canWB = 0; - - if ((*sq_it).req) { - assert(!(*sq_it).req->completionEvent); - } - (*sq_it).req = NULL; - --stores; - storeQueue.erase(sq_it++); - } -} - -template -void -OzoneLWLSQ::dumpInsts() -{ - cprintf("Load store queue: Dumping instructions.\n"); - cprintf("Load queue size: %i\n", loads); - cprintf("Load queue: "); - - LQIt lq_it = --(loadQueue.end()); - - while (lq_it != loadQueue.end() && (*lq_it)) { - cprintf("[sn:%lli] %#x ", (*lq_it)->seqNum, - (*lq_it)->readPC()); - - lq_it--; - } - - cprintf("\nStore queue size: %i\n", stores); - cprintf("Store queue: "); - - SQIt sq_it = --(storeQueue.end()); - - while (sq_it != storeQueue.end() && (*sq_it).inst) { - cprintf("[sn:%lli]\nPC:%#x\nSize:%i\nCommitted:%i\nCompleted:%i\ncanWB:%i\n", - (*sq_it).inst->seqNum, - 
(*sq_it).inst->readPC(), - (*sq_it).size, - (*sq_it).committed, - (*sq_it).completed, - (*sq_it).canWB); - - sq_it--; - } - - cprintf("\n"); -} - -template -void -OzoneLWLSQ::completeStore(int store_idx) -{ - SQHashIt sq_hash_it = SQItHash.find(store_idx); - assert(sq_hash_it != SQItHash.end()); - SQIt sq_it = (*sq_hash_it).second; - - assert((*sq_it).inst); - (*sq_it).completed = true; - DynInstPtr inst = (*sq_it).inst; - - --storesToWB; - - if (isStalled() && - inst->seqNum == stallingStoreIsn) { - DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] " - "load [sn:%lli]\n", - stallingStoreIsn, (*stallingLoad)->seqNum); - stalled = false; - stallingStoreIsn = 0; - be->replayMemInst((*stallingLoad)); - } - - DPRINTF(OzoneLSQ, "Completing store idx:%i [sn:%lli], storesToWB:%i\n", - inst->sqIdx, inst->seqNum, storesToWB); - - assert(!storeQueue.empty()); - SQItHash.erase(sq_hash_it); - SQIndices.push(inst->sqIdx); - storeQueue.erase(sq_it); - --stores; - - inst->setCompleted(); - if (cpu->checker) { - cpu->checker->tick(inst); - } -} - -template -void -OzoneLWLSQ::switchOut() -{ - assert(storesToWB == 0); - switchedOut = true; - SQIt sq_it = --(storeQueue.end()); - while (storesToWB > 0 && - sq_it != storeQueue.end() && - (*sq_it).inst && - (*sq_it).canWB) { - - DynInstPtr inst = (*sq_it).inst; - - if ((*sq_it).size == 0 && !(*sq_it).completed) { - sq_it--; - continue; - } - - // Store conditionals don't complete until *after* they have written - // back. If it's here and not yet sent to memory, then don't bother - // as it's not part of committed state. 
- if (inst->isDataPrefetch() || (*sq_it).committed) { - sq_it--; - continue; - } else if ((*sq_it).req->flags & LOCKED) { - sq_it--; - assert(!(*sq_it).canWB || - ((*sq_it).canWB && (*sq_it).req->flags & LOCKED)); - continue; - } - - assert((*sq_it).req); - assert(!(*sq_it).committed); - - MemReqPtr req = (*sq_it).req; - (*sq_it).committed = true; - - req->cmd = Write; - req->completionEvent = NULL; - req->time = curTick; - assert(!req->data); - req->data = new uint8_t[64]; - memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size); - - DPRINTF(OzoneLSQ, "Switching out : Writing back store idx:%i PC:%#x " - "to Addr:%#x, data:%#x directly to memory [sn:%lli]\n", - inst->sqIdx,inst->readPC(), - req->paddr, *(req->data), - inst->seqNum); - - switch((*sq_it).size) { - case 1: - cpu->write(req, (uint8_t &)(*sq_it).data); - break; - case 2: - cpu->write(req, (uint16_t &)(*sq_it).data); - break; - case 4: - cpu->write(req, (uint32_t &)(*sq_it).data); - break; - case 8: - cpu->write(req, (uint64_t &)(*sq_it).data); - break; - default: - panic("Unexpected store size!\n"); - } - } - - // Clear the queue to free up resources - storeQueue.clear(); - loadQueue.clear(); - loads = stores = storesToWB = 0; -} - -template -void -OzoneLWLSQ::takeOverFrom(ExecContext *old_xc) -{ - // Clear out any old state. May be redundant if this is the first time - // the CPU is being used. 
- stalled = false; - isLoadBlocked = false; - loadBlockedHandled = false; - switchedOut = false; - - // Could do simple checks here to see if indices are on twice - while (!LQIndices.empty()) - LQIndices.pop(); - while (!SQIndices.empty()) - SQIndices.pop(); - - for (int i = 0; i < LQEntries * 2; i++) { - LQIndices.push(i); - SQIndices.push(i); - } - - usedPorts = 0; - - loadFaultInst = storeFaultInst = memDepViolator = NULL; - - blockedLoadSeqNum = 0; -} diff --git a/cpu/ozone/null_predictor.hh b/cpu/ozone/null_predictor.hh deleted file mode 100644 index d19e2cd1c..000000000 --- a/cpu/ozone/null_predictor.hh +++ /dev/null @@ -1,76 +0,0 @@ - -#ifndef __CPU_OZONE_NULL_PREDICTOR_HH__ -#define __CPU_OZONE_NULL_PREDICTOR_HH__ - -#include "arch/isa_traits.hh" -#include "cpu/inst_seq.hh" - -template -class NullPredictor -{ - public: - typedef typename Impl::Params Params; - typedef typename Impl::DynInstPtr DynInstPtr; - - NullPredictor(Params *p) { } - - struct BPredInfo { - BPredInfo() - : PC(0), nextPC(0) - { } - - BPredInfo(const Addr &pc, const Addr &next_pc) - : PC(pc), nextPC(next_pc) - { } - - Addr PC; - Addr nextPC; - }; - - BPredInfo lookup(Addr &PC) { return BPredInfo(PC, PC+4); } - - void undo(BPredInfo &bp_info) { return; } - - /** - * Predicts whether or not the instruction is a taken branch, and the - * target of the branch if it is taken. - * @param inst The branch instruction. - * @param PC The predicted PC is passed back through this parameter. - * @param tid The thread id. - * @return Returns if the branch is taken or not. - */ - bool predict(DynInstPtr &inst, Addr &PC, unsigned tid) - { return false; } - - /** - * Tells the branch predictor to commit any updates until the given - * sequence number. - * @param done_sn The sequence number to commit any older updates up until. - * @param tid The thread id. - */ - void update(const InstSeqNum &done_sn, unsigned tid) { } - - /** - * Squashes all outstanding updates until a given sequence number. 
- * @param squashed_sn The sequence number to squash any younger updates up - * until. - * @param tid The thread id. - */ - void squash(const InstSeqNum &squashed_sn, unsigned tid) { } - - /** - * Squashes all outstanding updates until a given sequence number, and - * corrects that sn's update with the proper address and taken/not taken. - * @param squashed_sn The sequence number to squash any younger updates up - * until. - * @param corr_target The correct branch target. - * @param actually_taken The correct branch direction. - * @param tid The thread id. - */ - void squash(const InstSeqNum &squashed_sn, const Addr &corr_target, - bool actually_taken, unsigned tid) - { } - -}; - -#endif // __CPU_OZONE_NULL_PREDICTOR_HH__ diff --git a/cpu/ozone/ozone_impl.hh b/cpu/ozone/ozone_impl.hh deleted file mode 100644 index 1f543ec6e..000000000 --- a/cpu/ozone/ozone_impl.hh +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __CPU_OZONE_OZONE_IMPL_HH__ -#define __CPU_OZONE_OZONE_IMPL_HH__ - -#include "arch/alpha/isa_traits.hh" -#include "cpu/o3/bpred_unit.hh" -#include "cpu/ozone/back_end.hh" -#include "cpu/ozone/front_end.hh" -#include "cpu/ozone/inst_queue.hh" -#include "cpu/ozone/lsq_unit.hh" -#include "cpu/ozone/lw_lsq.hh" -#include "cpu/ozone/lw_back_end.hh" -#include "cpu/ozone/null_predictor.hh" -#include "cpu/ozone/dyn_inst.hh" -#include "cpu/ozone/simple_params.hh" - -template -class OzoneCPU; - -template -class OzoneDynInst; - -struct OzoneImpl { - typedef SimpleParams Params; - typedef OzoneCPU OzoneCPU; - typedef OzoneCPU FullCPU; - - // Would like to put these into their own area. 
-// typedef NullPredictor BranchPred; - typedef TwobitBPredUnit BranchPred; - typedef FrontEnd FrontEnd; - // Will need IQ, LSQ eventually - typedef LWBackEnd BackEnd; - - typedef InstQueue InstQueue; - typedef OzoneLWLSQ LdstQueue; - - typedef OzoneDynInst DynInst; - typedef RefCountingPtr DynInstPtr; - - typedef uint64_t IssueStruct; - - enum { - MaxThreads = 1 - }; -}; - -#endif // __CPU_OZONE_OZONE_IMPL_HH__ diff --git a/cpu/ozone/rename_table.cc b/cpu/ozone/rename_table.cc deleted file mode 100644 index fff41903e..000000000 --- a/cpu/ozone/rename_table.cc +++ /dev/null @@ -1,7 +0,0 @@ - -#include "cpu/ozone/rename_table_impl.hh" -#include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/simple_impl.hh" - -template class RenameTable; -template class RenameTable; diff --git a/cpu/ozone/rename_table.hh b/cpu/ozone/rename_table.hh deleted file mode 100644 index 6ee23b21b..000000000 --- a/cpu/ozone/rename_table.hh +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __CPU_OZONE_RENAME_TABLE_HH__ -#define __CPU_OZONE_RENAME_TABLE_HH__ - -#include "arch/isa_traits.hh" - -/** Rename table that holds the rename of each architectural register to - * producing DynInst. Needs to support copying from one table to another. - */ - -template -class RenameTable { - public: - typedef typename Impl::DynInstPtr DynInstPtr; - - RenameTable(); - - void copyFrom(const RenameTable &table_to_copy); - - DynInstPtr &operator [] (int index) - { return table[index]; } - - DynInstPtr table[TheISA::TotalNumRegs]; -}; - -#endif // __CPU_OZONE_RENAME_TABLE_HH__ diff --git a/cpu/ozone/rename_table_impl.hh b/cpu/ozone/rename_table_impl.hh deleted file mode 100644 index 86fc1cc55..000000000 --- a/cpu/ozone/rename_table_impl.hh +++ /dev/null @@ -1,23 +0,0 @@ - -#include // Not really sure what to include to get NULL -#include "cpu/ozone/rename_table.hh" - -template -RenameTable::RenameTable() -{ - // Actually should set these to dummy dyn insts that have the initial value - // and force their values to be initialized. This keeps everything the - // same. 
- for (int i = 0; i < TheISA::TotalNumRegs; ++i) { - table[i] = NULL; - } -} - -template -void -RenameTable::copyFrom(const RenameTable &table_to_copy) -{ - for (int i = 0; i < TheISA::TotalNumRegs; ++i) { - table[i] = table_to_copy.table[i]; - } -} diff --git a/cpu/ozone/simple_impl.hh b/cpu/ozone/simple_impl.hh deleted file mode 100644 index 961bf2ea9..000000000 --- a/cpu/ozone/simple_impl.hh +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __CPU_OZONE_SIMPLE_IMPL_HH__ -#define __CPU_OZONE_SIMPLE_IMPL_HH__ - -#include "arch/isa_traits.hh" -#include "cpu/o3/bpred_unit.hh" -#include "cpu/ozone/cpu.hh" -#include "cpu/ozone/front_end.hh" -#include "cpu/ozone/inorder_back_end.hh" -#include "cpu/ozone/null_predictor.hh" -#include "cpu/ozone/dyn_inst.hh" -#include "cpu/ozone/simple_params.hh" - -//template -//class OzoneCPU; - -template -class OzoneDynInst; - -struct SimpleImpl { - typedef SimpleParams Params; - typedef OzoneCPU OzoneCPU; - typedef OzoneCPU FullCPU; - - // Would like to put these into their own area. 
-// typedef NullPredictor BranchPred; - typedef TwobitBPredUnit BranchPred; - typedef FrontEnd FrontEnd; - // Will need IQ, LSQ eventually - typedef InorderBackEnd BackEnd; - - typedef OzoneDynInst DynInst; - typedef RefCountingPtr DynInstPtr; - - typedef uint64_t IssueStruct; - - enum { - MaxThreads = 1 - }; -}; - -#endif // __CPU_OZONE_SIMPLE_IMPL_HH__ diff --git a/cpu/ozone/simple_params.hh b/cpu/ozone/simple_params.hh deleted file mode 100644 index 647da1781..000000000 --- a/cpu/ozone/simple_params.hh +++ /dev/null @@ -1,165 +0,0 @@ - - -#ifndef __CPU_OZONE_SIMPLE_PARAMS_HH__ -#define __CPU_OZONE_SIMPLE_PARAMS_HH__ - -#include "cpu/ozone/cpu.hh" - -//Forward declarations -class AlphaDTB; -class AlphaITB; -class FUPool; -class FunctionalMemory; -class MemInterface; -class PageTable; -class Process; -class System; - -/** - * This file defines the parameters that will be used for the OzoneCPU. - * This must be defined externally so that the Impl can have a params class - * defined that it can pass to all of the individual stages. 
- */ - -class SimpleParams : public BaseCPU::Params -{ - public: - -#if FULL_SYSTEM - AlphaITB *itb; AlphaDTB *dtb; -#else - std::vector workload; -// Process *process; -#endif // FULL_SYSTEM - - //Page Table - PageTable *pTable; - - FunctionalMemory *mem; - - // - // Caches - // - MemInterface *icacheInterface; - MemInterface *dcacheInterface; - - unsigned cachePorts; - unsigned width; - unsigned frontEndWidth; - unsigned backEndWidth; - unsigned backEndSquashLatency; - unsigned backEndLatency; - unsigned maxInstBufferSize; - unsigned numPhysicalRegs; - unsigned maxOutstandingMemOps; - // - // Fetch - // - unsigned decodeToFetchDelay; - unsigned renameToFetchDelay; - unsigned iewToFetchDelay; - unsigned commitToFetchDelay; - unsigned fetchWidth; - - // - // Decode - // - unsigned renameToDecodeDelay; - unsigned iewToDecodeDelay; - unsigned commitToDecodeDelay; - unsigned fetchToDecodeDelay; - unsigned decodeWidth; - - // - // Rename - // - unsigned iewToRenameDelay; - unsigned commitToRenameDelay; - unsigned decodeToRenameDelay; - unsigned renameWidth; - - // - // IEW - // - unsigned commitToIEWDelay; - unsigned renameToIEWDelay; - unsigned issueToExecuteDelay; - unsigned issueWidth; - unsigned executeWidth; - unsigned executeIntWidth; - unsigned executeFloatWidth; - unsigned executeBranchWidth; - unsigned executeMemoryWidth; - FUPool *fuPool; - - // - // Commit - // - unsigned iewToCommitDelay; - unsigned renameToROBDelay; - unsigned commitWidth; - unsigned squashWidth; - - // - // Branch predictor (BP & BTB) - // - unsigned localPredictorSize; - unsigned localCtrBits; - unsigned localHistoryTableSize; - unsigned localHistoryBits; - unsigned globalPredictorSize; - unsigned globalCtrBits; - unsigned globalHistoryBits; - unsigned choicePredictorSize; - unsigned choiceCtrBits; - - unsigned BTBEntries; - unsigned BTBTagSize; - - unsigned RASSize; - - // - // Load store queue - // - unsigned LQEntries; - unsigned SQEntries; - - // - // Memory dependence - // - 
unsigned SSITSize; - unsigned LFSTSize; - - // - // Miscellaneous - // - unsigned numPhysIntRegs; - unsigned numPhysFloatRegs; - unsigned numIQEntries; - unsigned numROBEntries; - - bool decoupledFrontEnd; - int dispatchWidth; - int wbWidth; - - //SMT Parameters - unsigned smtNumFetchingThreads; - - std::string smtFetchPolicy; - - std::string smtIQPolicy; - unsigned smtIQThreshold; - - std::string smtLSQPolicy; - unsigned smtLSQThreshold; - - std::string smtCommitPolicy; - - std::string smtROBPolicy; - unsigned smtROBThreshold; - - // Probably can get this from somewhere. - unsigned instShiftAmt; -}; - -#endif // __CPU_OZONE_SIMPLE_PARAMS_HH__ diff --git a/cpu/ozone/thread_state.hh b/cpu/ozone/thread_state.hh deleted file mode 100644 index c86c3a720..000000000 --- a/cpu/ozone/thread_state.hh +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Copyright (c) 2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __CPU_OZONE_THREAD_STATE_HH__ -#define __CPU_OZONE_THREAD_STATE_HH__ - -#include "arch/faults.hh" -#include "arch/isa_traits.hh" -#include "cpu/exec_context.hh" -#include "cpu/thread_state.hh" -#include "sim/process.hh" - -class Event; -//class Process; - -#if FULL_SYSTEM -class EndQuiesceEvent; -class FunctionProfile; -class ProfileNode; -#else -class Process; -class FunctionalMemory; -#endif - -// Maybe this ozone thread state should only really have committed state? -// I need to think about why I'm using this and what it's useful for. Clearly -// has benefits for SMT; basically serves same use as CPUExecContext. -// Makes the ExecContext proxy easier. Gives organization/central access point -// to state of a thread that can be accessed normally (i.e. not in-flight -// stuff within a OoO processor). Does this need an XC proxy within it? 
-template -struct OzoneThreadState : public ThreadState { - typedef typename ExecContext::Status Status; - typedef typename Impl::FullCPU FullCPU; - typedef TheISA::MiscReg MiscReg; - -#if FULL_SYSTEM - OzoneThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem) - : ThreadState(-1, _thread_num, _mem), - inSyscall(0), trapPending(0) - { - memset(®s, 0, sizeof(TheISA::RegFile)); - } -#else - OzoneThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid) - : ThreadState(-1, _thread_num, _process->getMemory(), _process, _asid), - cpu(_cpu), inSyscall(0), trapPending(0) - { - memset(®s, 0, sizeof(TheISA::RegFile)); - } - - OzoneThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem, - int _asid) - : ThreadState(-1, _thread_num, _mem, NULL, _asid), - cpu(_cpu), inSyscall(0), trapPending(0) - { - memset(®s, 0, sizeof(TheISA::RegFile)); - } -#endif - - Status _status; - - Status status() const { return _status; } - - void setStatus(Status new_status) { _status = new_status; } - - RenameTable renameTable; - Addr PC; - Addr nextPC; - - // Current instruction - TheISA::MachInst inst; - - TheISA::RegFile regs; - - typename Impl::FullCPU *cpu; - - bool inSyscall; - - bool trapPending; - - ExecContext *xcProxy; - - ExecContext *getXCProxy() { return xcProxy; } - -#if !FULL_SYSTEM - - Fault dummyTranslation(MemReqPtr &req) - { -#if 0 - assert((req->vaddr >> 48 & 0xffff) == 0); -#endif - - // put the asid in the upper 16 bits of the paddr - req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16); - req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16; - return NoFault; - } - Fault translateInstReq(MemReqPtr &req) - { - return dummyTranslation(req); - } - Fault translateDataReadReq(MemReqPtr &req) - { - return dummyTranslation(req); - } - Fault translateDataWriteReq(MemReqPtr &req) - { - return dummyTranslation(req); - } -#else - Fault translateInstReq(MemReqPtr &req) - { - return cpu->itb->translate(req); - } - - 
Fault translateDataReadReq(MemReqPtr &req) - { - return cpu->dtb->translate(req, false); - } - - Fault translateDataWriteReq(MemReqPtr &req) - { - return cpu->dtb->translate(req, true); - } -#endif - - MiscReg readMiscReg(int misc_reg) - { - return regs.miscRegs.readReg(misc_reg); - } - - MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) - { - return regs.miscRegs.readRegWithEffect(misc_reg, fault, xcProxy); - } - - Fault setMiscReg(int misc_reg, const MiscReg &val) - { - return regs.miscRegs.setReg(misc_reg, val); - } - - Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) - { - return regs.miscRegs.setRegWithEffect(misc_reg, val, xcProxy); - } - - uint64_t readPC() - { return PC; } - - void setPC(uint64_t val) - { PC = val; } - - uint64_t readNextPC() - { return nextPC; } - - void setNextPC(uint64_t val) - { nextPC = val; } - - bool misspeculating() { return false; } - - void setInst(TheISA::MachInst _inst) { inst = _inst; } - - Counter readFuncExeInst() { return funcExeInst; } - - void setFuncExeInst(Counter new_val) { funcExeInst = new_val; } -}; - -#endif // __CPU_OZONE_THREAD_STATE_HH__ diff --git a/cpu/quiesce_event.cc b/cpu/quiesce_event.cc deleted file mode 100644 index 37814ae09..000000000 --- a/cpu/quiesce_event.cc +++ /dev/null @@ -1,20 +0,0 @@ - -#include "cpu/exec_context.hh" -#include "cpu/quiesce_event.hh" - -EndQuiesceEvent::EndQuiesceEvent(ExecContext *_xc) - : Event(&mainEventQueue), xc(_xc) -{ -} - -void -EndQuiesceEvent::process() -{ - xc->activate(); -} - -const char* -EndQuiesceEvent::description() -{ - return "End Quiesce Event."; -} diff --git a/cpu/quiesce_event.hh b/cpu/quiesce_event.hh deleted file mode 100644 index 18e88ecce..000000000 --- a/cpu/quiesce_event.hh +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef __CPU_QUIESCE_EVENT_HH__ -#define __CPU_QUIESCE_EVENT_HH__ - -#include "sim/eventq.hh" - -class ExecContext; - -/** Event for timing out quiesce instruction */ -struct EndQuiesceEvent : public Event -{ - /** A pointer to 
the execution context that is quiesced */ - ExecContext *xc; - - EndQuiesceEvent(ExecContext *_xc); - - /** Event process to occur at interrupt*/ - virtual void process(); - - /** Event description */ - virtual const char *description(); -}; - -#endif // __CPU_QUIESCE_EVENT_HH__ diff --git a/cpu/thread_state.hh b/cpu/thread_state.hh deleted file mode 100644 index e09cb12fd..000000000 --- a/cpu/thread_state.hh +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __CPU_THREAD_STATE_HH__ -#define __CPU_THREAD_STATE_HH__ - -#include "cpu/exec_context.hh" - -#if FULL_SYSTEM -class EndQuiesceEvent; -class FunctionProfile; -class ProfileNode; -namespace Kernel { - class Statistics; -}; -#else -class FunctionalMemory; -class Process; -#endif - -/** - * Struct for holding general thread state that is needed across CPU - * models. This includes things such as pointers to the process, - * memory, quiesce events, and certain stats. This can be expanded - * to hold more thread-specific stats within it. - */ -struct ThreadState { -#if FULL_SYSTEM - ThreadState(int _cpuId, int _tid, FunctionalMemory *_mem) - : cpuId(_cpuId), tid(_tid), mem(_mem), lastActivate(0), lastSuspend(0), - profile(NULL), profileNode(NULL), profilePC(0), quiesceEvent(NULL) -#else - ThreadState(int _cpuId, int _tid, FunctionalMemory *_mem, - Process *_process, short _asid) - : cpuId(_cpuId), tid(_tid), mem(_mem), process(_process), asid(_asid) -#endif - { - funcExeInst = 0; - storeCondFailures = 0; - } - - ExecContext::Status status; - - int cpuId; - - // Index of hardware thread context on the CPU that this represents. 
- int tid; - - Counter numInst; - Stats::Scalar<> numInsts; - Stats::Scalar<> numMemRefs; - - // number of simulated loads - Counter numLoad; - Counter startNumLoad; - - FunctionalMemory *mem; // functional storage for process address space - -#if FULL_SYSTEM - Tick lastActivate; - Tick lastSuspend; - - FunctionProfile *profile; - ProfileNode *profileNode; - Addr profilePC; - - EndQuiesceEvent *quiesceEvent; - - Kernel::Statistics *kernelStats; -#else - Process *process; - - // Address space ID. Note that this is used for TIMING cache - // simulation only; all functional memory accesses should use - // one of the FunctionalMemory pointers above. - short asid; - -#endif - - /** - * Temporary storage to pass the source address from copy_load to - * copy_store. - * @todo Remove this temporary when we have a better way to do it. - */ - Addr copySrcAddr; - /** - * Temp storage for the physical source address of a copy. - * @todo Remove this temporary when we have a better way to do it. - */ - Addr copySrcPhysAddr; - - /* - * number of executed instructions, for matching with syscall trace - * points in EIO files. - */ - Counter funcExeInst; - - // - // Count failed store conditionals so we can warn of apparent - // application deadlock situations. 
- unsigned storeCondFailures; -}; - -#endif // __CPU_THREAD_STATE_HH__ diff --git a/python/m5/objects/FUPool.py b/python/m5/objects/FUPool.py deleted file mode 100644 index 5eecfd12f..000000000 --- a/python/m5/objects/FUPool.py +++ /dev/null @@ -1,8 +0,0 @@ -from m5 import * -from FullCPU import OpType -from FullCPU import OpDesc -from FullCPU import FUDesc - -class FUPool(SimObject): - type = 'FUPool' - FUList = VectorParam.FUDesc("list of FU's for this pool") diff --git a/python/m5/objects/OzoneCPU.py b/python/m5/objects/OzoneCPU.py deleted file mode 100644 index 3fca61e28..000000000 --- a/python/m5/objects/OzoneCPU.py +++ /dev/null @@ -1,89 +0,0 @@ -from m5 import * -from BaseCPU import BaseCPU - -class DerivOzoneCPU(BaseCPU): - type = 'DerivOzoneCPU' - - numThreads = Param.Unsigned("number of HW thread contexts") - - if not build_env['FULL_SYSTEM']: - mem = Param.FunctionalMemory(NULL, "memory") - - checker = Param.BaseCPU("Checker CPU") - - width = Param.Unsigned("Width") - frontEndWidth = Param.Unsigned("Front end width") - backEndWidth = Param.Unsigned("Back end width") - backEndSquashLatency = Param.Unsigned("Back end squash latency") - backEndLatency = Param.Unsigned("Back end latency") - maxInstBufferSize = Param.Unsigned("Maximum instruction buffer size") - maxOutstandingMemOps = Param.Unsigned("Maximum number of outstanding memory operations") - decodeToFetchDelay = Param.Unsigned("Decode to fetch delay") - renameToFetchDelay = Param.Unsigned("Rename to fetch delay") - iewToFetchDelay = Param.Unsigned("Issue/Execute/Writeback to fetch " - "delay") - commitToFetchDelay = Param.Unsigned("Commit to fetch delay") - fetchWidth = Param.Unsigned("Fetch width") - - renameToDecodeDelay = Param.Unsigned("Rename to decode delay") - iewToDecodeDelay = Param.Unsigned("Issue/Execute/Writeback to decode " - "delay") - commitToDecodeDelay = Param.Unsigned("Commit to decode delay") - fetchToDecodeDelay = Param.Unsigned("Fetch to decode delay") - decodeWidth = 
Param.Unsigned("Decode width") - - iewToRenameDelay = Param.Unsigned("Issue/Execute/Writeback to rename " - "delay") - commitToRenameDelay = Param.Unsigned("Commit to rename delay") - decodeToRenameDelay = Param.Unsigned("Decode to rename delay") - renameWidth = Param.Unsigned("Rename width") - - commitToIEWDelay = Param.Unsigned("Commit to " - "Issue/Execute/Writeback delay") - renameToIEWDelay = Param.Unsigned("Rename to " - "Issue/Execute/Writeback delay") - issueToExecuteDelay = Param.Unsigned("Issue to execute delay (internal " - "to the IEW stage)") - issueWidth = Param.Unsigned("Issue width") - executeWidth = Param.Unsigned("Execute width") - executeIntWidth = Param.Unsigned("Integer execute width") - executeFloatWidth = Param.Unsigned("Floating point execute width") - executeBranchWidth = Param.Unsigned("Branch execute width") - executeMemoryWidth = Param.Unsigned("Memory execute width") - - iewToCommitDelay = Param.Unsigned("Issue/Execute/Writeback to commit " - "delay") - renameToROBDelay = Param.Unsigned("Rename to reorder buffer delay") - commitWidth = Param.Unsigned("Commit width") - squashWidth = Param.Unsigned("Squash width") - - localPredictorSize = Param.Unsigned("Size of local predictor") - localCtrBits = Param.Unsigned("Bits per counter") - localHistoryTableSize = Param.Unsigned("Size of local history table") - localHistoryBits = Param.Unsigned("Bits for the local history") - globalPredictorSize = Param.Unsigned("Size of global predictor") - globalCtrBits = Param.Unsigned("Bits per counter") - globalHistoryBits = Param.Unsigned("Bits of history") - choicePredictorSize = Param.Unsigned("Size of choice predictor") - choiceCtrBits = Param.Unsigned("Bits of choice counters") - - BTBEntries = Param.Unsigned("Number of BTB entries") - BTBTagSize = Param.Unsigned("Size of the BTB tags, in bits") - - RASSize = Param.Unsigned("RAS size") - - LQEntries = Param.Unsigned("Number of load queue entries") - SQEntries = Param.Unsigned("Number of store queue 
entries") - LFSTSize = Param.Unsigned("Last fetched store table size") - SSITSize = Param.Unsigned("Store set ID table size") - - numPhysIntRegs = Param.Unsigned("Number of physical integer registers") - numPhysFloatRegs = Param.Unsigned("Number of physical floating point " - "registers") - numIQEntries = Param.Unsigned("Number of instruction queue entries") - numROBEntries = Param.Unsigned("Number of reorder buffer entries") - - instShiftAmt = Param.Unsigned("Number of bits to shift instructions by") - - function_trace = Param.Bool(False, "Enable function trace") - function_trace_start = Param.Tick(0, "Cycle to start function trace") diff --git a/python/m5/objects/SimpleOzoneCPU.py b/python/m5/objects/SimpleOzoneCPU.py deleted file mode 100644 index 0d6403383..000000000 --- a/python/m5/objects/SimpleOzoneCPU.py +++ /dev/null @@ -1,86 +0,0 @@ -from m5 import * -from BaseCPU import BaseCPU - -class SimpleOzoneCPU(BaseCPU): - type = 'SimpleOzoneCPU' - - numThreads = Param.Unsigned("number of HW thread contexts") - - if not build_env['FULL_SYSTEM']: - mem = Param.FunctionalMemory(NULL, "memory") - - width = Param.Unsigned("Width") - frontEndWidth = Param.Unsigned("Front end width") - backEndWidth = Param.Unsigned("Back end width") - backEndSquashLatency = Param.Unsigned("Back end squash latency") - backEndLatency = Param.Unsigned("Back end latency") - maxInstBufferSize = Param.Unsigned("Maximum instruction buffer size") - decodeToFetchDelay = Param.Unsigned("Decode to fetch delay") - renameToFetchDelay = Param.Unsigned("Rename to fetch delay") - iewToFetchDelay = Param.Unsigned("Issue/Execute/Writeback to fetch " - "delay") - commitToFetchDelay = Param.Unsigned("Commit to fetch delay") - fetchWidth = Param.Unsigned("Fetch width") - - renameToDecodeDelay = Param.Unsigned("Rename to decode delay") - iewToDecodeDelay = Param.Unsigned("Issue/Execute/Writeback to decode " - "delay") - commitToDecodeDelay = Param.Unsigned("Commit to decode delay") - fetchToDecodeDelay = 
Param.Unsigned("Fetch to decode delay") - decodeWidth = Param.Unsigned("Decode width") - - iewToRenameDelay = Param.Unsigned("Issue/Execute/Writeback to rename " - "delay") - commitToRenameDelay = Param.Unsigned("Commit to rename delay") - decodeToRenameDelay = Param.Unsigned("Decode to rename delay") - renameWidth = Param.Unsigned("Rename width") - - commitToIEWDelay = Param.Unsigned("Commit to " - "Issue/Execute/Writeback delay") - renameToIEWDelay = Param.Unsigned("Rename to " - "Issue/Execute/Writeback delay") - issueToExecuteDelay = Param.Unsigned("Issue to execute delay (internal " - "to the IEW stage)") - issueWidth = Param.Unsigned("Issue width") - executeWidth = Param.Unsigned("Execute width") - executeIntWidth = Param.Unsigned("Integer execute width") - executeFloatWidth = Param.Unsigned("Floating point execute width") - executeBranchWidth = Param.Unsigned("Branch execute width") - executeMemoryWidth = Param.Unsigned("Memory execute width") - - iewToCommitDelay = Param.Unsigned("Issue/Execute/Writeback to commit " - "delay") - renameToROBDelay = Param.Unsigned("Rename to reorder buffer delay") - commitWidth = Param.Unsigned("Commit width") - squashWidth = Param.Unsigned("Squash width") - - localPredictorSize = Param.Unsigned("Size of local predictor") - localCtrBits = Param.Unsigned("Bits per counter") - localHistoryTableSize = Param.Unsigned("Size of local history table") - localHistoryBits = Param.Unsigned("Bits for the local history") - globalPredictorSize = Param.Unsigned("Size of global predictor") - globalCtrBits = Param.Unsigned("Bits per counter") - globalHistoryBits = Param.Unsigned("Bits of history") - choicePredictorSize = Param.Unsigned("Size of choice predictor") - choiceCtrBits = Param.Unsigned("Bits of choice counters") - - BTBEntries = Param.Unsigned("Number of BTB entries") - BTBTagSize = Param.Unsigned("Size of the BTB tags, in bits") - - RASSize = Param.Unsigned("RAS size") - - LQEntries = Param.Unsigned("Number of load queue entries") 
- SQEntries = Param.Unsigned("Number of store queue entries") - LFSTSize = Param.Unsigned("Last fetched store table size") - SSITSize = Param.Unsigned("Store set ID table size") - - numPhysIntRegs = Param.Unsigned("Number of physical integer registers") - numPhysFloatRegs = Param.Unsigned("Number of physical floating point " - "registers") - numIQEntries = Param.Unsigned("Number of instruction queue entries") - numROBEntries = Param.Unsigned("Number of reorder buffer entries") - - instShiftAmt = Param.Unsigned("Number of bits to shift instructions by") - - function_trace = Param.Bool(False, "Enable function trace") - function_trace_start = Param.Tick(0, "Cycle to start function trace") diff --git a/src/SConscript b/src/SConscript index 184c7ccc1..cd0908246 100644 --- a/src/SConscript +++ b/src/SConscript @@ -89,7 +89,9 @@ base_sources = Split(''' cpu/quiesce_event.cc cpu/static_inst.cc cpu/sampler/sampler.cc - + + encumbered/cpu/full/fu_pool.cc + mem/bridge.cc mem/bus.cc mem/connector.cc diff --git a/src/base/traceflags.py b/src/base/traceflags.py index 9797e4cb7..019835a86 100644 --- a/src/base/traceflags.py +++ b/src/base/traceflags.py @@ -53,6 +53,7 @@ baseFlags = [ 'BusBridge', 'Cache', 'Chains', + 'Checker', 'Clock', 'Commit', 'CommitRate', @@ -116,6 +117,44 @@ baseFlags = [ 'IBE', 'BE', 'OzoneLSQ', + 'PCEvent', + 'PCIA', + 'PCIDEV', + 'PciConfigAll', + 'Pipeline', + 'Printf', + 'ROB', + 'Regs', + 'Rename', + 'RenameMap', + 'SQL', + 'Sampler', + 'ScsiCtrl', + 'ScsiDisk', + 'ScsiNone', + 'Serialize', + 'SimpleCPU', + 'SimpleDisk', + 'SimpleDiskData', + 'Sparc', + 'Split', + 'Stack', + 'StatEvents', + 'Stats', + 'StoreSet', + 'Syscall', + 'SyscallVerbose', + 'TCPIP', + 'TLB', + 'Thread', + 'Timer', + 'Tsunami', + 'Uart', + 'VtoPhys', + 'WriteBarrier', + 'Activity', + 'Scoreboard', + 'Writeback', ] # diff --git a/src/cpu/SConscript b/src/cpu/SConscript index a4cbe2aa6..4d5a79ddf 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -116,6 +116,7 @@ if 
'FastCPU' in env['CPU_MODELS']: if 'AlphaFullCPU' in env['CPU_MODELS']: sources += Split(''' + base_dyn_inst.cc o3/2bit_local_pred.cc o3/alpha_dyn_inst.cc o3/alpha_cpu.cc @@ -155,7 +156,6 @@ if 'OzoneSimpleCPU' in env['CPU_MODELS']: if 'OzoneCPU' in env['CPU_MODELS']: sources += Split(''' - ozone/back_end.cc ozone/lsq_unit.cc ozone/lw_back_end.cc ozone/lw_lsq.cc @@ -164,7 +164,6 @@ if 'OzoneCPU' in env['CPU_MODELS']: if 'CheckerCPU' in env['CPU_MODELS']: sources += Split(''' checker/cpu.cc - checker/cpu_builder.cc checker/o3_cpu_builder.cc ''') diff --git a/src/cpu/activity.cc b/src/cpu/activity.cc new file mode 100644 index 000000000..6dcb6e341 --- /dev/null +++ b/src/cpu/activity.cc @@ -0,0 +1,122 @@ + +#include "base/timebuf.hh" +#include "cpu/activity.hh" + +ActivityRecorder::ActivityRecorder(int num_stages, int longest_latency, + int activity) + : activityBuffer(longest_latency, 0), longestLatency(longest_latency), + activityCount(activity), numStages(num_stages) +{ + stageActive = new bool[numStages]; + memset(stageActive, 0, numStages); +} + +void +ActivityRecorder::activity() +{ + if (activityBuffer[0]) { + return; + } + + activityBuffer[0] = true; + + ++activityCount; + + DPRINTF(Activity, "Activity: %i\n", activityCount); +} + +void +ActivityRecorder::advance() +{ + if (activityBuffer[-longestLatency]) { + --activityCount; + + assert(activityCount >= 0); + + DPRINTF(Activity, "Activity: %i\n", activityCount); + + if (activityCount == 0) { + DPRINTF(Activity, "No activity left!\n"); + } + } + + activityBuffer.advance(); +} + +void +ActivityRecorder::activateStage(const int idx) +{ + if (!stageActive[idx]) { + ++activityCount; + + stageActive[idx] = true; + + DPRINTF(Activity, "Activity: %i\n", activityCount); + } else { + DPRINTF(Activity, "Stage %i already active.\n", idx); + } + +// assert(activityCount < longestLatency + numStages + 1); +} + +void +ActivityRecorder::deactivateStage(const int idx) +{ + if (stageActive[idx]) { + --activityCount; + + 
stageActive[idx] = false; + + DPRINTF(Activity, "Activity: %i\n", activityCount); + } else { + DPRINTF(Activity, "Stage %i already inactive.\n", idx); + } + + assert(activityCount >= 0); +} + +void +ActivityRecorder::reset() +{ + activityCount = 0; + memset(stageActive, 0, numStages); + for (int i = 0; i < longestLatency + 1; ++i) + activityBuffer.advance(); +} + +void +ActivityRecorder::dump() +{ + for (int i = 0; i <= longestLatency; ++i) { + cprintf("[Idx:%i %i] ", i, activityBuffer[-i]); + } + + cprintf("\n"); + + for (int i = 0; i < numStages; ++i) { + cprintf("[Stage:%i %i]\n", i, stageActive[i]); + } + + cprintf("\n"); + + cprintf("Activity count: %i\n", activityCount); +} + +void +ActivityRecorder::validate() +{ + int count = 0; + for (int i = 0; i <= longestLatency; ++i) { + if (activityBuffer[-i]) { + count++; + } + } + + for (int i = 0; i < numStages; ++i) { + if (stageActive[i]) { + count++; + } + } + + assert(count == activityCount); +} diff --git a/src/cpu/activity.hh b/src/cpu/activity.hh new file mode 100644 index 000000000..2d53dc4bb --- /dev/null +++ b/src/cpu/activity.hh @@ -0,0 +1,67 @@ + +#ifndef __CPU_ACTIVITY_HH__ +#define __CPU_ACTIVITY_HH__ + +#include "base/timebuf.hh" +#include "base/trace.hh" + +class ActivityRecorder { + public: + ActivityRecorder(int num_stages, int longest_latency, int count); + + /** Records that there is activity this cycle. */ + void activity(); + /** Advances the activity buffer, decrementing the activityCount if active + * communication just left the time buffer, and descheduling the CPU if + * there is no activity. + */ + void advance(); + /** Marks a stage as active. */ + void activateStage(const int idx); + /** Deactivates a stage. 
*/ + void deactivateStage(const int idx); + + int getActivityCount() { return activityCount; } + + void setActivityCount(int count) + { activityCount = count; } + + bool active() { return activityCount; } + + void reset(); + + void dump(); + + void validate(); + + private: + /** Time buffer that tracks if any cycles has active communication + * in them. It should be as long as the longest communication + * latency in the system. Each time any time buffer is written, + * the activity buffer should also be written to. The + * activityBuffer is advanced along with all the other time + * buffers, so it should have a 1 somewhere in it only if there + * is active communication in a time buffer. + */ + TimeBuffer activityBuffer; + + int longestLatency; + + /** Tracks how many stages and cycles of time buffer have + * activity. Stages increment this count when they switch to + * active, and decrement it when they switch to + * inactive. Whenever a cycle that previously had no information + * is written in the time buffer, this is incremented. When a + * cycle that had information exits the time buffer due to age, + * this count is decremented. When the count is 0, there is no + * activity in the CPU, and it can be descheduled. + */ + int activityCount; + + int numStages; + + /** Records which stages are active/inactive. 
*/ + bool *stageActive; +}; + +#endif // __CPU_ACTIVITY_HH__ diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 8641d987d..6f81ed73e 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -229,7 +229,6 @@ BaseCPU::registerExecContexts() #else xc->setCpuId(xc->getProcessPtr()->registerExecContext(xc)); #endif - } } } diff --git a/src/cpu/base_dyn_inst.cc b/src/cpu/base_dyn_inst.cc index 7ab760ae3..1b743e044 100644 --- a/src/cpu/base_dyn_inst.cc +++ b/src/cpu/base_dyn_inst.cc @@ -36,13 +36,13 @@ #include "arch/faults.hh" #include "cpu/exetrace.hh" -#include "mem/mem_req.hh" +#include "mem/request.hh" #include "cpu/base_dyn_inst.hh" #include "cpu/o3/alpha_impl.hh" #include "cpu/o3/alpha_cpu.hh" -#include "cpu/ozone/simple_impl.hh" -#include "cpu/ozone/ozone_impl.hh" +//#include "cpu/ozone/simple_impl.hh" +//#include "cpu/ozone/ozone_impl.hh" using namespace std; using namespace TheISA; @@ -94,8 +94,8 @@ void BaseDynInst::initVars() { req = NULL; - effAddr = MemReq::inval_addr; - physEffAddr = MemReq::inval_addr; + effAddr = 0; + physEffAddr = 0; storeSize = 0; readyRegs = 0; @@ -198,7 +198,7 @@ BaseDynInst::prefetch(Addr addr, unsigned flags) // This is the "functional" implementation of prefetch. Not much // happens here since prefetches don't affect the architectural // state. - +/* // Generate a MemReq so we can translate the effective address. MemReqPtr req = new MemReq(addr, thread->getXCProxy(), 1, flags); req->asid = asid; @@ -226,6 +226,7 @@ BaseDynInst::prefetch(Addr addr, unsigned flags) if (traceData) { traceData->setAddr(addr); } +*/ } template @@ -236,6 +237,7 @@ BaseDynInst::writeHint(Addr addr, int size, unsigned flags) // will casue a TLB miss trap if necessary... not sure whether // that's the best thing to do or not. We don't really need the // MemReq otherwise, since wh64 has no functional effect. 
+/* MemReqPtr req = new MemReq(addr, thread->getXCProxy(), size, flags); req->asid = asid; @@ -255,6 +257,7 @@ BaseDynInst::writeHint(Addr addr, int size, unsigned flags) storeSize = size; storeData = 0; +*/ } /** @@ -264,6 +267,7 @@ template Fault BaseDynInst::copySrcTranslate(Addr src) { +/* MemReqPtr req = new MemReq(src, thread->getXCProxy(), 64); req->asid = asid; @@ -278,6 +282,8 @@ BaseDynInst::copySrcTranslate(Addr src) thread->copySrcPhysAddr = 0; } return fault; +*/ + return NoFault; } /** @@ -287,6 +293,7 @@ template Fault BaseDynInst::copy(Addr dest) { +/* uint8_t data[64]; FunctionalMemory *mem = thread->mem; assert(thread->copySrcPhysAddr || thread->misspeculating()); @@ -305,6 +312,8 @@ BaseDynInst::copy(Addr dest) mem->write(req, data); } return fault; +*/ + return NoFault; } template @@ -432,7 +441,7 @@ template class BaseDynInst; template <> int BaseDynInst::instcount = 0; - +/* // Forward declaration template class BaseDynInst; @@ -446,3 +455,4 @@ template class BaseDynInst; template <> int BaseDynInst::instcount = 0; +*/ diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 388ea4a8d..9ada7c4be 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -32,14 +32,15 @@ #include #include +#include "arch/faults.hh" #include "base/fast_alloc.hh" #include "base/trace.hh" #include "config/full_system.hh" #include "cpu/exetrace.hh" #include "cpu/inst_seq.hh" +#include "cpu/op_class.hh" #include "cpu/static_inst.hh" -#include "encumbered/cpu/full/op_class.hh" -#include "mem/functional/memory_control.hh" +#include "mem/packet.hh" #include "sim/system.hh" /* #include "encumbered/cpu/full/bpred_update.hh" @@ -197,7 +198,11 @@ class BaseDynInst : public FastAlloc, public RefCounted Fault fault; /** The memory request. */ - MemReqPtr req; +// MemReqPtr req; + Request *req; +// Packet pkt; + + uint8_t *memData; /** The effective virtual address (lds & stores only). 
*/ Addr effAddr; @@ -287,12 +292,12 @@ class BaseDynInst : public FastAlloc, public RefCounted * @param p Memory accessed. * @param nbytes Access size. */ - void - trace_mem(Fault fault, - MemCmd cmd, - Addr addr, - void *p, - int nbytes); +// void +// trace_mem(Fault fault, +// MemCmd cmd, +// Addr addr, +// void *p, +// int nbytes); /** Dumps out contents of this BaseDynInst. */ void dump(); @@ -601,7 +606,7 @@ class BaseDynInst : public FastAlloc, public RefCounted void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; } /** Returns the effective address. */ - const Addr &getEA() const { return req->vaddr; } + const Addr &getEA() const { return instEffAddr; } /** Returns whether or not the eff. addr. calculation has been completed. */ bool doneEACalc() { return eaCalcDone; } @@ -637,25 +642,25 @@ inline Fault BaseDynInst::read(Addr addr, T &data, unsigned flags) { if (executed) { + panic("Not supposed to re-execute with split mem ops!"); fault = cpu->read(req, data, lqIdx); return fault; } - req = new MemReq(addr, thread->getXCProxy(), sizeof(T), flags); - req->asid = asid; - req->thread_num = threadNumber; - req->pc = this->PC; + req = new Request(); + req->setVirt(asid, addr, sizeof(T), flags, this->PC); + req->setThreadContext(thread->cpuId, threadNumber); - if ((req->vaddr & (TheISA::VMPageSize - 1)) + req->size > + if ((req->getVaddr() & (TheISA::VMPageSize - 1)) + req->getSize() > TheISA::VMPageSize) { return TheISA::genAlignmentFault(); } fault = cpu->translateDataReadReq(req); - effAddr = req->vaddr; - physEffAddr = req->paddr; - memReqFlags = req->flags; + effAddr = req->getVaddr(); + physEffAddr = req->getPaddr(); + memReqFlags = req->getFlags(); if (fault == NoFault) { #if FULL_SYSTEM @@ -697,22 +702,20 @@ BaseDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) traceData->setData(data); } - req = new MemReq(addr, thread->getXCProxy(), sizeof(T), flags); - - req->asid = asid; - req->thread_num = threadNumber; - req->pc = this->PC; + 
req = new Request(); + req->setVirt(asid, addr, sizeof(T), flags, this->PC); + req->setThreadContext(thread->cpuId, threadNumber); - if ((req->vaddr & (TheISA::VMPageSize - 1)) + req->size > + if ((req->getVaddr() & (TheISA::VMPageSize - 1)) + req->getSize() > TheISA::VMPageSize) { return TheISA::genAlignmentFault(); } fault = cpu->translateDataWriteReq(req); - effAddr = req->vaddr; - physEffAddr = req->paddr; - memReqFlags = req->flags; + effAddr = req->getVaddr(); + physEffAddr = req->getPaddr(); + memReqFlags = req->getFlags(); if (fault == NoFault) { #if FULL_SYSTEM @@ -729,7 +732,7 @@ BaseDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) if (res) { // always return some result to keep misspeculated paths // (which will ignore faults) deterministic - *res = (fault == NoFault) ? req->result : 0; + *res = (fault == NoFault) ? req->getScResult() : 0; } return fault; diff --git a/src/cpu/checker/cpu.cc b/src/cpu/checker/cpu.cc new file mode 100644 index 000000000..bb9ec0445 --- /dev/null +++ b/src/cpu/checker/cpu.cc @@ -0,0 +1,757 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "base/refcnt.hh" +#include "cpu/base.hh" +#include "cpu/base_dyn_inst.hh" +#include "cpu/checker/cpu.hh" +#include "cpu/cpu_exec_context.hh" +#include "cpu/exec_context.hh" +#include "cpu/static_inst.hh" +#include "sim/byteswap.hh" +#include "sim/sim_object.hh" +#include "sim/stats.hh" + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" + +//#include "cpu/ozone/dyn_inst.hh" +//#include "cpu/ozone/ozone_impl.hh" +//#include "cpu/ozone/simple_impl.hh" + +#if FULL_SYSTEM +#include "sim/system.hh" +#include "arch/vtophys.hh" +#endif // FULL_SYSTEM + +using namespace std; +//The CheckerCPU does alpha only +using namespace AlphaISA; + +void +CheckerCPU::init() +{ +} + +CheckerCPU::CheckerCPU(Params *p) + : BaseCPU(p), cpuXC(NULL), xcProxy(NULL) +{ + memReq = new Request(); +// memReq->data = new uint8_t[64]; + + numInst = 0; + startNumInst = 0; + numLoad = 0; + startNumLoad = 0; + youngestSN = 0; + + changedPC = willChangePC = changedNextPC = false; + + exitOnError = p->exitOnError; +#if FULL_SYSTEM + itb = p->itb; + dtb = p->dtb; + systemPtr = NULL; + memPtr = NULL; +#endif +} + +CheckerCPU::~CheckerCPU() +{ +} + +void 
+CheckerCPU::setMemory(MemObject *mem) +{ + memPtr = mem; +#if !FULL_SYSTEM + cpuXC = new CPUExecContext(this, /* thread_num */ 0, NULL, + /* asid */ 0, mem); + + cpuXC->setStatus(ExecContext::Suspended); + xcProxy = cpuXC->getProxy(); + execContexts.push_back(xcProxy); +#else + if (systemPtr) { + cpuXC = new CPUExecContext(this, 0, systemPtr, itb, dtb, memPtr, false); + + cpuXC->setStatus(ExecContext::Suspended); + xcProxy = cpuXC->getProxy(); + execContexts.push_back(xcProxy); + memReq->xc = xcProxy; + delete cpuXC->kernelStats; + cpuXC->kernelStats = NULL; + } +#endif +} + +#if FULL_SYSTEM +void +CheckerCPU::setSystem(System *system) +{ + systemPtr = system; + + if (memPtr) { + cpuXC = new CPUExecContext(this, 0, systemPtr, itb, dtb, memPtr, false); + + cpuXC->setStatus(ExecContext::Suspended); + xcProxy = cpuXC->getProxy(); + execContexts.push_back(xcProxy); + memReq->xc = xcProxy; + delete cpuXC->kernelStats; + cpuXC->kernelStats = NULL; + } +} +#endif + +void +CheckerCPU::serialize(ostream &os) +{ +/* + BaseCPU::serialize(os); + SERIALIZE_SCALAR(inst); + nameOut(os, csprintf("%s.xc", name())); + cpuXC->serialize(os); + cacheCompletionEvent.serialize(os); +*/ +} + +void +CheckerCPU::unserialize(Checkpoint *cp, const string &section) +{ +/* + BaseCPU::unserialize(cp, section); + UNSERIALIZE_SCALAR(inst); + cpuXC->unserialize(cp, csprintf("%s.xc", section)); +*/ +} + +Fault +CheckerCPU::copySrcTranslate(Addr src) +{ + panic("Unimplemented!"); +} + +Fault +CheckerCPU::copy(Addr dest) +{ + panic("Unimplemented!"); +} + +template +Fault +CheckerCPU::read(Addr addr, T &data, unsigned flags) +{ +/* + memReq->reset(addr, sizeof(T), flags); + + // translate to physical address + translateDataReadReq(memReq); + + memReq->cmd = Read; + memReq->completionEvent = NULL; + memReq->time = curTick; + memReq->flags &= ~INST_READ; + + if (!(memReq->flags & UNCACHEABLE)) { + // Access memory to see if we have the same data + cpuXC->read(memReq, data); + } else { + // Assume the data 
is correct if it's an uncached access + memcpy(&data, &unverifiedResult.integer, sizeof(T)); + } +*/ + return NoFault; +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +template +Fault +CheckerCPU::read(Addr addr, uint64_t &data, unsigned flags); + +template +Fault +CheckerCPU::read(Addr addr, uint32_t &data, unsigned flags); + +template +Fault +CheckerCPU::read(Addr addr, uint16_t &data, unsigned flags); + +template +Fault +CheckerCPU::read(Addr addr, uint8_t &data, unsigned flags); + +#endif //DOXYGEN_SHOULD_SKIP_THIS + +template<> +Fault +CheckerCPU::read(Addr addr, double &data, unsigned flags) +{ + return read(addr, *(uint64_t*)&data, flags); +} + +template<> +Fault +CheckerCPU::read(Addr addr, float &data, unsigned flags) +{ + return read(addr, *(uint32_t*)&data, flags); +} + +template<> +Fault +CheckerCPU::read(Addr addr, int32_t &data, unsigned flags) +{ + return read(addr, (uint32_t&)data, flags); +} + +template +Fault +CheckerCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) +{ +/* + memReq->reset(addr, sizeof(T), flags); + + // translate to physical address + cpuXC->translateDataWriteReq(memReq); + + // Can compare the write data and result only if it's cacheable, + // not a store conditional, or is a store conditional that + // succeeded. + // @todo: Verify that actual memory matches up with these values. + // Right now it only verifies that the instruction data is the + // same as what was in the request that got sent to memory; there + // is no verification that it is the same as what is in memory. + // This is because the LSQ would have to be snooped in the CPU to + // verify this data. 
+ if (unverifiedReq && + !(unverifiedReq->flags & UNCACHEABLE) && + (!(unverifiedReq->flags & LOCKED) || + ((unverifiedReq->flags & LOCKED) && + unverifiedReq->result == 1))) { +#if 0 + memReq->cmd = Read; + memReq->completionEvent = NULL; + memReq->time = curTick; + memReq->flags &= ~INST_READ; + cpuXC->read(memReq, inst_data); +#endif + T inst_data; + memcpy(&inst_data, unverifiedReq->data, sizeof(T)); + + if (data != inst_data) { + warn("%lli: Store value does not match value in memory! " + "Instruction: %#x, memory: %#x", + curTick, inst_data, data); + handleError(); + } + } + + // Assume the result was the same as the one passed in. This checker + // doesn't check if the SC should succeed or fail, it just checks the + // value. + if (res) + *res = unverifiedReq->result; + */ + return NoFault; +} + + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +template +Fault +CheckerCPU::write(uint64_t data, Addr addr, unsigned flags, uint64_t *res); + +template +Fault +CheckerCPU::write(uint32_t data, Addr addr, unsigned flags, uint64_t *res); + +template +Fault +CheckerCPU::write(uint16_t data, Addr addr, unsigned flags, uint64_t *res); + +template +Fault +CheckerCPU::write(uint8_t data, Addr addr, unsigned flags, uint64_t *res); + +#endif //DOXYGEN_SHOULD_SKIP_THIS + +template<> +Fault +CheckerCPU::write(double data, Addr addr, unsigned flags, uint64_t *res) +{ + return write(*(uint64_t*)&data, addr, flags, res); +} + +template<> +Fault +CheckerCPU::write(float data, Addr addr, unsigned flags, uint64_t *res) +{ + return write(*(uint32_t*)&data, addr, flags, res); +} + +template<> +Fault +CheckerCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res) +{ + return write((uint32_t)data, addr, flags, res); +} + + +#if FULL_SYSTEM +Addr +CheckerCPU::dbg_vtophys(Addr addr) +{ + return vtophys(xcProxy, addr); +} +#endif // FULL_SYSTEM + +bool +CheckerCPU::translateInstReq(Request *req) +{ +#if FULL_SYSTEM + return (cpuXC->translateInstReq(req) == NoFault); +#else + 
cpuXC->translateInstReq(req); + return true; +#endif +} + +void +CheckerCPU::translateDataReadReq(Request *req) +{ + cpuXC->translateDataReadReq(req); + + if (req->getVaddr() != unverifiedReq->getVaddr()) { + warn("%lli: Request virtual addresses do not match! Inst: %#x, " + "checker: %#x", + curTick, unverifiedReq->getVaddr(), req->getVaddr()); + handleError(); + } + req->setPaddr(unverifiedReq->getPaddr()); + + if (checkFlags(req)) { + warn("%lli: Request flags do not match! Inst: %#x, checker: %#x", + curTick, unverifiedReq->getFlags(), req->getFlags()); + handleError(); + } +} + +void +CheckerCPU::translateDataWriteReq(Request *req) +{ + cpuXC->translateDataWriteReq(req); + + if (req->getVaddr() != unverifiedReq->getVaddr()) { + warn("%lli: Request virtual addresses do not match! Inst: %#x, " + "checker: %#x", + curTick, unverifiedReq->getVaddr(), req->getVaddr()); + handleError(); + } + req->setPaddr(unverifiedReq->getPaddr()); + + if (checkFlags(req)) { + warn("%lli: Request flags do not match! Inst: %#x, checker: %#x", + curTick, unverifiedReq->getFlags(), req->getFlags()); + handleError(); + } +} + +bool +CheckerCPU::checkFlags(Request *req) +{ + // Remove any dynamic flags that don't have to do with the request itself. + unsigned flags = unverifiedReq->getFlags(); + unsigned mask = LOCKED | PHYSICAL | VPTE | ALTMODE | UNCACHEABLE | NO_FAULT; + flags = flags & (mask); + if (flags == req->getFlags()) { + return false; + } else { + return true; + } +} + +template +void +Checker::tick(DynInstPtr &completed_inst) +{ + DynInstPtr inst; + + // Either check this instruction, or add it to a list of + // instructions waiting to be checked. Instructions must be + // checked in program order, so if a store has committed yet not + // completed, there may be some instructions that are waiting + // behind it that have completed and must be checked. 
+ if (!instList.empty()) { + if (youngestSN < completed_inst->seqNum) { + DPRINTF(Checker, "Adding instruction [sn:%lli] PC:%#x to list.\n", + completed_inst->seqNum, completed_inst->readPC()); + instList.push_back(completed_inst); + youngestSN = completed_inst->seqNum; + } + + if (!instList.front()->isCompleted()) { + return; + } else { + inst = instList.front(); + instList.pop_front(); + } + } else { + if (!completed_inst->isCompleted()) { + if (youngestSN < completed_inst->seqNum) { + DPRINTF(Checker, "Adding instruction [sn:%lli] PC:%#x to list.\n", + completed_inst->seqNum, completed_inst->readPC()); + instList.push_back(completed_inst); + youngestSN = completed_inst->seqNum; + } + return; + } else { + if (youngestSN < completed_inst->seqNum) { + inst = completed_inst; + youngestSN = completed_inst->seqNum; + } else { + return; + } + } + } + + // Try to check all instructions that are completed, ending if we + // run out of instructions to check or if an instruction is not + // yet completed. + while (1) { + DPRINTF(Checker, "Processing instruction [sn:%lli] PC:%#x.\n", + inst->seqNum, inst->readPC()); + unverifiedResult.integer = inst->readIntResult(); + unverifiedReq = inst->req; + numCycles++; + + Fault fault = NoFault; + + // maintain $r0 semantics + cpuXC->setIntReg(ZeroReg, 0); +#ifdef TARGET_ALPHA + cpuXC->setFloatRegDouble(ZeroReg, 0.0); +#endif // TARGET_ALPHA + + // Check if any recent PC changes match up with anything we + // expect to happen. This is mostly to check if traps or + // PC-based events have occurred in both the checker and CPU. 
+ if (changedPC) { + DPRINTF(Checker, "Changed PC recently to %#x\n", + cpuXC->readPC()); + if (willChangePC) { + if (newPC == cpuXC->readPC()) { + DPRINTF(Checker, "Changed PC matches expected PC\n"); + } else { + warn("%lli: Changed PC does not match expected PC, " + "changed: %#x, expected: %#x", + curTick, cpuXC->readPC(), newPC); + handleError(); + } + willChangePC = false; + } + changedPC = false; + } + if (changedNextPC) { + DPRINTF(Checker, "Changed NextPC recently to %#x\n", + cpuXC->readNextPC()); + changedNextPC = false; + } + + // Try to fetch the instruction + +#if FULL_SYSTEM +#define IFETCH_FLAGS(pc) ((pc) & 1) ? PHYSICAL : 0 +#else +#define IFETCH_FLAGS(pc) 0 +#endif + + // set up memory request for instruction fetch +// memReq->cmd = Read; +// memReq->reset(cpuXC->readPC() & ~3, sizeof(uint32_t), +// IFETCH_FLAGS(cpuXC->readPC())); + + bool succeeded = translateInstReq(memReq); + + if (!succeeded) { + if (inst->getFault() == NoFault) { + // In this case the instruction was not a dummy + // instruction carrying an ITB fault. In the single + // threaded case the ITB should still be able to + // translate this instruction; in the SMT case it's + // possible that its ITB entry was kicked out. + warn("%lli: Instruction PC %#x was not found in the ITB!", + curTick, cpuXC->readPC()); + handleError(); + + // go to the next instruction + cpuXC->setPC(cpuXC->readNextPC()); + cpuXC->setNextPC(cpuXC->readNextPC() + sizeof(MachInst)); + + return; + } else { + // The instruction is carrying an ITB fault. Handle + // the fault and see if our results match the CPU on + // the next tick(). + fault = inst->getFault(); + } + } + + if (fault == NoFault) { +// cpuXC->read(memReq, machInst); + + // keep an instruction count + numInst++; + + // decode the instruction + machInst = gtoh(machInst); + // Checks that the instruction matches what we expected it to be. + // Checks both the machine instruction and the PC. 
+ validateInst(inst); + + curStaticInst = StaticInst::decode(makeExtMI(machInst, + cpuXC->readPC())); + +#if FULL_SYSTEM + cpuXC->setInst(machInst); +#endif // FULL_SYSTEM + + fault = inst->getFault(); + } + + // Either the instruction was a fault and we should process the fault, + // or we should just go ahead and execute the instruction. This assumes + // that the instruction is properly marked as a fault. + if (fault == NoFault) { + + cpuXC->func_exe_inst++; + + fault = curStaticInst->execute(this, NULL); + + // Checks to make sure instruction results are correct. + validateExecution(inst); + + if (curStaticInst->isLoad()) { + ++numLoad; + } + } + + if (fault != NoFault) { +#if FULL_SYSTEM + fault->invoke(xcProxy); + willChangePC = true; + newPC = cpuXC->readPC(); + DPRINTF(Checker, "Fault, PC is now %#x\n", newPC); +#else // !FULL_SYSTEM + fatal("fault (%d) detected @ PC 0x%08p", fault, cpuXC->readPC()); +#endif // FULL_SYSTEM + } else { +#if THE_ISA != MIPS_ISA + // go to the next instruction + cpuXC->setPC(cpuXC->readNextPC()); + cpuXC->setNextPC(cpuXC->readNextPC() + sizeof(MachInst)); +#else + // go to the next instruction + cpuXC->setPC(cpuXC->readNextPC()); + cpuXC->setNextPC(cpuXC->readNextNPC()); + cpuXC->setNextNPC(cpuXC->readNextNPC() + sizeof(MachInst)); +#endif + + } + +#if FULL_SYSTEM + // @todo: Determine if these should happen only if the + // instruction hasn't faulted. In the SimpleCPU case this may + // not be true, but in the O3 or Ozone case this may be true. + Addr oldpc; + int count = 0; + do { + oldpc = cpuXC->readPC(); + system->pcEventQueue.service(xcProxy); + count++; + } while (oldpc != cpuXC->readPC()); + if (count > 1) { + willChangePC = true; + newPC = cpuXC->readPC(); + DPRINTF(Checker, "PC Event, PC is now %#x\n", newPC); + } +#endif + + // @todo: Optionally can check all registers. (Or just those + // that have been modified). 
+ validateState(); + + // Continue verifying instructions if there's another completed + // instruction waiting to be verified. + if (instList.empty()) { + break; + } else if (instList.front()->isCompleted()) { + inst = instList.front(); + instList.pop_front(); + } else { + break; + } + } +} + +template +void +Checker::switchOut(Sampler *s) +{ + instList.clear(); +} + +template +void +Checker::takeOverFrom(BaseCPU *oldCPU) +{ +} + +template +void +Checker::validateInst(DynInstPtr &inst) +{ + if (inst->readPC() != cpuXC->readPC()) { + warn("%lli: PCs do not match! Inst: %#x, checker: %#x", + curTick, inst->readPC(), cpuXC->readPC()); + if (changedPC) { + warn("%lli: Changed PCs recently, may not be an error", + curTick); + } else { + handleError(); + } + } + + MachInst mi = static_cast(inst->staticInst->machInst); + + if (mi != machInst) { + warn("%lli: Binary instructions do not match! Inst: %#x, " + "checker: %#x", + curTick, mi, machInst); + handleError(); + } +} + +template +void +Checker::validateExecution(DynInstPtr &inst) +{ + if (inst->numDestRegs()) { + // @todo: Support more destination registers. + if (inst->isUnverifiable()) { + // Unverifiable instructions assume they were executed + // properly by the CPU. Grab the result from the + // instruction and write it to the register. + RegIndex idx = inst->destRegIdx(0); + if (idx < TheISA::FP_Base_DepTag) { + cpuXC->setIntReg(idx, inst->readIntResult()); + } else if (idx < TheISA::Fpcr_DepTag) { + cpuXC->setFloatRegBits(idx, inst->readIntResult()); + } else { + cpuXC->setMiscReg(idx, inst->readIntResult()); + } + } else if (result.integer != inst->readIntResult()) { + warn("%lli: Instruction results do not match! (Results may not " + "actually be integers) Inst: %#x, checker: %#x", + curTick, inst->readIntResult(), result.integer); + handleError(); + } + } + + if (inst->readNextPC() != cpuXC->readNextPC()) { + warn("%lli: Instruction next PCs do not match! 
Inst: %#x, " + "checker: %#x", + curTick, inst->readNextPC(), cpuXC->readNextPC()); + handleError(); + } + + // Checking side effect registers can be difficult if they are not + // checked simultaneously with the execution of the instruction. + // This is because other valid instructions may have modified + // these registers in the meantime, and their values are not + // stored within the DynInst. + while (!miscRegIdxs.empty()) { + int misc_reg_idx = miscRegIdxs.front(); + miscRegIdxs.pop(); + + if (inst->xcBase()->readMiscReg(misc_reg_idx) != + cpuXC->readMiscReg(misc_reg_idx)) { + warn("%lli: Misc reg idx %i (side effect) does not match! " + "Inst: %#x, checker: %#x", + curTick, misc_reg_idx, + inst->xcBase()->readMiscReg(misc_reg_idx), + cpuXC->readMiscReg(misc_reg_idx)); + handleError(); + } + } +} + +template +void +Checker::validateState() +{ +} + +template +void +Checker::dumpInsts() +{ + int num = 0; + InstListIt inst_list_it = instList.end(); + + cprintf("Inst list size: %i\n", instList.size()); + + // Print the list from back to front. The iterator is stepped back only + // while it is past begin(), so it never leaves the list (even if empty). + while (inst_list_it != instList.begin()) + { + --inst_list_it; + cprintf("Instruction:%i\n", + num); + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Completed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isCompleted()); + + cprintf("\n"); + + ++num; + } +} + +//template +//class Checker > >; + +template +class Checker > >; diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh new file mode 100644 index 000000000..7e63febb6 --- /dev/null +++ b/src/cpu/checker/cpu.hh @@ -0,0 +1,354 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __CPU_CHECKER_CPU_HH__ +#define __CPU_CHECKER_CPU_HH__ + +#include +#include +#include + +#include "arch/types.hh" +#include "base/statistics.hh" +#include "config/full_system.hh" +#include "cpu/base.hh" +#include "cpu/base_dyn_inst.hh" +#include "cpu/cpu_exec_context.hh" +#include "cpu/pc_event.hh" +#include "cpu/static_inst.hh" +#include "sim/eventq.hh" + +// forward declarations +#if FULL_SYSTEM +class Processor; +class AlphaITB; +class AlphaDTB; +class PhysicalMemory; + +class RemoteGDB; +class GDBListener; + +#else + +class Process; + +#endif // FULL_SYSTEM +template +class BaseDynInst; +class ExecContext; +class MemInterface; +class Checkpoint; +class Request; +class Sampler; + +class CheckerCPU : public BaseCPU +{ + protected: + typedef TheISA::MachInst MachInst; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + typedef TheISA::MiscReg MiscReg; + public: + // main simulation loop (one cycle) + virtual void init(); + + struct Params : public BaseCPU::Params + { +#if FULL_SYSTEM + AlphaITB *itb; + AlphaDTB *dtb; + FunctionalMemory *mem; +#else + Process *process; +#endif + bool exitOnError; + }; + + public: + CheckerCPU(Params *p); + virtual ~CheckerCPU(); + + void setMemory(MemObject *mem); + + MemObject *memPtr; + +#if FULL_SYSTEM + void setSystem(System *system); + + System *systemPtr; +#endif + public: + // execution context + CPUExecContext *cpuXC; + + ExecContext *xcProxy; + + AlphaITB *itb; + AlphaDTB *dtb; + +#if FULL_SYSTEM + Addr dbg_vtophys(Addr addr); +#endif + + union Result { + uint64_t integer; + float fp; + double dbl; + }; + + Result result; + + // current instruction + MachInst machInst; + + // Refcounted pointer to the one memory request. 
+ Request *memReq; + + StaticInstPtr curStaticInst; + + // number of simulated instructions + Counter numInst; + Counter startNumInst; + + std::queue miscRegIdxs; + + virtual Counter totalInstructions() const + { + return numInst - startNumInst; + } + + // number of simulated loads + Counter numLoad; + Counter startNumLoad; + + virtual void serialize(std::ostream &os); + virtual void unserialize(Checkpoint *cp, const std::string &section); + + template + Fault read(Addr addr, T &data, unsigned flags); + + template + Fault write(T data, Addr addr, unsigned flags, uint64_t *res); + + // These functions are only used in CPU models that split + // effective address computation from the actual memory access. + void setEA(Addr EA) { panic("SimpleCPU::setEA() not implemented\n"); } + Addr getEA() { panic("SimpleCPU::getEA() not implemented\n"); } + + void prefetch(Addr addr, unsigned flags) + { + // need to do this... + } + + void writeHint(Addr addr, int size, unsigned flags) + { + // need to do this... + } + + Fault copySrcTranslate(Addr src); + + Fault copy(Addr dest); + + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to reduce overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). 
+ + uint64_t readIntReg(const StaticInst *si, int idx) + { + return cpuXC->readIntReg(si->srcRegIdx(idx)); + } + + FloatReg readFloatReg(const StaticInst *si, int idx, int width) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return cpuXC->readFloatReg(reg_idx, width); + } + + FloatReg readFloatReg(const StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return cpuXC->readFloatReg(reg_idx); + } + + FloatRegBits readFloatRegBits(const StaticInst *si, int idx, int width) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return cpuXC->readFloatRegBits(reg_idx, width); + } + + FloatRegBits readFloatRegBits(const StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag; + return cpuXC->readFloatRegBits(reg_idx); + } + + void setIntReg(const StaticInst *si, int idx, uint64_t val) + { + cpuXC->setIntReg(si->destRegIdx(idx), val); + result.integer = val; + } + + void setFloatReg(const StaticInst *si, int idx, FloatReg val, int width) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + cpuXC->setFloatReg(reg_idx, val, width); + switch(width) { + case 32: + result.fp = val; + break; + case 64: + result.dbl = val; + break; + }; + } + + void setFloatReg(const StaticInst *si, int idx, FloatReg val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + cpuXC->setFloatReg(reg_idx, val); + result.fp = val; + } + + void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val, + int width) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + cpuXC->setFloatRegBits(reg_idx, val, width); + result.integer = val; + } + + void setFloatRegBits(const StaticInst *si, int idx, FloatRegBits val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag; + cpuXC->setFloatRegBits(reg_idx, val); + result.integer = val; + } + + uint64_t readPC() { return cpuXC->readPC(); } + + uint64_t readNextPC() { return cpuXC->readNextPC(); } 
+ + void setNextPC(uint64_t val) { + cpuXC->setNextPC(val); + } + + MiscReg readMiscReg(int misc_reg) + { + return cpuXC->readMiscReg(misc_reg); + } + + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { + return cpuXC->readMiscRegWithEffect(misc_reg, fault); + } + + Fault setMiscReg(int misc_reg, const MiscReg &val) + { + result.integer = val; + miscRegIdxs.push(misc_reg); + return cpuXC->setMiscReg(misc_reg, val); + } + + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) + { + miscRegIdxs.push(misc_reg); + return cpuXC->setMiscRegWithEffect(misc_reg, val); + } + + void recordPCChange(uint64_t val) { changedPC = true; } + void recordNextPCChange(uint64_t val) { changedNextPC = true; } + + bool translateInstReq(Request *req); + void translateDataWriteReq(Request *req); + void translateDataReadReq(Request *req); + +#if FULL_SYSTEM + Fault hwrei() { return cpuXC->hwrei(); } + int readIntrFlag() { return cpuXC->readIntrFlag(); } + void setIntrFlag(int val) { cpuXC->setIntrFlag(val); } + bool inPalMode() { return cpuXC->inPalMode(); } + void ev5_trap(Fault fault) { fault->invoke(xcProxy); } + bool simPalCheck(int palFunc) { return cpuXC->simPalCheck(palFunc); } +#else + // Assume that the normal CPU's call to syscall was successful. + // The checker's state would have already been updated by the syscall. 
+ void syscall(uint64_t callnum) { } +#endif + + void handleError() + { + if (exitOnError) + panic("Checker found error!"); + } + bool checkFlags(Request *req); + + ExecContext *xcBase() { return xcProxy; } + CPUExecContext *cpuXCBase() { return cpuXC; } + + Result unverifiedResult; + Request *unverifiedReq; + + bool changedPC; + bool willChangePC; + uint64_t newPC; + bool changedNextPC; + bool exitOnError; + + InstSeqNum youngestSN; +}; + +template +class Checker : public CheckerCPU +{ + public: + Checker(Params *p) + : CheckerCPU(p) + { } + + void switchOut(Sampler *s); + void takeOverFrom(BaseCPU *oldCPU); + + void tick(DynInstPtr &inst); + + void validateInst(DynInstPtr &inst); + void validateExecution(DynInstPtr &inst); + void validateState(); + + std::list instList; + typedef typename std::list::iterator InstListIt; + void dumpInsts(); +}; + +#endif // __CPU_CHECKER_CPU_HH__ diff --git a/src/cpu/checker/cpu_builder.cc b/src/cpu/checker/cpu_builder.cc new file mode 100644 index 000000000..397ccab14 --- /dev/null +++ b/src/cpu/checker/cpu_builder.cc @@ -0,0 +1,126 @@ + +#include + +#include "cpu/checker/cpu.hh" +#include "cpu/inst_seq.hh" +#include "cpu/ozone/dyn_inst.hh" +#include "cpu/ozone/ozone_impl.hh" +#include "mem/base_mem.hh" +#include "sim/builder.hh" +#include "sim/process.hh" +#include "sim/sim_object.hh" + +class OzoneChecker : public Checker > > +{ + public: + OzoneChecker(Params *p) + : Checker > >(p) + { } +}; + +//////////////////////////////////////////////////////////////////////// +// +// CheckerCPU Simulation Object +// +BEGIN_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker) + + Param max_insts_any_thread; + Param max_insts_all_threads; + Param max_loads_any_thread; + Param max_loads_all_threads; + +#if FULL_SYSTEM + SimObjectParam itb; + SimObjectParam dtb; + SimObjectParam mem; + SimObjectParam system; + Param cpu_id; + Param profile; +#else + SimObjectParam workload; +#endif // FULL_SYSTEM + Param clock; + SimObjectParam icache; + SimObjectParam 
dcache; + + Param defer_registration; + Param exitOnError; + Param function_trace; + Param function_trace_start; + +END_DECLARE_SIM_OBJECT_PARAMS(OzoneChecker) + +BEGIN_INIT_SIM_OBJECT_PARAMS(OzoneChecker) + + INIT_PARAM(max_insts_any_thread, + "terminate when any thread reaches this inst count"), + INIT_PARAM(max_insts_all_threads, + "terminate when all threads have reached this inst count"), + INIT_PARAM(max_loads_any_thread, + "terminate when any thread reaches this load count"), + INIT_PARAM(max_loads_all_threads, + "terminate when all threads have reached this load count"), + +#if FULL_SYSTEM + INIT_PARAM(itb, "Instruction TLB"), + INIT_PARAM(dtb, "Data TLB"), + INIT_PARAM(mem, "memory"), + INIT_PARAM(system, "system object"), + INIT_PARAM(cpu_id, "processor ID"), + INIT_PARAM(profile, ""), +#else + INIT_PARAM(workload, "processes to run"), +#endif // FULL_SYSTEM + + INIT_PARAM(clock, "clock speed"), + INIT_PARAM(icache, "L1 instruction cache object"), + INIT_PARAM(dcache, "L1 data cache object"), + + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + INIT_PARAM(exitOnError, "exit on error"), + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace") + +END_INIT_SIM_OBJECT_PARAMS(OzoneChecker) + + +CREATE_SIM_OBJECT(OzoneChecker) +{ + OzoneChecker::Params *params = new OzoneChecker::Params(); + params->name = getInstanceName(); + params->numberOfThreads = 1; + params->max_insts_any_thread = 0; + params->max_insts_all_threads = 0; + params->max_loads_any_thread = 0; + params->max_loads_all_threads = 0; + params->exitOnError = exitOnError; + params->deferRegistration = defer_registration; + params->functionTrace = function_trace; + params->functionTraceStart = function_trace_start; + params->clock = clock; + // Hack to touch all parameters. Consider not deriving Checker + // from BaseCPU..it's not really a CPU in the end. 
+ Counter temp; + temp = max_insts_any_thread; + temp = max_insts_all_threads; + temp = max_loads_any_thread; + temp = max_loads_all_threads; + BaseMem *cache = icache; + cache = dcache; + +#if FULL_SYSTEM + params->itb = itb; + params->dtb = dtb; + params->mem = mem; + params->system = system; + params->cpu_id = cpu_id; + params->profile = profile; +#else + params->process = workload; +#endif + + OzoneChecker *cpu = new OzoneChecker(params); + return cpu; +} + +REGISTER_SIM_OBJECT("OzoneChecker", OzoneChecker) diff --git a/src/cpu/checker/exec_context.hh b/src/cpu/checker/exec_context.hh new file mode 100644 index 000000000..7d30e736a --- /dev/null +++ b/src/cpu/checker/exec_context.hh @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_CHECKER_EXEC_CONTEXT_HH__ +#define __CPU_CHECKER_EXEC_CONTEXT_HH__ + +#include "cpu/checker/cpu.hh" +#include "cpu/cpu_exec_context.hh" +#include "cpu/exec_context.hh" + +class EndQuiesceEvent; +namespace Kernel { + class Statistics; +}; + +template +class CheckerExecContext : public ExecContext +{ + public: + CheckerExecContext(XC *actual_xc, + CheckerCPU *checker_cpu) + : actualXC(actual_xc), checkerXC(checker_cpu->cpuXC), + checkerCPU(checker_cpu) + { } + + private: + XC *actualXC; + CPUExecContext *checkerXC; + CheckerCPU *checkerCPU; + + public: + + BaseCPU *getCpuPtr() { return actualXC->getCpuPtr(); } + + void setCpuId(int id) + { + actualXC->setCpuId(id); + checkerXC->setCpuId(id); + } + + int readCpuId() { return actualXC->readCpuId(); } + + TranslatingPort *getMemPort() { return actualXC->getMemPort(); } + +#if FULL_SYSTEM + System *getSystemPtr() { return actualXC->getSystemPtr(); } + + PhysicalMemory *getPhysMemPtr() { return actualXC->getPhysMemPtr(); } + + AlphaITB *getITBPtr() { return actualXC->getITBPtr(); } + + AlphaDTB *getDTBPtr() { return actualXC->getDTBPtr(); } + + Kernel::Statistics *getKernelStats() { return actualXC->getKernelStats(); } +#else + Process *getProcessPtr() { return actualXC->getProcessPtr(); } +#endif + + Status status() const { return actualXC->status(); } + + void setStatus(Status new_status) + { + actualXC->setStatus(new_status); + checkerXC->setStatus(new_status); + } 
+ + /// Set the status to Active. Optional delay indicates number of + /// cycles to wait before beginning execution. + void activate(int delay = 1) { actualXC->activate(delay); } + + /// Set the status to Suspended. + void suspend() { actualXC->suspend(); } + + /// Set the status to Unallocated. + void deallocate() { actualXC->deallocate(); } + + /// Set the status to Halted. + void halt() { actualXC->halt(); } + +#if FULL_SYSTEM + void dumpFuncProfile() { actualXC->dumpFuncProfile(); } +#endif + + void takeOverFrom(ExecContext *oldContext) + { + actualXC->takeOverFrom(oldContext); + checkerXC->takeOverFrom(oldContext); + } + + void regStats(const std::string &name) { actualXC->regStats(name); } + + void serialize(std::ostream &os) { actualXC->serialize(os); } + void unserialize(Checkpoint *cp, const std::string &section) + { actualXC->unserialize(cp, section); } + +#if FULL_SYSTEM + EndQuiesceEvent *getQuiesceEvent() { return actualXC->getQuiesceEvent(); } + + Tick readLastActivate() { return actualXC->readLastActivate(); } + Tick readLastSuspend() { return actualXC->readLastSuspend(); } + + void profileClear() { return actualXC->profileClear(); } + void profileSample() { return actualXC->profileSample(); } +#endif + + int getThreadNum() { return actualXC->getThreadNum(); } + + // @todo: Do I need this? + MachInst getInst() { return actualXC->getInst(); } + + // @todo: Do I need this? + void copyArchRegs(ExecContext *xc) + { + actualXC->copyArchRegs(xc); + checkerXC->copyArchRegs(xc); + } + + void clearArchRegs() + { + actualXC->clearArchRegs(); + checkerXC->clearArchRegs(); + } + + // + // New accessors for new decoder. 
+ // + uint64_t readIntReg(int reg_idx) + { return actualXC->readIntReg(reg_idx); } + + FloatReg readFloatReg(int reg_idx, int width) + { return actualXC->readFloatReg(reg_idx, width); } + + FloatReg readFloatReg(int reg_idx) + { return actualXC->readFloatReg(reg_idx); } + + FloatRegBits readFloatRegBits(int reg_idx, int width) + { return actualXC->readFloatRegBits(reg_idx, width); } + + FloatRegBits readFloatRegBits(int reg_idx) + { return actualXC->readFloatRegBits(reg_idx); } + + void setIntReg(int reg_idx, uint64_t val) + { + actualXC->setIntReg(reg_idx, val); + checkerXC->setIntReg(reg_idx, val); + } + + void setFloatReg(int reg_idx, FloatReg val, int width) + { + actualXC->setFloatReg(reg_idx, val, width); + checkerXC->setFloatReg(reg_idx, val, width); + } + + void setFloatReg(int reg_idx, FloatReg val) + { + actualXC->setFloatReg(reg_idx, val); + checkerXC->setFloatReg(reg_idx, val); + } + + void setFloatRegBits(int reg_idx, FloatRegBits val, int width) + { + actualXC->setFloatRegBits(reg_idx, val, width); + checkerXC->setFloatRegBits(reg_idx, val, width); + } + + void setFloatRegBits(int reg_idx, FloatRegBits val) + { + actualXC->setFloatRegBits(reg_idx, val); + checkerXC->setFloatRegBits(reg_idx, val); + } + + uint64_t readPC() { return actualXC->readPC(); } + + void setPC(uint64_t val) + { + actualXC->setPC(val); + checkerXC->setPC(val); + checkerCPU->recordPCChange(val); + } + + uint64_t readNextPC() { return actualXC->readNextPC(); } + + void setNextPC(uint64_t val) + { + actualXC->setNextPC(val); + checkerXC->setNextPC(val); + checkerCPU->recordNextPCChange(val); + } + + uint64_t readNextNPC() { return actualXC->readNextNPC(); } + + void setNextNPC(uint64_t val) + { + actualXC->setNextNPC(val); + checkerXC->setNextNPC(val); + checkerCPU->recordNextPCChange(val); + } + + MiscReg readMiscReg(int misc_reg) + { return actualXC->readMiscReg(misc_reg); } + + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault) + { return 
actualXC->readMiscRegWithEffect(misc_reg, fault); } + + Fault setMiscReg(int misc_reg, const MiscReg &val) + { + checkerXC->setMiscReg(misc_reg, val); + return actualXC->setMiscReg(misc_reg, val); + } + + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) + { + checkerXC->setMiscRegWithEffect(misc_reg, val); + return actualXC->setMiscRegWithEffect(misc_reg, val); + } + + unsigned readStCondFailures() + { return actualXC->readStCondFailures(); } + + void setStCondFailures(unsigned sc_failures) + { + checkerXC->setStCondFailures(sc_failures); + actualXC->setStCondFailures(sc_failures); + } +#if FULL_SYSTEM + bool inPalMode() { return actualXC->inPalMode(); } +#endif + + // @todo: Fix this! + bool misspeculating() { return actualXC->misspeculating(); } + +#if !FULL_SYSTEM + IntReg getSyscallArg(int i) { return actualXC->getSyscallArg(i); } + + // used to shift args for indirect syscall + void setSyscallArg(int i, IntReg val) + { + checkerXC->setSyscallArg(i, val); + actualXC->setSyscallArg(i, val); + } + + void setSyscallReturn(SyscallReturn return_value) + { + checkerXC->setSyscallReturn(return_value); + actualXC->setSyscallReturn(return_value); + } + + Counter readFuncExeInst() { return actualXC->readFuncExeInst(); } +#endif + void changeRegFileContext(RegFile::ContextParam param, + RegFile::ContextVal val) + { + actualXC->changeRegFileContext(param, val); + checkerXC->changeRegFileContext(param, val); + } +}; + +#endif // __CPU_CHECKER_EXEC_CONTEXT_HH__ diff --git a/src/cpu/checker/o3_cpu_builder.cc b/src/cpu/checker/o3_cpu_builder.cc new file mode 100644 index 000000000..c7883b42b --- /dev/null +++ b/src/cpu/checker/o3_cpu_builder.cc @@ -0,0 +1,121 @@ + +#include + +#include "cpu/checker/cpu.hh" +#include "cpu/inst_seq.hh" +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_impl.hh" +#include "sim/builder.hh" +#include "sim/process.hh" +#include "sim/sim_object.hh" + +class MemObject; + +class O3Checker : public Checker > > +{ + public: + 
O3Checker(Params *p) + : Checker > >(p) + { } +}; + +//////////////////////////////////////////////////////////////////////// +// +// CheckerCPU Simulation Object +// +BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker) + + Param max_insts_any_thread; + Param max_insts_all_threads; + Param max_loads_any_thread; + Param max_loads_all_threads; + +#if FULL_SYSTEM + SimObjectParam itb; + SimObjectParam dtb; + SimObjectParam mem; + SimObjectParam system; + Param cpu_id; + Param profile; +#else + SimObjectParam workload; +#endif // FULL_SYSTEM + Param clock; + + Param defer_registration; + Param exitOnError; + Param function_trace; + Param function_trace_start; + +END_DECLARE_SIM_OBJECT_PARAMS(O3Checker) + +BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker) + + INIT_PARAM(max_insts_any_thread, + "terminate when any thread reaches this inst count"), + INIT_PARAM(max_insts_all_threads, + "terminate when all threads have reached this inst count"), + INIT_PARAM(max_loads_any_thread, + "terminate when any thread reaches this load count"), + INIT_PARAM(max_loads_all_threads, + "terminate when all threads have reached this load count"), + +#if FULL_SYSTEM + INIT_PARAM(itb, "Instruction TLB"), + INIT_PARAM(dtb, "Data TLB"), + INIT_PARAM(mem, "memory"), + INIT_PARAM(system, "system object"), + INIT_PARAM(cpu_id, "processor ID"), + INIT_PARAM(profile, ""), +#else + INIT_PARAM(workload, "processes to run"), +#endif // FULL_SYSTEM + + INIT_PARAM(clock, "clock speed"), + + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + INIT_PARAM(exitOnError, "exit on error"), + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace") + +END_INIT_SIM_OBJECT_PARAMS(O3Checker) + + +CREATE_SIM_OBJECT(O3Checker) +{ + O3Checker::Params *params = new O3Checker::Params(); + params->name = getInstanceName(); + params->numberOfThreads = 1; + params->max_insts_any_thread = 0; + params->max_insts_all_threads = 0; + 
params->max_loads_any_thread = 0; + params->max_loads_all_threads = 0; + params->exitOnError = exitOnError; + params->deferRegistration = defer_registration; + params->functionTrace = function_trace; + params->functionTraceStart = function_trace_start; + params->clock = clock; + // Hack to touch all parameters. Consider not deriving Checker + // from BaseCPU..it's not really a CPU in the end. + Counter temp; + temp = max_insts_any_thread; + temp = max_insts_all_threads; + temp = max_loads_any_thread; + temp = max_loads_all_threads; + +#if FULL_SYSTEM + params->itb = itb; + params->dtb = dtb; + params->mem = mem; + params->system = system; + params->cpu_id = cpu_id; + params->profile = profile; +#else + params->process = workload; +#endif + + O3Checker *cpu = new O3Checker(params); + return cpu; +} + +REGISTER_SIM_OBJECT("O3Checker", O3Checker) diff --git a/src/cpu/o3/2bit_local_pred.cc b/src/cpu/o3/2bit_local_pred.cc index c3fb2fdb8..2f768fd34 100644 --- a/src/cpu/o3/2bit_local_pred.cc +++ b/src/cpu/o3/2bit_local_pred.cc @@ -27,6 +27,7 @@ */ #include "base/intmath.hh" +#include "base/misc.hh" #include "base/trace.hh" #include "cpu/o3/2bit_local_pred.hh" diff --git a/src/cpu/o3/alpha_cpu.hh b/src/cpu/o3/alpha_cpu.hh index 1bab0703e..fe88a1acc 100644 --- a/src/cpu/o3/alpha_cpu.hh +++ b/src/cpu/o3/alpha_cpu.hh @@ -39,11 +39,15 @@ namespace Kernel { class Statistics; }; +class TranslatingPort; + template class AlphaFullCPU : public FullO3CPU { protected: typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::MiscReg MiscReg; typedef TheISA::RegFile RegFile; typedef TheISA::MiscRegFile MiscRegFile; @@ -69,7 +73,7 @@ class AlphaFullCPU : public FullO3CPU virtual int readCpuId() { return cpu->cpu_id; } - virtual FunctionalMemory *getMemPtr() { return thread->mem; } + virtual TranslatingPort *getMemPort() { return /*thread->port*/ NULL; } #if FULL_SYSTEM virtual System *getSystemPtr() { return 
cpu->system; } @@ -135,19 +139,23 @@ class AlphaFullCPU : public FullO3CPU virtual uint64_t readIntReg(int reg_idx); - virtual float readFloatRegSingle(int reg_idx); + virtual FloatReg readFloatReg(int reg_idx, int width); + + virtual FloatReg readFloatReg(int reg_idx); - virtual double readFloatRegDouble(int reg_idx); + virtual FloatRegBits readFloatRegBits(int reg_idx, int width); - virtual uint64_t readFloatRegInt(int reg_idx); + virtual FloatRegBits readFloatRegBits(int reg_idx); virtual void setIntReg(int reg_idx, uint64_t val); - virtual void setFloatRegSingle(int reg_idx, float val); + virtual void setFloatReg(int reg_idx, FloatReg val, int width); + + virtual void setFloatReg(int reg_idx, FloatReg val); - virtual void setFloatRegDouble(int reg_idx, double val); + virtual void setFloatRegBits(int reg_idx, FloatRegBits val, int width); - virtual void setFloatRegInt(int reg_idx, uint64_t val); + virtual void setFloatRegBits(int reg_idx, FloatRegBits val); virtual uint64_t readPC() { return cpu->readPC(thread->tid); } @@ -159,6 +167,15 @@ class AlphaFullCPU : public FullO3CPU virtual void setNextPC(uint64_t val); + virtual uint64_t readNextNPC() + { + panic("Alpha has no NextNPC!"); + return 0; + } + + virtual void setNextNPC(uint64_t val) + { panic("Alpha has no NextNPC!"); } + virtual MiscReg readMiscReg(int misc_reg) { return cpu->readMiscReg(misc_reg, thread->tid); } @@ -193,10 +210,14 @@ class AlphaFullCPU : public FullO3CPU virtual void setSyscallReturn(SyscallReturn return_value); - virtual void syscall() { return cpu->syscall(thread->tid); } + virtual void syscall(int64_t callnum) + { return cpu->syscall(callnum, thread->tid); } virtual Counter readFuncExeInst() { return thread->funcExeInst; } #endif + virtual void changeRegFileContext(TheISA::RegFile::ContextParam param, + TheISA::RegFile::ContextVal val) + { panic("Not supported on Alpha!"); } }; #if FULL_SYSTEM @@ -211,52 +232,43 @@ class AlphaFullCPU : public FullO3CPU #if FULL_SYSTEM /** Translates 
instruction requestion. */ - Fault translateInstReq(MemReqPtr &req) + Fault translateInstReq(RequestPtr &req) { return itb->translate(req); } /** Translates data read request. */ - Fault translateDataReadReq(MemReqPtr &req) + Fault translateDataReadReq(RequestPtr &req) { return dtb->translate(req, false); } /** Translates data write request. */ - Fault translateDataWriteReq(MemReqPtr &req) + Fault translateDataWriteReq(RequestPtr &req) { return dtb->translate(req, true); } #else - Fault dummyTranslation(MemReqPtr &req) - { -#if 0 - assert((req->vaddr >> 48 & 0xffff) == 0); -#endif - - // put the asid in the upper 16 bits of the paddr - req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16); - req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16; - return NoFault; - } - /** Translates instruction requestion in syscall emulation mode. */ - Fault translateInstReq(MemReqPtr &req) + Fault translateInstReq(RequestPtr &req) { - return dummyTranslation(req); + int tid = req->getThreadNum(); + return this->thread[tid]->process->pTable->translate(req); } /** Translates data read request in syscall emulation mode. */ - Fault translateDataReadReq(MemReqPtr &req) + Fault translateDataReadReq(RequestPtr &req) { - return dummyTranslation(req); + int tid = req->getThreadNum(); + return this->thread[tid]->process->pTable->translate(req); } /** Translates data write request in syscall emulation mode. */ - Fault translateDataWriteReq(MemReqPtr &req) + Fault translateDataWriteReq(RequestPtr &req) { - return dummyTranslation(req); + int tid = req->getThreadNum(); + return this->thread[tid]->process->pTable->translate(req); } #endif @@ -298,7 +310,7 @@ class AlphaFullCPU : public FullO3CPU /** Executes a syscall. * @todo: Determine if this needs to be virtual. */ - void syscall(int thread_num); + void syscall(int64_t callnum, int thread_num); /** Gets a syscall argument. 
*/ IntReg getSyscallArg(int i, int tid); @@ -311,7 +323,7 @@ class AlphaFullCPU : public FullO3CPU /** Read from memory function. */ template - Fault read(MemReqPtr &req, T &data) + Fault read(RequestPtr &req, T &data) { #if 0 #if FULL_SYSTEM && THE_ISA == ALPHA_ISA @@ -338,14 +350,14 @@ class AlphaFullCPU : public FullO3CPU /** CPU read function, forwards read to LSQ. */ template - Fault read(MemReqPtr &req, T &data, int load_idx) + Fault read(RequestPtr &req, T &data, int load_idx) { return this->iew.ldstQueue.read(req, data, load_idx); } /** Write to memory function. */ template - Fault write(MemReqPtr &req, T &data) + Fault write(RequestPtr &req, T &data) { #if 0 #if FULL_SYSTEM && THE_ISA == ALPHA_ISA @@ -417,7 +429,7 @@ class AlphaFullCPU : public FullO3CPU /** CPU write function, forwards write to LSQ. */ template - Fault write(MemReqPtr &req, T &data, int store_idx) + Fault write(RequestPtr &req, T &data, int store_idx) { return this->iew.ldstQueue.write(req, data, store_idx); } diff --git a/src/cpu/o3/alpha_cpu_builder.cc b/src/cpu/o3/alpha_cpu_builder.cc index b0d812edc..6ac408364 100644 --- a/src/cpu/o3/alpha_cpu_builder.cc +++ b/src/cpu/o3/alpha_cpu_builder.cc @@ -33,7 +33,6 @@ #include "cpu/o3/alpha_impl.hh" #include "cpu/o3/alpha_params.hh" #include "cpu/o3/fu_pool.hh" -#include "mem/cache/base_cache.hh" #include "sim/builder.hh" class DerivAlphaFullCPU : public AlphaFullCPU @@ -60,7 +59,7 @@ SimObjectVectorParam workload; //SimObjectParam page_table; #endif // FULL_SYSTEM -SimObjectParam mem; +SimObjectParam mem; SimObjectParam checker; @@ -69,9 +68,6 @@ Param max_insts_all_threads; Param max_loads_any_thread; Param max_loads_all_threads; -SimObjectParam icache; -SimObjectParam dcache; - Param cachePorts; Param decodeToFetchDelay; @@ -169,7 +165,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) // INIT_PARAM(page_table, "Page table"), #endif // FULL_SYSTEM - INIT_PARAM_DFLT(mem, "Memory", NULL), + INIT_PARAM(mem, "Memory"), 
INIT_PARAM_DFLT(checker, "Checker CPU", NULL), @@ -188,9 +184,6 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU) "count", 0), - INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), - INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), - INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), @@ -327,8 +320,6 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU) // // Caches // - params->icacheInterface = icache ? icache->getInterface() : NULL; - params->dcacheInterface = dcache ? dcache->getInterface() : NULL; params->cachePorts = cachePorts; params->decodeToFetchDelay = decodeToFetchDelay; diff --git a/src/cpu/o3/alpha_cpu_impl.hh b/src/cpu/o3/alpha_cpu_impl.hh index f7f0a3842..a890cfd90 100644 --- a/src/cpu/o3/alpha_cpu_impl.hh +++ b/src/cpu/o3/alpha_cpu_impl.hh @@ -31,7 +31,6 @@ #include "base/statistics.hh" #include "base/timebuf.hh" #include "cpu/checker/exec_context.hh" -#include "mem/mem_interface.hh" #include "sim/sim_events.hh" #include "sim/stats.hh" @@ -68,11 +67,9 @@ AlphaFullCPU::AlphaFullCPU(Params *params) this->thread[i]->setStatus(ExecContext::Suspended); #else if (i < params->workload.size()) { - DPRINTF(FullCPU, "FullCPU: Workload[%i]'s starting PC is %#x, " - "process is %#x", - i, params->workload[i]->prog_entry, this->thread[i]); + DPRINTF(FullCPU, "FullCPU: Workload[%i] process is %#x", + i, this->thread[i]); this->thread[i] = new Thread(this, i, params->workload[i], i); - assert(params->workload[i]->getMemory() != NULL); this->thread[i]->setStatus(ExecContext::Suspended); //usedTids[i] = true; @@ -160,7 +157,7 @@ void AlphaFullCPU::AlphaXC::takeOverFrom(ExecContext *old_context) { // some things should already be set up - assert(getMemPtr() == old_context->getMemPtr()); + assert(getMemPort() == old_context->getMemPort()); #if FULL_SYSTEM assert(getSystemPtr() == old_context->getSystemPtr()); #else @@ -366,15 +363,14 @@ AlphaFullCPU::AlphaXC::copyArchRegs(ExecContext *xc) } // Then loop through the floating 
point registers. - for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) - { - renamed_reg = this->renameMap.lookup(i + AlphaISA::FP_Base_DepTag); - this->cpuXC->setFloatRegBits(i, - this->regFile.readFloatRegBits(renamed_reg)); + for (int i = 0; i < AlphaISA::NumFloatRegs; ++i) { + renamed_reg = cpu->renameMap[tid].lookup(i + AlphaISA::FP_Base_DepTag); + cpu->setFloatRegBits(renamed_reg, + xc->readFloatRegBits(i)); } // Copy the misc regs. - cpu->regFile.miscRegs[tid].copyMiscRegs(xc); + copyMiscRegs(xc, this); // Then finally set the PC and the next PC. cpu->setPC(xc->readPC(), tid); @@ -398,24 +394,40 @@ AlphaFullCPU::AlphaXC::readIntReg(int reg_idx) } template -float -AlphaFullCPU::AlphaXC::readFloatRegSingle(int reg_idx) +FloatReg +AlphaFullCPU::AlphaXC::readFloatReg(int reg_idx, int width) { DPRINTF(Fault, "Reading float register through the XC!\n"); - return cpu->readArchFloatRegSingle(reg_idx, thread->tid); + switch(width) { + case 32: + return cpu->readArchFloatRegSingle(reg_idx, thread->tid); + case 64: + return cpu->readArchFloatRegDouble(reg_idx, thread->tid); + default: + panic("Unsupported width!"); + return 0; + } } template -double -AlphaFullCPU::AlphaXC::readFloatRegDouble(int reg_idx) +FloatReg +AlphaFullCPU::AlphaXC::readFloatReg(int reg_idx) { DPRINTF(Fault, "Reading float register through the XC!\n"); - return cpu->readArchFloatRegDouble(reg_idx, thread->tid); + return cpu->readArchFloatRegSingle(reg_idx, thread->tid); } template -uint64_t -AlphaFullCPU::AlphaXC::readFloatRegInt(int reg_idx) +FloatRegBits +AlphaFullCPU::AlphaXC::readFloatRegBits(int reg_idx, int width) +{ + DPRINTF(Fault, "Reading floatint register through the XC!\n"); + return cpu->readArchFloatRegInt(reg_idx, thread->tid); +} + +template +FloatRegBits +AlphaFullCPU::AlphaXC::readFloatRegBits(int reg_idx) { DPRINTF(Fault, "Reading floatint register through the XC!\n"); return cpu->readArchFloatRegInt(reg_idx, thread->tid); @@ -435,10 +447,17 @@ AlphaFullCPU::AlphaXC::setIntReg(int 
reg_idx, uint64_t val) template void -AlphaFullCPU::AlphaXC::setFloatRegSingle(int reg_idx, float val) +AlphaFullCPU::AlphaXC::setFloatReg(int reg_idx, FloatReg val, int width) { DPRINTF(Fault, "Setting float register through the XC!\n"); - cpu->setArchFloatRegSingle(reg_idx, val, thread->tid); + switch(width) { + case 32: + cpu->setArchFloatRegSingle(reg_idx, val, thread->tid); + break; + case 64: + cpu->setArchFloatRegDouble(reg_idx, val, thread->tid); + break; + } if (!thread->trapPending && !thread->inSyscall) { cpu->squashFromXC(thread->tid); @@ -447,10 +466,23 @@ AlphaFullCPU::AlphaXC::setFloatRegSingle(int reg_idx, float val) template void -AlphaFullCPU::AlphaXC::setFloatRegDouble(int reg_idx, double val) +AlphaFullCPU::AlphaXC::setFloatReg(int reg_idx, FloatReg val) { DPRINTF(Fault, "Setting float register through the XC!\n"); - cpu->setArchFloatRegDouble(reg_idx, val, thread->tid); + cpu->setArchFloatRegSingle(reg_idx, val, thread->tid); + + if (!thread->trapPending && !thread->inSyscall) { + cpu->squashFromXC(thread->tid); + } +} + +template +void +AlphaFullCPU::AlphaXC::setFloatRegBits(int reg_idx, FloatRegBits val, + int width) +{ + DPRINTF(Fault, "Setting floatint register through the XC!\n"); + cpu->setArchFloatRegInt(reg_idx, val, thread->tid); if (!thread->trapPending && !thread->inSyscall) { cpu->squashFromXC(thread->tid); @@ -459,7 +491,7 @@ AlphaFullCPU::AlphaXC::setFloatRegDouble(int reg_idx, double val) template void -AlphaFullCPU::AlphaXC::setFloatRegInt(int reg_idx, uint64_t val) +AlphaFullCPU::AlphaXC::setFloatRegBits(int reg_idx, FloatRegBits val) { DPRINTF(Fault, "Setting floatint register through the XC!\n"); cpu->setArchFloatRegInt(reg_idx, val, thread->tid); @@ -723,7 +755,7 @@ AlphaFullCPU::processInterrupts() template void -AlphaFullCPU::syscall(int tid) +AlphaFullCPU::syscall(int64_t callnum, int tid) { DPRINTF(FullCPU, "AlphaFullCPU: [tid:%i] Executing syscall().\n\n", tid); @@ -734,7 +766,7 @@ AlphaFullCPU::syscall(int tid) 
++(this->thread[tid]->funcExeInst); // Execute the actual syscall. - this->thread[tid]->syscall(); + this->thread[tid]->syscall(callnum); // Decrease funcExeInst by one as the normal commit will handle // incrementing it. diff --git a/src/cpu/o3/alpha_dyn_inst.hh b/src/cpu/o3/alpha_dyn_inst.hh index b03c8c337..f289bbf0d 100644 --- a/src/cpu/o3/alpha_dyn_inst.hh +++ b/src/cpu/o3/alpha_dyn_inst.hh @@ -29,11 +29,14 @@ #ifndef __CPU_O3_ALPHA_DYN_INST_HH__ #define __CPU_O3_ALPHA_DYN_INST_HH__ +#include "arch/isa_traits.hh" #include "cpu/base_dyn_inst.hh" #include "cpu/inst_seq.hh" #include "cpu/o3/alpha_cpu.hh" #include "cpu/o3/alpha_impl.hh" +class Packet; + /** * Mostly implementation & ISA specific AlphaDynInst. As with most * other classes in the new CPU model, it is templated on the Impl to @@ -56,6 +59,8 @@ class AlphaDynInst : public BaseDynInst typedef TheISA::RegIndex RegIndex; /** Integer register index type. */ typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; /** Misc register index type. */ typedef TheISA::MiscReg MiscReg; @@ -79,7 +84,7 @@ class AlphaDynInst : public BaseDynInst Fault initiateAcc(); /** Completes the access. Only valid for memory operations. */ - Fault completeAcc(); + Fault completeAcc(Packet *pkt); private: /** Initializes variables. */ @@ -123,7 +128,7 @@ class AlphaDynInst : public BaseDynInst bool simPalCheck(int palFunc); #else /** Calls a syscall. 
*/ - void syscall(); + void syscall(int64_t callnum); #endif private: diff --git a/src/cpu/o3/alpha_dyn_inst_impl.hh b/src/cpu/o3/alpha_dyn_inst_impl.hh index 541d5ab82..16c236b4c 100644 --- a/src/cpu/o3/alpha_dyn_inst_impl.hh +++ b/src/cpu/o3/alpha_dyn_inst_impl.hh @@ -96,15 +96,13 @@ AlphaDynInst::initiateAcc() template Fault -AlphaDynInst::completeAcc() +AlphaDynInst::completeAcc(Packet *pkt) { if (this->isLoad()) { - this->fault = this->staticInst->completeAcc(this->req->data, - this, + this->fault = this->staticInst->completeAcc(pkt, this, this->traceData); } else if (this->isStore()) { - this->fault = this->staticInst->completeAcc((uint8_t*)&this->req->result, - this, + this->fault = this->staticInst->completeAcc(pkt, this, this->traceData); } else { panic("Unknown type!"); @@ -168,9 +166,9 @@ AlphaDynInst::simPalCheck(int palFunc) #else template void -AlphaDynInst::syscall() +AlphaDynInst::syscall(int64_t callnum) { - this->cpu->syscall(this->threadNumber); + this->cpu->syscall(callnum, this->threadNumber); } #endif diff --git a/src/cpu/o3/alpha_params.hh b/src/cpu/o3/alpha_params.hh index e3acf2c05..04366e8dd 100644 --- a/src/cpu/o3/alpha_params.hh +++ b/src/cpu/o3/alpha_params.hh @@ -35,8 +35,7 @@ class AlphaDTB; class AlphaITB; class FUPool; -class FunctionalMemory; -class MemInterface; +class MemObject; class Process; class System; @@ -60,7 +59,7 @@ class AlphaSimpleParams : public BaseFullCPU::Params //Page Table // PageTable *pTable; - FunctionalMemory *mem; + MemObject *mem; BaseCPU *checker; @@ -69,8 +68,8 @@ class AlphaSimpleParams : public BaseFullCPU::Params // // Caches // - MemInterface *icacheInterface; - MemInterface *dcacheInterface; +// MemInterface *icacheInterface; +// MemInterface *dcacheInterface; unsigned cachePorts; diff --git a/src/cpu/o3/bpred_unit.cc b/src/cpu/o3/bpred_unit.cc index 92344111f..dcc5ceb80 100644 --- a/src/cpu/o3/bpred_unit.cc +++ b/src/cpu/o3/bpred_unit.cc @@ -30,8 +30,8 @@ #include "cpu/o3/alpha_impl.hh" #include 
"cpu/o3/alpha_dyn_inst.hh" #include "cpu/ozone/ozone_impl.hh" -#include "cpu/ozone/simple_impl.hh" +//#include "cpu/ozone/simple_impl.hh" template class TwobitBPredUnit; template class TwobitBPredUnit; -template class TwobitBPredUnit; +//template class TwobitBPredUnit; diff --git a/src/cpu/o3/btb.hh b/src/cpu/o3/btb.hh index b9ff42573..c7dc1808b 100644 --- a/src/cpu/o3/btb.hh +++ b/src/cpu/o3/btb.hh @@ -31,6 +31,7 @@ // For Addr type. #include "arch/isa_traits.hh" +#include "base/misc.hh" class DefaultBTB { diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 66abf8dc6..c019ef4c7 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -34,7 +34,6 @@ #include "base/timebuf.hh" #include "cpu/exetrace.hh" #include "cpu/inst_seq.hh" -#include "mem/memory_interface.hh" template class O3ThreadState; @@ -301,9 +300,6 @@ class DefaultCommit /** Pointer to FullCPU. */ FullCPU *cpu; - /** Memory interface. Used for d-cache accesses. */ - MemInterface *dcacheInterface; - std::vector thread; Fault fetchFault; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 346a8bc1c..97703c430 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -64,8 +64,7 @@ DefaultCommit::TrapEvent::description() template DefaultCommit::DefaultCommit(Params *params) - : dcacheInterface(params->dcacheInterface), - squashCounter(0), + : squashCounter(0), iewToCommitDelay(params->iewToCommitDelay), commitToIEWDelay(params->commitToIEWDelay), renameToROBDelay(params->renameToROBDelay), diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index ed02a845b..4e0bb2d2d 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -46,6 +46,7 @@ #include "sim/stat_control.hh" using namespace std; +using namespace TheISA; BaseFullCPU::BaseFullCPU(Params *params) : BaseCPU(params), cpu_id(0) @@ -121,14 +122,9 @@ FullO3CPU::FullO3CPU(Params *params) system(params->system), memCtrl(system->memctrl), physmem(system->physmem), - mem(params->mem), -#else -// 
pTable(params->pTable), - mem(params->workload[0]->getMemory()), #endif // FULL_SYSTEM + mem(params->mem), switchCount(0), - icacheInterface(params->icacheInterface), - dcacheInterface(params->dcacheInterface), deferRegistration(params->deferRegistration), numThreads(number_of_threads) { @@ -782,6 +778,7 @@ FullO3CPU::readFloatReg(int reg_idx) template FloatRegBits FullO3CPU::readFloatRegBits(int reg_idx, int width) +{ return regFile.readFloatRegBits(reg_idx, width); } @@ -843,7 +840,7 @@ FullO3CPU::readArchFloatRegSingle(int reg_idx, unsigned tid) int idx = reg_idx + TheISA::FP_Base_DepTag; PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); - return regFile.readFloatRegSingle(phys_reg); + return regFile.readFloatReg(phys_reg); } template @@ -853,7 +850,7 @@ FullO3CPU::readArchFloatRegDouble(int reg_idx, unsigned tid) int idx = reg_idx + TheISA::FP_Base_DepTag; PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); - return regFile.readFloatRegDouble(phys_reg); + return regFile.readFloatReg(phys_reg, 64); } template @@ -863,7 +860,7 @@ FullO3CPU::readArchFloatRegInt(int reg_idx, unsigned tid) int idx = reg_idx + TheISA::FP_Base_DepTag; PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx); - return regFile.readFloatRegInt(phys_reg); + return regFile.readFloatRegBits(phys_reg); } template @@ -881,7 +878,7 @@ FullO3CPU::setArchFloatRegSingle(int reg_idx, float val, unsigned tid) { PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); - regFile.setFloatRegSingle(phys_reg, val); + regFile.setFloatReg(phys_reg, val); } template @@ -890,7 +887,7 @@ FullO3CPU::setArchFloatRegDouble(int reg_idx, double val, unsigned tid) { PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); - regFile.setFloatRegDouble(phys_reg, val); + regFile.setFloatReg(phys_reg, val, 64); } template @@ -899,7 +896,7 @@ FullO3CPU::setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid) { PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx); - 
regFile.setFloatRegInt(phys_reg, val); + regFile.setFloatRegBits(phys_reg, val); } template diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index bed95ad54..c791b2948 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -35,6 +35,7 @@ #include #include +#include "arch/isa_traits.hh" #include "base/statistics.hh" #include "base/timebuf.hh" #include "config/full_system.hh" @@ -50,7 +51,7 @@ template class Checker; class ExecContext; -class MemInterface; +class MemObject; class Process; class BaseFullCPU : public BaseCPU @@ -63,6 +64,8 @@ class BaseFullCPU : public BaseCPU void regStats(); + int readCpuId() { return cpu_id; } + protected: int cpu_id; }; @@ -71,6 +74,9 @@ template class FullO3CPU : public BaseFullCPU { public: + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + // Typedefs from the Impl here. typedef typename Impl::CPUPol CPUPolicy; typedef typename Impl::Params Params; @@ -226,14 +232,6 @@ class FullO3CPU : public BaseFullCPU int getDataAsid(unsigned tid) { return regFile.miscRegs[tid].getDataAsid(); } #else - /** Check if this address is a valid instruction address. */ - bool validInstAddr(Addr addr,unsigned tid) - { return thread[tid]->validInstAddr(addr); } - - /** Check if this address is a valid data address. */ - bool validDataAddr(Addr addr,unsigned tid) - { return thread[tid]->validDataAddr(addr); } - /** Get instruction asid. 
*/ int getInstAsid(unsigned tid) { return thread[tid]->asid; } @@ -259,13 +257,13 @@ class FullO3CPU : public BaseFullCPU void setIntReg(int reg_idx, uint64_t val); - void setFloatReg(int reg_idx, FloatReg val, int width); + void setFloatReg(int reg_idx, FloatReg val); void setFloatReg(int reg_idx, FloatReg val, int width); void setFloatRegBits(int reg_idx, FloatRegBits val); - void setFloatRegBits(int reg_idx, FloatRegBits val); + void setFloatRegBits(int reg_idx, FloatRegBits val, int width); uint64_t readArchIntReg(int reg_idx, unsigned tid); @@ -464,7 +462,7 @@ class FullO3CPU : public BaseFullCPU #endif /** Pointer to memory. */ - FunctionalMemory *mem; + MemObject *mem; Sampler *sampler; diff --git a/src/cpu/o3/dep_graph.hh b/src/cpu/o3/dep_graph.hh new file mode 100644 index 000000000..f8ae38da4 --- /dev/null +++ b/src/cpu/o3/dep_graph.hh @@ -0,0 +1,213 @@ + +#ifndef __CPU_O3_DEP_GRAPH_HH__ +#define __CPU_O3_DEP_GRAPH_HH__ + +#include "cpu/o3/comm.hh" + +template +class DependencyEntry +{ + public: + DependencyEntry() + : inst(NULL), next(NULL) + { } + + DynInstPtr inst; + //Might want to include data about what arch. register the + //dependence is waiting on. + DependencyEntry *next; +}; + +template +class DependencyGraph +{ + public: + typedef DependencyEntry DepEntry; + + DependencyGraph() + : numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0) + { } + + void resize(int num_entries); + + void reset(); + + void insert(PhysRegIndex idx, DynInstPtr &new_inst); + + void setInst(PhysRegIndex idx, DynInstPtr &new_inst) + { dependGraph[idx].inst = new_inst; } + + void clearInst(PhysRegIndex idx) + { dependGraph[idx].inst = NULL; } + + void remove(PhysRegIndex idx, DynInstPtr &inst_to_remove); + + DynInstPtr pop(PhysRegIndex idx); + + bool empty(PhysRegIndex idx) { return !dependGraph[idx].next; } + + /** Debugging function to dump out the dependency graph. + */ + void dump(); + + private: + /** Array of linked lists. 
Each linked list is a list of all the + * instructions that depend upon a given register. The actual + * register's index is used to index into the graph; ie all + * instructions in flight that are dependent upon r34 will be + * in the linked list of dependGraph[34]. + */ + DepEntry *dependGraph; + + int numEntries; + + // Debug variable, remove when done testing. + unsigned memAllocCounter; + + public: + uint64_t nodesTraversed; + uint64_t nodesRemoved; +}; + +template +void +DependencyGraph::resize(int num_entries) +{ + numEntries = num_entries; + dependGraph = new DepEntry[numEntries]; +} + +template +void +DependencyGraph::reset() +{ + // Clear the dependency graph + DepEntry *curr; + DepEntry *prev; + + for (int i = 0; i < numEntries; ++i) { + curr = dependGraph[i].next; + + while (curr) { + memAllocCounter--; + + prev = curr; + curr = prev->next; + prev->inst = NULL; + + delete prev; + } + + if (dependGraph[i].inst) { + dependGraph[i].inst = NULL; + } + + dependGraph[i].next = NULL; + } +} + +template +void +DependencyGraph::insert(PhysRegIndex idx, DynInstPtr &new_inst) +{ + //Add this new, dependent instruction at the head of the dependency + //chain. + + // First create the entry that will be added to the head of the + // dependency chain. + DepEntry *new_entry = new DepEntry; + new_entry->next = dependGraph[idx].next; + new_entry->inst = new_inst; + + // Then actually add it to the chain. + dependGraph[idx].next = new_entry; + + ++memAllocCounter; +} + + +template +void +DependencyGraph::remove(PhysRegIndex idx, + DynInstPtr &inst_to_remove) +{ + DepEntry *prev = &dependGraph[idx]; + DepEntry *curr = dependGraph[idx].next; + + // Make sure curr isn't NULL. Because this instruction is being + // removed from a dependency list, it must have been placed there at + // an earlier time. The dependency chain should not be empty, + // unless the instruction dependent upon it is already ready. 
+ if (curr == NULL) { + return; + } + + nodesRemoved++; + + // Find the instruction to remove within the dependency linked list. + while (curr->inst != inst_to_remove) { + prev = curr; + curr = curr->next; + nodesTraversed++; + + assert(curr != NULL); + } + + // Now remove this instruction from the list. + prev->next = curr->next; + + --memAllocCounter; + + // Could push this off to the destructor of DependencyEntry + curr->inst = NULL; + + delete curr; +} + +template +DynInstPtr +DependencyGraph::pop(PhysRegIndex idx) +{ + DepEntry *node; + node = dependGraph[idx].next; + DynInstPtr inst = NULL; + if (node) { + inst = node->inst; + dependGraph[idx].next = node->next; + node->inst = NULL; + memAllocCounter--; + delete node; + } + return inst; +} + +template +void +DependencyGraph::dump() +{ + DepEntry *curr; + + for (int i = 0; i < numEntries; ++i) + { + curr = &dependGraph[i]; + + if (curr->inst) { + cprintf("dependGraph[%i]: producer: %#x [sn:%lli] consumer: ", + i, curr->inst->readPC(), curr->inst->seqNum); + } else { + cprintf("dependGraph[%i]: No producer. 
consumer: ", i); + } + + while (curr->next != NULL) { + curr = curr->next; + + cprintf("%#x [sn:%lli] ", + curr->inst->readPC(), curr->inst->seqNum); + } + + cprintf("\n"); + } + cprintf("memAllocCounter: %i\n", memAllocCounter); +} + +#endif // __CPU_O3_DEP_GRAPH_HH__ diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 3fcfdc3a1..2b1d93cb7 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -29,10 +29,12 @@ #ifndef __CPU_O3_FETCH_HH__ #define __CPU_O3_FETCH_HH__ +#include "arch/utility.hh" #include "base/statistics.hh" #include "base/timebuf.hh" #include "cpu/pc_event.hh" -#include "mem/mem_interface.hh" +#include "mem/packet.hh" +#include "mem/port.hh" #include "sim/eventq.hh" class Sampler; @@ -65,6 +67,32 @@ class DefaultFetch typedef TheISA::MachInst MachInst; typedef TheISA::ExtMachInst ExtMachInst; + class IcachePort : public Port + { + protected: + DefaultFetch *fetch; + + public: + IcachePort(DefaultFetch *_fetch) + : Port(_fetch->name() + "-iport"), fetch(_fetch) + { } + + protected: + virtual Tick recvAtomic(PacketPtr pkt); + + virtual void recvFunctional(PacketPtr pkt); + + virtual void recvStatusChange(Status status); + + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) + { resp.clear(); snoop.clear(); } + + virtual bool recvTiming(PacketPtr pkt); + + virtual void recvRetry(); + }; + public: /** Overall fetch status. Used to determine if the CPU can * deschedule itsef due to a lack of activity. @@ -84,8 +112,9 @@ class DefaultFetch TrapPending, QuiescePending, SwitchOut, - IcacheMissStall, - IcacheMissComplete + IcacheWaitResponse, + IcacheRetry, + IcacheAccessComplete }; /** Fetching Policy, Add new policies here.*/ @@ -110,28 +139,6 @@ class DefaultFetch /** List that has the threads organized by priority. */ std::list priorityList; - public: - class CacheCompletionEvent : public Event - { - private: - MemReqPtr req; - /** Pointer to fetch. */ - DefaultFetch *fetch; - /** Thread id. 
*/ -// unsigned threadId; - - public: - /** Constructs a cache completion event, which tells fetch when the - * cache miss is complete. - */ - CacheCompletionEvent(MemReqPtr &_req, DefaultFetch *_fetch); - - /** Processes cache completion event. */ - virtual void process(); - /** Returns the description of the cache completion event. */ - virtual const char *description(); - }; - public: /** DefaultFetch constructor. */ DefaultFetch(Params *params); @@ -161,7 +168,7 @@ class DefaultFetch void initStage(); /** Processes cache completion event. */ - void processCacheCompletion(MemReqPtr &req); + void processCacheCompletion(PacketPtr pkt); void switchOut(); @@ -295,8 +302,10 @@ class DefaultFetch /** Wire used to write any information heading to decode. */ typename TimeBuffer::wire toDecode; + MemObject *mem; + /** Icache interface. */ - MemInterface *icacheInterface; + IcachePort *icachePort; /** BPredUnit. */ BPredUnit branchPred; @@ -305,8 +314,8 @@ class DefaultFetch Addr nextPC[Impl::MaxThreads]; - /** Memory request used to access cache. */ - MemReqPtr memReq[Impl::MaxThreads]; + /** Memory packet used to access cache. */ + PacketPtr memPkt[Impl::MaxThreads]; /** Variable that tracks if fetch has written to the time buffer this * cycle. Used to tell CPU if there is activity this cycle. 
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 1c5e508f6..a80afbcf4 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -27,12 +27,13 @@ */ #include "arch/isa_traits.hh" +#include "arch/utility.hh" #include "cpu/exetrace.hh" #include "cpu/o3/fetch.hh" -#include "mem/base_mem.hh" -#include "mem/mem_interface.hh" -#include "mem/mem_req.hh" +#include "mem/packet.hh" +#include "mem/request.hh" #include "sim/byteswap.hh" +#include "sim/host.hh" #include "sim/root.hh" #if FULL_SYSTEM @@ -42,42 +43,67 @@ #include "mem/functional/memory_control.hh" #include "mem/functional/physical.hh" #include "sim/system.hh" -#else // !FULL_SYSTEM -#include "mem/functional/functional.hh" #endif // FULL_SYSTEM #include using namespace std; +using namespace TheISA; template -DefaultFetch::CacheCompletionEvent::CacheCompletionEvent(MemReqPtr &_req, - DefaultFetch *_fetch) - : Event(&mainEventQueue, Delayed_Writeback_Pri), - req(_req), - fetch(_fetch) +Tick +DefaultFetch::IcachePort::recvAtomic(PacketPtr pkt) { - this->setFlags(Event::AutoDelete); + panic("DefaultFetch doesn't expect recvAtomic callback!"); + return curTick; } template void -DefaultFetch::CacheCompletionEvent::process() +DefaultFetch::IcachePort::recvFunctional(PacketPtr pkt) +{ + panic("DefaultFetch doesn't expect recvFunctional callback!"); +} + +template +void +DefaultFetch::IcachePort::recvStatusChange(Status status) +{ + if (status == RangeChange) + return; + + panic("DefaultFetch doesn't expect recvStatusChange callback!"); +} + +template +bool +DefaultFetch::IcachePort::recvTiming(Packet *pkt) { - fetch->processCacheCompletion(req); + fetch->processCacheCompletion(pkt); + return true; } template -const char * -DefaultFetch::CacheCompletionEvent::description() +void +DefaultFetch::IcachePort::recvRetry() { - return "DefaultFetch cache completion event"; + panic("DefaultFetch doesn't support retry yet."); + // we shouldn't get a retry unless we have a packet that we're + // 
waiting to transmit +/* + assert(cpu->dcache_pkt != NULL); + assert(cpu->_status == DcacheRetry); + Packet *tmp = cpu->dcache_pkt; + if (sendTiming(tmp)) { + cpu->_status = DcacheWaitResponse; + cpu->dcache_pkt = NULL; + } +*/ } template DefaultFetch::DefaultFetch(Params *params) - : icacheInterface(params->icacheInterface), - branchPred(params), + : branchPred(params), decodeToFetchDelay(params->decodeToFetchDelay), renameToFetchDelay(params->renameToFetchDelay), iewToFetchDelay(params->iewToFetchDelay), @@ -122,7 +148,7 @@ DefaultFetch::DefaultFetch(Params *params) } // Size of cache block. - cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; + cacheBlkSize = 64; // Create mask to get rid of offset bits. cacheBlkMask = (cacheBlkSize - 1); @@ -133,8 +159,7 @@ DefaultFetch::DefaultFetch(Params *params) priorityList.push_back(tid); - // Create a new memory request. - memReq[tid] = NULL; + memPkt[tid] = NULL; // Create space to store a cache line. cacheData[tid] = new uint8_t[cacheBlkSize]; @@ -253,6 +278,9 @@ DefaultFetch::setCPU(FullCPU *cpu_ptr) DPRINTF(Fetch, "Setting the CPU pointer.\n"); cpu = cpu_ptr; + // Name is finally available, so create the port. + icachePort = new IcachePort(this); + // Fetch needs to start fetching instructions at the very beginning, // so it must start up in active state. switchToActive(); @@ -315,9 +343,9 @@ DefaultFetch::initStage() template void -DefaultFetch::processCacheCompletion(MemReqPtr &req) +DefaultFetch::processCacheCompletion(PacketPtr pkt) { - unsigned tid = req->thread_num; + unsigned tid = pkt->req->getThreadNum(); DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n",tid); @@ -325,10 +353,11 @@ DefaultFetch::processCacheCompletion(MemReqPtr &req) // to return. // Can keep track of how many cache accesses go unused due to // misspeculation here. 
- if (fetchStatus[tid] != IcacheMissStall || - req != memReq[tid] || + if (fetchStatus[tid] != IcacheWaitResponse || + pkt != memPkt[tid] || isSwitchedOut()) { ++fetchIcacheSquashes; + delete pkt; return; } @@ -341,17 +370,19 @@ DefaultFetch::processCacheCompletion(MemReqPtr &req) switchToActive(); - // Only switch to IcacheMissComplete if we're not stalled as well. + // Only switch to IcacheAccessComplete if we're not stalled as well. if (checkStall(tid)) { fetchStatus[tid] = Blocked; } else { - fetchStatus[tid] = IcacheMissComplete; + fetchStatus[tid] = IcacheAccessComplete; } // memcpy(cacheData[tid], memReq[tid]->data, memReq[tid]->size); // Reset the mem req to NULL. - memReq[tid] = NULL; + delete pkt->req; + delete pkt; + memPkt[tid] = NULL; } template @@ -475,18 +506,15 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid // Setup the memReq to do a read of the first instruction's address. // Set the appropriate read size and flags as well. - memReq[tid] = new MemReq(); + // Build request here. + RequestPtr mem_req = new Request(tid, fetch_PC, cacheBlkSize, flags, + fetch_PC, cpu->readCpuId(), tid); - memReq[tid]->asid = tid; - memReq[tid]->thread_num = tid; - memReq[tid]->data = new uint8_t[64]; - memReq[tid]->xc = cpu->xcBase(tid); - memReq[tid]->cmd = Read; - memReq[tid]->reset(fetch_PC, cacheBlkSize, flags); + memPkt[tid] = NULL; // Translate the instruction request. //#if FULL_SYSTEM - fault = cpu->translateInstReq(memReq[tid]); + fault = cpu->translateInstReq(mem_req); //#else // fault = pTable->translate(memReq[tid]); //#endif @@ -508,48 +536,31 @@ DefaultFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid } #endif + // Build packet here. 
+ PacketPtr data_pkt = new Packet(mem_req, + Packet::ReadReq, Packet::Broadcast); + data_pkt->dataStatic(cacheData[tid]); + DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); - fault = cpu->mem->read(memReq[tid], cacheData[tid]); - // This read may change when the mem interface changes. + + fetchedCacheLines++; // Now do the timing access to see whether or not the instruction // exists within the cache. - if (icacheInterface && !icacheInterface->isBlocked()) { - DPRINTF(Fetch, "Doing cache access.\n"); - - memReq[tid]->completionEvent = NULL; - - memReq[tid]->time = curTick; - - MemAccessResult result = icacheInterface->access(memReq[tid]); - - fetchedCacheLines++; - - // If the cache missed, then schedule an event to wake - // up this stage once the cache miss completes. - // @todo: Possibly allow for longer than 1 cycle cache hits. - if (result != MA_HIT && icacheInterface->doEvents()) { - - memReq[tid]->completionEvent = - new CacheCompletionEvent(memReq[tid], this); - - lastIcacheStall[tid] = curTick; - - DPRINTF(Activity, "[tid:%i]: Activity: Stalling due to I-cache " - "miss.\n", tid); - - fetchStatus[tid] = IcacheMissStall; - } else { - DPRINTF(Fetch, "[tid:%i]: I-Cache hit. Doing Instruction " - "read.\n", tid); - -// memcpy(cacheData[tid], memReq[tid]->data, memReq[tid]->size); - } - } else { + if (!icachePort->sendTiming(data_pkt)) { DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid); ret_fault = NoFault; return false; } + + DPRINTF(Fetch, "Doing cache access.\n"); + + lastIcacheStall[tid] = curTick; + + DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache " + "response.\n", tid); + + fetchStatus[tid] = IcacheWaitResponse; } ret_fault = fault; @@ -567,10 +578,11 @@ DefaultFetch::doSquash(const Addr &new_PC, unsigned tid) nextPC[tid] = new_PC + instSize; // Clear the icache miss if it's outstanding. 
- if (fetchStatus[tid] == IcacheMissStall && icacheInterface) { + if (fetchStatus[tid] == IcacheWaitResponse) { DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n", tid); - memReq[tid] = NULL; + delete memPkt[tid]; + memPkt[tid] = NULL; } fetchStatus[tid] = Squashing; @@ -632,12 +644,12 @@ DefaultFetch::updateFetchStatus() if (fetchStatus[tid] == Running || fetchStatus[tid] == Squashing || - fetchStatus[tid] == IcacheMissComplete) { + fetchStatus[tid] == IcacheAccessComplete) { if (_status == Inactive) { DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid); - if (fetchStatus[tid] == IcacheMissComplete) { + if (fetchStatus[tid] == IcacheAccessComplete) { DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache" "completion\n",tid); } @@ -831,7 +843,7 @@ DefaultFetch::checkSignalsAndUpdate(unsigned tid) } } - if (checkStall(tid) && fetchStatus[tid] != IcacheMissStall) { + if (checkStall(tid) && fetchStatus[tid] != IcacheWaitResponse) { DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid); fetchStatus[tid] = Blocked; @@ -882,7 +894,7 @@ DefaultFetch::fetch(bool &status_change) // If returning from the delay of a cache miss, then update the status // to running, otherwise do the cache access. Possibly move this up // to tick() function. - if (fetchStatus[tid] == IcacheMissComplete) { + if (fetchStatus[tid] == IcacheAccessComplete) { DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n", tid); @@ -905,11 +917,11 @@ DefaultFetch::fetch(bool &status_change) ++fetchBlockedCycles; } else if (fetchStatus[tid] == Squashing) { ++fetchSquashCycles; - } else if (fetchStatus[tid] == IcacheMissStall) { + } else if (fetchStatus[tid] == IcacheWaitResponse) { ++icacheStallCycles; } - // Status is Idle, Squashing, Blocked, or IcacheMissStall, so + // Status is Idle, Squashing, Blocked, or IcacheWaitResponse, so // fetch should do nothing. 
return; } @@ -917,7 +929,7 @@ DefaultFetch::fetch(bool &status_change) ++fetchCycles; // If we had a stall due to an icache miss, then return. - if (fetchStatus[tid] == IcacheMissStall) { + if (fetchStatus[tid] == IcacheWaitResponse) { ++icacheStallCycles; status_change = true; return; @@ -1026,7 +1038,7 @@ DefaultFetch::fetch(bool &status_change) } else { // We shouldn't be in an icache miss and also have a fault (an ITB // miss) - if (fetchStatus[tid] == IcacheMissStall) { + if (fetchStatus[tid] == IcacheWaitResponse) { panic("Fetch should have exited prior to this!"); } @@ -1107,7 +1119,7 @@ DefaultFetch::getFetchingThread(FetchPriority &fetch_priority) int tid = *((*activeThreads).begin()); if (fetchStatus[tid] == Running || - fetchStatus[tid] == IcacheMissComplete || + fetchStatus[tid] == IcacheAccessComplete || fetchStatus[tid] == Idle) { return tid; } else { @@ -1133,7 +1145,7 @@ DefaultFetch::roundRobin() assert(high_pri <= numThreads); if (fetchStatus[high_pri] == Running || - fetchStatus[high_pri] == IcacheMissComplete || + fetchStatus[high_pri] == IcacheAccessComplete || fetchStatus[high_pri] == Idle) { priorityList.erase(pri_iter); @@ -1167,7 +1179,7 @@ DefaultFetch::iqCount() unsigned high_pri = PQ.top(); if (fetchStatus[high_pri] == Running || - fetchStatus[high_pri] == IcacheMissComplete || + fetchStatus[high_pri] == IcacheAccessComplete || fetchStatus[high_pri] == Idle) return high_pri; else @@ -1198,7 +1210,7 @@ DefaultFetch::lsqCount() unsigned high_pri = PQ.top(); if (fetchStatus[high_pri] == Running || - fetchStatus[high_pri] == IcacheMissComplete || + fetchStatus[high_pri] == IcacheAccessComplete || fetchStatus[high_pri] == Idle) return high_pri; else diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh index 29e84cd44..daf1007c1 100644 --- a/src/cpu/o3/free_list.hh +++ b/src/cpu/o3/free_list.hh @@ -33,6 +33,7 @@ #include #include "arch/isa_traits.hh" +#include "base/misc.hh" #include "base/trace.hh" #include "base/traceflags.hh" 
#include "cpu/o3/comm.hh" diff --git a/src/cpu/o3/fu_pool.cc b/src/cpu/o3/fu_pool.cc new file mode 100644 index 000000000..fb2b5c00d --- /dev/null +++ b/src/cpu/o3/fu_pool.cc @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include "cpu/o3/fu_pool.hh" +#include "encumbered/cpu/full/fu_pool.hh" +#include "sim/builder.hh" + +using namespace std; + +//////////////////////////////////////////////////////////////////////////// +// +// A pool of function units +// + +inline void +FUPool::FUIdxQueue::addFU(int fu_idx) +{ + funcUnitsIdx.push_back(fu_idx); + ++size; +} + +inline int +FUPool::FUIdxQueue::getFU() +{ + int retval = funcUnitsIdx[idx++]; + + if (idx == size) + idx = 0; + + return retval; +} + +FUPool::~FUPool() +{ + fuListIterator i = funcUnits.begin(); + fuListIterator end = funcUnits.end(); + for (; i != end; ++i) + delete *i; +} + + +// Constructor +FUPool::FUPool(string name, vector paramList) + : SimObject(name) +{ + numFU = 0; + + funcUnits.clear(); + + for (int i = 0; i < Num_OpClasses; ++i) { + maxOpLatencies[i] = 0; + maxIssueLatencies[i] = 0; + } + + // + // Iterate through the list of FUDescData structures + // + for (FUDDiterator i = paramList.begin(); i != paramList.end(); ++i) { + + // + // Don't bother with this if we're not going to create any FU's + // + if ((*i)->number) { + // + // Create the FuncUnit object from this structure + // - add the capabilities listed in the FU's operation + // description + // + // We create the first unit, then duplicate it as needed + // + FuncUnit *fu = new FuncUnit; + + OPDDiterator j = (*i)->opDescList.begin(); + OPDDiterator end = (*i)->opDescList.end(); + for (; j != end; ++j) { + // indicate that this pool has this capability + capabilityList.set((*j)->opClass); + + // Add each of the FU's that will have this capability to the + // appropriate queue. 
+ for (int k = 0; k < (*i)->number; ++k) + fuPerCapList[(*j)->opClass].addFU(numFU + k); + + // indicate that this FU has the capability + fu->addCapability((*j)->opClass, (*j)->opLat, (*j)->issueLat); + + if ((*j)->opLat > maxOpLatencies[(*j)->opClass]) + maxOpLatencies[(*j)->opClass] = (*j)->opLat; + + if ((*j)->issueLat > maxIssueLatencies[(*j)->opClass]) + maxIssueLatencies[(*j)->opClass] = (*j)->issueLat; + } + + numFU++; + + // Add the appropriate number of copies of this FU to the list + ostringstream s; + + s << (*i)->name() << "(0)"; + fu->name = s.str(); + funcUnits.push_back(fu); + + for (int c = 1; c < (*i)->number; ++c) { + ostringstream s; + numFU++; + FuncUnit *fu2 = new FuncUnit(*fu); + + s << (*i)->name() << "(" << c << ")"; + fu2->name = s.str(); + funcUnits.push_back(fu2); + } + } + } + + unitBusy.resize(numFU); + + for (int i = 0; i < numFU; i++) { + unitBusy[i] = false; + } +} + +void +FUPool::annotateMemoryUnits(unsigned hit_latency) +{ + maxOpLatencies[MemReadOp] = hit_latency; + + fuListIterator i = funcUnits.begin(); + fuListIterator iend = funcUnits.end(); + for (; i != iend; ++i) { + if ((*i)->provides(MemReadOp)) + (*i)->opLatency(MemReadOp) = hit_latency; + + if ((*i)->provides(MemWriteOp)) + (*i)->opLatency(MemWriteOp) = hit_latency; + } +} + +int +FUPool::getUnit(OpClass capability) +{ + // If this pool doesn't have the specified capability, + // return this information to the caller + if (!capabilityList[capability]) + return -2; + + int fu_idx = fuPerCapList[capability].getFU(); + int start_idx = fu_idx; + + // Iterate through the circular queue if needed, stopping if we've reached + // the first element again. 
+ while (unitBusy[fu_idx]) { + fu_idx = fuPerCapList[capability].getFU(); + if (fu_idx == start_idx) { + // No FU available + return -1; + } + } + + unitBusy[fu_idx] = true; + + return fu_idx; +} + +void +FUPool::freeUnitNextCycle(int fu_idx) +{ + assert(unitBusy[fu_idx]); + unitsToBeFreed.push_back(fu_idx); +} + +void +FUPool::processFreeUnits() +{ + while (!unitsToBeFreed.empty()) { + int fu_idx = unitsToBeFreed.back(); + unitsToBeFreed.pop_back(); + + assert(unitBusy[fu_idx]); + + unitBusy[fu_idx] = false; + } +} + +void +FUPool::dump() +{ + cout << "Function Unit Pool (" << name() << ")\n"; + cout << "======================================\n"; + cout << "Free List:\n"; + + for (int i = 0; i < numFU; ++i) { + if (unitBusy[i]) { + continue; + } + + cout << " [" << i << "] : "; + + cout << funcUnits[i]->name << " "; + + cout << "\n"; + } + + cout << "======================================\n"; + cout << "Busy List:\n"; + for (int i = 0; i < numFU; ++i) { + if (!unitBusy[i]) { + continue; + } + + cout << " [" << i << "] : "; + + cout << funcUnits[i]->name << " "; + + cout << "\n"; + } +} + +void +FUPool::switchOut() +{ +} + +void +FUPool::takeOverFrom() +{ + for (int i = 0; i < numFU; i++) { + unitBusy[i] = false; + } + unitsToBeFreed.clear(); +} + +// + +//////////////////////////////////////////////////////////////////////////// +// +// The SimObjects we use to get the FU information into the simulator +// +//////////////////////////////////////////////////////////////////////////// + +// +// FUPool - Contails a list of FUDesc objects to make available +// + +// +// The FuPool object +// + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(FUPool) + + SimObjectVectorParam FUList; + +END_DECLARE_SIM_OBJECT_PARAMS(FUPool) + + +BEGIN_INIT_SIM_OBJECT_PARAMS(FUPool) + + INIT_PARAM(FUList, "list of FU's for this pool") + +END_INIT_SIM_OBJECT_PARAMS(FUPool) + + +CREATE_SIM_OBJECT(FUPool) +{ + return new FUPool(getInstanceName(), FUList); +} + +REGISTER_SIM_OBJECT("FUPool", FUPool) + diff 
--git a/src/cpu/o3/fu_pool.hh b/src/cpu/o3/fu_pool.hh new file mode 100644 index 000000000..f590c4149 --- /dev/null +++ b/src/cpu/o3/fu_pool.hh @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __CPU_O3_FU_POOL_HH__ +#define __CPU_O3_FU_POOL_HH__ + +#include +#include +#include +#include + +#include "base/sched_list.hh" +#include "cpu/op_class.hh" +#include "sim/sim_object.hh" + +class FUDesc; +class FuncUnit; + +/** + * Pool of FU's, specific to the new CPU model. The old FU pool had lists of + * free units and busy units, and whenever a FU was needed it would iterate + * through the free units to find a FU that provided the capability. This pool + * has lists of units specific to each of the capabilities, and whenever a FU + * is needed, it iterates through that list to find a free unit. The previous + * FU pool would have to be ticked each cycle to update which units became + * free. This FU pool lets the IEW stage handle freeing units, which frees + * them as their scheduled execution events complete. This limits units in this + * model to either have identical issue and op latencies, or 1 cycle issue + * latencies. + */ +class FUPool : public SimObject +{ + private: + /** Maximum op execution latencies, per op class. */ + unsigned maxOpLatencies[Num_OpClasses]; + /** Maximum issue latencies, per op class. */ + unsigned maxIssueLatencies[Num_OpClasses]; + + /** Bitvector listing capabilities of this FU pool. */ + std::bitset capabilityList; + + /** Bitvector listing which FUs are busy. */ + std::vector unitBusy; + + /** List of units to be freed at the end of this cycle. */ + std::vector unitsToBeFreed; + + /** + * Class that implements a circular queue to hold FU indices. The hope is + * that FUs that have been just used will be moved to the end of the queue + * by iterating through it, thus leaving free units at the head of the + * queue. + */ + class FUIdxQueue { + public: + /** Constructs a circular queue of FU indices. */ + FUIdxQueue() + : idx(0), size(0) + { } + + /** Adds a FU to the queue. 
*/ + inline void addFU(int fu_idx); + + /** Returns the index of the FU at the head of the queue, and changes + * the index to the next element. + */ + inline int getFU(); + + private: + /** Circular queue index. */ + int idx; + + /** Size of the queue. */ + int size; + + /** Queue of FU indices. */ + std::vector funcUnitsIdx; + }; + + /** Per op class queues of FUs that provide that capability. */ + FUIdxQueue fuPerCapList[Num_OpClasses]; + + /** Number of FUs. */ + int numFU; + + /** Functional units. */ + std::vector funcUnits; + + typedef std::vector::iterator fuListIterator; + + public: + + /** Constructs a FU pool. */ + FUPool(std::string name, std::vector l); + ~FUPool(); + + /** Annotates units that provide memory operations. Included only because + * old FU pool provided this function. + */ + void annotateMemoryUnits(unsigned hit_latency); + + /** + * Gets a FU providing the requested capability. Will mark the unit as busy, + * but leaves the freeing of the unit up to the IEW stage. + * @param capability The capability requested. + * @return Returns -2 if the FU pool does not have the capability, -1 if + * there is no free FU, and the FU's index otherwise. + */ + int getUnit(OpClass capability); + + /** Frees a FU at the end of this cycle. */ + void freeUnitNextCycle(int fu_idx); + + /** Frees all FUs on the list. */ + void processFreeUnits(); + + /** Returns the total number of FUs. */ + int size() { return numFU; } + + /** Debugging function used to dump FU information. */ + void dump(); + + /** Returns the operation execution latency of the given capability. */ + unsigned getOpLatency(OpClass capability) { + return maxOpLatencies[capability]; + } + + /** Returns the issue latency of the given capability. 
*/ + unsigned getIssueLatency(OpClass capability) { + return maxIssueLatencies[capability]; + } + + void switchOut(); + void takeOverFrom(); +}; + +#endif // __CPU_O3_FU_POOL_HH__ diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 935320628..c931669c6 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -110,25 +110,6 @@ class DefaultIEW /** Writeback status. */ StageStatus wbStatus; - public: - /** LdWriteback event for a load completion. */ - class LdWritebackEvent : public Event { - private: - /** Instruction that is writing back data to the register file. */ - DynInstPtr inst; - /** Pointer to IEW stage. */ - DefaultIEW *iewStage; - - public: - /** Constructs a load writeback event. */ - LdWritebackEvent(DynInstPtr &_inst, DefaultIEW *_iew); - - /** Processes writeback event. */ - virtual void process(); - /** Returns the description of the writeback event. */ - virtual const char *description(); - }; - public: /** Constructs a DefaultIEW with the given parameters. */ DefaultIEW(Params *params); diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index b0137d7fc..955ebfdf3 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -38,58 +38,6 @@ using namespace std; -template -DefaultIEW::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst, - DefaultIEW *_iew) - : Event(&mainEventQueue), inst(_inst), iewStage(_iew) -{ - this->setFlags(Event::AutoDelete); -} - -template -void -DefaultIEW::LdWritebackEvent::process() -{ - DPRINTF(IEW, "Load writeback event [sn:%lli]\n", inst->seqNum); - DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum); - - //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); - - if (iewStage->isSwitchedOut()) { - inst = NULL; - return; - } else if (inst->isSquashed()) { - iewStage->wakeCPU(); - inst = NULL; - return; - } - - iewStage->wakeCPU(); - - if (!inst->isExecuted()) { - inst->setExecuted(); - - // Complete access to copy data to proper place. 
- if (inst->isStore()) { - inst->completeAcc(); - } - } - - // Need to insert instruction into queue to commit - iewStage->instToCommit(inst); - - iewStage->activityThisCycle(); - - inst = NULL; -} - -template -const char * -DefaultIEW::LdWritebackEvent::description() -{ - return "Load writeback event"; -} - template DefaultIEW::DefaultIEW(Params *params) : // @todo: Make this into a parameter. @@ -1280,7 +1228,7 @@ DefaultIEW::executeInsts() ldstQueue.executeStore(inst); // If the store had a fault then it may not have a mem req - if (inst->req && !(inst->req->flags & LOCKED)) { + if (inst->req && !(inst->req->getFlags() & LOCKED)) { inst->setExecuted(); instToCommit(inst); @@ -1556,7 +1504,7 @@ DefaultIEW::updateExeInstStats(DynInstPtr &inst) else iewExecutedInsts++; #else - iewExecutedInsts[thread_number]++; + iewExecutedInsts++; #endif // diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index 518de73d9..843f6a8fe 100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@ -38,7 +38,7 @@ #include "base/timebuf.hh" #include "cpu/inst_seq.hh" #include "cpu/o3/dep_graph.hh" -#include "encumbered/cpu/full/op_class.hh" +#include "cpu/op_class.hh" #include "sim/host.hh" class FUPool; diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index f1dc4e01f..4fa756cb6 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -64,8 +64,7 @@ InstructionQueue::FUCompletion::description() template InstructionQueue::InstructionQueue(Params *params) - : dcacheInterface(params->dcacheInterface), - fuPool(params->fuPool), + : fuPool(params->fuPool), numEntries(params->numIQEntries), totalWidth(params->issueWidth), numPhysIntRegs(params->numPhysIntRegs), diff --git a/src/cpu/o3/lsq.cc b/src/cpu/o3/lsq.cc new file mode 100644 index 000000000..8991ab8f8 --- /dev/null +++ b/src/cpu/o3/lsq.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_cpu.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/lsq_impl.hh" + +// Force the instantiation of LDSTQ for all the implementations we care about. 
+template class LSQ; + diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh new file mode 100644 index 000000000..51eb23cd7 --- /dev/null +++ b/src/cpu/o3/lsq.hh @@ -0,0 +1,324 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __CPU_O3_LSQ_HH__ +#define __CPU_O3_LSQ_HH__ + +#include +#include + +#include "config/full_system.hh" +#include "cpu/inst_seq.hh" +//#include "cpu/o3/cpu_policy.hh" +#include "cpu/o3/lsq_unit.hh" +#include "mem/port.hh" +//#include "mem/page_table.hh" +#include "sim/sim_object.hh" + +template +class LSQ { + public: + typedef typename Impl::Params Params; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::CPUPol::IEW IEW; + typedef typename Impl::CPUPol::LSQUnit LSQUnit; + + enum LSQPolicy { + Dynamic, + Partitioned, + Threshold + }; + + /** Constructs an LSQ with the given parameters. */ + LSQ(Params *params); + + /** Returns the name of the LSQ. */ + std::string name() const; + + /** Sets the pointer to the list of active threads. */ + void setActiveThreads(std::list *at_ptr); + /** Sets the CPU pointer. */ + void setCPU(FullCPU *cpu_ptr); + /** Sets the IEW stage pointer. */ + void setIEW(IEW *iew_ptr); + /** Sets the page table pointer. */ +// void setPageTable(PageTable *pt_ptr); + + void switchOut(); + void takeOverFrom(); + + /** Number of entries needed for the given amount of threads.*/ + int entryAmount(int num_threads); + void removeEntries(unsigned tid); + /** Reset the max entries for each thread. */ + void resetEntries(); + /** Resize the max entries for a thread. */ + void resizeEntries(unsigned size, unsigned tid); + + /** Ticks the LSQ. */ + void tick(); + /** Ticks a specific LSQ Unit. */ + void tick(unsigned tid) + { thread[tid].tick(); } + + /** Inserts a load into the LSQ. */ + void insertLoad(DynInstPtr &load_inst); + /** Inserts a store into the LSQ. */ + void insertStore(DynInstPtr &store_inst); + + /** Executes a load. */ + Fault executeLoad(DynInstPtr &inst); + + Fault executeLoad(int lq_idx, unsigned tid) + { return thread[tid].executeLoad(lq_idx); } + + /** Executes a store. 
*/ + Fault executeStore(DynInstPtr &inst); + + /** + * Commits loads up until the given sequence number for a specific thread. + */ + void commitLoads(InstSeqNum &youngest_inst, unsigned tid) + { thread[tid].commitLoads(youngest_inst); } + + /** + * Commits stores up until the given sequence number for a specific thread. + */ + void commitStores(InstSeqNum &youngest_inst, unsigned tid) + { thread[tid].commitStores(youngest_inst); } + + /** + * Attempts to write back stores until all cache ports are used or the + * interface becomes blocked. + */ + void writebackStores(); + /** Same as above, but only for one thread. */ + void writebackStores(unsigned tid); + + /** + * Squash instructions from a thread until the specified sequence number. + */ + void squash(const InstSeqNum &squashed_num, unsigned tid) + { thread[tid].squash(squashed_num); } + + /** Returns whether or not there was a memory ordering violation. */ + bool violation(); + /** + * Returns whether or not there was a memory ordering violation for a + * specific thread. + */ + bool violation(unsigned tid) + { return thread[tid].violation(); } + + /** Returns if a load is blocked due to the memory system for a specific + * thread. + */ + bool loadBlocked(unsigned tid) + { return thread[tid].loadBlocked(); } + + bool isLoadBlockedHandled(unsigned tid) + { return thread[tid].isLoadBlockedHandled(); } + + void setLoadBlockedHandled(unsigned tid) + { thread[tid].setLoadBlockedHandled(); } + + /** Gets the instruction that caused the memory ordering violation. */ + DynInstPtr getMemDepViolator(unsigned tid) + { return thread[tid].getMemDepViolator(); } + + /** Returns the head index of the load queue for a specific thread. */ + int getLoadHead(unsigned tid) + { return thread[tid].getLoadHead(); } + + /** Returns the sequence number of the head of the load queue. */ + InstSeqNum getLoadHeadSeqNum(unsigned tid) + { + return thread[tid].getLoadHeadSeqNum(); + } + + /** Returns the head index of the store queue. 
*/ + int getStoreHead(unsigned tid) + { return thread[tid].getStoreHead(); } + + /** Returns the sequence number of the head of the store queue. */ + InstSeqNum getStoreHeadSeqNum(unsigned tid) + { + return thread[tid].getStoreHeadSeqNum(); + } + + /** Returns the number of instructions in all of the queues. */ + int getCount(); + /** Returns the number of instructions in the queues of one thread. */ + int getCount(unsigned tid) + { return thread[tid].getCount(); } + + /** Returns the total number of loads in the load queue. */ + int numLoads(); + /** Returns the total number of loads for a single thread. */ + int numLoads(unsigned tid) + { return thread[tid].numLoads(); } + + /** Returns the total number of stores in the store queue. */ + int numStores(); + /** Returns the total number of stores for a single thread. */ + int numStores(unsigned tid) + { return thread[tid].numStores(); } + + /** Returns the total number of loads that are ready. */ + int numLoadsReady(); + /** Returns the number of loads that are ready for a single thread. */ + int numLoadsReady(unsigned tid) + { return thread[tid].numLoadsReady(); } + + /** Returns the number of free entries. */ + unsigned numFreeEntries(); + /** Returns the number of free entries for a specific thread. */ + unsigned numFreeEntries(unsigned tid); + + /** Returns if the LSQ is full (either LQ or SQ is full). */ + bool isFull(); + /** + * Returns if the LSQ is full for a specific thread (either LQ or SQ is + * full). + */ + bool isFull(unsigned tid); + + /** Returns if any of the LQs are full. */ + bool lqFull(); + /** Returns if the LQ of a given thread is full. */ + bool lqFull(unsigned tid); + + /** Returns if any of the SQs are full. */ + bool sqFull(); + /** Returns if the SQ of a given thread is full. */ + bool sqFull(unsigned tid); + + /** + * Returns if the LSQ is stalled due to a memory operation that must be + * replayed. 
+ */ + bool isStalled(); + /** + * Returns if the LSQ of a specific thread is stalled due to a memory + * operation that must be replayed. + */ + bool isStalled(unsigned tid); + + /** Returns whether or not there are any stores to write back to memory. */ + bool hasStoresToWB(); + + /** Returns whether or not a specific thread has any stores to write back + * to memory. + */ + bool hasStoresToWB(unsigned tid) + { return thread[tid].hasStoresToWB(); } + + /** Returns the number of stores a specific thread has to write back. */ + int numStoresToWB(unsigned tid) + { return thread[tid].numStoresToWB(); } + + /** Returns if the LSQ will write back to memory this cycle. */ + bool willWB(); + /** Returns if the LSQ of a specific thread will write back to memory this + * cycle. + */ + bool willWB(unsigned tid) + { return thread[tid].willWB(); } + + /** Debugging function to print out all instructions. */ + void dumpInsts(); + /** Debugging function to print out instructions from a specific thread. */ + void dumpInsts(unsigned tid) + { thread[tid].dumpInsts(); } + + /** Executes a read operation, using the load specified at the load index. */ + template + Fault read(RequestPtr req, T &data, int load_idx); + + /** Executes a store operation, using the store specified at the store + * index. + */ + template + Fault write(RequestPtr req, T &data, int store_idx); + + private: + /** The LSQ policy for SMT mode. */ + LSQPolicy lsqPolicy; + + /** The LSQ units for individual threads. */ + LSQUnit thread[Impl::MaxThreads]; + + /** The CPU pointer. */ + FullCPU *cpu; + + /** The IEW stage pointer. */ + IEW *iewStage; + + /** The pointer to the page table. */ +// PageTable *pTable; + + /** List of Active Threads in System. */ + std::list *activeThreads; + + /** Total Size of LQ Entries. */ + unsigned LQEntries; + /** Total Size of SQ Entries. */ + unsigned SQEntries; + + /** Max LQ Size - Used to Enforce Sharing Policies. 
*/ + unsigned maxLQEntries; + + /** Max SQ Size - Used to Enforce Sharing Policies. */ + unsigned maxSQEntries; + + /** Number of Threads. */ + unsigned numThreads; +}; + +template +template +Fault +LSQ::read(RequestPtr req, T &data, int load_idx) +{ + unsigned tid = req->getThreadNum(); + + return thread[tid].read(req, data, load_idx); +} + +template +template +Fault +LSQ::write(RequestPtr req, T &data, int store_idx) +{ + unsigned tid = req->getThreadNum(); + + return thread[tid].write(req, data, store_idx); +} + +#endif // __CPU_O3_LSQ_HH__ diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh new file mode 100644 index 000000000..a6ad27522 --- /dev/null +++ b/src/cpu/o3/lsq_impl.hh @@ -0,0 +1,538 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "cpu/o3/lsq.hh" + +using namespace std; + +template +LSQ::LSQ(Params *params) + : LQEntries(params->LQEntries), SQEntries(params->SQEntries), + numThreads(params->numberOfThreads) +{ + DPRINTF(LSQ, "Creating LSQ object.\n"); + + //**********************************************/ + //************ Handle SMT Parameters ***********/ + //**********************************************/ + string policy = params->smtLSQPolicy; + + //Convert string to lowercase + std::transform(policy.begin(), policy.end(), policy.begin(), + (int(*)(int)) tolower); + + //Figure out fetch policy + if (policy == "dynamic") { + lsqPolicy = Dynamic; + + maxLQEntries = LQEntries; + maxSQEntries = SQEntries; + + DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n"); + + } else if (policy == "partitioned") { + lsqPolicy = Partitioned; + + //@todo:make work if part_amt doesnt divide evenly. 
+ maxLQEntries = LQEntries / numThreads; + maxSQEntries = SQEntries / numThreads; + + DPRINTF(LSQ, "LSQ sharing policy set to Partitioned: " + "%i entries per LQ | %i entries per SQ\n", + maxLQEntries,maxSQEntries); + + } else if (policy == "threshold") { + lsqPolicy = Threshold; + + assert(params->smtLSQThreshold > LQEntries); + assert(params->smtLSQThreshold > SQEntries); + + //Divide up by threshold amount + //@todo: Should threads check the max and the total + //amount of the LSQ + maxLQEntries = params->smtLSQThreshold; + maxSQEntries = params->smtLSQThreshold; + + DPRINTF(LSQ, "LSQ sharing policy set to Threshold: " + "%i entries per LQ | %i entries per SQ\n", + maxLQEntries,maxSQEntries); + + } else { + assert(0 && "Invalid LSQ Sharing Policy.Options Are:{Dynamic," + "Partitioned, Threshold}"); + } + + //Initialize LSQs + for (int tid=0; tid < numThreads; tid++) { + thread[tid].init(params, maxLQEntries, maxSQEntries, tid); + } +} + + +template +std::string +LSQ::name() const +{ + return iewStage->name() + ".lsq"; +} + +template +void +LSQ::setActiveThreads(list *at_ptr) +{ + activeThreads = at_ptr; + assert(activeThreads != 0); +} + +template +void +LSQ::setCPU(FullCPU *cpu_ptr) +{ + cpu = cpu_ptr; + + for (int tid=0; tid < numThreads; tid++) { + thread[tid].setCPU(cpu_ptr); + } +} + +template +void +LSQ::setIEW(IEW *iew_ptr) +{ + iewStage = iew_ptr; + + for (int tid=0; tid < numThreads; tid++) { + thread[tid].setIEW(iew_ptr); + } +} + +#if 0 +template +void +LSQ::setPageTable(PageTable *pt_ptr) +{ + for (int tid=0; tid < numThreads; tid++) { + thread[tid].setPageTable(pt_ptr); + } +} +#endif + +template +void +LSQ::switchOut() +{ + for (int tid = 0; tid < numThreads; tid++) { + thread[tid].switchOut(); + } +} + +template +void +LSQ::takeOverFrom() +{ + for (int tid = 0; tid < numThreads; tid++) { + thread[tid].takeOverFrom(); + } +} + +template +int +LSQ::entryAmount(int num_threads) +{ + if (lsqPolicy == Partitioned) { + return LQEntries / num_threads; + } 
else { + return 0; + } +} + +template +void +LSQ::resetEntries() +{ + if (lsqPolicy != Dynamic || numThreads > 1) { + int active_threads = (*activeThreads).size(); + + list::iterator threads = (*activeThreads).begin(); + list::iterator list_end = (*activeThreads).end(); + + int maxEntries; + + if (lsqPolicy == Partitioned) { + maxEntries = LQEntries / active_threads; + } else if (lsqPolicy == Threshold && active_threads == 1) { + maxEntries = LQEntries; + } else { + maxEntries = LQEntries; + } + + while (threads != list_end) { + resizeEntries(maxEntries,*threads++); + } + } +} + +template +void +LSQ::removeEntries(unsigned tid) +{ + thread[tid].clearLQ(); + thread[tid].clearSQ(); +} + +template +void +LSQ::resizeEntries(unsigned size,unsigned tid) +{ + thread[tid].resizeLQ(size); + thread[tid].resizeSQ(size); +} + +template +void +LSQ::tick() +{ + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + + thread[tid].tick(); + } +} + +template +void +LSQ::insertLoad(DynInstPtr &load_inst) +{ + unsigned tid = load_inst->threadNumber; + + thread[tid].insertLoad(load_inst); +} + +template +void +LSQ::insertStore(DynInstPtr &store_inst) +{ + unsigned tid = store_inst->threadNumber; + + thread[tid].insertStore(store_inst); +} + +template +Fault +LSQ::executeLoad(DynInstPtr &inst) +{ + unsigned tid = inst->threadNumber; + + return thread[tid].executeLoad(inst); +} + +template +Fault +LSQ::executeStore(DynInstPtr &inst) +{ + unsigned tid = inst->threadNumber; + + return thread[tid].executeStore(inst); +} + +template +void +LSQ::writebackStores() +{ + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + + if (numStoresToWB(tid) > 0) { + DPRINTF(Writeback,"[tid:%i] Writing back stores. 
%i stores " + "available for Writeback.\n", tid, numStoresToWB(tid)); + } + + thread[tid].writebackStores(); + } +} + +template +bool +LSQ::violation() +{ + /* Answers: Does Anybody Have a Violation?*/ + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + if (thread[tid].violation()) + return true; + } + + return false; +} + +template +int +LSQ::getCount() +{ + unsigned total = 0; + + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + total += getCount(tid); + } + + return total; +} + +template +int +LSQ::numLoads() +{ + unsigned total = 0; + + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + total += numLoads(tid); + } + + return total; +} + +template +int +LSQ::numStores() +{ + unsigned total = 0; + + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + total += thread[tid].numStores(); + } + + return total; +} + +template +int +LSQ::numLoadsReady() +{ + unsigned total = 0; + + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + total += thread[tid].numLoadsReady(); + } + + return total; +} + +template +unsigned +LSQ::numFreeEntries() +{ + unsigned total = 0; + + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + total += thread[tid].numFreeEntries(); + } + + return total; +} + +template +unsigned +LSQ::numFreeEntries(unsigned tid) +{ + //if( lsqPolicy == Dynamic ) + //return numFreeEntries(); + //else + return thread[tid].numFreeEntries(); +} + +template +bool +LSQ::isFull() 
+{ + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + if (! (thread[tid].lqFull() || thread[tid].sqFull()) ) + return false; + } + + return true; +} + +template +bool +LSQ::isFull(unsigned tid) +{ + //@todo: Change to Calculate All Entries for + //Dynamic Policy + if( lsqPolicy == Dynamic ) + return isFull(); + else + return thread[tid].lqFull() || thread[tid].sqFull(); +} + +template +bool +LSQ::lqFull() +{ + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + if (!thread[tid].lqFull()) + return false; + } + + return true; +} + +template +bool +LSQ::lqFull(unsigned tid) +{ + //@todo: Change to Calculate All Entries for + //Dynamic Policy + if( lsqPolicy == Dynamic ) + return lqFull(); + else + return thread[tid].lqFull(); +} + +template +bool +LSQ::sqFull() +{ + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + if (!thread[tid].sqFull()) // ask the unit directly: sqFull(tid) calls back into sqFull() under the Dynamic policy + return false; + } + + return true; +} + +template +bool +LSQ::sqFull(unsigned tid) +{ + //@todo: Change to Calculate All Entries for + //Dynamic Policy + if( lsqPolicy == Dynamic ) + return sqFull(); + else + return thread[tid].sqFull(); +} + +template +bool +LSQ::isStalled() +{ + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + if (!thread[tid].isStalled()) + return false; + } + + return true; +} + +template +bool +LSQ::isStalled(unsigned tid) +{ + if( lsqPolicy == Dynamic ) + return isStalled(); + else + return thread[tid].isStalled(); +} + +template +bool +LSQ::hasStoresToWB() +{ + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + if 
(!hasStoresToWB(tid)) + return false; + } + + return true; +} + +template +bool +LSQ::willWB() +{ + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + if (!willWB(tid)) + return false; + } + + return true; +} + +template +void +LSQ::dumpInsts() +{ + list::iterator active_threads = (*activeThreads).begin(); + + while (active_threads != (*activeThreads).end()) { + unsigned tid = *active_threads++; + thread[tid].dumpInsts(); + } +} diff --git a/src/cpu/o3/lsq_unit.cc b/src/cpu/o3/lsq_unit.cc new file mode 100644 index 000000000..dd29007bc --- /dev/null +++ b/src/cpu/o3/lsq_unit.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/o3/alpha_dyn_inst.hh" +#include "cpu/o3/alpha_cpu.hh" +#include "cpu/o3/alpha_impl.hh" +#include "cpu/o3/lsq_unit_impl.hh" + +// Force the instantiation of LDSTQ for all the implementations we care about. +template class LSQUnit; + diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh new file mode 100644 index 000000000..b339cea2c --- /dev/null +++ b/src/cpu/o3/lsq_unit.hh @@ -0,0 +1,629 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_LSQ_UNIT_HH__ +#define __CPU_O3_LSQ_UNIT_HH__ + +#include +#include +#include + +#include "arch/faults.hh" +#include "config/full_system.hh" +#include "base/hashmap.hh" +#include "cpu/inst_seq.hh" +#include "mem/packet.hh" +#include "mem/port.hh" +//#include "mem/page_table.hh" +//#include "sim/debug.hh" +//#include "sim/sim_object.hh" + +/** + * Class that implements the actual LQ and SQ for each specific + * thread. Both are circular queues; load entries are freed upon + * committing, while store entries are freed once they writeback. The + * LSQUnit tracks if there are memory ordering violations, and also + * detects partial load to store forwarding cases (a store only has + * part of a load's data) that requires the load to wait until the + * store writes back. In the former case it holds onto the instruction + * until the dependence unit looks at it, and in the latter it stalls + * the LSQ until the store writes back. At that point the load is + * replayed. 
+ */ +template +class LSQUnit { + protected: + typedef TheISA::IntReg IntReg; + public: + typedef typename Impl::Params Params; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::CPUPol::IEW IEW; + typedef typename Impl::CPUPol::IssueStruct IssueStruct; + + public: + /** Constructs an LSQ unit. init() must be called prior to use. */ + LSQUnit(); + + /** Initializes the LSQ unit with the specified number of entries. */ + void init(Params *params, unsigned maxLQEntries, + unsigned maxSQEntries, unsigned id); + + /** Returns the name of the LSQ unit. */ + std::string name() const; + + /** Sets the CPU pointer. */ + void setCPU(FullCPU *cpu_ptr); + + /** Sets the IEW stage pointer. */ + void setIEW(IEW *iew_ptr) + { iewStage = iew_ptr; } + + /** Sets the page table pointer. */ +// void setPageTable(PageTable *pt_ptr); + + void switchOut(); + + void takeOverFrom(); + + bool isSwitchedOut() { return switchedOut; } + + /** Ticks the LSQ unit, which in this case only resets the number of + * used cache ports. + * @todo: Move the number of used ports up to the LSQ level so it can + * be shared by all LSQ units. + */ + void tick() { usedPorts = 0; } + + /** Inserts an instruction. */ + void insert(DynInstPtr &inst); + /** Inserts a load instruction. */ + void insertLoad(DynInstPtr &load_inst); + /** Inserts a store instruction. */ + void insertStore(DynInstPtr &store_inst); + + /** Executes a load instruction. */ + Fault executeLoad(DynInstPtr &inst); + + Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; } + /** Executes a store instruction. */ + Fault executeStore(DynInstPtr &inst); + + /** Commits the head load. */ + void commitLoad(); + /** Commits loads older than a specific sequence number. */ + void commitLoads(InstSeqNum &youngest_inst); + + /** Commits stores older than a specific sequence number. */ + void commitStores(InstSeqNum &youngest_inst); + + /** Writes back stores. 
*/ + void writebackStores(); + + void completeDataAccess(PacketPtr pkt); + + void completeStoreDataAccess(DynInstPtr &inst); + + // @todo: Include stats in the LSQ unit. + //void regStats(); + + /** Clears all the entries in the LQ. */ + void clearLQ(); + + /** Clears all the entries in the SQ. */ + void clearSQ(); + + /** Resizes the LQ to a given size. */ + void resizeLQ(unsigned size); + + /** Resizes the SQ to a given size. */ + void resizeSQ(unsigned size); + + /** Squashes all instructions younger than a specific sequence number. */ + void squash(const InstSeqNum &squashed_num); + + /** Returns if there is a memory ordering violation. Value is reset upon + * call to getMemDepViolator(). + */ + bool violation() { return memDepViolator; } + + /** Returns the memory ordering violator. */ + DynInstPtr getMemDepViolator(); + + /** Returns if a load became blocked due to the memory system. */ + bool loadBlocked() + { return isLoadBlocked; } + + void clearLoadBlocked() + { isLoadBlocked = false; } + + bool isLoadBlockedHandled() + { return loadBlockedHandled; } + + void setLoadBlockedHandled() + { loadBlockedHandled = true; } + + /** Returns the number of free entries (min of free LQ and SQ entries). */ + unsigned numFreeEntries(); + + /** Returns the number of loads ready to execute. */ + int numLoadsReady(); + + /** Returns the number of loads in the LQ. */ + int numLoads() { return loads; } + + /** Returns the number of stores in the SQ. */ + int numStores() { return stores; } + + /** Returns if either the LQ or SQ is full. */ + bool isFull() { return lqFull() || sqFull(); } + + /** Returns if the LQ is full. */ + bool lqFull() { return loads >= (LQEntries - 1); } + + /** Returns if the SQ is full. */ + bool sqFull() { return stores >= (SQEntries - 1); } + + /** Returns the number of instructions in the LSQ. */ + unsigned getCount() { return loads + stores; } + + /** Returns if there are any stores to writeback. 
*/ + bool hasStoresToWB() { return storesToWB; } + + /** Returns the number of stores to writeback. */ + int numStoresToWB() { return storesToWB; } + + /** Returns if the LSQ unit will writeback on this cycle. */ + bool willWB() { return storeQueue[storeWBIdx].canWB && + !storeQueue[storeWBIdx].completed/* && + !dcacheInterface->isBlocked()*/; } + + private: + /** Completes the store at the specified index. */ + void completeStore(int store_idx); + + /** Increments the given store index (circular queue). */ + inline void incrStIdx(int &store_idx); + /** Decrements the given store index (circular queue). */ + inline void decrStIdx(int &store_idx); + /** Increments the given load index (circular queue). */ + inline void incrLdIdx(int &load_idx); + /** Decrements the given load index (circular queue). */ + inline void decrLdIdx(int &load_idx); + + public: + /** Debugging function to dump instructions in the LSQ. */ + void dumpInsts(); + + private: + /** Pointer to the CPU. */ + FullCPU *cpu; + + /** Pointer to the IEW stage. */ + IEW *iewStage; + + MemObject *mem; + + class DcachePort : public Port + { + protected: + FullCPU *cpu; + LSQUnit *lsq; + + public: + DcachePort(FullCPU *_cpu, LSQUnit *_lsq) + : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq) + { } + + protected: + virtual Tick recvAtomic(PacketPtr pkt); + + virtual void recvFunctional(PacketPtr pkt); + + virtual void recvStatusChange(Status status); + + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) + { resp.clear(); snoop.clear(); } + + virtual bool recvTiming(PacketPtr pkt); + + virtual void recvRetry(); + }; + + /** Pointer to the D-cache. */ + DcachePort *dcachePort; + + /** Pointer to the page table. */ +// PageTable *pTable; + + public: + struct SQEntry { + /** Constructs an empty store queue entry. */ + SQEntry() + : inst(NULL), req(NULL), size(0), data(0), + canWB(0), committed(0), completed(0) + { } + + /** Constructs a store queue entry for a given instruction. 
*/ + SQEntry(DynInstPtr &_inst) + : inst(_inst), req(NULL), size(0), data(0), + canWB(0), committed(0), completed(0) + { } + + /** The store instruction. */ + DynInstPtr inst; + /** The request for the store. */ + RequestPtr req; + /** The size of the store. */ + int size; + /** The store data. */ + IntReg data; + /** Whether or not the store can writeback. */ + bool canWB; + /** Whether or not the store is committed. */ + bool committed; + /** Whether or not the store is completed. */ + bool completed; + }; + + private: + /** The LSQUnit thread id. */ + unsigned lsqID; + + /** The store queue. */ + std::vector storeQueue; + + /** The load queue. */ + std::vector loadQueue; + + /** The number of LQ entries, plus a sentinel entry (circular queue). + * @todo: Consider having var that records the true number of LQ entries. + */ + unsigned LQEntries; + /** The number of SQ entries, plus a sentinel entry (circular queue). + * @todo: Consider having var that records the true number of SQ entries. + */ + unsigned SQEntries; + + /** The number of load instructions in the LQ. */ + int loads; + /** The number of store instructions in the SQ. */ + int stores; + /** The number of store instructions in the SQ waiting to writeback. */ + int storesToWB; + + /** The index of the head instruction in the LQ. */ + int loadHead; + /** The index of the tail instruction in the LQ. */ + int loadTail; + + /** The index of the head instruction in the SQ. */ + int storeHead; + /** The index of the first instruction that may be ready to be + * written back, and has not yet been written back. + */ + int storeWBIdx; + /** The index of the tail instruction in the SQ. */ + int storeTail; + + /// @todo Consider moving to a more advanced model with write vs read ports + /** The number of cache ports available each cycle. */ + int cachePorts; + + /** The number of used cache ports in this cycle. 
*/ + int usedPorts; + + bool switchedOut; + + //list mshrSeqNums; + + /** Wire to read information from the issue stage time queue. */ + typename TimeBuffer::wire fromIssue; + + /** Whether or not the LSQ is stalled. */ + bool stalled; + /** The store that causes the stall due to partial store to load + * forwarding. + */ + InstSeqNum stallingStoreIsn; + /** The index of the above store. */ + int stallingLoadIdx; + + /** Whether or not a load is blocked due to the memory system. */ + bool isLoadBlocked; + + bool loadBlockedHandled; + + InstSeqNum blockedLoadSeqNum; + + /** The oldest load that caused a memory ordering violation. */ + DynInstPtr memDepViolator; + + // Will also need how many read/write ports the Dcache has. Or keep track + // of that in stage that is one level up, and only call executeLoad/Store + // the appropriate number of times. +/* + // total number of loads forwaded from LSQ stores + Stats::Vector<> lsq_forw_loads; + + // total number of loads ignored due to invalid addresses + Stats::Vector<> inv_addr_loads; + + // total number of software prefetches ignored due to invalid addresses + Stats::Vector<> inv_addr_swpfs; + + // total non-speculative bogus addresses seen (debug var) + Counter sim_invalid_addrs; + Stats::Vector<> fu_busy; //cumulative fu busy + + // ready loads blocked due to memory disambiguation + Stats::Vector<> lsq_blocked_loads; + + Stats::Scalar<> lsqInversion; +*/ + public: + /** Executes the load at the given index. */ + template + Fault read(Request *req, T &data, int load_idx); + + /** Executes the store at the given index. */ + template + Fault write(Request *req, T &data, int store_idx); + + /** Returns the index of the head load instruction. */ + int getLoadHead() { return loadHead; } + /** Returns the sequence number of the head load instruction. 
*/ + InstSeqNum getLoadHeadSeqNum() + { + if (loadQueue[loadHead]) { + return loadQueue[loadHead]->seqNum; + } else { + return 0; + } + + } + + /** Returns the index of the head store instruction. */ + int getStoreHead() { return storeHead; } + /** Returns the sequence number of the head store instruction. */ + InstSeqNum getStoreHeadSeqNum() + { + if (storeQueue[storeHead].inst) { + return storeQueue[storeHead].inst->seqNum; + } else { + return 0; + } + + } + + /** Returns whether or not the LSQ unit is stalled. */ + bool isStalled() { return stalled; } +}; + +template +template +Fault +LSQUnit::read(Request *req, T &data, int load_idx) +{ + DynInstPtr load_inst = loadQueue[load_idx]; + + assert(load_inst); + + assert(!load_inst->isExecuted()); + + // Make sure this isn't an uncacheable access + // A bit of a hackish way to get uncached accesses to work only if they're + // at the head of the LSQ and are ready to commit (at the head of the ROB + // too). + if (req->getFlags() & UNCACHEABLE && + (load_idx != loadHead || !load_inst->reachedCommit)) { + iewStage->rescheduleMemInst(load_inst); + return TheISA::genMachineCheckFault(); + } + + // Check the SQ for any previous stores that might lead to forwarding + int store_idx = load_inst->sqIdx; + + int store_size = 0; + + DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, " + "storeHead: %i addr: %#x\n", + load_idx, store_idx, storeHead, req->getPaddr()); + +#if 0 + if (req->getFlags() & LOCKED) { + cpu->lockAddr = req->getPaddr(); + cpu->lockFlag = true; + } +#endif + + while (store_idx != -1) { + // End once we've reached the top of the LSQ + if (store_idx == storeWBIdx) { + break; + } + + // Step back to the next older store: new stores are inserted at + // storeTail, which is then incremented, so decrementing walks + // toward older entries. Wrap around the circular queue at 0. + if (--store_idx < 0) + store_idx += SQEntries; + + assert(storeQueue[store_idx].inst); + + store_size = storeQueue[store_idx].size; + + // A size of 0 means the store has not recorded any data yet + // (write() is what sets the size), so there is nothing to compare. + if (store_size == 0) + continue; + + // Check if the store data is within the lower and upper bounds of + // addresses that the request needs.
+ bool store_has_lower_limit = + req->getVaddr() >= storeQueue[store_idx].inst->effAddr; + bool store_has_upper_limit = + (req->getVaddr() + req->getSize()) <= + (storeQueue[store_idx].inst->effAddr + store_size); + bool lower_load_has_store_part = + req->getVaddr() < (storeQueue[store_idx].inst->effAddr + + store_size); + bool upper_load_has_store_part = + (req->getVaddr() + req->getSize()) > + storeQueue[store_idx].inst->effAddr; + + // If the store's data has all of the data needed, we can forward. + if (store_has_lower_limit && store_has_upper_limit) { + // Get shift amount for offset into the store's data. + int shift_amt = req->getVaddr() & (store_size - 1); + // @todo: Magic number, assumes byte addressing + shift_amt = shift_amt << 3; + + // Cast this to type T? + data = storeQueue[store_idx].data >> shift_amt; + + assert(!load_inst->memData); + load_inst->memData = new uint8_t[64]; + + memcpy(load_inst->memData, &data, req->getSize()); + + DPRINTF(LSQUnit, "Forwarding from store idx %i to load to " + "addr %#x, data %#x\n", + store_idx, req->getVaddr(), *(load_inst->memData)); +/* + typename LdWritebackEvent *wb = + new typename LdWritebackEvent(load_inst, + iewStage); + + // We'll say this has a 1 cycle load-store forwarding latency + // for now. + // @todo: Need to make this a parameter. + wb->schedule(curTick); +*/ + // Should keep track of stat for forwarded data + return NoFault; + } else if ((store_has_lower_limit && lower_load_has_store_part) || + (store_has_upper_limit && upper_load_has_store_part) || + (lower_load_has_store_part && upper_load_has_store_part)) { + // This is the partial store-load forwarding case where a store + // has only part of the load's data. + + // If it's already been written back, then don't worry about + // stalling on it. + if (storeQueue[store_idx].completed) { + continue; + } + + // Must stall load and force it to retry, so long as it's the oldest + // load that needs to do so. 
+ if (!stalled || + (stalled && + load_inst->seqNum < + loadQueue[stallingLoadIdx]->seqNum)) { + stalled = true; + stallingStoreIsn = storeQueue[store_idx].inst->seqNum; + stallingLoadIdx = load_idx; + } + + // Tell IQ/mem dep unit that this instruction will need to be + // rescheduled eventually + iewStage->rescheduleMemInst(load_inst); + + // Do not generate a writeback event as this instruction is not + // complete. + DPRINTF(LSQUnit, "Load-store forwarding mis-match. " + "Store idx %i to load addr %#x\n", + store_idx, req->getVaddr()); + + return NoFault; + } + } + + // If there's no forwarding case, then go access memory + DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n", + load_inst->seqNum, load_inst->readPC()); + + assert(!load_inst->memData); + load_inst->memData = new uint8_t[64]; + + ++usedPorts; + + DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n", + load_inst->readPC()); + + PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); + data_pkt->dataStatic(load_inst->memData); + + // if we have a cache, do cache access too + if (!dcachePort->sendTiming(data_pkt)) { + // There's an older load that's already going to squash. + if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum) + return NoFault; + + // Record that the load was blocked due to memory. This + // load will squash all instructions after it, be + // refetched, and re-executed. + isLoadBlocked = true; + loadBlockedHandled = false; + blockedLoadSeqNum = load_inst->seqNum; + // No fault occurred, even though the interface is blocked. 
+ return NoFault; + } + + if (data_pkt->result != Packet::Success) { + DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n"); + DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", + load_inst->seqNum); + } else { + DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n"); + DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", + load_inst->seqNum); + } + + return NoFault; +} + +template +template +Fault +LSQUnit::write(Request *req, T &data, int store_idx) +{ + assert(storeQueue[store_idx].inst); + + DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x" + " | storeHead:%i [sn:%i]\n", + store_idx, req->getPaddr(), data, storeHead, + storeQueue[store_idx].inst->seqNum); + + storeQueue[store_idx].req = req; + storeQueue[store_idx].size = sizeof(T); + storeQueue[store_idx].data = data; + + // This function only writes the data to the store queue, so no fault + // can happen here. + return NoFault; +} + +#endif // __CPU_O3_LSQ_UNIT_HH__ diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh new file mode 100644 index 000000000..3f6af3d2c --- /dev/null +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -0,0 +1,866 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/checker/cpu.hh" +#include "cpu/o3/lsq_unit.hh" +#include "base/str.hh" +#include "mem/request.hh" + +template +void +LSQUnit::completeDataAccess(PacketPtr pkt) +{ +/* + DPRINTF(IEW, "Load writeback event [sn:%lli]\n", inst->seqNum); + DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum); + + //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); + + if (iewStage->isSwitchedOut()) { + inst = NULL; + return; + } else if (inst->isSquashed()) { + iewStage->wakeCPU(); + inst = NULL; + return; + } + + iewStage->wakeCPU(); + + if (!inst->isExecuted()) { + inst->setExecuted(); + + // Complete access to copy data to proper place. 
+ inst->completeAcc(); + } + + // Need to insert instruction into queue to commit + iewStage->instToCommit(inst); + + iewStage->activityThisCycle(); + + inst = NULL; +*/ +} + +template +void +LSQUnit::completeStoreDataAccess(DynInstPtr &inst) +{ +/* + DPRINTF(LSQ, "Cache miss complete for store idx:%i\n", storeIdx); + DPRINTF(Activity, "Activity: st writeback event idx:%i\n", storeIdx); + + //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum); + + if (lsqPtr->isSwitchedOut()) + return; + + lsqPtr->cpu->wakeCPU(); + + if (wb) + lsqPtr->completeDataAccess(storeIdx); + lsqPtr->completeStore(storeIdx); +*/ +} + +template +Tick +LSQUnit::DcachePort::recvAtomic(PacketPtr pkt) +{ + panic("O3CPU model does not work with atomic mode!"); + return curTick; +} + +template +void +LSQUnit::DcachePort::recvFunctional(PacketPtr pkt) +{ + panic("O3CPU doesn't expect recvFunctional callback!"); +} + +template +void +LSQUnit::DcachePort::recvStatusChange(Status status) +{ + if (status == RangeChange) + return; + + panic("O3CPU doesn't expect recvStatusChange callback!"); +} + +template +bool +LSQUnit::DcachePort::recvTiming(PacketPtr pkt) +{ + lsq->completeDataAccess(pkt); + return true; +} + +template +void +LSQUnit::DcachePort::recvRetry() +{ + panic("Retry unsupported for now!"); + // we shouldn't get a retry unless we have a packet that we're + // waiting to transmit +/* + assert(cpu->dcache_pkt != NULL); + assert(cpu->_status == DcacheRetry); + PacketPtr tmp = cpu->dcache_pkt; + if (sendTiming(tmp)) { + cpu->_status = DcacheWaitResponse; + cpu->dcache_pkt = NULL; + } +*/ +} + +template +LSQUnit::LSQUnit() + : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false), + loadBlockedHandled(false) +{ +} + +template +void +LSQUnit::init(Params *params, unsigned maxLQEntries, + unsigned maxSQEntries, unsigned id) +{ + DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",id); + + switchedOut = false; + + lsqID = id; + + // Add 1 for the sentinel entry (they are 
circular queues). + LQEntries = maxLQEntries + 1; + SQEntries = maxSQEntries + 1; + + loadQueue.resize(LQEntries); + storeQueue.resize(SQEntries); + + loadHead = loadTail = 0; + + storeHead = storeWBIdx = storeTail = 0; + + usedPorts = 0; + cachePorts = params->cachePorts; + + Port *mem_dport = params->mem->getPort(""); + dcachePort->setPeer(mem_dport); + mem_dport->setPeer(dcachePort); + + memDepViolator = NULL; + + blockedLoadSeqNum = 0; +} + +template +void +LSQUnit::setCPU(FullCPU *cpu_ptr) +{ + cpu = cpu_ptr; + dcachePort = new DcachePort(cpu, this); +} + +template +std::string +LSQUnit::name() const +{ + if (Impl::MaxThreads == 1) { + return iewStage->name() + ".lsq"; + } else { + return iewStage->name() + ".lsq.thread." + to_string(lsqID); + } +} + +template +void +LSQUnit::clearLQ() +{ + loadQueue.clear(); +} + +template +void +LSQUnit::clearSQ() +{ + storeQueue.clear(); +} + +#if 0 +template +void +LSQUnit::setPageTable(PageTable *pt_ptr) +{ + DPRINTF(LSQUnit, "Setting the page table pointer.\n"); + pTable = pt_ptr; +} +#endif + +template +void +LSQUnit::switchOut() +{ + switchedOut = true; + for (int i = 0; i < loadQueue.size(); ++i) + loadQueue[i] = NULL; + + assert(storesToWB == 0); +} + +template +void +LSQUnit::takeOverFrom() +{ + switchedOut = false; + loads = stores = storesToWB = 0; + + loadHead = loadTail = 0; + + storeHead = storeWBIdx = storeTail = 0; + + usedPorts = 0; + + memDepViolator = NULL; + + blockedLoadSeqNum = 0; + + stalled = false; + isLoadBlocked = false; + loadBlockedHandled = false; +} + +template +void +LSQUnit::resizeLQ(unsigned size) +{ + unsigned size_plus_sentinel = size + 1; + assert(size_plus_sentinel >= LQEntries); + + if (size_plus_sentinel > LQEntries) { + while (size_plus_sentinel > loadQueue.size()) { + DynInstPtr dummy; + loadQueue.push_back(dummy); + LQEntries++; + } + } else { + LQEntries = size_plus_sentinel; + } + +} + +template +void +LSQUnit::resizeSQ(unsigned size) +{ + unsigned size_plus_sentinel = size + 1; + 
if (size_plus_sentinel > SQEntries) { + while (size_plus_sentinel > storeQueue.size()) { + SQEntry dummy; + storeQueue.push_back(dummy); + SQEntries++; + } + } else { + SQEntries = size_plus_sentinel; + } +} + +template +void +LSQUnit::insert(DynInstPtr &inst) +{ + assert(inst->isMemRef()); + + assert(inst->isLoad() || inst->isStore()); + + if (inst->isLoad()) { + insertLoad(inst); + } else { + insertStore(inst); + } + + inst->setInLSQ(); +} + +template +void +LSQUnit::insertLoad(DynInstPtr &load_inst) +{ + assert((loadTail + 1) % LQEntries != loadHead); + assert(loads < LQEntries); + + DPRINTF(LSQUnit, "Inserting load PC %#x, idx:%i [sn:%lli]\n", + load_inst->readPC(), loadTail, load_inst->seqNum); + + load_inst->lqIdx = loadTail; + + if (stores == 0) { + load_inst->sqIdx = -1; + } else { + load_inst->sqIdx = storeTail; + } + + loadQueue[loadTail] = load_inst; + + incrLdIdx(loadTail); + + ++loads; +} + +template +void +LSQUnit::insertStore(DynInstPtr &store_inst) +{ + // Make sure it is not full before inserting an instruction. + assert((storeTail + 1) % SQEntries != storeHead); + assert(stores < SQEntries); + + DPRINTF(LSQUnit, "Inserting store PC %#x, idx:%i [sn:%lli]\n", + store_inst->readPC(), storeTail, store_inst->seqNum); + + store_inst->sqIdx = storeTail; + store_inst->lqIdx = loadTail; + + storeQueue[storeTail] = SQEntry(store_inst); + + incrStIdx(storeTail); + + ++stores; +} + +template +typename Impl::DynInstPtr +LSQUnit::getMemDepViolator() +{ + DynInstPtr temp = memDepViolator; + + memDepViolator = NULL; + + return temp; +} + +template +unsigned +LSQUnit::numFreeEntries() +{ + unsigned free_lq_entries = LQEntries - loads; + unsigned free_sq_entries = SQEntries - stores; + + // Both the LQ and SQ entries have an extra dummy entry to differentiate + // empty/full conditions. Subtract 1 from the free entries. 
+ if (free_lq_entries < free_sq_entries) { + return free_lq_entries - 1; + } else { + return free_sq_entries - 1; + } +} + +/** + * Counts the loads currently held in the load queue that report + * readyToIssue(). + * @return Number of ready loads between loadHead and loadTail. + */ +template +int +LSQUnit::numLoadsReady() +{ + int load_idx = loadHead; + int retval = 0; + + // Walk the circular load queue from the head up to (not including) + // the tail. + while (load_idx != loadTail) { + assert(loadQueue[load_idx]); + + if (loadQueue[load_idx]->readyToIssue()) { + ++retval; + } + + // Advance to the next entry; without this step the loop never + // terminates whenever the queue holds at least one load. + incrLdIdx(load_idx); + } + + return retval; +} + +template +Fault +LSQUnit::executeLoad(DynInstPtr &inst) +{ + // Execute a specific load. + Fault load_fault = NoFault; + + DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n", + inst->readPC(),inst->seqNum); + + load_fault = inst->initiateAcc(); + + // If the instruction faulted, then we need to send it along to commit + // without the instruction completing. + if (load_fault != NoFault) { + // Send this instruction to commit, also make sure iew stage + // realizes there is activity. + iewStage->instToCommit(inst); + iewStage->activityThisCycle(); + } + + return load_fault; +} + +template +Fault +LSQUnit::executeStore(DynInstPtr &store_inst) +{ + using namespace TheISA; + // Make sure that a store exists. + assert(stores != 0); + + int store_idx = store_inst->sqIdx; + + DPRINTF(LSQUnit, "Executing store PC %#x [sn:%lli]\n", + store_inst->readPC(), store_inst->seqNum); + + // Check the recently completed loads to see if any match this store's + // address. If so, then we have a memory ordering violation. + int load_idx = store_inst->lqIdx; + + Fault store_fault = store_inst->initiateAcc(); +// Fault store_fault = store_inst->execute(); + + if (storeQueue[store_idx].size == 0) { + DPRINTF(LSQUnit,"Fault on Store PC %#x, [sn:%lli],Size = 0\n", + store_inst->readPC(),store_inst->seqNum); + + return store_fault; + } + + assert(store_fault == NoFault); + + if (store_inst->isStoreConditional()) { + // Store conditionals need to set themselves as able to + // writeback if we haven't had a fault by here.
+ storeQueue[store_idx].canWB = true; + + ++storesToWB; + } + + if (!memDepViolator) { + while (load_idx != loadTail) { + // Really only need to check loads that have actually executed + // It's safe to check all loads because effAddr is set to + // InvalAddr when the dyn inst is created. + + // @todo: For now this is extra conservative, detecting a + // violation if the addresses match assuming all accesses + // are quad word accesses. + + // @todo: Fix this, magic number being used here + if ((loadQueue[load_idx]->effAddr >> 8) == + (store_inst->effAddr >> 8)) { + // A load incorrectly passed this store. Squash and refetch. + // For now return a fault to show that it was unsuccessful. + memDepViolator = loadQueue[load_idx]; + + return genMachineCheckFault(); + } + + incrLdIdx(load_idx); + } + + // If we've reached this point, there was no violation. + memDepViolator = NULL; + } + + return store_fault; +} + +template +void +LSQUnit::commitLoad() +{ + assert(loadQueue[loadHead]); + + DPRINTF(LSQUnit, "Committing head load instruction, PC %#x\n", + loadQueue[loadHead]->readPC()); + + + loadQueue[loadHead] = NULL; + + incrLdIdx(loadHead); + + --loads; +} + +template +void +LSQUnit::commitLoads(InstSeqNum &youngest_inst) +{ + assert(loads == 0 || loadQueue[loadHead]); + + while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) { + commitLoad(); + } +} + +template +void +LSQUnit::commitStores(InstSeqNum &youngest_inst) +{ + assert(stores == 0 || storeQueue[storeHead].inst); + + int store_idx = storeHead; + + while (store_idx != storeTail) { + assert(storeQueue[store_idx].inst); + // Mark any stores that are now committed and have not yet + // been marked as able to write back. 
+ if (!storeQueue[store_idx].canWB) { + if (storeQueue[store_idx].inst->seqNum > youngest_inst) { + break; + } + DPRINTF(LSQUnit, "Marking store as able to write back, PC " + "%#x [sn:%lli]\n", + storeQueue[store_idx].inst->readPC(), + storeQueue[store_idx].inst->seqNum); + + storeQueue[store_idx].canWB = true; + + ++storesToWB; + } + + incrStIdx(store_idx); + } +} + +template +void +LSQUnit::writebackStores() +{ + while (storesToWB > 0 && + storeWBIdx != storeTail && + storeQueue[storeWBIdx].inst && + storeQueue[storeWBIdx].canWB && + usedPorts < cachePorts) { + + // Store didn't write any data so no need to write it back to + // memory. + if (storeQueue[storeWBIdx].size == 0) { + completeStore(storeWBIdx); + + incrStIdx(storeWBIdx); + + continue; + } +/* + if (dcacheInterface && dcacheInterface->isBlocked()) { + DPRINTF(LSQUnit, "Unable to write back any more stores, cache" + " is blocked!\n"); + break; + } +*/ + ++usedPorts; + + if (storeQueue[storeWBIdx].inst->isDataPrefetch()) { + incrStIdx(storeWBIdx); + + continue; + } + + assert(storeQueue[storeWBIdx].req); + assert(!storeQueue[storeWBIdx].committed); + + DynInstPtr inst = storeQueue[storeWBIdx].inst; + + Request *req = storeQueue[storeWBIdx].req; + storeQueue[storeWBIdx].committed = true; + + assert(!inst->memData); + inst->memData = new uint8_t[64]; + memcpy(inst->memData, (uint8_t *)&storeQueue[storeWBIdx].data, req->getSize()); + + PacketPtr data_pkt = new Packet(req, Packet::WriteReq, Packet::Broadcast); + data_pkt->dataStatic(inst->memData); + + DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%#x " + "to Addr:%#x, data:%#x [sn:%lli]\n", + storeWBIdx, storeQueue[storeWBIdx].inst->readPC(), + req->getPaddr(), *(inst->memData), + storeQueue[storeWBIdx].inst->seqNum); + + if (!dcachePort->sendTiming(data_pkt)) { + // Need to handle becoming blocked on a store. + // NOTE(review): on this path storeWBIdx is not advanced, but + // `committed` and inst->memData were already set above, so the + // next pass over this same entry (in this loop while + // usedPorts < cachePorts, or on a later call) would trip + // assert(!committed) and assert(!inst->memData). The Packet + // allocated above is also not released here -- confirm who + // owns it after a failed sendTiming before fixing.
+ } else { + /* + StoreCompletionEvent *store_event = new + StoreCompletionEvent(storeWBIdx, NULL, this); + */ + if (isStalled() && + storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) { + DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] " + "load idx:%i\n", + stallingStoreIsn, stallingLoadIdx); + stalled = false; + stallingStoreIsn = 0; + iewStage->replayMemInst(loadQueue[stallingLoadIdx]); + } +/* + typename LdWritebackEvent *wb = NULL; + if (req->flags & LOCKED) { + // Stx_C should not generate a system port transaction + // if it misses in the cache, but that might be hard + // to accomplish without explicit cache support. + wb = new typename + LdWritebackEvent(storeQueue[storeWBIdx].inst, + iewStage); + store_event->wbEvent = wb; + } +*/ + if (data_pkt->result != Packet::Success) { + DPRINTF(LSQUnit,"D-Cache Write Miss on idx:%i!\n", + storeWBIdx); + + DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n", + storeQueue[storeWBIdx].inst->seqNum); + + //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum); + + //DPRINTF(LSQUnit, "Added MSHR. count = %i\n",mshrSeqNums.size()); + + // @todo: Increment stat here. + } else { + DPRINTF(LSQUnit,"D-Cache: Write Hit on idx:%i !\n", + storeWBIdx); + + DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", + storeQueue[storeWBIdx].inst->seqNum); + } + + incrStIdx(storeWBIdx); + } + } + + // Not sure this should set it to 0. + usedPorts = 0; + + assert(stores >= 0 && storesToWB >= 0); +} + +/*template +void +LSQUnit::removeMSHR(InstSeqNum seqNum) +{ + list::iterator mshr_it = find(mshrSeqNums.begin(), + mshrSeqNums.end(), + seqNum); + + if (mshr_it != mshrSeqNums.end()) { + mshrSeqNums.erase(mshr_it); + DPRINTF(LSQUnit, "Removing MSHR. count = %i\n",mshrSeqNums.size()); + } +}*/ + +template +void +LSQUnit::squash(const InstSeqNum &squashed_num) +{ + DPRINTF(LSQUnit, "Squashing until [sn:%lli]!" 
+ "(Loads:%i Stores:%i)\n", squashed_num, loads, stores); + + int load_idx = loadTail; + decrLdIdx(load_idx); + + while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) { + DPRINTF(LSQUnit,"Load Instruction PC %#x squashed, " + "[sn:%lli]\n", + loadQueue[load_idx]->readPC(), + loadQueue[load_idx]->seqNum); + + if (isStalled() && load_idx == stallingLoadIdx) { + stalled = false; + stallingStoreIsn = 0; + stallingLoadIdx = 0; + } + + // Clear the smart pointer to make sure it is decremented. + loadQueue[load_idx]->squashed = true; + loadQueue[load_idx] = NULL; + --loads; + + // Inefficient! + loadTail = load_idx; + + decrLdIdx(load_idx); + } + + if (isLoadBlocked) { + if (squashed_num < blockedLoadSeqNum) { + isLoadBlocked = false; + loadBlockedHandled = false; + blockedLoadSeqNum = 0; + } + } + + int store_idx = storeTail; + decrStIdx(store_idx); + + while (stores != 0 && + storeQueue[store_idx].inst->seqNum > squashed_num) { + // Instructions marked as can WB are already committed. + if (storeQueue[store_idx].canWB) { + break; + } + + DPRINTF(LSQUnit,"Store Instruction PC %#x squashed, " + "idx:%i [sn:%lli]\n", + storeQueue[store_idx].inst->readPC(), + store_idx, storeQueue[store_idx].inst->seqNum); + + // I don't think this can happen. It should have been cleared + // by the stalling load. + if (isStalled() && + storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { + panic("Is stalled should have been cleared by stalling load!\n"); + stalled = false; + stallingStoreIsn = 0; + } + + // Clear the smart pointer to make sure it is decremented. + storeQueue[store_idx].inst->squashed = true; + storeQueue[store_idx].inst = NULL; + storeQueue[store_idx].canWB = 0; + + storeQueue[store_idx].req = NULL; + --stores; + + // Inefficient! 
+ storeTail = store_idx; + + decrStIdx(store_idx); + } +} + +template +void +LSQUnit::completeStore(int store_idx) +{ + assert(storeQueue[store_idx].inst); + storeQueue[store_idx].completed = true; + --storesToWB; + // A bit conservative because a store completion may not free up entries, + // but hopefully avoids two store completions in one cycle from making + // the CPU tick twice. + cpu->activityThisCycle(); + + if (store_idx == storeHead) { + do { + incrStIdx(storeHead); + + --stores; + } while (storeQueue[storeHead].completed && + storeHead != storeTail); + + iewStage->updateLSQNextCycle = true; + } + + DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head " + "idx:%i\n", + storeQueue[store_idx].inst->seqNum, store_idx, storeHead); + + if (isStalled() && + storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { + DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] " + "load idx:%i\n", + stallingStoreIsn, stallingLoadIdx); + stalled = false; + stallingStoreIsn = 0; + iewStage->replayMemInst(loadQueue[stallingLoadIdx]); + } + + storeQueue[store_idx].inst->setCompleted(); + + // Tell the checker we've completed this instruction. Some stores + // may get reported twice to the checker, but the checker can + // handle that case. 
+ if (cpu->checker) { + cpu->checker->tick(storeQueue[store_idx].inst); + } +} + +template +inline void +LSQUnit::incrStIdx(int &store_idx) +{ + if (++store_idx >= SQEntries) + store_idx = 0; +} + +template +inline void +LSQUnit::decrStIdx(int &store_idx) +{ + if (--store_idx < 0) + store_idx += SQEntries; +} + +template +inline void +LSQUnit::incrLdIdx(int &load_idx) +{ + if (++load_idx >= LQEntries) + load_idx = 0; +} + +template +inline void +LSQUnit::decrLdIdx(int &load_idx) +{ + if (--load_idx < 0) + load_idx += LQEntries; +} + +template +void +LSQUnit::dumpInsts() +{ + cprintf("Load store queue: Dumping instructions.\n"); + cprintf("Load queue size: %i\n", loads); + cprintf("Load queue: "); + + int load_idx = loadHead; + + while (load_idx != loadTail && loadQueue[load_idx]) { + cprintf("%#x ", loadQueue[load_idx]->readPC()); + + incrLdIdx(load_idx); + } + + cprintf("Store queue size: %i\n", stores); + cprintf("Store queue: "); + + int store_idx = storeHead; + + while (store_idx != storeTail && storeQueue[store_idx].inst) { + cprintf("%#x ", storeQueue[store_idx].inst->readPC()); + + incrStIdx(store_idx); + } + + cprintf("\n"); +} diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index 3350903db..45fe490d2 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -31,6 +31,7 @@ #include "arch/isa_traits.hh" #include "arch/faults.hh" +#include "arch/types.hh" #include "base/trace.hh" #include "config/full_system.hh" #include "cpu/o3/comm.hh" @@ -44,9 +45,8 @@ /** * Simple physical register file class. - * This really only depends on the ISA, and not the Impl. Things that are - * in the ifdef FULL_SYSTEM are pretty dependent on the ISA, and probably - * should go in the AlphaFullCPU. + * Right now this is specific to Alpha until we decide if/how to make things + * generic enough to support other ISAs. 
*/ template class PhysRegFile @@ -54,8 +54,15 @@ class PhysRegFile protected: typedef TheISA::IntReg IntReg; typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::MiscRegFile MiscRegFile; typedef TheISA::MiscReg MiscReg; + + typedef union { + FloatReg d; + FloatRegBits q; + } PhysFloatReg; + // Note that most of the definitions of the IntReg, FloatReg, etc. exist // within the Impl/ISA class and not within this PhysRegFile class. @@ -97,7 +104,7 @@ class PhysRegFile assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); - FloatReg floatReg = floatRegFile.readReg(reg_idx, width); + FloatReg floatReg = floatRegFile[reg_idx].d; DPRINTF(IEW, "RegFile: Access to %d byte float register %i, has " "data %8.8d\n", int(reg_idx), (double)floatReg); @@ -113,7 +120,7 @@ class PhysRegFile assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); - FloatReg floatReg = floatRegFile.readReg(reg_idx); + FloatReg floatReg = floatRegFile[reg_idx].d; DPRINTF(IEW, "RegFile: Access to float register %i, has " "data %8.8d\n", int(reg_idx), (double)floatReg); @@ -129,7 +136,7 @@ class PhysRegFile assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); - FloatRegBits floatRegBits = floatRegFile.readRegBits(reg_idx, width); + FloatRegBits floatRegBits = floatRegFile[reg_idx].q; DPRINTF(IEW, "RegFile: Access to %d byte float register %i as int, " "has data %lli\n", int(reg_idx), (uint64_t)floatRegBits); @@ -144,7 +151,7 @@ class PhysRegFile assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); - FloatRegBits floatRegBits = floatRegFile.readRegBits(reg_idx); + FloatRegBits floatRegBits = floatRegFile[reg_idx].q; DPRINTF(IEW, "RegFile: Access to float register %i as int, " "has data %lli\n", int(reg_idx), (uint64_t)floatRegBits); @@ -176,7 +183,7 @@ class PhysRegFile int(reg_idx), (double)val); if (reg_idx != TheISA::ZeroReg) - floatRegFile.setReg(reg_idx, val, width); + floatRegFile[reg_idx].d = val; /* store the value; width does not affect the union write */ } /** Sets a double precision 
floating point register to the given value. */ @@ -191,7 +198,7 @@ class PhysRegFile int(reg_idx), (double)val); if (reg_idx != TheISA::ZeroReg) - floatRegFile.setReg(reg_idx, val); + floatRegFile[reg_idx].d = val; } /** Sets a floating point register to the given integer value. */ @@ -205,7 +212,7 @@ class PhysRegFile DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n", int(reg_idx), (uint64_t)val); - floatRegFile.setRegBits(reg_idx, val, width); + floatRegFile[reg_idx].q = val; } void setFloatRegBits(PhysRegIndex reg_idx, FloatRegBits val) @@ -217,6 +224,13 @@ class PhysRegFile DPRINTF(IEW, "RegFile: Setting float register %i to %lli\n", int(reg_idx), (uint64_t)val); + + floatRegFile[reg_idx].q = val; + } + + MiscReg readMiscReg(int misc_reg, unsigned thread_id) + { + return miscRegs[thread_id].readReg(misc_reg); } MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, @@ -249,7 +263,7 @@ class PhysRegFile std::vector intRegFile; /** Floating point register file. */ - std::vector floatRegFile; + std::vector floatRegFile; /** Miscellaneous register file. */ MiscRegFile miscRegs[Impl::MaxThreads]; diff --git a/src/cpu/o3/sat_counter.hh b/src/cpu/o3/sat_counter.hh index d01fd93ce..640445407 100644 --- a/src/cpu/o3/sat_counter.hh +++ b/src/cpu/o3/sat_counter.hh @@ -29,6 +29,7 @@ #ifndef __CPU_O3_SAT_COUNTER_HH__ #define __CPU_O3_SAT_COUNTER_HH__ +#include "base/misc.hh" #include "sim/host.hh" /** diff --git a/src/cpu/o3/scoreboard.cc b/src/cpu/o3/scoreboard.cc new file mode 100644 index 000000000..b0e433620 --- /dev/null +++ b/src/cpu/o3/scoreboard.cc @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "cpu/o3/scoreboard.hh" + +Scoreboard::Scoreboard(unsigned activeThreads, + unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs, + unsigned _numMiscRegs, + unsigned _zeroRegIdx) + : numLogicalIntRegs(_numLogicalIntRegs), + numPhysicalIntRegs(_numPhysicalIntRegs), + numLogicalFloatRegs(_numLogicalFloatRegs), + numPhysicalFloatRegs(_numPhysicalFloatRegs), + numMiscRegs(_numMiscRegs), + zeroRegIdx(_zeroRegIdx) +{ + //Get Register Sizes + numLogicalRegs = numLogicalIntRegs + numLogicalFloatRegs; + numPhysicalRegs = numPhysicalIntRegs + numPhysicalFloatRegs; + + //Resize scoreboard appropriately + regScoreBoard.resize(numPhysicalRegs + (numMiscRegs * activeThreads)); + + //Initialize values + for (int i=0; i < numLogicalIntRegs * activeThreads; i++) { + regScoreBoard[i] = 1; + } + + for (int i= numPhysicalIntRegs; + i < numPhysicalIntRegs + (numLogicalFloatRegs * activeThreads); + i++) { + regScoreBoard[i] = 1; + } + + for (int i = numPhysicalRegs; + i < numPhysicalRegs + (numMiscRegs * activeThreads); + i++) { + regScoreBoard[i] = 1; + } +} + +std::string +Scoreboard::name() const +{ + return "cpu.scoreboard"; +} + +bool +Scoreboard::getReg(PhysRegIndex phys_reg) +{ + // Always ready if int or fp zero reg. + if (phys_reg == zeroRegIdx || + phys_reg == (zeroRegIdx + numPhysicalIntRegs)) { + return 1; + } + + return regScoreBoard[phys_reg]; +} + +void +Scoreboard::setReg(PhysRegIndex phys_reg) +{ + DPRINTF(Scoreboard, "Setting reg %i as ready\n", phys_reg); + + regScoreBoard[phys_reg] = 1; +} + +void +Scoreboard::unsetReg(PhysRegIndex ready_reg) +{ + if (ready_reg == zeroRegIdx || + ready_reg == (zeroRegIdx + numPhysicalIntRegs)) { + // Don't do anything if int or fp zero reg. 
+ return; + } + + regScoreBoard[ready_reg] = 0; +} diff --git a/src/cpu/o3/scoreboard.hh b/src/cpu/o3/scoreboard.hh new file mode 100644 index 000000000..77f2cf157 --- /dev/null +++ b/src/cpu/o3/scoreboard.hh @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __CPU_O3_SCOREBOARD_HH__ +#define __CPU_O3_SCOREBOARD_HH__ + +#include +#include +#include +#include "arch/alpha/isa_traits.hh" +#include "base/trace.hh" +#include "base/traceflags.hh" +#include "cpu/o3/comm.hh" + +/** + * Implements a simple scoreboard to track which registers are ready. + * This class assumes that the fp registers start, index wise, right after + * the integer registers. The misc. registers start, index wise, right after + * the fp registers. + * @todo: Fix up handling of the zero register in case the decoder does not + * automatically make insts that write the zero register into nops. + */ +class Scoreboard +{ + public: + /** Constructs a scoreboard. + * @param activeThreads The number of active threads. + * @param _numLogicalIntRegs Number of logical integer registers. + * @param _numPhysicalIntRegs Number of physical integer registers. + * @param _numLogicalFloatRegs Number of logical fp registers. + * @param _numPhysicalFloatRegs Number of physical fp registers. + * @param _numMiscRegs Number of miscellaneous registers. + * @param _zeroRegIdx Index of the zero register. + */ + Scoreboard(unsigned activeThreads, + unsigned _numLogicalIntRegs, + unsigned _numPhysicalIntRegs, + unsigned _numLogicalFloatRegs, + unsigned _numPhysicalFloatRegs, + unsigned _numMiscRegs, + unsigned _zeroRegIdx); + + /** Destructor. */ + ~Scoreboard() {} + + /** Returns the name of the scoreboard. */ + std::string name() const; + + /** Checks if the register is ready. */ + bool getReg(PhysRegIndex ready_reg); + + /** Sets the register as ready. */ + void setReg(PhysRegIndex phys_reg); + + /** Sets the register as not ready. */ + void unsetReg(PhysRegIndex ready_reg); + + private: + /** Scoreboard of physical integer registers, saying whether or not they + * are ready. + */ + std::vector regScoreBoard; + + /** Number of logical integer registers. */ + int numLogicalIntRegs; + + /** Number of physical integer registers. 
*/ + int numPhysicalIntRegs; + + /** Number of logical floating point registers. */ + int numLogicalFloatRegs; + + /** Number of physical floating point registers. */ + int numPhysicalFloatRegs; + + /** Number of miscellaneous registers. */ + int numMiscRegs; + + /** Number of logical integer + float registers. */ + int numLogicalRegs; + + /** Number of physical integer + float registers. */ + int numPhysicalRegs; + + /** The logical index of the zero register. */ + int zeroRegIdx; +}; + +#endif diff --git a/src/cpu/o3/thread_state.hh b/src/cpu/o3/thread_state.hh new file mode 100644 index 000000000..9101eafb9 --- /dev/null +++ b/src/cpu/o3/thread_state.hh @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_O3_THREAD_STATE_HH__ +#define __CPU_O3_THREAD_STATE_HH__ + +#include "arch/faults.hh" +#include "arch/isa_traits.hh" +#include "cpu/exec_context.hh" +#include "cpu/thread_state.hh" + +class Event; +class Process; + +#if FULL_SYSTEM +class EndQuiesceEvent; +class FunctionProfile; +class ProfileNode; +#else +class FunctionalMemory; +class Process; +#endif + +/** + * Class that has various thread state, such as the status, the + * current instruction being processed, whether or not the thread has + * a trap pending or is being externally updated, the ExecContext + * proxy pointer, etc. It also handles anything related to a specific + * thread's process, such as syscalls and checking valid addresses. 
+ */ +template +struct O3ThreadState : public ThreadState { + typedef ExecContext::Status Status; + typedef typename Impl::FullCPU FullCPU; + + Status _status; + + // Current instruction + TheISA::MachInst inst; + private: + FullCPU *cpu; /* CPU pointer handed to the constructor */ + public: + + bool inSyscall; + + bool trapPending; + +#if FULL_SYSTEM + O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem) + : ThreadState(-1, _thread_num, _mem), + cpu(_cpu), inSyscall(0), trapPending(0) + { } +#else + O3ThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid) + : ThreadState(-1, _thread_num, NULL, _process, _asid), + cpu(_cpu), inSyscall(0), trapPending(0) + { } + + O3ThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem, + int _asid) + : ThreadState(-1, _thread_num, _mem, NULL, _asid), + cpu(_cpu), inSyscall(0), trapPending(0) + { } +#endif + + ExecContext *xcProxy; + + ExecContext *getXCProxy() { return xcProxy; } + + Status status() const { return _status; } + + void setStatus(Status new_status) { _status = new_status; } + + bool misspeculating() { return false; } + + void setInst(TheISA::MachInst _inst) { inst = _inst; } + + Counter readFuncExeInst() { return funcExeInst; } + + void setFuncExeInst(Counter new_val) { funcExeInst = new_val; } + +#if !FULL_SYSTEM + void syscall(int64_t callnum) { process->syscall(callnum, xcProxy); } +#endif +}; + +#endif // __CPU_O3_THREAD_STATE_HH__ diff --git a/src/cpu/op_class.hh b/src/cpu/op_class.hh index cdb40a0fb..415b9098c 100644 --- a/src/cpu/op_class.hh +++ b/src/cpu/op_class.hh @@ -59,6 +59,6 @@ enum OpClass { /** * Array mapping OpClass enum values to strings. Defined in op_class.cc. 
*/ -extern const char *opClassStrings[]; +extern const char *opClassStrings[Num_OpClasses]; #endif // __CPU__OP_CLASS_HH__ diff --git a/src/cpu/ozone/back_end.cc b/src/cpu/ozone/back_end.cc new file mode 100644 index 000000000..cb014e4cc --- /dev/null +++ b/src/cpu/ozone/back_end.cc @@ -0,0 +1,5 @@ + +#include "cpu/ozone/back_end_impl.hh" +#include "cpu/ozone/ozone_impl.hh" + +//template class BackEnd; diff --git a/src/cpu/ozone/back_end.hh b/src/cpu/ozone/back_end.hh new file mode 100644 index 000000000..63823363e --- /dev/null +++ b/src/cpu/ozone/back_end.hh @@ -0,0 +1,514 @@ + +#ifndef __CPU_OZONE_BACK_END_HH__ +#define __CPU_OZONE_BACK_END_HH__ + +#include +#include +#include + +#include "arch/faults.hh" +#include "base/timebuf.hh" +#include "cpu/inst_seq.hh" +#include "cpu/ozone/rename_table.hh" +#include "cpu/ozone/thread_state.hh" +#include "mem/request.hh" +#include "sim/eventq.hh" + +class ExecContext; + +template +class OzoneThreadState; + +template +class BackEnd +{ + public: + typedef OzoneThreadState Thread; + + typedef typename Impl::Params Params; + typedef typename Impl::DynInst DynInst; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::FrontEnd FrontEnd; + typedef typename Impl::FullCPU::CommStruct CommStruct; + + struct SizeStruct { + int size; + }; + + typedef SizeStruct DispatchToIssue; + typedef SizeStruct IssueToExec; + typedef SizeStruct ExecToCommit; + typedef SizeStruct Writeback; + + TimeBuffer d2i; + typename TimeBuffer::wire instsToDispatch; + TimeBuffer i2e; + typename TimeBuffer::wire instsToExecute; + TimeBuffer e2c; + TimeBuffer numInstsToWB; + + TimeBuffer *comm; + typename TimeBuffer::wire toIEW; + typename TimeBuffer::wire fromCommit; + + class InstQueue { + enum queue { + NonSpec, + IQ, + ToBeScheduled, + ReadyList, + ReplayList + }; + struct pqCompare { + bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const + { + return lhs->seqNum > rhs->seqNum; + 
} + }; + public: + InstQueue(Params *params); + + std::string name() const; + + void regStats(); + + void setIssueExecQueue(TimeBuffer *i2e_queue); + + void setBE(BackEnd *_be) { be = _be; } + + void insert(DynInstPtr &inst); + + void scheduleReadyInsts(); + + void scheduleNonSpec(const InstSeqNum &sn); + + DynInstPtr getReadyInst(); + + void commit(const InstSeqNum &sn) {} + + void squash(const InstSeqNum &sn); + + int wakeDependents(DynInstPtr &inst); + + /** Tells memory dependence unit that a memory instruction needs to be + * rescheduled. It will re-execute once replayMemInst() is called. + */ + void rescheduleMemInst(DynInstPtr &inst); + + /** Re-executes all rescheduled memory instructions. */ + void replayMemInst(DynInstPtr &inst); + + /** Completes memory instruction. */ + void completeMemInst(DynInstPtr &inst); + + void violation(DynInstPtr &inst, DynInstPtr &violation) { } + + bool isFull() { return numInsts >= size; } + + void dumpInsts(); + + private: + bool find(queue q, typename std::list::iterator it); + BackEnd *be; + TimeBuffer *i2e; + typename TimeBuffer::wire numIssued; + typedef typename std::list InstList; + typedef typename std::list::iterator InstListIt; + typedef typename std::priority_queue, pqCompare> ReadyInstQueue; + // Not sure I need the IQ list; it just needs to be a count. + InstList iq; + InstList toBeScheduled; + InstList readyList; + InstList nonSpec; + InstList replayList; + ReadyInstQueue readyQueue; + public: + int size; + int numInsts; + int width; + + Stats::VectorDistribution<> occ_dist; + + Stats::Vector<> inst_count; + Stats::Vector<> peak_inst_count; + Stats::Scalar<> empty_count; + Stats::Scalar<> current_count; + Stats::Scalar<> fullCount; + + Stats::Formula occ_rate; + Stats::Formula avg_residency; + Stats::Formula empty_rate; + Stats::Formula full_rate; + }; + + /** LdWriteback event for a load completion. 
*/ + class LdWritebackEvent : public Event { + private: + /** Instruction that is writing back data to the register file. */ + DynInstPtr inst; + /** Pointer to IEW stage. */ + BackEnd *be; + + public: + /** Constructs a load writeback event. */ + LdWritebackEvent(DynInstPtr &_inst, BackEnd *be); + + /** Processes writeback event. */ + virtual void process(); + /** Returns the description of the writeback event. */ + virtual const char *description(); + }; + + BackEnd(Params *params); + + std::string name() const; + + void regStats(); + + void setCPU(FullCPU *cpu_ptr) + { cpu = cpu_ptr; } + + void setFrontEnd(FrontEnd *front_end_ptr) + { frontEnd = front_end_ptr; } + + void setXC(ExecContext *xc_ptr) + { xc = xc_ptr; } + + void setThreadState(Thread *thread_ptr) + { thread = thread_ptr; } + + void setCommBuffer(TimeBuffer *_comm); + + void tick(); + void squash(); + void squashFromXC(); + bool xcSquash; + + template + Fault read(RequestPtr req, T &data, int load_idx); + + template + Fault write(RequestPtr req, T &data, int store_idx); + + Addr readCommitPC() { return commitPC; } + + Addr commitPC; + + bool robEmpty() { return instList.empty(); } + + bool isFull() { return numInsts >= numROBEntries; } + bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; } + + /** Tells memory dependence unit that a memory instruction needs to be + * rescheduled. It will re-execute once replayMemInst() is called. + */ + void rescheduleMemInst(DynInstPtr &inst) + { IQ.rescheduleMemInst(inst); } + + /** Re-executes all rescheduled memory instructions. */ + void replayMemInst(DynInstPtr &inst) + { IQ.replayMemInst(inst); } + + /** Completes memory instruction. 
*/ + void completeMemInst(DynInstPtr &inst) + { IQ.completeMemInst(inst); } + + void fetchFault(Fault &fault); + + private: + void updateStructures(); + void dispatchInsts(); + void dispatchStall(); + void checkDispatchStatus(); + void scheduleReadyInsts(); + void executeInsts(); + void commitInsts(); + void addToIQ(DynInstPtr &inst); + void addToLSQ(DynInstPtr &inst); + void instToCommit(DynInstPtr &inst); + void writebackInsts(); + bool commitInst(int inst_num); + void squash(const InstSeqNum &sn); + void squashDueToBranch(DynInstPtr &inst); + void squashDueToMemBlocked(DynInstPtr &inst); + void updateExeInstStats(DynInstPtr &inst); + void updateComInstStats(DynInstPtr &inst); + + public: + FullCPU *cpu; + + FrontEnd *frontEnd; + + ExecContext *xc; + + Thread *thread; + + enum Status { + Running, + Idle, + DcacheMissStall, + DcacheMissComplete, + Blocked + }; + + Status status; + + Status dispatchStatus; + + Counter funcExeInst; + + private: +// typedef typename Impl::InstQueue InstQueue; + + InstQueue IQ; + + typedef typename Impl::LdstQueue LdstQueue; + + LdstQueue LSQ; + public: + RenameTable commitRenameTable; + + RenameTable renameTable; + private: + class DCacheCompletionEvent : public Event + { + private: + BackEnd *be; + + public: + DCacheCompletionEvent(BackEnd *_be); + + virtual void process(); + virtual const char *description(); + }; + + friend class DCacheCompletionEvent; + + DCacheCompletionEvent cacheCompletionEvent; + + MemInterface *dcacheInterface; + + Request *memReq; + + // General back end width. Used if the more specific isn't given. + int width; + + // Dispatch width. + int dispatchWidth; + int numDispatchEntries; + int dispatchSize; + + int issueWidth; + + // Writeback width + int wbWidth; + + // Commit width + int commitWidth; + + /** Index into queue of instructions being written back. */ + unsigned wbNumInst; + + /** Cycle number within the queue of instructions being written + * back. 
Used in case there are too many instructions writing + * back at the current cycle and writesbacks need to be scheduled + * for the future. See comments in instToCommit(). + */ + unsigned wbCycle; + + int numROBEntries; + int numInsts; + + bool squashPending; + InstSeqNum squashSeqNum; + Addr squashNextPC; + + Fault faultFromFetch; + + private: + typedef typename std::list::iterator InstListIt; + + std::list instList; + std::list dispatch; + std::list writeback; + + int latency; + + int squashLatency; + + bool exactFullStall; + + bool fetchRedirect[Impl::MaxThreads]; + + // number of cycles stalled for D-cache misses +/* Stats::Scalar<> dcacheStallCycles; + Counter lastDcacheStall; +*/ + Stats::Vector<> rob_cap_events; + Stats::Vector<> rob_cap_inst_count; + Stats::Vector<> iq_cap_events; + Stats::Vector<> iq_cap_inst_count; + // total number of instructions executed + Stats::Vector<> exe_inst; + Stats::Vector<> exe_swp; + Stats::Vector<> exe_nop; + Stats::Vector<> exe_refs; + Stats::Vector<> exe_loads; + Stats::Vector<> exe_branches; + + Stats::Vector<> issued_ops; + + // total number of loads forwaded from LSQ stores + Stats::Vector<> lsq_forw_loads; + + // total number of loads ignored due to invalid addresses + Stats::Vector<> inv_addr_loads; + + // total number of software prefetches ignored due to invalid addresses + Stats::Vector<> inv_addr_swpfs; + // ready loads blocked due to memory disambiguation + Stats::Vector<> lsq_blocked_loads; + + Stats::Scalar<> lsqInversion; + + Stats::Vector<> n_issued_dist; + Stats::VectorDistribution<> issue_delay_dist; + + Stats::VectorDistribution<> queue_res_dist; +/* + Stats::Vector<> stat_fu_busy; + Stats::Vector2d<> stat_fuBusy; + Stats::Vector<> dist_unissued; + Stats::Vector2d<> stat_issued_inst_type; + + Stats::Formula misspec_cnt; + Stats::Formula misspec_ipc; + Stats::Formula issue_rate; + Stats::Formula issue_stores; + Stats::Formula issue_op_rate; + Stats::Formula fu_busy_rate; + Stats::Formula commit_stores; + 
Stats::Formula commit_ipc; + Stats::Formula commit_ipb; + Stats::Formula lsq_inv_rate; +*/ + Stats::Vector<> writeback_count; + Stats::Vector<> producer_inst; + Stats::Vector<> consumer_inst; + Stats::Vector<> wb_penalized; + + Stats::Formula wb_rate; + Stats::Formula wb_fanout; + Stats::Formula wb_penalized_rate; + + // total number of instructions committed + Stats::Vector<> stat_com_inst; + Stats::Vector<> stat_com_swp; + Stats::Vector<> stat_com_refs; + Stats::Vector<> stat_com_loads; + Stats::Vector<> stat_com_membars; + Stats::Vector<> stat_com_branches; + + Stats::Distribution<> n_committed_dist; + + Stats::Scalar<> commit_eligible_samples; + Stats::Vector<> commit_eligible; + + Stats::Scalar<> ROB_fcount; + Stats::Formula ROB_full_rate; + + Stats::Vector<> ROB_count; // cumulative ROB occupancy + Stats::Formula ROB_occ_rate; + Stats::VectorDistribution<> ROB_occ_dist; + public: + void dumpInsts(); +}; + +template +template +Fault +BackEnd::read(RequestPtr req, T &data, int load_idx) +{ +/* memReq->reset(addr, sizeof(T), flags); + + // translate to physical address + Fault fault = cpu->translateDataReadReq(memReq); + + // if we have a cache, do cache access too + if (fault == NoFault && dcacheInterface) { + memReq->cmd = Read; + memReq->completionEvent = NULL; + memReq->time = curTick; + memReq->flags &= ~INST_READ; + MemAccessResult result = dcacheInterface->access(memReq); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. + if (result != MA_HIT && dcacheInterface->doEvents()) { + // Fix this hack for keeping funcExeInst correct with loads that + // are executed twice. 
+ --funcExeInst; + + memReq->completionEvent = &cacheCompletionEvent; + lastDcacheStall = curTick; +// unscheduleTickEvent(); +// status = DcacheMissStall; + DPRINTF(OzoneCPU, "Dcache miss stall!\n"); + } else { + // do functional access + fault = thread->mem->read(memReq, data); + + } + } +*/ +/* + if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + recordEvent("Uncached Read"); +*/ + return LSQ.read(req, data, load_idx); +} + +template +template +Fault +BackEnd::write(RequestPtr req, T &data, int store_idx) +{ +/* + memReq->reset(addr, sizeof(T), flags); + + // translate to physical address + Fault fault = cpu->translateDataWriteReq(memReq); + + if (fault == NoFault && dcacheInterface) { + memReq->cmd = Write; + memcpy(memReq->data,(uint8_t *)&data,memReq->size); + memReq->completionEvent = NULL; + memReq->time = curTick; + memReq->flags &= ~INST_READ; + MemAccessResult result = dcacheInterface->access(memReq); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. 
+ if (result != MA_HIT && dcacheInterface->doEvents()) { + memReq->completionEvent = &cacheCompletionEvent; + lastDcacheStall = curTick; +// unscheduleTickEvent(); +// status = DcacheMissStall; + DPRINTF(OzoneCPU, "Dcache miss stall!\n"); + } + } + + if (res && (fault == NoFault)) + *res = memReq->result; + */ +/* + if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + recordEvent("Uncached Write"); +*/ + return LSQ.write(req, data, store_idx); +} + +#endif // __CPU_OZONE_BACK_END_HH__ diff --git a/src/cpu/ozone/back_end_impl.hh b/src/cpu/ozone/back_end_impl.hh new file mode 100644 index 000000000..36770d65c --- /dev/null +++ b/src/cpu/ozone/back_end_impl.hh @@ -0,0 +1,1904 @@ + +#include "encumbered/cpu/full/op_class.hh" +#include "cpu/ozone/back_end.hh" + +template +BackEnd::InstQueue::InstQueue(Params *params) + : size(params->numIQEntries), numInsts(0), width(params->issueWidth) +{ +} + +template +std::string +BackEnd::InstQueue::name() const +{ + return be->name() + ".iq"; +} + +template +void +BackEnd::InstQueue::regStats() +{ + using namespace Stats; + + occ_dist + .init(1, 0, size, 2) + .name(name() + "occ_dist") + .desc("IQ Occupancy per cycle") + .flags(total | cdf) + ; + + inst_count + .init(1) + .name(name() + "cum_num_insts") + .desc("Total occupancy") + .flags(total) + ; + + peak_inst_count + .init(1) + .name(name() + "peak_occupancy") + .desc("Peak IQ occupancy") + .flags(total) + ; + + current_count + .name(name() + "current_count") + .desc("Occupancy this cycle") + ; + + empty_count + .name(name() + "empty_count") + .desc("Number of empty cycles") + ; + + fullCount + .name(name() + "full_count") + .desc("Number of full cycles") + ; + + + occ_rate + .name(name() + "occ_rate") + .desc("Average occupancy") + .flags(total) + ; + occ_rate = inst_count / be->cpu->numCycles; + + avg_residency + .name(name() + "avg_residency") + .desc("Average IQ residency") + .flags(total) + ; + avg_residency = occ_rate / be->cpu->numCycles; + + empty_rate + 
.name(name() + "empty_rate") + .desc("Fraction of cycles empty") + ; + empty_rate = 100 * empty_count / be->cpu->numCycles; + + full_rate + .name(name() + "full_rate") + .desc("Fraction of cycles full") + ; + full_rate = 100 * fullCount / be->cpu->numCycles; +} + +template +void +BackEnd::InstQueue::setIssueExecQueue(TimeBuffer *i2e_queue) +{ + i2e = i2e_queue; + numIssued = i2e->getWire(0); +} + +template +void +BackEnd::InstQueue::insert(DynInstPtr &inst) +{ + numInsts++; + inst_count[0]++; + if (!inst->isNonSpeculative()) { + DPRINTF(BE, "Instruction [sn:%lli] added to IQ\n", inst->seqNum); + if (inst->readyToIssue()) { + toBeScheduled.push_front(inst); + inst->iqIt = toBeScheduled.begin(); + inst->iqItValid = true; + } else { + iq.push_front(inst); + inst->iqIt = iq.begin(); + inst->iqItValid = true; + } + } else { + DPRINTF(BE, "Nonspeculative instruction [sn:%lli] added to IQ\n", inst->seqNum); + nonSpec.push_front(inst); + inst->iqIt = nonSpec.begin(); + inst->iqItValid = true; + } +} + +template +void +BackEnd::InstQueue::scheduleReadyInsts() +{ + int scheduled = numIssued->size; + InstListIt iq_it = --toBeScheduled.end(); + InstListIt iq_end_it = toBeScheduled.end(); + + while (iq_it != iq_end_it && scheduled < width) { +// if ((*iq_it)->readyToIssue()) { + DPRINTF(BE, "Instruction [sn:%lli] PC:%#x is ready\n", + (*iq_it)->seqNum, (*iq_it)->readPC()); + readyQueue.push(*iq_it); + readyList.push_front(*iq_it); + + (*iq_it)->iqIt = readyList.begin(); + + toBeScheduled.erase(iq_it--); + + ++scheduled; +// } else { +// iq_it++; +// } + } + + numIssued->size+= scheduled; +} + +template +void +BackEnd::InstQueue::scheduleNonSpec(const InstSeqNum &sn) +{ +/* + InstListIt non_spec_it = nonSpec.begin(); + InstListIt non_spec_end_it = nonSpec.end(); + + while ((*non_spec_it)->seqNum != sn) { + non_spec_it++; + assert(non_spec_it != non_spec_end_it); + } +*/ + DynInstPtr inst = nonSpec.back(); + + DPRINTF(BE, "Nonspeculative instruction [sn:%lli] scheduled\n", 
inst->seqNum); + + assert(inst->seqNum == sn); + + assert(find(NonSpec, inst->iqIt)); + nonSpec.erase(inst->iqIt); + readyList.push_front(inst); + inst->iqIt = readyList.begin(); + readyQueue.push(inst); + numIssued->size++; +} + +template +typename Impl::DynInstPtr +BackEnd::InstQueue::getReadyInst() +{ + assert(!readyList.empty()); + + DynInstPtr inst = readyQueue.top(); + readyQueue.pop(); + assert(find(ReadyList, inst->iqIt)); + readyList.erase(inst->iqIt); + inst->iqItValid = false; +// if (!inst->isMemRef()) + --numInsts; + return inst; +} + +template +void +BackEnd::InstQueue::squash(const InstSeqNum &sn) +{ + InstListIt iq_it = iq.begin(); + InstListIt iq_end_it = iq.end(); + + while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) { + DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum); + (*iq_it)->iqItValid = false; + iq.erase(iq_it++); + --numInsts; + } + + iq_it = nonSpec.begin(); + iq_end_it = nonSpec.end(); + + while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) { + DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum); + (*iq_it)->iqItValid = false; + nonSpec.erase(iq_it++); + --numInsts; + } + + iq_it = replayList.begin(); + iq_end_it = replayList.end(); + + while (iq_it != iq_end_it) { + if ((*iq_it)->seqNum > sn) { + DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum); + (*iq_it)->iqItValid = false; + replayList.erase(iq_it++); + --numInsts; + } else { + iq_it++; + } + } + + assert(numInsts >= 0); +/* + InstListIt ready_it = readyList.begin(); + InstListIt ready_end_it = readyList.end(); + + while (ready_it != ready_end_it) { + if ((*ready_it)->seqNum > sn) { + readyList.erase(ready_it++); + } else { + ready_it++; + } + } +*/ +} + +template +int +BackEnd::InstQueue::wakeDependents(DynInstPtr &inst) +{ + assert(!inst->isSquashed()); + std::vector &dependents = inst->getDependents(); + int num_outputs = dependents.size(); + + DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", 
inst->seqNum); + + for (int i = 0; i < num_outputs; i++) { + DynInstPtr dep_inst = dependents[i]; + dep_inst->markSrcRegReady(); + DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum); + + if (dep_inst->readyToIssue() && dep_inst->iqItValid) { + if (dep_inst->isNonSpeculative()) { + assert(find(NonSpec, dep_inst->iqIt)); + nonSpec.erase(dep_inst->iqIt); + } else { + assert(find(IQ, dep_inst->iqIt)); + iq.erase(dep_inst->iqIt); + } + + toBeScheduled.push_front(dep_inst); + dep_inst->iqIt = toBeScheduled.begin(); + } + } + return num_outputs; +} + +template +void +BackEnd::InstQueue::rescheduleMemInst(DynInstPtr &inst) +{ + DPRINTF(BE, "Rescheduling memory instruction [sn:%lli]\n", inst->seqNum); + assert(!inst->iqItValid); + replayList.push_front(inst); + inst->iqIt = replayList.begin(); + inst->iqItValid = true; + ++numInsts; +} + +template +void +BackEnd::InstQueue::replayMemInst(DynInstPtr &inst) +{ + DPRINTF(BE, "Replaying memory instruction [sn:%lli]\n", inst->seqNum); + assert(find(ReplayList, inst->iqIt)); + InstListIt iq_it = --replayList.end(); + InstListIt iq_end_it = replayList.end(); + while (iq_it != iq_end_it) { + DynInstPtr rescheduled_inst = (*iq_it); + + DPRINTF(BE, "Memory instruction [sn:%lli] also replayed\n", inst->seqNum); + replayList.erase(iq_it--); + toBeScheduled.push_front(rescheduled_inst); + rescheduled_inst->iqIt = toBeScheduled.begin(); + } +} + +template +void +BackEnd::InstQueue::completeMemInst(DynInstPtr &inst) +{ + panic("Not implemented."); +} + +template +bool +BackEnd::InstQueue::find(queue q, InstListIt it) +{ + InstListIt iq_it, iq_end_it; + switch(q) { + case NonSpec: + iq_it = nonSpec.begin(); + iq_end_it = nonSpec.end(); + break; + case IQ: + iq_it = iq.begin(); + iq_end_it = iq.end(); + break; + case ToBeScheduled: + iq_it = toBeScheduled.begin(); + iq_end_it = toBeScheduled.end(); + break; + case ReadyList: + iq_it = readyList.begin(); + iq_end_it = readyList.end(); + break; + case ReplayList: + 
iq_it = replayList.begin(); + iq_end_it = replayList.end(); + } + + while (iq_it != it && iq_it != iq_end_it) { + iq_it++; + } + if (iq_it == it) { + return true; + } else { + return false; + } +} + +template +void +BackEnd::InstQueue::dumpInsts() +{ + cprintf("IQ size: %i\n", iq.size()); + + InstListIt inst_list_it = --iq.end(); + + int num = 0; + int valid_num = 0; + while (inst_list_it != iq.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it--; + ++num; + } + + cprintf("nonSpec size: %i\n", nonSpec.size()); + + inst_list_it = --nonSpec.end(); + + while (inst_list_it != nonSpec.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. 
+ ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it--; + ++num; + } + + cprintf("toBeScheduled size: %i\n", toBeScheduled.size()); + + inst_list_it = --toBeScheduled.end(); + + while (inst_list_it != toBeScheduled.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it--; + ++num; + } + + cprintf("readyList size: %i\n", readyList.size()); + + inst_list_it = --readyList.end(); + + while (inst_list_it != readyList.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. 
+ ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it--; + ++num; + } +} + +template +BackEnd::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst, + BackEnd *_be) + : Event(&mainEventQueue), inst(_inst), be(_be) +{ + this->setFlags(Event::AutoDelete); +} + +template +void +BackEnd::LdWritebackEvent::process() +{ + DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum); +// DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum); + + //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum); + +// iewStage->wakeCPU(); + + if (inst->isSquashed()) { + inst = NULL; + return; + } + + if (!inst->isExecuted()) { + inst->setExecuted(); + + // Execute again to copy data to proper place. 
+ inst->completeAcc(); + } + + // Need to insert instruction into queue to commit + be->instToCommit(inst); + + //wroteToTimeBuffer = true; +// iewStage->activityThisCycle(); + + inst = NULL; +} + +template +const char * +BackEnd::LdWritebackEvent::description() +{ + return "Load writeback event"; +} + + +template +BackEnd::DCacheCompletionEvent::DCacheCompletionEvent(BackEnd *_be) + : Event(&mainEventQueue, CPU_Tick_Pri), be(_be) +{ +} + +template +void +BackEnd::DCacheCompletionEvent::process() +{ +} + +template +const char * +BackEnd::DCacheCompletionEvent::description() +{ + return "Cache completion event"; +} + +template +BackEnd::BackEnd(Params *params) + : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5), + xcSquash(false), IQ(params), + cacheCompletionEvent(this), width(params->backEndWidth), + exactFullStall(true) +{ + numROBEntries = params->numROBEntries; + numInsts = 0; + numDispatchEntries = 32; + IQ.setBE(this); + LSQ.setBE(this); + + // Setup IQ and LSQ with their parameters here. + instsToDispatch = d2i.getWire(-1); + + instsToExecute = i2e.getWire(-1); + + IQ.setIssueExecQueue(&i2e); + + dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width; + issueWidth = params->issueWidth ? params->issueWidth : width; + wbWidth = params->wbWidth ? params->wbWidth : width; + commitWidth = params->commitWidth ? 
params->commitWidth : width; + + LSQ.init(params, params->LQEntries, params->SQEntries, 0); + + dispatchStatus = Running; +} + +template +std::string +BackEnd::name() const +{ + return cpu->name() + ".backend"; +} + +template +void +BackEnd::regStats() +{ + using namespace Stats; + rob_cap_events + .init(cpu->number_of_threads) + .name(name() + ".ROB:cap_events") + .desc("number of cycles where ROB cap was active") + .flags(total) + ; + + rob_cap_inst_count + .init(cpu->number_of_threads) + .name(name() + ".ROB:cap_inst") + .desc("number of instructions held up by ROB cap") + .flags(total) + ; + + iq_cap_events + .init(cpu->number_of_threads) + .name(name() +".IQ:cap_events" ) + .desc("number of cycles where IQ cap was active") + .flags(total) + ; + + iq_cap_inst_count + .init(cpu->number_of_threads) + .name(name() + ".IQ:cap_inst") + .desc("number of instructions held up by IQ cap") + .flags(total) + ; + + + exe_inst + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:count") + .desc("number of insts issued") + .flags(total) + ; + + exe_swp + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:swp") + .desc("number of swp insts issued") + .flags(total) + ; + + exe_nop + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:nop") + .desc("number of nop insts issued") + .flags(total) + ; + + exe_refs + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:refs") + .desc("number of memory reference insts issued") + .flags(total) + ; + + exe_loads + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:loads") + .desc("number of load insts issued") + .flags(total) + ; + + exe_branches + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:branches") + .desc("Number of branches issued") + .flags(total) + ; + + issued_ops + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:op_count") + .desc("number of insts issued") + .flags(total) + ; + +/* + for (int i=0; inumber_of_threads) + .name(name() + ".LSQ:forw_loads") + .desc("number of loads forwarded via 
LSQ") + .flags(total) + ; + + inv_addr_loads + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:addr_loads") + .desc("number of invalid-address loads") + .flags(total) + ; + + inv_addr_swpfs + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:addr_swpfs") + .desc("number of invalid-address SW prefetches") + .flags(total) + ; + + lsq_blocked_loads + .init(cpu->number_of_threads) + .name(name() + ".LSQ:blocked_loads") + .desc("number of ready loads not issued due to memory disambiguation") + .flags(total) + ; + + lsqInversion + .name(name() + ".ISSUE:lsq_invert") + .desc("Number of times LSQ instruction issued early") + ; + + n_issued_dist + .init(issueWidth + 1) + .name(name() + ".ISSUE:issued_per_cycle") + .desc("Number of insts issued each cycle") + .flags(total | pdf | dist) + ; + issue_delay_dist + .init(Num_OpClasses,0,99,2) + .name(name() + ".ISSUE:") + .desc("cycles from operands ready to issue") + .flags(pdf | cdf) + ; + + queue_res_dist + .init(Num_OpClasses, 0, 99, 2) + .name(name() + ".IQ:residence:") + .desc("cycles from dispatch to issue") + .flags(total | pdf | cdf ) + ; + for (int i = 0; i < Num_OpClasses; ++i) { + queue_res_dist.subname(i, opClassStrings[i]); + } + + writeback_count + .init(cpu->number_of_threads) + .name(name() + ".WB:count") + .desc("cumulative count of insts written-back") + .flags(total) + ; + + producer_inst + .init(cpu->number_of_threads) + .name(name() + ".WB:producers") + .desc("num instructions producing a value") + .flags(total) + ; + + consumer_inst + .init(cpu->number_of_threads) + .name(name() + ".WB:consumers") + .desc("num instructions consuming a value") + .flags(total) + ; + + wb_penalized + .init(cpu->number_of_threads) + .name(name() + ".WB:penalized") + .desc("number of instrctions required to write to 'other' IQ") + .flags(total) + ; + + + wb_penalized_rate + .name(name() + ".WB:penalized_rate") + .desc ("fraction of instructions written-back that wrote to 'other' IQ") + .flags(total) + ; + + 
wb_penalized_rate = wb_penalized / writeback_count; + + wb_fanout + .name(name() + ".WB:fanout") + .desc("average fanout of values written-back") + .flags(total) + ; + + wb_fanout = producer_inst / consumer_inst; + + wb_rate + .name(name() + ".WB:rate") + .desc("insts written-back per cycle") + .flags(total) + ; + wb_rate = writeback_count / cpu->numCycles; + + stat_com_inst + .init(cpu->number_of_threads) + .name(name() + ".COM:count") + .desc("Number of instructions committed") + .flags(total) + ; + + stat_com_swp + .init(cpu->number_of_threads) + .name(name() + ".COM:swp_count") + .desc("Number of s/w prefetches committed") + .flags(total) + ; + + stat_com_refs + .init(cpu->number_of_threads) + .name(name() + ".COM:refs") + .desc("Number of memory references committed") + .flags(total) + ; + + stat_com_loads + .init(cpu->number_of_threads) + .name(name() + ".COM:loads") + .desc("Number of loads committed") + .flags(total) + ; + + stat_com_membars + .init(cpu->number_of_threads) + .name(name() + ".COM:membars") + .desc("Number of memory barriers committed") + .flags(total) + ; + + stat_com_branches + .init(cpu->number_of_threads) + .name(name() + ".COM:branches") + .desc("Number of branches committed") + .flags(total) + ; + n_committed_dist + .init(0,commitWidth,1) + .name(name() + ".COM:committed_per_cycle") + .desc("Number of insts commited each cycle") + .flags(pdf) + ; + + // + // Commit-Eligible instructions... + // + // -> The number of instructions eligible to commit in those + // cycles where we reached our commit BW limit (less the number + // actually committed) + // + // -> The average value is computed over ALL CYCLES... 
not just + // the BW limited cycles + // + // -> The standard deviation is computed only over cycles where + // we reached the BW limit + // + commit_eligible + .init(cpu->number_of_threads) + .name(name() + ".COM:bw_limited") + .desc("number of insts not committed due to BW limits") + .flags(total) + ; + + commit_eligible_samples + .name(name() + ".COM:bw_lim_events") + .desc("number cycles where commit BW limit reached") + ; + + ROB_fcount + .name(name() + ".ROB:full_count") + .desc("number of cycles where ROB was full") + ; + + ROB_count + .init(cpu->number_of_threads) + .name(name() + ".ROB:occupancy") + .desc(name() + ".ROB occupancy (cumulative)") + .flags(total) + ; + + ROB_full_rate + .name(name() + ".ROB:full_rate") + .desc("ROB full per cycle") + ; + ROB_full_rate = ROB_fcount / cpu->numCycles; + + ROB_occ_rate + .name(name() + ".ROB:occ_rate") + .desc("ROB occupancy rate") + .flags(total) + ; + ROB_occ_rate = ROB_count / cpu->numCycles; + + ROB_occ_dist + .init(cpu->number_of_threads,0,numROBEntries,2) + .name(name() + ".ROB:occ_dist") + .desc("ROB Occupancy per cycle") + .flags(total | cdf) + ; + + IQ.regStats(); +} + +template +void +BackEnd::setCommBuffer(TimeBuffer *_comm) +{ + comm = _comm; + toIEW = comm->getWire(0); + fromCommit = comm->getWire(-1); +} + +template +void +BackEnd::tick() +{ + DPRINTF(BE, "Ticking back end\n"); + + ROB_count[0]+= numInsts; + + wbCycle = 0; + + if (xcSquash) { + squashFromXC(); + } + + // Read in any done instruction information and update the IQ or LSQ. 
+ updateStructures(); + + if (dispatchStatus != Blocked) { + d2i.advance(); + dispatchInsts(); + } else { + checkDispatchStatus(); + } + + i2e.advance(); + scheduleReadyInsts(); + + e2c.advance(); + executeInsts(); + + numInstsToWB.advance(); + writebackInsts(); + + commitInsts(); + + DPRINTF(BE, "IQ entries in use: %i, ROB entries in use: %i, LSQ loads: %i, LSQ stores: %i\n", + IQ.numInsts, numInsts, LSQ.numLoads(), LSQ.numStores()); + + assert(numInsts == instList.size()); +} + +template +void +BackEnd::updateStructures() +{ + if (fromCommit->doneSeqNum) { + IQ.commit(fromCommit->doneSeqNum); + LSQ.commitLoads(fromCommit->doneSeqNum); + LSQ.commitStores(fromCommit->doneSeqNum); + } + + if (fromCommit->nonSpecSeqNum) { + if (fromCommit->uncached) { + LSQ.executeLoad(fromCommit->lqIdx); + } else { + IQ.scheduleNonSpec( + fromCommit->nonSpecSeqNum); + } + } +} + +template +void +BackEnd::addToIQ(DynInstPtr &inst) +{ + // Do anything IQ specific here? + IQ.insert(inst); +} + +template +void +BackEnd::addToLSQ(DynInstPtr &inst) +{ + // Do anything LSQ specific here? + LSQ.insert(inst); +} + +template +void +BackEnd::dispatchInsts() +{ + DPRINTF(BE, "Trying to dispatch instructions.\n"); + + // Pull instructions out of the front end. + int disp_width = dispatchWidth ? dispatchWidth : width; + + // Could model dispatching time, but in general 1 cycle is probably + // good enough. + + if (dispatchSize < numDispatchEntries) { + for (int i = 0; i < disp_width; i++) { + // Get instructions + DynInstPtr inst = frontEnd->getInst(); + + if (!inst) { + // No more instructions to get + break; + } + + DPRINTF(BE, "Processing instruction [sn:%lli] PC:%#x\n", + inst->seqNum, inst->readPC()); + + for (int i = 0; i < inst->numDestRegs(); ++i) + renameTable[inst->destRegIdx(i)] = inst; + + // Add to queue to be dispatched. 
+ dispatch.push_back(inst); + + d2i[0].size++; + ++dispatchSize; + } + } + + assert(dispatch.size() < 64); + + for (int i = 0; i < instsToDispatch->size; ++i) { + assert(!dispatch.empty()); + // Get instruction from front of time buffer + DynInstPtr inst = dispatch.front(); + dispatch.pop_front(); + --dispatchSize; + + if (inst->isSquashed()) + continue; + + ++numInsts; + instList.push_back(inst); + + DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n", + inst->seqNum, inst->readPC()); + + addToIQ(inst); + + if (inst->isMemRef()) { + addToLSQ(inst); + } + + if (inst->isNonSpeculative()) { + inst->setCanCommit(); + } + + // Check if IQ or LSQ is full. If so we'll need to break and stop + // removing instructions. Also update the number of insts to remove + // from the queue. + if (exactFullStall) { + bool stall = false; + if (IQ.isFull()) { + DPRINTF(BE, "IQ is full!\n"); + stall = true; + } else if (LSQ.isFull()) { + DPRINTF(BE, "LSQ is full!\n"); + stall = true; + } else if (isFull()) { + DPRINTF(BE, "ROB is full!\n"); + stall = true; + ROB_fcount++; + } + if (stall) { + instsToDispatch->size-= i+1; + dispatchStall(); + return; + } + } + } + + // Check if IQ or LSQ is full. If so we'll need to break and stop + // removing instructions. Also update the number of insts to remove + // from the queue. Check here if we don't care about exact stall + // conditions. + + bool stall = false; + if (IQ.isFull()) { + DPRINTF(BE, "IQ is full!\n"); + stall = true; + } else if (LSQ.isFull()) { + DPRINTF(BE, "LSQ is full!\n"); + stall = true; + } else if (isFull()) { + DPRINTF(BE, "ROB is full!\n"); + stall = true; + ROB_fcount++; + } + if (stall) { + d2i.advance(); + dispatchStall(); + return; + } +} + +template +void +BackEnd::dispatchStall() +{ + dispatchStatus = Blocked; + if (!cpu->decoupledFrontEnd) { + // Tell front end to stall here through a timebuffer, or just tell + // it directly. 
+ } +} + +template +void +BackEnd::checkDispatchStatus() +{ + DPRINTF(BE, "Checking dispatch status\n"); + assert(dispatchStatus == Blocked); + if (!IQ.isFull() && !LSQ.isFull() && !isFull()) { + DPRINTF(BE, "Dispatch no longer blocked\n"); + dispatchStatus = Running; + dispatchInsts(); + } +} + +template +void +BackEnd::scheduleReadyInsts() +{ + // Tell IQ to put any ready instructions into the instruction list. + // Probably want to have a list of DynInstPtrs returned here. Then I + // can choose to either put them into a time buffer to simulate + // IQ scheduling time, or hand them directly off to the next stage. + // Do you ever want to directly hand it off to the next stage? + DPRINTF(BE, "Trying to schedule ready instructions\n"); + IQ.scheduleReadyInsts(); +} + +template +void +BackEnd::executeInsts() +{ + int insts_to_execute = instsToExecute->size; + + issued_ops[0]+= insts_to_execute; + n_issued_dist[insts_to_execute]++; + + DPRINTF(BE, "Trying to execute %i instructions\n", insts_to_execute); + + fetchRedirect[0] = false; + + while (insts_to_execute > 0) { + // Get ready instruction from the IQ (or queue coming out of IQ) + // Execute the ready instruction. + // Wakeup any dependents if it's done. + DynInstPtr inst = IQ.getReadyInst(); + + DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n", + inst->seqNum, inst->readPC()); + + ++funcExeInst; + + // Check if the instruction is squashed; if so then skip it + // and don't count it towards the FU usage. + if (inst->isSquashed()) { + DPRINTF(BE, "Execute: Instruction was squashed.\n"); + + // Not sure how to handle this plus the method of sending # of + // instructions to use. Probably will just have to count it + // towards the bandwidth usage, but not the FU usage. + --insts_to_execute; + + // Consider this instruction executed so that commit can go + // ahead and retire the instruction. 
+ inst->setExecuted(); + + // Not sure if I should set this here or just let commit try to + // commit any squashed instructions. I like the latter a bit more. + inst->setCanCommit(); + +// ++iewExecSquashedInsts; + + continue; + } + + Fault fault = NoFault; + + // Execute instruction. + // Note that if the instruction faults, it will be handled + // at the commit stage. + if (inst->isMemRef() && + (!inst->isDataPrefetch() && !inst->isInstPrefetch())) { + DPRINTF(BE, "Execute: Initiating access for memory " + "reference.\n"); + + // Tell the LDSTQ to execute this instruction (if it is a load). + if (inst->isLoad()) { + // Loads will mark themselves as executed, and their writeback + // event adds the instruction to the queue to commit + fault = LSQ.executeLoad(inst); + +// ++iewExecLoadInsts; + } else if (inst->isStore()) { + LSQ.executeStore(inst); + +// ++iewExecStoreInsts; + + if (!(inst->req->flags & LOCKED)) { + inst->setExecuted(); + + instToCommit(inst); + } + // Store conditionals will mark themselves as executed, and + // their writeback event will add the instruction to the queue + // to commit. + } else { + panic("Unexpected memory type!\n"); + } + + } else { + inst->execute(); + +// ++iewExecutedInsts; + + inst->setExecuted(); + + instToCommit(inst); + } + + updateExeInstStats(inst); + + // Probably should have some sort of function for this. + // More general question of how to handle squashes? Have some sort of + // squash unit that controls it? Probably... + // Check if branch was correct. This check happens after the + // instruction is added to the queue because even if the branch + // is mispredicted, the branch instruction itself is still valid. + // Only handle this if there hasn't already been something that + // redirects fetch in this group of instructions. + + // This probably needs to prioritize the redirects if a different + // scheduler is used. 
Currently the scheduler schedules the oldest + // instruction first, so the branch resolution order will be correct. + unsigned tid = inst->threadNumber; + + if (!fetchRedirect[tid]) { + + if (inst->mispredicted()) { + fetchRedirect[tid] = true; + + DPRINTF(BE, "Execute: Branch mispredict detected.\n"); + DPRINTF(BE, "Execute: Redirecting fetch to PC: %#x.\n", + inst->nextPC); + + // If incorrect, then signal the ROB that it must be squashed. + squashDueToBranch(inst); + + if (inst->predTaken()) { +// predictedTakenIncorrect++; + } else { +// predictedNotTakenIncorrect++; + } + } else if (LSQ.violation()) { + fetchRedirect[tid] = true; + + // Get the DynInst that caused the violation. Note that this + // clears the violation signal. + DynInstPtr violator; + violator = LSQ.getMemDepViolator(); + + DPRINTF(BE, "LDSTQ detected a violation. Violator PC: " + "%#x, inst PC: %#x. Addr is: %#x.\n", + violator->readPC(), inst->readPC(), inst->physEffAddr); + + // Tell the instruction queue that a violation has occured. +// IQ.violation(inst, violator); + + // Squash. +// squashDueToMemOrder(inst,tid); + squashDueToBranch(inst); + +// ++memOrderViolationEvents; + } else if (LSQ.loadBlocked()) { + fetchRedirect[tid] = true; + + DPRINTF(BE, "Load operation couldn't execute because the " + "memory system is blocked. PC: %#x [sn:%lli]\n", + inst->readPC(), inst->seqNum); + + squashDueToMemBlocked(inst); + } + } + +// instList.pop_front(); + + --insts_to_execute; + + // keep an instruction count + thread->numInst++; + thread->numInsts++; + } + + assert(insts_to_execute >= 0); +} + +template +void +BackEnd::instToCommit(DynInstPtr &inst) +{ + int wb_width = wbWidth; + // First check the time slot that this instruction will write + // to. If there are free write ports at the time, then go ahead + // and write the instruction to that time. If there are not, + // keep looking back to see where's the first time there's a + // free slot. What happens if you run out of free spaces? 
+ // For now naively assume that all instructions take one cycle. + // Otherwise would have to look into the time buffer based on the + // latency of the instruction. + + DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n", + inst->seqNum, inst->readPC()); + + while (numInstsToWB[wbCycle].size >= wb_width) { + ++wbCycle; + + assert(wbCycle < 5); + } + + // Add finished instruction to queue to commit. + writeback.push_back(inst); + numInstsToWB[wbCycle].size++; + + if (wbCycle) + wb_penalized[0]++; +} + +template +void +BackEnd::writebackInsts() +{ + int wb_width = wbWidth; + // Using this method I'm not quite sure how to prevent an + // instruction from waking its own dependents multiple times, + // without the guarantee that commit always has enough bandwidth + // to accept all instructions being written back. This guarantee + // might not be too unrealistic. + InstListIt wb_inst_it = writeback.begin(); + InstListIt wb_end_it = writeback.end(); + int inst_num = 0; + int consumer_insts = 0; + + for (; inst_num < wb_width && + wb_inst_it != wb_end_it; inst_num++) { + DynInstPtr inst = (*wb_inst_it); + + // Some instructions will be sent to commit without having + // executed because they need commit to handle them. + // E.g. Uncached loads have not actually executed when they + // are first sent to commit. Instead commit must tell the LSQ + // when it's ready to execute the uncached load. 
+ if (!inst->isSquashed()) { + DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n", + inst->seqNum, inst->readPC()); + + inst->setCanCommit(); + inst->setResultReady(); + + if (inst->isExecuted()) { + int dependents = IQ.wakeDependents(inst); + if (dependents) { + producer_inst[0]++; + consumer_insts+= dependents; + } + } + } + + writeback.erase(wb_inst_it++); + } + LSQ.writebackStores(); + consumer_inst[0]+= consumer_insts; + writeback_count[0]+= inst_num; +} + +template +bool +BackEnd::commitInst(int inst_num) +{ + // Read instruction from the head of the ROB + DynInstPtr inst = instList.front(); + + // Make sure instruction is valid + assert(inst); + + if (!inst->readyToCommit()) + return false; + + DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n", + inst->seqNum, inst->readPC()); + + // If the instruction is not executed yet, then it is a non-speculative + // or store inst. Signal backwards that it should be executed. + if (!inst->isExecuted()) { + // Keep this number correct. We have not yet actually executed + // and committed this instruction. +// thread->funcExeInst--; + + if (inst->isNonSpeculative()) { +#if !FULL_SYSTEM + // Hack to make sure syscalls aren't executed until all stores + // write back their data. This direct communication shouldn't + // be used for anything other than this. + if (inst_num > 0 || LSQ.hasStoresToWB()) { + DPRINTF(BE, "Waiting for all stores to writeback.\n"); + return false; + } +#endif + + DPRINTF(BE, "Encountered a store or non-speculative " + "instruction at the head of the ROB, PC %#x.\n", + inst->readPC()); + + // Send back the non-speculative instruction's sequence number. + toIEW->nonSpecSeqNum = inst->seqNum; + + // Change the instruction so it won't try to commit again until + // it is executed. 
+ inst->clearCanCommit(); + +// ++commitNonSpecStalls; + + return false; + } else if (inst->isLoad()) { + DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n", + inst->seqNum, inst->readPC()); + + // Send back the non-speculative instruction's sequence + // number. Maybe just tell the lsq to re-execute the load. + toIEW->nonSpecSeqNum = inst->seqNum; + toIEW->uncached = true; + toIEW->lqIdx = inst->lqIdx; + + inst->clearCanCommit(); + + return false; + } else { + panic("Trying to commit un-executed instruction " + "of unknown type!\n"); + } + } + + // Now check if it's one of the special trap or barrier or + // serializing instructions. + if (inst->isThreadSync()) + { + // Not handled for now. + panic("Barrier instructions are not handled yet.\n"); + } + + // Check if the instruction caused a fault. If so, trap. + Fault inst_fault = inst->getFault(); + + if (inst_fault != NoFault) { + if (!inst->isNop()) { +#if FULL_SYSTEM + DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n", + inst->seqNum, inst->readPC()); + +// assert(!thread->inSyscall); + +// thread->inSyscall = true; + + // Consider holding onto the trap and waiting until the trap event + // happens for this to be executed. + inst_fault->invoke(thread->getXCProxy()); + + // Exit state update mode to avoid accidental updating. +// thread->inSyscall = false; + +// commitStatus = TrapPending; + + // Generate trap squash event. 
+// generateTrapEvent(); + + return false; +#else // !FULL_SYSTEM + panic("fault (%d) detected @ PC %08p", inst_fault, + inst->PC); +#endif // FULL_SYSTEM + } + } + + if (inst->isControl()) { +// ++commitCommittedBranches; + } + + int freed_regs = 0; + + for (int i = 0; i < inst->numDestRegs(); ++i) { + DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n", + (int)inst->destRegIdx(i), inst->seqNum); + thread->renameTable[inst->destRegIdx(i)] = inst; + ++freed_regs; + } + + if (inst->traceData) { + inst->traceData->finalize(); + inst->traceData = NULL; + } + + inst->clearDependents(); + + frontEnd->addFreeRegs(freed_regs); + + instList.pop_front(); + + --numInsts; + cpu->numInst++; + thread->numInsts++; + ++thread->funcExeInst; + thread->PC = inst->readNextPC(); + updateComInstStats(inst); + + // Write the done sequence number here. + toIEW->doneSeqNum = inst->seqNum; + +#if FULL_SYSTEM + int count = 0; + Addr oldpc; + do { + if (count == 0) + assert(!thread->inSyscall && !thread->trapPending); + oldpc = thread->readPC(); + cpu->system->pcEventQueue.service( + thread->getXCProxy()); + count++; + } while (oldpc != thread->readPC()); + if (count > 1) { + DPRINTF(BE, "PC skip function event, stopping commit\n"); +// completed_last_inst = false; +// squashPending = true; + return false; + } +#endif + return true; +} + +template +void +BackEnd::commitInsts() +{ + int commit_width = commitWidth ? commitWidth : width; + + // Not sure this should be a loop or not. 
+ int inst_num = 0; + while (!instList.empty() && inst_num < commit_width) { + if (instList.front()->isSquashed()) { + panic("No squashed insts should still be on the list!"); + instList.front()->clearDependents(); + instList.pop_front(); + continue; + } + + if (!commitInst(inst_num++)) { + break; + } + } + n_committed_dist.sample(inst_num); +} + +template +void +BackEnd::squash(const InstSeqNum &sn) +{ + IQ.squash(sn); + LSQ.squash(sn); + + int freed_regs = 0; + InstListIt dispatch_end = dispatch.end(); + InstListIt insts_it = dispatch.end(); + insts_it--; + + while (insts_it != dispatch_end && (*insts_it)->seqNum > sn) + { + if ((*insts_it)->isSquashed()) { + --insts_it; + continue; + } + DPRINTF(BE, "Squashing instruction on dispatch list PC %#x, [sn:%lli].\n", + (*insts_it)->readPC(), + (*insts_it)->seqNum); + + // Mark the instruction as squashed, and ready to commit so that + // it can drain out of the pipeline. + (*insts_it)->setSquashed(); + + (*insts_it)->setCanCommit(); + + // Be careful with IPRs and such here + for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) { + DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i); + DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n", + (int)(*insts_it)->destRegIdx(i), prev_dest); + renameTable[(*insts_it)->destRegIdx(i)] = prev_dest; + ++freed_regs; + } + + (*insts_it)->clearDependents(); + + --insts_it; + } + + insts_it = instList.end(); + insts_it--; + + while (!instList.empty() && (*insts_it)->seqNum > sn) + { + if ((*insts_it)->isSquashed()) { + --insts_it; + continue; + } + DPRINTF(BE, "Squashing instruction on inst list PC %#x, [sn:%lli].\n", + (*insts_it)->readPC(), + (*insts_it)->seqNum); + + // Mark the instruction as squashed, and ready to commit so that + // it can drain out of the pipeline. 
+ (*insts_it)->setSquashed(); + + (*insts_it)->setCanCommit(); + + for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) { + DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i); + DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n", + (int)(*insts_it)->destRegIdx(i), prev_dest); + renameTable[(*insts_it)->destRegIdx(i)] = prev_dest; + ++freed_regs; + } + + (*insts_it)->clearDependents(); + + instList.erase(insts_it--); + --numInsts; + } + + frontEnd->addFreeRegs(freed_regs); +} + +template +void +BackEnd::squashFromXC() +{ + xcSquash = true; +} + +template +void +BackEnd::squashDueToBranch(DynInstPtr &inst) +{ + // Update the branch predictor state I guess + squash(inst->seqNum); + frontEnd->squash(inst->seqNum, inst->readNextPC(), + true, inst->mispredicted()); +} + +template +void +BackEnd::squashDueToMemBlocked(DynInstPtr &inst) +{ + DPRINTF(IEW, "Memory blocked, squashing load and younger insts, " + "PC: %#x [sn:%i].\n", inst->readPC(), inst->seqNum); + + squash(inst->seqNum - 1); + frontEnd->squash(inst->seqNum - 1, inst->readPC()); +} + +template +void +BackEnd::fetchFault(Fault &fault) +{ + faultFromFetch = fault; +} + +template +void +BackEnd::updateExeInstStats(DynInstPtr &inst) +{ + int thread_number = inst->threadNumber; + + // + // Pick off the software prefetches + // +#ifdef TARGET_ALPHA + if (inst->isDataPrefetch()) + exe_swp[thread_number]++; + else + exe_inst[thread_number]++; +#else + exe_inst[thread_number]++; +#endif + + // + // Control operations + // + if (inst->isControl()) + exe_branches[thread_number]++; + + // + // Memory operations + // + if (inst->isMemRef()) { + exe_refs[thread_number]++; + + if (inst->isLoad()) + exe_loads[thread_number]++; + } +} + +template +void +BackEnd::updateComInstStats(DynInstPtr &inst) +{ + unsigned thread = inst->threadNumber; + + // + // Pick off the software prefetches + // +#ifdef TARGET_ALPHA + if (inst->isDataPrefetch()) { + stat_com_swp[thread]++; + } else { + stat_com_inst[thread]++; + 
} +#else + stat_com_inst[thread]++; +#endif + + // + // Control Instructions + // + if (inst->isControl()) + stat_com_branches[thread]++; + + // + // Memory references + // + if (inst->isMemRef()) { + stat_com_refs[thread]++; + + if (inst->isLoad()) { + stat_com_loads[thread]++; + } + } + + if (inst->isMemBarrier()) { + stat_com_membars[thread]++; + } +} + +template +void +BackEnd::dumpInsts() +{ + int num = 0; + int valid_num = 0; + + InstListIt inst_list_it = instList.begin(); + + cprintf("Inst list size: %i\n", instList.size()); + + while (inst_list_it != instList.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it++; + ++num; + } + + cprintf("Dispatch list size: %i\n", dispatch.size()); + + inst_list_it = dispatch.begin(); + + while (inst_list_it != dispatch.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. 
+ ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it++; + ++num; + } + + cprintf("Writeback list size: %i\n", writeback.size()); + + inst_list_it = writeback.begin(); + + while (inst_list_it != writeback.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it++; + ++num; + } +} diff --git a/src/cpu/ozone/cpu.hh b/src/cpu/ozone/cpu.hh index 5af2b02b2..c4626221e 100644 --- a/src/cpu/ozone/cpu.hh +++ b/src/cpu/ozone/cpu.hh @@ -41,7 +41,6 @@ #include "cpu/ozone/thread_state.hh" #include "cpu/pc_event.hh" #include "cpu/static_inst.hh" -#include "mem/mem_interface.hh" #include "sim/eventq.hh" // forward declarations @@ -69,7 +68,7 @@ class Process; class Checkpoint; class EndQuiesceEvent; -class MemInterface; +class Request; namespace Trace { class InstRecord; @@ -95,6 +94,8 @@ class OzoneCPU : public BaseCPU typedef typename Impl::DynInst DynInst; typedef typename Impl::DynInstPtr DynInstPtr; + 
typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; typedef TheISA::MiscReg MiscReg; public: @@ -110,7 +111,7 @@ class OzoneCPU : public BaseCPU int readCpuId() { return thread->cpuId; } - FunctionalMemory *getMemPtr() { return thread->mem; } + TranslatingPort *getMemPort() { return /*thread->port*/NULL; } #if FULL_SYSTEM System *getSystemPtr() { return cpu->system; } @@ -175,19 +176,23 @@ class OzoneCPU : public BaseCPU uint64_t readIntReg(int reg_idx); - float readFloatRegSingle(int reg_idx); + FloatReg readFloatReg(int reg_idx, int width); - double readFloatRegDouble(int reg_idx); + FloatReg readFloatReg(int reg_idx); - uint64_t readFloatRegInt(int reg_idx); + FloatRegBits readFloatRegBits(int reg_idx, int width); + + FloatRegBits readFloatRegBits(int reg_idx); void setIntReg(int reg_idx, uint64_t val); - void setFloatRegSingle(int reg_idx, float val); + void setFloatReg(int reg_idx, FloatReg val, int width); + + void setFloatReg(int reg_idx, FloatReg val); - void setFloatRegDouble(int reg_idx, double val); + void setFloatRegBits(int reg_idx, FloatRegBits val, int width); - void setFloatRegInt(int reg_idx, uint64_t val); + void setFloatRegBits(int reg_idx, FloatRegBits val); uint64_t readPC() { return thread->PC; } void setPC(Addr val); @@ -195,6 +200,15 @@ class OzoneCPU : public BaseCPU uint64_t readNextPC() { return thread->nextPC; } void setNextPC(Addr val); + uint64_t readNextNPC() + { + panic("Alpha has no NextNPC!"); + return 0; + } + + void setNextNPC(uint64_t val) + { panic("Alpha has no NextNPC!"); } + public: // ISA stuff: MiscReg readMiscReg(int misc_reg); @@ -233,6 +247,9 @@ class OzoneCPU : public BaseCPU void setFuncExeInst(Counter new_val) { thread->funcExeInst = new_val; } #endif + void changeRegFileContext(TheISA::RegFile::ContextParam param, + TheISA::RegFile::ContextVal val) + { panic("Not supported on Alpha!"); } }; // execution context proxy @@ -350,10 +367,10 @@ class OzoneCPU : public BaseCPU #endif // L1 
instruction cache - MemInterface *icacheInterface; +// MemInterface *icacheInterface; // L1 data cache - MemInterface *dcacheInterface; +// MemInterface *dcacheInterface; /** Pointer to memory. */ FunctionalMemory *mem; @@ -427,40 +444,28 @@ class OzoneCPU : public BaseCPU int getInstAsid() { return thread.asid; } int getDataAsid() { return thread.asid; } - Fault dummyTranslation(MemReqPtr &req) - { -#if 0 - assert((req->vaddr >> 48 & 0xffff) == 0); -#endif - - // put the asid in the upper 16 bits of the paddr - req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16); - req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16; - return NoFault; - } - /** Translates instruction requestion in syscall emulation mode. */ - Fault translateInstReq(MemReqPtr &req) + Fault translateInstReq(Request *req) { - return dummyTranslation(req); + return thread.translateInstReq(req); } /** Translates data read request in syscall emulation mode. */ - Fault translateDataReadReq(MemReqPtr &req) + Fault translateDataReadReq(Request *req) { - return dummyTranslation(req); + return thread.translateDataReadReq(req); } /** Translates data write request in syscall emulation mode. */ - Fault translateDataWriteReq(MemReqPtr &req) + Fault translateDataWriteReq(Request *req) { - return dummyTranslation(req); + return thread.translateDataWriteReq(req); } #endif /** Old CPU read from memory function. No longer used. */ template - Fault read(MemReqPtr &req, T &data) + Fault read(Request *req, T &data) { #if 0 #if FULL_SYSTEM && defined(TARGET_ALPHA) @@ -469,12 +474,12 @@ class OzoneCPU : public BaseCPU req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true); } #endif -#endif - Fault error; if (req->flags & LOCKED) { lockAddrList.insert(req->paddr); lockFlag = true; } +#endif + Fault error; error = this->mem->read(req, data); data = gtoh(data); @@ -484,14 +489,14 @@ class OzoneCPU : public BaseCPU /** CPU read function, forwards read to LSQ. 
*/ template - Fault read(MemReqPtr &req, T &data, int load_idx) + Fault read(Request *req, T &data, int load_idx) { return backEnd->read(req, data, load_idx); } /** Old CPU write to memory function. No longer used. */ template - Fault write(MemReqPtr &req, T &data) + Fault write(Request *req, T &data) { #if 0 #if FULL_SYSTEM && defined(TARGET_ALPHA) @@ -539,7 +544,6 @@ class OzoneCPU : public BaseCPU } } -#endif #endif if (req->flags & LOCKED) { @@ -560,13 +564,14 @@ class OzoneCPU : public BaseCPU } } } +#endif return this->mem->write(req, (T)htog(data)); } /** CPU write function, forwards write to LSQ. */ template - Fault write(MemReqPtr &req, T &data, int store_idx) + Fault write(Request *req, T &data, int store_idx) { return backEnd->write(req, data, store_idx); } diff --git a/src/cpu/ozone/cpu_builder.cc b/src/cpu/ozone/cpu_builder.cc new file mode 100644 index 000000000..64aa49c71 --- /dev/null +++ b/src/cpu/ozone/cpu_builder.cc @@ -0,0 +1,830 @@ + +#include + +#include "cpu/checker/cpu.hh" +#include "cpu/inst_seq.hh" +#include "cpu/ozone/cpu.hh" +#include "cpu/ozone/ozone_impl.hh" +#include "cpu/ozone/simple_impl.hh" +#include "cpu/ozone/simple_params.hh" +#include "mem/cache/base_cache.hh" +#include "sim/builder.hh" +#include "sim/process.hh" +#include "sim/sim_object.hh" + +class DerivOzoneCPU : public OzoneCPU +{ + public: + DerivOzoneCPU(SimpleParams *p) + : OzoneCPU(p) + { } +}; + +class SimpleOzoneCPU : public OzoneCPU +{ + public: + SimpleOzoneCPU(SimpleParams *p) + : OzoneCPU(p) + { } +}; + + +//////////////////////////////////////////////////////////////////////// +// +// OzoneCPU Simulation Object +// + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(DerivOzoneCPU) + + Param clock; + Param numThreads; + +#if FULL_SYSTEM +SimObjectParam system; +Param cpu_id; +SimObjectParam itb; +SimObjectParam dtb; +#else +SimObjectVectorParam workload; +//SimObjectParam page_table; +#endif // FULL_SYSTEM + +SimObjectParam mem; + +SimObjectParam checker; + +Param 
max_insts_any_thread; +Param max_insts_all_threads; +Param max_loads_any_thread; +Param max_loads_all_threads; + +SimObjectParam icache; +SimObjectParam dcache; + +Param cachePorts; +Param width; +Param frontEndWidth; +Param backEndWidth; +Param backEndSquashLatency; +Param backEndLatency; +Param maxInstBufferSize; +Param numPhysicalRegs; +Param maxOutstandingMemOps; + +Param decodeToFetchDelay; +Param renameToFetchDelay; +Param iewToFetchDelay; +Param commitToFetchDelay; +Param fetchWidth; + +Param renameToDecodeDelay; +Param iewToDecodeDelay; +Param commitToDecodeDelay; +Param fetchToDecodeDelay; +Param decodeWidth; + +Param iewToRenameDelay; +Param commitToRenameDelay; +Param decodeToRenameDelay; +Param renameWidth; + +Param commitToIEWDelay; +Param renameToIEWDelay; +Param issueToExecuteDelay; +Param issueWidth; +Param executeWidth; +Param executeIntWidth; +Param executeFloatWidth; +Param executeBranchWidth; +Param executeMemoryWidth; + +Param iewToCommitDelay; +Param renameToROBDelay; +Param commitWidth; +Param squashWidth; + +Param localPredictorSize; +Param localCtrBits; +Param localHistoryTableSize; +Param localHistoryBits; +Param globalPredictorSize; +Param globalCtrBits; +Param globalHistoryBits; +Param choicePredictorSize; +Param choiceCtrBits; + +Param BTBEntries; +Param BTBTagSize; + +Param RASSize; + +Param LQEntries; +Param SQEntries; +Param LFSTSize; +Param SSITSize; + +Param numPhysIntRegs; +Param numPhysFloatRegs; +Param numIQEntries; +Param numROBEntries; + +Param decoupledFrontEnd; +Param dispatchWidth; +Param wbWidth; + +Param smtNumFetchingThreads; +Param smtFetchPolicy; +Param smtLSQPolicy; +Param smtLSQThreshold; +Param smtIQPolicy; +Param smtIQThreshold; +Param smtROBPolicy; +Param smtROBThreshold; +Param smtCommitPolicy; + +Param instShiftAmt; + +Param defer_registration; + +Param function_trace; +Param function_trace_start; + +END_DECLARE_SIM_OBJECT_PARAMS(DerivOzoneCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU) + + INIT_PARAM(clock, 
"clock speed"), + INIT_PARAM(numThreads, "number of HW thread contexts"), + +#if FULL_SYSTEM + INIT_PARAM(system, "System object"), + INIT_PARAM(cpu_id, "processor ID"), + INIT_PARAM(itb, "Instruction translation buffer"), + INIT_PARAM(dtb, "Data translation buffer"), +#else + INIT_PARAM(workload, "Processes to run"), +// INIT_PARAM(page_table, "Page table"), +#endif // FULL_SYSTEM + + INIT_PARAM_DFLT(mem, "Memory", NULL), + + INIT_PARAM_DFLT(checker, "Checker CPU", NULL), + + INIT_PARAM_DFLT(max_insts_any_thread, + "Terminate when any thread reaches this inst count", + 0), + INIT_PARAM_DFLT(max_insts_all_threads, + "Terminate when all threads have reached" + "this inst count", + 0), + INIT_PARAM_DFLT(max_loads_any_thread, + "Terminate when any thread reaches this load count", + 0), + INIT_PARAM_DFLT(max_loads_all_threads, + "Terminate when all threads have reached this load" + "count", + 0), + + INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), + INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), + + INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), + INIT_PARAM_DFLT(width, "Width", 1), + INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1), + INIT_PARAM_DFLT(backEndWidth, "Back end width", 1), + INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1), + INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1), + INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16), + INIT_PARAM(numPhysicalRegs, "Number of physical registers"), + INIT_PARAM_DFLT(maxOutstandingMemOps, "Maximum outstanding memory operations", 4), + + INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), + INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), + INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch" + "delay"), + INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), + INIT_PARAM(fetchWidth, "Fetch width"), + INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), + INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode" + 
"delay"), + INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), + INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), + INIT_PARAM(decodeWidth, "Decode width"), + + INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename" + "delay"), + INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), + INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), + INIT_PARAM(renameWidth, "Rename width"), + + INIT_PARAM(commitToIEWDelay, "Commit to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(renameToIEWDelay, "Rename to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal" + "to the IEW stage)"), + INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(executeWidth, "Execute width"), + INIT_PARAM(executeIntWidth, "Integer execute width"), + INIT_PARAM(executeFloatWidth, "Floating point execute width"), + INIT_PARAM(executeBranchWidth, "Branch execute width"), + INIT_PARAM(executeMemoryWidth, "Memory execute width"), + + INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " + "delay"), + INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), + INIT_PARAM(commitWidth, "Commit width"), + INIT_PARAM(squashWidth, "Squash width"), + + INIT_PARAM(localPredictorSize, "Size of local predictor"), + INIT_PARAM(localCtrBits, "Bits per counter"), + INIT_PARAM(localHistoryTableSize, "Size of local history table"), + INIT_PARAM(localHistoryBits, "Bits for the local history"), + INIT_PARAM(globalPredictorSize, "Size of global predictor"), + INIT_PARAM(globalCtrBits, "Bits per counter"), + INIT_PARAM(globalHistoryBits, "Bits of history"), + INIT_PARAM(choicePredictorSize, "Size of choice predictor"), + INIT_PARAM(choiceCtrBits, "Bits of choice counters"), + + INIT_PARAM(BTBEntries, "Number of BTB entries"), + INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), + + INIT_PARAM(RASSize, "RAS size"), + + INIT_PARAM(LQEntries, "Number of load queue entries"), + INIT_PARAM(SQEntries, "Number of 
store queue entries"), + INIT_PARAM(LFSTSize, "Last fetched store table size"), + INIT_PARAM(SSITSize, "Store set ID table size"), + + INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), + INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " + "registers"), + INIT_PARAM(numIQEntries, "Number of instruction queue entries"), + INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), + + INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true), + INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0), + INIT_PARAM_DFLT(wbWidth, "Writeback width", 0), + + INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1), + INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"), + INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100), + INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"), + INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100), + INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100), + INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"), + + INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + + INIT_PARAM(function_trace, "Enable function trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace") + +END_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU) + +CREATE_SIM_OBJECT(DerivOzoneCPU) +{ + DerivOzoneCPU *cpu; + +#if FULL_SYSTEM + // Full-system only supports a single thread for the moment. + int actual_num_threads = 1; +#else + // In non-full-system mode, we infer the number of threads from + // the workload if it's not explicitly specified. + int actual_num_threads = + numThreads.isValid() ? 
numThreads : workload.size(); + + if (workload.size() == 0) { + fatal("Must specify at least one workload!"); + } + +#endif + + SimpleParams *params = new SimpleParams; + + params->clock = clock; + + params->name = getInstanceName(); + params->numberOfThreads = actual_num_threads; + +#if FULL_SYSTEM + params->system = system; + params->cpu_id = cpu_id; + params->itb = itb; + params->dtb = dtb; +#else + params->workload = workload; +// params->pTable = page_table; +#endif // FULL_SYSTEM + + params->mem = mem; + params->checker = checker; + params->max_insts_any_thread = max_insts_any_thread; + params->max_insts_all_threads = max_insts_all_threads; + params->max_loads_any_thread = max_loads_any_thread; + params->max_loads_all_threads = max_loads_all_threads; + + // + // Caches + // + params->icacheInterface = icache ? icache->getInterface() : NULL; + params->dcacheInterface = dcache ? dcache->getInterface() : NULL; + params->cachePorts = cachePorts; + + params->width = width; + params->frontEndWidth = frontEndWidth; + params->backEndWidth = backEndWidth; + params->backEndSquashLatency = backEndSquashLatency; + params->backEndLatency = backEndLatency; + params->maxInstBufferSize = maxInstBufferSize; + params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs; + params->maxOutstandingMemOps = maxOutstandingMemOps; + + params->decodeToFetchDelay = decodeToFetchDelay; + params->renameToFetchDelay = renameToFetchDelay; + params->iewToFetchDelay = iewToFetchDelay; + params->commitToFetchDelay = commitToFetchDelay; + params->fetchWidth = fetchWidth; + + params->renameToDecodeDelay = renameToDecodeDelay; + params->iewToDecodeDelay = iewToDecodeDelay; + params->commitToDecodeDelay = commitToDecodeDelay; + params->fetchToDecodeDelay = fetchToDecodeDelay; + params->decodeWidth = decodeWidth; + + params->iewToRenameDelay = iewToRenameDelay; + params->commitToRenameDelay = commitToRenameDelay; + params->decodeToRenameDelay = decodeToRenameDelay; + params->renameWidth = 
renameWidth; + + params->commitToIEWDelay = commitToIEWDelay; + params->renameToIEWDelay = renameToIEWDelay; + params->issueToExecuteDelay = issueToExecuteDelay; + params->issueWidth = issueWidth; + params->executeWidth = executeWidth; + params->executeIntWidth = executeIntWidth; + params->executeFloatWidth = executeFloatWidth; + params->executeBranchWidth = executeBranchWidth; + params->executeMemoryWidth = executeMemoryWidth; + + params->iewToCommitDelay = iewToCommitDelay; + params->renameToROBDelay = renameToROBDelay; + params->commitWidth = commitWidth; + params->squashWidth = squashWidth; + + + params->localPredictorSize = localPredictorSize; + params->localCtrBits = localCtrBits; + params->localHistoryTableSize = localHistoryTableSize; + params->localHistoryBits = localHistoryBits; + params->globalPredictorSize = globalPredictorSize; + params->globalCtrBits = globalCtrBits; + params->globalHistoryBits = globalHistoryBits; + params->choicePredictorSize = choicePredictorSize; + params->choiceCtrBits = choiceCtrBits; + + params->BTBEntries = BTBEntries; + params->BTBTagSize = BTBTagSize; + + params->RASSize = RASSize; + + params->LQEntries = LQEntries; + params->SQEntries = SQEntries; + + params->SSITSize = SSITSize; + params->LFSTSize = LFSTSize; + + params->numPhysIntRegs = numPhysIntRegs; + params->numPhysFloatRegs = numPhysFloatRegs; + params->numIQEntries = numIQEntries; + params->numROBEntries = numROBEntries; + + params->decoupledFrontEnd = decoupledFrontEnd; + params->dispatchWidth = dispatchWidth; + params->wbWidth = wbWidth; + + params->smtNumFetchingThreads = smtNumFetchingThreads; + params->smtFetchPolicy = smtFetchPolicy; + params->smtIQPolicy = smtIQPolicy; + params->smtLSQPolicy = smtLSQPolicy; + params->smtLSQThreshold = smtLSQThreshold; + params->smtROBPolicy = smtROBPolicy; + params->smtROBThreshold = smtROBThreshold; + params->smtCommitPolicy = smtCommitPolicy; + + params->instShiftAmt = 2; + + params->deferRegistration = defer_registration; 
+ + params->functionTrace = function_trace; + params->functionTraceStart = function_trace_start; + + cpu = new DerivOzoneCPU(params); + + return cpu; +} + +REGISTER_SIM_OBJECT("DerivOzoneCPU", DerivOzoneCPU) + + + +//////////////////////////////////////////////////////////////////////// +// +// OzoneCPU Simulation Object +// + +BEGIN_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU) + + Param clock; + Param numThreads; + +#if FULL_SYSTEM +SimObjectParam system; +Param cpu_id; +SimObjectParam itb; +SimObjectParam dtb; +#else +SimObjectVectorParam workload; +//SimObjectParam page_table; +#endif // FULL_SYSTEM + +SimObjectParam mem; + +SimObjectParam checker; + +Param max_insts_any_thread; +Param max_insts_all_threads; +Param max_loads_any_thread; +Param max_loads_all_threads; + +SimObjectParam icache; +SimObjectParam dcache; + +Param cachePorts; +Param width; +Param frontEndWidth; +Param backEndWidth; +Param backEndSquashLatency; +Param backEndLatency; +Param maxInstBufferSize; +Param numPhysicalRegs; + +Param decodeToFetchDelay; +Param renameToFetchDelay; +Param iewToFetchDelay; +Param commitToFetchDelay; +Param fetchWidth; + +Param renameToDecodeDelay; +Param iewToDecodeDelay; +Param commitToDecodeDelay; +Param fetchToDecodeDelay; +Param decodeWidth; + +Param iewToRenameDelay; +Param commitToRenameDelay; +Param decodeToRenameDelay; +Param renameWidth; + +Param commitToIEWDelay; +Param renameToIEWDelay; +Param issueToExecuteDelay; +Param issueWidth; +Param executeWidth; +Param executeIntWidth; +Param executeFloatWidth; +Param executeBranchWidth; +Param executeMemoryWidth; + +Param iewToCommitDelay; +Param renameToROBDelay; +Param commitWidth; +Param squashWidth; + +Param localPredictorSize; +Param localCtrBits; +Param localHistoryTableSize; +Param localHistoryBits; +Param globalPredictorSize; +Param globalCtrBits; +Param globalHistoryBits; +Param choicePredictorSize; +Param choiceCtrBits; + +Param BTBEntries; +Param BTBTagSize; + +Param RASSize; + +Param LQEntries; +Param 
SQEntries; +Param LFSTSize; +Param SSITSize; + +Param numPhysIntRegs; +Param numPhysFloatRegs; +Param numIQEntries; +Param numROBEntries; + +Param decoupledFrontEnd; +Param dispatchWidth; +Param wbWidth; + +Param smtNumFetchingThreads; +Param smtFetchPolicy; +Param smtLSQPolicy; +Param smtLSQThreshold; +Param smtIQPolicy; +Param smtIQThreshold; +Param smtROBPolicy; +Param smtROBThreshold; +Param smtCommitPolicy; + +Param instShiftAmt; + +Param defer_registration; + +Param function_trace; +Param function_trace_start; + +END_DECLARE_SIM_OBJECT_PARAMS(SimpleOzoneCPU) + +BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) + + INIT_PARAM(clock, "clock speed"), + INIT_PARAM(numThreads, "number of HW thread contexts"), + +#if FULL_SYSTEM + INIT_PARAM(system, "System object"), + INIT_PARAM(cpu_id, "processor ID"), + INIT_PARAM(itb, "Instruction translation buffer"), + INIT_PARAM(dtb, "Data translation buffer"), +#else + INIT_PARAM(workload, "Processes to run"), +// INIT_PARAM(page_table, "Page table"), +#endif // FULL_SYSTEM + + INIT_PARAM_DFLT(mem, "Memory", NULL), + + INIT_PARAM_DFLT(checker, "Checker CPU", NULL), + + INIT_PARAM_DFLT(max_insts_any_thread, + "Terminate when any thread reaches this inst count", + 0), + // Adjacent string literals are concatenated verbatim, so a literal that + // continues on the next line must end with a space. + INIT_PARAM_DFLT(max_insts_all_threads, + "Terminate when all threads have reached " + "this inst count", + 0), + INIT_PARAM_DFLT(max_loads_any_thread, + "Terminate when any thread reaches this load count", + 0), + INIT_PARAM_DFLT(max_loads_all_threads, + "Terminate when all threads have reached this load " + "count", + 0), + + INIT_PARAM_DFLT(icache, "L1 instruction cache", NULL), + INIT_PARAM_DFLT(dcache, "L1 data cache", NULL), + + INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200), + INIT_PARAM_DFLT(width, "Width", 1), + INIT_PARAM_DFLT(frontEndWidth, "Front end width", 1), + INIT_PARAM_DFLT(backEndWidth, "Back end width", 1), + INIT_PARAM_DFLT(backEndSquashLatency, "Back end squash latency", 1), + INIT_PARAM_DFLT(backEndLatency, "Back end latency", 1), + 
INIT_PARAM_DFLT(maxInstBufferSize, "Maximum instruction buffer size", 16), + INIT_PARAM(numPhysicalRegs, "Number of physical registers"), + + INIT_PARAM(decodeToFetchDelay, "Decode to fetch delay"), + INIT_PARAM(renameToFetchDelay, "Rename to fetch delay"), + // Adjacent string literals are concatenated verbatim, so a literal that + // continues on the next line must end with a space. + INIT_PARAM(iewToFetchDelay, "Issue/Execute/Writeback to fetch " + "delay"), + INIT_PARAM(commitToFetchDelay, "Commit to fetch delay"), + INIT_PARAM(fetchWidth, "Fetch width"), + INIT_PARAM(renameToDecodeDelay, "Rename to decode delay"), + INIT_PARAM(iewToDecodeDelay, "Issue/Execute/Writeback to decode " + "delay"), + INIT_PARAM(commitToDecodeDelay, "Commit to decode delay"), + INIT_PARAM(fetchToDecodeDelay, "Fetch to decode delay"), + INIT_PARAM(decodeWidth, "Decode width"), + + INIT_PARAM(iewToRenameDelay, "Issue/Execute/Writeback to rename " + "delay"), + INIT_PARAM(commitToRenameDelay, "Commit to rename delay"), + INIT_PARAM(decodeToRenameDelay, "Decode to rename delay"), + INIT_PARAM(renameWidth, "Rename width"), + + INIT_PARAM(commitToIEWDelay, "Commit to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(renameToIEWDelay, "Rename to " + "Issue/Execute/Writeback delay"), + INIT_PARAM(issueToExecuteDelay, "Issue to execute delay (internal " + "to the IEW stage)"), + INIT_PARAM(issueWidth, "Issue width"), + INIT_PARAM(executeWidth, "Execute width"), + INIT_PARAM(executeIntWidth, "Integer execute width"), + INIT_PARAM(executeFloatWidth, "Floating point execute width"), + INIT_PARAM(executeBranchWidth, "Branch execute width"), + INIT_PARAM(executeMemoryWidth, "Memory execute width"), + + INIT_PARAM(iewToCommitDelay, "Issue/Execute/Writeback to commit " + "delay"), + INIT_PARAM(renameToROBDelay, "Rename to reorder buffer delay"), + INIT_PARAM(commitWidth, "Commit width"), + INIT_PARAM(squashWidth, "Squash width"), + + INIT_PARAM(localPredictorSize, "Size of local predictor"), + INIT_PARAM(localCtrBits, "Bits per counter"), + INIT_PARAM(localHistoryTableSize, "Size of local history table"), + 
INIT_PARAM(localHistoryBits, "Bits for the local history"), + INIT_PARAM(globalPredictorSize, "Size of global predictor"), + INIT_PARAM(globalCtrBits, "Bits per counter"), + INIT_PARAM(globalHistoryBits, "Bits of history"), + INIT_PARAM(choicePredictorSize, "Size of choice predictor"), + INIT_PARAM(choiceCtrBits, "Bits of choice counters"), + + INIT_PARAM(BTBEntries, "Number of BTB entries"), + INIT_PARAM(BTBTagSize, "Size of the BTB tags, in bits"), + + INIT_PARAM(RASSize, "RAS size"), + + INIT_PARAM(LQEntries, "Number of load queue entries"), + INIT_PARAM(SQEntries, "Number of store queue entries"), + INIT_PARAM(LFSTSize, "Last fetched store table size"), + INIT_PARAM(SSITSize, "Store set ID table size"), + + INIT_PARAM(numPhysIntRegs, "Number of physical integer registers"), + INIT_PARAM(numPhysFloatRegs, "Number of physical floating point " + "registers"), + INIT_PARAM(numIQEntries, "Number of instruction queue entries"), + INIT_PARAM(numROBEntries, "Number of reorder buffer entries"), + + INIT_PARAM_DFLT(decoupledFrontEnd, "Decoupled front end", true), + INIT_PARAM_DFLT(dispatchWidth, "Dispatch width", 0), + INIT_PARAM_DFLT(wbWidth, "Writeback width", 0), + + INIT_PARAM_DFLT(smtNumFetchingThreads, "SMT Number of Fetching Threads", 1), + INIT_PARAM_DFLT(smtFetchPolicy, "SMT Fetch Policy", "SingleThread"), + INIT_PARAM_DFLT(smtLSQPolicy, "SMT LSQ Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtLSQThreshold,"SMT LSQ Threshold", 100), + INIT_PARAM_DFLT(smtIQPolicy, "SMT IQ Policy", "Partitioned"), + INIT_PARAM_DFLT(smtIQThreshold, "SMT IQ Threshold", 100), + INIT_PARAM_DFLT(smtROBPolicy, "SMT ROB Sharing Policy", "Partitioned"), + INIT_PARAM_DFLT(smtROBThreshold,"SMT ROB Threshold", 100), + INIT_PARAM_DFLT(smtCommitPolicy,"SMT Commit Fetch Policy", "RoundRobin"), + + INIT_PARAM(instShiftAmt, "Number of bits to shift instructions by"), + INIT_PARAM(defer_registration, "defer system registration (for sampling)"), + + INIT_PARAM(function_trace, "Enable function 
trace"), + INIT_PARAM(function_trace_start, "Cycle to start function trace") + +END_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU) + +CREATE_SIM_OBJECT(SimpleOzoneCPU) +{ + SimpleOzoneCPU *cpu; + +#if FULL_SYSTEM + // Full-system only supports a single thread for the moment. + int actual_num_threads = 1; +#else + // In non-full-system mode, we infer the number of threads from + // the workload if it's not explicitly specified. + int actual_num_threads = + numThreads.isValid() ? numThreads : workload.size(); + + if (workload.size() == 0) { + fatal("Must specify at least one workload!"); + } + +#endif + + SimpleParams *params = new SimpleParams; + + params->clock = clock; + + params->name = getInstanceName(); + params->numberOfThreads = actual_num_threads; + +#if FULL_SYSTEM + params->system = system; + params->cpu_id = cpu_id; + params->itb = itb; + params->dtb = dtb; +#else + params->workload = workload; +// params->pTable = page_table; +#endif // FULL_SYSTEM + + params->mem = mem; + params->checker = checker; + params->max_insts_any_thread = max_insts_any_thread; + params->max_insts_all_threads = max_insts_all_threads; + params->max_loads_any_thread = max_loads_any_thread; + params->max_loads_all_threads = max_loads_all_threads; + + // + // Caches + // + params->icacheInterface = icache ? icache->getInterface() : NULL; + params->dcacheInterface = dcache ? 
dcache->getInterface() : NULL; + params->cachePorts = cachePorts; + + params->width = width; + params->frontEndWidth = frontEndWidth; + params->backEndWidth = backEndWidth; + params->backEndSquashLatency = backEndSquashLatency; + params->backEndLatency = backEndLatency; + params->maxInstBufferSize = maxInstBufferSize; + params->numPhysicalRegs = numPhysIntRegs + numPhysFloatRegs; + + params->decodeToFetchDelay = decodeToFetchDelay; + params->renameToFetchDelay = renameToFetchDelay; + params->iewToFetchDelay = iewToFetchDelay; + params->commitToFetchDelay = commitToFetchDelay; + params->fetchWidth = fetchWidth; + + params->renameToDecodeDelay = renameToDecodeDelay; + params->iewToDecodeDelay = iewToDecodeDelay; + params->commitToDecodeDelay = commitToDecodeDelay; + params->fetchToDecodeDelay = fetchToDecodeDelay; + params->decodeWidth = decodeWidth; + + params->iewToRenameDelay = iewToRenameDelay; + params->commitToRenameDelay = commitToRenameDelay; + params->decodeToRenameDelay = decodeToRenameDelay; + params->renameWidth = renameWidth; + + params->commitToIEWDelay = commitToIEWDelay; + params->renameToIEWDelay = renameToIEWDelay; + params->issueToExecuteDelay = issueToExecuteDelay; + params->issueWidth = issueWidth; + params->executeWidth = executeWidth; + params->executeIntWidth = executeIntWidth; + params->executeFloatWidth = executeFloatWidth; + params->executeBranchWidth = executeBranchWidth; + params->executeMemoryWidth = executeMemoryWidth; + + params->iewToCommitDelay = iewToCommitDelay; + params->renameToROBDelay = renameToROBDelay; + params->commitWidth = commitWidth; + params->squashWidth = squashWidth; + + + params->localPredictorSize = localPredictorSize; + params->localCtrBits = localCtrBits; + params->localHistoryTableSize = localHistoryTableSize; + params->localHistoryBits = localHistoryBits; + params->globalPredictorSize = globalPredictorSize; + params->globalCtrBits = globalCtrBits; + params->globalHistoryBits = globalHistoryBits; + 
params->choicePredictorSize = choicePredictorSize; + params->choiceCtrBits = choiceCtrBits; + + params->BTBEntries = BTBEntries; + params->BTBTagSize = BTBTagSize; + + params->RASSize = RASSize; + + params->LQEntries = LQEntries; + params->SQEntries = SQEntries; + + params->SSITSize = SSITSize; + params->LFSTSize = LFSTSize; + + params->numPhysIntRegs = numPhysIntRegs; + params->numPhysFloatRegs = numPhysFloatRegs; + params->numIQEntries = numIQEntries; + params->numROBEntries = numROBEntries; + + params->decoupledFrontEnd = decoupledFrontEnd; + params->dispatchWidth = dispatchWidth; + params->wbWidth = wbWidth; + + params->smtNumFetchingThreads = smtNumFetchingThreads; + params->smtFetchPolicy = smtFetchPolicy; + params->smtIQPolicy = smtIQPolicy; + params->smtLSQPolicy = smtLSQPolicy; + params->smtLSQThreshold = smtLSQThreshold; + params->smtROBPolicy = smtROBPolicy; + params->smtROBThreshold = smtROBThreshold; + params->smtCommitPolicy = smtCommitPolicy; + + params->instShiftAmt = 2; + + params->deferRegistration = defer_registration; + + params->functionTrace = function_trace; + params->functionTraceStart = function_trace_start; + + cpu = new SimpleOzoneCPU(params); + + return cpu; +} + +REGISTER_SIM_OBJECT("SimpleOzoneCPU", SimpleOzoneCPU) + diff --git a/src/cpu/ozone/cpu_impl.hh b/src/cpu/ozone/cpu_impl.hh index 5675da3a8..1c869c486 100644 --- a/src/cpu/ozone/cpu_impl.hh +++ b/src/cpu/ozone/cpu_impl.hh @@ -920,23 +920,39 @@ OzoneCPU::OzoneXC::readIntReg(int reg_idx) template float -OzoneCPU::OzoneXC::readFloatRegSingle(int reg_idx) +OzoneCPU::OzoneXC::readFloatReg(int reg_idx, int width) { int idx = reg_idx + TheISA::FP_Base_DepTag; - return thread->renameTable[idx]->readFloatResult(); + switch(width) { + case 32: + return thread->renameTable[idx]->readFloatResult(); + case 64: + return thread->renameTable[idx]->readDoubleResult(); + default: + panic("Unsupported width!"); + return 0; + } } template double -OzoneCPU::OzoneXC::readFloatRegDouble(int reg_idx) 
+OzoneCPU::OzoneXC::readFloatReg(int reg_idx) { int idx = reg_idx + TheISA::FP_Base_DepTag; - return thread->renameTable[idx]->readDoubleResult(); + return thread->renameTable[idx]->readFloatResult(); } template uint64_t -OzoneCPU::OzoneXC::readFloatRegInt(int reg_idx) +OzoneCPU::OzoneXC::readFloatRegBits(int reg_idx, int width) +{ + int idx = reg_idx + TheISA::FP_Base_DepTag; + return thread->renameTable[idx]->readIntResult(); +} + +template +uint64_t +OzoneCPU::OzoneXC::readFloatRegBits(int reg_idx) { int idx = reg_idx + TheISA::FP_Base_DepTag; return thread->renameTable[idx]->readIntResult(); @@ -955,14 +971,28 @@ OzoneCPU::OzoneXC::setIntReg(int reg_idx, uint64_t val) template void -OzoneCPU::OzoneXC::setFloatRegSingle(int reg_idx, float val) +OzoneCPU::OzoneXC::setFloatReg(int reg_idx, FloatReg val, int width) { - panic("Unimplemented!"); + int idx = reg_idx + TheISA::FP_Base_DepTag; + switch(width) { + case 32: + panic("Unimplemented!"); + break; + case 64: + thread->renameTable[idx]->setDoubleResult(val); + break; + default: + panic("Unsupported width!"); + } + + if (!thread->inSyscall) { + cpu->squashFromXC(); + } } template void -OzoneCPU::OzoneXC::setFloatRegDouble(int reg_idx, double val) +OzoneCPU::OzoneXC::setFloatReg(int reg_idx, FloatReg val) { int idx = reg_idx + TheISA::FP_Base_DepTag; @@ -975,7 +1005,15 @@ OzoneCPU::OzoneXC::setFloatRegDouble(int reg_idx, double val) template void -OzoneCPU::OzoneXC::setFloatRegInt(int reg_idx, uint64_t val) +OzoneCPU::OzoneXC::setFloatRegBits(int reg_idx, FloatRegBits val, + int width) +{ + panic("Unimplemented!"); +} + +template +void +OzoneCPU::OzoneXC::setFloatRegBits(int reg_idx, FloatRegBits val) { panic("Unimplemented!"); } diff --git a/src/cpu/ozone/dyn_inst.cc b/src/cpu/ozone/dyn_inst.cc new file mode 100644 index 000000000..1702419d6 --- /dev/null +++ b/src/cpu/ozone/dyn_inst.cc @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/ozone/dyn_inst_impl.hh" +#include "cpu/ozone/ozone_impl.hh" +//#include "cpu/ozone/simple_impl.hh" + +template class OzoneDynInst; +//template class OzoneDynInst; + diff --git a/src/cpu/ozone/dyn_inst.hh b/src/cpu/ozone/dyn_inst.hh new file mode 100644 index 000000000..7c1e17074 --- /dev/null +++ b/src/cpu/ozone/dyn_inst.hh @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2005-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __CPU_OZONE_DYN_INST_HH__ +#define __CPU_OZONE_DYN_INST_HH__ + +#include "arch/isa_traits.hh" +#include "config/full_system.hh" +#include "cpu/base_dyn_inst.hh" +#include "cpu/ozone/cpu.hh" // MUST include this +#include "cpu/inst_seq.hh" +//#include "cpu/ozone/simple_impl.hh" // Would be nice to not have to include this +#include "cpu/ozone/ozone_impl.hh" + +#include +#include + +template +class OzoneDynInst : public BaseDynInst +{ + public: + // Typedefs + typedef typename Impl::FullCPU FullCPU; + + typedef typename FullCPU::ImplState ImplState; + + // Typedef for DynInstPtr. This is really just a RefCountingPtr. + typedef typename Impl::DynInstPtr DynInstPtr; + + typedef TheISA::ExtMachInst ExtMachInst; + typedef TheISA::MachInst MachInst; + typedef TheISA::MiscReg MiscReg; + typedef typename std::list::iterator ListIt; + + // Note that this is duplicated from the BaseDynInst class; I'm + // simply not sure the enum would carry through so I could use it + // in array declarations in this class. 
+ enum { + MaxInstSrcRegs = TheISA::MaxInstSrcRegs, + MaxInstDestRegs = TheISA::MaxInstDestRegs + }; + + OzoneDynInst(FullCPU *cpu); + + OzoneDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, + InstSeqNum seq_num, FullCPU *cpu); + + OzoneDynInst(StaticInstPtr inst); + + ~OzoneDynInst(); + + void setSrcInst(DynInstPtr &newSrcInst, int regIdx) + { srcInsts[regIdx] = newSrcInst; } + + bool srcInstReady(int regIdx); + + void setPrevDestInst(DynInstPtr &oldDestInst, int regIdx) + { prevDestInst[regIdx] = oldDestInst; } + + DynInstPtr &getPrevDestInst(int regIdx) + { return prevDestInst[regIdx]; } + + void addDependent(DynInstPtr &dependent_inst); + + std::vector &getDependents() { return dependents; } + std::vector &getMemDeps() { return memDependents; } + std::list &getMemSrcs() { return srcMemInsts; } + + void wakeDependents(); + + void wakeMemDependents(); + + void addMemDependent(DynInstPtr &inst) { memDependents.push_back(inst); } + + void addSrcMemInst(DynInstPtr &inst) { srcMemInsts.push_back(inst); } + + void markMemInstReady(OzoneDynInst *inst); + + // For now I will remove instructions from the list when they wake + // up. In the future, you only really need a counter. + bool memDepReady() { return srcMemInsts.empty(); } + + private: + void initInstPtrs(); + + std::vector dependents; + + std::vector memDependents; + + std::list srcMemInsts; + + /** The instruction that produces the value of the source + * registers. These may be NULL if the value has already been + * read from the source instruction. + */ + DynInstPtr srcInsts[MaxInstSrcRegs]; + + /** + * Previous rename instruction for this destination. Indexed by + * destination operand, so it is sized by MaxInstDestRegs. + */ + DynInstPtr prevDestInst[MaxInstDestRegs]; + + public: + + Fault initiateAcc(); + + Fault completeAcc(); + + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. 
We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to reduce overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). + + uint64_t readIntReg(const StaticInst *si, int idx) + { + return srcInsts[idx]->readIntResult(); + } + + float readFloatRegSingle(const StaticInst *si, int idx) + { + return srcInsts[idx]->readFloatResult(); + } + + double readFloatRegDouble(const StaticInst *si, int idx) + { + return srcInsts[idx]->readDoubleResult(); + } + + uint64_t readFloatRegInt(const StaticInst *si, int idx) + { + return srcInsts[idx]->readIntResult(); + } + + /** @todo: Make results into arrays so they can handle multiple dest + * registers. + */ + void setIntReg(const StaticInst *si, int idx, uint64_t val) + { + BaseDynInst::setIntReg(si, idx, val); + } + + void setFloatRegSingle(const StaticInst *si, int idx, float val) + { + BaseDynInst::setFloatRegSingle(si, idx, val); + } + + void setFloatRegDouble(const StaticInst *si, int idx, double val) + { + BaseDynInst::setFloatRegDouble(si, idx, val); + } + + void setFloatRegInt(const StaticInst *si, int idx, uint64_t val) + { + BaseDynInst::setFloatRegInt(si, idx, val); + } + + void setIntResult(uint64_t result) { this->instResult.integer = result; } + void setDoubleResult(double result) { this->instResult.dbl = result; } + + bool srcsReady(); + bool eaSrcsReady(); + + Fault execute(); + + Fault executeEAComp() + { return NoFault; } + + Fault executeMemAcc() + { return this->staticInst->memAccInst()->execute(this, this->traceData); } + + void clearDependents(); + + void clearMemDependents(); + + public: + // ISA stuff + MiscReg readMiscReg(int misc_reg); + + MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault); + + Fault setMiscReg(int misc_reg, 
const MiscReg &val); + + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val); + +#if FULL_SYSTEM + Fault hwrei(); + int readIntrFlag(); + void setIntrFlag(int val); + bool inPalMode(); + void trap(Fault fault); + bool simPalCheck(int palFunc); +#else + void syscall(); +#endif + + ListIt iqIt; + bool iqItValid; +}; + +#endif // __CPU_OZONE_DYN_INST_HH__ diff --git a/src/cpu/ozone/dyn_inst_impl.hh b/src/cpu/ozone/dyn_inst_impl.hh new file mode 100644 index 000000000..f891ec515 --- /dev/null +++ b/src/cpu/ozone/dyn_inst_impl.hh @@ -0,0 +1,315 @@ +/* + * Copyright (c) 2005-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/faults.hh" +#include "arch/isa_traits.hh" +#include "config/full_system.hh" +#include "cpu/ozone/dyn_inst.hh" +#include "kern/kernel_stats.hh" + +using namespace TheISA; + +template +OzoneDynInst::OzoneDynInst(FullCPU *cpu) + : BaseDynInst(0, 0, 0, 0, cpu) +{ + this->setResultReady(); + + initInstPtrs(); +} + +template +OzoneDynInst::OzoneDynInst(ExtMachInst inst, Addr PC, Addr Pred_PC, + InstSeqNum seq_num, FullCPU *cpu) + : BaseDynInst(inst, PC, Pred_PC, seq_num, cpu) +{ + initInstPtrs(); +} + +template +OzoneDynInst::OzoneDynInst(StaticInstPtr _staticInst) + : BaseDynInst(_staticInst) +{ + initInstPtrs(); +} + +template +OzoneDynInst::~OzoneDynInst() +{ + DPRINTF(BE, "[sn:%lli] destructor called\n", this->seqNum); + for (int i = 0; i < this->numSrcRegs(); ++i) { + srcInsts[i] = NULL; + } + + for (int i = 0; i < this->numDestRegs(); ++i) { + prevDestInst[i] = NULL; + } + + dependents.clear(); +} + +template +Fault +OzoneDynInst::execute() +{ + // @todo: Pretty convoluted way to avoid squashing from happening when using + // the XC during an instruction's execution (specifically for instructions + // that have sideeffects that use the XC). Fix this. 
+ bool in_syscall = this->thread->inSyscall; + this->thread->inSyscall = true; + + this->fault = this->staticInst->execute(this, this->traceData); + + this->thread->inSyscall = in_syscall; + + return this->fault; +} + +template +Fault +OzoneDynInst::initiateAcc() +{ + // @todo: Pretty convoluted way to avoid squashing from happening when using + // the XC during an instruction's execution (specifically for instructions + // that have sideeffects that use the XC). Fix this. + bool in_syscall = this->thread->inSyscall; + this->thread->inSyscall = true; + + this->fault = this->staticInst->initiateAcc(this, this->traceData); + + this->thread->inSyscall = in_syscall; + + return this->fault; +} + +template +Fault +OzoneDynInst::completeAcc() +{ + if (this->isLoad()) { + this->fault = this->staticInst->completeAcc(this->req->data, + this, + this->traceData); + } else if (this->isStore()) { + this->fault = this->staticInst->completeAcc((uint8_t*)&this->req->result, + this, + this->traceData); + } else { + panic("Unknown type!"); + } + + return this->fault; +} + +template +bool +OzoneDynInst::srcInstReady(int regIdx) +{ + return srcInsts[regIdx]->isResultReady(); +} + +template +void +OzoneDynInst::addDependent(DynInstPtr &dependent_inst) +{ + dependents.push_back(dependent_inst); +} + +template +void +OzoneDynInst::wakeDependents() +{ + for (int i = 0; i < dependents.size(); ++i) { + dependents[i]->markSrcRegReady(); + } +} + +template +void +OzoneDynInst::wakeMemDependents() +{ + for (int i = 0; i < memDependents.size(); ++i) { + memDependents[i]->markMemInstReady(this); + } +} + +template +void +OzoneDynInst::markMemInstReady(OzoneDynInst *inst) +{ + ListIt mem_it = srcMemInsts.begin(); + while ((*mem_it) != inst && mem_it != srcMemInsts.end()) { + mem_it++; + } + assert(mem_it != srcMemInsts.end()); + + srcMemInsts.erase(mem_it); +} + +template +void +OzoneDynInst::initInstPtrs() +{ + for (int i = 0; i < MaxInstSrcRegs; ++i) { + srcInsts[i] = NULL; + } + iqItValid = 
false; +} + +template +bool +OzoneDynInst::srcsReady() +{ + for (int i = 0; i < this->numSrcRegs(); ++i) { + if (!srcInsts[i]->isResultReady()) + return false; + } + + return true; +} + +template +bool +OzoneDynInst::eaSrcsReady() +{ + for (int i = 1; i < this->numSrcRegs(); ++i) { + if (!srcInsts[i]->isResultReady()) + return false; + } + + return true; +} + +template +void +OzoneDynInst::clearDependents() +{ + dependents.clear(); + for (int i = 0; i < this->numSrcRegs(); ++i) { + srcInsts[i] = NULL; + } + for (int i = 0; i < this->numDestRegs(); ++i) { + prevDestInst[i] = NULL; + } +} + +template +void +OzoneDynInst::clearMemDependents() +{ + memDependents.clear(); +} + +template +MiscReg +OzoneDynInst::readMiscReg(int misc_reg) +{ + return this->thread->readMiscReg(misc_reg); +} + +template +MiscReg +OzoneDynInst::readMiscRegWithEffect(int misc_reg, Fault &fault) +{ + return this->thread->readMiscRegWithEffect(misc_reg, fault); +} + +template +Fault +OzoneDynInst::setMiscReg(int misc_reg, const MiscReg &val) +{ + this->setIntResult(val); + return this->thread->setMiscReg(misc_reg, val); +} + +template +Fault +OzoneDynInst::setMiscRegWithEffect(int misc_reg, const MiscReg &val) +{ + return this->thread->setMiscRegWithEffect(misc_reg, val); +} + +#if FULL_SYSTEM + +template +Fault +OzoneDynInst::hwrei() +{ + if (!this->cpu->inPalMode(this->readPC())) + return new AlphaISA::UnimplementedOpcodeFault; + + this->setNextPC(this->thread->readMiscReg(AlphaISA::IPR_EXC_ADDR)); + + this->cpu->hwrei(); + + // FIXME: XXX check for interrupts? 
XXX + return NoFault; +} + +template +int +OzoneDynInst::readIntrFlag() +{ +return this->cpu->readIntrFlag(); +} + +template +void +OzoneDynInst::setIntrFlag(int val) +{ + this->cpu->setIntrFlag(val); +} + +template +bool +OzoneDynInst::inPalMode() +{ + return this->cpu->inPalMode(); +} + +template +void +OzoneDynInst::trap(Fault fault) +{ + fault->invoke(this->thread->getXCProxy()); +} + +template +bool +OzoneDynInst::simPalCheck(int palFunc) +{ + return this->cpu->simPalCheck(palFunc); +} +#else +template +void +OzoneDynInst::syscall() +{ + this->cpu->syscall(); +} +#endif diff --git a/src/cpu/ozone/front_end.cc b/src/cpu/ozone/front_end.cc new file mode 100644 index 000000000..a974d43cb --- /dev/null +++ b/src/cpu/ozone/front_end.cc @@ -0,0 +1,7 @@ + +#include "cpu/ozone/front_end_impl.hh" +#include "cpu/ozone/ozone_impl.hh" +#include "cpu/ozone/simple_impl.hh" + +template class FrontEnd; +template class FrontEnd; diff --git a/src/cpu/ozone/front_end.hh b/src/cpu/ozone/front_end.hh new file mode 100644 index 000000000..b3131149d --- /dev/null +++ b/src/cpu/ozone/front_end.hh @@ -0,0 +1,297 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_OZONE_FRONT_END_HH__
+#define __CPU_OZONE_FRONT_END_HH__
+
+#include <deque>
+
+#include "cpu/inst_seq.hh"
+#include "cpu/o3/bpred_unit.hh"
+#include "cpu/ozone/rename_table.hh"
+#include "mem/request.hh"
+#include "sim/eventq.hh"
+#include "sim/stats.hh"
+
+class ExecContext;
+class MemInterface;
+template <class>
+class OzoneThreadState;
+class PageTable;
+template <class>
+class TimeBuffer;
+
+template <class Impl>
+class FrontEnd // Fetch-and-rename front end of the Ozone CPU model.
+{
+  public:
+    typedef typename Impl::Params Params;
+    typedef typename Impl::DynInst DynInst;
+    typedef typename Impl::DynInstPtr DynInstPtr;
+    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::BackEnd BackEnd;
+
+    typedef typename Impl::FullCPU::OzoneXC OzoneXC;
+    typedef typename Impl::FullCPU::CommStruct CommStruct;
+
+    FrontEnd(Params *params);
+
+    std::string name() const;
+
+    void setCPU(FullCPU *cpu_ptr)
+    { cpu = cpu_ptr; }
+
+    void setBackEnd(BackEnd *back_end_ptr)
+    { backEnd = back_end_ptr; }
+
+    void setCommBuffer(TimeBuffer<CommStruct> *_comm);
+
+    void setXC(ExecContext *xc_ptr);
+
+    void setThreadState(OzoneThreadState<Impl> *thread_ptr)
+    { thread = thread_ptr; }
+
+    void regStats();
+
+    void tick();
+    Fault fetchCacheLine();
+    void processInst(DynInstPtr &inst);
+    void squash(const InstSeqNum &squash_num, const Addr &next_PC,
+                const bool is_branch = false, const bool branch_taken = false);
+    DynInstPtr getInst();
+
+    void processCacheCompletion(Packet *pkt);
+
+    void addFreeRegs(int num_freed);
+
+    bool isEmpty() { return instBuffer.empty(); }
+
+    void switchOut();
+
+    void doSwitchOut();
+
+    void takeOverFrom(ExecContext *old_xc = NULL);
+
+    bool isSwitchedOut() { return switchedOut; }
+
+    bool switchedOut; // Set by switchOut(), cleared by takeOverFrom(); tick() returns early while set.
+
+  private:
+    bool updateStatus();
+
+    void checkBE();
+    DynInstPtr getInstFromCacheline();
+    void renameInst(DynInstPtr &inst);
+    // Returns true if we need to stop the front end this cycle
+    bool processBarriers(DynInstPtr &inst);
+
+    void handleFault(Fault &fault);
+  public:
+    Fault getFault() { return fetchFault; }
+  private:
+    Fault fetchFault; // Fault from the last fetchCacheLine(); reset to NoFault by squash().
+
+    // Align an address (typically a PC) to the start of an I-cache block.
+    // We fold in the PISA 64- to 32-bit conversion here as well.
+    Addr icacheBlockAlignPC(Addr addr)
+    {
+        addr = TheISA::realPCToFetchPC(addr);
+        return (addr & ~(cacheBlkMask));
+    }
+
+    InstSeqNum getAndIncrementInstSeq()
+    { return cpu->globalSeqNum++; }
+
+  public:
+    FullCPU *cpu;
+
+    BackEnd *backEnd;
+
+    ExecContext *xc;
+
+    OzoneThreadState<Impl> *thread;
+
+    enum Status {
+        Running,
+        Idle,
+        IcacheMissStall,
+        IcacheMissComplete,
+        SerializeBlocked,
+        SerializeComplete,
+        RenameBlocked,
+        QuiescePending,
+        TrapPending,
+        BEBlocked
+    };
+
+    Status status;
+
+  private:
+    TimeBuffer<CommStruct> *comm;
+    typename TimeBuffer<CommStruct>::wire fromCommit;
+
+    typedef typename Impl::BranchPred BranchPred;
+
+    BranchPred branchPred;
+
+    class IcachePort : public Port
+    {
+      protected:
+        FrontEnd<Impl> *fe;
+
+      public:
+        IcachePort(const std::string &_name, FrontEnd<Impl> *_fe)
+            : Port(_name), fe(_fe)
+        { }
+
+      protected:
+        virtual Tick recvAtomic(PacketPtr pkt);
+
+        virtual void recvFunctional(PacketPtr pkt);
+
+        virtual void recvStatusChange(Status status);
+
+        virtual void getDeviceAddressRanges(AddrRangeList &resp,
+                                            AddrRangeList &snoop)
+        { resp.clear(); snoop.clear(); }
+
+        virtual bool recvTiming(PacketPtr pkt);
+
+        virtual void recvRetry();
+    };
+
+    IcachePort icachePort;
+
+#if !FULL_SYSTEM
+    PageTable *pTable;
+#endif
+
+    RequestPtr memReq;
+
+    /** Mask to get a cache block's address. */
+    Addr cacheBlkMask;
+
+    unsigned cacheBlkSize; // Block size in bytes; the constructor asserts it is a power of two.
+
+    Addr cacheBlkPC; // Block-aligned fetch PC recorded by fetchCacheLine().
+
+    /** The cache line being fetched. */
+    uint8_t *cacheData;
+
+    bool fetchCacheLineNextCycle; // When set, tick() calls fetchCacheLine().
+
+    bool cacheBlkValid; // getInstFromCacheline() returns NULL while this is false.
+
+  public:
+    RenameTable<Impl> renameTable;
+
+  private:
+    Addr PC;
+    Addr nextPC;
+
+  public:
+    void setPC(Addr val) { PC = val; }
+    void setNextPC(Addr val) { nextPC = val; }
+
+    void wakeFromQuiesce();
+
+    void dumpInsts();
+
+  private:
+    typedef typename std::deque<DynInstPtr> InstBuff;
+    typedef typename InstBuff::iterator InstBuffIt;
+
+    InstBuff instBuffer;
+
+    int instBufferSize; // Entry count kept alongside instBuffer pushes and pops.
+
+    int maxInstBufferSize; // tick() stops fetching once instBufferSize reaches this.
+
+    int width; // Maximum instructions processed per tick().
+
+    int freeRegs; // Decremented per destination reg in renameInst(); restored by squash()/addFreeRegs().
+
+    int numPhysRegs; // Upper bound addFreeRegs() clamps freeRegs to.
+
+    bool serializeNext; // Set after a serialize-after inst; the next inst is marked serialize-before.
+
+    DynInstPtr barrierInst; // Inst that caused SerializeBlocked; handed back by getInstFromCacheline().
+
+  public:
+    bool interruptPending; // While set, fetchCacheLine() returns without fetching when flags == 0.
+  private:
+    // number of idle cycles
+/*
+    Stats::Average<> notIdleFraction;
+    Stats::Formula idleFraction;
+*/
+    // @todo: Consider making these vectors and tracking on a per thread basis.
+    /** Stat for total number of cycles stalled due to an icache miss. */
+    Stats::Scalar<> icacheStallCycles;
+    /** Stat for total number of fetched instructions. */
+    Stats::Scalar<> fetchedInsts;
+    Stats::Scalar<> fetchedBranches;
+    /** Stat for total number of predicted branches. */
+    Stats::Scalar<> predictedBranches;
+    /** Stat for total number of cycles spent fetching. */
+    Stats::Scalar<> fetchCycles;
+
+    Stats::Scalar<> fetchIdleCycles;
+    /** Stat for total number of cycles spent squashing. */
+    Stats::Scalar<> fetchSquashCycles;
+    /** Stat for total number of cycles spent blocked due to other stages in
+     * the pipeline.
+     */
+    Stats::Scalar<> fetchBlockedCycles;
+    /** Stat for total number of fetched cache lines. */
+    Stats::Scalar<> fetchedCacheLines;
+
+    Stats::Scalar<> fetchIcacheSquashes;
+    /** Distribution of number of instructions fetched each cycle. */
+    Stats::Distribution<> fetchNisnDist;
+//    Stats::Vector<> qfull_iq_occupancy;
+//    Stats::VectorDistribution<> qfull_iq_occ_dist_;
+    Stats::Formula idleRate;
+    Stats::Formula branchRate;
+    Stats::Formula fetchRate;
+    Stats::Scalar<> IFQCount; // cumulative IFQ occupancy
+    Stats::Formula IFQOccupancy;
+    Stats::Formula IFQLatency;
+    Stats::Scalar<> IFQFcount; // cumulative IFQ full count
+    Stats::Formula IFQFullRate;
+
+    Stats::Scalar<> dispatchCountStat;
+    Stats::Scalar<> dispatchedSerializing;
+    Stats::Scalar<> dispatchedTempSerializing;
+    Stats::Scalar<> dispatchSerializeStallCycles;
+    Stats::Formula dispatchRate;
+    Stats::Formula regIntFull;
+    Stats::Formula regFpFull;
+};
+
+#endif // __CPU_OZONE_FRONT_END_HH__
diff --git a/src/cpu/ozone/front_end_impl.hh b/src/cpu/ozone/front_end_impl.hh
new file mode 100644
index 000000000..ffbcf3340
--- /dev/null
+++ b/src/cpu/ozone/front_end_impl.hh
@@ -0,0 +1,920 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/faults.hh" +#include "arch/isa_traits.hh" +#include "base/statistics.hh" +#include "cpu/exec_context.hh" +#include "cpu/exetrace.hh" +#include "cpu/ozone/front_end.hh" +#include "mem/mem_interface.hh" +#include "sim/byte_swap.hh" + +using namespace TheISA; + +template +FrontEnd::FrontEnd(Params *params) + : branchPred(params), + icacheInterface(params->icacheInterface), + instBufferSize(0), + maxInstBufferSize(params->maxInstBufferSize), + width(params->frontEndWidth), + freeRegs(params->numPhysicalRegs), + numPhysRegs(params->numPhysicalRegs), + serializeNext(false), + interruptPending(false) +{ + switchedOut = false; + + status = Idle; + + memReq = NULL; + // Size of cache block. + cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64; + + assert(isPowerOf2(cacheBlkSize)); + + // Create mask to get rid of offset bits. + cacheBlkMask = (cacheBlkSize - 1); + + // Create space to store a cache line. 
+ cacheData = new uint8_t[cacheBlkSize]; + + fetchCacheLineNextCycle = true; + + cacheBlkValid = false; + +#if !FULL_SYSTEM +// pTable = params->pTable; +#endif + fetchFault = NoFault; +} + +template +std::string +FrontEnd::name() const +{ + return cpu->name() + ".frontend"; +} + +template +void +FrontEnd::setCommBuffer(TimeBuffer *_comm) +{ + comm = _comm; + // @todo: Hardcoded for now. Allow this to be set by a latency. + fromCommit = comm->getWire(-1); +} + +template +void +FrontEnd::setXC(ExecContext *xc_ptr) +{ + xc = xc_ptr; +} + +template +void +FrontEnd::regStats() +{ + icacheStallCycles + .name(name() + ".icacheStallCycles") + .desc("Number of cycles fetch is stalled on an Icache miss") + .prereq(icacheStallCycles); + + fetchedInsts + .name(name() + ".fetchedInsts") + .desc("Number of instructions fetch has processed") + .prereq(fetchedInsts); + + fetchedBranches + .name(name() + ".fetchedBranches") + .desc("Number of fetched branches") + .prereq(fetchedBranches); + + predictedBranches + .name(name() + ".predictedBranches") + .desc("Number of branches that fetch has predicted taken") + .prereq(predictedBranches); + + fetchCycles + .name(name() + ".fetchCycles") + .desc("Number of cycles fetch has run and was not squashing or" + " blocked") + .prereq(fetchCycles); + + fetchIdleCycles + .name(name() + ".fetchIdleCycles") + .desc("Number of cycles fetch was idle") + .prereq(fetchIdleCycles); + + fetchSquashCycles + .name(name() + ".fetchSquashCycles") + .desc("Number of cycles fetch has spent squashing") + .prereq(fetchSquashCycles); + + fetchBlockedCycles + .name(name() + ".fetchBlockedCycles") + .desc("Number of cycles fetch has spent blocked") + .prereq(fetchBlockedCycles); + + fetchedCacheLines + .name(name() + ".fetchedCacheLines") + .desc("Number of cache lines fetched") + .prereq(fetchedCacheLines); + + fetchIcacheSquashes + .name(name() + ".fetchIcacheSquashes") + .desc("Number of outstanding Icache misses that were squashed") + 
.prereq(fetchIcacheSquashes); + + fetchNisnDist + .init(/* base value */ 0, + /* last value */ width, + /* bucket size */ 1) + .name(name() + ".rateDist") + .desc("Number of instructions fetched each cycle (Total)") + .flags(Stats::pdf); + + idleRate + .name(name() + ".idleRate") + .desc("Percent of cycles fetch was idle") + .prereq(idleRate); + idleRate = fetchIdleCycles * 100 / cpu->numCycles; + + branchRate + .name(name() + ".branchRate") + .desc("Number of branch fetches per cycle") + .flags(Stats::total); + branchRate = fetchedBranches / cpu->numCycles; + + fetchRate + .name(name() + ".rate") + .desc("Number of inst fetches per cycle") + .flags(Stats::total); + fetchRate = fetchedInsts / cpu->numCycles; + + IFQCount + .name(name() + ".IFQ:count") + .desc("cumulative IFQ occupancy") + ; + + IFQFcount + .name(name() + ".IFQ:fullCount") + .desc("cumulative IFQ full count") + .flags(Stats::total) + ; + + IFQOccupancy + .name(name() + ".IFQ:occupancy") + .desc("avg IFQ occupancy (inst's)") + ; + IFQOccupancy = IFQCount / cpu->numCycles; + + IFQLatency + .name(name() + ".IFQ:latency") + .desc("avg IFQ occupant latency (cycle's)") + .flags(Stats::total) + ; + + IFQFullRate + .name(name() + ".IFQ:fullRate") + .desc("fraction of time (cycles) IFQ was full") + .flags(Stats::total); + ; + IFQFullRate = IFQFcount * Stats::constant(100) / cpu->numCycles; + + dispatchCountStat + .name(name() + ".DIS:count") + .desc("cumulative count of dispatched insts") + .flags(Stats::total) + ; + + dispatchedSerializing + .name(name() + ".DIS:serializingInsts") + .desc("count of serializing insts dispatched") + .flags(Stats::total) + ; + + dispatchedTempSerializing + .name(name() + ".DIS:tempSerializingInsts") + .desc("count of temporary serializing insts dispatched") + .flags(Stats::total) + ; + + dispatchSerializeStallCycles + .name(name() + ".DIS:serializeStallCycles") + .desc("count of cycles dispatch stalled for serializing inst") + .flags(Stats::total) + ; + + dispatchRate + 
.name(name() + ".DIS:rate") + .desc("dispatched insts per cycle") + .flags(Stats::total) + ; + dispatchRate = dispatchCountStat / cpu->numCycles; + + regIntFull + .name(name() + ".REG:int:full") + .desc("number of cycles where there were no INT registers") + ; + + regFpFull + .name(name() + ".REG:fp:full") + .desc("number of cycles where there were no FP registers") + ; + IFQLatency = IFQOccupancy / dispatchRate; + + branchPred.regStats(); +} + +template +void +FrontEnd::tick() +{ + if (switchedOut) + return; + + // @todo: Maybe I want to just have direct communication... + if (fromCommit->doneSeqNum) { + branchPred.update(fromCommit->doneSeqNum, 0); + } + + IFQCount += instBufferSize; + IFQFcount += instBufferSize == maxInstBufferSize; + + // Fetch cache line + if (status == IcacheMissComplete) { + cacheBlkValid = true; + + status = Running; + if (barrierInst) + status = SerializeBlocked; + if (freeRegs <= 0) + status = RenameBlocked; + checkBE(); + } else if (status == IcacheMissStall) { + DPRINTF(FE, "Still in Icache miss stall.\n"); + icacheStallCycles++; + return; + } + + if (status == RenameBlocked || status == SerializeBlocked || + status == TrapPending || status == BEBlocked) { + // Will cause a one cycle bubble between changing state and + // restarting. + DPRINTF(FE, "In blocked status.\n"); + + fetchBlockedCycles++; + + if (status == SerializeBlocked) { + dispatchSerializeStallCycles++; + } + updateStatus(); + return; + } else if (status == QuiescePending) { + DPRINTF(FE, "Waiting for quiesce to execute or get squashed.\n"); + return; + } else if (status != IcacheMissComplete) { + if (fetchCacheLineNextCycle) { + Fault fault = fetchCacheLine(); + if (fault != NoFault) { + handleFault(fault); + fetchFault = fault; + return; + } + fetchCacheLineNextCycle = false; + } + // If miss, stall until it returns. + if (status == IcacheMissStall) { + // Tell CPU to not tick me for now. 
+ return; + } + } + + fetchCycles++; + + int num_inst = 0; + + // Otherwise loop and process instructions. + // One way to hack infinite width is to set width and maxInstBufferSize + // both really high. Inelegant, but probably will work. + while (num_inst < width && + instBufferSize < maxInstBufferSize) { + // Get instruction from cache line. + DynInstPtr inst = getInstFromCacheline(); + + if (!inst) { + // PC is no longer in the cache line, end fetch. + // Might want to check this at the end of the cycle so that + // there's no cycle lost to checking for a new cache line. + DPRINTF(FE, "Need to get new cache line\n"); + fetchCacheLineNextCycle = true; + break; + } + + processInst(inst); + + if (status == SerializeBlocked) { + break; + } + + // Possibly push into a time buffer that estimates the front end + // latency + instBuffer.push_back(inst); + ++instBufferSize; + ++num_inst; + +#if FULL_SYSTEM + if (inst->isQuiesce()) { + warn("%lli: Quiesce instruction encountered, halting fetch!", curTick); + status = QuiescePending; + break; + } +#endif + + if (inst->predTaken()) { + // Start over with tick? + break; + } else if (freeRegs <= 0) { + DPRINTF(FE, "Ran out of free registers to rename to!\n"); + status = RenameBlocked; + break; + } else if (serializeNext) { + break; + } + } + + fetchNisnDist.sample(num_inst); + checkBE(); + + DPRINTF(FE, "Num insts processed: %i, Inst Buffer size: %i, Free " + "Regs %i\n", num_inst, instBufferSize, freeRegs); +} + +template +Fault +FrontEnd::fetchCacheLine() +{ + // Read a cache line, based on the current PC. +#if FULL_SYSTEM + // Flag to say whether or not address is physical addr. + unsigned flags = cpu->inPalMode(PC) ? PHYSICAL : 0; +#else + unsigned flags = 0; +#endif // FULL_SYSTEM + Fault fault = NoFault; + + if (interruptPending && flags == 0) { + return fault; + } + + // Align the fetch PC so it's at the start of a cache block. 
+ Addr fetch_PC = icacheBlockAlignPC(PC); + + DPRINTF(FE, "Fetching cache line starting at %#x.\n", fetch_PC); + + // Set up the memReq to do a read of the first instruction's address. + // Set the appropriate read size and flags as well. + memReq = new MemReq(); + + memReq->asid = 0; + memReq->thread_num = 0; + memReq->data = new uint8_t[64]; + memReq->xc = xc; + memReq->cmd = Read; + memReq->reset(fetch_PC, cacheBlkSize, flags); + + // Translate the instruction request. + fault = cpu->translateInstReq(memReq); + + // Now do the timing access to see whether or not the instruction + // exists within the cache. + if (icacheInterface && fault == NoFault) { +#if FULL_SYSTEM + if (cpu->system->memctrl->badaddr(memReq->paddr) || + memReq->flags & UNCACHEABLE) { + DPRINTF(FE, "Fetch: Bad address %#x (hopefully on a " + "misspeculating path!", + memReq->paddr); + return TheISA::genMachineCheckFault(); + } +#endif + + memReq->completionEvent = NULL; + + memReq->time = curTick; + fault = cpu->mem->read(memReq, cacheData); + + MemAccessResult res = icacheInterface->access(memReq); + + // If the cache missed then schedule an event to wake + // up this stage once the cache miss completes. + if (icacheInterface->doEvents() && res != MA_HIT) { + memReq->completionEvent = new ICacheCompletionEvent(memReq, this); + + status = IcacheMissStall; + + cacheBlkValid = false; + + DPRINTF(FE, "Cache miss.\n"); + } else { + DPRINTF(FE, "Cache hit.\n"); + + cacheBlkValid = true; + +// memcpy(cacheData, memReq->data, memReq->size); + } + } + + // Note that this will set the cache block PC a bit earlier than it should + // be set. 
+ cacheBlkPC = fetch_PC; + + ++fetchedCacheLines; + + DPRINTF(FE, "Done fetching cache line.\n"); + + return fault; +} + +template +void +FrontEnd::processInst(DynInstPtr &inst) +{ + if (processBarriers(inst)) { + return; + } + + Addr inst_PC = inst->readPC(); + + if (!inst->isControl()) { + inst->setPredTarg(inst->readNextPC()); + } else { + fetchedBranches++; + if (branchPred.predict(inst, inst_PC, inst->threadNumber)) { + predictedBranches++; + } + } + + Addr next_PC = inst->readPredTarg(); + + DPRINTF(FE, "[sn:%lli] Predicted and processed inst PC %#x, next PC " + "%#x\n", inst->seqNum, inst_PC, next_PC); + +// inst->setNextPC(next_PC); + + // Not sure where I should set this + PC = next_PC; + + renameInst(inst); +} + +template +bool +FrontEnd::processBarriers(DynInstPtr &inst) +{ + if (serializeNext) { + inst->setSerializeBefore(); + serializeNext = false; + } else if (!inst->isSerializing() && + !inst->isIprAccess() && + !inst->isStoreConditional()) { + return false; + } + + if ((inst->isIprAccess() || inst->isSerializeBefore()) && + !inst->isSerializeHandled()) { + DPRINTF(FE, "Serialize before instruction encountered.\n"); + + if (!inst->isTempSerializeBefore()) { + dispatchedSerializing++; + inst->setSerializeHandled(); + } else { + dispatchedTempSerializing++; + } + + // Change status over to SerializeBlocked so that other stages know + // what this is blocked on. + status = SerializeBlocked; + + barrierInst = inst; + return true; + } else if ((inst->isStoreConditional() || inst->isSerializeAfter()) + && !inst->isSerializeHandled()) { + DPRINTF(FE, "Serialize after instruction encountered.\n"); + + inst->setSerializeHandled(); + + dispatchedSerializing++; + + serializeNext = true; + return false; + } + return false; +} + +template +void +FrontEnd::handleFault(Fault &fault) +{ + DPRINTF(FE, "Fault at fetch, telling commit\n"); + + // We're blocked on the back end until it handles this fault. + status = TrapPending; + + // Get a sequence number. 
+ InstSeqNum inst_seq = getAndIncrementInstSeq(); + // We will use a nop in order to carry the fault. + ExtMachInst ext_inst = TheISA::NoopMachInst; + + // Create a new DynInst from the dummy nop. + DynInstPtr instruction = new DynInst(ext_inst, PC, + PC+sizeof(MachInst), + inst_seq, cpu); + instruction->setPredTarg(instruction->readNextPC()); +// instruction->setThread(tid); + +// instruction->setASID(tid); + + instruction->setState(thread); + + instruction->traceData = NULL; + + instruction->fault = fault; + instruction->setCanIssue(); + instBuffer.push_back(instruction); + ++instBufferSize; +} + +template +void +FrontEnd::squash(const InstSeqNum &squash_num, const Addr &next_PC, + const bool is_branch, const bool branch_taken) +{ + DPRINTF(FE, "Squashing from [sn:%lli], setting PC to %#x\n", + squash_num, next_PC); + + if (fetchFault != NoFault) + fetchFault = NoFault; + + while (!instBuffer.empty() && + instBuffer.back()->seqNum > squash_num) { + DynInstPtr inst = instBuffer.back(); + + DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n", + inst->seqNum, inst->readPC()); + + inst->clearDependents(); + + instBuffer.pop_back(); + --instBufferSize; + + freeRegs+= inst->numDestRegs(); + } + + // Copy over rename table from the back end. + renameTable.copyFrom(backEnd->renameTable); + + PC = next_PC; + + // Update BP with proper information. + if (is_branch) { + branchPred.squash(squash_num, next_PC, branch_taken, 0); + } else { + branchPred.squash(squash_num, 0); + } + + // Clear the icache miss if it's outstanding. + if (status == IcacheMissStall && icacheInterface) { + DPRINTF(FE, "Squashing outstanding Icache miss.\n"); + memReq = NULL; + } + + if (status == SerializeBlocked) { + assert(barrierInst->seqNum > squash_num); + barrierInst = NULL; + } + + // Unless this squash originated from the front end, we're probably + // in running mode now. + // Actually might want to make this latency dependent. 
+ status = Running; + fetchCacheLineNextCycle = true; +} + +template +typename Impl::DynInstPtr +FrontEnd::getInst() +{ + if (instBufferSize == 0) { + return NULL; + } + + DynInstPtr inst = instBuffer.front(); + + instBuffer.pop_front(); + + --instBufferSize; + + dispatchCountStat++; + + return inst; +} + +template +void +FrontEnd::processCacheCompletion(MemReqPtr &req) +{ + DPRINTF(FE, "Processing cache completion\n"); + + // Do something here. + if (status != IcacheMissStall || + req != memReq || + switchedOut) { + DPRINTF(FE, "Previous fetch was squashed.\n"); + fetchIcacheSquashes++; + return; + } + + status = IcacheMissComplete; + +/* if (checkStall(tid)) { + fetchStatus[tid] = Blocked; + } else { + fetchStatus[tid] = IcacheMissComplete; + } +*/ +// memcpy(cacheData, memReq->data, memReq->size); + + // Reset the completion event to NULL. +// memReq->completionEvent = NULL; + memReq = NULL; +} + +template +void +FrontEnd::addFreeRegs(int num_freed) +{ + if (status == RenameBlocked && freeRegs + num_freed > 0) { + status = Running; + } + + DPRINTF(FE, "Adding %i freed registers\n", num_freed); + + freeRegs+= num_freed; + +// assert(freeRegs <= numPhysRegs); + if (freeRegs > numPhysRegs) + freeRegs = numPhysRegs; +} + +template +bool +FrontEnd::updateStatus() +{ + bool serialize_block = !backEnd->robEmpty() || instBufferSize; + bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked(); + bool ret_val = false; + + if (status == SerializeBlocked && !serialize_block) { + status = SerializeComplete; + ret_val = true; + } + + if (status == BEBlocked && !be_block) { + if (barrierInst) { + status = SerializeBlocked; + } else { + status = Running; + } + ret_val = true; + } + return ret_val; +} + +template +void +FrontEnd::checkBE() +{ + bool be_block = cpu->decoupledFrontEnd ? 
false : backEnd->isBlocked(); + if (be_block) { + if (status == Running || status == Idle) { + status = BEBlocked; + } + } +} + +template +typename Impl::DynInstPtr +FrontEnd::getInstFromCacheline() +{ + if (status == SerializeComplete) { + DynInstPtr inst = barrierInst; + status = Running; + barrierInst = NULL; + inst->clearSerializeBefore(); + return inst; + } + + InstSeqNum inst_seq; + MachInst inst; + // @todo: Fix this magic number used here to handle word offset (and + // getting rid of PAL bit) + unsigned offset = (PC & cacheBlkMask) & ~3; + + // PC of inst is not in this cache block + if (PC >= (cacheBlkPC + cacheBlkSize) || PC < cacheBlkPC || !cacheBlkValid) { + return NULL; + } + + ////////////////////////// + // Fetch one instruction + ////////////////////////// + + // Get a sequence number. + inst_seq = getAndIncrementInstSeq(); + + // Make sure this is a valid index. + assert(offset <= cacheBlkSize - sizeof(MachInst)); + + // Get the instruction from the array of the cache line. + inst = htog(*reinterpret_cast(&cacheData[offset])); + + ExtMachInst decode_inst = TheISA::makeExtMI(inst, PC); + + // Create a new DynInst from the instruction fetched. + DynInstPtr instruction = new DynInst(decode_inst, PC, PC+sizeof(MachInst), + inst_seq, cpu); + + instruction->setState(thread); + + DPRINTF(FE, "Instruction [sn:%lli] created, with PC %#x\n%s\n", + inst_seq, instruction->readPC(), + instruction->staticInst->disassemble(PC)); + + instruction->traceData = + Trace::getInstRecord(curTick, xc, cpu, + instruction->staticInst, + instruction->readPC(), 0); + + // Increment stat of fetched instructions. 
+ ++fetchedInsts; + + return instruction; +} + +template +void +FrontEnd::renameInst(DynInstPtr &inst) +{ + DynInstPtr src_inst = NULL; + int num_src_regs = inst->numSrcRegs(); + if (num_src_regs == 0) { + inst->setCanIssue(); + } else { + for (int i = 0; i < num_src_regs; ++i) { + src_inst = renameTable[inst->srcRegIdx(i)]; + + inst->setSrcInst(src_inst, i); + + DPRINTF(FE, "[sn:%lli]: Src reg %i is inst [sn:%lli]\n", + inst->seqNum, (int)inst->srcRegIdx(i), src_inst->seqNum); + + if (src_inst->isResultReady()) { + DPRINTF(FE, "Reg ready.\n"); + inst->markSrcRegReady(i); + } else { + DPRINTF(FE, "Adding to dependent list.\n"); + src_inst->addDependent(inst); + } + } + } + + for (int i = 0; i < inst->numDestRegs(); ++i) { + RegIndex idx = inst->destRegIdx(i); + + DPRINTF(FE, "Dest reg %i is now inst [sn:%lli], was previously " + "[sn:%lli]\n", + (int)inst->destRegIdx(i), inst->seqNum, + renameTable[idx]->seqNum); + + inst->setPrevDestInst(renameTable[idx], i); + + renameTable[idx] = inst; + --freeRegs; + } +} + +template +void +FrontEnd::wakeFromQuiesce() +{ + DPRINTF(FE, "Waking up from quiesce\n"); + // Hopefully this is safe + status = Running; +} + +template +void +FrontEnd::switchOut() +{ + switchedOut = true; + cpu->signalSwitched(); +} + +template +void +FrontEnd::doSwitchOut() +{ + memReq = NULL; + squash(0, 0); + instBuffer.clear(); + instBufferSize = 0; + status = Idle; +} + +template +void +FrontEnd::takeOverFrom(ExecContext *old_xc) +{ + assert(freeRegs == numPhysRegs); + fetchCacheLineNextCycle = true; + + cacheBlkValid = false; + +#if !FULL_SYSTEM +// pTable = params->pTable; +#endif + fetchFault = NoFault; + serializeNext = false; + barrierInst = NULL; + status = Running; + switchedOut = false; + interruptPending = false; +} + +template +void +FrontEnd::dumpInsts() +{ + cprintf("instBuffer size: %i\n", instBuffer.size()); + + InstBuffIt buff_it = instBuffer.begin(); + + for (int num = 0; buff_it != instBuffer.end(); num++) { + 
cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n" + "Squashed:%i\n\n", + num, (*buff_it)->readPC(), (*buff_it)->threadNumber, + (*buff_it)->seqNum, (*buff_it)->isIssued(), + (*buff_it)->isSquashed()); + buff_it++; + } +} + +template +FrontEnd::ICacheCompletionEvent::ICacheCompletionEvent(MemReqPtr &_req, FrontEnd *fe) + : Event(&mainEventQueue, Delayed_Writeback_Pri), req(_req), frontEnd(fe) +{ + this->setFlags(Event::AutoDelete); +} + +template +void +FrontEnd::ICacheCompletionEvent::process() +{ + frontEnd->processCacheCompletion(req); +} + +template +const char * +FrontEnd::ICacheCompletionEvent::description() +{ + return "ICache completion event"; +} diff --git a/src/cpu/ozone/inorder_back_end.cc b/src/cpu/ozone/inorder_back_end.cc new file mode 100644 index 000000000..14db610d2 --- /dev/null +++ b/src/cpu/ozone/inorder_back_end.cc @@ -0,0 +1,5 @@ + +#include "cpu/ozone/inorder_back_end_impl.hh" +#include "cpu/ozone/simple_impl.hh" + +template class InorderBackEnd; diff --git a/src/cpu/ozone/inorder_back_end.hh b/src/cpu/ozone/inorder_back_end.hh new file mode 100644 index 000000000..578ae4ce2 --- /dev/null +++ b/src/cpu/ozone/inorder_back_end.hh @@ -0,0 +1,449 @@ + +#ifndef __CPU_OZONE_INORDER_BACK_END_HH__ +#define __CPU_OZONE_INORDER_BACK_END_HH__ + +#include + +#include "arch/faults.hh" +#include "base/timebuf.hh" +#include "cpu/exec_context.hh" +#include "cpu/inst_seq.hh" +#include "cpu/ozone/rename_table.hh" +#include "cpu/ozone/thread_state.hh" +#include "mem/request.hh" +#include "sim/eventq.hh" + +template +class InorderBackEnd +{ + public: + typedef typename Impl::Params Params; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::FrontEnd FrontEnd; + + typedef typename FullCPU::OzoneXC OzoneXC; + typedef typename Impl::FullCPU::CommStruct CommStruct; + + InorderBackEnd(Params *params); + + std::string name() const; + + void setCPU(FullCPU *cpu_ptr) + { cpu = cpu_ptr; } + + 
void setFrontEnd(FrontEnd *front_end_ptr) + { frontEnd = front_end_ptr; } + + void setCommBuffer(TimeBuffer *_comm) + { comm = _comm; } + + void setXC(ExecContext *xc_ptr); + + void setThreadState(OzoneThreadState *thread_ptr); + + void regStats() { } + +#if FULL_SYSTEM + void checkInterrupts(); +#endif + + void tick(); + void executeInsts(); + void squash(const InstSeqNum &squash_num, const Addr &next_PC); + + void squashFromXC(); + void generateXCEvent() { } + + bool robEmpty() { return instList.empty(); } + + bool isFull() { return false; } + bool isBlocked() { return status == DcacheMissStoreStall || + status == DcacheMissLoadStall || + interruptBlocked; } + + void fetchFault(Fault &fault); + + void dumpInsts(); + + private: + void handleFault(); + + void setSquashInfoFromXC(); + + bool squashPending; + InstSeqNum squashSeqNum; + Addr squashNextPC; + + Fault faultFromFetch; + + bool interruptBlocked; + + public: + template + Fault read(Addr addr, T &data, unsigned flags); + + template + Fault read(RequestPtr req, T &data, int load_idx); + + template + Fault write(T data, Addr addr, unsigned flags, uint64_t *res); + + template + Fault write(RequestPtr req, T &data, int store_idx); + + Addr readCommitPC() { return commitPC; } + + Addr commitPC; + + void switchOut() { panic("Not implemented!"); } + void doSwitchOut() { panic("Not implemented!"); } + void takeOverFrom(ExecContext *old_xc = NULL) { panic("Not implemented!"); } + + public: + FullCPU *cpu; + + FrontEnd *frontEnd; + + ExecContext *xc; + + OzoneThreadState *thread; + + RenameTable renameTable; + + protected: + enum Status { + Running, + Idle, + DcacheMissLoadStall, + DcacheMissStoreStall, + DcacheMissComplete, + Blocked + }; + + Status status; + + class DCacheCompletionEvent : public Event + { + private: + InorderBackEnd *be; + + public: + DCacheCompletionEvent(InorderBackEnd *_be); + + virtual void process(); + virtual const char *description(); + + DynInstPtr inst; + }; + + friend class 
DCacheCompletionEvent; + + DCacheCompletionEvent cacheCompletionEvent; + +// MemInterface *dcacheInterface; + + RequestPtr memReq; + + private: + typedef typename std::list::iterator InstListIt; + + std::list instList; + + // General back end width. Used if the more specific isn't given. + int width; + + int latency; + + int squashLatency; + + TimeBuffer numInstsToWB; + TimeBuffer::wire instsAdded; + TimeBuffer::wire instsToExecute; + + TimeBuffer *comm; + // number of cycles stalled for D-cache misses + Stats::Scalar<> dcacheStallCycles; + Counter lastDcacheStall; +}; + +template +template +Fault +InorderBackEnd::read(Addr addr, T &data, unsigned flags) +{ + memReq->reset(addr, sizeof(T), flags); + + // translate to physical address + Fault fault = cpu->translateDataReadReq(memReq); + + // if we have a cache, do cache access too + if (fault == NoFault && dcacheInterface) { + memReq->cmd = Read; + memReq->completionEvent = NULL; + memReq->time = curTick; + memReq->flags &= ~INST_READ; + MemAccessResult result = dcacheInterface->access(memReq); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. + if (result != MA_HIT) { + // Fix this hack for keeping funcExeInst correct with loads that + // are executed twice. 
+ memReq->completionEvent = &cacheCompletionEvent; + lastDcacheStall = curTick; +// unscheduleTickEvent(); + status = DcacheMissLoadStall; + DPRINTF(IBE, "Dcache miss stall!\n"); + } else { + // do functional access + DPRINTF(IBE, "Dcache hit!\n"); + } + } +/* + if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + recordEvent("Uncached Read"); +*/ + return fault; +} +#if 0 +template +template +Fault +InorderBackEnd::read(MemReqPtr &req, T &data) +{ +#if FULL_SYSTEM && defined(TARGET_ALPHA) + if (req->flags & LOCKED) { + req->xc->setMiscReg(TheISA::Lock_Addr_DepTag, req->paddr); + req->xc->setMiscReg(TheISA::Lock_Flag_DepTag, true); + } +#endif + + Fault error; + error = thread->mem->read(req, data); + data = LittleEndianGuest::gtoh(data); + return error; +} +#endif + +template +template +Fault +InorderBackEnd::write(T data, Addr addr, unsigned flags, uint64_t *res) +{ + memReq->reset(addr, sizeof(T), flags); + + // translate to physical address + Fault fault = cpu->translateDataWriteReq(memReq); + + if (fault == NoFault && dcacheInterface) { + memReq->cmd = Write; +// memcpy(memReq->data,(uint8_t *)&data,memReq->size); + memReq->completionEvent = NULL; + memReq->time = curTick; + memReq->flags &= ~INST_READ; + MemAccessResult result = dcacheInterface->access(memReq); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. 
+ if (result != MA_HIT) { + memReq->completionEvent = &cacheCompletionEvent; + lastDcacheStall = curTick; +// unscheduleTickEvent(); + status = DcacheMissStoreStall; + DPRINTF(IBE, "Dcache miss stall!\n"); + } else { + DPRINTF(IBE, "Dcache hit!\n"); + } + } + + if (res && (fault == NoFault)) + *res = memReq->result; +/* + if (!dcacheInterface && (memReq->flags & UNCACHEABLE)) + recordEvent("Uncached Write"); +*/ + return fault; +} +#if 0 +template +template +Fault +InorderBackEnd::write(MemReqPtr &req, T &data) +{ +#if FULL_SYSTEM && defined(TARGET_ALPHA) + ExecContext *xc; + + // If this is a store conditional, act appropriately + if (req->flags & LOCKED) { + xc = req->xc; + + if (req->flags & UNCACHEABLE) { + // Don't update result register (see stq_c in isa_desc) + req->result = 2; + xc->setStCondFailures(0);//Needed? [RGD] + } else { + bool lock_flag = xc->readMiscReg(TheISA::Lock_Flag_DepTag); + Addr lock_addr = xc->readMiscReg(TheISA::Lock_Addr_DepTag); + req->result = lock_flag; + if (!lock_flag || + ((lock_addr & ~0xf) != (req->paddr & ~0xf))) { + xc->setMiscReg(TheISA::Lock_Flag_DepTag, false); + xc->setStCondFailures(xc->readStCondFailures() + 1); + if (((xc->readStCondFailures()) % 100000) == 0) { + std::cerr << "Warning: " + << xc->readStCondFailures() + << " consecutive store conditional failures " + << "on cpu " << req->xc->readCpuId() + << std::endl; + } + return NoFault; + } + else xc->setStCondFailures(0); + } + } + + // Need to clear any locked flags on other processors for + // this address. Only do this for successful Store Conditionals + // and all other stores (WH64?). Unsuccessful Store + // Conditionals would have returned above, and wouldn't fall + // through. 
+ for (int i = 0; i < cpu->system->execContexts.size(); i++){ + xc = cpu->system->execContexts[i]; + if ((xc->readMiscReg(TheISA::Lock_Addr_DepTag) & ~0xf) == + (req->paddr & ~0xf)) { + xc->setMiscReg(TheISA::Lock_Flag_DepTag, false); + } + } + +#endif + return thread->mem->write(req, (T)LittleEndianGuest::htog(data)); +} +#endif + +template +template +Fault +InorderBackEnd::read(MemReqPtr &req, T &data, int load_idx) +{ +// panic("Unimplemented!"); +// memReq->reset(addr, sizeof(T), flags); + + // translate to physical address +// Fault fault = cpu->translateDataReadReq(req); + req->cmd = Read; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + req->flags &= ~INST_READ; + Fault fault = cpu->read(req, data); + memcpy(req->data, &data, sizeof(T)); + + // if we have a cache, do cache access too + if (dcacheInterface) { + MemAccessResult result = dcacheInterface->access(req); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. 
+ if (result != MA_HIT) { + req->completionEvent = &cacheCompletionEvent; + lastDcacheStall = curTick; +// unscheduleTickEvent(); + status = DcacheMissLoadStall; + DPRINTF(IBE, "Dcache miss load stall!\n"); + } else { + DPRINTF(IBE, "Dcache hit!\n"); + + } + } + +/* + if (!dcacheInterface && (req->flags & UNCACHEABLE)) + recordEvent("Uncached Read"); +*/ + return NoFault; +} + +template +template +Fault +InorderBackEnd::write(MemReqPtr &req, T &data, int store_idx) +{ +// req->reset(addr, sizeof(T), flags); + + // translate to physical address +// Fault fault = cpu->translateDataWriteReq(req); + + req->cmd = Write; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + memcpy(req->data, (uint8_t *)&data, req->size); + + switch(req->size) { + case 1: + cpu->write(req, (uint8_t &)data); + break; + case 2: + cpu->write(req, (uint16_t &)data); + break; + case 4: + cpu->write(req, (uint32_t &)data); + break; + case 8: + cpu->write(req, (uint64_t &)data); + break; + default: + panic("Unexpected store size!\n"); + } + + if (dcacheInterface) { + req->cmd = Write; + req->data = new uint8_t[64]; + memcpy(req->data,(uint8_t *)&data,req->size); + req->completionEvent = NULL; + req->time = curTick; + req->flags &= ~INST_READ; + MemAccessResult result = dcacheInterface->access(req); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. 
+ if (result != MA_HIT) { + req->completionEvent = &cacheCompletionEvent; + lastDcacheStall = curTick; +// unscheduleTickEvent(); + status = DcacheMissStoreStall; + DPRINTF(IBE, "Dcache miss store stall!\n"); + } else { + DPRINTF(IBE, "Dcache hit!\n"); + + } + } +/* + if (req->flags & LOCKED) { + if (req->flags & UNCACHEABLE) { + // Don't update result register (see stq_c in isa_desc) + req->result = 2; + } else { + req->result = 1; + } + } +*/ +/* + if (res && (fault == NoFault)) + *res = req->result; + */ +/* + if (!dcacheInterface && (req->flags & UNCACHEABLE)) + recordEvent("Uncached Write"); +*/ + return NoFault; +} + +#endif // __CPU_OZONE_INORDER_BACK_END_HH__ diff --git a/src/cpu/ozone/inorder_back_end_impl.hh b/src/cpu/ozone/inorder_back_end_impl.hh new file mode 100644 index 000000000..5a378ec76 --- /dev/null +++ b/src/cpu/ozone/inorder_back_end_impl.hh @@ -0,0 +1,519 @@ + +#include "arch/faults.hh" +#include "arch/isa_traits.hh" +#include "cpu/ozone/inorder_back_end.hh" +#include "cpu/ozone/thread_state.hh" + +using namespace TheISA; + +template +InorderBackEnd::InorderBackEnd(Params *params) + : squashPending(false), + squashSeqNum(0), + squashNextPC(0), + faultFromFetch(NoFault), + interruptBlocked(false), + cacheCompletionEvent(this), + dcacheInterface(params->dcacheInterface), + width(params->backEndWidth), + latency(params->backEndLatency), + squashLatency(params->backEndSquashLatency), + numInstsToWB(0, latency + 1) +{ + instsAdded = numInstsToWB.getWire(latency); + instsToExecute = numInstsToWB.getWire(0); + + memReq = new MemReq; + memReq->data = new uint8_t[64]; + status = Running; +} + +template +std::string +InorderBackEnd::name() const +{ + return cpu->name() + ".inorderbackend"; +} + +template +void +InorderBackEnd::setXC(ExecContext *xc_ptr) +{ + xc = xc_ptr; + memReq->xc = xc; +} + +template +void +InorderBackEnd::setThreadState(OzoneThreadState *thread_ptr) +{ + thread = thread_ptr; + thread->setFuncExeInst(0); +} + +#if FULL_SYSTEM 
+template +void +InorderBackEnd::checkInterrupts() +{ + //Check if there are any outstanding interrupts + //Handle the interrupts + int ipl = 0; + int summary = 0; + + cpu->checkInterrupts = false; + + if (thread->readMiscReg(IPR_ASTRR)) + panic("asynchronous traps not implemented\n"); + + if (thread->readMiscReg(IPR_SIRR)) { + for (int i = INTLEVEL_SOFTWARE_MIN; + i < INTLEVEL_SOFTWARE_MAX; i++) { + if (thread->readMiscReg(IPR_SIRR) & (ULL(1) << i)) { + // See table 4-19 of the 21164 hardware reference + ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1; + summary |= (ULL(1) << i); + } + } + } + + uint64_t interrupts = cpu->intr_status(); + + if (interrupts) { + for (int i = INTLEVEL_EXTERNAL_MIN; + i < INTLEVEL_EXTERNAL_MAX; i++) { + if (interrupts & (ULL(1) << i)) { + // See table 4-19 of the 21164 hardware reference + ipl = i; + summary |= (ULL(1) << i); + } + } + } + + if (ipl && ipl > thread->readMiscReg(IPR_IPLR)) { + thread->inSyscall = true; + + thread->setMiscReg(IPR_ISR, summary); + thread->setMiscReg(IPR_INTID, ipl); + Fault(new InterruptFault)->invoke(xc); + DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n", + thread->readMiscReg(IPR_IPLR), ipl, summary); + + // May need to go 1 inst prior + squashPending = true; + + thread->inSyscall = false; + + setSquashInfoFromXC(); + } +} +#endif + +template +void +InorderBackEnd::tick() +{ + // Squash due to an external source + // Not sure if this or an interrupt has higher priority + if (squashPending) { + squash(squashSeqNum, squashNextPC); + return; + } + + // if (interrupt) then set thread PC, stall front end, record that + // I'm waiting for it to drain. (for now just squash) +#if FULL_SYSTEM + if (interruptBlocked || + (cpu->checkInterrupts && + cpu->check_interrupts() && + !cpu->inPalMode())) { + if (!robEmpty()) { + interruptBlocked = true; + } else if (robEmpty() && cpu->inPalMode()) { + // Will need to let the front end continue a bit until + // we're out of pal mode. 
Hopefully we never get into an + // infinite loop... + interruptBlocked = false; + } else { + interruptBlocked = false; + checkInterrupts(); + return; + } + } +#endif + + if (status != DcacheMissLoadStall && + status != DcacheMissStoreStall) { + for (int i = 0; i < width && (*instsAdded) < width; ++i) { + DynInstPtr inst = frontEnd->getInst(); + + if (!inst) + break; + + instList.push_back(inst); + + (*instsAdded)++; + } + +#if FULL_SYSTEM + if (faultFromFetch && robEmpty() && frontEnd->isEmpty()) { + handleFault(); + } else { + executeInsts(); + } +#else + executeInsts(); +#endif + } +} + +template +void +InorderBackEnd::executeInsts() +{ + bool completed_last_inst = true; + int insts_to_execute = *instsToExecute; + int freed_regs = 0; + + while (insts_to_execute > 0) { + assert(!instList.empty()); + DynInstPtr inst = instList.front(); + + commitPC = inst->readPC(); + + thread->setPC(commitPC); + thread->setNextPC(inst->readNextPC()); + +#if FULL_SYSTEM + int count = 0; + Addr oldpc; + do { + if (count == 0) + assert(!thread->inSyscall && !thread->trapPending); + oldpc = thread->readPC(); + cpu->system->pcEventQueue.service( + thread->getXCProxy()); + count++; + } while (oldpc != thread->readPC()); + if (count > 1) { + DPRINTF(IBE, "PC skip function event, stopping commit\n"); + completed_last_inst = false; + squashPending = true; + break; + } +#endif + + Fault inst_fault = NoFault; + + if (status == DcacheMissComplete) { + DPRINTF(IBE, "Completing inst [sn:%lli]\n", inst->seqNum); + status = Running; + } else if (inst->isMemRef() && status != DcacheMissComplete && + (!inst->isDataPrefetch() && !inst->isInstPrefetch())) { + DPRINTF(IBE, "Initiating mem op inst [sn:%lli] PC: %#x\n", + inst->seqNum, inst->readPC()); + + cacheCompletionEvent.inst = inst; + inst_fault = inst->initiateAcc(); + if (inst_fault == NoFault && + status != DcacheMissLoadStall && + status != DcacheMissStoreStall) { + inst_fault = inst->completeAcc(); + } + ++thread->funcExeInst; + } else { + 
DPRINTF(IBE, "Executing inst [sn:%lli] PC: %#x\n", + inst->seqNum, inst->readPC()); + inst_fault = inst->execute(); + ++thread->funcExeInst; + } + + // Will need to be able to break this loop in case the load + // misses. Split access/complete ops would be useful here + // with writeback events. + if (status == DcacheMissLoadStall) { + *instsToExecute = insts_to_execute; + + completed_last_inst = false; + break; + } else if (status == DcacheMissStoreStall) { + // Figure out how to fix this hack. Probably have DcacheMissLoad + // vs DcacheMissStore. + *instsToExecute = insts_to_execute; + completed_last_inst = false; +/* + instList.pop_front(); + --insts_to_execute; + if (inst->traceData) { + inst->traceData->finalize(); + } +*/ + + // Don't really need to stop for a store stall as long as + // the memory system is able to handle store forwarding + // and such. Breaking out might help avoid the cache + // interface becoming blocked. + break; + } + + inst->setExecuted(); + inst->setCompleted(); + inst->setCanCommit(); + + instList.pop_front(); + + --insts_to_execute; + --(*instsToExecute); + + if (inst->traceData) { + inst->traceData->finalize(); + inst->traceData = NULL; + } + + if (inst_fault != NoFault) { +#if FULL_SYSTEM + DPRINTF(IBE, "Inst [sn:%lli] PC %#x has a fault\n", + inst->seqNum, inst->readPC()); + + assert(!thread->inSyscall); + + thread->inSyscall = true; + + // Hack for now; DTB will sometimes need the machine instruction + // for when faults happen. So we will set it here, prior to the + // DTB possibly needing it for this translation. + thread->setInst( + static_cast(inst->staticInst->machInst)); + + // Consider holding onto the trap and waiting until the trap event + // happens for this to be executed. + inst_fault->invoke(xc); + + // Exit state update mode to avoid accidental updating. + thread->inSyscall = false; + + squashPending = true; + + // Generate trap squash event. 
+// generateTrapEvent(tid); + completed_last_inst = false; + break; +#else // !FULL_SYSTEM + panic("fault (%d) detected @ PC %08p", inst_fault, + inst->PC); +#endif // FULL_SYSTEM + } + + for (int i = 0; i < inst->numDestRegs(); ++i) { + renameTable[inst->destRegIdx(i)] = inst; + thread->renameTable[inst->destRegIdx(i)] = inst; + ++freed_regs; + } + + inst->clearDependents(); + + comm->access(0)->doneSeqNum = inst->seqNum; + + if (inst->mispredicted()) { + squash(inst->seqNum, inst->readNextPC()); + + thread->setNextPC(inst->readNextPC()); + + break; + } else if (squashPending) { + // Something external happened that caused the CPU to squash. + // Break out of commit and handle the squash next cycle. + break; + } + // If it didn't mispredict, then it executed fine. Send back its + // registers and BP info? What about insts that may still have + // latency, like loads? Probably can send back the information after + // it is completed. + + // keep an instruction count + cpu->numInst++; + thread->numInsts++; + } + + frontEnd->addFreeRegs(freed_regs); + + assert(insts_to_execute >= 0); + + // Should only advance this if I have executed all instructions. + if (insts_to_execute == 0) { + numInstsToWB.advance(); + } + + // Should I set the PC to the next PC here? What do I set next PC to? + if (completed_last_inst) { + thread->setPC(thread->readNextPC()); + thread->setNextPC(thread->readPC() + sizeof(MachInst)); + } + + if (squashPending) { + setSquashInfoFromXC(); + } +} + +template +void +InorderBackEnd::handleFault() +{ + DPRINTF(Commit, "Handling fault from fetch\n"); + + assert(!thread->inSyscall); + + thread->inSyscall = true; + + // Consider holding onto the trap and waiting until the trap event + // happens for this to be executed. + faultFromFetch->invoke(xc); + + // Exit state update mode to avoid accidental updating. 
+ thread->inSyscall = false; + + squashPending = true; + + setSquashInfoFromXC(); +} + +template +void +InorderBackEnd::squash(const InstSeqNum &squash_num, const Addr &next_PC) +{ + DPRINTF(IBE, "Squashing from [sn:%lli], setting PC to %#x\n", + squash_num, next_PC); + + InstListIt squash_it = --(instList.end()); + + int freed_regs = 0; + + while (!instList.empty() && (*squash_it)->seqNum > squash_num) { + DynInstPtr inst = *squash_it; + + DPRINTF(IBE, "Squashing instruction PC %#x, [sn:%lli].\n", + inst->readPC(), + inst->seqNum); + + // May cause problems with misc regs + freed_regs+= inst->numDestRegs(); + inst->clearDependents(); + squash_it--; + instList.pop_back(); + } + + frontEnd->addFreeRegs(freed_regs); + + for (int i = 0; i < latency+1; ++i) { + numInstsToWB.advance(); + } + + squashPending = false; + + // Probably want to make sure that this squash is the one that set the + // thread into inSyscall mode. + thread->inSyscall = false; + + // Tell front end to squash, reset PC to new one. + frontEnd->squash(squash_num, next_PC); + + faultFromFetch = NULL; +} + +template +void +InorderBackEnd::squashFromXC() +{ + // Record that I need to squash + squashPending = true; + + thread->inSyscall = true; +} + +template +void +InorderBackEnd::setSquashInfoFromXC() +{ + // Need to handle the case of the instList being empty. In that case + // probably any number works, except maybe with stores in the store buffer. + squashSeqNum = instList.empty() ? 
0 : instList.front()->seqNum - 1; + + squashNextPC = thread->PC; +} + +template +void +InorderBackEnd::fetchFault(Fault &fault) +{ + faultFromFetch = fault; +} + +template +void +InorderBackEnd::dumpInsts() +{ + int num = 0; + int valid_num = 0; + + InstListIt inst_list_it = instList.begin(); + + cprintf("Inst list size: %i\n", instList.size()); + + while (inst_list_it != instList.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it++; + ++num; + } +} + +template +InorderBackEnd::DCacheCompletionEvent::DCacheCompletionEvent( + InorderBackEnd *_be) + : Event(&mainEventQueue, CPU_Tick_Pri), be(_be) +{ +// this->setFlags(Event::AutoDelete); +} + +template +void +InorderBackEnd::DCacheCompletionEvent::process() +{ + inst->completeAcc(); + be->status = DcacheMissComplete; +} + +template +const char * +InorderBackEnd::DCacheCompletionEvent::description() +{ + return "DCache completion event"; +} diff --git a/src/cpu/ozone/inst_queue.cc b/src/cpu/ozone/inst_queue.cc new file mode 100644 index 000000000..9c61602d9 --- /dev/null +++ b/src/cpu/ozone/inst_queue.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/ozone/dyn_inst.hh" +#include "cpu/ozone/ozone_impl.hh" +#include "cpu/ozone/simple_impl.hh" +#include "cpu/ozone/inst_queue_impl.hh" + +// Force instantiation of InstructionQueue. 
+template class InstQueue; +template class InstQueue; diff --git a/src/cpu/ozone/inst_queue.hh b/src/cpu/ozone/inst_queue.hh new file mode 100644 index 000000000..2cbbb7987 --- /dev/null +++ b/src/cpu/ozone/inst_queue.hh @@ -0,0 +1,506 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __CPU_OZONE_INST_QUEUE_HH__ +#define __CPU_OZONE_INST_QUEUE_HH__ + +#include +#include +#include +#include + +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/inst_seq.hh" +#include "sim/host.hh" + +class FUPool; +class MemInterface; + +/** + * A standard instruction queue class. It holds ready instructions, in + * order, in separate priority queues to facilitate the scheduling of + * instructions. The IQ uses a separate linked list to track dependencies. + * Similar to the rename map and the free list, it expects that + * floating point registers have their indices start after the integer + * registers (i.e. with 96 int and 96 fp registers, regs 0-95 are integer + * and 96-191 are fp). This remains true even for both logical and + * physical register indices. The IQ depends on the memory dependence unit to + * track when memory operations are ready in terms of ordering; register + * dependencies are tracked normally. Right now the IQ also handles the + * execution timing; this is mainly to allow back-to-back scheduling without + * requiring IEW to be able to peek into the IQ. At the end of the execution + * latency, the instruction is put into the queue to execute, where it will + * have the execute() function called on it. + * @todo: Make IQ able to handle multiple FU pools. + */ +template +class InstQueue +{ + public: + //Typedefs from the Impl. + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::Params Params; + typedef typename Impl::IssueStruct IssueStruct; +/* + typedef typename Impl::CPUPol::IEW IEW; + typedef typename Impl::CPUPol::MemDepUnit MemDepUnit; + typedef typename Impl::CPUPol::IssueStruct IssueStruct; + typedef typename Impl::CPUPol::TimeStruct TimeStruct; +*/ + // Typedef of iterator through the list of instructions. + typedef typename std::list::iterator ListIt; + + friend class Impl::FullCPU; +#if 0 + /** FU completion event class. 
*/ + class FUCompletion : public Event { + private: + /** Executing instruction. */ + DynInstPtr inst; + + /** Index of the FU used for executing. */ + int fuIdx; + + /** Pointer back to the instruction queue. */ + InstQueue *iqPtr; + + public: + /** Construct a FU completion event. */ + FUCompletion(DynInstPtr &_inst, int fu_idx, + InstQueue *iq_ptr); + + virtual void process(); + virtual const char *description(); + }; +#endif + /** Constructs an IQ. */ + InstQueue(Params *params); + + /** Destructs the IQ. */ + ~InstQueue(); + + /** Returns the name of the IQ. */ + std::string name() const; + + /** Registers statistics. */ + void regStats(); + + /** Sets CPU pointer. */ + void setCPU(FullCPU *_cpu) { cpu = _cpu; } +#if 0 + /** Sets active threads list. */ + void setActiveThreads(list *at_ptr); + + /** Sets the IEW pointer. */ + void setIEW(IEW *iew_ptr) { iewStage = iew_ptr; } +#endif + /** Sets the timer buffer between issue and execute. */ + void setIssueToExecuteQueue(TimeBuffer *i2eQueue); +#if 0 + /** Sets the global time buffer. */ + void setTimeBuffer(TimeBuffer *tb_ptr); + + /** Number of entries needed for given amount of threads. */ + int entryAmount(int num_threads); + + /** Resets max entries for all threads. */ + void resetEntries(); +#endif + /** Returns total number of free entries. */ + unsigned numFreeEntries(); + + /** Returns number of free entries for a thread. */ + unsigned numFreeEntries(unsigned tid); + + /** Returns whether or not the IQ is full. */ + bool isFull(); + + /** Returns whether or not the IQ is full for a specific thread. */ + bool isFull(unsigned tid); + + /** Returns if there are any ready instructions in the IQ. */ + bool hasReadyInsts(); + + /** Inserts a new instruction into the IQ. */ + void insert(DynInstPtr &new_inst); + + /** Inserts a new, non-speculative instruction into the IQ. 
*/ + void insertNonSpec(DynInstPtr &new_inst); +#if 0 + /** + * Advances the tail of the IQ, used if an instruction is not added to the + * IQ for scheduling. + * @todo: Rename this function. + */ + void advanceTail(DynInstPtr &inst); + + /** Process FU completion event. */ + void processFUCompletion(DynInstPtr &inst, int fu_idx); +#endif + /** + * Schedules ready instructions, adding the ready ones (oldest first) to + * the queue to execute. + */ + void scheduleReadyInsts(); + + /** Schedules a single specific non-speculative instruction. */ + void scheduleNonSpec(const InstSeqNum &inst); + + /** + * Commits all instructions up to and including the given sequence number, + * for a specific thread. + */ + void commit(const InstSeqNum &inst, unsigned tid = 0); + + /** Wakes all dependents of a completed instruction. */ + void wakeDependents(DynInstPtr &completed_inst); + + /** Adds a ready memory instruction to the ready list. */ + void addReadyMemInst(DynInstPtr &ready_inst); +#if 0 + /** + * Reschedules a memory instruction. It will be ready to issue once + * replayMemInst() is called. + */ + void rescheduleMemInst(DynInstPtr &resched_inst); + + /** Replays a memory instruction. It must be rescheduled first. */ + void replayMemInst(DynInstPtr &replay_inst); +#endif + /** Completes a memory operation. */ + void completeMemInst(DynInstPtr &completed_inst); +#if 0 + /** Indicates an ordering violation between a store and a load. */ + void violation(DynInstPtr &store, DynInstPtr &faulting_load); +#endif + /** + * Squashes instructions for a thread. Squashing information is obtained + * from the time buffer. + */ + void squash(unsigned tid); // Probably want the ISN + + /** Returns the number of used entries for a thread. */ + unsigned getCount(unsigned tid) { return count[tid]; }; + + /** Updates the number of free entries. */ + void updateFreeEntries(int num) { freeEntries += num; } + + /** Debug function to print all instructions. 
*/ + void printInsts(); + + private: + /** Does the actual squashing. */ + void doSquash(unsigned tid); + + ///////////////////////// + // Various pointers + ///////////////////////// + + /** Pointer to the CPU. */ + FullCPU *cpu; + + /** Cache interface. */ + MemInterface *dcacheInterface; +#if 0 + /** Pointer to IEW stage. */ + IEW *iewStage; + + /** The memory dependence unit, which tracks/predicts memory dependences + * between instructions. + */ + MemDepUnit memDepUnit[Impl::MaxThreads]; +#endif + /** The queue to the execute stage. Issued instructions will be written + * into it. + */ + TimeBuffer *issueToExecuteQueue; +#if 0 + /** The backwards time buffer. */ + TimeBuffer *timeBuffer; + + /** Wire to read information from timebuffer. */ + typename TimeBuffer::wire fromCommit; + + /** Function unit pool. */ + FUPool *fuPool; +#endif + ////////////////////////////////////// + // Instruction lists, ready queues, and ordering + ////////////////////////////////////// + + /** List of all the instructions in the IQ (some of which may be issued). */ + std::list instList[Impl::MaxThreads]; + + /** + * Struct for comparing entries to be added to the priority queue. This + * gives reverse ordering to the instructions in terms of sequence + * numbers: the instructions with smaller sequence numbers (and hence + * are older) will be at the top of the priority queue. + */ + struct pqCompare { + bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const + { + return lhs->seqNum > rhs->seqNum; + } + }; + + /** + * Struct for an IQ entry. It includes the instruction and an iterator + * to the instruction's spot in the IQ. + */ + struct IQEntry { + DynInstPtr inst; + ListIt iqIt; + }; + + typedef std::priority_queue, pqCompare> + ReadyInstQueue; + + typedef std::map ReadyInstMap; + typedef typename std::map::iterator ReadyMapIt; + + /** List of ready instructions. 
+ */ + ReadyInstQueue readyInsts; + + /** List of non-speculative instructions that will be scheduled + * once the IQ gets a signal from commit. While it's redundant to + * have the key be a part of the value (the sequence number is stored + * inside of DynInst), when these instructions are woken up only + * the sequence number will be available. Thus it is most efficient to be + * able to search by the sequence number alone. + */ + std::map nonSpecInsts; + + typedef typename std::map::iterator NonSpecMapIt; +#if 0 + /** Entry for the list age ordering by op class. */ + struct ListOrderEntry { + OpClass queueType; + InstSeqNum oldestInst; + }; + + /** List that contains the age order of the oldest instruction of each + * ready queue. Used to select the oldest instruction available + * among op classes. + */ + std::list listOrder; + + typedef typename std::list::iterator ListOrderIt; + + /** Tracks if each ready queue is on the age order list. */ + bool queueOnList[Num_OpClasses]; + + /** Iterators of each ready queue. Points to their spot in the age order + * list. + */ + ListOrderIt readyIt[Num_OpClasses]; + + /** Add an op class to the age order list. */ + void addToOrderList(OpClass op_class); + + /** + * Called when the oldest instruction has been removed from a ready queue; + * this places that ready queue into the proper spot in the age order list. + */ + void moveToYoungerInst(ListOrderIt age_order_it); +#endif + ////////////////////////////////////// + // Various parameters + ////////////////////////////////////// +#if 0 + /** IQ Resource Sharing Policy */ + enum IQPolicy { + Dynamic, + Partitioned, + Threshold + }; + + /** IQ sharing policy for SMT. */ + IQPolicy iqPolicy; +#endif + /** Number of Total Threads*/ + unsigned numThreads; +#if 0 + /** Pointer to list of active threads. 
*/ + list *activeThreads; +#endif + /** Per Thread IQ count */ + unsigned count[Impl::MaxThreads]; + + /** Max IQ Entries Per Thread */ + unsigned maxEntries[Impl::MaxThreads]; + + /** Number of free IQ entries left. */ + unsigned freeEntries; + + /** The number of entries in the instruction queue. */ + unsigned numEntries; + + /** The total number of instructions that can be issued in one cycle. */ + unsigned totalWidth; +#if 0 + /** The number of physical registers in the CPU. */ + unsigned numPhysRegs; + + /** The number of physical integer registers in the CPU. */ + unsigned numPhysIntRegs; + + /** The number of floating point registers in the CPU. */ + unsigned numPhysFloatRegs; +#endif + /** Delay between commit stage and the IQ. + * @todo: Make there be a distinction between the delays within IEW. + */ + unsigned commitToIEWDelay; + + ////////////////////////////////// + // Variables needed for squashing + ////////////////////////////////// + + /** The sequence number of the squashed instruction. */ + InstSeqNum squashedSeqNum[Impl::MaxThreads]; + + /** Iterator that points to the last instruction that has been squashed. + * This will not be valid unless the IQ is in the process of squashing. + */ + ListIt squashIt[Impl::MaxThreads]; +#if 0 + /////////////////////////////////// + // Dependency graph stuff + /////////////////////////////////// + + class DependencyEntry + { + public: + DependencyEntry() + : inst(NULL), next(NULL) + { } + + DynInstPtr inst; + //Might want to include data about what arch. register the + //dependence is waiting on. + DependencyEntry *next; + + //This function, and perhaps this whole class, stand out a little + //bit as they don't fit a classification well. I want access + //to the underlying structure of the linked list, yet at + //the same time it feels like this should be something abstracted + //away. So for now it will sit here, within the IQ, until + //a better implementation is decided upon. 
+ // This function probably shouldn't be within the entry... + void insert(DynInstPtr &new_inst); + + void remove(DynInstPtr &inst_to_remove); + + // Debug variable, remove when done testing. + static unsigned mem_alloc_counter; + }; + + /** Array of linked lists. Each linked list is a list of all the + * instructions that depend upon a given register. The actual + * register's index is used to index into the graph; i.e. all + * instructions in flight that are dependent upon r34 will be + * in the linked list of dependGraph[34]. + */ + DependencyEntry *dependGraph; + + /** A cache of the recently woken registers. It is 1 if the register + * has been woken up recently, and 0 if the register has been added + * to the dependency graph and has not yet received its value. It + * is basically a secondary scoreboard, and should pretty much mirror + * the scoreboard that exists in the rename map. + */ + vector regScoreboard; + + /** Adds an instruction to the dependency graph, as a consumer. */ + bool addToDependents(DynInstPtr &new_inst); + + /** Adds an instruction to the dependency graph, as a producer. */ + void createDependency(DynInstPtr &new_inst); +#endif + /** Moves an instruction to the ready queue if it is ready. */ + void addIfReady(DynInstPtr &inst); + + /** Debugging function to count how many entries are in the IQ. It does + * a linear walk through the instructions, so do not call this function + * during normal execution. + */ + int countInsts(); +#if 0 + /** Debugging function to dump out the dependency graph. + */ + void dumpDependGraph(); +#endif + /** Debugging function to dump all the list sizes, as well as print + * out the list of nonspeculative instructions. Should not be used + * in any other capacity, but it has no harmful side effects. + */ + void dumpLists(); + + /** Debugging function to dump out all instructions that are in the + * IQ. + */ + void dumpInsts(); + + /** Stat for number of instructions added.
*/ + Stats::Scalar<> iqInstsAdded; + /** Stat for number of non-speculative instructions added. */ + Stats::Scalar<> iqNonSpecInstsAdded; +// Stats::Scalar<> iqIntInstsAdded; + /** Stat for number of integer instructions issued. */ + Stats::Scalar<> iqIntInstsIssued; +// Stats::Scalar<> iqFloatInstsAdded; + /** Stat for number of floating point instructions issued. */ + Stats::Scalar<> iqFloatInstsIssued; +// Stats::Scalar<> iqBranchInstsAdded; + /** Stat for number of branch instructions issued. */ + Stats::Scalar<> iqBranchInstsIssued; +// Stats::Scalar<> iqMemInstsAdded; + /** Stat for number of memory instructions issued. */ + Stats::Scalar<> iqMemInstsIssued; +// Stats::Scalar<> iqMiscInstsAdded; + /** Stat for number of miscellaneous instructions issued. */ + Stats::Scalar<> iqMiscInstsIssued; + /** Stat for number of squashed instructions that were ready to issue. */ + Stats::Scalar<> iqSquashedInstsIssued; + /** Stat for number of squashed instructions examined when squashing. */ + Stats::Scalar<> iqSquashedInstsExamined; + /** Stat for number of squashed instruction operands examined when + * squashing. + */ + Stats::Scalar<> iqSquashedOperandsExamined; + /** Stat for number of non-speculative instructions removed due to a squash. + */ + Stats::Scalar<> iqSquashedNonSpecRemoved; + +}; + +#endif //__CPU_OZONE_INST_QUEUE_HH__ diff --git a/src/cpu/ozone/inst_queue_impl.hh b/src/cpu/ozone/inst_queue_impl.hh new file mode 100644 index 000000000..0523c68d6 --- /dev/null +++ b/src/cpu/ozone/inst_queue_impl.hh @@ -0,0 +1,1341 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Todo: +// Current ordering allows for 0 cycle added-to-scheduled. Could maybe fake +// it; either do in reverse order, or have added instructions put into a +// different ready queue that, in scheduleReadyInsts(), gets put onto the +// normal ready queue. This would however give only a one cycle delay, +// but probably is more flexible to actually add in a delay parameter than +// just running it backwards.
+ +#include + +#include "sim/root.hh" + +#include "cpu/ozone/inst_queue.hh" +#if 0 +template +InstQueue::FUCompletion::FUCompletion(DynInstPtr &_inst, + int fu_idx, + InstQueue *iq_ptr) + : Event(&mainEventQueue, Stat_Event_Pri), + inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr) +{ + this->setFlags(Event::AutoDelete); +} + +template +void +InstQueue::FUCompletion::process() +{ + iqPtr->processFUCompletion(inst, fuIdx); +} + + +template +const char * +InstQueue::FUCompletion::description() +{ + return "Functional unit completion event"; +} +#endif +template +InstQueue::InstQueue(Params *params) + : dcacheInterface(params->dcacheInterface), +// fuPool(params->fuPool), + numEntries(params->numIQEntries), + totalWidth(params->issueWidth), +// numPhysIntRegs(params->numPhysIntRegs), +// numPhysFloatRegs(params->numPhysFloatRegs), + commitToIEWDelay(params->commitToIEWDelay) +{ +// assert(fuPool); + +// numThreads = params->numberOfThreads; + numThreads = 1; + + //Initialize thread IQ counts + for (int i = 0; i smtIQPolicy; + + //Convert string to lowercase + std::transform(policy.begin(), policy.end(), policy.begin(), + (int(*)(int)) tolower); + + //Figure out resource sharing policy + if (policy == "dynamic") { + iqPolicy = Dynamic; + + //Set Max Entries to Total ROB Capacity + for (int i = 0; i < numThreads; i++) { + maxEntries[i] = numEntries; + } + + } else if (policy == "partitioned") { + iqPolicy = Partitioned; + + //@todo:make work if part_amt doesnt divide evenly. 
+ int part_amt = numEntries / numThreads; + + //Divide ROB up evenly + for (int i = 0; i < numThreads; i++) { + maxEntries[i] = part_amt; + } + + DPRINTF(Fetch, "IQ sharing policy set to Partitioned:" + "%i entries per thread.\n",part_amt); + + } else if (policy == "threshold") { + iqPolicy = Threshold; + + double threshold = (double)params->smtIQThreshold / 100; + + int thresholdIQ = (int)((double)threshold * numEntries); + + //Divide up by threshold amount + for (int i = 0; i < numThreads; i++) { + maxEntries[i] = thresholdIQ; + } + + DPRINTF(Fetch, "IQ sharing policy set to Threshold:" + "%i entries per thread.\n",thresholdIQ); + } else { + assert(0 && "Invalid IQ Sharing Policy.Options Are:{Dynamic," + "Partitioned, Threshold}"); + } +*/ +} + +template +InstQueue::~InstQueue() +{ + // Clear the dependency graph +/* + DependencyEntry *curr; + DependencyEntry *prev; + + for (int i = 0; i < numPhysRegs; ++i) { + curr = dependGraph[i].next; + + while (curr) { + DependencyEntry::mem_alloc_counter--; + + prev = curr; + curr = prev->next; + prev->inst = NULL; + + delete prev; + } + + if (dependGraph[i].inst) { + dependGraph[i].inst = NULL; + } + + dependGraph[i].next = NULL; + } + + assert(DependencyEntry::mem_alloc_counter == 0); + + delete [] dependGraph; +*/ +} + +template +std::string +InstQueue::name() const +{ + return cpu->name() + ".iq"; +} + +template +void +InstQueue::regStats() +{ + iqInstsAdded + .name(name() + ".iqInstsAdded") + .desc("Number of instructions added to the IQ (excludes non-spec)") + .prereq(iqInstsAdded); + + iqNonSpecInstsAdded + .name(name() + ".iqNonSpecInstsAdded") + .desc("Number of non-speculative instructions added to the IQ") + .prereq(iqNonSpecInstsAdded); + +// iqIntInstsAdded; + + iqIntInstsIssued + .name(name() + ".iqIntInstsIssued") + .desc("Number of integer instructions issued") + .prereq(iqIntInstsIssued); + +// iqFloatInstsAdded; + + iqFloatInstsIssued + .name(name() + ".iqFloatInstsIssued") + .desc("Number of float 
instructions issued") + .prereq(iqFloatInstsIssued); + +// iqBranchInstsAdded; + + iqBranchInstsIssued + .name(name() + ".iqBranchInstsIssued") + .desc("Number of branch instructions issued") + .prereq(iqBranchInstsIssued); + +// iqMemInstsAdded; + + iqMemInstsIssued + .name(name() + ".iqMemInstsIssued") + .desc("Number of memory instructions issued") + .prereq(iqMemInstsIssued); + +// iqMiscInstsAdded; + + iqMiscInstsIssued + .name(name() + ".iqMiscInstsIssued") + .desc("Number of miscellaneous instructions issued") + .prereq(iqMiscInstsIssued); + + iqSquashedInstsIssued + .name(name() + ".iqSquashedInstsIssued") + .desc("Number of squashed instructions issued") + .prereq(iqSquashedInstsIssued); + + iqSquashedInstsExamined + .name(name() + ".iqSquashedInstsExamined") + .desc("Number of squashed instructions iterated over during squash;" + " mainly for profiling") + .prereq(iqSquashedInstsExamined); + + iqSquashedOperandsExamined + .name(name() + ".iqSquashedOperandsExamined") + .desc("Number of squashed operands that are examined and possibly " + "removed from graph") + .prereq(iqSquashedOperandsExamined); + + iqSquashedNonSpecRemoved + .name(name() + ".iqSquashedNonSpecRemoved") + .desc("Number of squashed non-spec instructions that were removed") + .prereq(iqSquashedNonSpecRemoved); +/* + for ( int i=0; i < numThreads; i++) { + // Tell mem dependence unit to reg stats as well. 
+ memDepUnit[i].regStats(); + } +*/ +} +/* +template +void +InstQueue::setActiveThreads(list *at_ptr) +{ + DPRINTF(IQ, "Setting active threads list pointer.\n"); + activeThreads = at_ptr; +} +*/ +template +void +InstQueue::setIssueToExecuteQueue(TimeBuffer *i2e_ptr) +{ + DPRINTF(IQ, "Set the issue to execute queue.\n"); + issueToExecuteQueue = i2e_ptr; +} +/* +template +void +InstQueue::setTimeBuffer(TimeBuffer *tb_ptr) +{ + DPRINTF(IQ, "Set the time buffer.\n"); + timeBuffer = tb_ptr; + + fromCommit = timeBuffer->getWire(-commitToIEWDelay); +} + +template +int +InstQueue::entryAmount(int num_threads) +{ + if (iqPolicy == Partitioned) { + return numEntries / num_threads; + } else { + return 0; + } +} + + +template +void +InstQueue::resetEntries() +{ + if (iqPolicy != Dynamic || numThreads > 1) { + int active_threads = (*activeThreads).size(); + + list::iterator threads = (*activeThreads).begin(); + list::iterator list_end = (*activeThreads).end(); + + while (threads != list_end) { + if (iqPolicy == Partitioned) { + maxEntries[*threads++] = numEntries / active_threads; + } else if(iqPolicy == Threshold && active_threads == 1) { + maxEntries[*threads++] = numEntries; + } + } + } +} +*/ +template +unsigned +InstQueue::numFreeEntries() +{ + return freeEntries; +} + +template +unsigned +InstQueue::numFreeEntries(unsigned tid) +{ + return maxEntries[tid] - count[tid]; +} + +// Might want to do something more complex if it knows how many instructions +// will be issued this cycle. 
+template +bool +InstQueue::isFull() +{ + if (freeEntries == 0) { + return(true); + } else { + return(false); + } +} + +template +bool +InstQueue::isFull(unsigned tid) +{ + if (numFreeEntries(tid) == 0) { + return(true); + } else { + return(false); + } +} + +template +bool +InstQueue::hasReadyInsts() +{ +/* + if (!listOrder.empty()) { + return true; + } + + for (int i = 0; i < Num_OpClasses; ++i) { + if (!readyInsts[i].empty()) { + return true; + } + } + + return false; +*/ + return readyInsts.empty(); +} + +template +void +InstQueue::insert(DynInstPtr &new_inst) +{ + // Make sure the instruction is valid + assert(new_inst); + + DPRINTF(IQ, "Adding instruction PC %#x to the IQ.\n", + new_inst->readPC()); + + // Check if there are any free entries. Panic if there are none. + // Might want to have this return a fault in the future instead of + // panicing. + assert(freeEntries != 0); + + instList[new_inst->threadNumber].push_back(new_inst); + + // Decrease the number of free entries. + --freeEntries; + + //Mark Instruction as in IQ +// new_inst->setInIQ(); +/* + // Look through its source registers (physical regs), and mark any + // dependencies. + addToDependents(new_inst); + + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(new_inst); +*/ + // If it's a memory instruction, add it to the memory dependency + // unit. +// if (new_inst->isMemRef()) { +// memDepUnit[new_inst->threadNumber].insert(new_inst); +// } else { + // If the instruction is ready then add it to the ready list. + addIfReady(new_inst); +// } + + ++iqInstsAdded; + + + //Update Thread IQ Count + count[new_inst->threadNumber]++; + + assert(freeEntries == (numEntries - countInsts())); +} + +template +void +InstQueue::insertNonSpec(DynInstPtr &new_inst) +{ + nonSpecInsts[new_inst->seqNum] = new_inst; + + // @todo: Clean up this code; can do it by setting inst as unable + // to issue, then calling normal insert on the inst. 
+ + // Make sure the instruction is valid + assert(new_inst); + + DPRINTF(IQ, "Adding instruction PC %#x to the IQ.\n", + new_inst->readPC()); + + // Check if there are any free entries. Panic if there are none. + // Might want to have this return a fault in the future instead of + // panicing. + assert(freeEntries != 0); + + instList[new_inst->threadNumber].push_back(new_inst); + + // Decrease the number of free entries. + --freeEntries; + + //Mark Instruction as in IQ +// new_inst->setInIQ(); +/* + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(new_inst); + + // If it's a memory instruction, add it to the memory dependency + // unit. + if (new_inst->isMemRef()) { + memDepUnit[new_inst->threadNumber].insertNonSpec(new_inst); + } +*/ + ++iqNonSpecInstsAdded; + + //Update Thread IQ Count + count[new_inst->threadNumber]++; + + assert(freeEntries == (numEntries - countInsts())); +} +/* +template +void +InstQueue::advanceTail(DynInstPtr &inst) +{ + // Have this instruction set itself as the producer of its destination + // register(s). + createDependency(inst); +} + +template +void +InstQueue::addToOrderList(OpClass op_class) +{ + assert(!readyInsts[op_class].empty()); + + ListOrderEntry queue_entry; + + queue_entry.queueType = op_class; + + queue_entry.oldestInst = readyInsts[op_class].top()->seqNum; + + ListOrderIt list_it = listOrder.begin(); + ListOrderIt list_end_it = listOrder.end(); + + while (list_it != list_end_it) { + if ((*list_it).oldestInst > queue_entry.oldestInst) { + break; + } + + list_it++; + } + + readyIt[op_class] = listOrder.insert(list_it, queue_entry); + queueOnList[op_class] = true; +} + +template +void +InstQueue::moveToYoungerInst(ListOrderIt list_order_it) +{ + // Get iterator of next item on the list + // Delete the original iterator + // Determine if the next item is either the end of the list or younger + // than the new instruction. If so, then add in a new iterator right here. 
+ // If not, then move along. + ListOrderEntry queue_entry; + OpClass op_class = (*list_order_it).queueType; + ListOrderIt next_it = list_order_it; + + ++next_it; + + queue_entry.queueType = op_class; + queue_entry.oldestInst = readyInsts[op_class].top()->seqNum; + + while (next_it != listOrder.end() && + (*next_it).oldestInst < queue_entry.oldestInst) { + ++next_it; + } + + readyIt[op_class] = listOrder.insert(next_it, queue_entry); +} + +template +void +InstQueue::processFUCompletion(DynInstPtr &inst, int fu_idx) +{ + // The CPU could have been sleeping until this op completed (*extremely* + // long latency op). Wake it if it was. This may be overkill. + iewStage->wakeCPU(); + + fuPool->freeUnit(fu_idx); + + int &size = issueToExecuteQueue->access(0)->size; + + issueToExecuteQueue->access(0)->insts[size++] = inst; +} +*/ +// @todo: Figure out a better way to remove the squashed items from the +// lists. Checking the top item of each list to see if it's squashed +// wastes time and forces jumps. +template +void +InstQueue::scheduleReadyInsts() +{ + DPRINTF(IQ, "Attempting to schedule ready instructions from " + "the IQ.\n"); + +// IssueStruct *i2e_info = issueToExecuteQueue->access(0); +/* + // Will need to reorder the list if either a queue is not on the list, + // or it has an older instruction than last time. + for (int i = 0; i < Num_OpClasses; ++i) { + if (!readyInsts[i].empty()) { + if (!queueOnList[i]) { + addToOrderList(OpClass(i)); + } else if (readyInsts[i].top()->seqNum < + (*readyIt[i]).oldestInst) { + listOrder.erase(readyIt[i]); + addToOrderList(OpClass(i)); + } + } + } + + // Have iterator to head of the list + // While I haven't exceeded bandwidth or reached the end of the list, + // Try to get a FU that can do what this op needs. + // If successful, change the oldestInst to the new top of the list, put + // the queue in the proper place in the list. + // Increment the iterator. 
+ // This will avoid trying to schedule a certain op class if there are no + // FUs that handle it. + ListOrderIt order_it = listOrder.begin(); + ListOrderIt order_end_it = listOrder.end(); + int total_issued = 0; + int exec_queue_slot = i2e_info->size; + + while (exec_queue_slot < totalWidth && order_it != order_end_it) { + OpClass op_class = (*order_it).queueType; + + assert(!readyInsts[op_class].empty()); + + DynInstPtr issuing_inst = readyInsts[op_class].top(); + + assert(issuing_inst->seqNum == (*order_it).oldestInst); + + if (issuing_inst->isSquashed()) { + readyInsts[op_class].pop(); + + if (!readyInsts[op_class].empty()) { + moveToYoungerInst(order_it); + } else { + readyIt[op_class] = listOrder.end(); + queueOnList[op_class] = false; + } + + listOrder.erase(order_it++); + + ++iqSquashedInstsIssued; + + continue; + } + + int idx = fuPool->getUnit(op_class); + + if (idx != -1) { + int op_latency = fuPool->getOpLatency(op_class); + + if (op_latency == 1) { + i2e_info->insts[exec_queue_slot++] = issuing_inst; + i2e_info->size++; + + // Add the FU onto the list of FU's to be freed next cycle. + fuPool->freeUnit(idx); + } else { + int issue_latency = fuPool->getIssueLatency(op_class); + + if (issue_latency > 1) { + // Generate completion event for the FU + FUCompletion *execution = new FUCompletion(issuing_inst, + idx, this); + + execution->schedule(curTick + issue_latency - 1); + } else { + i2e_info->insts[exec_queue_slot++] = issuing_inst; + i2e_info->size++; + + // Add the FU onto the list of FU's to be freed next cycle. 
+ fuPool->freeUnit(idx); + } + } + + DPRINTF(IQ, "Thread %i: Issuing instruction PC %#x " + "[sn:%lli]\n", + issuing_inst->threadNumber, issuing_inst->readPC(), + issuing_inst->seqNum); + + readyInsts[op_class].pop(); + + if (!readyInsts[op_class].empty()) { + moveToYoungerInst(order_it); + } else { + readyIt[op_class] = listOrder.end(); + queueOnList[op_class] = false; + } + + issuing_inst->setIssued(); + ++total_issued; + + if (!issuing_inst->isMemRef()) { + // Memory instructions can not be freed from the IQ until they + // complete. + ++freeEntries; + count[issuing_inst->threadNumber]--; + issuing_inst->removeInIQ(); + } else { + memDepUnit[issuing_inst->threadNumber].issue(issuing_inst); + } + + listOrder.erase(order_it++); + } else { + ++order_it; + } + } + + if (total_issued) { + cpu->activityThisCycle(); + } else { + DPRINTF(IQ, "Not able to schedule any instructions.\n"); + } +*/ +} + +template +void +InstQueue::scheduleNonSpec(const InstSeqNum &inst) +{ + DPRINTF(IQ, "Marking nonspeculative instruction with sequence " + "number %i as ready to execute.\n", inst); + + NonSpecMapIt inst_it = nonSpecInsts.find(inst); + + assert(inst_it != nonSpecInsts.end()); + +// unsigned tid = (*inst_it).second->threadNumber; + + // Mark this instruction as ready to issue. + (*inst_it).second->setCanIssue(); + + // Now schedule the instruction. 
+// if (!(*inst_it).second->isMemRef()) { + addIfReady((*inst_it).second); +// } else { +// memDepUnit[tid].nonSpecInstReady((*inst_it).second); +// } + + nonSpecInsts.erase(inst_it); +} + +template +void +InstQueue::commit(const InstSeqNum &inst, unsigned tid) +{ + /*Need to go through each thread??*/ + DPRINTF(IQ, "[tid:%i]: Committing instructions older than [sn:%i]\n", + tid,inst); + + ListIt iq_it = instList[tid].begin(); + + while (iq_it != instList[tid].end() && + (*iq_it)->seqNum <= inst) { + ++iq_it; + instList[tid].pop_front(); + } + + assert(freeEntries == (numEntries - countInsts())); +} + +template +void +InstQueue::wakeDependents(DynInstPtr &completed_inst) +{ + DPRINTF(IQ, "Waking dependents of completed instruction.\n"); + // Look at the physical destination register of the DynInst + // and look it up on the dependency graph. Then mark as ready + // any instructions within the instruction queue. +/* + DependencyEntry *curr; + DependencyEntry *prev; +*/ + // Tell the memory dependence unit to wake any dependents on this + // instruction if it is a memory instruction. Also complete the memory + // instruction at this point since we know it executed fine. + // @todo: Might want to rename "completeMemInst" to + // something that indicates that it won't need to be replayed, and call + // this earlier. Might not be a big deal. + if (completed_inst->isMemRef()) { +// memDepUnit[completed_inst->threadNumber].wakeDependents(completed_inst); + completeMemInst(completed_inst); + } + completed_inst->wakeDependents(); +/* + for (int dest_reg_idx = 0; + dest_reg_idx < completed_inst->numDestRegs(); + dest_reg_idx++) + { + PhysRegIndex dest_reg = + completed_inst->renamedDestRegIdx(dest_reg_idx); + + // Special case of uniq or control registers. They are not + // handled by the IQ and thus have no dependency graph entry. + // @todo Figure out a cleaner way to handle this. 
+ if (dest_reg >= numPhysRegs) { + continue; + } + + DPRINTF(IQ, "Waking any dependents on register %i.\n", + (int) dest_reg); + + //Maybe abstract this part into a function. + //Go through the dependency chain, marking the registers as ready + //within the waiting instructions. + + curr = dependGraph[dest_reg].next; + + while (curr) { + DPRINTF(IQ, "Waking up a dependent instruction, PC%#x.\n", + curr->inst->readPC()); + + // Might want to give more information to the instruction + // so that it knows which of its source registers is ready. + // However that would mean that the dependency graph entries + // would need to hold the src_reg_idx. + curr->inst->markSrcRegReady(); + + addIfReady(curr->inst); + + DependencyEntry::mem_alloc_counter--; + + prev = curr; + curr = prev->next; + prev->inst = NULL; + + delete prev; + } + + // Reset the head node now that all of its dependents have been woken + // up. + dependGraph[dest_reg].next = NULL; + dependGraph[dest_reg].inst = NULL; + + // Mark the scoreboard as having that register ready. 
+ regScoreboard[dest_reg] = true; + } +*/ +} + +template +void +InstQueue::addReadyMemInst(DynInstPtr &ready_inst) +{ + OpClass op_class = ready_inst->opClass(); + + readyInsts.push(ready_inst); + + DPRINTF(IQ, "Instruction is ready to issue, putting it onto " + "the ready list, PC %#x opclass:%i [sn:%lli].\n", + ready_inst->readPC(), op_class, ready_inst->seqNum); +} +/* +template +void +InstQueue::rescheduleMemInst(DynInstPtr &resched_inst) +{ + memDepUnit[resched_inst->threadNumber].reschedule(resched_inst); +} + +template +void +InstQueue::replayMemInst(DynInstPtr &replay_inst) +{ + memDepUnit[replay_inst->threadNumber].replay(replay_inst); +} +*/ +template +void +InstQueue::completeMemInst(DynInstPtr &completed_inst) +{ + int tid = completed_inst->threadNumber; + + DPRINTF(IQ, "Completing mem instruction PC:%#x [sn:%lli]\n", + completed_inst->readPC(), completed_inst->seqNum); + + ++freeEntries; + +// completed_inst->memOpDone = true; + +// memDepUnit[tid].completed(completed_inst); + + count[tid]--; +} +/* +template +void +InstQueue::violation(DynInstPtr &store, + DynInstPtr &faulting_load) +{ + memDepUnit[store->threadNumber].violation(store, faulting_load); +} +*/ +template +void +InstQueue::squash(unsigned tid) +{ + DPRINTF(IQ, "[tid:%i]: Starting to squash instructions in " + "the IQ.\n", tid); + + // Read instruction sequence number of last instruction out of the + // time buffer. +// squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum; + + // Setup the squash iterator to point to the tail. + squashIt[tid] = instList[tid].end(); + --squashIt[tid]; + + // Call doSquash if there are insts in the IQ + if (count[tid] > 0) { + doSquash(tid); + } + + // Also tell the memory dependence unit to squash. +// memDepUnit[tid].squash(squashedSeqNum[tid], tid); +} + +template +void +InstQueue::doSquash(unsigned tid) +{ + // Make sure the squashed sequence number is valid. 
+ assert(squashedSeqNum[tid] != 0); + + DPRINTF(IQ, "[tid:%i]: Squashing until sequence number %i!\n", + tid, squashedSeqNum[tid]); + + // Squash any instructions younger than the squashed sequence number + // given. + while (squashIt[tid] != instList[tid].end() && + (*squashIt[tid])->seqNum > squashedSeqNum[tid]) { + + DynInstPtr squashed_inst = (*squashIt[tid]); + + // Only handle the instruction if it actually is in the IQ and + // hasn't already been squashed in the IQ. + if (squashed_inst->threadNumber != tid || + squashed_inst->isSquashedInIQ()) { + --squashIt[tid]; + continue; + } + + if (!squashed_inst->isIssued() || + (squashed_inst->isMemRef()/* && + !squashed_inst->memOpDone*/)) { + + // Remove the instruction from the dependency list. + if (!squashed_inst->isNonSpeculative()) { +/* + for (int src_reg_idx = 0; + src_reg_idx < squashed_inst->numSrcRegs(); + src_reg_idx++) + { + PhysRegIndex src_reg = + squashed_inst->renamedSrcRegIdx(src_reg_idx); + + // Only remove it from the dependency graph if it was + // placed there in the first place. + // HACK: This assumes that instructions woken up from the + // dependency chain aren't informed that a specific src + // register has become ready. This may not always be true + // in the future. + // Instead of doing a linked list traversal, we can just + // remove these squashed instructions either at issue time, + // or when the register is overwritten. The only downside + // to this is it leaves more room for error. + + if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) && + src_reg < numPhysRegs) { + dependGraph[src_reg].remove(squashed_inst); + } + + + ++iqSquashedOperandsExamined; + } +*/ + // Might want to remove producers as well. + } else { + nonSpecInsts[squashed_inst->seqNum] = NULL; + + nonSpecInsts.erase(squashed_inst->seqNum); + + ++iqSquashedNonSpecRemoved; + } + + // Might want to also clear out the head of the dependency graph. + + // Mark it as squashed within the IQ. 
+ squashed_inst->setSquashedInIQ(); + + // @todo: Remove this hack where several statuses are set so the + // inst will flow through the rest of the pipeline. + squashed_inst->setIssued(); + squashed_inst->setCanCommit(); +// squashed_inst->removeInIQ(); + + //Update Thread IQ Count + count[squashed_inst->threadNumber]--; + + ++freeEntries; + + if (numThreads > 1) { + DPRINTF(IQ, "[tid:%i]: Instruction PC %#x squashed.\n", + tid, squashed_inst->readPC()); + } else { + DPRINTF(IQ, "Instruction PC %#x squashed.\n", + squashed_inst->readPC()); + } + } + + --squashIt[tid]; + ++iqSquashedInstsExamined; + } +} +/* +template +void +InstQueue::DependencyEntry::insert(DynInstPtr &new_inst) +{ + //Add this new, dependent instruction at the head of the dependency + //chain. + + // First create the entry that will be added to the head of the + // dependency chain. + DependencyEntry *new_entry = new DependencyEntry; + new_entry->next = this->next; + new_entry->inst = new_inst; + + // Then actually add it to the chain. + this->next = new_entry; + + ++mem_alloc_counter; +} + +template +void +InstQueue::DependencyEntry::remove(DynInstPtr &inst_to_remove) +{ + DependencyEntry *prev = this; + DependencyEntry *curr = this->next; + + // Make sure curr isn't NULL. Because this instruction is being + // removed from a dependency list, it must have been placed there at + // an earlier time. The dependency chain should not be empty, + // unless the instruction dependent upon it is already ready. + if (curr == NULL) { + return; + } + + // Find the instruction to remove within the dependency linked list. + while (curr->inst != inst_to_remove) { + prev = curr; + curr = curr->next; + + assert(curr != NULL); + } + + // Now remove this instruction from the list. 
+ prev->next = curr->next; + + --mem_alloc_counter; + + // Could push this off to the destructor of DependencyEntry + curr->inst = NULL; + + delete curr; +} + +template +bool +InstQueue::addToDependents(DynInstPtr &new_inst) +{ + // Loop through the instruction's source registers, adding + // them to the dependency list if they are not ready. + int8_t total_src_regs = new_inst->numSrcRegs(); + bool return_val = false; + + for (int src_reg_idx = 0; + src_reg_idx < total_src_regs; + src_reg_idx++) + { + // Only add it to the dependency graph if it's not ready. + if (!new_inst->isReadySrcRegIdx(src_reg_idx)) { + PhysRegIndex src_reg = new_inst->renamedSrcRegIdx(src_reg_idx); + + // Check the IQ's scoreboard to make sure the register + // hasn't become ready while the instruction was in flight + // between stages. Only if it really isn't ready should + // it be added to the dependency graph. + if (src_reg >= numPhysRegs) { + continue; + } else if (regScoreboard[src_reg] == false) { + DPRINTF(IQ, "Instruction PC %#x has src reg %i that " + "is being added to the dependency chain.\n", + new_inst->readPC(), src_reg); + + dependGraph[src_reg].insert(new_inst); + + // Change the return value to indicate that something + // was added to the dependency graph. + return_val = true; + } else { + DPRINTF(IQ, "Instruction PC %#x has src reg %i that " + "became ready before it reached the IQ.\n", + new_inst->readPC(), src_reg); + // Mark a register ready within the instruction. + new_inst->markSrcRegReady(); + } + } + } + + return return_val; +} + +template +void +InstQueue::createDependency(DynInstPtr &new_inst) +{ + //Actually nothing really needs to be marked when an + //instruction becomes the producer of a register's value, + //but for convenience a ptr to the producing instruction will + //be placed in the head node of the dependency links. 
+ int8_t total_dest_regs = new_inst->numDestRegs(); + + for (int dest_reg_idx = 0; + dest_reg_idx < total_dest_regs; + dest_reg_idx++) + { + PhysRegIndex dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx); + + // Instructions that use the misc regs will have a reg number + // higher than the normal physical registers. In this case these + // registers are not renamed, and there is no need to track + // dependencies as these instructions must be executed at commit. + if (dest_reg >= numPhysRegs) { + continue; + } + + if (dependGraph[dest_reg].next) { + dumpDependGraph(); + panic("Dependency graph %i not empty!", dest_reg); + } + + dependGraph[dest_reg].inst = new_inst; + + // Mark the scoreboard to say it's not yet ready. + regScoreboard[dest_reg] = false; + } +} +*/ +template +void +InstQueue::addIfReady(DynInstPtr &inst) +{ + //If the instruction now has all of its source registers + // available, then add it to the list of ready instructions. + if (inst->readyToIssue()) { + + //Add the instruction to the proper ready list. + if (inst->isMemRef()) { + + DPRINTF(IQ, "Checking if memory instruction can issue.\n"); + + // Message to the mem dependence unit that this instruction has + // its registers ready. + +// memDepUnit[inst->threadNumber].regsReady(inst); + + return; + } + + OpClass op_class = inst->opClass(); + + DPRINTF(IQ, "Instruction is ready to issue, putting it onto " + "the ready list, PC %#x opclass:%i [sn:%lli].\n", + inst->readPC(), op_class, inst->seqNum); + + readyInsts.push(inst); + } +} + +template +int +InstQueue::countInsts() +{ + //ksewell:This works but definitely could use a cleaner write + //with a more intuitive way of counting. Right now it's + //just brute force .... 
+ +#if 0 + int total_insts = 0; + + for (int i = 0; i < numThreads; ++i) { + ListIt count_it = instList[i].begin(); + + while (count_it != instList[i].end()) { + if (!(*count_it)->isSquashed() && !(*count_it)->isSquashedInIQ()) { + if (!(*count_it)->isIssued()) { + ++total_insts; + } else if ((*count_it)->isMemRef() && + !(*count_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++total_insts; + } + } + + ++count_it; + } + } + + return total_insts; +#else + return numEntries - freeEntries; +#endif +} +/* +template +void +InstQueue::dumpDependGraph() +{ + DependencyEntry *curr; + + for (int i = 0; i < numPhysRegs; ++i) + { + curr = &dependGraph[i]; + + if (curr->inst) { + cprintf("dependGraph[%i]: producer: %#x [sn:%lli] consumer: ", + i, curr->inst->readPC(), curr->inst->seqNum); + } else { + cprintf("dependGraph[%i]: No producer. consumer: ", i); + } + + while (curr->next != NULL) { + curr = curr->next; + + cprintf("%#x [sn:%lli] ", + curr->inst->readPC(), curr->inst->seqNum); + } + + cprintf("\n"); + } +} +*/ +template +void +InstQueue::dumpLists() +{ + for (int i = 0; i < Num_OpClasses; ++i) { + cprintf("Ready list %i size: %i\n", i, readyInsts.size()); + + cprintf("\n"); + } + + cprintf("Non speculative list size: %i\n", nonSpecInsts.size()); + + NonSpecMapIt non_spec_it = nonSpecInsts.begin(); + NonSpecMapIt non_spec_end_it = nonSpecInsts.end(); + + cprintf("Non speculative list: "); + + while (non_spec_it != non_spec_end_it) { + cprintf("%#x [sn:%lli]", (*non_spec_it).second->readPC(), + (*non_spec_it).second->seqNum); + ++non_spec_it; + } + + cprintf("\n"); +/* + ListOrderIt list_order_it = listOrder.begin(); + ListOrderIt list_order_end_it = listOrder.end(); + int i = 1; + + cprintf("List order: "); + + while (list_order_it != list_order_end_it) { + cprintf("%i OpClass:%i [sn:%lli] ", i, (*list_order_it).queueType, + (*list_order_it).oldestInst); + + ++list_order_it; + ++i; + } +*/ + 
cprintf("\n"); +} + + +template +void +InstQueue::dumpInsts() +{ + for (int i = 0; i < numThreads; ++i) { +// int num = 0; +// int valid_num = 0; +/* + ListIt inst_list_it = instList[i].begin(); + + while (inst_list_it != instList[i].end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it++; + ++num; + } +*/ + } +} diff --git a/src/cpu/ozone/lsq_unit.cc b/src/cpu/ozone/lsq_unit.cc new file mode 100644 index 000000000..3ac51b87d --- /dev/null +++ b/src/cpu/ozone/lsq_unit.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/ozone/ozone_impl.hh" +#include "cpu/ozone/lsq_unit_impl.hh" + +// Force the instantiation of LDSTQ for all the implementations we care about. +template class OzoneLSQ; + diff --git a/src/cpu/ozone/lsq_unit.hh b/src/cpu/ozone/lsq_unit.hh new file mode 100644 index 000000000..4b600af67 --- /dev/null +++ b/src/cpu/ozone/lsq_unit.hh @@ -0,0 +1,637 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_OZONE_LSQ_UNIT_HH__ +#define __CPU_OZONE_LSQ_UNIT_HH__ + +#include +#include +#include + +#include "arch/faults.hh" +#include "arch/isa_traits.hh" +#include "config/full_system.hh" +#include "base/hashmap.hh" +#include "cpu/inst_seq.hh" +#include "mem/mem_interface.hh" +//#include "mem/page_table.hh" +#include "sim/sim_object.hh" + +class PageTable; + +/** + * Class that implements the actual LQ and SQ for each specific thread. 
+ * Both are circular queues; load entries are freed upon committing, while + * store entries are freed once they writeback. The LSQUnit tracks if there + * are memory ordering violations, and also detects partial load to store + * forwarding cases (a store only has part of a load's data) that requires + * the load to wait until the store writes back. In the former case it + * holds onto the instruction until the dependence unit looks at it, and + * in the latter it stalls the LSQ until the store writes back. At that + * point the load is replayed. + */ +template +class OzoneLSQ { + public: + typedef typename Impl::Params Params; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::BackEnd BackEnd; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::IssueStruct IssueStruct; + + typedef TheISA::IntReg IntReg; + + typedef typename std::map::iterator LdMapIt; + + private: + class StoreCompletionEvent : public Event { + public: + /** Constructs a store completion event. */ + StoreCompletionEvent(int store_idx, Event *wb_event, OzoneLSQ *lsq_ptr); + + /** Processes the store completion event. */ + void process(); + + /** Returns the description of this event. */ + const char *description(); + + private: + /** The store index of the store being written back. */ + int storeIdx; + /** The writeback event for the store. Needed for store + * conditionals. + */ + Event *wbEvent; + /** The pointer to the LSQ unit that issued the store. */ + OzoneLSQ *lsqPtr; + }; + + friend class StoreCompletionEvent; + + public: + /** Constructs an LSQ unit. init() must be called prior to use. */ + OzoneLSQ(); + + /** Initializes the LSQ unit with the specified number of entries. */ + void init(Params *params, unsigned maxLQEntries, + unsigned maxSQEntries, unsigned id); + + /** Returns the name of the LSQ unit. */ + std::string name() const; + + /** Sets the CPU pointer. 
*/ + void setCPU(FullCPU *cpu_ptr) + { cpu = cpu_ptr; } + + /** Sets the back-end stage pointer. */ + void setBE(BackEnd *be_ptr) + { be = be_ptr; } + + /** Sets the page table pointer. */ + void setPageTable(PageTable *pt_ptr); + + /** Ticks the LSQ unit, which in this case only resets the number of + * used cache ports. + * @todo: Move the number of used ports up to the LSQ level so it can + * be shared by all LSQ units. + */ + void tick() { usedPorts = 0; } + + /** Inserts an instruction. */ + void insert(DynInstPtr &inst); + /** Inserts a load instruction. */ + void insertLoad(DynInstPtr &load_inst); + /** Inserts a store instruction. */ + void insertStore(DynInstPtr &store_inst); + + /** Executes a load instruction. */ + Fault executeLoad(DynInstPtr &inst); + + Fault executeLoad(int lq_idx); + /** Executes a store instruction. */ + Fault executeStore(DynInstPtr &inst); + + /** Commits the head load. */ + void commitLoad(); + /** Commits a specific load, given by the sequence number. */ + void commitLoad(InstSeqNum &inst); + /** Commits loads older than a specific sequence number. */ + void commitLoads(InstSeqNum &youngest_inst); + + /** Commits stores older than a specific sequence number. */ + void commitStores(InstSeqNum &youngest_inst); + + /** Writes back stores. */ + void writebackStores(); + + // @todo: Include stats in the LSQ unit. + //void regStats(); + + /** Clears all the entries in the LQ. */ + void clearLQ(); + + /** Clears all the entries in the SQ. */ + void clearSQ(); + + /** Resizes the LQ to a given size. */ + void resizeLQ(unsigned size); + + /** Resizes the SQ to a given size. */ + void resizeSQ(unsigned size); + + /** Squashes all instructions younger than a specific sequence number. */ + void squash(const InstSeqNum &squashed_num); + + /** Returns if there is a memory ordering violation. Value is reset upon + * call to getMemDepViolator(). + */ + bool violation() { return memDepViolator; } + + /** Returns the memory ordering violator. 
*/ + DynInstPtr getMemDepViolator(); + + /** Returns if a load became blocked due to the memory system. It clears + * the bool's value upon this being called. + */ + inline bool loadBlocked(); + + /** Returns the number of free entries (min of free LQ and SQ entries). */ + unsigned numFreeEntries(); + + /** Returns the number of loads ready to execute. */ + int numLoadsReady(); + + /** Returns the number of loads in the LQ. */ + int numLoads() { return loads; } + + /** Returns the number of stores in the SQ. */ + int numStores() { return stores; } + + /** Returns if either the LQ or SQ is full. */ + bool isFull() { return lqFull() || sqFull(); } + + /** Returns if the LQ is full. */ + bool lqFull() { return loads >= (LQEntries - 1); } + + /** Returns if the SQ is full. */ + bool sqFull() { return stores >= (SQEntries - 1); } + + /** Debugging function to dump instructions in the LSQ. */ + void dumpInsts(); + + /** Returns the number of instructions in the LSQ. */ + unsigned getCount() { return loads + stores; } + + /** Returns if there are any stores to writeback. */ + bool hasStoresToWB() { return storesToWB; } + + /** Returns the number of stores to writeback. */ + int numStoresToWB() { return storesToWB; } + + /** Returns if the LSQ unit will writeback on this cycle. */ + bool willWB() { return storeQueue[storeWBIdx].canWB && + !storeQueue[storeWBIdx].completed && + !dcacheInterface->isBlocked(); } + + private: + /** Completes the store at the specified index. */ + void completeStore(int store_idx); + + /** Increments the given store index (circular queue). */ + inline void incrStIdx(int &store_idx); + /** Decrements the given store index (circular queue). */ + inline void decrStIdx(int &store_idx); + /** Increments the given load index (circular queue). */ + inline void incrLdIdx(int &load_idx); + /** Decrements the given load index (circular queue). */ + inline void decrLdIdx(int &load_idx); + + private: + /** Pointer to the CPU. 
*/ + FullCPU *cpu; + + /** Pointer to the back-end stage. */ + BackEnd *be; + + /** Pointer to the D-cache. */ + MemInterface *dcacheInterface; + + /** Pointer to the page table. */ + PageTable *pTable; + + public: + struct SQEntry { + /** Constructs an empty store queue entry. */ + SQEntry() + : inst(NULL), req(NULL), size(0), data(0), + canWB(0), committed(0), completed(0) + { } + + /** Constructs a store queue entry for a given instruction. */ + SQEntry(DynInstPtr &_inst) + : inst(_inst), req(NULL), size(0), data(0), + canWB(0), committed(0), completed(0) + { } + + /** The store instruction. */ + DynInstPtr inst; + /** The memory request for the store. */ + MemReqPtr req; + /** The size of the store. */ + int size; + /** The store data. */ + IntReg data; + /** Whether or not the store can writeback. */ + bool canWB; + /** Whether or not the store is committed. */ + bool committed; + /** Whether or not the store is completed. */ + bool completed; + }; + + enum Status { + Running, + Idle, + DcacheMissStall, + DcacheMissSwitch + }; + + private: + /** The OzoneLSQ thread id. */ + unsigned lsqID; + + /** The status of the LSQ unit. */ + Status _status; + + /** The store queue. */ + std::vector storeQueue; + + /** The load queue. */ + std::vector loadQueue; + + // Consider making these 16 bits + /** The number of LQ entries. */ + unsigned LQEntries; + /** The number of SQ entries. */ + unsigned SQEntries; + + /** The number of load instructions in the LQ. */ + int loads; + /** The number of store instructions in the SQ (excludes those waiting to + * writeback). + */ + int stores; + /** The number of store instructions in the SQ waiting to writeback. */ + int storesToWB; + + /** The index of the head instruction in the LQ. */ + int loadHead; + /** The index of the tail instruction in the LQ. */ + int loadTail; + + /** The index of the head instruction in the SQ. 
*/ + int storeHead; + /** The index of the first instruction that is ready to be written back, + * and has not yet been written back. + */ + int storeWBIdx; + /** The index of the tail instruction in the SQ. */ + int storeTail; + + /// @todo Consider moving to a more advanced model with write vs read ports + /** The number of cache ports available each cycle. */ + int cachePorts; + + /** The number of used cache ports in this cycle. */ + int usedPorts; + + //list mshrSeqNums; + + //Stats::Scalar<> dcacheStallCycles; + Counter lastDcacheStall; + + /** Wire to read information from the issue stage time queue. */ + typename TimeBuffer::wire fromIssue; + + // Make these per thread? + /** Whether or not the LSQ is stalled. */ + bool stalled; + /** The store that causes the stall due to partial store to load + * forwarding. + */ + InstSeqNum stallingStoreIsn; + /** The LQ index of the load that is stalled on the above store (set + * from load_idx in read()). + */ + int stallingLoadIdx; + + /** Whether or not a load is blocked due to the memory system. It is + * cleared when this value is checked via loadBlocked(). + */ + bool isLoadBlocked; + + /** The oldest faulting load instruction. */ + DynInstPtr loadFaultInst; + /** The oldest faulting store instruction. */ + DynInstPtr storeFaultInst; + + /** The oldest load that caused a memory ordering violation. */ + DynInstPtr memDepViolator; + + // Will also need how many read/write ports the Dcache has. Or keep track + // of that in stage that is one level up, and only call executeLoad/Store + // the appropriate number of times. + + public: + /** Executes the load at the given index. */ + template + Fault read(MemReqPtr &req, T &data, int load_idx); + + /** Executes the store at the given index. */ + template + Fault write(MemReqPtr &req, T &data, int store_idx); + + /** Returns the index of the head load instruction. */ + int getLoadHead() { return loadHead; } + /** Returns the sequence number of the head load instruction.
*/ + InstSeqNum getLoadHeadSeqNum() + { + if (loadQueue[loadHead]) { + return loadQueue[loadHead]->seqNum; + } else { + return 0; + } + + } + + /** Returns the index of the head store instruction. */ + int getStoreHead() { return storeHead; } + /** Returns the sequence number of the head store instruction. */ + InstSeqNum getStoreHeadSeqNum() + { + if (storeQueue[storeHead].inst) { + return storeQueue[storeHead].inst->seqNum; + } else { + return 0; + } + + } + + /** Returns whether or not the LSQ unit is stalled. */ + bool isStalled() { return stalled; } +}; + +template +template +Fault +OzoneLSQ::read(MemReqPtr &req, T &data, int load_idx) +{ + //Depending on issue2execute delay a squashed load could + //execute if it is found to be squashed in the same + //cycle it is scheduled to execute + assert(loadQueue[load_idx]); + + if (loadQueue[load_idx]->isExecuted()) { + panic("Should not reach this point with split ops!"); + + memcpy(&data,req->data,req->size); + + return NoFault; + } + + // Make sure this isn't an uncacheable access + // A bit of a hackish way to get uncached accesses to work only if they're + // at the head of the LSQ and are ready to commit (at the head of the ROB + // too). + // @todo: Fix uncached accesses. 
+ if (req->flags & UNCACHEABLE && + (load_idx != loadHead || !loadQueue[load_idx]->readyToCommit())) { + + return TheISA::genMachineCheckFault(); + } + + // Check the SQ for any previous stores that might lead to forwarding + int store_idx = loadQueue[load_idx]->sqIdx; + + int store_size = 0; + + DPRINTF(OzoneLSQ, "Read called, load idx: %i, store idx: %i, " + "storeHead: %i addr: %#x\n", + load_idx, store_idx, storeHead, req->paddr); + + while (store_idx != -1) { + // End once we've reached the top of the LSQ + if (store_idx == storeWBIdx) { + break; + } + + // Step back to the next older store, wrapping around the + // circular SQ when the index goes below zero. + if (--store_idx < 0) + store_idx += SQEntries; + + assert(storeQueue[store_idx].inst); + + store_size = storeQueue[store_idx].size; + + if (store_size == 0) + continue; + + // Check if the store data is within the lower and upper bounds of + // addresses that the request needs. + bool store_has_lower_limit = + req->vaddr >= storeQueue[store_idx].inst->effAddr; + bool store_has_upper_limit = + (req->vaddr + req->size) <= (storeQueue[store_idx].inst->effAddr + + store_size); + bool lower_load_has_store_part = + req->vaddr < (storeQueue[store_idx].inst->effAddr + + store_size); + bool upper_load_has_store_part = + (req->vaddr + req->size) > storeQueue[store_idx].inst->effAddr; + + // If the store's data has all of the data needed, we can forward. + if (store_has_lower_limit && store_has_upper_limit) { + + int shift_amt = req->vaddr & (store_size - 1); + // Assumes byte addressing + shift_amt = shift_amt << 3; + + // Cast this to type T?
+ data = storeQueue[store_idx].data >> shift_amt; + + req->cmd = Read; + assert(!req->completionEvent); + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + + memcpy(req->data, &data, req->size); + + DPRINTF(OzoneLSQ, "Forwarding from store idx %i to load to " + "addr %#x, data %#x\n", + store_idx, req->vaddr, *(req->data)); + + typename BackEnd::LdWritebackEvent *wb = + new typename BackEnd::LdWritebackEvent(loadQueue[load_idx], + be); + + // We'll say this has a 1 cycle load-store forwarding latency + // for now. + // FIXME - Need to make this a parameter. + wb->schedule(curTick); + + // Should keep track of stat for forwarded data + return NoFault; + } else if ((store_has_lower_limit && lower_load_has_store_part) || + (store_has_upper_limit && upper_load_has_store_part) || + (lower_load_has_store_part && upper_load_has_store_part)) { + // This is the partial store-load forwarding case where a store + // has only part of the load's data. + + // If it's already been written back, then don't worry about + // stalling on it. + if (storeQueue[store_idx].completed) { + continue; + } + + // Must stall load and force it to retry, so long as it's the oldest + // load that needs to do so. + if (!stalled || + (stalled && + loadQueue[load_idx]->seqNum < + loadQueue[stallingLoadIdx]->seqNum)) { + stalled = true; + stallingStoreIsn = storeQueue[store_idx].inst->seqNum; + stallingLoadIdx = load_idx; + } + + // Tell IQ/mem dep unit that this instruction will need to be + // rescheduled eventually + be->rescheduleMemInst(loadQueue[load_idx]); + + DPRINTF(OzoneLSQ, "Load-store forwarding mis-match. 
" + "Store idx %i to load addr %#x\n", + store_idx, req->vaddr); + + return NoFault; + } + } + + + // If there's no forwarding case, then go access memory + DynInstPtr inst = loadQueue[load_idx]; + + ++usedPorts; + + // if we have a cache, do cache access too + if (dcacheInterface) { + if (dcacheInterface->isBlocked()) { + isLoadBlocked = true; + // No fault occurred, even though the interface is blocked. + return NoFault; + } + + DPRINTF(OzoneLSQ, "D-cache: PC:%#x reading from paddr:%#x " + "vaddr:%#x flags:%i\n", + inst->readPC(), req->paddr, req->vaddr, req->flags); + + // Setup MemReq pointer + req->cmd = Read; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + + assert(!req->completionEvent); + typedef typename BackEnd::LdWritebackEvent LdWritebackEvent; + + LdWritebackEvent *wb = new LdWritebackEvent(loadQueue[load_idx], be); + + req->completionEvent = wb; + + // Do Cache Access + MemAccessResult result = dcacheInterface->access(req); + + // Ugly hack to get an event scheduled *only* if the access is + // a miss. We really should add first-class support for this + // at some point. 
+ // @todo: Probably should support having no events + if (result != MA_HIT) { + DPRINTF(OzoneLSQ, "D-cache miss!\n"); + DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", + inst->seqNum); + + lastDcacheStall = curTick; + + _status = DcacheMissStall; + + wb->setDcacheMiss(); + + } else { +// DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", +// inst->seqNum); + + DPRINTF(OzoneLSQ, "D-cache hit!\n"); + } + } else { + fatal("Must use D-cache with new memory system"); + } + + return NoFault; +} + +template +template +Fault +OzoneLSQ::write(MemReqPtr &req, T &data, int store_idx) +{ + assert(storeQueue[store_idx].inst); + + DPRINTF(OzoneLSQ, "Doing write to store idx %i, addr %#x data %#x" + " | storeHead:%i [sn:%i]\n", + store_idx, req->paddr, data, storeHead, + storeQueue[store_idx].inst->seqNum); + + storeQueue[store_idx].req = req; + storeQueue[store_idx].size = sizeof(T); + storeQueue[store_idx].data = data; + + // This function only writes the data to the store queue, so no fault + // can happen here. + return NoFault; +} + +template +inline bool +OzoneLSQ::loadBlocked() +{ + bool ret_val = isLoadBlocked; + isLoadBlocked = false; + return ret_val; +} + +#endif // __CPU_OZONE_LSQ_UNIT_HH__ diff --git a/src/cpu/ozone/lsq_unit_impl.hh b/src/cpu/ozone/lsq_unit_impl.hh new file mode 100644 index 000000000..726348d76 --- /dev/null +++ b/src/cpu/ozone/lsq_unit_impl.hh @@ -0,0 +1,846 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "arch/isa_traits.hh" +#include "base/str.hh" +#include "cpu/ozone/lsq_unit.hh" + +template +OzoneLSQ::StoreCompletionEvent::StoreCompletionEvent(int store_idx, + Event *wb_event, + OzoneLSQ *lsq_ptr) + : Event(&mainEventQueue), + storeIdx(store_idx), + wbEvent(wb_event), + lsqPtr(lsq_ptr) +{ + this->setFlags(Event::AutoDelete); +} + +template +void +OzoneLSQ::StoreCompletionEvent::process() +{ + DPRINTF(OzoneLSQ, "Cache miss complete for store idx:%i\n", storeIdx); + + //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum); + +// lsqPtr->cpu->wakeCPU(); + if (wbEvent) + wbEvent->process(); + lsqPtr->completeStore(storeIdx); +} + +template +const char * +OzoneLSQ::StoreCompletionEvent::description() +{ + return "LSQ store completion event"; +} + +template +OzoneLSQ::OzoneLSQ() + : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false) +{ +} + +template +void +OzoneLSQ::init(Params *params, unsigned maxLQEntries, + unsigned maxSQEntries, unsigned id) + +{ + DPRINTF(OzoneLSQ, "Creating OzoneLSQ%i object.\n",id); + + lsqID = id; + + LQEntries = maxLQEntries; + SQEntries = maxSQEntries; + + loadQueue.resize(LQEntries); + storeQueue.resize(SQEntries); + + + // May want to initialize these entries to NULL + + loadHead = loadTail = 0; + + storeHead = storeWBIdx = storeTail = 0; + + usedPorts = 0; + cachePorts = params->cachePorts; + + dcacheInterface = params->dcacheInterface; + + loadFaultInst = storeFaultInst = memDepViolator = NULL; +} + +template +std::string +OzoneLSQ::name() const +{ + return "lsqunit"; +} + +template +void +OzoneLSQ::clearLQ() +{ + loadQueue.clear(); +} + +template +void +OzoneLSQ::clearSQ() +{ + storeQueue.clear(); +} + +template +void +OzoneLSQ::setPageTable(PageTable *pt_ptr) +{ + DPRINTF(OzoneLSQ, "Setting the page table pointer.\n"); + pTable = pt_ptr; +} + +template +void +OzoneLSQ::resizeLQ(unsigned size) +{ + assert( size >= LQEntries); + + if (size > LQEntries) { + while (size > loadQueue.size()) 
{ + DynInstPtr dummy; + loadQueue.push_back(dummy); + LQEntries++; + } + } else { + LQEntries = size; + } + +} + +template +void +OzoneLSQ::resizeSQ(unsigned size) +{ + if (size > SQEntries) { + while (size > storeQueue.size()) { + SQEntry dummy; + storeQueue.push_back(dummy); + SQEntries++; + } + } else { + SQEntries = size; + } +} + +template +void +OzoneLSQ::insert(DynInstPtr &inst) +{ + // Make sure we really have a memory reference. + assert(inst->isMemRef()); + + // Make sure it's one of the two classes of memory references. + assert(inst->isLoad() || inst->isStore()); + + if (inst->isLoad()) { + insertLoad(inst); + } else { + insertStore(inst); + } + +// inst->setInLSQ(); +} + +template +void +OzoneLSQ::insertLoad(DynInstPtr &load_inst) +{ + assert((loadTail + 1) % LQEntries != loadHead && loads < LQEntries); + + DPRINTF(OzoneLSQ, "Inserting load PC %#x, idx:%i [sn:%lli]\n", + load_inst->readPC(), loadTail, load_inst->seqNum); + + load_inst->lqIdx = loadTail; + + if (stores == 0) { + load_inst->sqIdx = -1; + } else { + load_inst->sqIdx = storeTail; + } + + loadQueue[loadTail] = load_inst; + + incrLdIdx(loadTail); + + ++loads; +} + +template +void +OzoneLSQ::insertStore(DynInstPtr &store_inst) +{ + // Make sure it is not full before inserting an instruction. 
+ assert((storeTail + 1) % SQEntries != storeHead); + assert(stores < SQEntries); + + DPRINTF(OzoneLSQ, "Inserting store PC %#x, idx:%i [sn:%lli]\n", + store_inst->readPC(), storeTail, store_inst->seqNum); + + store_inst->sqIdx = storeTail; + store_inst->lqIdx = loadTail; + + storeQueue[storeTail] = SQEntry(store_inst); + + incrStIdx(storeTail); + + ++stores; + +} + +template +typename Impl::DynInstPtr +OzoneLSQ::getMemDepViolator() +{ + DynInstPtr temp = memDepViolator; + + memDepViolator = NULL; + + return temp; +} + +template +unsigned +OzoneLSQ::numFreeEntries() +{ + unsigned free_lq_entries = LQEntries - loads; + unsigned free_sq_entries = SQEntries - stores; + + // Both the LQ and SQ entries have an extra dummy entry to differentiate + // empty/full conditions. Subtract 1 from the free entries. + if (free_lq_entries < free_sq_entries) { + return free_lq_entries - 1; + } else { + return free_sq_entries - 1; + } +} + +template +int +OzoneLSQ::numLoadsReady() +{ + int load_idx = loadHead; + int retval = 0; + + while (load_idx != loadTail) { + assert(loadQueue[load_idx]); + + if (loadQueue[load_idx]->readyToIssue()) { + ++retval; + } + } + + return retval; +} + +#if 0 +template +Fault +OzoneLSQ::executeLoad() +{ + Fault load_fault = NoFault; + DynInstPtr load_inst; + + assert(readyLoads.size() != 0); + + // Execute a ready load. + LdMapIt ready_it = readyLoads.begin(); + + load_inst = (*ready_it).second; + + // Execute the instruction, which is held in the data portion of the + // iterator. + load_fault = load_inst->execute(); + + // If it executed successfully, then switch it over to the executed + // loads list. + if (load_fault == NoFault) { + executedLoads[load_inst->seqNum] = load_inst; + + readyLoads.erase(ready_it); + } else { + loadFaultInst = load_inst; + } + + return load_fault; +} +#endif + +template +Fault +OzoneLSQ::executeLoad(DynInstPtr &inst) +{ + // Execute a specific load. 
+ Fault load_fault = NoFault; + + DPRINTF(OzoneLSQ, "Executing load PC %#x, [sn:%lli]\n", + inst->readPC(),inst->seqNum); + + // Make sure it's really in the list. + // Normally it should always be in the list. However, + /* due to a syscall it may not be the list. +#ifdef DEBUG + int i = loadHead; + while (1) { + if (i == loadTail && !find(inst)) { + assert(0 && "Load not in the queue!"); + } else if (loadQueue[i] == inst) { + break; + } + + i = i + 1; + if (i >= LQEntries) { + i = 0; + } + } +#endif // DEBUG*/ + + load_fault = inst->initiateAcc(); + + // Might want to make sure that I'm not overwriting a previously faulting + // instruction that hasn't been checked yet. + // Actually probably want the oldest faulting load + if (load_fault != NoFault) { + // Maybe just set it as can commit here, although that might cause + // some other problems with sending traps to the ROB too quickly. +// iewStage->instToCommit(inst); +// iewStage->activityThisCycle(); + } + + return load_fault; +} + +template +Fault +OzoneLSQ::executeLoad(int lq_idx) +{ + // Very hackish. Not sure the best way to check that this + // instruction is at the head of the ROB. I should have some sort + // of extra information here so that I'm not overloading the + // canCommit signal for 15 different things. + loadQueue[lq_idx]->setCanCommit(); + Fault ret_fault = executeLoad(loadQueue[lq_idx]); + loadQueue[lq_idx]->clearCanCommit(); + return ret_fault; +} + +template +Fault +OzoneLSQ::executeStore(DynInstPtr &store_inst) +{ + // Make sure that a store exists. + assert(stores != 0); + + int store_idx = store_inst->sqIdx; + + DPRINTF(OzoneLSQ, "Executing store PC %#x [sn:%lli]\n", + store_inst->readPC(), store_inst->seqNum); + + // Check the recently completed loads to see if any match this store's + // address. If so, then we have a memory ordering violation. + int load_idx = store_inst->lqIdx; + + Fault store_fault = store_inst->initiateAcc(); + + // Store size should now be available. 
Use it to get proper offset for + // addr comparisons. + int size = storeQueue[store_idx].size; + + if (size == 0) { + DPRINTF(OzoneLSQ,"Fault on Store PC %#x, [sn:%lli],Size = 0\n", + store_inst->readPC(),store_inst->seqNum); + + return store_fault; + } + + assert(store_fault == NoFault); + + if (!storeFaultInst) { + if (store_fault != NoFault) { + panic("Fault in a store instruction!"); + storeFaultInst = store_inst; + } else if (store_inst->isNonSpeculative()) { + // Nonspeculative accesses (namely store conditionals) + // need to set themselves as able to writeback if we + // haven't had a fault by here. + storeQueue[store_idx].canWB = true; + + ++storesToWB; + } + } + + if (!memDepViolator) { + while (load_idx != loadTail) { + // Actually should only check loads that have actually executed + // Might be safe because effAddr is set to InvalAddr when the + // dyn inst is created. + + // Must actually check all addrs in the proper size range + // Which is more correct than needs to be. What if for now we just + // assume all loads are quad-word loads, and do the addr based + // on that. + // @todo: Fix this, magic number being used here + if ((loadQueue[load_idx]->effAddr >> 8) == + (store_inst->effAddr >> 8)) { + // A load incorrectly passed this store. Squash and refetch. + // For now return a fault to show that it was unsuccessful. + memDepViolator = loadQueue[load_idx]; + + return TheISA::genMachineCheckFault(); + } + + incrLdIdx(load_idx); + } + + // If we've reached this point, there was no violation. 
+ memDepViolator = NULL; + } + + return store_fault; +} + +template +void +OzoneLSQ::commitLoad() +{ + assert(loadQueue[loadHead]); + + DPRINTF(OzoneLSQ, "[sn:%lli] Committing head load instruction, PC %#x\n", + loadQueue[loadHead]->seqNum, loadQueue[loadHead]->readPC()); + + + loadQueue[loadHead] = NULL; + + incrLdIdx(loadHead); + + --loads; +} + +template +void +OzoneLSQ::commitLoad(InstSeqNum &inst) +{ + // Hopefully I don't use this function too much + panic("Don't use this function!"); + + int i = loadHead; + while (1) { + if (i == loadTail) { + assert(0 && "Load not in the queue!"); + } else if (loadQueue[i]->seqNum == inst) { + break; + } + + ++i; + if (i >= LQEntries) { + i = 0; + } + } + +// loadQueue[i]->removeInLSQ(); + loadQueue[i] = NULL; + --loads; +} + +template +void +OzoneLSQ::commitLoads(InstSeqNum &youngest_inst) +{ + assert(loads == 0 || loadQueue[loadHead]); + + while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) { + commitLoad(); + } +} + +template +void +OzoneLSQ::commitStores(InstSeqNum &youngest_inst) +{ + assert(stores == 0 || storeQueue[storeHead].inst); + + int store_idx = storeHead; + + while (store_idx != storeTail) { + assert(storeQueue[store_idx].inst); + if (!storeQueue[store_idx].canWB) { + if (storeQueue[store_idx].inst->seqNum > youngest_inst) { + break; + } + DPRINTF(OzoneLSQ, "Marking store as able to write back, PC " + "%#x [sn:%lli]\n", + storeQueue[store_idx].inst->readPC(), + storeQueue[store_idx].inst->seqNum); + + storeQueue[store_idx].canWB = true; + +// --stores; + ++storesToWB; + } + + incrStIdx(store_idx); + } +} + +template +void +OzoneLSQ::writebackStores() +{ + while (storesToWB > 0 && + storeWBIdx != storeTail && + storeQueue[storeWBIdx].inst && + storeQueue[storeWBIdx].canWB && + usedPorts < cachePorts) { + + if (storeQueue[storeWBIdx].size == 0) { + completeStore(storeWBIdx); + + incrStIdx(storeWBIdx); + + continue; + } + + if (dcacheInterface && dcacheInterface->isBlocked()) { + DPRINTF(OzoneLSQ, 
"Unable to write back any more stores, cache" + " is blocked!\n"); + break; + } + + ++usedPorts; + + if (storeQueue[storeWBIdx].inst->isDataPrefetch()) { + incrStIdx(storeWBIdx); + + continue; + } + + assert(storeQueue[storeWBIdx].req); + assert(!storeQueue[storeWBIdx].committed); + + MemReqPtr req = storeQueue[storeWBIdx].req; + storeQueue[storeWBIdx].committed = true; + +// Fault fault = cpu->translateDataReadReq(req); + req->cmd = Write; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + memcpy(req->data, (uint8_t *)&storeQueue[storeWBIdx].data, req->size); + + DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x " + "to Addr:%#x, data:%#x [sn:%lli]\n", + storeWBIdx,storeQueue[storeWBIdx].inst->readPC(), + req->paddr, *(req->data), + storeQueue[storeWBIdx].inst->seqNum); + +// if (fault != NoFault) { + //What should we do if there is a fault??? + //for now panic +// panic("Page Table Fault!!!!!\n"); +// } + + if (dcacheInterface) { + MemAccessResult result = dcacheInterface->access(req); + + //@todo temp fix for LL/SC (works fine for 1 CPU) + if (req->flags & LOCKED) { + req->result=1; + panic("LL/SC! oh no no support!!!"); + } + + if (isStalled() && + storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) { + DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] " + "load idx:%i\n", + stallingStoreIsn, stallingLoadIdx); + stalled = false; + stallingStoreIsn = 0; + be->replayMemInst(loadQueue[stallingLoadIdx]); + } + + if (result != MA_HIT && dcacheInterface->doEvents()) { + Event *wb = NULL; +/* + typename IEW::LdWritebackEvent *wb = NULL; + if (req->flags & LOCKED) { + // Stx_C does not generate a system port transaction. 
+ req->result=0; + wb = new typename IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst, + iewStage); + } +*/ + DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n"); + +// DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n", +// storeQueue[storeWBIdx].inst->seqNum); + + // Will stores need their own kind of writeback events? + // Do stores even need writeback events? + assert(!req->completionEvent); + req->completionEvent = new + StoreCompletionEvent(storeWBIdx, wb, this); + + lastDcacheStall = curTick; + + _status = DcacheMissStall; + + //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum); + + //DPRINTF(OzoneLSQ, "Added MSHR. count = %i\n",mshrSeqNums.size()); + + // Increment stat here or something + } else { + DPRINTF(OzoneLSQ,"D-Cache: Write Hit on idx:%i !\n", + storeWBIdx); + +// DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", +// storeQueue[storeWBIdx].inst->seqNum); + + if (req->flags & LOCKED) { + // Stx_C does not generate a system port transaction. + req->result=1; + typename BackEnd::LdWritebackEvent *wb = + new typename BackEnd::LdWritebackEvent(storeQueue[storeWBIdx].inst, + be); + wb->schedule(curTick); + } + + completeStore(storeWBIdx); + } + + incrStIdx(storeWBIdx); + } else { + panic("Must HAVE DCACHE!!!!!\n"); + } + } + + // Not sure this should set it to 0. + usedPorts = 0; + + assert(stores >= 0 && storesToWB >= 0); +} + +/*template +void +OzoneLSQ::removeMSHR(InstSeqNum seqNum) +{ + list::iterator mshr_it = find(mshrSeqNums.begin(), + mshrSeqNums.end(), + seqNum); + + if (mshr_it != mshrSeqNums.end()) { + mshrSeqNums.erase(mshr_it); + DPRINTF(OzoneLSQ, "Removing MSHR. count = %i\n",mshrSeqNums.size()); + } +}*/ + +template +void +OzoneLSQ::squash(const InstSeqNum &squashed_num) +{ + DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!" 
+ "(Loads:%i Stores:%i)\n",squashed_num,loads,stores); + + int load_idx = loadTail; + decrLdIdx(load_idx); + + while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) { + + // Clear the smart pointer to make sure it is decremented. + DPRINTF(OzoneLSQ,"Load Instruction PC %#x squashed, " + "[sn:%lli]\n", + loadQueue[load_idx]->readPC(), + loadQueue[load_idx]->seqNum); + + if (isStalled() && load_idx == stallingLoadIdx) { + stalled = false; + stallingStoreIsn = 0; + stallingLoadIdx = 0; + } + +// loadQueue[load_idx]->squashed = true; + loadQueue[load_idx] = NULL; + --loads; + + // Inefficient! + loadTail = load_idx; + + decrLdIdx(load_idx); + } + + int store_idx = storeTail; + decrStIdx(store_idx); + + while (stores != 0 && storeQueue[store_idx].inst->seqNum > squashed_num) { + + // Clear the smart pointer to make sure it is decremented. + DPRINTF(OzoneLSQ,"Store Instruction PC %#x squashed, " + "idx:%i [sn:%lli]\n", + storeQueue[store_idx].inst->readPC(), + store_idx, storeQueue[store_idx].inst->seqNum); + + // I don't think this can happen. It should have been cleared by the + // stalling load. + if (isStalled() && + storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { + panic("Is stalled should have been cleared by stalling load!\n"); + stalled = false; + stallingStoreIsn = 0; + } + +// storeQueue[store_idx].inst->squashed = true; + storeQueue[store_idx].inst = NULL; + storeQueue[store_idx].canWB = 0; + + if (storeQueue[store_idx].req) { + assert(!storeQueue[store_idx].req->completionEvent); + } + storeQueue[store_idx].req = NULL; + --stores; + + // Inefficient! 
+ storeTail = store_idx; + + decrStIdx(store_idx); + } +} + +template +void +OzoneLSQ::dumpInsts() +{ + cprintf("Load store queue: Dumping instructions.\n"); + cprintf("Load queue size: %i\n", loads); + cprintf("Load queue: "); + + int load_idx = loadHead; + + while (load_idx != loadTail && loadQueue[load_idx]) { + cprintf("[sn:%lli] %#x ", loadQueue[load_idx]->seqNum, + loadQueue[load_idx]->readPC()); + + incrLdIdx(load_idx); + } + + cprintf("\nStore queue size: %i\n", stores); + cprintf("Store queue: "); + + int store_idx = storeHead; + + while (store_idx != storeTail && storeQueue[store_idx].inst) { + cprintf("[sn:%lli] %#x ", storeQueue[store_idx].inst->seqNum, + storeQueue[store_idx].inst->readPC()); + + incrStIdx(store_idx); + } + + cprintf("\n"); +} + +template +void +OzoneLSQ::completeStore(int store_idx) +{ + assert(storeQueue[store_idx].inst); + storeQueue[store_idx].completed = true; + --storesToWB; + // A bit conservative because a store completion may not free up entries, + // but hopefully avoids two store completions in one cycle from making + // the CPU tick twice. 
+// cpu->activityThisCycle(); + + if (store_idx == storeHead) { + do { + incrStIdx(storeHead); + + --stores; + } while (storeQueue[storeHead].completed && + storeHead != storeTail); + +// be->updateLSQNextCycle = true; + } + + DPRINTF(OzoneLSQ, "Store head idx:%i\n", storeHead); + + if (isStalled() && + storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { + DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] " + "load idx:%i\n", + stallingStoreIsn, stallingLoadIdx); + stalled = false; + stallingStoreIsn = 0; + be->replayMemInst(loadQueue[stallingLoadIdx]); + } +} + +template +inline void +OzoneLSQ::incrStIdx(int &store_idx) +{ + if (++store_idx >= SQEntries) + store_idx = 0; +} + +template +inline void +OzoneLSQ::decrStIdx(int &store_idx) +{ + if (--store_idx < 0) + store_idx += SQEntries; +} + +template +inline void +OzoneLSQ::incrLdIdx(int &load_idx) +{ + if (++load_idx >= LQEntries) + load_idx = 0; +} + +template +inline void +OzoneLSQ::decrLdIdx(int &load_idx) +{ + if (--load_idx < 0) + load_idx += LQEntries; +} diff --git a/src/cpu/ozone/lw_back_end.cc b/src/cpu/ozone/lw_back_end.cc new file mode 100644 index 000000000..8e9a56ef5 --- /dev/null +++ b/src/cpu/ozone/lw_back_end.cc @@ -0,0 +1,5 @@ + +#include "cpu/ozone/lw_back_end_impl.hh" +#include "cpu/ozone/ozone_impl.hh" + +template class LWBackEnd; diff --git a/src/cpu/ozone/lw_back_end.hh b/src/cpu/ozone/lw_back_end.hh new file mode 100644 index 000000000..021381dd0 --- /dev/null +++ b/src/cpu/ozone/lw_back_end.hh @@ -0,0 +1,469 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef __CPU_OZONE_LW_BACK_END_HH__
+#define __CPU_OZONE_LW_BACK_END_HH__
+
+// NOTE(review): the four bare #include lines below and the bare `template`
+// keywords in this header look like their angle-bracket contents were lost
+// when this patch was extracted -- confirm against the original commit.
+#include
+#include
+#include
+#include
+
+#include "arch/faults.hh"
+#include "base/timebuf.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/ozone/rename_table.hh"
+#include "cpu/ozone/thread_state.hh"
+#include "mem/request.hh"
+#include "sim/eventq.hh"
+
+template
+class Checker;
+class ExecContext;
+
+template
+class OzoneThreadState;
+
+/**
+ * Back end of the Ozone CPU.  Declares the dispatch, execute, writeback
+ * and commit steps, holds the load/store queue (LSQ) and the rename
+ * tables by value, and keeps the in-flight instruction lists and the
+ * statistics registered by regStats().
+ */
+template
+class LWBackEnd
+{
+  public:
+    typedef OzoneThreadState Thread;
+
+    typedef typename Impl::Params Params;
+    typedef typename Impl::DynInst DynInst;
+    typedef typename Impl::DynInstPtr DynInstPtr;
+    typedef typename Impl::FullCPU FullCPU;
+    typedef typename Impl::FrontEnd FrontEnd;
+    typedef typename Impl::FullCPU::CommStruct CommStruct;
+
+    /** Payload carried by the inter-stage time buffers: a single count. */
+    struct SizeStruct {
+        int size;
+    };
+
+    typedef SizeStruct DispatchToIssue;
+    typedef SizeStruct IssueToExec;
+    typedef SizeStruct ExecToCommit;
+    typedef SizeStruct Writeback;
+
+    TimeBuffer d2i;
+    typename TimeBuffer::wire instsToDispatch;
+    TimeBuffer i2e;
+    typename TimeBuffer::wire instsToExecute;
+    TimeBuffer e2c;
+    TimeBuffer numInstsToWB;
+
+    TimeBuffer *comm;
+    typename TimeBuffer::wire toIEW;
+    typename TimeBuffer::wire fromCommit;
+
+    /** Event whose process() sets trapSquash on the back end. */
+    class TrapEvent : public Event {
+      private:
+        LWBackEnd *be;
+
+      public:
+        TrapEvent(LWBackEnd *_be);
+
+        void process();
+        const char *description();
+    };
+
+    /** LdWriteback event for a load completion. */
+    class LdWritebackEvent : public Event {
+      private:
+        /** Instruction that is writing back data to the register file. */
+        DynInstPtr inst;
+        /** Pointer to the back end that receives the completed load. */
+        LWBackEnd *be;
+
+        /** True once setDcacheMiss() has been called on this event. */
+        bool dcacheMiss;
+
+      public:
+        /** Constructs a load writeback event. */
+        LdWritebackEvent(DynInstPtr &_inst, LWBackEnd *be);
+
+        /** Processes writeback event. */
+        virtual void process();
+        /** Returns the description of the writeback event. */
+        virtual const char *description();
+
+        /** Flags this writeback as a D-cache miss and registers the
+         * instruction with the back end through addDcacheMiss().
+         */
+        void setDcacheMiss() { dcacheMiss = true; be->addDcacheMiss(inst); }
+    };
+
+    LWBackEnd(Params *params);
+
+    std::string name() const;
+
+    void regStats();
+
+    void setCPU(FullCPU *cpu_ptr);
+
+    void setFrontEnd(FrontEnd *front_end_ptr)
+    { frontEnd = front_end_ptr; }
+
+    void setXC(ExecContext *xc_ptr)
+    { xc = xc_ptr; }
+
+    void setThreadState(Thread *thread_ptr)
+    { thread = thread_ptr; }
+
+    void setCommBuffer(TimeBuffer *_comm);
+
+    void tick();
+    void squash();
+    /** Requests a squash on behalf of the execution context by setting
+     * xcSquash.
+     */
+    void generateXCEvent() { xcSquash = true; }
+    void squashFromXC();
+    void squashFromTrap();
+    void checkInterrupts();
+    /** Set by TrapEvent::process(). */
+    bool trapSquash;
+    /** Set by generateXCEvent(). */
+    bool xcSquash;
+
+    /** Forwards the access to LSQ.read(). */
+    template
+    Fault read(RequestPtr req, T &data, int load_idx);
+
+    /** Forwards the access to LSQ.write(). */
+    template
+    Fault write(RequestPtr req, T &data, int store_idx);
+
+    Addr readCommitPC() { return commitPC; }
+
+    Addr commitPC;
+
+    Tick lastCommitCycle;
+
+    /** True when instList holds no instructions. */
+    bool robEmpty() { return instList.empty(); }
+
+    /** True when numInsts has reached numROBEntries. */
+    bool isFull() { return numInsts >= numROBEntries; }
+    /** True when either status or dispatchStatus is Blocked. */
+    bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }
+
+    void fetchFault(Fault &fault);
+
+    int wakeDependents(DynInstPtr &inst, bool memory_deps = false);
+
+    /** Tells memory dependence unit that a memory instruction needs to be
+     * rescheduled. It will re-execute once replayMemInst() is called.
+     */
+    void rescheduleMemInst(DynInstPtr &inst);
+
+    /** Re-executes all rescheduled memory instructions. */
+    void replayMemInst(DynInstPtr &inst);
+
+    /** Completes memory instruction. */
+    void completeMemInst(DynInstPtr &inst) { }
+
+    /** Records the instruction's sequence number in waitingMemOps and
+     * increments numWaitingMemOps.
+     */
+    void addDcacheMiss(DynInstPtr &inst)
+    {
+        waitingMemOps.insert(inst->seqNum);
+        numWaitingMemOps++;
+        DPRINTF(BE, "Adding a Dcache miss mem op [sn:%lli], total %i\n",
+                inst->seqNum, numWaitingMemOps);
+    }
+
+    /** Removes the instruction's sequence number from waitingMemOps and
+     * decrements numWaitingMemOps; asserts that the number was recorded.
+     */
+    void removeDcacheMiss(DynInstPtr &inst)
+    {
+        assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end());
+        waitingMemOps.erase(inst->seqNum);
+        numWaitingMemOps--;
+        DPRINTF(BE, "Removing a Dcache miss mem op [sn:%lli], total %i\n",
+                inst->seqNum, numWaitingMemOps);
+    }
+
+    /** Same bookkeeping as addDcacheMiss() (same set, same counter); only
+     * the debug message differs.
+     */
+    void addWaitingMemOp(DynInstPtr &inst)
+    {
+        waitingMemOps.insert(inst->seqNum);
+        numWaitingMemOps++;
+        DPRINTF(BE, "Adding a waiting mem op [sn:%lli], total %i\n",
+                inst->seqNum, numWaitingMemOps);
+    }
+
+    /** Same bookkeeping as removeDcacheMiss() (same set, same counter);
+     * only the debug message differs.
+     */
+    void removeWaitingMemOp(DynInstPtr &inst)
+    {
+        assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end());
+        waitingMemOps.erase(inst->seqNum);
+        numWaitingMemOps--;
+        DPRINTF(BE, "Removing a waiting mem op [sn:%lli], total %i\n",
+                inst->seqNum, numWaitingMemOps);
+    }
+
+    void instToCommit(DynInstPtr &inst);
+
+    void switchOut();
+    void doSwitchOut();
+    void takeOverFrom(ExecContext *old_xc = NULL);
+
+    bool isSwitchedOut() { return switchedOut; }
+
+  private:
+    void generateTrapEvent(Tick latency = 0);
+    void handleFault(Fault &fault, Tick latency = 0);
+    void updateStructures();
+    void dispatchInsts();
+    void dispatchStall();
+    void checkDispatchStatus();
+    void executeInsts();
+    void commitInsts();
+    void addToLSQ(DynInstPtr &inst);
+    void writebackInsts();
+    bool commitInst(int inst_num);
+    void squash(const InstSeqNum &sn);
+    void squashDueToBranch(DynInstPtr &inst);
+    void squashDueToMemViolation(DynInstPtr &inst);
+    void squashDueToMemBlocked(DynInstPtr &inst);
+    void updateExeInstStats(DynInstPtr &inst);
+    void updateComInstStats(DynInstPtr &inst);
+
+  public:
+    FullCPU *cpu;
+
+    FrontEnd *frontEnd;
+
+    ExecContext *xc;
+
+    Thread *thread;
+
+    /** States shared by the status, dispatchStatus and commitStatus
+     * members below.
+     */
+    enum Status {
+        Running,
+        Idle,
+        DcacheMissStall,
+        DcacheMissComplete,
+        Blocked,
+        TrapPending
+    };
+
+    Status status;
+
+    Status dispatchStatus;
+
+    Status commitStatus;
+
+    Counter funcExeInst;
+
+  private:
+    typedef typename Impl::LdstQueue LdstQueue;
+
+    LdstQueue LSQ;
+  public:
+    RenameTable commitRenameTable;
+
+    RenameTable renameTable;
+  private:
+    /** Event bound to the back end; the process() defined in
+     * lw_back_end_impl.hh has an empty body.
+     */
+    class DCacheCompletionEvent : public Event
+    {
+      private:
+        LWBackEnd *be;
+
+      public:
+        DCacheCompletionEvent(LWBackEnd *_be);
+
+        virtual void process();
+        virtual const char *description();
+    };
+
+    friend class DCacheCompletionEvent;
+
+    DCacheCompletionEvent cacheCompletionEvent;
+
+    MemInterface *dcacheInterface;
+
+    // General back end width. Used if the more specific isn't given.
+    int width;
+
+    // Dispatch width.
+    int dispatchWidth;
+    int numDispatchEntries;
+    int dispatchSize;
+
+    int waitingInsts;
+
+    int issueWidth;
+
+    // Writeback width
+    int wbWidth;
+
+    // Commit width
+    int commitWidth;
+
+    /** Index into queue of instructions being written back. */
+    unsigned wbNumInst;
+
+    /** Cycle number within the queue of instructions being written
+     * back.  Used in case there are too many instructions writing
+     * back at the current cycle and writebacks need to be scheduled
+     * for the future. See comments in instToCommit().
+     */
+    unsigned wbCycle;
+
+    int numROBEntries;
+    int numInsts;
+
+    /** Sequence numbers added by addDcacheMiss()/addWaitingMemOp(). */
+    std::set waitingMemOps;
+    typedef std::set::iterator MemIt;
+    int numWaitingMemOps;
+    unsigned maxOutstandingMemOps;
+
+    bool squashPending;
+    InstSeqNum squashSeqNum;
+    Addr squashNextPC;
+
+    Fault faultFromFetch;
+    bool fetchHasFault;
+
+    bool switchedOut;
+    bool switchPending;
+
+    DynInstPtr memBarrier;
+
+  private:
+    /** Ordering for exeList: an instruction with a larger seqNum compares
+     * as lower priority, so the smallest seqNum is at the top.
+     */
+    struct pqCompare {
+        bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
+        {
+            return lhs->seqNum > rhs->seqNum;
+        }
+    };
+
+    typedef typename std::priority_queue, pqCompare> ReadyInstQueue;
+    ReadyInstQueue exeList;
+
+    typedef typename std::list::iterator InstListIt;
+
+    std::list instList;
+    std::list waitingList;
+    std::list replayList;
+    std::list writeback;
+
+    int latency;
+
+    int squashLatency;
+
+    bool exactFullStall;
+
+    // number of cycles stalled for D-cache misses
+/*  Stats::Scalar<> dcacheStallCycles;
+    Counter lastDcacheStall;
+*/
+    Stats::Vector<> rob_cap_events;
+    Stats::Vector<> rob_cap_inst_count;
+    Stats::Vector<> iq_cap_events;
+    Stats::Vector<> iq_cap_inst_count;
+    // total number of instructions executed
+    Stats::Vector<> exe_inst;
+    Stats::Vector<> exe_swp;
+    Stats::Vector<> exe_nop;
+    Stats::Vector<> exe_refs;
+    Stats::Vector<> exe_loads;
+    Stats::Vector<> exe_branches;
+
+    Stats::Vector<> issued_ops;
+
+    // total number of loads forwarded from LSQ stores
+    Stats::Vector<> lsq_forw_loads;
+
+    // total number of loads ignored due to invalid addresses
+    Stats::Vector<> inv_addr_loads;
+
+    // total number of software prefetches ignored due to invalid addresses
+    Stats::Vector<> inv_addr_swpfs;
+    // ready loads blocked due to memory disambiguation
+    Stats::Vector<> lsq_blocked_loads;
+
+    Stats::Scalar<> lsqInversion;
+
+    Stats::Vector<> n_issued_dist;
+    Stats::VectorDistribution<> issue_delay_dist;
+
+    Stats::VectorDistribution<> queue_res_dist;
+/*
+    Stats::Vector<> stat_fu_busy;
+    Stats::Vector2d<> stat_fuBusy;
+    Stats::Vector<> dist_unissued;
+    Stats::Vector2d<> stat_issued_inst_type;
+
+    Stats::Formula misspec_cnt;
+    Stats::Formula misspec_ipc;
+    Stats::Formula issue_rate;
+    Stats::Formula issue_stores;
+    Stats::Formula issue_op_rate;
+    Stats::Formula fu_busy_rate;
+    Stats::Formula commit_stores;
+    Stats::Formula commit_ipc;
+    Stats::Formula commit_ipb;
+    Stats::Formula lsq_inv_rate;
+*/
+    Stats::Vector<> writeback_count;
+    Stats::Vector<> producer_inst;
+    Stats::Vector<> consumer_inst;
+    Stats::Vector<> wb_penalized;
+
+    Stats::Formula wb_rate;
+    Stats::Formula wb_fanout;
+    Stats::Formula wb_penalized_rate;
+
+    // total number of instructions committed
+    Stats::Vector<> stat_com_inst;
+    Stats::Vector<> stat_com_swp;
+    Stats::Vector<> stat_com_refs;
+    Stats::Vector<> stat_com_loads;
+    Stats::Vector<> stat_com_membars;
+    Stats::Vector<> stat_com_branches;
+
+    Stats::Distribution<> n_committed_dist;
+
+    Stats::Scalar<> commit_eligible_samples;
+    Stats::Vector<> commit_eligible;
+
+    Stats::Vector<> squashedInsts;
+    Stats::Vector<> ROBSquashedInsts;
+
+    Stats::Scalar<> ROB_fcount;
+    Stats::Formula ROB_full_rate;
+
+    Stats::Vector<> ROB_count;     // cumulative ROB occupancy
+    Stats::Formula ROB_occ_rate;
+    Stats::VectorDistribution<> ROB_occ_dist;
+  public:
+    void dumpInsts();
+
+    Checker *checker;
+};
+
+/** Forwards the load to the LSQ and returns the fault it reports. */
+template
+template
+Fault
+LWBackEnd::read(RequestPtr req, T &data, int load_idx)
+{
+    return LSQ.read(req, data, load_idx);
+}
+
+/** Forwards the store to the LSQ and returns the fault it reports. */
+template
+template
+Fault
+LWBackEnd::write(RequestPtr req, T &data, int store_idx)
+{
+    return LSQ.write(req, data, store_idx);
+}
+
+#endif // __CPU_OZONE_LW_BACK_END_HH__
diff --git a/src/cpu/ozone/lw_back_end_impl.hh b/src/cpu/ozone/lw_back_end_impl.hh
new file mode 100644
index 000000000..41b4ea24b
--- /dev/null
+++ b/src/cpu/ozone/lw_back_end_impl.hh
@@ -0,0 +1,1693 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "cpu/checker/cpu.hh"
+#include "cpu/ozone/lw_back_end.hh"
+#include "encumbered/cpu/full/op_class.hh"
+
+/**
+ * Allocates a TrapEvent, schedules it at curTick + cpu->cycles(latency),
+ * and marks the thread as having a trap pending.
+ */
+template
+void
+LWBackEnd::generateTrapEvent(Tick latency)
+{
+    DPRINTF(BE, "Generating trap event\n");
+
+    TrapEvent *trap = new TrapEvent(this);
+
+    trap->schedule(curTick + cpu->cycles(latency));
+
+    thread->trapPending = true;
+}
+
+/**
+ * Marks the dependents of a completed instruction ready and queues those
+ * that can now issue.
+ *
+ * @param inst Instruction whose dependents are woken; must not be squashed.
+ * @param memory_deps When true, walks inst->getMemDeps() and marks memory
+ * dependences ready; otherwise walks inst->getDependents() and marks a
+ * source register ready.
+ * @return Number of dependents visited.
+ */
+template
+int
+LWBackEnd::wakeDependents(DynInstPtr &inst, bool memory_deps)
+{
+    assert(!inst->isSquashed());
+    std::vector &dependents = memory_deps ? inst->getMemDeps() :
+        inst->getDependents();
+    int num_outputs = dependents.size();
+
+    DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum);
+
+    for (int i = 0; i < num_outputs; i++) {
+        DynInstPtr dep_inst = dependents[i];
+        if (!memory_deps) {
+            dep_inst->markSrcRegReady();
+        } else {
+            if (!dep_inst->isSquashed())
+                dep_inst->markMemInstReady(inst.get());
+        }
+
+        // Note: this message is printed on the memory-dependence path too.
+        DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum);
+
+        // Only ready, in-ROB, speculative instructions that are not store
+        // conditionals or barriers are pushed to exeList here.
+        if (dep_inst->readyToIssue() && dep_inst->isInROB() &&
+            !dep_inst->isNonSpeculative() && !dep_inst->isStoreConditional() &&
+            dep_inst->memDepReady() && !dep_inst->isMemBarrier() &&
+            !dep_inst->isWriteBarrier()) {
+            DPRINTF(BE, "Adding instruction to exeList [sn:%lli]\n",
+                    dep_inst->seqNum);
+            exeList.push(dep_inst);
+            if (dep_inst->iqItValid) {
+                DPRINTF(BE, "Removing instruction from waiting list\n");
+                waitingList.erase(dep_inst->iqIt);
+                waitingInsts--;
+                dep_inst->iqItValid = false;
+                assert(waitingInsts >= 0);
+            }
+            if (dep_inst->isMemRef()) {
+                // removeWaitingMemOp() asserts the op was recorded as waiting.
+                removeWaitingMemOp(dep_inst);
+                DPRINTF(BE, "Issued a waiting mem op [sn:%lli]\n",
+                        dep_inst->seqNum);
+            }
+        }
+    }
+    return num_outputs;
+}
+
+/** Puts the instruction at the front of replayList. */
+template
+void
+LWBackEnd::rescheduleMemInst(DynInstPtr &inst)
+{
+    replayList.push_front(inst);
+}
+
+/** Builds a trap event on mainEventQueue at CPU_Tick_Pri and sets its
+ * AutoDelete flag.
+ */
+template
+LWBackEnd::TrapEvent::TrapEvent(LWBackEnd *_be)
+    : Event(&mainEventQueue, CPU_Tick_Pri), be(_be)
+{
+    this->setFlags(Event::AutoDelete);
+}
+
+/** Sets trapSquash on the back end. */
+template
+void
+LWBackEnd::TrapEvent::process()
+{
+    be->trapSquash = true;
+}
+
+template
+const char *
+LWBackEnd::TrapEvent::description()
+{
+    return "Trap event";
+}
+
+/**
+ * Moves every instruction in replayList onto exeList, leaving replayList
+ * empty, and asserts that inst was one of them.
+ */
+template
+void
+LWBackEnd::replayMemInst(DynInstPtr &inst)
+{
+    bool found_inst = false;
+    while (!replayList.empty()) {
+        exeList.push(replayList.front());
+        if (replayList.front() == inst) {
+            found_inst = true;
+        }
+        replayList.pop_front();
+    }
+    assert(found_inst);
+}
+
+/** Builds a load writeback event on mainEventQueue with dcacheMiss cleared
+ * and sets its AutoDelete flag.
+ */
+template
+LWBackEnd::LdWritebackEvent::LdWritebackEvent(DynInstPtr &_inst,
+                                              LWBackEnd *_be)
+    : Event(&mainEventQueue), inst(_inst), be(_be), dcacheMiss(false)
+{
+    this->setFlags(Event::AutoDelete);
+}
+
+/**
+ * Completes a load.  Returns early if the back end is switched out or
+ * the instruction was squashed.
+ */
+template
+void
+LWBackEnd::LdWritebackEvent::process()
+{
+    DPRINTF(BE, "Load writeback event [sn:%lli]\n", inst->seqNum);
+//    DPRINTF(Activity, "Activity: Ld Writeback event [sn:%lli]\n", inst->seqNum);
+
+    //iewStage->ldstQueue.removeMSHR(inst->threadNumber,inst->seqNum);
+
+//    iewStage->wakeCPU();
+
+    if (be->isSwitchedOut())
+        return;
+
+    // Undo the bookkeeping done by setDcacheMiss().
+    if (dcacheMiss) {
+        be->removeDcacheMiss(inst);
+    }
+
+    if (inst->isSquashed()) {
+        inst = NULL;
+        return;
+    }
+
+    if (!inst->isExecuted()) {
+        inst->setExecuted();
+
+        // Execute again to copy data to proper place.
+ inst->completeAcc(); + } + + // Need to insert instruction into queue to commit + be->instToCommit(inst); + + //wroteToTimeBuffer = true; +// iewStage->activityThisCycle(); + + inst = NULL; +} + +template +const char * +LWBackEnd::LdWritebackEvent::description() +{ + return "Load writeback event"; +} + + +template +LWBackEnd::DCacheCompletionEvent::DCacheCompletionEvent(LWBackEnd *_be) + : Event(&mainEventQueue, CPU_Tick_Pri), be(_be) +{ +} + +template +void +LWBackEnd::DCacheCompletionEvent::process() +{ +} + +template +const char * +LWBackEnd::DCacheCompletionEvent::description() +{ + return "Cache completion event"; +} + +template +LWBackEnd::LWBackEnd(Params *params) + : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5), + trapSquash(false), xcSquash(false), cacheCompletionEvent(this), + dcacheInterface(params->dcacheInterface), width(params->backEndWidth), + exactFullStall(true) +{ + numROBEntries = params->numROBEntries; + numInsts = 0; + numDispatchEntries = 32; + maxOutstandingMemOps = params->maxOutstandingMemOps; + numWaitingMemOps = 0; + waitingInsts = 0; + switchedOut = false; + switchPending = false; + + LSQ.setBE(this); + + // Setup IQ and LSQ with their parameters here. + instsToDispatch = d2i.getWire(-1); + + instsToExecute = i2e.getWire(-1); + + dispatchWidth = params->dispatchWidth ? params->dispatchWidth : width; + issueWidth = params->issueWidth ? params->issueWidth : width; + wbWidth = params->wbWidth ? params->wbWidth : width; + commitWidth = params->commitWidth ? 
params->commitWidth : width; + + LSQ.init(params, params->LQEntries, params->SQEntries, 0); + + dispatchStatus = Running; +} + +template +std::string +LWBackEnd::name() const +{ + return cpu->name() + ".backend"; +} + +template +void +LWBackEnd::regStats() +{ + using namespace Stats; + rob_cap_events + .init(cpu->number_of_threads) + .name(name() + ".ROB:cap_events") + .desc("number of cycles where ROB cap was active") + .flags(total) + ; + + rob_cap_inst_count + .init(cpu->number_of_threads) + .name(name() + ".ROB:cap_inst") + .desc("number of instructions held up by ROB cap") + .flags(total) + ; + + iq_cap_events + .init(cpu->number_of_threads) + .name(name() +".IQ:cap_events" ) + .desc("number of cycles where IQ cap was active") + .flags(total) + ; + + iq_cap_inst_count + .init(cpu->number_of_threads) + .name(name() + ".IQ:cap_inst") + .desc("number of instructions held up by IQ cap") + .flags(total) + ; + + + exe_inst + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:count") + .desc("number of insts issued") + .flags(total) + ; + + exe_swp + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:swp") + .desc("number of swp insts issued") + .flags(total) + ; + + exe_nop + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:nop") + .desc("number of nop insts issued") + .flags(total) + ; + + exe_refs + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:refs") + .desc("number of memory reference insts issued") + .flags(total) + ; + + exe_loads + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:loads") + .desc("number of load insts issued") + .flags(total) + ; + + exe_branches + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:branches") + .desc("Number of branches issued") + .flags(total) + ; + + issued_ops + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:op_count") + .desc("number of insts issued") + .flags(total) + ; + +/* + for (int i=0; inumber_of_threads) + .name(name() + ".LSQ:forw_loads") + .desc("number of loads forwarded 
via LSQ") + .flags(total) + ; + + inv_addr_loads + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:addr_loads") + .desc("number of invalid-address loads") + .flags(total) + ; + + inv_addr_swpfs + .init(cpu->number_of_threads) + .name(name() + ".ISSUE:addr_swpfs") + .desc("number of invalid-address SW prefetches") + .flags(total) + ; + + lsq_blocked_loads + .init(cpu->number_of_threads) + .name(name() + ".LSQ:blocked_loads") + .desc("number of ready loads not issued due to memory disambiguation") + .flags(total) + ; + + lsqInversion + .name(name() + ".ISSUE:lsq_invert") + .desc("Number of times LSQ instruction issued early") + ; + + n_issued_dist + .init(issueWidth + 1) + .name(name() + ".ISSUE:issued_per_cycle") + .desc("Number of insts issued each cycle") + .flags(total | pdf | dist) + ; + issue_delay_dist + .init(Num_OpClasses,0,99,2) + .name(name() + ".ISSUE:") + .desc("cycles from operands ready to issue") + .flags(pdf | cdf) + ; + + queue_res_dist + .init(Num_OpClasses, 0, 99, 2) + .name(name() + ".IQ:residence:") + .desc("cycles from dispatch to issue") + .flags(total | pdf | cdf ) + ; + for (int i = 0; i < Num_OpClasses; ++i) { + queue_res_dist.subname(i, opClassStrings[i]); + } + + writeback_count + .init(cpu->number_of_threads) + .name(name() + ".WB:count") + .desc("cumulative count of insts written-back") + .flags(total) + ; + + producer_inst + .init(cpu->number_of_threads) + .name(name() + ".WB:producers") + .desc("num instructions producing a value") + .flags(total) + ; + + consumer_inst + .init(cpu->number_of_threads) + .name(name() + ".WB:consumers") + .desc("num instructions consuming a value") + .flags(total) + ; + + wb_penalized + .init(cpu->number_of_threads) + .name(name() + ".WB:penalized") + .desc("number of instrctions required to write to 'other' IQ") + .flags(total) + ; + + + wb_penalized_rate + .name(name() + ".WB:penalized_rate") + .desc ("fraction of instructions written-back that wrote to 'other' IQ") + .flags(total) + ; + + 
wb_penalized_rate = wb_penalized / writeback_count; + + wb_fanout + .name(name() + ".WB:fanout") + .desc("average fanout of values written-back") + .flags(total) + ; + + wb_fanout = producer_inst / consumer_inst; + + wb_rate + .name(name() + ".WB:rate") + .desc("insts written-back per cycle") + .flags(total) + ; + wb_rate = writeback_count / cpu->numCycles; + + stat_com_inst + .init(cpu->number_of_threads) + .name(name() + ".COM:count") + .desc("Number of instructions committed") + .flags(total) + ; + + stat_com_swp + .init(cpu->number_of_threads) + .name(name() + ".COM:swp_count") + .desc("Number of s/w prefetches committed") + .flags(total) + ; + + stat_com_refs + .init(cpu->number_of_threads) + .name(name() + ".COM:refs") + .desc("Number of memory references committed") + .flags(total) + ; + + stat_com_loads + .init(cpu->number_of_threads) + .name(name() + ".COM:loads") + .desc("Number of loads committed") + .flags(total) + ; + + stat_com_membars + .init(cpu->number_of_threads) + .name(name() + ".COM:membars") + .desc("Number of memory barriers committed") + .flags(total) + ; + + stat_com_branches + .init(cpu->number_of_threads) + .name(name() + ".COM:branches") + .desc("Number of branches committed") + .flags(total) + ; + n_committed_dist + .init(0,commitWidth,1) + .name(name() + ".COM:committed_per_cycle") + .desc("Number of insts commited each cycle") + .flags(pdf) + ; + + // + // Commit-Eligible instructions... + // + // -> The number of instructions eligible to commit in those + // cycles where we reached our commit BW limit (less the number + // actually committed) + // + // -> The average value is computed over ALL CYCLES... 
not just + // the BW limited cycles + // + // -> The standard deviation is computed only over cycles where + // we reached the BW limit + // + commit_eligible + .init(cpu->number_of_threads) + .name(name() + ".COM:bw_limited") + .desc("number of insts not committed due to BW limits") + .flags(total) + ; + + commit_eligible_samples + .name(name() + ".COM:bw_lim_events") + .desc("number cycles where commit BW limit reached") + ; + + squashedInsts + .init(cpu->number_of_threads) + .name(name() + ".COM:squashed_insts") + .desc("Number of instructions removed from inst list") + ; + + ROBSquashedInsts + .init(cpu->number_of_threads) + .name(name() + ".COM:rob_squashed_insts") + .desc("Number of instructions removed from inst list when they reached the head of the ROB") + ; + + ROB_fcount + .name(name() + ".ROB:full_count") + .desc("number of cycles where ROB was full") + ; + + ROB_count + .init(cpu->number_of_threads) + .name(name() + ".ROB:occupancy") + .desc(name() + ".ROB occupancy (cumulative)") + .flags(total) + ; + + ROB_full_rate + .name(name() + ".ROB:full_rate") + .desc("ROB full per cycle") + ; + ROB_full_rate = ROB_fcount / cpu->numCycles; + + ROB_occ_rate + .name(name() + ".ROB:occ_rate") + .desc("ROB occupancy rate") + .flags(total) + ; + ROB_occ_rate = ROB_count / cpu->numCycles; + + ROB_occ_dist + .init(cpu->number_of_threads,0,numROBEntries,2) + .name(name() + ".ROB:occ_dist") + .desc("ROB Occupancy per cycle") + .flags(total | cdf) + ; +} + +template +void +LWBackEnd::setCPU(FullCPU *cpu_ptr) +{ + cpu = cpu_ptr; + LSQ.setCPU(cpu_ptr); + checker = cpu->checker; +} + +template +void +LWBackEnd::setCommBuffer(TimeBuffer *_comm) +{ + comm = _comm; + toIEW = comm->getWire(0); + fromCommit = comm->getWire(-1); +} + +#if FULL_SYSTEM +template +void +LWBackEnd::checkInterrupts() +{ + if (cpu->checkInterrupts && + cpu->check_interrupts() && + !cpu->inPalMode(thread->readPC()) && + !trapSquash && + !xcSquash) { + frontEnd->interruptPending = true; + if (robEmpty() 
&& !LSQ.hasStoresToWB()) { + // Will need to squash all instructions currently in flight and have + // the interrupt handler restart at the last non-committed inst. + // Most of that can be handled through the trap() function. The + // processInterrupts() function really just checks for interrupts + // and then calls trap() if there is an interrupt present. + + // Not sure which thread should be the one to interrupt. For now + // always do thread 0. + assert(!thread->inSyscall); + thread->inSyscall = true; + + // CPU will handle implementation of the interrupt. + cpu->processInterrupts(); + + // Now squash or record that I need to squash this cycle. + commitStatus = TrapPending; + + // Exit state update mode to avoid accidental updating. + thread->inSyscall = false; + + // Generate trap squash event. + generateTrapEvent(); + + DPRINTF(BE, "Interrupt detected.\n"); + } else { + DPRINTF(BE, "Interrupt must wait for ROB to drain.\n"); + } + } +} + +template +void +LWBackEnd::handleFault(Fault &fault, Tick latency) +{ + DPRINTF(BE, "Handling fault!\n"); + + assert(!thread->inSyscall); + + thread->inSyscall = true; + + // Consider holding onto the trap and waiting until the trap event + // happens for this to be executed. + fault->invoke(thread->getXCProxy()); + + // Exit state update mode to avoid accidental updating. + thread->inSyscall = false; + + commitStatus = TrapPending; + + // Generate trap squash event. + generateTrapEvent(latency); +} +#endif + +template +void +LWBackEnd::tick() +{ + DPRINTF(BE, "Ticking back end\n"); + + if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) { + cpu->signalSwitched(); + return; + } + + ROB_count[0]+= numInsts; + + wbCycle = 0; + + // Read in any done instruction information and update the IQ or LSQ. 
+ updateStructures(); + +#if FULL_SYSTEM + checkInterrupts(); + + if (trapSquash) { + assert(!xcSquash); + squashFromTrap(); + } else if (xcSquash) { + squashFromXC(); + } +#endif + + if (dispatchStatus != Blocked) { + dispatchInsts(); + } else { + checkDispatchStatus(); + } + + if (commitStatus != TrapPending) { + executeInsts(); + + commitInsts(); + } + + LSQ.writebackStores(); + + DPRINTF(BE, "Waiting insts: %i, mem ops: %i, ROB entries in use: %i, " + "LSQ loads: %i, LSQ stores: %i\n", + waitingInsts, numWaitingMemOps, numInsts, + LSQ.numLoads(), LSQ.numStores()); + +#ifdef DEBUG + assert(numInsts == instList.size()); + assert(waitingInsts == waitingList.size()); + assert(numWaitingMemOps == waitingMemOps.size()); + assert(!switchedOut); +#endif +} + +template +void +LWBackEnd::updateStructures() +{ + if (fromCommit->doneSeqNum) { + LSQ.commitLoads(fromCommit->doneSeqNum); + LSQ.commitStores(fromCommit->doneSeqNum); + } + + if (fromCommit->nonSpecSeqNum) { + if (fromCommit->uncached) { +// LSQ.executeLoad(fromCommit->lqIdx); + } else { +// IQ.scheduleNonSpec( +// fromCommit->nonSpecSeqNum); + } + } +} + +template +void +LWBackEnd::addToLSQ(DynInstPtr &inst) +{ + // Do anything LSQ specific here? 
+ LSQ.insert(inst); +} + +template +void +LWBackEnd::dispatchInsts() +{ + DPRINTF(BE, "Trying to dispatch instructions.\n"); + + while (numInsts < numROBEntries && + numWaitingMemOps < maxOutstandingMemOps) { + // Get instruction from front of time buffer + DynInstPtr inst = frontEnd->getInst(); + if (!inst) { + break; + } else if (inst->isSquashed()) { + continue; + } + + ++numInsts; + instList.push_front(inst); + + inst->setInROB(); + + DPRINTF(BE, "Dispatching instruction [sn:%lli] PC:%#x\n", + inst->seqNum, inst->readPC()); + + for (int i = 0; i < inst->numDestRegs(); ++i) + renameTable[inst->destRegIdx(i)] = inst; + + if (inst->isMemBarrier() || inst->isWriteBarrier()) { + if (memBarrier) { + DPRINTF(BE, "Instruction [sn:%lli] is waiting on " + "barrier [sn:%lli].\n", + inst->seqNum, memBarrier->seqNum); + memBarrier->addMemDependent(inst); + inst->addSrcMemInst(memBarrier); + } + memBarrier = inst; + inst->setCanCommit(); + } else if (inst->readyToIssue() && + !inst->isNonSpeculative() && + !inst->isStoreConditional()) { + if (inst->isMemRef()) { + + LSQ.insert(inst); + if (memBarrier) { + DPRINTF(BE, "Instruction [sn:%lli] is waiting on " + "barrier [sn:%lli].\n", + inst->seqNum, memBarrier->seqNum); + memBarrier->addMemDependent(inst); + inst->addSrcMemInst(memBarrier); + addWaitingMemOp(inst); + + waitingList.push_front(inst); + inst->iqIt = waitingList.begin(); + inst->iqItValid = true; + waitingInsts++; + } else { + DPRINTF(BE, "Instruction [sn:%lli] ready, addding to " + "exeList.\n", + inst->seqNum); + exeList.push(inst); + } + } else if (inst->isNop()) { + DPRINTF(BE, "Nop encountered [sn:%lli], skipping exeList.\n", + inst->seqNum); + inst->setIssued(); + inst->setExecuted(); + inst->setCanCommit(); + } else { + DPRINTF(BE, "Instruction [sn:%lli] ready, addding to " + "exeList.\n", + inst->seqNum); + exeList.push(inst); + } + } else { + if (inst->isNonSpeculative() || inst->isStoreConditional()) { + inst->setCanCommit(); + DPRINTF(BE, "Adding non 
speculative instruction\n"); + } + + if (inst->isMemRef()) { + addWaitingMemOp(inst); + LSQ.insert(inst); + if (memBarrier) { + memBarrier->addMemDependent(inst); + inst->addSrcMemInst(memBarrier); + + DPRINTF(BE, "Instruction [sn:%lli] is waiting on " + "barrier [sn:%lli].\n", + inst->seqNum, memBarrier->seqNum); + } + } + + DPRINTF(BE, "Instruction [sn:%lli] not ready, addding to " + "waitingList.\n", + inst->seqNum); + waitingList.push_front(inst); + inst->iqIt = waitingList.begin(); + inst->iqItValid = true; + waitingInsts++; + } + } + + // Check if IQ or LSQ is full. If so we'll need to break and stop + // removing instructions. Also update the number of insts to remove + // from the queue. Check here if we don't care about exact stall + // conditions. +/* + bool stall = false; + if (IQ.isFull()) { + DPRINTF(BE, "IQ is full!\n"); + stall = true; + } else if (LSQ.isFull()) { + DPRINTF(BE, "LSQ is full!\n"); + stall = true; + } else if (isFull()) { + DPRINTF(BE, "ROB is full!\n"); + stall = true; + ROB_fcount++; + } + if (stall) { + d2i.advance(); + dispatchStall(); + return; + } +*/ +} + +template +void +LWBackEnd::dispatchStall() +{ + dispatchStatus = Blocked; + if (!cpu->decoupledFrontEnd) { + // Tell front end to stall here through a timebuffer, or just tell + // it directly. + } +} + +template +void +LWBackEnd::checkDispatchStatus() +{ + DPRINTF(BE, "Checking dispatch status\n"); + assert(dispatchStatus == Blocked); + if (!LSQ.isFull() && !isFull()) { + DPRINTF(BE, "Dispatch no longer blocked\n"); + dispatchStatus = Running; + dispatchInsts(); + } +} + +template +void +LWBackEnd::executeInsts() +{ + DPRINTF(BE, "Trying to execute instructions\n"); + + int num_executed = 0; + while (!exeList.empty() && num_executed < issueWidth) { + DynInstPtr inst = exeList.top(); + + DPRINTF(BE, "Executing inst [sn:%lli] PC: %#x\n", + inst->seqNum, inst->readPC()); + + // Check if the instruction is squashed; if so then skip it + // and don't count it towards the FU usage. 
+ if (inst->isSquashed()) { + DPRINTF(BE, "Execute: Instruction was squashed.\n"); + + // Not sure how to handle this plus the method of sending # of + // instructions to use. Probably will just have to count it + // towards the bandwidth usage, but not the FU usage. + ++num_executed; + + // Consider this instruction executed so that commit can go + // ahead and retire the instruction. + inst->setExecuted(); + + // Not sure if I should set this here or just let commit try to + // commit any squashed instructions. I like the latter a bit more. + inst->setCanCommit(); + +// ++iewExecSquashedInsts; + exeList.pop(); + + continue; + } + + Fault fault = NoFault; + + // Execute instruction. + // Note that if the instruction faults, it will be handled + // at the commit stage. + if (inst->isMemRef() && + (!inst->isDataPrefetch() && !inst->isInstPrefetch())) { + if (dcacheInterface->isBlocked()) { + // Should I move the instruction aside? + DPRINTF(BE, "Execute: dcache is blocked\n"); + break; + } + DPRINTF(BE, "Execute: Initiating access for memory " + "reference.\n"); + + if (inst->isLoad()) { + LSQ.executeLoad(inst); + } else if (inst->isStore()) { + LSQ.executeStore(inst); + if (inst->req && !(inst->req->flags & LOCKED)) { + inst->setExecuted(); + + instToCommit(inst); + } + } else { + panic("Unknown mem type!"); + } + } else { + inst->execute(); + + inst->setExecuted(); + + instToCommit(inst); + } + + updateExeInstStats(inst); + + ++funcExeInst; + ++num_executed; + + exeList.pop(); + + if (inst->mispredicted()) { + squashDueToBranch(inst); + break; + } else if (LSQ.violation()) { + // Get the DynInst that caused the violation. Note that this + // clears the violation signal. + DynInstPtr violator; + violator = LSQ.getMemDepViolator(); + + DPRINTF(BE, "LDSTQ detected a violation. Violator PC: " + "%#x, inst PC: %#x. Addr is: %#x.\n", + violator->readPC(), inst->readPC(), inst->physEffAddr); + + // Squash. 
+ squashDueToMemViolation(inst); + } + } + + issued_ops[0]+= num_executed; + n_issued_dist[num_executed]++; +} + +template +void +LWBackEnd::instToCommit(DynInstPtr &inst) +{ + + DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n", + inst->seqNum, inst->readPC()); + + if (!inst->isSquashed()) { + DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n", + inst->seqNum, inst->readPC()); + + inst->setCanCommit(); + + if (inst->isExecuted()) { + inst->setResultReady(); + int dependents = wakeDependents(inst); + if (dependents) { + producer_inst[0]++; + consumer_inst[0]+= dependents; + } + } + } + + writeback_count[0]++; +} +#if 0 +template +void +LWBackEnd::writebackInsts() +{ + int wb_width = wbWidth; + // Using this method I'm not quite sure how to prevent an + // instruction from waking its own dependents multiple times, + // without the guarantee that commit always has enough bandwidth + // to accept all instructions being written back. This guarantee + // might not be too unrealistic. + InstListIt wb_inst_it = writeback.begin(); + InstListIt wb_end_it = writeback.end(); + int inst_num = 0; + int consumer_insts = 0; + + for (; inst_num < wb_width && + wb_inst_it != wb_end_it; inst_num++) { + DynInstPtr inst = (*wb_inst_it); + + // Some instructions will be sent to commit without having + // executed because they need commit to handle them. + // E.g. Uncached loads have not actually executed when they + // are first sent to commit. Instead commit must tell the LSQ + // when it's ready to execute the uncached load. 
+ if (!inst->isSquashed()) { + DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n", + inst->seqNum, inst->readPC()); + + inst->setCanCommit(); + inst->setResultReady(); + + if (inst->isExecuted()) { + int dependents = wakeDependents(inst); + if (dependents) { + producer_inst[0]++; + consumer_insts+= dependents; + } + } + } + + writeback.erase(wb_inst_it++); + } + LSQ.writebackStores(); + consumer_inst[0]+= consumer_insts; + writeback_count[0]+= inst_num; +} +#endif +template +bool +LWBackEnd::commitInst(int inst_num) +{ + // Read instruction from the head of the ROB + DynInstPtr inst = instList.back(); + + // Make sure instruction is valid + assert(inst); + + if (!inst->readyToCommit()) + return false; + + DPRINTF(BE, "Trying to commit instruction [sn:%lli] PC:%#x\n", + inst->seqNum, inst->readPC()); + + thread->setPC(inst->readPC()); + thread->setNextPC(inst->readNextPC()); + inst->reachedCommit = true; + + // If the instruction is not executed yet, then it is a non-speculative + // or store inst. Signal backwards that it should be executed. + if (!inst->isExecuted()) { + if (inst->isNonSpeculative() || + inst->isStoreConditional() || + inst->isMemBarrier() || + inst->isWriteBarrier()) { +#if !FULL_SYSTEM + // Hack to make sure syscalls aren't executed until all stores + // write back their data. This direct communication shouldn't + // be used for anything other than this. 
+ if (inst_num > 0 || LSQ.hasStoresToWB()) +#else + if ((inst->isMemBarrier() || inst->isWriteBarrier() || + inst->isQuiesce()) && + LSQ.hasStoresToWB()) +#endif + { + DPRINTF(BE, "Waiting for all stores to writeback.\n"); + return false; + } + + DPRINTF(BE, "Encountered a store or non-speculative " + "instruction at the head of the ROB, PC %#x.\n", + inst->readPC()); + + if (inst->isMemBarrier() || inst->isWriteBarrier()) { + DPRINTF(BE, "Waking dependents on barrier [sn:%lli]\n", + inst->seqNum); + assert(memBarrier); + wakeDependents(inst, true); + if (memBarrier == inst) + memBarrier = NULL; + inst->clearMemDependents(); + } + + // Send back the non-speculative instruction's sequence number. + if (inst->iqItValid) { + DPRINTF(BE, "Removing instruction from waiting list\n"); + waitingList.erase(inst->iqIt); + inst->iqItValid = false; + waitingInsts--; + assert(waitingInsts >= 0); + if (inst->isStore()) + removeWaitingMemOp(inst); + } + + exeList.push(inst); + + // Change the instruction so it won't try to commit again until + // it is executed. + inst->clearCanCommit(); + +// ++commitNonSpecStalls; + + return false; + } else if (inst->isLoad()) { + DPRINTF(BE, "[sn:%lli]: Uncached load, PC %#x.\n", + inst->seqNum, inst->readPC()); + + // Send back the non-speculative instruction's sequence + // number. Maybe just tell the lsq to re-execute the load. + + // Send back the non-speculative instruction's sequence number. + if (inst->iqItValid) { + DPRINTF(BE, "Removing instruction from waiting list\n"); + waitingList.erase(inst->iqIt); + inst->iqItValid = false; + waitingInsts--; + assert(waitingInsts >= 0); + removeWaitingMemOp(inst); + } + replayMemInst(inst); + + inst->clearCanCommit(); + + return false; + } else { + panic("Trying to commit un-executed instruction " + "of unknown type!\n"); + } + } + + // Not handled for now. 
+ assert(!inst->isThreadSync()); + assert(inst->memDepReady()); + // Stores will mark themselves as totally completed as they need + // to wait to writeback to memory. @todo: Hack...attempt to fix + // having the checker be forced to wait until a store completes in + // order to check all of the instructions. If the store at the + // head of the check list misses, but a later store hits, then + // loads in the checker may see the younger store values instead + // of the store they should see. Either the checker needs its own + // memory (annoying to update), its own store buffer (how to tell + // which value is correct?), or something else... + if (!inst->isStore()) { + inst->setCompleted(); + } + // Check if the instruction caused a fault. If so, trap. + Fault inst_fault = inst->getFault(); + + // Use checker prior to updating anything due to traps or PC + // based events. + if (checker) { + checker->tick(inst); + } + + if (inst_fault != NoFault) { + DPRINTF(BE, "Inst [sn:%lli] PC %#x has a fault\n", + inst->seqNum, inst->readPC()); + + // Instruction is completed as it has a fault. 
+ inst->setCompleted(); + + if (LSQ.hasStoresToWB()) { + DPRINTF(BE, "Stores still in flight, will wait until drained.\n"); + return false; + } else if (inst_num != 0) { + DPRINTF(BE, "Will wait until instruction is head of commit group.\n"); + return false; + } else if (checker && inst->isStore()) { + checker->tick(inst); + } + + thread->setInst( + static_cast(inst->staticInst->machInst)); +#if FULL_SYSTEM + handleFault(inst_fault); + return false; +#else // !FULL_SYSTEM + panic("fault (%d) detected @ PC %08p", inst_fault, + inst->PC); +#endif // FULL_SYSTEM + } + + int freed_regs = 0; + + for (int i = 0; i < inst->numDestRegs(); ++i) { + DPRINTF(BE, "Commit rename map setting reg %i to [sn:%lli]\n", + (int)inst->destRegIdx(i), inst->seqNum); + thread->renameTable[inst->destRegIdx(i)] = inst; + ++freed_regs; + } + + if (inst->traceData) { + inst->traceData->setFetchSeq(inst->seqNum); + inst->traceData->setCPSeq(thread->numInst); + inst->traceData->finalize(); + inst->traceData = NULL; + } + + inst->clearDependents(); + + frontEnd->addFreeRegs(freed_regs); + + instList.pop_back(); + + --numInsts; + ++thread->funcExeInst; + // Maybe move this to where the fault is handled; if the fault is + // handled, don't try to set this myself as the fault will set it. + // If not, then I set thread->PC = thread->nextPC and + // thread->nextPC = thread->nextPC + 4. + thread->setPC(thread->readNextPC()); + thread->setNextPC(thread->readNextPC() + sizeof(TheISA::MachInst)); + updateComInstStats(inst); + + // Write the done sequence number here. 
+ toIEW->doneSeqNum = inst->seqNum; + lastCommitCycle = curTick; + +#if FULL_SYSTEM + int count = 0; + Addr oldpc; + do { + if (count == 0) + assert(!thread->inSyscall && !thread->trapPending); + oldpc = thread->readPC(); + cpu->system->pcEventQueue.service( + thread->getXCProxy()); + count++; + } while (oldpc != thread->readPC()); + if (count > 1) { + DPRINTF(BE, "PC skip function event, stopping commit\n"); + xcSquash = true; + return false; + } +#endif + return true; +} + +template +void +LWBackEnd::commitInsts() +{ + // Not sure this should be a loop or not. + int inst_num = 0; + while (!instList.empty() && inst_num < commitWidth) { + if (instList.back()->isSquashed()) { + instList.back()->clearDependents(); + instList.pop_back(); + --numInsts; + ROBSquashedInsts[instList.back()->threadNumber]++; + continue; + } + + if (!commitInst(inst_num++)) { + DPRINTF(BE, "Can't commit, Instruction [sn:%lli] PC " + "%#x is head of ROB and not ready\n", + instList.back()->seqNum, instList.back()->readPC()); + --inst_num; + break; + } + } + n_committed_dist.sample(inst_num); +} + +template +void +LWBackEnd::squash(const InstSeqNum &sn) +{ + LSQ.squash(sn); + + int freed_regs = 0; + InstListIt waiting_list_end = waitingList.end(); + InstListIt insts_it = waitingList.begin(); + + while (insts_it != waiting_list_end && (*insts_it)->seqNum > sn) + { + if ((*insts_it)->isSquashed()) { + ++insts_it; + continue; + } + DPRINTF(BE, "Squashing instruction on waitingList PC %#x, [sn:%lli].\n", + (*insts_it)->readPC(), + (*insts_it)->seqNum); + + if ((*insts_it)->isMemRef()) { + DPRINTF(BE, "Squashing a waiting mem op [sn:%lli]\n", + (*insts_it)->seqNum); + removeWaitingMemOp((*insts_it)); + } + + waitingList.erase(insts_it++); + waitingInsts--; + } + assert(waitingInsts >= 0); + + insts_it = instList.begin(); + + while (!instList.empty() && (*insts_it)->seqNum > sn) + { + if ((*insts_it)->isSquashed()) { + ++insts_it; + continue; + } + DPRINTF(BE, "Squashing instruction on inst list PC 
%#x, [sn:%lli].\n", + (*insts_it)->readPC(), + (*insts_it)->seqNum); + + // Mark the instruction as squashed, and ready to commit so that + // it can drain out of the pipeline. + (*insts_it)->setSquashed(); + + (*insts_it)->setCanCommit(); + + (*insts_it)->removeInROB(); + + for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) { + DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i); + DPRINTF(BE, "Commit rename map setting reg %i to [sn:%lli]\n", + (int)(*insts_it)->destRegIdx(i), prev_dest->seqNum); + renameTable[(*insts_it)->destRegIdx(i)] = prev_dest; + ++freed_regs; + } + + (*insts_it)->clearDependents(); + + squashedInsts[(*insts_it)->threadNumber]++; + + instList.erase(insts_it++); + --numInsts; + } + + insts_it = waitingList.begin(); + while (!waitingList.empty() && insts_it != waitingList.end()) { + if ((*insts_it)->seqNum < sn) { + ++insts_it; + continue; + } + assert((*insts_it)->isSquashed()); + + waitingList.erase(insts_it++); + waitingInsts--; + } + + while (memBarrier && memBarrier->seqNum > sn) { + DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously " + "squashed)\n", memBarrier->seqNum); + memBarrier->clearMemDependents(); + if (memBarrier->memDepReady()) { + DPRINTF(BE, "No previous barrier\n"); + memBarrier = NULL; + } else { + std::list &srcs = memBarrier->getMemSrcs(); + memBarrier = srcs.front(); + srcs.pop_front(); + assert(srcs.empty()); + DPRINTF(BE, "Previous barrier: [sn:%lli]\n", + memBarrier->seqNum); + } + } + + frontEnd->addFreeRegs(freed_regs); +} + +template +void +LWBackEnd::squashFromXC() +{ + InstSeqNum squashed_inst = robEmpty() ? 0 : instList.back()->seqNum - 1; + squash(squashed_inst); + frontEnd->squash(squashed_inst, thread->readPC(), + false, false); + frontEnd->interruptPending = false; + + thread->trapPending = false; + thread->inSyscall = false; + xcSquash = false; + commitStatus = Running; +} + +template +void +LWBackEnd::squashFromTrap() +{ + InstSeqNum squashed_inst = robEmpty() ? 
0 : instList.back()->seqNum - 1; + squash(squashed_inst); + frontEnd->squash(squashed_inst, thread->readPC(), + false, false); + frontEnd->interruptPending = false; + + thread->trapPending = false; + thread->inSyscall = false; + trapSquash = false; + commitStatus = Running; +} + +template +void +LWBackEnd::squashDueToBranch(DynInstPtr &inst) +{ + // Update the branch predictor state I guess + DPRINTF(BE, "Squashing due to branch [sn:%lli], will restart at PC %#x\n", + inst->seqNum, inst->readNextPC()); + squash(inst->seqNum); + frontEnd->squash(inst->seqNum, inst->readNextPC(), + true, inst->mispredicted()); +} + +template +void +LWBackEnd::squashDueToMemViolation(DynInstPtr &inst) +{ + // Update the branch predictor state I guess + DPRINTF(BE, "Squashing due to violation [sn:%lli], will restart at PC %#x\n", + inst->seqNum, inst->readNextPC()); + squash(inst->seqNum); + frontEnd->squash(inst->seqNum, inst->readNextPC(), + false, inst->mispredicted()); +} + +template +void +LWBackEnd::squashDueToMemBlocked(DynInstPtr &inst) +{ + DPRINTF(IEW, "Memory blocked, squashing load and younger insts, " + "PC: %#x [sn:%i].\n", inst->readPC(), inst->seqNum); + + squash(inst->seqNum - 1); + frontEnd->squash(inst->seqNum - 1, inst->readPC()); +} + +template +void +LWBackEnd::fetchFault(Fault &fault) +{ + faultFromFetch = fault; + fetchHasFault = true; +} + +template +void +LWBackEnd::switchOut() +{ + switchPending = true; +} + +template +void +LWBackEnd::doSwitchOut() +{ + switchedOut = true; + switchPending = false; + // Need to get rid of all committed, non-speculative state and write it + // to memory/XC. In this case this is stores that have committed and not + // yet written back. 
+ assert(robEmpty()); + assert(!LSQ.hasStoresToWB()); + + LSQ.switchOut(); + + squash(0); +} + +template +void +LWBackEnd::takeOverFrom(ExecContext *old_xc) +{ + switchedOut = false; + xcSquash = false; + trapSquash = false; + + numInsts = 0; + numWaitingMemOps = 0; + waitingMemOps.clear(); + waitingInsts = 0; + switchedOut = false; + dispatchStatus = Running; + commitStatus = Running; + LSQ.takeOverFrom(old_xc); +} + +template +void +LWBackEnd::updateExeInstStats(DynInstPtr &inst) +{ + int thread_number = inst->threadNumber; + + // + // Pick off the software prefetches + // +#ifdef TARGET_ALPHA + if (inst->isDataPrefetch()) + exe_swp[thread_number]++; + else + exe_inst[thread_number]++; +#else + exe_inst[thread_number]++; +#endif + + // + // Control operations + // + if (inst->isControl()) + exe_branches[thread_number]++; + + // + // Memory operations + // + if (inst->isMemRef()) { + exe_refs[thread_number]++; + + if (inst->isLoad()) + exe_loads[thread_number]++; + } +} + +template +void +LWBackEnd::updateComInstStats(DynInstPtr &inst) +{ + unsigned tid = inst->threadNumber; + + // keep an instruction count + thread->numInst++; + thread->numInsts++; + + cpu->numInst++; + // + // Pick off the software prefetches + // +#ifdef TARGET_ALPHA + if (inst->isDataPrefetch()) { + stat_com_swp[tid]++; + } else { + stat_com_inst[tid]++; + } +#else + stat_com_inst[tid]++; +#endif + + // + // Control Instructions + // + if (inst->isControl()) + stat_com_branches[tid]++; + + // + // Memory references + // + if (inst->isMemRef()) { + stat_com_refs[tid]++; + + if (inst->isLoad()) { + stat_com_loads[tid]++; + } + } + + if (inst->isMemBarrier()) { + stat_com_membars[tid]++; + } +} + +template +void +LWBackEnd::dumpInsts() +{ + int num = 0; + int valid_num = 0; + + InstListIt inst_list_it = --(instList.end()); + + cprintf("ExeList size: %i\n", exeList.size()); + + cprintf("Inst list size: %i\n", instList.size()); + + while (inst_list_it != instList.end()) + { + 
cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it--; + ++num; + } + + cprintf("Waiting list size: %i\n", waitingList.size()); + + inst_list_it = --(waitingList.end()); + + while (inst_list_it != waitingList.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone) { + // Loads that have not been marked as executed still count + // towards the total instructions. 
+ ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone); + } + + cprintf("\n"); + + inst_list_it--; + ++num; + } + + cprintf("waitingMemOps list size: %i\n", waitingMemOps.size()); + + MemIt waiting_it = waitingMemOps.begin(); + + while (waiting_it != waitingMemOps.end()) + { + cprintf("[sn:%lli] ", (*waiting_it)); + waiting_it++; + ++num; + } + cprintf("\n"); +} diff --git a/src/cpu/ozone/lw_lsq.cc b/src/cpu/ozone/lw_lsq.cc new file mode 100644 index 000000000..922228b09 --- /dev/null +++ b/src/cpu/ozone/lw_lsq.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/ozone/ozone_impl.hh" +#include "cpu/ozone/lw_lsq_impl.hh" + +// Force the instantiation of LDSTQ for all the implementations we care about. +template class OzoneLWLSQ; + diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh new file mode 100644 index 000000000..e1488dd6f --- /dev/null +++ b/src/cpu/ozone/lw_lsq.hh @@ -0,0 +1,656 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_OZONE_LW_LSQ_HH__ +#define __CPU_OZONE_LW_LSQ_HH__ + +#include +#include +#include +#include + +#include "arch/faults.hh" +#include "arch/isa_traits.hh" +#include "config/full_system.hh" +#include "base/hashmap.hh" +#include "cpu/inst_seq.hh" +#include "mem/packet.hh" +#include "mem/port.hh" +//#include "mem/page_table.hh" +#include "sim/debug.hh" +#include "sim/sim_object.hh" + +//class PageTable; + +/** + * Class that implements the actual LQ and SQ for each specific thread. + * Both are circular queues; load entries are freed upon committing, while + * store entries are freed once they writeback. The LSQUnit tracks if there + * are memory ordering violations, and also detects partial load to store + * forwarding cases (a store only has part of a load's data) that requires + * the load to wait until the store writes back. In the former case it + * holds onto the instruction until the dependence unit looks at it, and + * in the latter it stalls the LSQ until the store writes back. At that + * point the load is replayed. 
+ */ +template +class OzoneLWLSQ { + public: + typedef typename Impl::Params Params; + typedef typename Impl::FullCPU FullCPU; + typedef typename Impl::BackEnd BackEnd; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::IssueStruct IssueStruct; + + typedef TheISA::IntReg IntReg; + + typedef typename std::map::iterator LdMapIt; + + private: + class StoreCompletionEvent : public Event { + public: + /** Constructs a store completion event. */ + StoreCompletionEvent(DynInstPtr &inst, BackEnd *be, + Event *wb_event, OzoneLWLSQ *lsq_ptr); + + /** Processes the store completion event. */ + void process(); + + /** Returns the description of this event. */ + const char *description(); + + private: + /** The store index of the store being written back. */ + DynInstPtr inst; + + BackEnd *be; + /** The writeback event for the store. Needed for store + * conditionals. + */ + public: + Event *wbEvent; + bool miss; + private: + /** The pointer to the LSQ unit that issued the store. */ + OzoneLWLSQ *lsqPtr; + }; + + public: + /** Constructs an LSQ unit. init() must be called prior to use. */ + OzoneLWLSQ(); + + /** Initializes the LSQ unit with the specified number of entries. */ + void init(Params *params, unsigned maxLQEntries, + unsigned maxSQEntries, unsigned id); + + /** Returns the name of the LSQ unit. */ + std::string name() const; + + /** Sets the CPU pointer. */ + void setCPU(FullCPU *cpu_ptr) + { cpu = cpu_ptr; } + + /** Sets the back-end stage pointer. */ + void setBE(BackEnd *be_ptr) + { be = be_ptr; } + + /** Sets the page table pointer. */ +// void setPageTable(PageTable *pt_ptr); + + /** Ticks the LSQ unit, which in this case only resets the number of + * used cache ports. + * @todo: Move the number of used ports up to the LSQ level so it can + * be shared by all LSQ units. + */ + void tick() { usedPorts = 0; } + + /** Inserts an instruction. */ + void insert(DynInstPtr &inst); + /** Inserts a load instruction. 
*/ + void insertLoad(DynInstPtr &load_inst); + /** Inserts a store instruction. */ + void insertStore(DynInstPtr &store_inst); + + /** Executes a load instruction. */ + Fault executeLoad(DynInstPtr &inst); + + /** Executes a store instruction. */ + Fault executeStore(DynInstPtr &inst); + + /** Commits the head load. */ + void commitLoad(); + /** Commits loads older than a specific sequence number. */ + void commitLoads(InstSeqNum &youngest_inst); + + /** Commits stores older than a specific sequence number. */ + void commitStores(InstSeqNum &youngest_inst); + + /** Writes back stores. */ + void writebackStores(); + + // @todo: Include stats in the LSQ unit. + //void regStats(); + + /** Clears all the entries in the LQ. */ + void clearLQ(); + + /** Clears all the entries in the SQ. */ + void clearSQ(); + + /** Resizes the LQ to a given size. */ + void resizeLQ(unsigned size); + + /** Resizes the SQ to a given size. */ + void resizeSQ(unsigned size); + + /** Squashes all instructions younger than a specific sequence number. */ + void squash(const InstSeqNum &squashed_num); + + /** Returns if there is a memory ordering violation. Value is reset upon + * call to getMemDepViolator(). + */ + bool violation() { return memDepViolator; } + + /** Returns the memory ordering violator. */ + DynInstPtr getMemDepViolator(); + + /** Returns if a load became blocked due to the memory system. It clears + * the bool's value upon this being called. + */ + bool loadBlocked() + { return isLoadBlocked; } + + void clearLoadBlocked() + { isLoadBlocked = false; } + + bool isLoadBlockedHandled() + { return loadBlockedHandled; } + + void setLoadBlockedHandled() + { loadBlockedHandled = true; } + + /** Returns the number of free entries (min of free LQ and SQ entries). */ + unsigned numFreeEntries(); + + /** Returns the number of loads ready to execute. */ + int numLoadsReady(); + + /** Returns the number of loads in the LQ. 
*/ + int numLoads() { return loads; } + + /** Returns the number of stores in the SQ. */ + int numStores() { return stores; } + + /** Returns if either the LQ or SQ is full. */ + bool isFull() { return lqFull() || sqFull(); } + + /** Returns if the LQ is full. */ + bool lqFull() { return loads >= (LQEntries - 1); } + + /** Returns if the SQ is full. */ + bool sqFull() { return stores >= (SQEntries - 1); } + + /** Debugging function to dump instructions in the LSQ. */ + void dumpInsts(); + + /** Returns the number of instructions in the LSQ. */ + unsigned getCount() { return loads + stores; } + + /** Returns if there are any stores to writeback. */ + bool hasStoresToWB() { return storesToWB; } + + /** Returns the number of stores to writeback. */ + int numStoresToWB() { return storesToWB; } + + /** Returns if the LSQ unit will writeback on this cycle. */ + bool willWB() { return storeQueue.back().canWB && + !storeQueue.back().completed/* && + !dcacheInterface->isBlocked()*/; } + + void switchOut(); + + void takeOverFrom(ExecContext *old_xc = NULL); + + bool isSwitchedOut() { return switchedOut; } + + bool switchedOut; + + private: + /** Completes the store at the specified index. */ + void completeStore(int store_idx); + + private: + /** Pointer to the CPU. */ + FullCPU *cpu; + + /** Pointer to the back-end stage. */ + BackEnd *be; + + MemObject *mem; + + class DcachePort : public Port + { + protected: + FullCPU *cpu; + + public: + DcachePort(const std::string &_name, FullCPU *_cpu) + : Port(_name), cpu(_cpu) + { } + + protected: + virtual Tick recvAtomic(PacketPtr pkt); + + virtual void recvFunctional(PacketPtr pkt); + + virtual void recvStatusChange(Status status); + + virtual void getDeviceAddressRanges(AddrRangeList &resp, + AddrRangeList &snoop) + { resp.clear(); snoop.clear(); } + + virtual bool recvTiming(PacketPtr pkt); + + virtual void recvRetry(); + }; + + /** Pointer to the D-cache. */ + DcachePort dcachePort; + + /** Pointer to the page table. 
*/ +// PageTable *pTable; + + public: + struct SQEntry { + /** Constructs an empty store queue entry. */ + SQEntry() + : inst(NULL), req(NULL), size(0), data(0), + canWB(0), committed(0), completed(0), lqIt(NULL) + { } + + /** Constructs a store queue entry for a given instruction. */ + SQEntry(DynInstPtr &_inst) + : inst(_inst), req(NULL), size(0), data(0), + canWB(0), committed(0), completed(0), lqIt(NULL) + { } + + /** The store instruction. */ + DynInstPtr inst; + /** The memory request for the store. */ + RequestPtr req; + /** The size of the store. */ + int size; + /** The store data. */ + IntReg data; + /** Whether or not the store can writeback. */ + bool canWB; + /** Whether or not the store is committed. */ + bool committed; + /** Whether or not the store is completed. */ + bool completed; + + typename std::list::iterator lqIt; + }; + + enum Status { + Running, + Idle, + DcacheMissStall, + DcacheMissSwitch + }; + + private: + /** The OzoneLWLSQ thread id. */ + unsigned lsqID; + + /** The status of the LSQ unit. */ + Status _status; + + /** The store queue. */ + std::list storeQueue; + /** The load queue. */ + std::list loadQueue; + + typedef typename std::list::iterator SQIt; + typedef typename std::list::iterator LQIt; + + + struct HashFn { + size_t operator() (const int a) const + { + unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF; + + return hash; + } + }; + + m5::hash_map SQItHash; + std::queue SQIndices; + m5::hash_map LQItHash; + std::queue LQIndices; + + typedef typename m5::hash_map::iterator LQHashIt; + typedef typename m5::hash_map::iterator SQHashIt; + // Consider making these 16 bits + /** The number of LQ entries. */ + unsigned LQEntries; + /** The number of SQ entries. */ + unsigned SQEntries; + + /** The number of load instructions in the LQ. */ + int loads; + /** The number of store instructions in the SQ (excludes those waiting to + * writeback). 
+ */ + int stores; + + int storesToWB; + + /// @todo Consider moving to a more advanced model with write vs read ports + /** The number of cache ports available each cycle. */ + int cachePorts; + + /** The number of used cache ports in this cycle. */ + int usedPorts; + + //list mshrSeqNums; + + //Stats::Scalar<> dcacheStallCycles; + Counter lastDcacheStall; + + // Make these per thread? + /** Whether or not the LSQ is stalled. */ + bool stalled; + /** The store that causes the stall due to partial store to load + * forwarding. + */ + InstSeqNum stallingStoreIsn; + /** The index of the above store. */ + LQIt stallingLoad; + + /** Whether or not a load is blocked due to the memory system. It is + * cleared when this value is checked via loadBlocked(). + */ + bool isLoadBlocked; + + bool loadBlockedHandled; + + InstSeqNum blockedLoadSeqNum; + + /** The oldest faulting load instruction. */ + DynInstPtr loadFaultInst; + /** The oldest faulting store instruction. */ + DynInstPtr storeFaultInst; + + /** The oldest load that caused a memory ordering violation. */ + DynInstPtr memDepViolator; + + // Will also need how many read/write ports the Dcache has. Or keep track + // of that in stage that is one level up, and only call executeLoad/Store + // the appropriate number of times. + + public: + /** Executes the load at the given index. */ + template + Fault read(RequestPtr req, T &data, int load_idx); + + /** Executes the store at the given index. */ + template + Fault write(RequestPtr req, T &data, int store_idx); + + /** Returns the sequence number of the head load instruction. */ + InstSeqNum getLoadHeadSeqNum() + { + if (!loadQueue.empty()) { + return loadQueue.back()->seqNum; + } else { + return 0; + } + + } + + /** Returns the sequence number of the head store instruction. 
*/ + InstSeqNum getStoreHeadSeqNum() + { + if (!storeQueue.empty()) { + return storeQueue.back().inst->seqNum; + } else { + return 0; + } + + } + + /** Returns whether or not the LSQ unit is stalled. */ + bool isStalled() { return stalled; } +}; + +template +template +Fault +OzoneLWLSQ::read(RequestPtr req, T &data, int load_idx) +{ + //Depending on issue2execute delay a squashed load could + //execute if it is found to be squashed in the same + //cycle it is scheduled to execute + typename m5::hash_map::iterator + lq_hash_it = LQItHash.find(load_idx); + assert(lq_hash_it != LQItHash.end()); + DynInstPtr inst = (*(*lq_hash_it).second); + + // Make sure this isn't an uncacheable access + // A bit of a hackish way to get uncached accesses to work only if they're + // at the head of the LSQ and are ready to commit (at the head of the ROB + // too). + // @todo: Fix uncached accesses. + if (req->getFlags() & UNCACHEABLE && + (inst != loadQueue.back() || !inst->reachedCommit)) { + DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of " + "commit/LSQ!\n", + inst->seqNum); + be->rescheduleMemInst(inst); + return TheISA::genMachineCheckFault(); + } + + // Check the SQ for any previous stores that might lead to forwarding + SQIt sq_it = storeQueue.begin(); + int store_size = 0; + + DPRINTF(OzoneLSQ, "Read called, load idx: %i addr: %#x\n", + load_idx, req->getPaddr()); + + while (sq_it != storeQueue.end() && (*sq_it).inst->seqNum > inst->seqNum) + ++sq_it; + + while (1) { + // End once we've reached the top of the LSQ + if (sq_it == storeQueue.end()) { + break; + } + + assert((*sq_it).inst); + + store_size = (*sq_it).size; + + if (store_size == 0) { + sq_it++; + continue; + } + + // Check if the store data is within the lower and upper bounds of + // addresses that the request needs. 
+ bool store_has_lower_limit = + req->getVaddr() >= (*sq_it).inst->effAddr; + bool store_has_upper_limit = + (req->getVaddr() + req->getSize()) <= ((*sq_it).inst->effAddr + + store_size); + bool lower_load_has_store_part = + req->getVaddr() < ((*sq_it).inst->effAddr + + store_size); + bool upper_load_has_store_part = + (req->getVaddr() + req->getSize()) > (*sq_it).inst->effAddr; + + // If the store's data has all of the data needed, we can forward. + if (store_has_lower_limit && store_has_upper_limit) { + int shift_amt = req->getVaddr() & (store_size - 1); + // Assumes byte addressing + shift_amt = shift_amt << 3; + + // Cast this to type T? + data = (*sq_it).data >> shift_amt; + + assert(!inst->memData); + inst->memData = new uint8_t[64]; + + memcpy(inst->memData, &data, req->getSize()); + + DPRINTF(OzoneLSQ, "Forwarding from store [sn:%lli] to load to " + "[sn:%lli] addr %#x, data %#x\n", + (*sq_it).inst->seqNum, inst->seqNum, req->vaddr, *(inst->memData)); +/* + typename BackEnd::LdWritebackEvent *wb = + new typename BackEnd::LdWritebackEvent(inst, + be); + + // We'll say this has a 1 cycle load-store forwarding latency + // for now. + // FIXME - Need to make this a parameter. + wb->schedule(curTick); +*/ + // Should keep track of stat for forwarded data + return NoFault; + } else if ((store_has_lower_limit && lower_load_has_store_part) || + (store_has_upper_limit && upper_load_has_store_part) || + (lower_load_has_store_part && upper_load_has_store_part)) { + // This is the partial store-load forwarding case where a store + // has only part of the load's data. + + // If it's already been written back, then don't worry about + // stalling on it. + if ((*sq_it).completed) { + sq_it++; + break; + } + + // Must stall load and force it to retry, so long as it's the oldest + // load that needs to do so. 
+ if (!stalled || + (stalled && + inst->seqNum < + (*stallingLoad)->seqNum)) { + stalled = true; + stallingStoreIsn = (*sq_it).inst->seqNum; + stallingLoad = (*lq_hash_it).second; + } + + // Tell IQ/mem dep unit that this instruction will need to be + // rescheduled eventually + be->rescheduleMemInst(inst); + + DPRINTF(OzoneLSQ, "Load-store forwarding mis-match. " + "Store [sn:%lli] to load addr %#x\n", + (*sq_it).inst->seqNum, req->vaddr); + + return NoFault; + } + sq_it++; + } + + // If there's no forwarding case, then go access memory + DPRINTF(OzoneLSQ, "Doing functional access for inst PC %#x\n", + inst->readPC()); + + assert(!inst->memData); + inst->memData = new uint8_t[64]; + + ++usedPorts; + + DPRINTF(OzoneLSQ, "Doing timing access for inst PC %#x\n", + inst->readPC()); + + PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); + data_pkt->dataStatic(inst->memData); + + // if we have a cache, do cache access too + if (!dcachePort.sendTiming(data_pkt)) { + // There's an older load that's already going to squash. + if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum) + return NoFault; + + // Record that the load was blocked due to memory. This + // load will squash all instructions after it, be + // refetched, and re-executed. + isLoadBlocked = true; + loadBlockedHandled = false; + blockedLoadSeqNum = inst->seqNum; + // No fault occurred, even though the interface is blocked. 
+ return NoFault; + } + + if (data_pkt->result != Packet::Success) { + DPRINTF(OzoneLSQ, "OzoneLSQ: D-cache miss!\n"); + DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", + inst->seqNum); + } else { + DPRINTF(OzoneLSQ, "OzoneLSQ: D-cache hit!\n"); + DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", + inst->seqNum); + } + + return NoFault; +} + +template +template +Fault +OzoneLWLSQ::write(RequestPtr req, T &data, int store_idx) +{ + SQHashIt sq_hash_it = SQItHash.find(store_idx); + assert(sq_hash_it != SQItHash.end()); + + SQIt sq_it = (*sq_hash_it).second; + assert((*sq_it).inst); + + DPRINTF(OzoneLSQ, "Doing write to store idx %i, addr %#x data %#x" + " | [sn:%lli]\n", + store_idx, req->getPaddr(), data, (*sq_it).inst->seqNum); + + (*sq_it).req = req; + (*sq_it).size = sizeof(T); + (*sq_it).data = data; +/* + assert(!req->data); + req->data = new uint8_t[64]; + memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size); +*/ + + // This function only writes the data to the store queue, so no fault + // can happen here. + return NoFault; +} + +#endif // __CPU_OZONE_LW_LSQ_HH__ diff --git a/src/cpu/ozone/lw_lsq_impl.hh b/src/cpu/ozone/lw_lsq_impl.hh new file mode 100644 index 000000000..f72bbb1cc --- /dev/null +++ b/src/cpu/ozone/lw_lsq_impl.hh @@ -0,0 +1,874 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "arch/isa_traits.hh" +#include "base/str.hh" +#include "cpu/ozone/lw_lsq.hh" +#include "cpu/checker/cpu.hh" + +template +OzoneLWLSQ::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst, + BackEnd *_be, + Event *wb_event, + OzoneLWLSQ *lsq_ptr) + : Event(&mainEventQueue), + inst(_inst), + be(_be), + wbEvent(wb_event), + miss(false), + lsqPtr(lsq_ptr) +{ + this->setFlags(Event::AutoDelete); +} + +template +void +OzoneLWLSQ::StoreCompletionEvent::process() +{ + DPRINTF(OzoneLSQ, "Cache miss complete for store [sn:%lli]\n", + inst->seqNum); + + //lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum); + +// lsqPtr->cpu->wakeCPU(); + if (lsqPtr->isSwitchedOut()) { + if (wbEvent) + delete wbEvent; + + return; + } + + if (wbEvent) { + wbEvent->process(); + delete wbEvent; + } + + lsqPtr->completeStore(inst->sqIdx); + if (miss) + be->removeDcacheMiss(inst); +} + +template +const char * +OzoneLWLSQ::StoreCompletionEvent::description() +{ + return "LSQ store completion event"; +} + +template +OzoneLWLSQ::OzoneLWLSQ() + : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false), + loadBlockedHandled(false) +{ +} + +template +void +OzoneLWLSQ::init(Params *params, unsigned maxLQEntries, + unsigned maxSQEntries, unsigned id) +{ + DPRINTF(OzoneLSQ, "Creating OzoneLWLSQ%i object.\n",id); + + lsqID = id; + + LQEntries = maxLQEntries; + SQEntries = maxSQEntries; + + for (int i = 0; i < LQEntries * 2; i++) { + LQIndices.push(i); + SQIndices.push(i); + } + + usedPorts = 0; + cachePorts = params->cachePorts; + + dcacheInterface = params->dcacheInterface; + + loadFaultInst = storeFaultInst = memDepViolator = NULL; + + blockedLoadSeqNum = 0; +} + +template +std::string +OzoneLWLSQ::name() const +{ + return "lsqunit"; +} + +template +void +OzoneLWLSQ::clearLQ() +{ + loadQueue.clear(); +} + +template +void +OzoneLWLSQ::clearSQ() +{ + storeQueue.clear(); +} +/* +template +void +OzoneLWLSQ::setPageTable(PageTable *pt_ptr) +{ + 
DPRINTF(OzoneLSQ, "Setting the page table pointer.\n"); + pTable = pt_ptr; +} +*/ +template +void +OzoneLWLSQ::resizeLQ(unsigned size) +{ + assert( size >= LQEntries); + + if (size > LQEntries) { + while (size > loadQueue.size()) { + DynInstPtr dummy; + loadQueue.push_back(dummy); + LQEntries++; + } + } else { + LQEntries = size; + } + +} + +template +void +OzoneLWLSQ::resizeSQ(unsigned size) +{ + if (size > SQEntries) { + while (size > storeQueue.size()) { + SQEntry dummy; + storeQueue.push_back(dummy); + SQEntries++; + } + } else { + SQEntries = size; + } +} + +template +void +OzoneLWLSQ::insert(DynInstPtr &inst) +{ + // Make sure we really have a memory reference. + assert(inst->isMemRef()); + + // Make sure it's one of the two classes of memory references. + assert(inst->isLoad() || inst->isStore()); + + if (inst->isLoad()) { + insertLoad(inst); + } else { + insertStore(inst); + } +} + +template +void +OzoneLWLSQ::insertLoad(DynInstPtr &load_inst) +{ + assert(loads < LQEntries * 2); + assert(!LQIndices.empty()); + int load_index = LQIndices.front(); + LQIndices.pop(); + + DPRINTF(OzoneLSQ, "Inserting load PC %#x, idx:%i [sn:%lli]\n", + load_inst->readPC(), load_index, load_inst->seqNum); + + load_inst->lqIdx = load_index; + + loadQueue.push_front(load_inst); + LQItHash[load_index] = loadQueue.begin(); + + ++loads; +} + +template +void +OzoneLWLSQ::insertStore(DynInstPtr &store_inst) +{ + // Make sure it is not full before inserting an instruction. 
+ assert(stores - storesToWB < SQEntries); + + assert(!SQIndices.empty()); + int store_index = SQIndices.front(); + SQIndices.pop(); + + DPRINTF(OzoneLSQ, "Inserting store PC %#x, idx:%i [sn:%lli]\n", + store_inst->readPC(), store_index, store_inst->seqNum); + + store_inst->sqIdx = store_index; + SQEntry entry(store_inst); + if (loadQueue.empty()) { + entry.lqIt = loadQueue.end(); + } else { + entry.lqIt = loadQueue.begin(); + } + storeQueue.push_front(entry); + + SQItHash[store_index] = storeQueue.begin(); + + ++stores; +} + +template +typename Impl::DynInstPtr +OzoneLWLSQ::getMemDepViolator() +{ + DynInstPtr temp = memDepViolator; + + memDepViolator = NULL; + + return temp; +} + +template +unsigned +OzoneLWLSQ::numFreeEntries() +{ + unsigned free_lq_entries = LQEntries - loads; + unsigned free_sq_entries = SQEntries - stores; + + // Both the LQ and SQ entries have an extra dummy entry to differentiate + // empty/full conditions. Subtract 1 from the free entries. + if (free_lq_entries < free_sq_entries) { + return free_lq_entries - 1; + } else { + return free_sq_entries - 1; + } +} + +template +int +OzoneLWLSQ::numLoadsReady() +{ + int retval = 0; + LQIt lq_it = loadQueue.begin(); + LQIt end_it = loadQueue.end(); + + while (lq_it != end_it) { + if ((*lq_it)->readyToIssue()) { + ++retval; + } + } + + return retval; +} + +template +Fault +OzoneLWLSQ::executeLoad(DynInstPtr &inst) +{ + // Execute a specific load. + Fault load_fault = NoFault; + + DPRINTF(OzoneLSQ, "Executing load PC %#x, [sn:%lli]\n", + inst->readPC(),inst->seqNum); + + // Make sure it's really in the list. + // Normally it should always be in the list. However, + /* due to a syscall it may not be the list. 
+#ifdef DEBUG + int i = loadHead; + while (1) { + if (i == loadTail && !find(inst)) { + assert(0 && "Load not in the queue!"); + } else if (loadQueue[i] == inst) { + break; + } + + i = i + 1; + if (i >= LQEntries) { + i = 0; + } + } +#endif // DEBUG*/ + + load_fault = inst->initiateAcc(); + + // Might want to make sure that I'm not overwriting a previously faulting + // instruction that hasn't been checked yet. + // Actually probably want the oldest faulting load + if (load_fault != NoFault) { + DPRINTF(OzoneLSQ, "Load [sn:%lli] has a fault\n", inst->seqNum); + // Maybe just set it as can commit here, although that might cause + // some other problems with sending traps to the ROB too quickly. + be->instToCommit(inst); +// iewStage->activityThisCycle(); + } + + return load_fault; +} + +template +Fault +OzoneLWLSQ::executeStore(DynInstPtr &store_inst) +{ + // Make sure that a store exists. + assert(stores != 0); + + int store_idx = store_inst->sqIdx; + SQHashIt sq_hash_it = SQItHash.find(store_idx); + assert(sq_hash_it != SQItHash.end()); + DPRINTF(OzoneLSQ, "Executing store PC %#x [sn:%lli]\n", + store_inst->readPC(), store_inst->seqNum); + + SQIt sq_it = (*sq_hash_it).second; + + Fault store_fault = store_inst->initiateAcc(); + + // Store size should now be available. Use it to get proper offset for + // addr comparisons. + int size = (*sq_it).size; + + if (size == 0) { + DPRINTF(OzoneLSQ,"Fault on Store PC %#x, [sn:%lli],Size = 0\n", + store_inst->readPC(),store_inst->seqNum); + + return store_fault; + } + + assert(store_fault == NoFault); + + if (!storeFaultInst) { + if (store_fault != NoFault) { + panic("Fault in a store instruction!"); + storeFaultInst = store_inst; + } else if (store_inst->isStoreConditional()) { + // Store conditionals need to set themselves as able to + // writeback if we haven't had a fault by here. + (*sq_it).canWB = true; + + ++storesToWB; + DPRINTF(OzoneLSQ, "Nonspeculative store! 
storesToWB:%i\n", + storesToWB); + } + } + + LQIt lq_it = --(loadQueue.end()); + + if (!memDepViolator) { + while (lq_it != loadQueue.end()) { + if ((*lq_it)->seqNum < store_inst->seqNum) { + lq_it--; + continue; + } + // Actually should only check loads that have actually executed + // Might be safe because effAddr is set to InvalAddr when the + // dyn inst is created. + + // Must actually check all addrs in the proper size range + // Which is more correct than needs to be. What if for now we just + // assume all loads are quad-word loads, and do the addr based + // on that. + // @todo: Fix this, magic number being used here + if (((*lq_it)->effAddr >> 8) == + (store_inst->effAddr >> 8)) { + // A load incorrectly passed this store. Squash and refetch. + // For now return a fault to show that it was unsuccessful. + memDepViolator = (*lq_it); + + return TheISA::genMachineCheckFault(); + } + + lq_it--; + } + + // If we've reached this point, there was no violation. + memDepViolator = NULL; + } + + return store_fault; +} + +template +void +OzoneLWLSQ::commitLoad() +{ + assert(!loadQueue.empty()); + + DPRINTF(OzoneLSQ, "[sn:%lli] Committing head load instruction, PC %#x\n", + loadQueue.back()->seqNum, loadQueue.back()->readPC()); + + LQIndices.push(loadQueue.back()->lqIdx); + LQItHash.erase(loadQueue.back()->lqIdx); + + loadQueue.pop_back(); + + --loads; +} + +template +void +OzoneLWLSQ::commitLoads(InstSeqNum &youngest_inst) +{ + assert(loads == 0 || !loadQueue.empty()); + + while (loads != 0 && + loadQueue.back()->seqNum <= youngest_inst) { + commitLoad(); + } +} + +template +void +OzoneLWLSQ::commitStores(InstSeqNum &youngest_inst) +{ + assert(stores == 0 || !storeQueue.empty()); + + SQIt sq_it = --(storeQueue.end()); + while (!storeQueue.empty() && sq_it != storeQueue.end()) { + assert((*sq_it).inst); + if (!(*sq_it).canWB) { + if ((*sq_it).inst->seqNum > youngest_inst) { + break; + } + ++storesToWB; + + DPRINTF(OzoneLSQ, "Marking store as able to write back, PC " 
+ "%#x [sn:%lli], storesToWB:%i\n", + (*sq_it).inst->readPC(), + (*sq_it).inst->seqNum, + storesToWB); + + (*sq_it).canWB = true; + } + + sq_it--; + } +} + +template +void +OzoneLWLSQ::writebackStores() +{ + SQIt sq_it = --(storeQueue.end()); + while (storesToWB > 0 && + sq_it != storeQueue.end() && + (*sq_it).inst && + (*sq_it).canWB && + usedPorts < cachePorts) { + + DynInstPtr inst = (*sq_it).inst; + + if ((*sq_it).size == 0 && !(*sq_it).completed) { + sq_it--; + completeStore(inst->sqIdx); + + continue; + } + + if (inst->isDataPrefetch() || (*sq_it).committed) { + sq_it--; + continue; + } + + if (dcacheInterface && dcacheInterface->isBlocked()) { + DPRINTF(OzoneLSQ, "Unable to write back any more stores, cache" + " is blocked!\n"); + break; + } + + ++usedPorts; + + assert((*sq_it).req); + assert(!(*sq_it).committed); + + (*sq_it).committed = true; + + MemReqPtr req = (*sq_it).req; + + req->cmd = Write; + req->completionEvent = NULL; + req->time = curTick; + + switch((*sq_it).size) { + case 1: + cpu->write(req, (uint8_t &)(*sq_it).data); + break; + case 2: + cpu->write(req, (uint16_t &)(*sq_it).data); + break; + case 4: + cpu->write(req, (uint32_t &)(*sq_it).data); + break; + case 8: + cpu->write(req, (uint64_t &)(*sq_it).data); + break; + default: + panic("Unexpected store size!\n"); + } + if (!(req->flags & LOCKED)) { + (*sq_it).inst->setCompleted(); + if (cpu->checker) { + cpu->checker->tick((*sq_it).inst); + } + } + + DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x " + "to Addr:%#x, data:%#x [sn:%lli]\n", + inst->sqIdx,inst->readPC(), + req->paddr, *(req->data), + inst->seqNum); + + if (dcacheInterface) { + assert(!req->completionEvent); + StoreCompletionEvent *store_event = new + StoreCompletionEvent(inst, be, NULL, this); + req->completionEvent = store_event; + + MemAccessResult result = dcacheInterface->access(req); + + if (isStalled() && + inst->seqNum == stallingStoreIsn) { + DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] " + "load 
[sn:%lli]\n", + stallingStoreIsn, (*stallingLoad)->seqNum); + stalled = false; + stallingStoreIsn = 0; + be->replayMemInst((*stallingLoad)); + } + + if (result != MA_HIT && dcacheInterface->doEvents()) { + store_event->miss = true; + typename BackEnd::LdWritebackEvent *wb = NULL; + if (req->flags & LOCKED) { + wb = new typename BackEnd::LdWritebackEvent(inst, + be); + store_event->wbEvent = wb; + } + + DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n"); + +// DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n", +// inst->seqNum); + + be->addDcacheMiss(inst); + + lastDcacheStall = curTick; + + _status = DcacheMissStall; + + // Increment stat here or something + + sq_it--; + } else { + DPRINTF(OzoneLSQ,"D-Cache: Write Hit on idx:%i !\n", + inst->sqIdx); + +// DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n", +// inst->seqNum); + + if (req->flags & LOCKED) { + // Stx_C does not generate a system port + // transaction in the 21264, but that might be + // hard to accomplish in this model. + + typename BackEnd::LdWritebackEvent *wb = + new typename BackEnd::LdWritebackEvent(inst, + be); + store_event->wbEvent = wb; + } + sq_it--; + } + } else { + panic("Must HAVE DCACHE!!!!!\n"); + } + } + + // Not sure this should set it to 0. + usedPorts = 0; + + assert(stores >= 0 && storesToWB >= 0); +} + +template +void +OzoneLWLSQ::squash(const InstSeqNum &squashed_num) +{ + DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!" + "(Loads:%i Stores:%i)\n",squashed_num,loads,stores); + + + LQIt lq_it = loadQueue.begin(); + + while (loads != 0 && (*lq_it)->seqNum > squashed_num) { + assert(!loadQueue.empty()); + // Clear the smart pointer to make sure it is decremented. + DPRINTF(OzoneLSQ,"Load Instruction PC %#x squashed, " + "[sn:%lli]\n", + (*lq_it)->readPC(), + (*lq_it)->seqNum); + + if (isStalled() && lq_it == stallingLoad) { + stalled = false; + stallingStoreIsn = 0; + stallingLoad = NULL; + } + + --loads; + + // Inefficient! 
+ LQHashIt lq_hash_it = LQItHash.find((*lq_it)->lqIdx); + assert(lq_hash_it != LQItHash.end()); + LQItHash.erase(lq_hash_it); + LQIndices.push((*lq_it)->lqIdx); + loadQueue.erase(lq_it++); + } + + if (isLoadBlocked) { + if (squashed_num < blockedLoadSeqNum) { + isLoadBlocked = false; + loadBlockedHandled = false; + blockedLoadSeqNum = 0; + } + } + + SQIt sq_it = storeQueue.begin(); + + while (stores != 0 && (*sq_it).inst->seqNum > squashed_num) { + assert(!storeQueue.empty()); + + if ((*sq_it).canWB) { + break; + } + + // Clear the smart pointer to make sure it is decremented. + DPRINTF(OzoneLSQ,"Store Instruction PC %#x idx:%i squashed [sn:%lli]\n", + (*sq_it).inst->readPC(), (*sq_it).inst->sqIdx, + (*sq_it).inst->seqNum); + + // I don't think this can happen. It should have been cleared by the + // stalling load. + if (isStalled() && + (*sq_it).inst->seqNum == stallingStoreIsn) { + panic("Is stalled should have been cleared by stalling load!\n"); + stalled = false; + stallingStoreIsn = 0; + } + + SQHashIt sq_hash_it = SQItHash.find((*sq_it).inst->sqIdx); + assert(sq_hash_it != SQItHash.end()); + SQItHash.erase(sq_hash_it); + SQIndices.push((*sq_it).inst->sqIdx); + (*sq_it).inst = NULL; + (*sq_it).canWB = 0; + + if ((*sq_it).req) { + assert(!(*sq_it).req->completionEvent); + } + (*sq_it).req = NULL; + --stores; + storeQueue.erase(sq_it++); + } +} + +template +void +OzoneLWLSQ::dumpInsts() +{ + cprintf("Load store queue: Dumping instructions.\n"); + cprintf("Load queue size: %i\n", loads); + cprintf("Load queue: "); + + LQIt lq_it = --(loadQueue.end()); + + while (lq_it != loadQueue.end() && (*lq_it)) { + cprintf("[sn:%lli] %#x ", (*lq_it)->seqNum, + (*lq_it)->readPC()); + + lq_it--; + } + + cprintf("\nStore queue size: %i\n", stores); + cprintf("Store queue: "); + + SQIt sq_it = --(storeQueue.end()); + + while (sq_it != storeQueue.end() && (*sq_it).inst) { + cprintf("[sn:%lli]\nPC:%#x\nSize:%i\nCommitted:%i\nCompleted:%i\ncanWB:%i\n", + (*sq_it).inst->seqNum, + 
(*sq_it).inst->readPC(), + (*sq_it).size, + (*sq_it).committed, + (*sq_it).completed, + (*sq_it).canWB); + + sq_it--; + } + + cprintf("\n"); +} + +template +void +OzoneLWLSQ::completeStore(int store_idx) +{ + SQHashIt sq_hash_it = SQItHash.find(store_idx); + assert(sq_hash_it != SQItHash.end()); + SQIt sq_it = (*sq_hash_it).second; + + assert((*sq_it).inst); + (*sq_it).completed = true; + DynInstPtr inst = (*sq_it).inst; + + --storesToWB; + + if (isStalled() && + inst->seqNum == stallingStoreIsn) { + DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] " + "load [sn:%lli]\n", + stallingStoreIsn, (*stallingLoad)->seqNum); + stalled = false; + stallingStoreIsn = 0; + be->replayMemInst((*stallingLoad)); + } + + DPRINTF(OzoneLSQ, "Completing store idx:%i [sn:%lli], storesToWB:%i\n", + inst->sqIdx, inst->seqNum, storesToWB); + + assert(!storeQueue.empty()); + SQItHash.erase(sq_hash_it); + SQIndices.push(inst->sqIdx); + storeQueue.erase(sq_it); + --stores; + + inst->setCompleted(); + if (cpu->checker) { + cpu->checker->tick(inst); + } +} + +template +void +OzoneLWLSQ::switchOut() +{ + assert(storesToWB == 0); + switchedOut = true; + SQIt sq_it = --(storeQueue.end()); + while (storesToWB > 0 && + sq_it != storeQueue.end() && + (*sq_it).inst && + (*sq_it).canWB) { + + DynInstPtr inst = (*sq_it).inst; + + if ((*sq_it).size == 0 && !(*sq_it).completed) { + sq_it--; + continue; + } + + // Store conditionals don't complete until *after* they have written + // back. If it's here and not yet sent to memory, then don't bother + // as it's not part of committed state. 
+ if (inst->isDataPrefetch() || (*sq_it).committed) { + sq_it--; + continue; + } else if ((*sq_it).req->flags & LOCKED) { + sq_it--; + assert(!(*sq_it).canWB || + ((*sq_it).canWB && (*sq_it).req->flags & LOCKED)); + continue; + } + + assert((*sq_it).req); + assert(!(*sq_it).committed); + + MemReqPtr req = (*sq_it).req; + (*sq_it).committed = true; + + req->cmd = Write; + req->completionEvent = NULL; + req->time = curTick; + assert(!req->data); + req->data = new uint8_t[64]; + memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size); + + DPRINTF(OzoneLSQ, "Switching out : Writing back store idx:%i PC:%#x " + "to Addr:%#x, data:%#x directly to memory [sn:%lli]\n", + inst->sqIdx,inst->readPC(), + req->paddr, *(req->data), + inst->seqNum); + + switch((*sq_it).size) { + case 1: + cpu->write(req, (uint8_t &)(*sq_it).data); + break; + case 2: + cpu->write(req, (uint16_t &)(*sq_it).data); + break; + case 4: + cpu->write(req, (uint32_t &)(*sq_it).data); + break; + case 8: + cpu->write(req, (uint64_t &)(*sq_it).data); + break; + default: + panic("Unexpected store size!\n"); + } + } + + // Clear the queue to free up resources + storeQueue.clear(); + loadQueue.clear(); + loads = stores = storesToWB = 0; +} + +template +void +OzoneLWLSQ::takeOverFrom(ExecContext *old_xc) +{ + // Clear out any old state. May be redundant if this is the first time + // the CPU is being used. 
+ stalled = false; + isLoadBlocked = false; + loadBlockedHandled = false; + switchedOut = false; + + // Could do simple checks here to see if indices are on twice + while (!LQIndices.empty()) + LQIndices.pop(); + while (!SQIndices.empty()) + SQIndices.pop(); + + for (int i = 0; i < LQEntries * 2; i++) { + LQIndices.push(i); + SQIndices.push(i); + } + + usedPorts = 0; + + loadFaultInst = storeFaultInst = memDepViolator = NULL; + + blockedLoadSeqNum = 0; +} diff --git a/src/cpu/ozone/null_predictor.hh b/src/cpu/ozone/null_predictor.hh new file mode 100644 index 000000000..d19e2cd1c --- /dev/null +++ b/src/cpu/ozone/null_predictor.hh @@ -0,0 +1,76 @@ + +#ifndef __CPU_OZONE_NULL_PREDICTOR_HH__ +#define __CPU_OZONE_NULL_PREDICTOR_HH__ + +#include "arch/isa_traits.hh" +#include "cpu/inst_seq.hh" + +template +class NullPredictor +{ + public: + typedef typename Impl::Params Params; + typedef typename Impl::DynInstPtr DynInstPtr; + + NullPredictor(Params *p) { } + + struct BPredInfo { + BPredInfo() + : PC(0), nextPC(0) + { } + + BPredInfo(const Addr &pc, const Addr &next_pc) + : PC(pc), nextPC(next_pc) + { } + + Addr PC; + Addr nextPC; + }; + + BPredInfo lookup(Addr &PC) { return BPredInfo(PC, PC+4); } + + void undo(BPredInfo &bp_info) { return; } + + /** + * Predicts whether or not the instruction is a taken branch, and the + * target of the branch if it is taken. + * @param inst The branch instruction. + * @param PC The predicted PC is passed back through this parameter. + * @param tid The thread id. + * @return Returns if the branch is taken or not. + */ + bool predict(DynInstPtr &inst, Addr &PC, unsigned tid) + { return false; } + + /** + * Tells the branch predictor to commit any updates until the given + * sequence number. + * @param done_sn The sequence number to commit any older updates up until. + * @param tid The thread id. + */ + void update(const InstSeqNum &done_sn, unsigned tid) { } + + /** + * Squashes all outstanding updates until a given sequence number. 
+ * @param squashed_sn The sequence number to squash any younger updates up + * until. + * @param tid The thread id. + */ + void squash(const InstSeqNum &squashed_sn, unsigned tid) { } + + /** + * Squashes all outstanding updates until a given sequence number, and + * corrects that sn's update with the proper address and taken/not taken. + * @param squashed_sn The sequence number to squash any younger updates up + * until. + * @param corr_target The correct branch target. + * @param actually_taken The correct branch direction. + * @param tid The thread id. + */ + void squash(const InstSeqNum &squashed_sn, const Addr &corr_target, + bool actually_taken, unsigned tid) + { } + +}; + +#endif // __CPU_OZONE_NULL_PREDICTOR_HH__ diff --git a/src/cpu/ozone/ozone_impl.hh b/src/cpu/ozone/ozone_impl.hh new file mode 100644 index 000000000..d8c545977 --- /dev/null +++ b/src/cpu/ozone/ozone_impl.hh @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_OZONE_OZONE_IMPL_HH__ +#define __CPU_OZONE_OZONE_IMPL_HH__ + +#include "arch/alpha/isa_traits.hh" +#include "cpu/o3/bpred_unit.hh" +#include "cpu/ozone/front_end.hh" +#include "cpu/ozone/inst_queue.hh" +#include "cpu/ozone/lw_lsq.hh" +#include "cpu/ozone/lw_back_end.hh" +#include "cpu/ozone/null_predictor.hh" +#include "cpu/ozone/dyn_inst.hh" +#include "cpu/ozone/simple_params.hh" + +template +class OzoneCPU; + +template +class OzoneDynInst; + +struct OzoneImpl { + typedef SimpleParams Params; + typedef OzoneCPU OzoneCPU; + typedef OzoneCPU FullCPU; + + // Would like to put these into their own area. 
+// typedef NullPredictor BranchPred; + typedef TwobitBPredUnit BranchPred; + typedef FrontEnd FrontEnd; + // Will need IQ, LSQ eventually + typedef LWBackEnd BackEnd; + + typedef InstQueue InstQueue; + typedef OzoneLWLSQ LdstQueue; + + typedef OzoneDynInst DynInst; + typedef RefCountingPtr DynInstPtr; + + typedef uint64_t IssueStruct; + + enum { + MaxThreads = 1 + }; +}; + +#endif // __CPU_OZONE_OZONE_IMPL_HH__ diff --git a/src/cpu/ozone/rename_table.cc b/src/cpu/ozone/rename_table.cc new file mode 100644 index 000000000..fff41903e --- /dev/null +++ b/src/cpu/ozone/rename_table.cc @@ -0,0 +1,7 @@ + +#include "cpu/ozone/rename_table_impl.hh" +#include "cpu/ozone/ozone_impl.hh" +#include "cpu/ozone/simple_impl.hh" + +template class RenameTable; +template class RenameTable; diff --git a/src/cpu/ozone/rename_table.hh b/src/cpu/ozone/rename_table.hh new file mode 100644 index 000000000..6ee23b21b --- /dev/null +++ b/src/cpu/ozone/rename_table.hh @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_OZONE_RENAME_TABLE_HH__ +#define __CPU_OZONE_RENAME_TABLE_HH__ + +#include "arch/isa_traits.hh" + +/** Rename table that holds the rename of each architectural register to + * producing DynInst. Needs to support copying from one table to another. + */ + +template +class RenameTable { + public: + typedef typename Impl::DynInstPtr DynInstPtr; + + RenameTable(); + + void copyFrom(const RenameTable &table_to_copy); + + DynInstPtr &operator [] (int index) + { return table[index]; } + + DynInstPtr table[TheISA::TotalNumRegs]; +}; + +#endif // __CPU_OZONE_RENAME_TABLE_HH__ diff --git a/src/cpu/ozone/rename_table_impl.hh b/src/cpu/ozone/rename_table_impl.hh new file mode 100644 index 000000000..86fc1cc55 --- /dev/null +++ b/src/cpu/ozone/rename_table_impl.hh @@ -0,0 +1,23 @@ + +#include // Not really sure what to include to get NULL +#include "cpu/ozone/rename_table.hh" + +template +RenameTable::RenameTable() +{ + // Actually should set these to dummy dyn insts that have the initial value + // and force their values to be initialized. This keeps everything the + // same. 
+ for (int i = 0; i < TheISA::TotalNumRegs; ++i) { + table[i] = NULL; + } +} + +template +void +RenameTable::copyFrom(const RenameTable &table_to_copy) +{ + for (int i = 0; i < TheISA::TotalNumRegs; ++i) { + table[i] = table_to_copy.table[i]; + } +} diff --git a/src/cpu/ozone/simple_impl.hh b/src/cpu/ozone/simple_impl.hh new file mode 100644 index 000000000..961bf2ea9 --- /dev/null +++ b/src/cpu/ozone/simple_impl.hh @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_OZONE_SIMPLE_IMPL_HH__ +#define __CPU_OZONE_SIMPLE_IMPL_HH__ + +#include "arch/isa_traits.hh" +#include "cpu/o3/bpred_unit.hh" +#include "cpu/ozone/cpu.hh" +#include "cpu/ozone/front_end.hh" +#include "cpu/ozone/inorder_back_end.hh" +#include "cpu/ozone/null_predictor.hh" +#include "cpu/ozone/dyn_inst.hh" +#include "cpu/ozone/simple_params.hh" + +//template +//class OzoneCPU; + +template +class OzoneDynInst; + +struct SimpleImpl { + typedef SimpleParams Params; + typedef OzoneCPU OzoneCPU; + typedef OzoneCPU FullCPU; + + // Would like to put these into their own area. 
+// typedef NullPredictor BranchPred; + typedef TwobitBPredUnit BranchPred; + typedef FrontEnd FrontEnd; + // Will need IQ, LSQ eventually + typedef InorderBackEnd BackEnd; + + typedef OzoneDynInst DynInst; + typedef RefCountingPtr DynInstPtr; + + typedef uint64_t IssueStruct; + + enum { + MaxThreads = 1 + }; +}; + +#endif // __CPU_OZONE_SIMPLE_IMPL_HH__ diff --git a/src/cpu/ozone/simple_params.hh b/src/cpu/ozone/simple_params.hh new file mode 100644 index 000000000..647da1781 --- /dev/null +++ b/src/cpu/ozone/simple_params.hh @@ -0,0 +1,165 @@ + + +#ifndef __CPU_OZONE_SIMPLE_PARAMS_HH__ +#define __CPU_OZONE_SIMPLE_PARAMS_HH__ + +#include "cpu/ozone/cpu.hh" + +//Forward declarations +class AlphaDTB; +class AlphaITB; +class FUPool; +class FunctionalMemory; +class MemInterface; +class PageTable; +class Process; +class System; + +/** + * This file defines the parameters that will be used for the OzoneCPU. + * This must be defined externally so that the Impl can have a params class + * defined that it can pass to all of the individual stages. 
+ */ + +class SimpleParams : public BaseCPU::Params +{ + public: + +#if FULL_SYSTEM + AlphaITB *itb; AlphaDTB *dtb; +#else + std::vector workload; +// Process *process; +#endif // FULL_SYSTEM + + //Page Table + PageTable *pTable; + + FunctionalMemory *mem; + + // + // Caches + // + MemInterface *icacheInterface; + MemInterface *dcacheInterface; + + unsigned cachePorts; + unsigned width; + unsigned frontEndWidth; + unsigned backEndWidth; + unsigned backEndSquashLatency; + unsigned backEndLatency; + unsigned maxInstBufferSize; + unsigned numPhysicalRegs; + unsigned maxOutstandingMemOps; + // + // Fetch + // + unsigned decodeToFetchDelay; + unsigned renameToFetchDelay; + unsigned iewToFetchDelay; + unsigned commitToFetchDelay; + unsigned fetchWidth; + + // + // Decode + // + unsigned renameToDecodeDelay; + unsigned iewToDecodeDelay; + unsigned commitToDecodeDelay; + unsigned fetchToDecodeDelay; + unsigned decodeWidth; + + // + // Rename + // + unsigned iewToRenameDelay; + unsigned commitToRenameDelay; + unsigned decodeToRenameDelay; + unsigned renameWidth; + + // + // IEW + // + unsigned commitToIEWDelay; + unsigned renameToIEWDelay; + unsigned issueToExecuteDelay; + unsigned issueWidth; + unsigned executeWidth; + unsigned executeIntWidth; + unsigned executeFloatWidth; + unsigned executeBranchWidth; + unsigned executeMemoryWidth; + FUPool *fuPool; + + // + // Commit + // + unsigned iewToCommitDelay; + unsigned renameToROBDelay; + unsigned commitWidth; + unsigned squashWidth; + + // + // Branch predictor (BP & BTB) + // + unsigned localPredictorSize; + unsigned localCtrBits; + unsigned localHistoryTableSize; + unsigned localHistoryBits; + unsigned globalPredictorSize; + unsigned globalCtrBits; + unsigned globalHistoryBits; + unsigned choicePredictorSize; + unsigned choiceCtrBits; + + unsigned BTBEntries; + unsigned BTBTagSize; + + unsigned RASSize; + + // + // Load store queue + // + unsigned LQEntries; + unsigned SQEntries; + + // + // Memory dependence + // + 
unsigned SSITSize; + unsigned LFSTSize; + + // + // Miscellaneous + // + unsigned numPhysIntRegs; + unsigned numPhysFloatRegs; + unsigned numIQEntries; + unsigned numROBEntries; + + bool decoupledFrontEnd; + int dispatchWidth; + int wbWidth; + + //SMT Parameters + unsigned smtNumFetchingThreads; + + std::string smtFetchPolicy; + + std::string smtIQPolicy; + unsigned smtIQThreshold; + + std::string smtLSQPolicy; + unsigned smtLSQThreshold; + + std::string smtCommitPolicy; + + std::string smtROBPolicy; + unsigned smtROBThreshold; + + // Probably can get this from somewhere. + unsigned instShiftAmt; +}; + +#endif // __CPU_OZONE_SIMPLE_PARAMS_HH__ diff --git a/src/cpu/ozone/thread_state.hh b/src/cpu/ozone/thread_state.hh new file mode 100644 index 000000000..9b5433815 --- /dev/null +++ b/src/cpu/ozone/thread_state.hh @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_OZONE_THREAD_STATE_HH__ +#define __CPU_OZONE_THREAD_STATE_HH__ + +#include "arch/faults.hh" +#include "arch/isa_traits.hh" +#include "cpu/exec_context.hh" +#include "cpu/thread_state.hh" +#include "sim/process.hh" + +class Event; +//class Process; + +#if FULL_SYSTEM +class EndQuiesceEvent; +class FunctionProfile; +class ProfileNode; +#else +class Process; +class FunctionalMemory; +#endif + +// Maybe this ozone thread state should only really have committed state? +// I need to think about why I'm using this and what it's useful for. Clearly +// has benefits for SMT; basically serves same use as CPUExecContext. +// Makes the ExecContext proxy easier. Gives organization/central access point +// to state of a thread that can be accessed normally (i.e. not in-flight +// stuff within a OoO processor). Does this need an XC proxy within it? 
+template +struct OzoneThreadState : public ThreadState { + typedef typename ExecContext::Status Status; + typedef typename Impl::FullCPU FullCPU; + typedef TheISA::MiscReg MiscReg; + +#if FULL_SYSTEM + OzoneThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem) + : ThreadState(-1, _thread_num, _mem), + inSyscall(0), trapPending(0) + { + memset(&regs, 0, sizeof(TheISA::RegFile)); + } +#else + OzoneThreadState(FullCPU *_cpu, int _thread_num, Process *_process, int _asid) + : ThreadState(-1, _thread_num, NULL, _process, _asid), + cpu(_cpu), inSyscall(0), trapPending(0) + { + memset(&regs, 0, sizeof(TheISA::RegFile)); + } + + OzoneThreadState(FullCPU *_cpu, int _thread_num, FunctionalMemory *_mem, + int _asid) + : ThreadState(-1, _thread_num, _mem, NULL, _asid), + cpu(_cpu), inSyscall(0), trapPending(0) + { + memset(&regs, 0, sizeof(TheISA::RegFile)); + } +#endif + + Status _status; + + Status status() const { return _status; } + + void setStatus(Status new_status) { _status = new_status; } + + RenameTable renameTable; + Addr PC; + Addr nextPC; + + // Current instruction + TheISA::MachInst inst; + + TheISA::RegFile regs; + + typename Impl::FullCPU *cpu; + + bool inSyscall; + + bool trapPending; + + ExecContext *xcProxy; + + ExecContext *getXCProxy() { return xcProxy; } + +#if !FULL_SYSTEM + Fault translateInstReq(Request *req) + { + return process->pTable->translate(req); + } + Fault translateDataReadReq(Request *req) + { + return process->pTable->translate(req); + } + Fault translateDataWriteReq(Request *req) + { + return process->pTable->translate(req); + } +#else + Fault translateInstReq(Request *req) + { + return cpu->itb->translate(req); + } + + Fault translateDataReadReq(Request *req) + { + return cpu->dtb->translate(req, false); + } + + Fault translateDataWriteReq(Request *req) + { + return cpu->dtb->translate(req, true); + } +#endif + + MiscReg readMiscReg(int misc_reg) + { + return regs.readMiscReg(misc_reg); + } + + MiscReg readMiscRegWithEffect(int
misc_reg, Fault &fault) + { + return regs.readMiscRegWithEffect(misc_reg, fault, xcProxy); + } + + Fault setMiscReg(int misc_reg, const MiscReg &val) + { + return regs.setMiscReg(misc_reg, val); + } + + Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val) + { + return regs.setMiscRegWithEffect(misc_reg, val, xcProxy); + } + + uint64_t readPC() + { return PC; } + + void setPC(uint64_t val) + { PC = val; } + + uint64_t readNextPC() + { return nextPC; } + + void setNextPC(uint64_t val) + { nextPC = val; } + + bool misspeculating() { return false; } + + void setInst(TheISA::MachInst _inst) { inst = _inst; } + + Counter readFuncExeInst() { return funcExeInst; } + + void setFuncExeInst(Counter new_val) { funcExeInst = new_val; } +}; + +#endif // __CPU_OZONE_THREAD_STATE_HH__ diff --git a/src/cpu/quiesce_event.cc b/src/cpu/quiesce_event.cc new file mode 100644 index 000000000..37814ae09 --- /dev/null +++ b/src/cpu/quiesce_event.cc @@ -0,0 +1,20 @@ + +#include "cpu/exec_context.hh" +#include "cpu/quiesce_event.hh" + +EndQuiesceEvent::EndQuiesceEvent(ExecContext *_xc) + : Event(&mainEventQueue), xc(_xc) +{ +} + +void +EndQuiesceEvent::process() +{ + xc->activate(); +} + +const char* +EndQuiesceEvent::description() +{ + return "End Quiesce Event."; +} diff --git a/src/cpu/quiesce_event.hh b/src/cpu/quiesce_event.hh new file mode 100644 index 000000000..18e88ecce --- /dev/null +++ b/src/cpu/quiesce_event.hh @@ -0,0 +1,23 @@ +#ifndef __CPU_QUIESCE_EVENT_HH__ +#define __CPU_QUIESCE_EVENT_HH__ + +#include "sim/eventq.hh" + +class ExecContext; + +/** Event for timing out quiesce instruction */ +struct EndQuiesceEvent : public Event +{ + /** A pointer to the execution context that is quiesced */ + ExecContext *xc; + + EndQuiesceEvent(ExecContext *_xc); + + /** Event process to occur at interrupt*/ + virtual void process(); + + /** Event description */ + virtual const char *description(); +}; + +#endif // __CPU_QUIESCE_EVENT_HH__ diff --git a/src/cpu/thread_state.hh 
b/src/cpu/thread_state.hh new file mode 100644 index 000000000..e09cb12fd --- /dev/null +++ b/src/cpu/thread_state.hh @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __CPU_THREAD_STATE_HH__ +#define __CPU_THREAD_STATE_HH__ + +#include "cpu/exec_context.hh" + +#if FULL_SYSTEM +class EndQuiesceEvent; +class FunctionProfile; +class ProfileNode; +namespace Kernel { + class Statistics; +}; +#else +class FunctionalMemory; +class Process; +#endif + +/** + * Struct for holding general thread state that is needed across CPU + * models. This includes things such as pointers to the process, + * memory, quiesce events, and certain stats. This can be expanded + * to hold more thread-specific stats within it. + */ +struct ThreadState { +#if FULL_SYSTEM + ThreadState(int _cpuId, int _tid, FunctionalMemory *_mem) + : cpuId(_cpuId), tid(_tid), mem(_mem), lastActivate(0), lastSuspend(0), + profile(NULL), profileNode(NULL), profilePC(0), quiesceEvent(NULL) +#else + ThreadState(int _cpuId, int _tid, FunctionalMemory *_mem, + Process *_process, short _asid) + : cpuId(_cpuId), tid(_tid), mem(_mem), process(_process), asid(_asid) +#endif + { + funcExeInst = 0; + storeCondFailures = 0; + } + + ExecContext::Status status; + + int cpuId; + + // Index of hardware thread context on the CPU that this represents. + int tid; + + Counter numInst; + Stats::Scalar<> numInsts; + Stats::Scalar<> numMemRefs; + + // number of simulated loads + Counter numLoad; + Counter startNumLoad; + + FunctionalMemory *mem; // functional storage for process address space + +#if FULL_SYSTEM + Tick lastActivate; + Tick lastSuspend; + + FunctionProfile *profile; + ProfileNode *profileNode; + Addr profilePC; + + EndQuiesceEvent *quiesceEvent; + + Kernel::Statistics *kernelStats; +#else + Process *process; + + // Address space ID. Note that this is used for TIMING cache + // simulation only; all functional memory accesses should use + // one of the FunctionalMemory pointers above. + short asid; + +#endif + + /** + * Temporary storage to pass the source address from copy_load to + * copy_store. + * @todo Remove this temporary when we have a better way to do it. 
+ */ + Addr copySrcAddr; + /** + * Temp storage for the physical source address of a copy. + * @todo Remove this temporary when we have a better way to do it. + */ + Addr copySrcPhysAddr; + + /* + * number of executed instructions, for matching with syscall trace + * points in EIO files. + */ + Counter funcExeInst; + + // + // Count failed store conditionals so we can warn of apparent + // application deadlock situations. + unsigned storeCondFailures; +}; + +#endif // __CPU_THREAD_STATE_HH__ diff --git a/src/mem/request.hh b/src/mem/request.hh index c69b36c40..468600f3a 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -124,6 +124,13 @@ class Request : validCpuAndThreadNums(false) { setPhys(_paddr, _size, _flags); } + Request(int _asid, Addr _vaddr, int _size, int _flags, Addr _pc, + int _cpuNum, int _threadNum) + { + setThreadContext(_cpuNum, _threadNum); + setVirt(_asid, _vaddr, _size, _flags, _pc); + } + /** * Set up CPU and thread numbers. */ void setThreadContext(int _cpuNum, int _threadNum) diff --git a/src/python/m5/objects/FUPool.py b/src/python/m5/objects/FUPool.py new file mode 100644 index 000000000..5eecfd12f --- /dev/null +++ b/src/python/m5/objects/FUPool.py @@ -0,0 +1,8 @@ +from m5 import * +from FullCPU import OpType +from FullCPU import OpDesc +from FullCPU import FUDesc + +class FUPool(SimObject): + type = 'FUPool' + FUList = VectorParam.FUDesc("list of FU's for this pool") diff --git a/src/python/m5/objects/OzoneCPU.py b/src/python/m5/objects/OzoneCPU.py new file mode 100644 index 000000000..3fca61e28 --- /dev/null +++ b/src/python/m5/objects/OzoneCPU.py @@ -0,0 +1,89 @@ +from m5 import * +from BaseCPU import BaseCPU + +class DerivOzoneCPU(BaseCPU): + type = 'DerivOzoneCPU' + + numThreads = Param.Unsigned("number of HW thread contexts") + + if not build_env['FULL_SYSTEM']: + mem = Param.FunctionalMemory(NULL, "memory") + + checker = Param.BaseCPU("Checker CPU") + + width = Param.Unsigned("Width") + frontEndWidth = Param.Unsigned("Front 
end width") + backEndWidth = Param.Unsigned("Back end width") + backEndSquashLatency = Param.Unsigned("Back end squash latency") + backEndLatency = Param.Unsigned("Back end latency") + maxInstBufferSize = Param.Unsigned("Maximum instruction buffer size") + maxOutstandingMemOps = Param.Unsigned("Maximum number of outstanding memory operations") + decodeToFetchDelay = Param.Unsigned("Decode to fetch delay") + renameToFetchDelay = Param.Unsigned("Rename to fetch delay") + iewToFetchDelay = Param.Unsigned("Issue/Execute/Writeback to fetch " + "delay") + commitToFetchDelay = Param.Unsigned("Commit to fetch delay") + fetchWidth = Param.Unsigned("Fetch width") + + renameToDecodeDelay = Param.Unsigned("Rename to decode delay") + iewToDecodeDelay = Param.Unsigned("Issue/Execute/Writeback to decode " + "delay") + commitToDecodeDelay = Param.Unsigned("Commit to decode delay") + fetchToDecodeDelay = Param.Unsigned("Fetch to decode delay") + decodeWidth = Param.Unsigned("Decode width") + + iewToRenameDelay = Param.Unsigned("Issue/Execute/Writeback to rename " + "delay") + commitToRenameDelay = Param.Unsigned("Commit to rename delay") + decodeToRenameDelay = Param.Unsigned("Decode to rename delay") + renameWidth = Param.Unsigned("Rename width") + + commitToIEWDelay = Param.Unsigned("Commit to " + "Issue/Execute/Writeback delay") + renameToIEWDelay = Param.Unsigned("Rename to " + "Issue/Execute/Writeback delay") + issueToExecuteDelay = Param.Unsigned("Issue to execute delay (internal " + "to the IEW stage)") + issueWidth = Param.Unsigned("Issue width") + executeWidth = Param.Unsigned("Execute width") + executeIntWidth = Param.Unsigned("Integer execute width") + executeFloatWidth = Param.Unsigned("Floating point execute width") + executeBranchWidth = Param.Unsigned("Branch execute width") + executeMemoryWidth = Param.Unsigned("Memory execute width") + + iewToCommitDelay = Param.Unsigned("Issue/Execute/Writeback to commit " + "delay") + renameToROBDelay = Param.Unsigned("Rename to 
reorder buffer delay") + commitWidth = Param.Unsigned("Commit width") + squashWidth = Param.Unsigned("Squash width") + + localPredictorSize = Param.Unsigned("Size of local predictor") + localCtrBits = Param.Unsigned("Bits per counter") + localHistoryTableSize = Param.Unsigned("Size of local history table") + localHistoryBits = Param.Unsigned("Bits for the local history") + globalPredictorSize = Param.Unsigned("Size of global predictor") + globalCtrBits = Param.Unsigned("Bits per counter") + globalHistoryBits = Param.Unsigned("Bits of history") + choicePredictorSize = Param.Unsigned("Size of choice predictor") + choiceCtrBits = Param.Unsigned("Bits of choice counters") + + BTBEntries = Param.Unsigned("Number of BTB entries") + BTBTagSize = Param.Unsigned("Size of the BTB tags, in bits") + + RASSize = Param.Unsigned("RAS size") + + LQEntries = Param.Unsigned("Number of load queue entries") + SQEntries = Param.Unsigned("Number of store queue entries") + LFSTSize = Param.Unsigned("Last fetched store table size") + SSITSize = Param.Unsigned("Store set ID table size") + + numPhysIntRegs = Param.Unsigned("Number of physical integer registers") + numPhysFloatRegs = Param.Unsigned("Number of physical floating point " + "registers") + numIQEntries = Param.Unsigned("Number of instruction queue entries") + numROBEntries = Param.Unsigned("Number of reorder buffer entries") + + instShiftAmt = Param.Unsigned("Number of bits to shift instructions by") + + function_trace = Param.Bool(False, "Enable function trace") + function_trace_start = Param.Tick(0, "Cycle to start function trace") diff --git a/src/python/m5/objects/SimpleOzoneCPU.py b/src/python/m5/objects/SimpleOzoneCPU.py new file mode 100644 index 000000000..0d6403383 --- /dev/null +++ b/src/python/m5/objects/SimpleOzoneCPU.py @@ -0,0 +1,86 @@ +from m5 import * +from BaseCPU import BaseCPU + +class SimpleOzoneCPU(BaseCPU): + type = 'SimpleOzoneCPU' + + numThreads = Param.Unsigned("number of HW thread contexts") + + if not 
build_env['FULL_SYSTEM']: + mem = Param.FunctionalMemory(NULL, "memory") + + width = Param.Unsigned("Width") + frontEndWidth = Param.Unsigned("Front end width") + backEndWidth = Param.Unsigned("Back end width") + backEndSquashLatency = Param.Unsigned("Back end squash latency") + backEndLatency = Param.Unsigned("Back end latency") + maxInstBufferSize = Param.Unsigned("Maximum instruction buffer size") + decodeToFetchDelay = Param.Unsigned("Decode to fetch delay") + renameToFetchDelay = Param.Unsigned("Rename to fetch delay") + iewToFetchDelay = Param.Unsigned("Issue/Execute/Writeback to fetch " + "delay") + commitToFetchDelay = Param.Unsigned("Commit to fetch delay") + fetchWidth = Param.Unsigned("Fetch width") + + renameToDecodeDelay = Param.Unsigned("Rename to decode delay") + iewToDecodeDelay = Param.Unsigned("Issue/Execute/Writeback to decode " + "delay") + commitToDecodeDelay = Param.Unsigned("Commit to decode delay") + fetchToDecodeDelay = Param.Unsigned("Fetch to decode delay") + decodeWidth = Param.Unsigned("Decode width") + + iewToRenameDelay = Param.Unsigned("Issue/Execute/Writeback to rename " + "delay") + commitToRenameDelay = Param.Unsigned("Commit to rename delay") + decodeToRenameDelay = Param.Unsigned("Decode to rename delay") + renameWidth = Param.Unsigned("Rename width") + + commitToIEWDelay = Param.Unsigned("Commit to " + "Issue/Execute/Writeback delay") + renameToIEWDelay = Param.Unsigned("Rename to " + "Issue/Execute/Writeback delay") + issueToExecuteDelay = Param.Unsigned("Issue to execute delay (internal " + "to the IEW stage)") + issueWidth = Param.Unsigned("Issue width") + executeWidth = Param.Unsigned("Execute width") + executeIntWidth = Param.Unsigned("Integer execute width") + executeFloatWidth = Param.Unsigned("Floating point execute width") + executeBranchWidth = Param.Unsigned("Branch execute width") + executeMemoryWidth = Param.Unsigned("Memory execute width") + + iewToCommitDelay = Param.Unsigned("Issue/Execute/Writeback to commit " 
+ "delay") + renameToROBDelay = Param.Unsigned("Rename to reorder buffer delay") + commitWidth = Param.Unsigned("Commit width") + squashWidth = Param.Unsigned("Squash width") + + localPredictorSize = Param.Unsigned("Size of local predictor") + localCtrBits = Param.Unsigned("Bits per counter") + localHistoryTableSize = Param.Unsigned("Size of local history table") + localHistoryBits = Param.Unsigned("Bits for the local history") + globalPredictorSize = Param.Unsigned("Size of global predictor") + globalCtrBits = Param.Unsigned("Bits per counter") + globalHistoryBits = Param.Unsigned("Bits of history") + choicePredictorSize = Param.Unsigned("Size of choice predictor") + choiceCtrBits = Param.Unsigned("Bits of choice counters") + + BTBEntries = Param.Unsigned("Number of BTB entries") + BTBTagSize = Param.Unsigned("Size of the BTB tags, in bits") + + RASSize = Param.Unsigned("RAS size") + + LQEntries = Param.Unsigned("Number of load queue entries") + SQEntries = Param.Unsigned("Number of store queue entries") + LFSTSize = Param.Unsigned("Last fetched store table size") + SSITSize = Param.Unsigned("Store set ID table size") + + numPhysIntRegs = Param.Unsigned("Number of physical integer registers") + numPhysFloatRegs = Param.Unsigned("Number of physical floating point " + "registers") + numIQEntries = Param.Unsigned("Number of instruction queue entries") + numROBEntries = Param.Unsigned("Number of reorder buffer entries") + + instShiftAmt = Param.Unsigned("Number of bits to shift instructions by") + + function_trace = Param.Bool(False, "Enable function trace") + function_trace_start = Param.Tick(0, "Cycle to start function trace")