Yet another merge with the main repository.

[gem5.git] / src / cpu / inorder / resources / cache_unit.cc
diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc

index 2ab9e889ec6ed3815bf303c2bb275741ca51ed4c..0ab9f0579a084eb0153bb8f48d047e288f5e4350 100644 (file)
--- a/src/cpu/inorder/resources/cache_unit.cc
+++ b/src/cpu/inorder/resources/cache_unit.cc
@@ -29,24 +29,33 @@
   *
   */
  
-#include <vector>
  #include <list>
+#include <vector>
  
  #include "arch/isa_traits.hh"
  #include "arch/locked_mem.hh"
-#include "arch/utility.hh"
  #include "arch/predecoder.hh"
+#include "arch/utility.hh"
  #include "config/the_isa.hh"
  #include "cpu/inorder/resources/cache_unit.hh"
-#include "cpu/inorder/pipeline_traits.hh"
  #include "cpu/inorder/cpu.hh"
+#include "cpu/inorder/pipeline_traits.hh"
  #include "cpu/inorder/resource_pool.hh"
+#include "debug/Activity.hh"
+#include "debug/AddrDep.hh"
+#include "debug/InOrderCachePort.hh"
+#include "debug/InOrderStall.hh"
+#include "debug/InOrderTLB.hh"
+#include "debug/LLSC.hh"
+#include "debug/RefCount.hh"
+#include "debug/ThreadModel.hh"
  #include "mem/request.hh"
  
  using namespace std;
  using namespace TheISA;
  using namespace ThePipeline;
  
+#if TRACING_ON
  static std::string
  printMemData(uint8_t *data, unsigned size)
  {
@@ -56,33 +65,40 @@ printMemData(uint8_t *data, unsigned size)
      }
      return dataStr.str();
  }
+#endif
  
  Tick
  CacheUnit::CachePort::recvAtomic(PacketPtr pkt)
  {
-    panic("CacheUnit::CachePort doesn't expect recvAtomic callback!");
-    return curTick;
+    panic("%s doesn't expect recvAtomic callback!", cachePortUnit->name());
+    return curTick();
  }
  
  void
  CacheUnit::CachePort::recvFunctional(PacketPtr pkt)
  {
-    panic("CacheUnit::CachePort doesn't expect recvFunctional callback!");
+    DPRINTF(InOrderCachePort, "Doesn't update state on a recvFunctional."
+            "Ignoring packet for %x.\n", pkt->getAddr());
  }
  
  void
-CacheUnit::CachePort::recvStatusChange(Status status)
+CacheUnit::CachePort::recvRangeChange()
  {
-    if (status == RangeChange)
-        return;
-
-    panic("CacheUnit::CachePort doesn't expect recvStatusChange callback!");
  }
  
  bool
  CacheUnit::CachePort::recvTiming(Packet *pkt)
  {
-    cachePortUnit->processCacheCompletion(pkt);
+    if (pkt->isError())
+        DPRINTF(InOrderCachePort, "Got error packet back for address: %x\n",
+                pkt->getAddr());
+    else if (pkt->isResponse())
+        cachePortUnit->processCacheCompletion(pkt);
+    else {
+        //@note: depending on consistency model, update here
+        DPRINTF(InOrderCachePort, "Received snoop pkt %x,Ignoring\n", pkt->getAddr());
+    }
+
      return true;
  }
  
@@ -95,7 +111,7 @@ CacheUnit::CachePort::recvRetry()
  CacheUnit::CacheUnit(string res_name, int res_id, int res_width,
          int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params)
      : Resource(res_name, res_id, res_width, res_latency, _cpu),
-      cachePortBlocked(false), predecoder(NULL)
+      cachePortBlocked(false)
  {
      cachePort = new CachePort(this);
  
@@ -109,6 +125,7 @@ CacheUnit::CacheUnit(string res_name, int res_id, int res_width,
  
      for (int i=0; i < MaxThreads; i++) {
          tlbBlocked[i] = false;
+        tlbBlockSeqNum[i] = 0;
      }
  }
  
@@ -131,9 +148,12 @@ CacheUnit::getPort(const string &if_name, int idx)
  void
  CacheUnit::init()
  {
-    // Currently Used to Model TLB Latency. Eventually
-    // Switch to Timing TLB translations.
-    resourceEvent = new CacheUnitEvent[width];
+    for (int i = 0; i < width; i++) {
+        reqs[i] = new CacheRequest(this);
+    }
+
+    cacheBlkSize = this->cachePort->peerBlockSize();
+    cacheBlkMask = cacheBlkSize  - 1;
  
      initSlots();
  }
@@ -142,8 +162,7 @@ int
  CacheUnit::getSlot(DynInstPtr inst)
  {
      ThreadID tid = inst->readTid();
-    
-    if (tlbBlocked[inst->threadNumber]) {
+    if (tlbBlocked[tid]) {
          return -1;
      }
  
@@ -154,40 +173,11 @@ CacheUnit::getSlot(DynInstPtr inst)
                "cache access\n", inst->readTid(), inst->seqNum);
      }
  
-    Addr req_addr = inst->getMemAddr();
-
-    if (resName == "icache_port" ||
-        find(addrList[tid].begin(), addrList[tid].end(), req_addr) == 
-        addrList[tid].end()) {
-
-        int new_slot = Resource::getSlot(inst);
-
-        if (new_slot == -1)
-            return -1;
-
-        inst->memTime = curTick;
-        setAddrDependency(inst);            
-        return new_slot;
-    } else {
-        // Allow same instruction multiple accesses to same address
-        // should only happen maybe after a squashed inst. needs to replay
-        if (addrMap[tid][req_addr] == inst->seqNum) {
-            int new_slot = Resource::getSlot(inst);
-        
-            if (new_slot == -1)
-                return -1;     
-
-            return new_slot;       
-        } else {                    
-            DPRINTF(InOrderCachePort,
-                "[tid:%i] Denying request because there is an outstanding"
-                " request to/for addr. %08p. by [sn:%i] @ tick %i\n",
-                inst->readTid(), req_addr, addrMap[tid][req_addr], inst->memTime);
-            return -1;
-        }        
-    }
-
-    return -1;   
+    int new_slot = Resource::getSlot(inst);
+    inst->memTime = curTick();
+    //@note: add back in if you want speculative load/store capability
+    //setAddrDependency(inst);
+    return new_slot;
  }
  
  void
@@ -200,16 +190,17 @@ CacheUnit::setAddrDependency(DynInstPtr inst)
      addrMap[tid][req_addr] = inst->seqNum;
  
      DPRINTF(AddrDep,
-            "[tid:%i]: [sn:%i]: Address %08p added to dependency list\n",
-            inst->readTid(), inst->seqNum, req_addr);
+            "[tid:%i]: [sn:%i]: Address %08p added to dependency list (size=%i)\n",
+            inst->readTid(), inst->seqNum, req_addr, addrList[tid].size());
  
-    //@NOTE: 10 is an arbitrarily "high" number here, but to be exact
+    //@NOTE: 10 is an arbitrarily "high" number, but to be exact
      //       we would need to know the # of outstanding accesses
      //       a priori. Information like fetch width, stage width,
-    //       and the branch resolution stage would be useful for the
-    //       icache_port (among other things). For the dcache, the #
-    //       of outstanding cache accesses might be sufficient.
-    assert(addrList[tid].size() < 10);    
+    //       fetch buffer, and the branch resolution stage would be
+    //       useful for the icache_port. For the dcache port, the #
+    //       of outstanding cache accesses (mshrs) would be a good
+    //       sanity check here.
+    //assert(addrList[tid].size() < 10);
  }
  
  void
@@ -222,17 +213,17 @@ CacheUnit::removeAddrDependency(DynInstPtr inst)
      inst->unsetMemAddr();
  
      // Erase from Address List
-    vector<Addr>::iterator vect_it = find(addrList[tid].begin(),
+    std::list<Addr>::iterator list_it = find(addrList[tid].begin(),
                                            addrList[tid].end(),
                                            mem_addr);
-    assert(vect_it != addrList[tid].end() || inst->splitInst);
+    assert(list_it != addrList[tid].end() || inst->splitInst);
  
-    if (vect_it != addrList[tid].end()) {
+    if (list_it != addrList[tid].end()) {
          DPRINTF(AddrDep,
                  "[tid:%i]: [sn:%i] Address %08p removed from dependency "
-                "list\n", inst->readTid(), inst->seqNum, (*vect_it));
+                "list\n", inst->readTid(), inst->seqNum, (*list_it));
  
-        addrList[tid].erase(vect_it);
+        addrList[tid].erase(list_it);
  
          // Erase From Address Map (Used for Debugging)
          addrMap[tid].erase(addrMap[tid].find(mem_addr));
@@ -244,42 +235,34 @@ CacheUnit::removeAddrDependency(DynInstPtr inst)
  ResReqPtr
  CacheUnit::findRequest(DynInstPtr inst)
  {
-    map<int, ResReqPtr>::iterator map_it = reqMap.begin();
-    map<int, ResReqPtr>::iterator map_end = reqMap.end();
-
-    while (map_it != map_end) {
+    for (int i = 0; i < width; i++) {
          CacheRequest* cache_req =
-            dynamic_cast<CacheRequest*>((*map_it).second);
+            dynamic_cast<CacheRequest*>(reqs[i]);
          assert(cache_req);
  
-        if (cache_req &&
+        if (cache_req->valid &&
              cache_req->getInst() == inst &&
-            cache_req->instIdx == inst->resSched.top()->idx) {
+            cache_req->instIdx == inst->curSkedEntry->idx) {
              return cache_req;
          }
-        map_it++;
      }
  
      return NULL;
  }
  
  ResReqPtr
-CacheUnit::findSplitRequest(DynInstPtr inst, int idx)
+CacheUnit::findRequest(DynInstPtr inst, int idx)
  {
-    map<int, ResReqPtr>::iterator map_it = reqMap.begin();
-    map<int, ResReqPtr>::iterator map_end = reqMap.end();
-
-    while (map_it != map_end) {
+    for (int i = 0; i < width; i++) {
          CacheRequest* cache_req =
-            dynamic_cast<CacheRequest*>((*map_it).second);
+            dynamic_cast<CacheRequest*>(reqs[i]);
          assert(cache_req);
  
-        if (cache_req &&
+        if (cache_req->valid &&
              cache_req->getInst() == inst &&
              cache_req->instIdx == idx) {
              return cache_req;
          }
-        map_it++;
      }
  
      return NULL;
@@ -290,7 +273,8 @@ ResReqPtr
  CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx,
                       int slot_num, unsigned cmd)
  {
-    ScheduleEntry* sched_entry = inst->resSched.top();
+    ScheduleEntry* sched_entry = *inst->curSkedEntry;
+    CacheRequest* cache_req = dynamic_cast<CacheRequest*>(reqs[slot_num]);
  
      if (!inst->validMemAddr()) {
          panic("Mem. Addr. must be set before requesting cache access\n");
@@ -332,23 +316,15 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx,
                  inst->readTid(), inst->seqNum, inst->getMemAddr());
          break;
  
-      case InitiateFetch:
-        pkt_cmd = MemCmd::ReadReq;
-
-        DPRINTF(InOrderCachePort,
-                "[tid:%i]: Fetch request from [sn:%i] for addr %08p\n",
-                inst->readTid(), inst->seqNum, inst->getMemAddr());
-        break;
-
        default:
-        panic("%i: Unexpected request type (%i) to %s", curTick,
+        panic("%i: Unexpected request type (%i) to %s", curTick(),
                sched_entry->cmd, name());
      }
  
-    return new CacheRequest(this, inst, stage_num, id, slot_num,
-                            sched_entry->cmd, 0, pkt_cmd,
-                            0/*flags*/, this->cpu->readCpuId(),
-                            inst->resSched.top()->idx);
+    cache_req->setRequest(inst, stage_num, id, slot_num,
+                          sched_entry->cmd, pkt_cmd,
+                          inst->curSkedEntry->idx);
+    return cache_req;
  }
  
  void
@@ -359,17 +335,17 @@ CacheUnit::requestAgain(DynInstPtr inst, bool &service_request)
  
      // Check to see if this instruction is requesting the same command
      // or a different one
-    if (cache_req->cmd != inst->resSched.top()->cmd &&
-        cache_req->instIdx == inst->resSched.top()->idx) {
+    if (cache_req->cmd != inst->curSkedEntry->cmd &&
+        cache_req->instIdx == inst->curSkedEntry->idx) {
          // If different, then update command in the request
-        cache_req->cmd = inst->resSched.top()->cmd;
+        cache_req->cmd = inst->curSkedEntry->cmd;
          DPRINTF(InOrderCachePort,
                  "[tid:%i]: [sn:%i]: Updating the command for this "
-                "instruction\n ", inst->readTid(), inst->seqNum);
+                "instruction\n", inst->readTid(), inst->seqNum);
  
          service_request = true;
-    } else if (inst->resSched.top()->idx != CacheUnit::InitSecondSplitRead &&
-               inst->resSched.top()->idx != CacheUnit::InitSecondSplitWrite) {        
+    } else if (inst->curSkedEntry->idx != CacheUnit::InitSecondSplitRead &&
+               inst->curSkedEntry->idx != CacheUnit::InitSecondSplitWrite) {
          // If same command, just check to see if memory access was completed
          // but dont try to re-execute
          DPRINTF(InOrderCachePort,
@@ -380,72 +356,89 @@ CacheUnit::requestAgain(DynInstPtr inst, bool &service_request)
      }
  }
  
-Fault
-CacheUnit::doTLBAccess(DynInstPtr inst, CacheReqPtr cache_req, int acc_size,
-                       int flags, TheISA::TLB::Mode tlb_mode)
+void
+CacheUnit::setupMemRequest(DynInstPtr inst, CacheReqPtr cache_req,
+                           int acc_size, int flags)
  {
      ThreadID tid = inst->readTid();
      Addr aligned_addr = inst->getMemAddr();
-    unsigned stage_num = cache_req->getStageNum();
-    unsigned slot_idx = cache_req->getSlot();
  
-    if (tlb_mode == TheISA::TLB::Execute) {
-            inst->fetchMemReq = new Request(inst->readTid(), aligned_addr,
-                                            acc_size, flags, inst->readPC(),
-                                            cpu->readCpuId(), inst->readTid());
-            cache_req->memReq = inst->fetchMemReq;
+    if (!cache_req->is2ndSplit()) {
+        if (cache_req->memReq == NULL) {
+            cache_req->memReq =
+                new Request(cpu->asid[tid], aligned_addr, acc_size, flags,
+                            inst->instAddr(),
+                            cpu->readCpuId(), //@todo: use context id
+                            tid);
+        }
      } else {
-        if (!cache_req->is2ndSplit()) {            
-            inst->dataMemReq = new Request(cpu->asid[tid], aligned_addr,
-                                           acc_size, flags, inst->readPC(),
-                                           cpu->readCpuId(), inst->readTid());
-            cache_req->memReq = inst->dataMemReq;
-        } else {
-            assert(inst->splitInst);
-            
+        assert(inst->splitInst);
+
+        if (inst->splitMemReq == NULL) {
              inst->splitMemReq = new Request(cpu->asid[tid], 
                                              inst->split2ndAddr,
                                              acc_size, 
                                              flags, 
-                                            inst->readPC(),
+                                            inst->instAddr(),
                                              cpu->readCpuId(), 
                                              tid);
-            cache_req->memReq = inst->splitMemReq;            
          }
+
+        cache_req->memReq = inst->splitMemReq;
      }
-    
+}
+
+void
+CacheUnit::doTLBAccess(DynInstPtr inst, CacheReqPtr cache_req, int acc_size,
+                       int flags, TheISA::TLB::Mode tlb_mode)
+{
+    ThreadID tid = inst->readTid();
+
+    setupMemRequest(inst, cache_req, acc_size, flags);
+
+    //@todo: HACK: the DTB expects the correct PC in the ThreadContext,
+    //       but what if the memory accesses are speculative? Shouldn't
+    //       we send along the requestor's PC to the translate functions?
+    ThreadContext *tc = cpu->thread[tid]->getTC();
+    PCState old_pc = tc->pcState();
+    tc->pcState() = inst->pcState();
  
-    cache_req->fault =
-        _tlb->translateAtomic(cache_req->memReq,
-                              cpu->thread[tid]->getTC(), tlb_mode);
+    inst->fault =
+        _tlb->translateAtomic(cache_req->memReq, tc, tlb_mode);
+    tc->pcState() = old_pc;
  
-    if (cache_req->fault != NoFault) {
+    if (inst->fault != NoFault) {
          DPRINTF(InOrderTLB, "[tid:%i]: %s encountered while translating "
-                "addr:%08p for [sn:%i].\n", tid, cache_req->fault->name(),
+                "addr:%08p for [sn:%i].\n", tid, inst->fault->name(),
                  cache_req->memReq->getVaddr(), inst->seqNum);
  
-        cpu->pipelineStage[stage_num]->setResStall(cache_req, tid);
-
          tlbBlocked[tid] = true;
+        tlbBlockSeqNum[tid] = inst->seqNum;
  
-        cache_req->tlbStall = true;
-
-        scheduleEvent(slot_idx, 1);
+        // Make sure nothing gets executed until after this faulting
+        // instruction gets handled.
+        inst->setSerializeAfter();
  
-        cpu->trap(cache_req->fault, tid);
+        // Mark it as complete so it can pass through next stage.
+        // Fault Handling will happen at commit/graduation
+        cache_req->setCompleted();
      } else {
          DPRINTF(InOrderTLB, "[tid:%i]: [sn:%i] virt. addr %08p translated "
                  "to phys. addr:%08p.\n", tid, inst->seqNum,
                  cache_req->memReq->getVaddr(),
                  cache_req->memReq->getPaddr());
      }
+}
  
-    return cache_req->fault;
+void
+CacheUnit::trap(Fault fault, ThreadID tid, DynInstPtr inst)
+{
+    tlbBlocked[tid] = false;
  }
  
-template <class T>
  Fault
-CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags)
+CacheUnit::read(DynInstPtr inst, Addr addr,
+                uint8_t *data, unsigned size, unsigned flags)
  {
      CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(findRequest(inst));
      assert(cache_req && "Can't Find Instruction for Read!");
@@ -454,14 +447,15 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags)
      unsigned blockSize = this->cachePort->peerBlockSize();
  
      //The size of the data we're trying to read.
-    int dataSize = sizeof(T);
+    int fullSize = size;
+    inst->totalSize = size;
  
      if (inst->traceData) {
          inst->traceData->setAddr(addr);
      }
  
      if (inst->split2ndAccess) {     
-        dataSize = inst->split2ndSize;
+        size = inst->split2ndSize;
          cache_req->splitAccess = true;        
          cache_req->split2ndAccess = true;
          
@@ -473,66 +467,55 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags)
  
      //The address of the second part of this access if it needs to be split
      //across a cache line boundary.
-    Addr secondAddr = roundDown(addr + dataSize - 1, blockSize);
+    Addr secondAddr = roundDown(addr + size - 1, blockSize);
  
      
      if (secondAddr > addr && !inst->split2ndAccess) {
-        DPRINTF(InOrderCachePort, "%i: sn[%i] Split Read Access (1 of 2) for "
-                "(%#x, %#x).\n", curTick, inst->seqNum, addr, secondAddr);
-        
-        // Save All "Total" Split Information
-        // ==============================
-        inst->splitInst = true;        
-        inst->splitMemData = new uint8_t[dataSize];
-        inst->splitTotalSize = dataSize;
-        
-        if (!inst->splitInstSked) {
-            // Schedule Split Read/Complete for Instruction
+
+        if (!inst->splitInst) {
+            DPRINTF(InOrderCachePort, "%i: sn[%i] Split Read Access (1 of 2) for "
+                    "(%#x, %#x).\n", curTick(), inst->seqNum, addr, secondAddr);
+
+            unsigned stage_num = cache_req->getStageNum();
+            unsigned cmd = inst->curSkedEntry->cmd;
+
+            // 1. Make A New Inst. Schedule w/Split Read/Complete Entered on
+            // the schedule
              // ==============================
-            int stage_num = cache_req->getStageNum();
-        
-            int stage_pri = ThePipeline::getNextPriority(inst, stage_num);
-        
-            int isplit_cmd = CacheUnit::InitSecondSplitRead;
-            inst->resSched.push(new
-                                ScheduleEntry(stage_num,
-                                              stage_pri,
-                                              cpu->resPool->getResIdx(DCache),
-                                              isplit_cmd,
-                                              1));
-
-            int csplit_cmd = CacheUnit::CompleteSecondSplitRead;
-            inst->resSched.push(new
-                                ScheduleEntry(stage_num + 1,
-                                              1/*stage_pri*/,
-                                              cpu->resPool->getResIdx(DCache),
-                                              csplit_cmd,
-                                              1));
-            inst->splitInstSked = true;
+            // 2. Reassign curSkedEntry to current command (InitiateRead) on new
+            // schedule
+            // ==============================
+            inst->splitInst = true;
+            inst->setBackSked(cpu->createBackEndSked(inst));
+            inst->curSkedEntry = inst->backSked->find(stage_num, cmd);
          } else {
              DPRINTF(InOrderCachePort, "[tid:%i] [sn:%i] Retrying Split Read "
                      "Access (1 of 2) for (%#x, %#x).\n", inst->readTid(),
                      inst->seqNum, addr, secondAddr);
          }
  
+        // Save All "Total" Split Information
+        // ==============================
+        inst->splitMemData = new uint8_t[size];
+
          // Split Information for First Access
          // ==============================
-        dataSize = secondAddr - addr;
+        size = secondAddr - addr;
          cache_req->splitAccess = true;
  
          // Split Information for Second Access
          // ==============================
-        inst->split2ndSize = addr + sizeof(T) - secondAddr;
+        inst->split2ndSize = addr + fullSize - secondAddr;
          inst->split2ndAddr = secondAddr;            
-        inst->split2ndDataPtr = inst->splitMemData + dataSize;            
+        inst->split2ndDataPtr = inst->splitMemData + size;
          inst->split2ndFlags = flags;        
      }
      
-    doTLBAccess(inst, cache_req, dataSize, flags, TheISA::TLB::Read);
+    doTLBAccess(inst, cache_req, size, flags, TheISA::TLB::Read);
  
-    if (cache_req->fault == NoFault) {
+    if (inst->fault == NoFault) {
          if (!cache_req->splitAccess) {            
-            cache_req->reqData = new uint8_t[dataSize];
+            cache_req->reqData = new uint8_t[size];
              doCacheAccess(inst, NULL);
          } else {
              if (!inst->split2ndAccess) {                
@@ -545,13 +528,12 @@ CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags)
          }        
      }
  
-    return cache_req->fault;
+    return inst->fault;
  }
  
-template <class T>
  Fault
-CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags,
-            uint64_t *write_res)
+CacheUnit::write(DynInstPtr inst, uint8_t *data, unsigned size,
+                 Addr addr, unsigned flags, uint64_t *write_res)
  {
      CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(findRequest(inst));
      assert(cache_req && "Can't Find Instruction for Write!");
@@ -559,16 +541,16 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags,
      // The block size of our peer
      unsigned blockSize = this->cachePort->peerBlockSize();
  
-    //The size of the data we're trying to read.
-    int dataSize = sizeof(T);
+    //The size of the data we're trying to write.
+    int fullSize = size;
+    inst->totalSize = size;
  
      if (inst->traceData) {
          inst->traceData->setAddr(addr);
-        inst->traceData->setData(data);
      }
  
      if (inst->split2ndAccess) {     
-        dataSize = inst->split2ndSize;
+        size = inst->split2ndSize;
          cache_req->splitAccess = true;        
          cache_req->split2ndAccess = true;
          
@@ -579,7 +561,7 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags,
  
      //The address of the second part of this access if it needs to be split
      //across a cache line boundary.
-    Addr secondAddr = roundDown(addr + dataSize - 1, blockSize);
+    Addr secondAddr = roundDown(addr + size - 1, blockSize);
  
      if (secondAddr > addr && !inst->split2ndAccess) {
              
@@ -589,30 +571,35 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags,
          // Save All "Total" Split Information
          // ==============================
          inst->splitInst = true;        
-        inst->splitTotalSize = dataSize;
  
          if (!inst->splitInstSked) {
+            assert(0 && "Split Requests Not Supported for Now...");
+
              // Schedule Split Read/Complete for Instruction
              // ==============================
              int stage_num = cache_req->getStageNum();
+            RSkedPtr inst_sked = (stage_num >= ThePipeline::BackEndStartStage) ?
+                inst->backSked : inst->frontSked;
          
-            int stage_pri = ThePipeline::getNextPriority(inst, stage_num);
+            // this is just an arbitrarily high priority to ensure that this
+            // gets pushed to the back of the list
+            int stage_pri = 20;
          
              int isplit_cmd = CacheUnit::InitSecondSplitWrite;
-            inst->resSched.push(new
-                                ScheduleEntry(stage_num,
-                                              stage_pri,
-                                              cpu->resPool->getResIdx(DCache),
-                                              isplit_cmd,
-                                              1));
+            inst_sked->push(new
+                            ScheduleEntry(stage_num,
+                                          stage_pri,
+                                          cpu->resPool->getResIdx(DCache),
+                                          isplit_cmd,
+                                          1));
  
              int csplit_cmd = CacheUnit::CompleteSecondSplitWrite;
-            inst->resSched.push(new
-                                ScheduleEntry(stage_num + 1,
-                                              1/*stage_pri*/,
-                                              cpu->resPool->getResIdx(DCache),
-                                              csplit_cmd,
-                                              1));
+            inst_sked->push(new
+                            ScheduleEntry(stage_num + 1,
+                                          1/*stage_pri*/,
+                                          cpu->resPool->getResIdx(DCache),
+                                          csplit_cmd,
+                                          1));
              inst->splitInstSked = true;
          } else {
              DPRINTF(InOrderCachePort, "[tid:%i] sn:%i] Retrying Split Read "
@@ -624,25 +611,27 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags,
  
          // Split Information for First Access
          // ==============================
-        dataSize = secondAddr - addr;
+        size = secondAddr - addr;
          cache_req->splitAccess = true;
  
          // Split Information for Second Access
          // ==============================
-        inst->split2ndSize = addr + sizeof(T) - secondAddr;
+        inst->split2ndSize = addr + fullSize - secondAddr;
          inst->split2ndAddr = secondAddr;            
-        inst->split2ndStoreDataPtr = &cache_req->inst->storeData;
-        inst->split2ndStoreDataPtr += dataSize;            
          inst->split2ndFlags = flags;        
          inst->splitInstSked = true;
      }    
          
-    doTLBAccess(inst, cache_req, dataSize, flags, TheISA::TLB::Write);
+    doTLBAccess(inst, cache_req, size, flags, TheISA::TLB::Write);
+
+    if (inst->fault == NoFault) {
+        if (!cache_req->splitAccess) {
+            cache_req->reqData = new uint8_t[size];
+            memcpy(cache_req->reqData, data, size);
+
+            //inst->split2ndStoreDataPtr = cache_req->reqData;
+            //inst->split2ndStoreDataPtr += size;
  
-    if (cache_req->fault == NoFault) {
-        if (!cache_req->splitAccess) {            
-            // Remove this line since storeData is saved in INST?
-            cache_req->reqData = new uint8_t[dataSize];
              doCacheAccess(inst, write_res);
          } else {            
              doCacheAccess(inst, write_res, cache_req);            
@@ -650,63 +639,63 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags,
          
      }
      
-    return cache_req->fault;
+    return inst->fault;
  }
  
  
  void
  CacheUnit::execute(int slot_num)
  {
-    if (cachePortBlocked) {
+    CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(reqs[slot_num]);
+    assert(cache_req);
+
+    if (cachePortBlocked &&
+        (cache_req->cmd == InitiateReadData ||
+         cache_req->cmd == InitiateWriteData ||
+         cache_req->cmd == InitSecondSplitRead ||
+         cache_req->cmd == InitSecondSplitWrite)) {
          DPRINTF(InOrderCachePort, "Cache Port Blocked. Cannot Access\n");
+        cache_req->done(false);
          return;
      }
  
-    CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(reqMap[slot_num]);
-    assert(cache_req);
-
      DynInstPtr inst = cache_req->inst;
+    if (inst->fault != NoFault) {
+        DPRINTF(InOrderCachePort,
+                "[tid:%i]: [sn:%i]: Detected %s fault @ %x. Forwarding to "
+                "next stage.\n", inst->readTid(), inst->seqNum, inst->fault->name(),
+                inst->getMemAddr());
+        finishCacheUnitReq(inst, cache_req);
+        return;
+    }
+
+    if (inst->isSquashed()) {
+        DPRINTF(InOrderCachePort,
+                "[tid:%i]: [sn:%i]: Detected squashed instruction "
+                "next stage.\n", inst->readTid(), inst->seqNum);
+        finishCacheUnitReq(inst, cache_req);
+        return;
+    }
+
  #if TRACING_ON
      ThreadID tid = inst->readTid();
-    int seq_num = inst->seqNum;
      std::string acc_type = "write";
-    
  #endif
  
-    cache_req->fault = NoFault;
-
      switch (cache_req->cmd)
      {
-      case InitiateFetch:
-        {
-            //@TODO: Switch to size of full cache block. Store in fetch buffer
-            int acc_size =  sizeof(TheISA::MachInst);
-
-            doTLBAccess(inst, cache_req, acc_size, 0, TheISA::TLB::Execute);
-
-            // Only Do Access if no fault from TLB
-            if (cache_req->fault == NoFault) {
-
-                DPRINTF(InOrderCachePort,
-                    "[tid:%u]: Initiating fetch access to %s for addr. %08p\n",
-                    tid, name(), cache_req->inst->getMemAddr());
-
-                cache_req->reqData = new uint8_t[acc_size];
-
-                inst->setCurResSlot(slot_num);
-
-                doCacheAccess(inst);
-            }
-
-            break;
-        }
  
        case InitiateReadData:
  #if TRACING_ON
          acc_type = "read";
  #endif        
        case InitiateWriteData:
-            
+        if (cachePortBlocked) {
+            DPRINTF(InOrderCachePort, "Cache Port Blocked. Cannot Access\n");
+            cache_req->done(false);
+            return;
+        }
+
          DPRINTF(InOrderCachePort,
                  "[tid:%u]: [sn:%i] Initiating data %s access to %s for "
                  "addr. %08p\n", tid, inst->seqNum, acc_type, name(),
@@ -729,8 +718,8 @@ CacheUnit::execute(int slot_num)
                  cache_req->inst->split2ndAddr);
          inst->split2ndAccess = true;
          assert(inst->split2ndAddr != 0);
-        read(inst, inst->split2ndAddr, inst->split2ndData,
-             inst->split2ndFlags);
+        read(inst, inst->split2ndAddr, &inst->split2ndData,
+             inst->totalSize, inst->split2ndFlags);
          break;
  
        case InitSecondSplitWrite:
@@ -741,54 +730,20 @@ CacheUnit::execute(int slot_num)
  
          inst->split2ndAccess = true;
          assert(inst->split2ndAddr != 0);
-        write(inst, inst->split2ndAddr, inst->split2ndData,
-              inst->split2ndFlags, NULL);
-        break;
-
-
-      case CompleteFetch:
-        if (cache_req->isMemAccComplete()) {
-            DPRINTF(InOrderCachePort,
-                    "[tid:%i]: Completing Fetch Access for [sn:%i]\n",
-                    tid, inst->seqNum);
-
-
-            DPRINTF(InOrderCachePort, "[tid:%i]: Instruction [sn:%i] is: %s\n",
-                    tid, seq_num, inst->staticInst->disassemble(inst->PC));
-
-            removeAddrDependency(inst);
-            
-            delete cache_req->dataPkt;
-            
-            // Do not stall and switch threads for fetch... for now..
-            // TODO: We need to detect cache misses for latencies > 1
-            // cache_req->setMemStall(false);            
-            
-            cache_req->done();
-        } else {
-            DPRINTF(InOrderCachePort,
-                     "[tid:%i]: [sn:%i]: Unable to Complete Fetch Access\n",
-                    tid, inst->seqNum);
-            DPRINTF(InOrderStall,
-                    "STALL: [tid:%i]: Fetch miss from %08p\n",
-                    tid, cache_req->inst->readPC());
-            cache_req->setCompleted(false);
-            //cache_req->setMemStall(true);            
-        }
+        write(inst, &inst->split2ndData, inst->totalSize,
+              inst->split2ndAddr, inst->split2ndFlags, NULL);
          break;
  
        case CompleteReadData:
-      case CompleteWriteData:
          DPRINTF(InOrderCachePort,
-                "[tid:%i]: [sn:%i]: Trying to Complete Data Access\n",
+                "[tid:%i]: [sn:%i]: Trying to Complete Data Read Access\n",
                  tid, inst->seqNum);
  
-        if (cache_req->isMemAccComplete() ||
-            inst->isDataPrefetch() ||
-            inst->isInstPrefetch()) {
-            removeAddrDependency(inst);
-            cache_req->setMemStall(false);            
-            cache_req->done();
+
+        //@todo: timing translations need to check here...
+        assert(!inst->isInstPrefetch() && "Can't Handle Inst. Prefecthes");
+        if (cache_req->isMemAccComplete() || inst->isDataPrefetch()) {
+            finishCacheUnitReq(inst, cache_req);
          } else {
              DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n",
                      tid, cache_req->inst->getMemAddr());
@@ -797,17 +752,50 @@ CacheUnit::execute(int slot_num)
          }
          break;
  
+      case CompleteWriteData:
+        {
+            DPRINTF(InOrderCachePort,
+                    "[tid:%i]: [sn:%i]: Trying to Complete Data Write Access\n",
+                    tid, inst->seqNum);
+
+
+            //@todo: check that timing translation is finished here
+            RequestPtr mem_req = cache_req->memReq;
+            if (mem_req->isCondSwap() || mem_req->isLLSC() || mem_req->isSwap()) {
+                DPRINTF(InOrderCachePort, "Detected Conditional Store Inst.\n");
+
+                if (!cache_req->isMemAccComplete()) {
+                    DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n",
+                            tid, cache_req->inst->getMemAddr());
+                    cache_req->setCompleted(false);
+                    cache_req->setMemStall(true);
+                    return;
+                } else {
+                    DPRINTF(InOrderStall, "Mem Acc Completed\n");
+                }
+            }
+
+            if (cache_req->isMemAccPending()) {
+                DPRINTF(InOrderCachePort, "Store Instruction Pending Completion.\n");
+                cache_req->dataPkt->reqData = cache_req->reqData;
+                cache_req->dataPkt->memReq = cache_req->memReq;
+            } else
+                DPRINTF(InOrderCachePort, "Store Instruction Finished Completion.\n");
+
+            //@todo: if split inst save data
+            finishCacheUnitReq(inst, cache_req);
+        }
+        break;
+
        case CompleteSecondSplitRead:
          DPRINTF(InOrderCachePort,
                  "[tid:%i]: [sn:%i]: Trying to Complete Split Data Read "
                  "Access\n", tid, inst->seqNum);
  
-        if (cache_req->isMemAccComplete() ||
-            inst->isDataPrefetch() ||
-            inst->isInstPrefetch()) {
-            removeAddrDependency(inst);
-            cache_req->setMemStall(false);            
-            cache_req->done();
+        //@todo: check that timing translation is finished here
+        assert(!inst->isInstPrefetch() && "Can't Handle Inst. Prefecthes");
+        if (cache_req->isMemAccComplete() || inst->isDataPrefetch()) {
+            finishCacheUnitReq(inst, cache_req);
          } else {
              DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n",
                      tid, cache_req->inst->split2ndAddr);
@@ -820,19 +808,17 @@ CacheUnit::execute(int slot_num)
          DPRINTF(InOrderCachePort,
                  "[tid:%i]: [sn:%i]: Trying to Complete Split Data Write "
                  "Access\n", tid, inst->seqNum);
+        //@todo: illegal to have a unaligned cond.swap or llsc?
+        assert(!cache_req->memReq->isSwap() && !cache_req->memReq->isCondSwap()
+               && !cache_req->memReq->isLLSC());
  
-        if (cache_req->isMemAccComplete() ||
-            inst->isDataPrefetch() ||
-            inst->isInstPrefetch()) {
-            removeAddrDependency(inst);
-            cache_req->setMemStall(false);            
-            cache_req->done();
-        } else {
-            DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n",
-                    tid, cache_req->inst->split2ndAddr);
-            cache_req->setCompleted(false);
-            cache_req->setMemStall(true);            
+        if (cache_req->isMemAccPending()) {
+            cache_req->dataPkt->reqData = cache_req->reqData;
+            cache_req->dataPkt->memReq = cache_req->memReq;
          }
+
+        //@todo: check that timing translation is finished here
+        finishCacheUnitReq(inst, cache_req);
          break;
          
        default:
@@ -841,62 +827,17 @@ CacheUnit::execute(int slot_num)
  }
  
  void
-CacheUnit::prefetch(DynInstPtr inst)
+CacheUnit::finishCacheUnitReq(DynInstPtr inst, CacheRequest *cache_req)
  {
-    warn_once("Prefetching currently unimplemented");
-
-    CacheReqPtr cache_req
-        = dynamic_cast<CacheReqPtr>(reqMap[inst->getCurResSlot()]);
-    assert(cache_req);
-
-    // Clean-Up cache resource request so
-    // other memory insts. can use them
-    cache_req->setCompleted();
-    cachePortBlocked = false;
-    cache_req->setMemAccPending(false);
-    cache_req->setMemAccCompleted();
-    inst->unsetMemAddr();
+    //@note: common completion tail; add the call below back in for speculative load/store capability
+    //removeAddrDependency(inst);
+    cache_req->setMemStall(false);  // the request is no longer flagged as stalled on memory
+    cache_req->done();  // NOTE(review): presumably marks the request completed (default argument) -- confirm in ResourceRequest
  }
  
-
  void
-CacheUnit::writeHint(DynInstPtr inst)
-{
-    warn_once("Write Hints currently unimplemented");
-
-    CacheReqPtr cache_req
-        = dynamic_cast<CacheReqPtr>(reqMap[inst->getCurResSlot()]);
-    assert(cache_req);
-
-    // Clean-Up cache resource request so
-    // other memory insts. can use them
-    cache_req->setCompleted();
-    cachePortBlocked = false;
-    cache_req->setMemAccPending(false);
-    cache_req->setMemAccCompleted();
-    inst->unsetMemAddr();
-}
-
-// @TODO: Split into doCacheRead() and doCacheWrite()
-Fault
-CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res,
-                         CacheReqPtr split_req)
+CacheUnit::buildDataPacket(CacheRequest *cache_req)
  {
-    Fault fault = NoFault;
-#if TRACING_ON
-    ThreadID tid = inst->readTid();
-#endif
-
-    CacheReqPtr cache_req;
-    
-    if (split_req == NULL) {        
-        cache_req = dynamic_cast<CacheReqPtr>(reqMap[inst->getCurResSlot()]);
-    } else{
-        cache_req = split_req;
-    }        
-
-    assert(cache_req);
-
      // Check for LL/SC and if so change command
      if (cache_req->memReq->isLLSC() && cache_req->pktCmd == MemCmd::ReadReq) {
          cache_req->pktCmd = MemCmd::LoadLockedReq;
@@ -913,35 +854,62 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res,
                                              cache_req->pktCmd,
                                              Packet::Broadcast,
                                              cache_req->instIdx);
+    DPRINTF(InOrderCachePort, "[slot:%i]: Slot marked for %x\n",
+            cache_req->getSlot(),
+            cache_req->dataPkt->getAddr());
  
-    if (cache_req->dataPkt->isRead()) {
-        cache_req->dataPkt->dataStatic(cache_req->reqData);
-    } else if (cache_req->dataPkt->isWrite()) {        
-        if (inst->split2ndAccess) {            
-            cache_req->dataPkt->dataStatic(inst->split2ndStoreDataPtr);
-        } else {
-            cache_req->dataPkt->dataStatic(&cache_req->inst->storeData);            
-        }
-        
-        if (cache_req->memReq->isCondSwap()) {
-            assert(write_res);
-            cache_req->memReq->setExtraData(*write_res);
-        }
-    }
+    cache_req->dataPkt->hasSlot = true;
+    cache_req->dataPkt->dataStatic(cache_req->reqData);
+}
  
+void
+CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res,
+                         CacheReqPtr split_req)
+{
+    Fault fault = NoFault;
+#if TRACING_ON
+    ThreadID tid = inst->readTid();
+#endif
      bool do_access = true;  // flag to suppress cache access
  
-    Request *memReq = cache_req->dataPkt->req;
-
-    if (cache_req->dataPkt->isWrite() && cache_req->memReq->isLLSC()) {
-        assert(cache_req->inst->isStoreConditional());
-        DPRINTF(InOrderCachePort, "Evaluating Store Conditional access\n");
-        do_access = TheISA::handleLockedWrite(cpu, memReq);
+    // Special Handling if this is a split request
+    CacheReqPtr cache_req;
+    if (split_req == NULL)
+        cache_req = dynamic_cast<CacheReqPtr>(reqs[inst->getCurResSlot()]);
+    else {
+        cache_req = split_req;
+        assert(0);
      }
  
+    // Make a new packet inside the CacheRequest object
+    assert(cache_req);
+    buildDataPacket(cache_req);
+
+    // Special Handling for LL/SC or Compare/Swap
+     bool is_write = cache_req->dataPkt->isWrite();
+     RequestPtr mem_req = cache_req->dataPkt->req;
+     if (is_write) {
+         DPRINTF(InOrderCachePort,
+                 "[tid:%u]: [sn:%i]: Storing data: %s\n",
+                 tid, inst->seqNum,
+                 printMemData(cache_req->dataPkt->getPtr<uint8_t>(),
+                              cache_req->dataPkt->getSize()));
+
+        if (mem_req->isCondSwap()) {
+             assert(write_res);
+             cache_req->memReq->setExtraData(*write_res);
+         }
+        if (mem_req->isLLSC()) {
+            assert(cache_req->inst->isStoreConditional());
+            DPRINTF(InOrderCachePort, "Evaluating Store Conditional access\n");
+            do_access = TheISA::handleLockedWrite(inst.get(), mem_req);
+        }
+     }
+
+    // Finally, go ahead and make the access if we can...
      DPRINTF(InOrderCachePort,
-            "[tid:%i] [sn:%i] attempting to access cache\n",
-            tid, inst->seqNum);
+            "[tid:%i] [sn:%i] attempting to access cache for addr %08p\n",
+            tid, inst->seqNum, cache_req->dataPkt->getAddr());
  
      if (do_access) {
          if (!cachePort->sendTiming(cache_req->dataPkt)) {
@@ -949,7 +917,13 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res,
                      "[tid:%i] [sn:%i] cannot access cache, because port "
                      "is blocked. now waiting to retry request\n", tid, 
                      inst->seqNum);
-            cache_req->setCompleted(false);
+            delete cache_req->dataPkt;
+            cache_req->dataPkt = NULL;
+
+            delete cache_req->memReq;
+            cache_req->memReq = NULL;
+
+            cache_req->done(false);
              cachePortBlocked = true;
          } else {
              DPRINTF(InOrderCachePort,
@@ -959,7 +933,7 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res,
              cache_req->setMemAccPending();
              cachePortBlocked = false;
          }
-    } else if (!do_access && memReq->isLLSC()){
+    } else if (mem_req->isLLSC()){
          // Store-Conditional instructions complete even if they "failed"
          assert(cache_req->inst->isStoreConditional());
          cache_req->setCompleted(true);
@@ -970,50 +944,91 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res,
  
          processCacheCompletion(cache_req->dataPkt);
      } else {
+        delete cache_req->dataPkt;
+        cache_req->dataPkt = NULL;
+
+        delete cache_req->memReq;
+        cache_req->memReq = NULL;
+
          // Make cache request again since access due to
          // inability to access
          DPRINTF(InOrderStall, "STALL: \n");
-        cache_req->setCompleted(false);
+        cache_req->done(false);
      }
  
-    return fault;
  }
  
-void
-CacheUnit::processCacheCompletion(PacketPtr pkt)
+bool
+CacheUnit::processSquash(CacheReqPacket *cache_pkt)
  {
-    // Cast to correct packet type
-    CacheReqPacket* cache_pkt = dynamic_cast<CacheReqPacket*>(pkt);
-             
-    assert(cache_pkt);
+    // Decide whether a returning packet must be dropped instead of completed;
+    // returns true if it was consumed here, false if the caller should proceed.
+    // Case 1: the unit no longer holds a slot for this packet (e.g. a store
+    // already sent to the memory system, or a squashed access). The request
+    // slot may be invalid or servicing another request, so it is not touched.
+    if (!cache_pkt->hasSlot) {
+        DPRINTF(InOrderCachePort,
+                "%x does not have a slot in unit, ignoring.\n",
+                cache_pkt->getAddr());
+
+        if (cache_pkt->reqData) {  // release the byte buffer hanging off the packet
+            delete [] cache_pkt->reqData;
+            cache_pkt->reqData = NULL;
+        }
+
+        if (cache_pkt->memReq) {  // release the memory request hanging off the packet
+            delete cache_pkt->memReq;
+            cache_pkt->memReq = NULL;
+        }
+
+        delete cache_pkt;
+        cache_pkt = NULL;  // only clears this function's by-value copy of the pointer
+        cpu->wakeCPU();
+        return true;  // packet consumed: caller must not use it again
+    } else {
+        DPRINTF(InOrderCachePort, "%x has slot %i\n",
+                cache_pkt->getAddr(), cache_pkt->cacheReq->getSlot());
+    }
+    // From here on the packet is known to still have a slot (hasSlot is set).
  
+    // Case 2: the packet still has a slot, but its request was squashed while
+    // the resource was still acknowledging the packet. Squashes should happen
+    // at the end of the cycle and be caught by the no-slot path above; if not,
+    // this branch handles timing variations due to different user parameters
+    // by releasing the slot instead of completing the access.
      if (cache_pkt->cacheReq->isSquashed()) {
          DPRINTF(InOrderCachePort,
                  "Ignoring completion of squashed access, [tid:%i] [sn:%i]\n",
                  cache_pkt->cacheReq->getInst()->readTid(),
                  cache_pkt->cacheReq->getInst()->seqNum);
-        DPRINTF(RefCount,
-                "Ignoring completion of squashed access, [tid:%i] [sn:%i]\n",
-                cache_pkt->cacheReq->getTid(),
-                cache_pkt->cacheReq->seqNum);
  
-        cache_pkt->cacheReq->done();
+        cache_pkt->cacheReq->setMemAccPending(false);  // no access is outstanding for this request any more
+        cache_pkt->cacheReq->freeSlot();  // NOTE(review): presumably returns the slot to the unit -- confirm in ResourceRequest
          delete cache_pkt;
-
+        cache_pkt = NULL;  // only clears this function's by-value copy of the pointer
          cpu->wakeCPU();
-
-        return;
+        return true;  // packet consumed: caller must not use it again
      }
  
-    DPRINTF(InOrderCachePort,
-            "[tid:%u]: [sn:%i]: Waking from cache access to addr. %08p\n",
-            cache_pkt->cacheReq->getInst()->readTid(),
-            cache_pkt->cacheReq->getInst()->seqNum,
-            cache_pkt->cacheReq->getInst()->getMemAddr());
  
-    // Cast to correct request type
+    return false;  // packet is live: the caller continues with normal completion
+}
+
+void
+CacheUnit::processCacheCompletion(PacketPtr pkt)
+{
+    //@todo: use packet sender state instead of deriving from packet class to
+    //  get special state
+    CacheReqPacket* cache_pkt = dynamic_cast<CacheReqPacket*>(pkt);
+    assert(cache_pkt);
+
+    DPRINTF(InOrderCachePort, "Finished request for %x\n", pkt->getAddr());
+
+    if (processSquash(cache_pkt))
+        return;
+
      CacheRequest *cache_req = dynamic_cast<CacheReqPtr>(
-        findSplitRequest(cache_pkt->cacheReq->getInst(), cache_pkt->instIdx));
+        findRequest(cache_pkt->cacheReq->getInst(), cache_pkt->instIdx));
  
      if (!cache_req) {
          panic("[tid:%u]: [sn:%i]: Can't find slot for cache access to "
@@ -1023,135 +1038,110 @@ CacheUnit::processCacheCompletion(PacketPtr pkt)
      }
      
      assert(cache_req);
+    assert(cache_req == cache_pkt->cacheReq);
  
+    DPRINTF(InOrderCachePort,
+            "[tid:%u]: [sn:%i]: [slot:%i] Waking from cache access (vaddr.%08p, paddr:%08p)\n",
+            cache_pkt->cacheReq->getInst()->readTid(),
+            cache_pkt->cacheReq->getInst()->seqNum,
+            cache_req->getSlot(),
+            cache_pkt->req->getVaddr(),
+            cache_pkt->req->getPaddr());
  
      // Get resource request info
      unsigned stage_num = cache_req->getStageNum();
      DynInstPtr inst = cache_req->inst;
      ThreadID tid = cache_req->inst->readTid();
  
-    if (!cache_req->isSquashed()) {
-        if (inst->resSched.top()->cmd == CompleteFetch) {
-            DPRINTF(InOrderCachePort,
-                    "[tid:%u]: [sn:%i]: Processing fetch access\n",
-                    tid, inst->seqNum);
+    assert(!cache_req->isSquashed());
+    assert(inst->staticInst && inst->isMemRef());
  
-            // NOTE: This is only allowing a thread to fetch one line
-            //       at a time. Re-examine when/if prefetching
-            //       gets implemented.
-            //memcpy(fetchData[tid], cache_pkt->getPtr<uint8_t>(),
-            //     cache_pkt->getSize());
-
-            // Get the instruction from the array of the cache line.
-            // @todo: update thsi
-            ExtMachInst ext_inst;
-            StaticInstPtr staticInst = NULL;
-            Addr inst_pc = inst->readPC();
-            MachInst mach_inst = 
-                TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
-                             (cache_pkt->getPtr<uint8_t>()));
-
-            predecoder.setTC(cpu->thread[tid]->getTC());
-            predecoder.moreBytes(inst_pc, inst_pc, mach_inst);
-            ext_inst = predecoder.getExtMachInst();
-
-            inst->setMachInst(ext_inst);
-
-            // Set Up More TraceData info
-            if (inst->traceData) {
-                inst->traceData->setStaticInst(inst->staticInst);
-                inst->traceData->setPC(inst->readPC());
-            }
  
-        } else if (inst->staticInst && inst->isMemRef()) {
+    DPRINTF(InOrderCachePort,
+            "[tid:%u]: [sn:%i]: Processing cache access\n",
+            tid, inst->seqNum);
+
+    PacketPtr split_pkt = NULL;
+    if (inst->splitInst) {
+        inst->splitFinishCnt++;
+
+        if (inst->splitFinishCnt == 2) {
+            cache_req->memReq->setVirt(0/*inst->tid*/,
+                                       inst->getMemAddr(),
+                                       inst->totalSize,
+                                       0,
+                                       0);
+
+            split_pkt = new Packet(cache_req->memReq, cache_req->pktCmd,
+                                   Packet::Broadcast);
+            split_pkt->dataStatic(inst->splitMemData);
+
+            DPRINTF(InOrderCachePort, "Completing Split Access.\n");
+            inst->completeAcc(split_pkt);
+        }
+    } else {
+        inst->completeAcc(cache_pkt);
+    }
+
+    inst->setExecuted();
+
+    if (inst->isLoad()) {
+        assert(cache_pkt->isRead());
+
+        if (cache_pkt->req->isLLSC()) {
              DPRINTF(InOrderCachePort,
-                    "[tid:%u]: [sn:%i]: Processing cache access\n",
+                    "[tid:%u]: Handling Load-Linked for [sn:%u]\n",
                      tid, inst->seqNum);
-            PacketPtr dataPkt = NULL;
-            
-            if (inst->splitInst) {
-                inst->splitFinishCnt++;
-                
-                if (inst->splitFinishCnt == 2) {
-                    cache_req->memReq->setVirt(0/*inst->tid*/, 
-                                               inst->getMemAddr(),
-                                               inst->splitTotalSize,
-                                               0,
-                                               0);
-                    
-                    Packet split_pkt(cache_req->memReq, cache_req->pktCmd,
-                                     Packet::Broadcast);                    
-
-
-                    if (inst->isLoad()) {                        
-                        split_pkt.dataStatic(inst->splitMemData);
-                    } else  {                            
-                        split_pkt.dataStatic(&inst->storeData);                        
-                    }
-                    
-                    dataPkt = &split_pkt;
-                }                
-            } else {
-                dataPkt = pkt;
-            }
-            inst->completeAcc(dataPkt);
-            
-            if (inst->isLoad()) {
-                assert(cache_pkt->isRead());
-
-                if (cache_pkt->req->isLLSC()) {
-                    DPRINTF(InOrderCachePort,
-                            "[tid:%u]: Handling Load-Linked for [sn:%u]\n",
-                            tid, inst->seqNum);
-                    TheISA::handleLockedRead(cpu, cache_pkt->req);
-                }
+            TheISA::handleLockedRead(inst.get(), cache_pkt->req);
+        }
  
-                DPRINTF(InOrderCachePort,
-                        "[tid:%u]: [sn:%i]: Bytes loaded were: %s\n",
-                        tid, inst->seqNum,
-                        printMemData(dataPkt->getPtr<uint8_t>(),
-                            dataPkt->getSize()));
-            } else if(inst->isStore()) {
-                assert(cache_pkt->isWrite());
-
-                DPRINTF(InOrderCachePort,
-                        "[tid:%u]: [sn:%i]: Bytes stored were: %s\n",
-                        tid, inst->seqNum,
-                        printMemData(dataPkt->getPtr<uint8_t>(),
-                            dataPkt->getSize()));
-            }
+        DPRINTF(InOrderCachePort,
+                "[tid:%u]: [sn:%i]: Bytes loaded were: %s\n",
+                tid, inst->seqNum,
+                (split_pkt) ? printMemData(split_pkt->getPtr<uint8_t>(),
+                                           split_pkt->getSize()) :
+                              printMemData(cache_pkt->getPtr<uint8_t>(),
+                                           cache_pkt->getSize()));
+    } else if(inst->isStore()) {
+        assert(cache_pkt->isWrite());
  
-            delete cache_pkt;
-        }
+        DPRINTF(InOrderCachePort,
+                "[tid:%u]: [sn:%i]: Bytes stored were: %s\n",
+                tid, inst->seqNum,
+                (split_pkt) ? printMemData(split_pkt->getPtr<uint8_t>(),
+                                           split_pkt->getSize()) :
+                              printMemData(cache_pkt->getPtr<uint8_t>(),
+                                           cache_pkt->getSize()));
+    }
+
+
+    if (split_pkt) {
+        delete split_pkt;
+        split_pkt = NULL;
+    }
  
-        cache_req->setMemAccPending(false);
-        cache_req->setMemAccCompleted();
+    cache_req->setMemAccPending(false);
+    cache_req->setMemAccCompleted();
  
-        if (cache_req->isMemStall() && 
-            cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) {    
-            DPRINTF(InOrderCachePort, "[tid:%u] Waking up from Cache Miss.\n",
-                    tid);
+    if (cache_req->isMemStall() &&
+        cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) {
+        DPRINTF(InOrderCachePort, "[tid:%u] Waking up from Cache Miss.\n",
+                tid);
              
-            cpu->activateContext(tid);            
+        cpu->activateContext(tid);
              
-            DPRINTF(ThreadModel, "Activating [tid:%i] after return from cache"
-                    "miss.\n", tid);            
-        }
+        DPRINTF(ThreadModel, "Activating [tid:%i] after return from cache"
+                "miss.\n", tid);
+    }
          
-        // Wake up the CPU (if it went to sleep and was waiting on this
-        // completion event).
-        cpu->wakeCPU();
+    // Wake up the CPU (if it went to sleep and was waiting on this
+    // completion event).
+    cpu->wakeCPU();
  
-        DPRINTF(Activity, "[tid:%u] Activating %s due to cache completion\n",
+    DPRINTF(Activity, "[tid:%u] Activating %s due to cache completion\n",
              tid, cpu->pipelineStage[stage_num]->name());
  
-        cpu->switchToActive(stage_num);
-    } else {
-        DPRINTF(InOrderCachePort,
-                "[tid:%u] Miss on block @ %08p completed, but squashed\n",
-                tid, cache_req->inst->readPC());
-        cache_req->setMemAccCompleted();
-    }
+    cpu->switchToActive(stage_num);
  }
  
  void
@@ -1174,10 +1164,10 @@ CacheUnitEvent::CacheUnitEvent()
  void
  CacheUnitEvent::process()
  {
-    DynInstPtr inst = resource->reqMap[slotIdx]->inst;
-    int stage_num = resource->reqMap[slotIdx]->getStageNum();
+    DynInstPtr inst = resource->reqs[slotIdx]->inst;
+    int stage_num = resource->reqs[slotIdx]->getStageNum();
      ThreadID tid = inst->threadNumber;
-    CacheReqPtr req_ptr = dynamic_cast<CacheReqPtr>(resource->reqMap[slotIdx]);
+    CacheReqPtr req_ptr = dynamic_cast<CacheReqPtr>(resource->reqs[slotIdx]);
  
      DPRINTF(InOrderTLB, "Waking up from TLB Miss caused by [sn:%i].\n",
              inst->seqNum);
@@ -1185,16 +1175,27 @@ CacheUnitEvent::process()
      CacheUnit* tlb_res = dynamic_cast<CacheUnit*>(resource);
      assert(tlb_res);
  
+    //@todo: eventually, we should do a timing translation w/
+    //       hw page table walk on tlb miss
+    DPRINTF(InOrderTLB, "Handling Fault %s : [sn:%i] %x\n", inst->fault->name(), inst->seqNum, inst->getMemAddr());
+    inst->fault->invoke(tlb_res->cpu->tcBase(tid), inst->staticInst);
+
      tlb_res->tlbBlocked[tid] = false;
  
      tlb_res->cpu->pipelineStage[stage_num]->
-        unsetResStall(tlb_res->reqMap[slotIdx], tid);
+        unsetResStall(tlb_res->reqs[slotIdx], tid);
  
      req_ptr->tlbStall = false;
  
+    //@todo: timing translation needs to have some type of independent
+    //       info regarding if it's squashed or not so we can
+    //       free up the resource if a request gets squashed in the middle
+    //       of a table walk
      if (req_ptr->isSquashed()) {
-        req_ptr->done();
+        req_ptr->freeSlot();
      }
+
+    tlb_res->cpu->wakeCPU();
  }
  
  void
@@ -1216,20 +1217,41 @@ CacheUnit::squashDueToMemStall(DynInstPtr inst, int stage_num,
      squash(inst, stage_num, squash_seq_num + 1, tid);    
  }
  
+void
+CacheUnit::squashCacheRequest(CacheReqPtr req_ptr)
+{
+    DynInstPtr inst =  req_ptr->getInst();
+    req_ptr->setSquashed();  // flag the resource request itself as squashed
+    inst->setSquashed();  // flag the instruction that issued the request as squashed
+    // Only the two squash flags are set here; nothing is freed in this function.
+    //@note: address-dependency cleanup is disabled; add back in for speculative load/store capability
+    /*if (inst->validMemAddr()) {
+        DPRINTF(AddrDep, "Squash of [tid:%i] [sn:%i], attempting to "
+                "remove addr. %08p dependencies.\n",
+                inst->readTid(),
+                inst->seqNum,
+                inst->getMemAddr());
+
+        removeAddrDependency(inst);
+    }*/
+}
+
  
  void
  CacheUnit::squash(DynInstPtr inst, int stage_num,
                    InstSeqNum squash_seq_num, ThreadID tid)
  {
-    vector<int> slot_remove_list;
-
-    map<int, ResReqPtr>::iterator map_it = reqMap.begin();
-    map<int, ResReqPtr>::iterator map_end = reqMap.end();
+    if (tlbBlocked[tid] &&  // release this thread's TLB block when its recorded seq. number is above the squash point
+        tlbBlockSeqNum[tid] > squash_seq_num) {
+        DPRINTF(InOrderCachePort, "Releasing TLB Block due to "
+                " squash after [sn:%i].\n", squash_seq_num);
+        tlbBlocked[tid] = false;
+    }
  
-    while (map_it != map_end) {
-        ResReqPtr req_ptr = (*map_it).second;
+    for (int i = 0; i < width; i++) {  // visit each of the `width` request slots
+        ResReqPtr req_ptr = reqs[i];
  
-        if (req_ptr &&
+        if (req_ptr->valid &&  // only valid requests of thread `tid` whose seqNum is above squash_seq_num
              req_ptr->getInst()->readTid() == tid &&
              req_ptr->getInst()->seqNum > squash_seq_num) {
  
@@ -1242,172 +1264,65 @@ CacheUnit::squash(DynInstPtr inst, int stage_num,
                          "squashed, ignoring squash process.\n",
                          req_ptr->getInst()->readTid(),
                          req_ptr->getInst()->seqNum);
-                map_it++;                
                  continue;                
              }
-            
-            req_ptr->setSquashed();
-
-            req_ptr->getInst()->setSquashed();
  
              CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(req_ptr);
              assert(cache_req);
  
+            squashCacheRequest(cache_req);  // sets the squashed flag on the request and on its instruction
+
              int req_slot_num = req_ptr->getSlot();
  
              if (cache_req->tlbStall) {
                  tlbBlocked[tid] = false;
  
-                int stall_stage = reqMap[req_slot_num]->getStageNum();
+                int stall_stage = reqs[req_slot_num]->getStageNum();  // stage recorded on the stalled request
  
                  cpu->pipelineStage[stall_stage]->
-                    unsetResStall(reqMap[req_slot_num], tid);
+                    unsetResStall(reqs[req_slot_num], tid);  // clear the resource stall this request set on that stage
              }
  
-            if (!cache_req->tlbStall && !cache_req->isMemAccPending()) {
-                // Mark request for later removal
-                cpu->reqRemoveList.push(req_ptr);
-
-                // Mark slot for removal from resource
-                slot_remove_list.push_back(req_ptr->getSlot());
-            } else {
-                DPRINTF(InOrderCachePort,
-                        "[tid:%i] Request from [sn:%i] squashed, but still "
-                        "pending completion.\n",
-                        req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum);
-                DPRINTF(RefCount,
-                        "[tid:%i] Request from [sn:%i] squashed (split:%i), but "
-                        "still pending completion.\n",
-                        req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum,
-                        req_ptr->getInst()->splitInst);
+            if (cache_req->isMemAccPending()) {  // NOTE(review): presumably lets the in-flight packet clean these up later - confirm
+                cache_req->dataPkt->reqData = cache_req->reqData;
+                cache_req->dataPkt->memReq = cache_req->memReq;
              }
  
-            if (req_ptr->getInst()->validMemAddr()) {                    
-                DPRINTF(AddrDep, "Squash of [tid:%i] [sn:%i], attempting to "
-                        "remove addr. %08p dependencies.\n",
-                        req_ptr->getInst()->readTid(),
-                        req_ptr->getInst()->seqNum, 
-                        req_ptr->getInst()->getMemAddr());
-                
-                removeAddrDependency(req_ptr->getInst());
-            }            
+            if (!cache_req->tlbStall)  // TLB-stalled requests keep their slot here; NOTE(review): presumably freed once translation finishes - confirm
+                freeSlot(req_slot_num);
          }
-
-        map_it++;
      }
  
-    // Now Delete Slot Entry from Req. Map
-    for (int i = 0; i < slot_remove_list.size(); i++)
-        freeSlot(slot_remove_list[i]);
  }
  
-// Extra Template Definitions
-#ifndef DOXYGEN_SHOULD_SKIP_THIS
-
-template
-Fault
-CacheUnit::read(DynInstPtr inst, Addr addr, Twin32_t &data, unsigned flags);
-
-template
-Fault
-CacheUnit::read(DynInstPtr inst, Addr addr, Twin64_t &data, unsigned flags);
-
-template
-Fault
-CacheUnit::read(DynInstPtr inst, Addr addr, uint64_t &data, unsigned flags);
-
-template
-Fault
-CacheUnit::read(DynInstPtr inst, Addr addr, uint32_t &data, unsigned flags);
-
-template
-Fault
-CacheUnit::read(DynInstPtr inst, Addr addr, uint16_t &data, unsigned flags);
-
-template
-Fault
-CacheUnit::read(DynInstPtr inst, Addr addr, uint8_t &data, unsigned flags);
-
-#endif //DOXYGEN_SHOULD_SKIP_THIS
-
-template<>
-Fault
-CacheUnit::read(DynInstPtr inst, Addr addr, double &data, unsigned flags)
-{
-    return read(inst, addr, *(uint64_t*)&data, flags);
-}
-
-template<>
-Fault
-CacheUnit::read(DynInstPtr inst, Addr addr, float &data, unsigned flags)
-{
-    return read(inst, addr, *(uint32_t*)&data, flags);
-}
-
-
-template<>
-Fault
-CacheUnit::read(DynInstPtr inst, Addr addr, int32_t &data, unsigned flags)
-{
-    return read(inst, addr, (uint32_t&)data, flags);
-}
-
-#ifndef DOXYGEN_SHOULD_SKIP_THIS
-
-template
-Fault
-CacheUnit::write(DynInstPtr inst, Twin32_t data, Addr addr,
-                       unsigned flags, uint64_t *res);
-
-template
-Fault
-CacheUnit::write(DynInstPtr inst, Twin64_t data, Addr addr,
-                       unsigned flags, uint64_t *res);
-
-template
-Fault
-CacheUnit::write(DynInstPtr inst, uint64_t data, Addr addr,
-                       unsigned flags, uint64_t *res);
-
-template
-Fault
-CacheUnit::write(DynInstPtr inst, uint32_t data, Addr addr,
-                       unsigned flags, uint64_t *res);
-
-template
-Fault
-CacheUnit::write(DynInstPtr inst, uint16_t data, Addr addr,
-                       unsigned flags, uint64_t *res);
-
-template
-Fault
-CacheUnit::write(DynInstPtr inst, uint8_t data, Addr addr,
-                       unsigned flags, uint64_t *res);
-
-#endif //DOXYGEN_SHOULD_SKIP_THIS
-
-template<>
-Fault
-CacheUnit::write(DynInstPtr inst, double data, Addr addr, unsigned flags, 
-                 uint64_t *res)
+void
+CacheRequest::clearRequest()  // frees or abandons the request's memory objects, then resets its fields
  {
-    return write(inst, *(uint64_t*)&data, addr, flags, res);
-}
+    if (!memAccPending) {  // no access pending: the buffers are deleted here
+        if (reqData && !splitAccess)  // NOTE(review): split-access data is not deleted here; presumably owned elsewhere - confirm
+            delete [] reqData;
  
-template<>
-Fault
-CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, 
-                 uint64_t *res)
-{
-    return write(inst, *(uint32_t*)&data, addr, flags, res);
-}
+        if (memReq)
+            delete memReq;
  
+        if (dataPkt)
+            delete dataPkt;
+    } else {  // access still pending: nothing is deleted, the packet is only told it has no slot
+        if (dataPkt)
+            dataPkt->hasSlot = false;
+    }
  
-template<>
-Fault
-CacheUnit::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, 
-                 uint64_t *res)
-{
-    return write(inst, (uint32_t)data, addr, flags, res);
+    memReq = NULL;  // pointers are dropped in both branches; NOTE(review): pending case presumably relies on the packet for cleanup - confirm
+    reqData = NULL;
+    dataPkt = NULL;
+    memAccComplete = false;  // remaining members are set back to fixed values
+    memAccPending = false;
+    tlbStall = false;
+    splitAccess = false;
+    splitAccessNum = -1;
+    split2ndAccess = false;
+    instIdx = 0;
+    fetchBufferFill = false;
+
+    ResourceRequest::clearRequest();  // finally run the base-class clearRequest
  }
-