From db2b72138052ad96d808d8286bd2598c96f96a31 Mon Sep 17 00:00:00 2001
From: Korey Sewell <ksewell@umich.edu>
Date: Tue, 12 May 2009 15:01:16 -0400
Subject: [PATCH] inorder-tlb-cunit: merge the TLB as implicit to any memory
 access. TLBUnit is no longer used, and we also get rid of the memAccSize and
 memAccFlags functions added to the ISA and StaticInst, since the TLB is not a
 separate resource to acquire. Instead, TLB access is done before any
 read/write to memory and the result is checked before the request is sent out
 to memory. * * *

---
 src/arch/alpha/isa/mem.isa              |  36 +--
 src/cpu/SConscript                      |   2 -
 src/cpu/inorder/SConscript              |   1 -
 src/cpu/inorder/cpu.cc                  | 164 +++++++++--
 src/cpu/inorder/cpu.hh                  |   7 +-
 src/cpu/inorder/inorder_dyn_inst.cc     |   4 +-
 src/cpu/inorder/inorder_dyn_inst.hh     |   4 -
 src/cpu/inorder/pipeline_traits.cc      |   6 -
 src/cpu/inorder/pipeline_traits.hh      |   2 -
 src/cpu/inorder/resource.hh             |   7 +-
 src/cpu/inorder/resource_pool.cc        |   7 +-
 src/cpu/inorder/resources/cache_unit.cc | 373 +++++++++++++++++++++---
 src/cpu/inorder/resources/cache_unit.hh |  85 +++---
 src/cpu/inorder/resources/tlb_unit.hh   |   4 +-
 src/cpu/static_inst.cc                  |   7 -
 src/cpu/static_inst.hh                  |   2 -
 16 files changed, 516 insertions(+), 195 deletions(-)

diff --git a/src/arch/alpha/isa/mem.isa b/src/arch/alpha/isa/mem.isa
index 5bac4f758..fedfbf55d 100644
--- a/src/arch/alpha/isa/mem.isa
+++ b/src/arch/alpha/isa/mem.isa
@@ -53,10 +53,6 @@ output header {{
 
         std::string
         generateDisassembly(Addr pc, const SymbolTable *symtab) const;
-
-       public:
-
-        Request::Flags memAccFlags() { return memAccessFlags; }
     };
 
     /**
@@ -140,8 +136,6 @@ def template LoadStoreDeclare {{
         %(InitiateAccDeclare)s
 
         %(CompleteAccDeclare)s
-
-        %(MemAccSizeDeclare)s
     };
 }};
 
@@ -160,19 +154,6 @@ def template CompleteAccDeclare {{
                       Trace::InstRecord *) const;
 }};
 
-def template MemAccSizeDeclare {{
-    int memAccSize(%(CPU_exec_context)s *xc);
-}};
-
-def template LoadStoreMemAccSize {{
-    int %(class_name)s::memAccSize(%(CPU_exec_context)s *xc)
-    {
-        // Return the memory access size in bytes
-        return (%(mem_acc_size)d / 8);
-    }
-}};
-
-
 def template LoadStoreConstructor {{
     inline %(class_name)s::%(class_name)s(ExtMachInst machInst)
          : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
@@ -462,15 +443,6 @@ def template MiscCompleteAcc {{
     }
 }};
 
-def template MiscMemAccSize {{
-    int %(class_name)s::memAccSize(%(CPU_exec_context)s *xc)
-    {
-        return (%(mem_acc_size)d / 8);
-        panic("memAccSize undefined: Misc instruction does not support split "
-              "access method!");
-        return 0;
-    }
-}};
 
 // load instructions use Ra as dest, so check for
 // Ra == 31 to detect nops
@@ -541,11 +513,6 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
     initiateAccTemplate = eval(exec_template_base + 'InitiateAcc')
     completeAccTemplate = eval(exec_template_base + 'CompleteAcc')
 
-    if (exec_template_base == 'Load' or exec_template_base == 'Store'):
-      memAccSizeTemplate = eval('LoadStoreMemAccSize')
-    else:
-      memAccSizeTemplate = eval('MiscMemAccSize')
-
     # (header_output, decoder_output, decode_block, exec_output)
     return (LoadStoreDeclare.subst(iop),
             LoadStoreConstructor.subst(iop),
@@ -553,8 +520,7 @@ def LoadStoreBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
             fullExecTemplate.subst(iop)
             + EACompExecute.subst(iop)
             + initiateAccTemplate.subst(iop)
-            + completeAccTemplate.subst(iop)
-            + memAccSizeTemplate.subst(memacc_iop))
+            + completeAccTemplate.subst(iop))
 }};
 
 def format LoadOrNop(memacc_code, ea_code = {{ EA = Rb + disp; }},
diff --git a/src/cpu/SConscript b/src/cpu/SConscript
index b14d606b7..80bc0986e 100644
--- a/src/cpu/SConscript
+++ b/src/cpu/SConscript
@@ -56,8 +56,6 @@ virtual Fault initiateAcc(%(type)s *xc, Trace::InstRecord *traceData) const
 virtual Fault completeAcc(Packet *pkt, %(type)s *xc,
                           Trace::InstRecord *traceData) const
 { panic("completeAcc not defined!"); M5_DUMMY_RETURN };
-virtual int memAccSize(%(type)s *xc)
-{ panic("memAccSize not defined!"); M5_DUMMY_RETURN };
 '''
 
 mem_ini_sig_template = '''
diff --git a/src/cpu/inorder/SConscript b/src/cpu/inorder/SConscript
index 9403aa914..64f1b5481 100644
--- a/src/cpu/inorder/SConscript
+++ b/src/cpu/inorder/SConscript
@@ -75,7 +75,6 @@ if 'InOrderCPU' in env['CPU_MODELS']:
 	Source('resources/decode_unit.cc')
 	Source('resources/inst_buffer.cc')
 	Source('resources/graduation_unit.cc')
-	Source('resources/tlb_unit.cc')
 	Source('resources/fetch_seq_unit.cc')
 	Source('resources/mult_div_unit.cc')
 	Source('resource_pool.cc')
diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc
index c522fc238..e901b475e 100644
--- a/src/cpu/inorder/cpu.cc
+++ b/src/cpu/inorder/cpu.cc
@@ -204,18 +204,6 @@ InOrderCPU::InOrderCPU(Params *params)
         fatal("Unable to find port for data.\n");
     }
 
-
-    // Hard-Code Bindings to ITB & DTB
-    itbIdx = resPool->getResIdx(name() + "."  + "I-TLB");
-    if (itbIdx == 0) {
-        fatal("Unable to find ITB resource.\n");
-    }
-
-    dtbIdx = resPool->getResIdx(name() + "."  + "D-TLB");
-    if (dtbIdx == 0) {
-        fatal("Unable to find DTB resource.\n");
-    }
-
     for (int i = 0; i < numThreads; ++i) {
         if (i < params->workload.size()) {
             DPRINTF(InOrderCPU, "Workload[%i] process is %#x\n",
@@ -486,6 +474,7 @@ InOrderCPU::getPort(const std::string &if_name, int idx)
 void
 InOrderCPU::trap(Fault fault, unsigned tid, int delay)
 {
+    //@ Squash Pipeline during TRAP
     scheduleCpuEvent(Trap, fault, tid, 0/*vpe*/, delay);
 }
 
@@ -502,7 +491,7 @@ InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault,
     CPUEvent *cpu_event = new CPUEvent(this, c_event, fault, tid, vpe);
 
     if (delay >= 0) {
-        DPRINTF(InOrderCPU, "Scheduling CPU Event Type #%s for cycle %i.\n",
+        DPRINTF(InOrderCPU, "Scheduling CPU Event (%s) for cycle %i.\n",
                 eventNames[c_event], curTick + delay);
         mainEventQueue.schedule(cpu_event,curTick + delay);
     } else {
@@ -1266,20 +1255,6 @@ InOrderCPU::syscall(int64_t callnum, int tid)
     nonSpecInstActive[tid] = false;
 }
 
-Fault
-InOrderCPU::read(DynInstPtr inst)
-{
-    Resource *mem_res = resPool->getResource(dataPortIdx);
-    return mem_res->doDataAccess(inst);
-}
-
-Fault
-InOrderCPU::write(DynInstPtr inst, uint64_t *res)
-{
-    Resource *mem_res = resPool->getResource(dataPortIdx);
-    return mem_res->doDataAccess(inst, res);
-}
-
 void
 InOrderCPU::prefetch(DynInstPtr inst)
 {
@@ -1298,7 +1273,8 @@ InOrderCPU::writeHint(DynInstPtr inst)
 TheISA::TLB*
 InOrderCPU::getITBPtr()
 {
-    TLBUnit *itb_res = dynamic_cast<TLBUnit*>(resPool->getResource(itbIdx));
+    CacheUnit *itb_res =
+        dynamic_cast<CacheUnit*>(resPool->getResource(fetchPortIdx));
     return itb_res->tlb();
 }
 
@@ -1306,6 +1282,136 @@ InOrderCPU::getITBPtr()
 TheISA::TLB*
 InOrderCPU::getDTBPtr()
 {
-    TLBUnit *dtb_res = dynamic_cast<TLBUnit*>(resPool->getResource(dtbIdx));
+    CacheUnit *dtb_res =
+        dynamic_cast<CacheUnit*>(resPool->getResource(dataPortIdx));
     return dtb_res->tlb();
 }
+
+template <class T>
+Fault
+InOrderCPU::read(DynInstPtr inst, Addr addr, T &data, unsigned flags)
+{
+    //@TODO: Generalize name "CacheUnit" to "MemUnit" just in case
+    //       you want to run w/out caches?
+    CacheUnit *cache_res = dynamic_cast<CacheUnit*>(resPool->getResource(dataPortIdx));
+
+    return cache_res->read(inst, addr, data, flags);
+}
+
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+
+template
+Fault
+InOrderCPU::read(DynInstPtr inst, Addr addr, Twin32_t &data, unsigned flags);
+
+template
+Fault
+InOrderCPU::read(DynInstPtr inst, Addr addr, Twin64_t &data, unsigned flags);
+
+template
+Fault
+InOrderCPU::read(DynInstPtr inst, Addr addr, uint64_t &data, unsigned flags);
+
+template
+Fault
+InOrderCPU::read(DynInstPtr inst, Addr addr, uint32_t &data, unsigned flags);
+
+template
+Fault
+InOrderCPU::read(DynInstPtr inst, Addr addr, uint16_t &data, unsigned flags);
+
+template
+Fault
+InOrderCPU::read(DynInstPtr inst, Addr addr, uint8_t &data, unsigned flags);
+
+#endif //DOXYGEN_SHOULD_SKIP_THIS
+
+template<>
+Fault
+InOrderCPU::read(DynInstPtr inst, Addr addr, double &data, unsigned flags)
+{
+    return read(inst, addr, *(uint64_t*)&data, flags);
+}
+
+template<>
+Fault
+InOrderCPU::read(DynInstPtr inst, Addr addr, float &data, unsigned flags)
+{
+    return read(inst, addr, *(uint32_t*)&data, flags);
+}
+
+
+template<>
+Fault
+InOrderCPU::read(DynInstPtr inst, Addr addr, int32_t &data, unsigned flags)
+{
+    return read(inst, addr, (uint32_t&)data, flags);
+}
+
+template <class T>
+Fault
+InOrderCPU::write(DynInstPtr inst, T data, Addr addr, unsigned flags,
+                  uint64_t *write_res)
+{
+    //@TODO: Generalize name "CacheUnit" to "MemUnit" just in case
+    //       you want to run w/out caches?
+    CacheUnit *cache_res =
+        dynamic_cast<CacheUnit*>(resPool->getResource(dataPortIdx));
+    return cache_res->write(inst, data, addr, flags, write_res);
+}
+
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+
+template
+Fault
+InOrderCPU::write(DynInstPtr inst, Twin32_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+template
+Fault
+InOrderCPU::write(DynInstPtr inst, Twin64_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+template
+Fault
+InOrderCPU::write(DynInstPtr inst, uint64_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+template
+Fault
+InOrderCPU::write(DynInstPtr inst, uint32_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+template
+Fault
+InOrderCPU::write(DynInstPtr inst, uint16_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+template
+Fault
+InOrderCPU::write(DynInstPtr inst, uint8_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+#endif //DOXYGEN_SHOULD_SKIP_THIS
+
+template<>
+Fault
+InOrderCPU::write(DynInstPtr inst, double data, Addr addr, unsigned flags, uint64_t *res)
+{
+    return write(inst, *(uint64_t*)&data, addr, flags, res);
+}
+
+template<>
+Fault
+InOrderCPU::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64_t *res)
+{
+    return write(inst, *(uint32_t*)&data, addr, flags, res);
+}
+
+
+template<>
+Fault
+InOrderCPU::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, uint64_t *res)
+{
+    return write(inst, (uint32_t)data, addr, flags, res);
+}
diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh
index ea5404c60..55b04f6a9 100644
--- a/src/cpu/inorder/cpu.hh
+++ b/src/cpu/inorder/cpu.hh
@@ -492,12 +492,15 @@ class InOrderCPU : public BaseCPU
     /** Forwards an instruction read to the appropriate data
      *  resource (indexes into Resource Pool thru "dataPortIdx")
      */
-    Fault read(DynInstPtr inst);
+    template <class T>
+    Fault read(DynInstPtr inst, Addr addr, T &data, unsigned flags);
 
     /** Forwards an instruction write. to the appropriate data
      *  resource (indexes into Resource Pool thru "dataPortIdx")
      */
-    Fault write(DynInstPtr inst, uint64_t *res = NULL);
+    template <class T>
+    Fault write(DynInstPtr inst, T data, Addr addr, unsigned flags,
+                uint64_t *write_res = NULL);
 
     /** Forwards an instruction prefetch to the appropriate data
      *  resource (indexes into Resource Pool thru "dataPortIdx")
diff --git a/src/cpu/inorder/inorder_dyn_inst.cc b/src/cpu/inorder/inorder_dyn_inst.cc
index 3983821e7..ed63d9148 100644
--- a/src/cpu/inorder/inorder_dyn_inst.cc
+++ b/src/cpu/inorder/inorder_dyn_inst.cc
@@ -604,7 +604,7 @@ template<class T>
 inline Fault
 InOrderDynInst::read(Addr addr, T &data, unsigned flags)
 {
-    return cpu->read(this);
+    return cpu->read(this, addr, data, flags);
 }
 
 #ifndef DOXYGEN_SHOULD_SKIP_THIS
@@ -657,7 +657,7 @@ InOrderDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res)
 
     DPRINTF(InOrderDynInst, "[tid:%i]: [sn:%i] Setting store data to %#x.\n",
             threadNumber, seqNum, memData);
-    return cpu->write(this, res);
+    return cpu->write(this, data, addr, flags, res);
 }
 
 #ifndef DOXYGEN_SHOULD_SKIP_THIS
diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh
index 490992638..52465a712 100644
--- a/src/cpu/inorder/inorder_dyn_inst.hh
+++ b/src/cpu/inorder/inorder_dyn_inst.hh
@@ -652,10 +652,6 @@ class InOrderDynInst : public FastAlloc, public RefCounted
     Addr getMemAddr()
     { return memAddr; }
 
-    int getMemAccSize() { return staticInst->memAccSize(this); }
-
-    int getMemFlags() { return staticInst->memAccFlags(); }
-
     /** Sets the effective address. */
     void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; }
 
diff --git a/src/cpu/inorder/pipeline_traits.cc b/src/cpu/inorder/pipeline_traits.cc
index 150115138..ed72ab1d0 100644
--- a/src/cpu/inorder/pipeline_traits.cc
+++ b/src/cpu/inorder/pipeline_traits.cc
@@ -69,7 +69,6 @@ void createFrontEndSchedule(DynInstPtr &inst)
     InstStage *E = inst->addStage();
 
     I->needs(FetchSeq, FetchSeqUnit::AssignNextPC);
-    I->needs(ITLB, TLBUnit::FetchLookup);
     I->needs(ICache, CacheUnit::InitiateFetch);
 
     E->needs(ICache, CacheUnit::CompleteFetch);
@@ -101,14 +100,10 @@ bool createBackEndSchedule(DynInstPtr &inst)
     } else if ( inst->isMemRef() ) {
         if ( inst->isLoad() ) {
             E->needs(AGEN, AGENUnit::GenerateAddr);
-            E->needs(DTLB, TLBUnit::DataReadLookup);
             E->needs(DCache, CacheUnit::InitiateReadData);
         }
     } else if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) {
         E->needs(MDU, MultDivUnit::StartMultDiv);
-
-        // ZERO-LATENCY Multiply:
-        // E->needs(MDU, MultDivUnit::MultDiv);
     } else {
         E->needs(ExecUnit, ExecutionUnit::ExecuteInst);
     }
@@ -122,7 +117,6 @@ bool createBackEndSchedule(DynInstPtr &inst)
     } else if ( inst->isStore() ) {
         M->needs(RegManager, UseDefUnit::ReadSrcReg, 1);
         M->needs(AGEN, AGENUnit::GenerateAddr);
-        M->needs(DTLB, TLBUnit::DataWriteLookup);
         M->needs(DCache, CacheUnit::InitiateWriteData);
     }
 
diff --git a/src/cpu/inorder/pipeline_traits.hh b/src/cpu/inorder/pipeline_traits.hh
index 3c49143bc..5012553b0 100644
--- a/src/cpu/inorder/pipeline_traits.hh
+++ b/src/cpu/inorder/pipeline_traits.hh
@@ -56,7 +56,6 @@ namespace ThePipeline {
     // Enumerated List of Resources The Pipeline Uses
     enum ResourceList {
        FetchSeq = 0,
-       ITLB,
        ICache,
        Decode,
        BPred,
@@ -65,7 +64,6 @@ namespace ThePipeline {
        AGEN,
        ExecUnit,
        MDU,
-       DTLB,
        DCache,
        Grad,
        FetchBuff2
diff --git a/src/cpu/inorder/resource.hh b/src/cpu/inorder/resource.hh
index 0378c0f50..71270241f 100644
--- a/src/cpu/inorder/resource.hh
+++ b/src/cpu/inorder/resource.hh
@@ -140,15 +140,14 @@ class Resource {
      *  if instruction is actually in resource before
      *  trying to do access.Needs to be defined for derived units.
      */
-    virtual Fault doDataAccess(DynInstPtr inst, uint64_t *res=NULL)
-    { panic("doDataAccess undefined for %s", name()); return NoFault; }
+    virtual Fault doCacheAccess(DynInstPtr inst, uint64_t *res=NULL)
+    { panic("doCacheAccess undefined for %s", name()); return NoFault; }
 
     virtual void prefetch(DynInstPtr inst)
     { panic("prefetch undefined for %s", name()); }
 
     virtual void writeHint(DynInstPtr inst)
-    { panic("doDataAccess undefined for %s", name()); }
-
+    { panic("writeHint undefined for %s", name()); }
 
     /** Squash All Requests After This Seq Num */
     virtual void squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num, unsigned tid);
diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc
index 2187e2818..5cb55b89b 100644
--- a/src/cpu/inorder/resource_pool.cc
+++ b/src/cpu/inorder/resource_pool.cc
@@ -50,8 +50,6 @@ ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params)
     // --------------------------------------------------
     resources.push_back(new FetchSeqUnit("Fetch-Seq-Unit", FetchSeq, StageWidth * 2, 0, _cpu, params));
 
-    resources.push_back(new TLBUnit("I-TLB", ITLB, StageWidth, 0, _cpu, params));
-
     memObjects.push_back(ICache);
     resources.push_back(new CacheUnit("icache_port", ICache, StageWidth * MaxThreads, 0, _cpu, params));
 
@@ -69,8 +67,6 @@ ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params)
 
     resources.push_back(new MultDivUnit("Mult-Div-Unit", MDU, 5, 0, _cpu, params));
 
-    resources.push_back(new TLBUnit("D-TLB", DTLB, StageWidth, 0, _cpu, params));
-
     memObjects.push_back(DCache);
     resources.push_back(new CacheUnit("dcache_port", DCache, StageWidth * MaxThreads, 0, _cpu, params));
 
@@ -205,7 +201,6 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst,
                                      inst->bdelaySeqNum,
                                      inst->readTid());
             mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay));
-
         }
         break;
 
@@ -256,7 +251,7 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst,
         break;
 
       default:
-        DPRINTF(Resource, "Ignoring Unrecognized CPU Event Type #%s.\n", InOrderCPU::eventNames[e_type]);
+        DPRINTF(Resource, "Ignoring Unrecognized CPU Event (%s).\n", InOrderCPU::eventNames[e_type]);
         ; // If Resource Pool doesnt recognize event, we ignore it.
     }
 }
diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc
index c5d35dfb3..5d5d4d45d 100644
--- a/src/cpu/inorder/resources/cache_unit.cc
+++ b/src/cpu/inorder/resources/cache_unit.cc
@@ -86,6 +86,25 @@ CacheUnit::CacheUnit(string res_name, int res_id, int res_width,
       predecoder(NULL)
 {
     cachePort = new CachePort(this);
+
+    // Hard-Code Selection For Now
+    if (res_name == "icache_port")
+        _tlb = params->itb;
+    else if (res_name == "dcache_port")
+        _tlb = params->dtb;
+    else
+        fatal("Unrecognized TLB name passed by user");
+
+    for (int i=0; i < MaxThreads; i++) {
+        tlbBlocked[i] = false;
+    }
+}
+
+TheISA::TLB*
+CacheUnit::tlb()
+{
+    return _tlb;
+
 }
 
 Port *
@@ -97,9 +116,23 @@ CacheUnit::getPort(const string &if_name, int idx)
         return NULL;
 }
 
+void
+CacheUnit::init()
+{
+    // Currently Used to Model TLB Latency. Eventually
+    // Switch to Timing TLB translations.
+    resourceEvent = new CacheUnitEvent[width];
+
+    initSlots();
+}
+
 int
 CacheUnit::getSlot(DynInstPtr inst)
 {
+    if (tlbBlocked[inst->threadNumber]) {
+        return -1;
+    }
+
     if (!inst->validMemAddr()) {
         panic("Mem. Addr. must be set before requesting cache access\n");
     }
@@ -156,45 +189,47 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx,
         panic("Mem. Addr. must be set before requesting cache access\n");
     }
 
-    int req_size = 0;
     MemCmd::Command pkt_cmd;
 
-    if (sched_entry->cmd == InitiateReadData) {
+    switch (sched_entry->cmd)
+    {
+      case InitiateReadData:
         pkt_cmd = MemCmd::ReadReq;
-        req_size = inst->getMemAccSize();
 
         DPRINTF(InOrderCachePort,
-                "[tid:%i]: %i byte Read request from [sn:%i] for addr %08p\n",
-                inst->readTid(), req_size, inst->seqNum, inst->getMemAddr());
-    } else if (sched_entry->cmd == InitiateWriteData) {
+                "[tid:%i]: Read request from [sn:%i] for addr %08p\n",
+                inst->readTid(), inst->seqNum, inst->getMemAddr());
+        break;
+
+      case InitiateWriteData:
         pkt_cmd = MemCmd::WriteReq;
-        req_size = inst->getMemAccSize();
 
         DPRINTF(InOrderCachePort,
-                "[tid:%i]: %i byte Write request from [sn:%i] for addr %08p\n",
-                inst->readTid(), req_size, inst->seqNum, inst->getMemAddr());
-    } else if (sched_entry->cmd == InitiateFetch){
+                "[tid:%i]: Write request from [sn:%i] for addr %08p\n",
+                inst->readTid(), inst->seqNum, inst->getMemAddr());
+        break;
+
+      case InitiateFetch:
         pkt_cmd = MemCmd::ReadReq;
-        req_size = sizeof(MachInst);
 
         DPRINTF(InOrderCachePort,
-                "[tid:%i]: %i byte Fetch request from [sn:%i] for addr %08p\n",
-                inst->readTid(), req_size, inst->seqNum, inst->getMemAddr());
-    } else {
+                "[tid:%i]: Fetch request from [sn:%i] for addr %08p\n",
+                inst->readTid(), inst->seqNum, inst->getMemAddr());
+        break;
+
+      default:
         panic("%i: Unexpected request type (%i) to %s", curTick,
               sched_entry->cmd, name());
     }
 
     return new CacheRequest(this, inst, stage_num, id, slot_num,
-                            sched_entry->cmd, req_size, pkt_cmd,
+                            sched_entry->cmd, 0, pkt_cmd,
                             0/*flags*/, this->cpu->readCpuId());
 }
 
 void
 CacheUnit::requestAgain(DynInstPtr inst, bool &service_request)
 {
-    //service_request = false;
-
     CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(findRequest(inst));
     assert(cache_req);
 
@@ -204,7 +239,7 @@ CacheUnit::requestAgain(DynInstPtr inst, bool &service_request)
         // If different, then update command in the request
         cache_req->cmd = inst->resSched.top()->cmd;
         DPRINTF(InOrderCachePort,
-                "[tid:%i]: [sn:%i]: the command for this instruction\n",
+                "[tid:%i]: [sn:%i]: Updating the command for this instruction\n",
                 inst->readTid(), inst->seqNum);
 
         service_request = true;
@@ -219,6 +254,101 @@ CacheUnit::requestAgain(DynInstPtr inst, bool &service_request)
     }
 }
 
+Fault
+CacheUnit::doTLBAccess(DynInstPtr inst, CacheReqPtr cache_req, int acc_size,
+                       int flags, TheISA::TLB::Mode tlb_mode)
+{
+    int tid;
+    int seq_num;
+    Addr aligned_addr;
+    unsigned stage_num;
+    unsigned slot_idx;
+
+    tid = inst->readTid();
+    seq_num = inst->seqNum;
+    aligned_addr = inst->getMemAddr();
+    stage_num = cache_req->getStageNum();
+    slot_idx = cache_req->getSlot();
+
+    if (tlb_mode == TheISA::TLB::Execute) {
+            inst->fetchMemReq = new Request(inst->readTid(), aligned_addr,
+                                            acc_size, flags, inst->readPC(),
+                                            cpu->readCpuId(), inst->readTid());
+            cache_req->memReq = inst->fetchMemReq;
+    } else {
+            inst->dataMemReq = new Request(inst->readTid(), aligned_addr,
+                                           acc_size, flags, inst->readPC(),
+                                           cpu->readCpuId(), inst->readTid());
+            cache_req->memReq = inst->dataMemReq;
+    }
+
+
+    cache_req->fault =
+        _tlb->translateAtomic(cache_req->memReq,
+                              cpu->thread[tid]->getTC(), tlb_mode);
+
+    if (cache_req->fault != NoFault) {
+        DPRINTF(InOrderTLB, "[tid:%i]: %s encountered while translating "
+                "addr:%08p for [sn:%i].\n", tid, cache_req->fault->name(),
+                cache_req->memReq->getVaddr(), seq_num);
+
+        cpu->pipelineStage[stage_num]->setResStall(cache_req, tid);
+
+        tlbBlocked[tid] = true;
+
+        cache_req->tlbStall = true;
+
+        scheduleEvent(slot_idx, 1);
+
+        cpu->trap(cache_req->fault, tid);
+    } else {
+        DPRINTF(InOrderTLB, "[tid:%i]: [sn:%i] virt. addr %08p translated "
+                "to phys. addr:%08p.\n", tid, seq_num,
+                cache_req->memReq->getVaddr(),
+                cache_req->memReq->getPaddr());
+    }
+
+    return cache_req->fault;
+}
+
+template <class T>
+Fault
+CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags)
+{
+    CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(findRequest(inst));
+    assert(cache_req);
+
+    int acc_size =  sizeof(T);
+    doTLBAccess(inst, cache_req, acc_size, flags, TheISA::TLB::Read);
+
+    if (cache_req->fault == NoFault) {
+        cache_req->reqData = new uint8_t[acc_size];
+        doCacheAccess(inst, NULL);
+    }
+
+    return cache_req->fault;
+}
+
+template <class T>
+Fault
+CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags,
+            uint64_t *write_res)
+{
+    CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(findRequest(inst));
+    assert(cache_req);
+
+    int acc_size =  sizeof(T);
+    doTLBAccess(inst, cache_req, acc_size, flags, TheISA::TLB::Write);
+
+    if (cache_req->fault == NoFault) {
+        cache_req->reqData = new uint8_t[acc_size];
+        doCacheAccess(inst, write_res);
+    }
+
+    return cache_req->fault;
+}
+
+
 void
 CacheUnit::execute(int slot_num)
 {
@@ -241,21 +371,46 @@ CacheUnit::execute(int slot_num)
     switch (cache_req->cmd)
     {
       case InitiateFetch:
-        DPRINTF(InOrderCachePort,
-                "[tid:%u]: Initiating fetch access to %s for addr. %08p\n",
-                tid, name(), cache_req->inst->getMemAddr());
+        {
+            //@TODO: Switch to size of full cache block. Store in fetch buffer
+            int acc_size =  sizeof(TheISA::MachInst);
+
+            doTLBAccess(inst, cache_req, acc_size, 0, TheISA::TLB::Execute);
+
+            // Only Do Access if no fault from TLB
+            if (cache_req->fault == NoFault) {
+
+                DPRINTF(InOrderCachePort,
+                        "[tid:%u]: Initiating fetch access to %s for addr. %08p\n",
+                        tid, name(), cache_req->inst->getMemAddr());
+
+                cache_req->reqData = new uint8_t[acc_size];
+
+                inst->setCurResSlot(slot_num);
 
+                doCacheAccess(inst);
+            }
+
+            break;
+        }
+
+      case InitiateReadData:
+      case InitiateWriteData:
         DPRINTF(InOrderCachePort,
-                "[tid:%u]: Fetching new cache block from addr: %08p\n",
-                tid, cache_req->memReq->getVaddr());
+                "[tid:%u]: Initiating data access to %s for addr. %08p\n",
+                tid, name(), cache_req->inst->getMemAddr());
 
         inst->setCurResSlot(slot_num);
-        doDataAccess(inst);
+
+        if (inst->isDataPrefetch() || inst->isInstPrefetch()) {
+            inst->execute();
+        } else {
+            inst->initiateAcc();
+        }
+
         break;
 
       case CompleteFetch:
-        // @TODO: MOVE Functionality of handling fetched data into 'fetch unit'
-        //        let cache-unit just be responsible for transferring data.
         if (cache_req->isMemAccComplete()) {
             DPRINTF(InOrderCachePort,
                     "[tid:%i]: Completing Fetch Access for [sn:%i]\n",
@@ -278,22 +433,6 @@ CacheUnit::execute(int slot_num)
         }
         break;
 
-      case InitiateReadData:
-      case InitiateWriteData:
-        DPRINTF(InOrderCachePort,
-                "[tid:%u]: Initiating data access to %s for addr. %08p\n",
-                tid, name(), cache_req->inst->getMemAddr());
-
-        inst->setCurResSlot(slot_num);
-
-        if (inst->isDataPrefetch() || inst->isInstPrefetch()) {
-            inst->execute();
-        } else {
-            inst->initiateAcc();
-        }
-
-        break;
-
       case CompleteReadData:
       case CompleteWriteData:
         DPRINTF(InOrderCachePort,
@@ -355,8 +494,9 @@ CacheUnit::writeHint(DynInstPtr inst)
     inst->unsetMemAddr();
 }
 
+// @TODO: Split into doCacheRead() and doCacheWrite()
 Fault
-CacheUnit::doDataAccess(DynInstPtr inst, uint64_t *write_res)
+CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res)
 {
     Fault fault = NoFault;
     int tid = 0;
@@ -603,6 +743,35 @@ CacheUnit::recvRetry()
     }
 }
 
+CacheUnitEvent::CacheUnitEvent()
+    : ResourceEvent()
+{ }
+
+void
+CacheUnitEvent::process()
+{
+    DynInstPtr inst = resource->reqMap[slotIdx]->inst;
+    int stage_num = resource->reqMap[slotIdx]->getStageNum();
+    int tid = inst->threadNumber;
+    CacheReqPtr req_ptr = dynamic_cast<CacheReqPtr>(resource->reqMap[slotIdx]);
+
+    DPRINTF(InOrderTLB, "Waking up from TLB Miss caused by [sn:%i].\n",
+            inst->seqNum);
+
+    CacheUnit* tlb_res = dynamic_cast<CacheUnit*>(resource);
+    assert(tlb_res);
+
+    tlb_res->tlbBlocked[tid] = false;
+
+    tlb_res->cpu->pipelineStage[stage_num]->unsetResStall(tlb_res->reqMap[slotIdx], tid);
+
+    req_ptr->tlbStall = false;
+
+    if (req_ptr->isSquashed()) {
+        req_ptr->done();
+    }
+}
+
 void
 CacheUnit::squash(DynInstPtr inst, int stage_num,
                   InstSeqNum squash_seq_num, unsigned tid)
@@ -630,7 +799,17 @@ CacheUnit::squash(DynInstPtr inst, int stage_num,
             CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(req_ptr);
             assert(cache_req);
 
-            if (!cache_req->isMemAccPending()) {
+            int req_slot_num = req_ptr->getSlot();
+
+            if (cache_req->tlbStall) {
+                tlbBlocked[tid] = false;
+
+                int stall_stage = reqMap[req_slot_num]->getStageNum();
+
+                cpu->pipelineStage[stall_stage]->unsetResStall(reqMap[req_slot_num], tid);
+            }
+
+            if (!cache_req->tlbStall && !cache_req->isMemAccPending()) {
                 // Mark request for later removal
                 cpu->reqRemoveList.push(req_ptr);
 
@@ -669,3 +848,109 @@ CacheUnit::getMemData(Packet *packet)
     }
 }
 
+// Extra Template Definitions
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+
+template
+Fault
+CacheUnit::read(DynInstPtr inst, Addr addr, Twin32_t &data, unsigned flags);
+
+template
+Fault
+CacheUnit::read(DynInstPtr inst, Addr addr, Twin64_t &data, unsigned flags);
+
+template
+Fault
+CacheUnit::read(DynInstPtr inst, Addr addr, uint64_t &data, unsigned flags);
+
+template
+Fault
+CacheUnit::read(DynInstPtr inst, Addr addr, uint32_t &data, unsigned flags);
+
+template
+Fault
+CacheUnit::read(DynInstPtr inst, Addr addr, uint16_t &data, unsigned flags);
+
+template
+Fault
+CacheUnit::read(DynInstPtr inst, Addr addr, uint8_t &data, unsigned flags);
+
+#endif //DOXYGEN_SHOULD_SKIP_THIS
+
+template<>
+Fault
+CacheUnit::read(DynInstPtr inst, Addr addr, double &data, unsigned flags)
+{
+    return read(inst, addr, *(uint64_t*)&data, flags);
+}
+
+template<>
+Fault
+CacheUnit::read(DynInstPtr inst, Addr addr, float &data, unsigned flags)
+{
+    return read(inst, addr, *(uint32_t*)&data, flags);
+}
+
+
+template<>
+Fault
+CacheUnit::read(DynInstPtr inst, Addr addr, int32_t &data, unsigned flags)
+{
+    return read(inst, addr, (uint32_t&)data, flags);
+}
+
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+
+template
+Fault
+CacheUnit::write(DynInstPtr inst, Twin32_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+template
+Fault
+CacheUnit::write(DynInstPtr inst, Twin64_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+template
+Fault
+CacheUnit::write(DynInstPtr inst, uint64_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+template
+Fault
+CacheUnit::write(DynInstPtr inst, uint32_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+template
+Fault
+CacheUnit::write(DynInstPtr inst, uint16_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+template
+Fault
+CacheUnit::write(DynInstPtr inst, uint8_t data, Addr addr,
+                       unsigned flags, uint64_t *res);
+
+#endif //DOXYGEN_SHOULD_SKIP_THIS
+
+template<>
+Fault
+CacheUnit::write(DynInstPtr inst, double data, Addr addr, unsigned flags, uint64_t *res)
+{
+    return write(inst, *(uint64_t*)&data, addr, flags, res);
+}
+
+template<>
+Fault
+CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64_t *res)
+{
+    return write(inst, *(uint32_t*)&data, addr, flags, res);
+}
+
+
+template<>
+Fault
+CacheUnit::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, uint64_t *res)
+{
+    return write(inst, (uint32_t)data, addr, flags, res);
+}
diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh
index 226a35a52..aba5a1b0c 100644
--- a/src/cpu/inorder/resources/cache_unit.hh
+++ b/src/cpu/inorder/resources/cache_unit.hh
@@ -36,6 +36,7 @@
 #include <list>
 #include <string>
 
+#include "arch/tlb.hh"
 #include "arch/predecoder.hh"
 #include "cpu/inorder/resource.hh"
 #include "cpu/inorder/inorder_dyn_inst.hh"
@@ -124,7 +125,7 @@ class CacheUnit : public Resource
         cacheAccessComplete
     };
 
-    ///virtual void init();
+    void init();
 
     virtual ResourceRequest* getRequest(DynInstPtr _inst, int stage_num,
                                         int res_idx, int slot_num,
@@ -159,10 +160,20 @@ class CacheUnit : public Resource
     /** Returns a specific port. */
     Port *getPort(const std::string &if_name, int idx);
 
+    template <class T>
+    Fault read(DynInstPtr inst, Addr addr, T &data, unsigned flags);
+
+    template <class T>
+    Fault write(DynInstPtr inst, T data, Addr addr, unsigned flags,
+                        uint64_t *res);
+
+    Fault doTLBAccess(DynInstPtr inst, CacheReqPtr cache_req, int acc_size,
+                      int flags,  TheISA::TLB::Mode tlb_mode);
+
     /** Read/Write on behalf of an instruction.
      *  curResSlot needs to be a valid value in instruction.
      */
-    Fault doDataAccess(DynInstPtr inst, uint64_t *write_result=NULL);
+    Fault doCacheAccess(DynInstPtr inst, uint64_t *write_result=NULL);
 
     void prefetch(DynInstPtr inst);
 
@@ -209,23 +220,28 @@ class CacheUnit : public Resource
     //unsigned fetchOffset[ThePipeline::MaxThreads];
 
     TheISA::Predecoder predecoder;
+
+    bool tlbBlocked[ThePipeline::MaxThreads];
+
+    TheISA::TLB* tlb();
+
+    TheISA::TLB *_tlb;
 };
 
-struct CacheSchedEntry : public ThePipeline::ScheduleEntry
-{
-    enum EntryType {
-        FetchAccess,
-        DataAccess
-    };
+class CacheUnitEvent : public ResourceEvent {
+  public:
+    const std::string name() const
+    {
+        return "CacheUnitEvent"; // fixed name, same for every instance
+    }
 
-    CacheSchedEntry(int stage_num, int _priority, int res_num,
-                    MemCmd::Command pkt_cmd, EntryType _type = FetchAccess)
-        : ScheduleEntry(stage_num, _priority, res_num), pktCmd(pkt_cmd),
-          type(_type)
-    { }
 
-    MemCmd::Command pktCmd;
-    EntryType type;
+    /** Constructs a cache unit event (declaration only). */
+    CacheUnitEvent();
+    virtual ~CacheUnitEvent() {}
+
+    /** Processes a cache unit event (declaration only). */
+    virtual void process();
 };
 
 class CacheRequest : public ResourceRequest
@@ -235,43 +251,17 @@ class CacheRequest : public ResourceRequest
                  int slot_num, unsigned cmd, int req_size,
                  MemCmd::Command pkt_cmd, unsigned flags, int cpu_id)
         : ResourceRequest(cres, inst, stage_num, res_idx, slot_num, cmd),
-          pktCmd(pkt_cmd), memAccComplete(false), memAccPending(false)
-    {
-        if (cmd == CacheUnit::InitiateFetch ||
-            cmd == CacheUnit::CompleteFetch ||
-            cmd == CacheUnit::Fetch) {
-            memReq = inst->fetchMemReq;
-        } else {
-            memReq = inst->dataMemReq;
-        }
+          pktCmd(pkt_cmd), memReq(NULL), reqData(NULL), dataPkt(NULL),
+          retryPkt(NULL), memAccComplete(false), memAccPending(false),
+          tlbStall(false)
+    { }
 
-        //@ Only matters for Fetch / Read requests
-        //  Don't allocate for Writes!
-        reqData = new uint8_t[req_size];
-        retryPkt = NULL;
-    }
 
     virtual ~CacheRequest()
     {
-#if 0
-        delete reqData;
-
-        // Can get rid of packet and packet request now
-        if (*dataPkt) {
-            if (*dataPkt->req) {
-                delete dataPkt->req;
-            }
-            delete dataPkt;
-        }
-
-        // Can get rid of packet and packet request now
-        if (retryPkt) {
-            if (retryPkt->req) {
-                delete retryPkt->req;
-            }
-            delete retryPkt;
+        if (reqData) { // reqData may still be NULL (constructor default)
+            delete [] reqData; // NOTE(review): packets are not freed here
         }
-#endif
     }
 
     virtual PacketDataPtr getData()
@@ -297,6 +287,7 @@ class CacheRequest : public ResourceRequest
 
     bool memAccComplete;
     bool memAccPending;
+    bool tlbStall;
 };
 
 class CacheReqPacket : public Packet
diff --git a/src/cpu/inorder/resources/tlb_unit.hh b/src/cpu/inorder/resources/tlb_unit.hh
index 759fe14f1..8f0291b48 100644
--- a/src/cpu/inorder/resources/tlb_unit.hh
+++ b/src/cpu/inorder/resources/tlb_unit.hh
@@ -114,8 +114,8 @@ class TLBUnitRequest : public ResourceRequest {
             memReq = inst->fetchMemReq;
         } else {
             aligned_addr = inst->getMemAddr();;
-            req_size = inst->getMemAccSize();
-            flags = inst->getMemFlags();
+            req_size = 0; //inst->getMemAccSize();
+            flags = 0; //inst->getMemFlags();
 
             if (req_size == 0 && (inst->isDataPrefetch() || inst->isInstPrefetch())) {
                 req_size = 8;
diff --git a/src/cpu/static_inst.cc b/src/cpu/static_inst.cc
index 01136bda1..2c4fc8ab9 100644
--- a/src/cpu/static_inst.cc
+++ b/src/cpu/static_inst.cc
@@ -106,13 +106,6 @@ StaticInst::branchTarget(ThreadContext *tc) const
     M5_DUMMY_RETURN;
 }
 
-Request::Flags
-StaticInst::memAccFlags()
-{
-    panic("StaticInst::memAccFlags called on non-memory instruction");
-    return 0;
-}
-
 const string &
 StaticInst::disassemble(Addr pc, const SymbolTable *symtab) const
 {
diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh
index cb32f2333..1dc148ce6 100644
--- a/src/cpu/static_inst.hh
+++ b/src/cpu/static_inst.hh
@@ -419,8 +419,6 @@ class StaticInst : public StaticInstBase
      */
     bool hasBranchTarget(Addr pc, ThreadContext *tc, Addr &tgt) const;
 
-    virtual Request::Flags memAccFlags();
-
     /**
      * Return string representation of disassembled instruction.
      * The default version of this function will call the internal
-- 
2.30.2