From 5f91ec3f4618dad8d36efbf8b5a5112a1ce0d1b7 Mon Sep 17 00:00:00 2001 From: Min Kyu Jeong Date: Mon, 23 Aug 2010 11:18:40 -0500 Subject: [PATCH] ARM/O3: store the result of the predicate evaluation in DynInst or ThreadState. This allows the CPU to handle predicated-false instructions accordingly. This particular patch makes loads that are predicated-false get sent straight to the commit stage, rather than waiting for the return of data that was never requested because the load was predicated-false. --- src/arch/arm/isa/templates/mem.isa | 21 +++++++++++++++++++-- src/arch/arm/isa/templates/pred.isa | 2 ++ src/cpu/base_dyn_inst.hh | 13 +++++++++++++ src/cpu/base_dyn_inst_impl.hh | 1 + src/cpu/o3/lsq_unit_impl.hh | 18 +++++++++++++++--- src/cpu/simple/base.hh | 3 +++ src/cpu/simple_thread.hh | 13 +++++++++++++ src/cpu/thread_context.hh | 5 +++++ 8 files changed, 71 insertions(+), 5 deletions(-) diff --git a/src/arch/arm/isa/templates/mem.isa b/src/arch/arm/isa/templates/mem.isa index ea66ce2a6..5431777b2 100644 --- a/src/arch/arm/isa/templates/mem.isa +++ b/src/arch/arm/isa/templates/mem.isa @@ -69,6 +69,8 @@ def template SwapExecute {{ if (fault == NoFault) { %(op_wb)s; } + } else { + xc->setPredicate(false); } if (fault == NoFault && machInst.itstateMask != 0) { @@ -103,6 +105,8 @@ def template SwapInitiateAcc {{ if (fault == NoFault) { %(op_wb)s; } + } else { + xc->setPredicate(false); } if (fault == NoFault && machInst.itstateMask != 0) { @@ -164,6 +168,8 @@ def template LoadExecute {{ if (fault == NoFault) { %(op_wb)s; } + } else { + xc->setPredicate(false); } if (fault == NoFault && machInst.itstateMask != 0) { @@ -200,6 +206,8 @@ def template StoreExecute {{ if (fault == NoFault) { %(op_wb)s; } + } else { + xc->setPredicate(false); } if (fault == NoFault && machInst.itstateMask != 0) { @@ -242,6 +250,8 @@ def template StoreExExecute {{ if (fault == NoFault) { %(op_wb)s; } + } else { + xc->setPredicate(false); } if (fault == NoFault && machInst.itstateMask != 0) { 
@@ -279,6 +289,8 @@ def template StoreExInitiateAcc {{ if (fault == NoFault) { %(op_wb)s; } + } else { + xc->setPredicate(false); } if (fault == NoFault && machInst.itstateMask != 0) { @@ -316,6 +328,8 @@ def template StoreInitiateAcc {{ if (fault == NoFault) { %(op_wb)s; } + } else { + xc->setPredicate(false); } if (fault == NoFault && machInst.itstateMask != 0) { @@ -342,8 +356,11 @@ def template LoadInitiateAcc {{ if (fault == NoFault) { fault = xc->read(EA, (uint%(mem_acc_size)d_t &)Mem, memAccessFlags); } - } else if (fault == NoFault && machInst.itstateMask != 0) { - xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate); + } else { + xc->setPredicate(false); + if (fault == NoFault && machInst.itstateMask != 0) { + xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate); + } } return fault; diff --git a/src/arch/arm/isa/templates/pred.isa b/src/arch/arm/isa/templates/pred.isa index 7a5b92760..1029cfaee 100644 --- a/src/arch/arm/isa/templates/pred.isa +++ b/src/arch/arm/isa/templates/pred.isa @@ -142,6 +142,8 @@ def template PredOpExecute {{ { %(op_wb)s; } + } else { + xc->setPredicate(false); } if (fault == NoFault && machInst.itstateMask != 0) { diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 6ea00dd3d..a992664d0 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -246,6 +246,9 @@ class BaseDynInst : public FastAlloc, public RefCounted /** Micro PC of this instruction. */ Addr microPC; + /** Did this instruction execute, or is it predicated false */ + bool predicate; + protected: /** Next non-speculative PC. It is not filled in at fetch, but rather * once the target of the branch is truly known (either decode or @@ -794,6 +797,16 @@ class BaseDynInst : public FastAlloc, public RefCounted nextMicroPC = val; } + bool readPredicate() + { + return predicate; + } + + void setPredicate(bool val) + { + predicate = val; + } + /** Sets the ASID. 
*/ void setASID(short addr_space_id) { asid = addr_space_id; } diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh index 70c91ceda..7425431db 100644 --- a/src/cpu/base_dyn_inst_impl.hh +++ b/src/cpu/base_dyn_inst_impl.hh @@ -154,6 +154,7 @@ BaseDynInst::initVars() eaCalcDone = false; memOpDone = false; + predicate = true; lqIdx = -1; sqIdx = -1; diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index fcc57ab09..dddfb7e1b 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2010 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2005 The Regents of The University of Michigan * All rights reserved. * @@ -439,9 +451,9 @@ LSQUnit::executeLoad(DynInstPtr &inst) load_fault = inst->initiateAcc(); - // If the instruction faulted, then we need to send it along to commit - // without the instruction completing. - if (load_fault != NoFault) { + // If the instruction faulted or predicated false, then we need to send it + // along to commit without the instruction completing. + if (load_fault != NoFault || inst->readPredicate() == false) { // Send this instruction to commit, also make sure iew stage // realizes there is activity. 
// Mark it as executed unless it is an uncached load that diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index 1265a1f2f..90cb81c0c 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -287,12 +287,15 @@ class BaseSimpleCPU : public BaseCPU uint64_t readNextPC() { return thread->readNextPC(); } uint64_t readNextMicroPC() { return thread->readNextMicroPC(); } uint64_t readNextNPC() { return thread->readNextNPC(); } + bool readPredicate() { return thread->readPredicate(); } void setPC(uint64_t val) { thread->setPC(val); } void setMicroPC(uint64_t val) { thread->setMicroPC(val); } void setNextPC(uint64_t val) { thread->setNextPC(val); } void setNextMicroPC(uint64_t val) { thread->setNextMicroPC(val); } void setNextNPC(uint64_t val) { thread->setNextNPC(val); } + void setPredicate(bool val) + { return thread->setPredicate(val); } MiscReg readMiscRegNoEffect(int misc_reg) { diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh index bc8588041..dcd933ffc 100644 --- a/src/cpu/simple_thread.hh +++ b/src/cpu/simple_thread.hh @@ -128,6 +128,9 @@ class SimpleThread : public ThreadState */ Addr nextNPC; + /** Did this instruction execute or is it predicated false */ + bool predicate; + public: // pointer to CPU associated with this SimpleThread BaseCPU *cpu; @@ -371,6 +374,16 @@ class SimpleThread : public ThreadState #endif } + bool readPredicate() + { + return predicate; + } + + void setPredicate(bool val) + { + predicate = val; + } + MiscReg readMiscRegNoEffect(int misc_reg, ThreadID tid = 0) { diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh index 78ecdacf2..7f6d258ab 100644 --- a/src/cpu/thread_context.hh +++ b/src/cpu/thread_context.hh @@ -404,6 +404,11 @@ class ProxyThreadContext : public ThreadContext void setNextMicroPC(uint64_t val) { actualTC->setNextMicroPC(val); } + bool readPredicate() { return actualTC->readPredicate(); } + + void setPredicate(bool val) + { actualTC->setPredicate(val); } + MiscReg 
readMiscRegNoEffect(int misc_reg) { return actualTC->readMiscRegNoEffect(misc_reg); } -- 2.30.2