From 719f9a6d4fba16af38dcfd62b25a4d708156699f Mon Sep 17 00:00:00 2001 From: Giacomo Gabrielli Date: Tue, 7 Dec 2010 16:19:57 -0800 Subject: [PATCH] O3: Make all instructions that write a misc. register not perform the write until commit. ARM instructions updating cumulative flags (ARM FP exceptions and saturation flags) are not serialized. Added aliases for ARM FP exceptions and saturation flags in FPSCR. Removed write accesses to the FP condition codes for most ARM VFP instructions: only VCMP and VCMPE instructions update the FP condition codes. Removed a potential cause of seg. faults in the O3 model for NEON memory macro-ops (ARM). --- src/arch/alpha/isa/fp.isa | 2 +- src/arch/alpha/locked_mem.hh | 10 +- src/arch/arm/isa.cc | 17 +- src/arch/arm/isa/insts/fp.isa | 233 +++++++++++++++------------- src/arch/arm/isa/insts/misc.isa | 4 +- src/arch/arm/isa/insts/neon.isa | 220 +++++++++++++------------- src/arch/arm/isa/operands.isa | 2 + src/arch/arm/miscregs.hh | 11 +- src/arch/mips/locked_mem.hh | 10 +- src/cpu/base_dyn_inst.hh | 3 +- src/cpu/inorder/inorder_dyn_inst.cc | 48 +----- src/cpu/o3/commit_impl.hh | 15 ++ src/cpu/o3/dyn_inst.hh | 83 +++++----- src/cpu/o3/dyn_inst_impl.hh | 14 ++ src/cpu/simple/base.hh | 17 -- 15 files changed, 359 insertions(+), 330 deletions(-) diff --git a/src/arch/alpha/isa/fp.isa b/src/arch/alpha/isa/fp.isa index ed04d2a50..f9abd9b68 100644 --- a/src/arch/alpha/isa/fp.isa +++ b/src/arch/alpha/isa/fp.isa @@ -229,7 +229,7 @@ def template FloatingPointExecute {{ %(code)s; } else { m5_fesetround(getC99RoundingMode( - xc->readMiscRegNoEffect(MISCREG_FPCR))); + xc->readMiscReg(MISCREG_FPCR))); %(code)s; m5_fesetround(M5_FE_TONEAREST); } diff --git a/src/arch/alpha/locked_mem.hh b/src/arch/alpha/locked_mem.hh index 86958e4c5..24d028b54 100644 --- a/src/arch/alpha/locked_mem.hh +++ b/src/arch/alpha/locked_mem.hh @@ -55,8 +55,8 @@ template inline void handleLockedRead(XC *xc, Request *req) { - xc->setMiscRegNoEffect(MISCREG_LOCKADDR, req->getPaddr() & ~0xf); - xc->setMiscRegNoEffect(MISCREG_LOCKFLAG, true); + xc->setMiscReg(MISCREG_LOCKADDR, req->getPaddr() & ~0xf); + xc->setMiscReg(MISCREG_LOCKFLAG, true); } @@ -70,13 +70,13 @@ handleLockedWrite(XC *xc, Request *req) req->setExtraData(2); } else { // standard store conditional - bool lock_flag = xc->readMiscRegNoEffect(MISCREG_LOCKFLAG); - Addr lock_addr = xc->readMiscRegNoEffect(MISCREG_LOCKADDR); + bool lock_flag = xc->readMiscReg(MISCREG_LOCKFLAG); + Addr lock_addr = xc->readMiscReg(MISCREG_LOCKADDR); if (!lock_flag || (req->getPaddr() & ~0xf) != lock_addr) { // Lock flag not set or addr mismatch in CPU; // don't even bother sending to memory system req->setExtraData(0); - xc->setMiscRegNoEffect(MISCREG_LOCKFLAG, false); + xc->setMiscReg(MISCREG_LOCKFLAG, false); // the rest of this code is not architectural; // it's just a debugging aid to help detect // livelock by warning on long sequences of failed diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc index 87203c3f0..649394270 100644 --- a/src/arch/arm/isa.cc +++ b/src/arch/arm/isa.cc @@ -202,7 +202,10 @@ ISA::readMiscReg(int misc_reg, ThreadContext *tc) warn("Not doing anyhting for read to miscreg %s\n", miscRegName[misc_reg]); break; - + case MISCREG_FPSCR_QC: + return readMiscRegNoEffect(MISCREG_FPSCR) & ~FpscrQcMask; + case MISCREG_FPSCR_EXC: + return readMiscRegNoEffect(MISCREG_FPSCR) & ~FpscrExcMask; } return readMiscRegNoEffect(misc_reg); } @@ -304,6 +307,18 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) (miscRegs[MISCREG_FPSCR] & ~(uint32_t)fpscrMask); } break; + case MISCREG_FPSCR_QC: + { + newVal = miscRegs[MISCREG_FPSCR] | (newVal & FpscrQcMask); + misc_reg = MISCREG_FPSCR; + } + break; + case MISCREG_FPSCR_EXC: + { + newVal = miscRegs[MISCREG_FPSCR] | (newVal & FpscrExcMask); + misc_reg = MISCREG_FPSCR; + } + break; case MISCREG_FPEXC: { const uint32_t fpexcMask = 0x60000000; diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa index 68c294851..961b9a355 100644 --- a/src/arch/arm/isa/insts/fp.isa +++ b/src/arch/arm/isa/insts/fp.isa @@ -208,7 +208,8 @@ let {{ vmsrFpscrIop = InstObjParams("vmsr", "VmsrFpscr", "FpRegRegOp", { "code": vmsrFpscrCode, "predicate_test": predicateTest, - "op_class": "SimdFloatMiscOp" }, []) + "op_class": "SimdFloatMiscOp" }, + ["IsSerializeAfter","IsNonSpeculative"]) header_output += FpRegRegOpDeclare.subst(vmsrFpscrIop); decoder_output += FpRegRegOpConstructor.subst(vmsrFpscrIop); exec_output += PredOpExecute.subst(vmsrFpscrIop); @@ -217,7 +218,8 @@ let {{ { "code": vmrsEnabledCheckCode + \ "Dest = MiscOp1;", "predicate_test": predicateTest, - "op_class": "SimdFloatMiscOp" }, []) + "op_class": "SimdFloatMiscOp" }, + ["IsSerializeBefore"]) header_output += FpRegRegOpDeclare.subst(vmrsIop); decoder_output += FpRegRegOpConstructor.subst(vmrsIop); exec_output += PredOpExecute.subst(vmrsIop); @@ -226,7 +228,8 @@ let {{ { "code": vmrsEnabledCheckCode + \ "Dest = Fpscr | FpCondCodes;", "predicate_test": predicateTest, - "op_class": "SimdFloatMiscOp" }, []) + "op_class": "SimdFloatMiscOp" }, + ["IsSerializeBefore"]) header_output += FpRegRegOpDeclare.subst(vmrsFpscrIop); decoder_output += FpRegRegOpConstructor.subst(vmrsFpscrIop); exec_output += PredOpExecute.subst(vmrsFpscrIop); @@ -237,7 +240,8 @@ let {{ vmrsApsrIop = InstObjParams("vmrs", "VmrsApsr", "FpRegRegImmOp", { "code": vmrsApsrCode, "predicate_test": predicateTest, - "op_class": "SimdFloatMiscOp" }, []) + "op_class": "SimdFloatMiscOp" }, + ["IsSerializeBefore"]) header_output += FpRegRegImmOpDeclare.subst(vmrsApsrIop); decoder_output += FpRegRegImmOpConstructor.subst(vmrsApsrIop); exec_output += PredOpExecute.subst(vmrsApsrIop); @@ -249,7 +253,8 @@ let {{ vmrsApsrFpscrIop = InstObjParams("vmrs", "VmrsApsrFpscr", "FpRegRegImmOp", { "code": vmrsApsrFpscrCode, "predicate_test": predicateTest, - "op_class": "SimdFloatMiscOp" }, []) + "op_class": "SimdFloatMiscOp" }, + ["IsSerializeBefore"]) header_output += FpRegRegImmOpDeclare.subst(vmrsApsrFpscrIop); decoder_output += FpRegRegImmOpConstructor.subst(vmrsApsrFpscrIop); exec_output += PredOpExecute.subst(vmrsApsrFpscrIop); @@ -451,20 +456,22 @@ let {{ decoder_output = "" exec_output = "" - singleCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + singleSimpleCode = vfpEnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; FpDest = %(op)s; - FpCondCodes = fpscr & FpCondCodesMask; + ''' + singleCode = singleSimpleCode + ''' + FpscrExc = fpscr; ''' singleBinOp = "binaryOp(fpscr, FpOp1, FpOp2," + \ "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)" singleUnaryOp = "unaryOp(fpscr, FpOp1, %(func)s, fpscr.fz, fpscr.rMode)" doubleCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; double dest = %(op)s; - FpCondCodes = fpscr & FpCondCodesMask; FpDestP0.uw = dblLow(dest); FpDestP1.uw = dblHi(dest); + FpscrExc = fpscr; ''' doubleBinOp = ''' binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw), @@ -545,7 +552,7 @@ let {{ global header_output, decoder_output, exec_output sIop = InstObjParams(name + "s", Name + "S", base, - { "code": singleCode % { "op": singleOp }, + { "code": singleSimpleCode % { "op": singleOp }, "predicate_test": predicateTest, "op_class": opClass }, []) dIop = InstObjParams(name + "d", Name + "D", base, @@ -574,12 +581,12 @@ let {{ exec_output = "" vmlaSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; float mid = binaryOp(fpscr, FpOp1, FpOp2, fpMulS, fpscr.fz, fpscr.dn, fpscr.rMode); FpDest = binaryOp(fpscr, FpDest, mid, fpAddS, fpscr.fz, fpscr.dn, fpscr.rMode); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vmlaSIop = InstObjParams("vmlas", "VmlaS", "FpRegRegRegOp", { "code": vmlaSCode, @@ -590,16 +597,16 @@ let {{ exec_output += PredOpExecute.subst(vmlaSIop); vmlaDCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; double mid = binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw), dbl(FpOp2P0.uw, FpOp2P1.uw), fpMulD, fpscr.fz, fpscr.dn, fpscr.rMode); double dest = binaryOp(fpscr, dbl(FpDestP0.uw, FpDestP1.uw), mid, fpAddD, fpscr.fz, fpscr.dn, fpscr.rMode); - FpCondCodes = fpscr & FpCondCodesMask; FpDestP0.uw = dblLow(dest); FpDestP1.uw = dblHi(dest); + FpscrExc = fpscr; ''' vmlaDIop = InstObjParams("vmlad", "VmlaD", "FpRegRegRegOp", { "code": vmlaDCode, @@ -610,12 +617,12 @@ let {{ exec_output += PredOpExecute.subst(vmlaDIop); vmlsSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; float mid = binaryOp(fpscr, FpOp1, FpOp2, fpMulS, fpscr.fz, fpscr.dn, fpscr.rMode); FpDest = binaryOp(fpscr, FpDest, -mid, fpAddS, fpscr.fz, fpscr.dn, fpscr.rMode); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vmlsSIop = InstObjParams("vmlss", "VmlsS", "FpRegRegRegOp", { "code": vmlsSCode, @@ -626,16 +633,16 @@ let {{ exec_output += PredOpExecute.subst(vmlsSIop); vmlsDCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; double mid = binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw), dbl(FpOp2P0.uw, FpOp2P1.uw), fpMulD, fpscr.fz, fpscr.dn, fpscr.rMode); double dest = binaryOp(fpscr, dbl(FpDestP0.uw, FpDestP1.uw), -mid, fpAddD, fpscr.fz, fpscr.dn, fpscr.rMode); - FpCondCodes = fpscr & FpCondCodesMask; FpDestP0.uw = dblLow(dest); FpDestP1.uw = dblHi(dest); + FpscrExc = fpscr; ''' vmlsDIop = InstObjParams("vmlsd", "VmlsD", "FpRegRegRegOp", { "code": vmlsDCode, @@ -646,12 +653,12 @@ let {{ exec_output += PredOpExecute.subst(vmlsDIop); vnmlaSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; float mid = binaryOp(fpscr, FpOp1, FpOp2, fpMulS, fpscr.fz, fpscr.dn, fpscr.rMode); FpDest = binaryOp(fpscr, -FpDest, -mid, fpAddS, fpscr.fz, fpscr.dn, fpscr.rMode); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vnmlaSIop = InstObjParams("vnmlas", "VnmlaS", "FpRegRegRegOp", { "code": vnmlaSCode, @@ -662,16 +669,16 @@ let {{ exec_output += PredOpExecute.subst(vnmlaSIop); vnmlaDCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; double mid = binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw), dbl(FpOp2P0.uw, FpOp2P1.uw), fpMulD, fpscr.fz, fpscr.dn, fpscr.rMode); double dest = binaryOp(fpscr, -dbl(FpDestP0.uw, FpDestP1.uw), -mid, fpAddD, fpscr.fz, fpscr.dn, fpscr.rMode); - FpCondCodes = fpscr & FpCondCodesMask; FpDestP0.uw = dblLow(dest); FpDestP1.uw = dblHi(dest); + FpscrExc = fpscr; ''' vnmlaDIop = InstObjParams("vnmlad", "VnmlaD", "FpRegRegRegOp", { "code": vnmlaDCode, @@ -682,12 +689,12 @@ let {{ exec_output += PredOpExecute.subst(vnmlaDIop); vnmlsSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; float mid = binaryOp(fpscr, FpOp1, FpOp2, fpMulS, fpscr.fz, fpscr.dn, fpscr.rMode); FpDest = binaryOp(fpscr, -FpDest, mid, fpAddS, fpscr.fz, fpscr.dn, fpscr.rMode); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vnmlsSIop = InstObjParams("vnmlss", "VnmlsS", "FpRegRegRegOp", { "code": vnmlsSCode, @@ -698,16 +705,16 @@ let {{ exec_output += PredOpExecute.subst(vnmlsSIop); vnmlsDCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; double mid = binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw), dbl(FpOp2P0.uw, FpOp2P1.uw), fpMulD, fpscr.fz, fpscr.dn, fpscr.rMode); double dest = binaryOp(fpscr, -dbl(FpDestP0.uw, FpDestP1.uw), mid, fpAddD, fpscr.fz, fpscr.dn, fpscr.rMode); - FpCondCodes = fpscr & FpCondCodesMask; FpDestP0.uw = dblLow(dest); FpDestP1.uw = dblHi(dest); + FpscrExc = fpscr; ''' vnmlsDIop = InstObjParams("vnmlsd", "VnmlsD", "FpRegRegRegOp", { "code": vnmlsDCode, @@ -718,10 +725,10 @@ let {{ exec_output += PredOpExecute.subst(vnmlsDIop); vnmulSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; FpDest = -binaryOp(fpscr, FpOp1, FpOp2, fpMulS, fpscr.fz, fpscr.dn, fpscr.rMode); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vnmulSIop = InstObjParams("vnmuls", "VnmulS", "FpRegRegRegOp", { "code": vnmulSCode, @@ -732,14 +739,14 @@ let {{ exec_output += PredOpExecute.subst(vnmulSIop); vnmulDCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; double dest = -binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw), dbl(FpOp2P0.uw, FpOp2P1.uw), fpMulD, fpscr.fz, fpscr.dn, fpscr.rMode); - FpCondCodes = fpscr & FpCondCodesMask; FpDestP0.uw = dblLow(dest); FpDestP1.uw = dblHi(dest); + FpscrExc = fpscr; ''' vnmulDIop = InstObjParams("vnmuld", "VnmulD", "FpRegRegRegOp", { "code": vnmulDCode, @@ -757,13 +764,13 @@ let {{ exec_output = "" vcvtUIntFpSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1.uw) : "m" (FpOp1.uw)); FpDest = FpOp1.uw; __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcvtUIntFpSIop = InstObjParams("vcvt", "VcvtUIntFpS", "FpRegRegOp", { "code": vcvtUIntFpSCode, @@ -774,15 +781,15 @@ let {{ exec_output += PredOpExecute.subst(vcvtUIntFpSIop); vcvtUIntFpDCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1P0.uw) : "m" (FpOp1P0.uw)); double cDest = (uint64_t)FpOp1P0.uw; __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; FpDestP0.uw = dblLow(cDest); FpDestP1.uw = dblHi(cDest); + FpscrExc = fpscr; ''' vcvtUIntFpDIop = InstObjParams("vcvt", "VcvtUIntFpD", "FpRegRegOp", { "code": vcvtUIntFpDCode, @@ -793,13 +800,13 @@ let {{ exec_output += PredOpExecute.subst(vcvtUIntFpDIop); vcvtSIntFpSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1.sw) : "m" (FpOp1.sw)); FpDest = FpOp1.sw; __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcvtSIntFpSIop = InstObjParams("vcvt", "VcvtSIntFpS", "FpRegRegOp", { "code": vcvtSIntFpSCode, @@ -810,15 +817,15 @@ let {{ exec_output += PredOpExecute.subst(vcvtSIntFpSIop); vcvtSIntFpDCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1P0.sw) : "m" (FpOp1P0.sw)); double cDest = FpOp1P0.sw; __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; FpDestP0.uw = dblLow(cDest); FpDestP1.uw = dblHi(cDest); + FpscrExc = fpscr; ''' vcvtSIntFpDIop = InstObjParams("vcvt", "VcvtSIntFpD", "FpRegRegOp", { "code": vcvtSIntFpDCode, @@ -829,14 +836,14 @@ let {{ exec_output += PredOpExecute.subst(vcvtSIntFpDIop); vcvtFpUIntSRCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); vfpFlushToZero(fpscr, FpOp1); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); FpDest.uw = vfpFpSToFixed(FpOp1, false, false, 0, false); __asm__ __volatile__("" :: "m" (FpDest.uw)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcvtFpUIntSRIop = InstObjParams("vcvt", "VcvtFpUIntSR", "FpRegRegOp", { "code": vcvtFpUIntSRCode, @@ -847,7 +854,7 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpUIntSRIop); vcvtFpUIntDRCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; double cOp1 = dbl(FpOp1P0.uw, FpOp1P1.uw); vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); @@ -855,8 +862,8 @@ let {{ uint64_t result = vfpFpDToFixed(cOp1, false, false, 0, false); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; FpDestP0.uw = result; + FpscrExc = fpscr; ''' vcvtFpUIntDRIop = InstObjParams("vcvtr", "VcvtFpUIntDR", "FpRegRegOp", { "code": vcvtFpUIntDRCode, @@ -867,14 +874,14 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpUIntDRIop); vcvtFpSIntSRCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); vfpFlushToZero(fpscr, FpOp1); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); FpDest.sw = vfpFpSToFixed(FpOp1, true, false, 0, false); __asm__ __volatile__("" :: "m" (FpDest.sw)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcvtFpSIntSRIop = InstObjParams("vcvtr", "VcvtFpSIntSR", "FpRegRegOp", { "code": vcvtFpSIntSRCode, @@ -885,7 +892,7 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpSIntSRIop); vcvtFpSIntDRCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; double cOp1 = dbl(FpOp1P0.uw, FpOp1P1.uw); vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); @@ -893,8 +900,8 @@ let {{ int64_t result = vfpFpDToFixed(cOp1, true, false, 0, false); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; FpDestP0.uw = result; + FpscrExc = fpscr; ''' vcvtFpSIntDRIop = InstObjParams("vcvtr", "VcvtFpSIntDR", "FpRegRegOp", { "code": vcvtFpSIntDRCode, @@ -905,7 +912,7 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpSIntDRIop); vcvtFpUIntSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); @@ -913,7 +920,7 @@ let {{ FpDest.uw = vfpFpSToFixed(FpOp1, false, false, 0); __asm__ __volatile__("" :: "m" (FpDest.uw)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcvtFpUIntSIop = InstObjParams("vcvt", "VcvtFpUIntS", "FpRegRegOp", { "code": vcvtFpUIntSCode, @@ -924,7 +931,7 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpUIntSIop); vcvtFpUIntDCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; double cOp1 = dbl(FpOp1P0.uw, FpOp1P1.uw); vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); @@ -933,8 +940,8 @@ let {{ uint64_t result = vfpFpDToFixed(cOp1, false, false, 0); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; FpDestP0.uw = result; + FpscrExc = fpscr; ''' vcvtFpUIntDIop = InstObjParams("vcvt", "VcvtFpUIntD", "FpRegRegOp", { "code": vcvtFpUIntDCode, @@ -945,7 +952,7 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpUIntDIop); vcvtFpSIntSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); @@ -953,7 +960,7 @@ let {{ FpDest.sw = vfpFpSToFixed(FpOp1, true, false, 0); __asm__ __volatile__("" :: "m" (FpDest.sw)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcvtFpSIntSIop = InstObjParams("vcvt", "VcvtFpSIntS", "FpRegRegOp", { "code": vcvtFpSIntSCode, @@ -964,7 +971,7 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpSIntSIop); vcvtFpSIntDCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; double cOp1 = dbl(FpOp1P0.uw, FpOp1P1.uw); vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); @@ -973,8 +980,8 @@ let {{ int64_t result = vfpFpDToFixed(cOp1, true, false, 0); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; FpDestP0.uw = result; + FpscrExc = fpscr; ''' vcvtFpSIntDIop = InstObjParams("vcvt", "VcvtFpSIntD", "FpRegRegOp", { "code": vcvtFpSIntDCode, @@ -985,16 +992,16 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpSIntDIop); vcvtFpSFpDCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - double cDest = fixFpSFpDDest(Fpscr, FpOp1); + double cDest = fixFpSFpDDest(FpscrExc, FpOp1); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; FpDestP0.uw = dblLow(cDest); FpDestP1.uw = dblHi(cDest); + FpscrExc = fpscr; ''' vcvtFpSFpDIop = InstObjParams("vcvt", "VcvtFpSFpD", "FpRegRegOp", { "code": vcvtFpSFpDCode, @@ -1005,15 +1012,15 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpSFpDIop); vcvtFpDFpSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; double cOp1 = dbl(FpOp1P0.uw, FpOp1P1.uw); vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - FpDest = fixFpDFpSDest(Fpscr, cOp1); + FpDest = fixFpDFpSDest(FpscrExc, cOp1); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcvtFpDFpSIop = InstObjParams("vcvt", "VcvtFpDFpS", "FpRegRegOp", { "code": vcvtFpDFpSCode, @@ -1024,7 +1031,7 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpDFpSIop); vcvtFpHTFpSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); @@ -1032,7 +1039,7 @@ let {{ bits(fpToBits(FpOp1), 31, 16)); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcvtFpHTFpSIop = InstObjParams("vcvtt", "VcvtFpHTFpS", "FpRegRegOp", { "code": vcvtFpHTFpSCode, @@ -1043,14 +1050,14 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpHTFpSIop); vcvtFpHBFpSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); FpDest = vcvtFpHFpS(fpscr, fpscr.dn, fpscr.ahp, bits(fpToBits(FpOp1), 15, 0)); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcvtFpHBFpSIop = InstObjParams("vcvtb", "VcvtFpHBFpS", "FpRegRegOp", { "code": vcvtFpHBFpSCode, @@ -1061,7 +1068,7 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpHBFpSIop); vcvtFpSFpHTCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1), "=m" (FpDest.uw) @@ -1071,7 +1078,7 @@ let {{ fpscr.rMode, fpscr.ahp, FpOp1)); __asm__ __volatile__("" :: "m" (FpDest.uw)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcvtFpSFpHTIop = InstObjParams("vcvtt", "VcvtFpSFpHT", "FpRegRegOp", { "code": vcvtFpHTFpSCode, @@ -1082,7 +1089,7 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpSFpHTIop); vcvtFpSFpHBCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1), "=m" (FpDest.uw) @@ -1092,7 +1099,7 @@ let {{ fpscr.rMode, fpscr.ahp, FpOp1)); __asm__ __volatile__("" :: "m" (FpDest.uw)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcvtFpSFpHBIop = InstObjParams("vcvtb", "VcvtFpSFpHB", "FpRegRegOp", { "code": vcvtFpSFpHBCode, @@ -1103,7 +1110,7 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpSFpHBIop); vcmpSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; vfpFlushToZero(fpscr, FpDest, FpOp1); if (FpDest == FpOp1) { fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0; @@ -1122,6 +1129,7 @@ let {{ fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 1; } FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcmpSIop = InstObjParams("vcmps", "VcmpS", "FpRegRegOp", { "code": vcmpSCode, @@ -1134,7 +1142,7 @@ let {{ vcmpDCode = vfpEnabledCheckCode + ''' double cOp1 = dbl(FpOp1P0.uw, FpOp1P1.uw); double cDest = dbl(FpDestP0.uw, FpDestP1.uw); - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; vfpFlushToZero(fpscr, cDest, cOp1); if (cDest == cOp1) { fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0; @@ -1153,6 +1161,7 @@ let {{ fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 1; } FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcmpDIop = InstObjParams("vcmpd", "VcmpD", "FpRegRegOp", { "code": vcmpDCode, @@ -1163,7 +1172,7 @@ let {{ exec_output += PredOpExecute.subst(vcmpDIop); vcmpZeroSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; vfpFlushToZero(fpscr, FpDest); // This only handles imm == 0 for now. assert(imm == 0); @@ -1182,6 +1191,7 @@ let {{ fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 1; } FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcmpZeroSIop = InstObjParams("vcmpZeros", "VcmpZeroS", "FpRegImmOp", { "code": vcmpZeroSCode, @@ -1195,7 +1205,7 @@ let {{ // This only handles imm == 0 for now. assert(imm == 0); double cDest = dbl(FpDestP0.uw, FpDestP1.uw); - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; vfpFlushToZero(fpscr, cDest); if (cDest == imm) { fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0; @@ -1212,6 +1222,7 @@ let {{ fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 1; } FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcmpZeroDIop = InstObjParams("vcmpZerod", "VcmpZeroD", "FpRegImmOp", { "code": vcmpZeroDCode, @@ -1222,7 +1233,7 @@ let {{ exec_output += PredOpExecute.subst(vcmpZeroDIop); vcmpeSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; vfpFlushToZero(fpscr, FpDest, FpOp1); if (FpDest == FpOp1) { fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0; @@ -1235,6 +1246,7 @@ let {{ fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 1; } FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcmpeSIop = InstObjParams("vcmpes", "VcmpeS", "FpRegRegOp", { "code": vcmpeSCode, @@ -1247,7 +1259,7 @@ let {{ vcmpeDCode = vfpEnabledCheckCode + ''' double cOp1 = dbl(FpOp1P0.uw, FpOp1P1.uw); double cDest = dbl(FpDestP0.uw, FpDestP1.uw); - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; vfpFlushToZero(fpscr, cDest, cOp1); if (cDest == cOp1) { fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0; @@ -1260,6 +1272,7 @@ let {{ fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 1; } FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcmpeDIop = InstObjParams("vcmped", "VcmpeD", "FpRegRegOp", { "code": vcmpeDCode, @@ -1270,7 +1283,7 @@ let {{ exec_output += PredOpExecute.subst(vcmpeDIop); vcmpeZeroSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; vfpFlushToZero(fpscr, FpDest); if (FpDest == imm) { fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0; @@ -1283,6 +1296,7 @@ let {{ fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 1; } FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcmpeZeroSIop = InstObjParams("vcmpeZeros", "VcmpeZeroS", "FpRegImmOp", { "code": vcmpeZeroSCode, @@ -1294,7 +1308,7 @@ let {{ vcmpeZeroDCode = vfpEnabledCheckCode + ''' double cDest = dbl(FpDestP0.uw, FpDestP1.uw); - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; vfpFlushToZero(fpscr, cDest); if (cDest == imm) { fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0; @@ -1307,6 +1321,7 @@ let {{ fpscr.n = 0; fpscr.z = 0; fpscr.c = 1; fpscr.v = 1; } FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcmpeZeroDIop = InstObjParams("vcmpeZerod", "VcmpeZeroD", "FpRegImmOp", { "code": vcmpeZeroDCode, @@ -1324,14 +1339,14 @@ let {{ exec_output = "" vcvtFpSFixedSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); FpDest.sw = vfpFpSToFixed(FpOp1, true, false, imm); __asm__ __volatile__("" :: "m" (FpDest.sw)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcvtFpSFixedSIop = InstObjParams("vcvt", "VcvtFpSFixedS", "FpRegRegImmOp", { "code": vcvtFpSFixedSCode, @@ -1342,7 +1357,7 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpSFixedSIop); vcvtFpSFixedDCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; double cOp1 = dbl(FpOp1P0.uw, FpOp1P1.uw); vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); @@ -1350,9 +1365,9 @@ let {{ uint64_t mid = vfpFpDToFixed(cOp1, true, false, imm); __asm__ __volatile__("" :: "m" (mid)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; FpDestP0.uw = mid; FpDestP1.uw = mid >> 32; + FpscrExc = fpscr; ''' vcvtFpSFixedDIop = InstObjParams("vcvt", "VcvtFpSFixedD", "FpRegRegImmOp", { "code": vcvtFpSFixedDCode, @@ -1363,14 +1378,14 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpSFixedDIop); vcvtFpUFixedSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); FpDest.uw = vfpFpSToFixed(FpOp1, false, false, imm); __asm__ __volatile__("" :: "m" (FpDest.uw)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcvtFpUFixedSIop = InstObjParams("vcvt", "VcvtFpUFixedS", "FpRegRegImmOp", { "code": vcvtFpUFixedSCode, @@ -1381,7 +1396,7 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpUFixedSIop); vcvtFpUFixedDCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; double cOp1 = dbl(FpOp1P0.uw, FpOp1P1.uw); vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); @@ -1389,9 +1404,9 @@ let {{ uint64_t mid = vfpFpDToFixed(cOp1, false, false, imm); __asm__ __volatile__("" :: "m" (mid)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; FpDestP0.uw = mid; FpDestP1.uw = mid >> 32; + FpscrExc = fpscr; ''' vcvtFpUFixedDIop = InstObjParams("vcvt", "VcvtFpUFixedD", "FpRegRegImmOp", { "code": vcvtFpUFixedDCode, @@ -1402,13 +1417,13 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpUFixedDIop); vcvtSFixedFpSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1.sw) : "m" (FpOp1.sw)); FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1.sw, false, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcvtSFixedFpSIop = InstObjParams("vcvt", "VcvtSFixedFpS", "FpRegRegImmOp", { "code": vcvtSFixedFpSCode, @@ -1419,16 +1434,16 @@ let {{ exec_output += PredOpExecute.subst(vcvtSFixedFpSIop); vcvtSFixedFpDCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, false, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; FpDestP0.uw = dblLow(cDest); FpDestP1.uw = dblHi(cDest); + FpscrExc = fpscr; ''' vcvtSFixedFpDIop = InstObjParams("vcvt", "VcvtSFixedFpD", "FpRegRegImmOp", { "code": vcvtSFixedFpDCode, @@ -1439,13 +1454,13 @@ let {{ exec_output += PredOpExecute.subst(vcvtSFixedFpDIop); vcvtUFixedFpSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1.uw) : "m" (FpOp1.uw)); FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1.uw, false, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcvtUFixedFpSIop = InstObjParams("vcvt", "VcvtUFixedFpS", "FpRegRegImmOp", { "code": vcvtUFixedFpSCode, @@ -1456,16 +1471,16 @@ let {{ exec_output += PredOpExecute.subst(vcvtUFixedFpSIop); vcvtUFixedFpDCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, false, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; FpDestP0.uw = dblLow(cDest); FpDestP1.uw = dblHi(cDest); + FpscrExc = fpscr; ''' vcvtUFixedFpDIop = InstObjParams("vcvt", "VcvtUFixedFpD", "FpRegRegImmOp", { "code": vcvtUFixedFpDCode, @@ -1476,14 +1491,14 @@ let {{ exec_output += PredOpExecute.subst(vcvtUFixedFpDIop); vcvtFpSHFixedSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); FpDest.sh = vfpFpSToFixed(FpOp1, true, true, imm); __asm__ __volatile__("" :: "m" (FpDest.sh)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcvtFpSHFixedSIop = InstObjParams("vcvt", "VcvtFpSHFixedS", "FpRegRegImmOp", @@ -1495,7 +1510,7 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpSHFixedSIop); vcvtFpSHFixedDCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; double cOp1 = dbl(FpOp1P0.uw, FpOp1P1.uw); vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); @@ -1503,9 +1518,9 @@ let {{ uint64_t result = vfpFpDToFixed(cOp1, true, true, imm); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; FpDestP0.uw = result; FpDestP1.uw = result >> 32; + FpscrExc = fpscr; ''' vcvtFpSHFixedDIop = InstObjParams("vcvt", "VcvtFpSHFixedD", "FpRegRegImmOp", @@ -1517,14 +1532,14 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpSHFixedDIop); vcvtFpUHFixedSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); FpDest.uh = vfpFpSToFixed(FpOp1, false, true, imm); __asm__ __volatile__("" :: "m" (FpDest.uh)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcvtFpUHFixedSIop = InstObjParams("vcvt", "VcvtFpUHFixedS", "FpRegRegImmOp", @@ -1536,7 +1551,7 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpUHFixedSIop); vcvtFpUHFixedDCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; double cOp1 = dbl(FpOp1P0.uw, FpOp1P1.uw); vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); @@ -1544,9 +1559,9 @@ let {{ uint64_t mid = vfpFpDToFixed(cOp1, false, true, imm); __asm__ __volatile__("" :: "m" (mid)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; FpDestP0.uw = mid; FpDestP1.uw = mid >> 32; + FpscrExc = fpscr; ''' vcvtFpUHFixedDIop = InstObjParams("vcvt", "VcvtFpUHFixedD", "FpRegRegImmOp", @@ -1558,13 +1573,13 @@ let {{ exec_output += PredOpExecute.subst(vcvtFpUHFixedDIop); vcvtSHFixedFpSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1.sh) : "m" (FpOp1.sh)); FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1.sh, true, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcvtSHFixedFpSIop = InstObjParams("vcvt", "VcvtSHFixedFpS", "FpRegRegImmOp", @@ -1576,16 +1591,16 @@ let {{ exec_output += PredOpExecute.subst(vcvtSHFixedFpSIop); vcvtSHFixedFpDCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, true, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; FpDestP0.uw = dblLow(cDest); FpDestP1.uw = dblHi(cDest); + FpscrExc = fpscr; ''' vcvtSHFixedFpDIop = InstObjParams("vcvt", "VcvtSHFixedFpD", "FpRegRegImmOp", @@ -1597,13 +1612,13 @@ let {{ exec_output += PredOpExecute.subst(vcvtSHFixedFpDIop); vcvtUHFixedFpSCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1.uh) : "m" (FpOp1.uh)); FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1.uh, true, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; ''' vcvtUHFixedFpSIop = InstObjParams("vcvt", "VcvtUHFixedFpS", "FpRegRegImmOp", @@ -1615,16 +1630,16 @@ let {{ exec_output += PredOpExecute.subst(vcvtUHFixedFpSIop); vcvtUHFixedFpDCode = vfpEnabledCheckCode + ''' - FPSCR fpscr = Fpscr | FpCondCodes; + FPSCR fpscr = (FPSCR) FpscrExc; uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, true, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); - FpCondCodes = fpscr & FpCondCodesMask; FpDestP0.uw = dblLow(cDest); FpDestP1.uw = dblHi(cDest); + FpscrExc = fpscr; ''' vcvtUHFixedFpDIop = InstObjParams("vcvt", "VcvtUHFixedFpD", "FpRegRegImmOp", diff --git a/src/arch/arm/isa/insts/misc.isa b/src/arch/arm/isa/insts/misc.isa index 1abbc3de1..6e6d2594c 100644 --- a/src/arch/arm/isa/insts/misc.isa +++ b/src/arch/arm/isa/insts/misc.isa @@ -64,7 +64,7 @@ let {{ mrsCpsrIop = InstObjParams("mrs", "MrsCpsr", "MrsOp", { "code": mrsCpsrCode, "predicate_test": condPredicateTest }, - ["IsSerializeAfter"]) + ["IsSerializeBefore"]) header_output += MrsDeclare.subst(mrsCpsrIop) decoder_output += MrsConstructor.subst(mrsCpsrIop) exec_output += PredOpExecute.subst(mrsCpsrIop) @@ -73,7 +73,7 @@ let {{ mrsSpsrIop = InstObjParams("mrs", "MrsSpsr", "MrsOp", { "code": mrsSpsrCode, "predicate_test": predicateTest }, - ["IsSerializeAfter"]) + ["IsSerializeBefore"]) header_output += MrsDeclare.subst(mrsSpsrIop) decoder_output += MrsConstructor.subst(mrsSpsrIop) exec_output += PredOpExecute.subst(mrsSpsrIop) diff --git a/src/arch/arm/isa/insts/neon.isa b/src/arch/arm/isa/insts/neon.isa index c004b71ba..a2948b90a 100644 --- a/src/arch/arm/isa/insts/neon.isa +++ b/src/arch/arm/isa/insts/neon.isa @@ -1632,12 +1632,12 @@ let {{ vqaddUCode = ''' destElem = srcElem1 + srcElem2; - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; if (destElem < srcElem1 || destElem < srcElem2) { destElem = (Element)(-1); fpscr.qc = 1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeEqualRegInst("vqadd", "VqaddUD", "SimdAddOp", unsignedTypes, 2, vqaddUCode) threeEqualRegInst("vqadd", "VqaddUQ", "SimdAddOp", unsignedTypes, 4, vqaddUCode) @@ -1655,7 +1655,7 @@ let {{ vqaddSCode = ''' destElem = srcElem1 + srcElem2; - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; bool negDest = (destElem < 0); bool negSrc1 = (srcElem1 < 0); bool negSrc2 = (srcElem2 < 0); @@ -1665,26 +1665,26 @@ let {{ destElem -= 1; fpscr.qc = 1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeEqualRegInst("vqadd", "VqaddSD", "SimdAddOp", signedTypes, 2, vqaddSCode) threeEqualRegInst("vqadd", "VqaddSQ", "SimdAddOp", signedTypes, 4, vqaddSCode) vqsubUCode = ''' destElem = srcElem1 - srcElem2; - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; if (destElem > srcElem1) { destElem = 0; fpscr.qc = 1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeEqualRegInst("vqsub", "VqsubUD", "SimdAddOp", unsignedTypes, 2, vqsubUCode) threeEqualRegInst("vqsub", "VqsubUQ", "SimdAddOp", unsignedTypes, 4, vqsubUCode) vqsubSCode = ''' destElem = srcElem1 - srcElem2; - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; bool negDest = (destElem < 0); bool negSrc1 = (srcElem1 < 0); bool posSrc2 = (srcElem2 >= 0); @@ -1694,7 +1694,7 @@ let {{ destElem -= 1; fpscr.qc = 1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeEqualRegInst("vqsub", "VqsubSD", "SimdAddOp", signedTypes, 2, vqsubSCode) threeEqualRegInst("vqsub", "VqsubSQ", "SimdAddOp", signedTypes, 4, vqsubSCode) @@ -1779,7 +1779,7 @@ let {{ vqshlUCode = ''' int16_t shiftAmt = (int8_t)srcElem2; - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; if (shiftAmt < 0) { shiftAmt = -shiftAmt; if (shiftAmt >= sizeof(Element) * 8) { @@ -1808,14 +1808,14 @@ let {{ } else { destElem = srcElem1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeEqualRegInst("vqshl", "VqshlUD", "SimdAluOp", unsignedTypes, 2, vqshlUCode) threeEqualRegInst("vqshl", "VqshlUQ", "SimdAluOp", unsignedTypes, 4, vqshlUCode) vqshlSCode = ''' int16_t shiftAmt = (int8_t)srcElem2; - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; if (shiftAmt < 0) { shiftAmt = -shiftAmt; if (shiftAmt >= sizeof(Element) * 8) { @@ -1854,14 +1854,14 @@ let {{ } else { destElem = srcElem1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeEqualRegInst("vqshl", "VqshlSD", "SimdCmpOp", signedTypes, 2, vqshlSCode) threeEqualRegInst("vqshl", "VqshlSQ", "SimdCmpOp", signedTypes, 4, vqshlSCode) vqrshlUCode = ''' int16_t shiftAmt = (int8_t)srcElem2; - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; if (shiftAmt < 0) { shiftAmt = -shiftAmt; Element rBit = 0; @@ -1892,14 +1892,14 @@ let {{ } } } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeEqualRegInst("vqrshl", "VqrshlUD", "SimdCmpOp", unsignedTypes, 2, vqrshlUCode) threeEqualRegInst("vqrshl", "VqrshlUQ", "SimdCmpOp", unsignedTypes, 4, vqrshlUCode) vqrshlSCode = ''' int16_t shiftAmt = (int8_t)srcElem2; - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; if (shiftAmt < 0) { shiftAmt = -shiftAmt; Element rBit = 0; @@ -1944,7 +1944,7 @@ let {{ } else { destElem = srcElem1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeEqualRegInst("vqrshl", "VqrshlSD", "SimdCmpOp", signedTypes, 2, vqrshlSCode) threeEqualRegInst("vqrshl", "VqrshlSQ", "SimdCmpOp", signedTypes, 4, vqrshlSCode) @@ -2002,7 +2002,7 @@ let {{ threeRegLongInst("vmlal", "Vmlal", "SimdMultAccOp", smallTypes, vmlalCode, True) vqdmlalCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1); Element halfNeg = maxNeg / 2; @@ -2022,12 +2022,12 @@ let {{ destElem = ~destElem; fpscr.qc = 1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeRegLongInst("vqdmlal", "Vqdmlal", "SimdMultAccOp", smallTypes, vqdmlalCode, True) vqdmlslCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1); Element halfNeg = maxNeg / 2; @@ -2047,12 +2047,12 @@ let {{ destElem = ~destElem; fpscr.qc = 1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeRegLongInst("vqdmlsl", "Vqdmlsl", "SimdMultAccOp", smallTypes, vqdmlslCode, True) vqdmullCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); if (srcElem1 == srcElem2 && srcElem1 == (Element)((Element)1 << @@ -2060,7 +2060,7 @@ let {{ destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8)); fpscr.qc = 1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeRegLongInst("vqdmull", "Vqdmull", "SimdMultAccOp", smallTypes, vqdmullCode) @@ -2099,7 +2099,7 @@ let {{ threeEqualRegInst("vpmin", "VpminQ", "SimdCmpOp", allTypes, 4, vminCode, pairwise=True) vqdmulhCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >> (sizeof(Element) * 8); if (srcElem1 == srcElem2 && @@ -2108,13 +2108,13 @@ let {{ destElem = ~srcElem1; fpscr.qc = 1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode) threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode) vqrdmulhCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 + ((int64_t)1 << (sizeof(Element) * 8 - 1))) >> (sizeof(Element) * 8); @@ -2130,7 +2130,7 @@ let {{ } fpscr.qc = 1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' threeEqualRegInst("vqrdmulh", "VqrdmulhD", "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode) @@ -2138,7 +2138,7 @@ let {{ "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode) vmaxfpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; bool done; destReg = processNans(fpscr, done, true, srcReg1, srcReg2); if (!done) { @@ -2147,13 +2147,13 @@ let {{ } else if (flushToZero(srcReg1, srcReg2)) { fpscr.idc = 1; } - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode) threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode) vminfpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; bool done; destReg = processNans(fpscr, done, true, srcReg1, srcReg2); if (!done) { @@ -2162,7 +2162,7 @@ let {{ } else if (flushToZero(srcReg1, srcReg2)) { fpscr.idc = 1; } - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode) threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode) @@ -2178,10 +2178,10 @@ let {{ 4, vminfpCode, pairwise=True) vaddfpCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; destReg = binaryOp(fpscr, srcReg1, srcReg2, fpAddS, true, true, VfpRoundNearest); - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vadd", "VaddDFp", "SimdFloatAddOp", ("float",), 2, vaddfpCode) threeEqualRegInstFp("vadd", "VaddQFp", "SimdFloatAddOp", ("float",), 4, vaddfpCode) @@ -2192,53 +2192,53 @@ let {{ 4, vaddfpCode, pairwise=True) vsubfpCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; destReg = binaryOp(fpscr, srcReg1, srcReg2, fpSubS, true, true, VfpRoundNearest); - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vsub", "VsubDFp", "SimdFloatAddOp", ("float",), 2, vsubfpCode) threeEqualRegInstFp("vsub", "VsubQFp", "SimdFloatAddOp", ("float",), 4, vsubfpCode) vmulfpCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMulS, true, true, VfpRoundNearest); - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vmul", "NVmulDFp", "SimdFloatMultOp", ("float",), 2, vmulfpCode) threeEqualRegInstFp("vmul", "NVmulQFp", "SimdFloatMultOp", ("float",), 4, vmulfpCode) vmlafpCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS, true, true, VfpRoundNearest); destReg = binaryOp(fpscr, mid, destReg, fpAddS, true, true, VfpRoundNearest); - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True) threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True) vmlsfpCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS, true, true, VfpRoundNearest); destReg = binaryOp(fpscr, destReg, mid, fpSubS, true, true, VfpRoundNearest); - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vmls", "NVmlsDFp", "SimdFloatMultAccOp", ("float",), 2, vmlsfpCode, True) threeEqualRegInstFp("vmls", "NVmlsQFp", "SimdFloatMultAccOp", ("float",), 4, vmlsfpCode, True) vcgtfpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; float res = binaryOp(fpscr, srcReg1, srcReg2, vcgtFunc, true, true, VfpRoundNearest); destReg = (res == 0) ? -1 : 0; if (res == 2.0) fpscr.ioc = 1; - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vcgt", "VcgtDFp", "SimdFloatCmpOp", ("float",), 2, vcgtfpCode, toInt = True) @@ -2246,13 +2246,13 @@ let {{ 4, vcgtfpCode, toInt = True) vcgefpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; float res = binaryOp(fpscr, srcReg1, srcReg2, vcgeFunc, true, true, VfpRoundNearest); destReg = (res == 0) ? -1 : 0; if (res == 2.0) fpscr.ioc = 1; - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vcge", "VcgeDFp", "SimdFloatCmpOp", ("float",), 2, vcgefpCode, toInt = True) @@ -2260,13 +2260,13 @@ let {{ 4, vcgefpCode, toInt = True) vacgtfpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; float res = binaryOp(fpscr, srcReg1, srcReg2, vacgtFunc, true, true, VfpRoundNearest); destReg = (res == 0) ? -1 : 0; if (res == 2.0) fpscr.ioc = 1; - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vacgt", "VacgtDFp", "SimdFloatCmpOp", ("float",), 2, vacgtfpCode, toInt = True) @@ -2274,13 +2274,13 @@ let {{ 4, vacgtfpCode, toInt = True) vacgefpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; float res = binaryOp(fpscr, srcReg1, srcReg2, vacgeFunc, true, true, VfpRoundNearest); destReg = (res == 0) ? -1 : 0; if (res == 2.0) fpscr.ioc = 1; - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vacge", "VacgeDFp", "SimdFloatCmpOp", ("float",), 2, vacgefpCode, toInt = True) @@ -2288,13 +2288,13 @@ let {{ 4, vacgefpCode, toInt = True) vceqfpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; float res = binaryOp(fpscr, srcReg1, srcReg2, vceqFunc, true, true, VfpRoundNearest); destReg = (res == 0) ? -1 : 0; if (res == 2.0) fpscr.ioc = 1; - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vceq", "VceqDFp", "SimdFloatCmpOp", ("float",), 2, vceqfpCode, toInt = True) @@ -2302,29 +2302,29 @@ let {{ 4, vceqfpCode, toInt = True) vrecpsCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRecpsS, true, true, VfpRoundNearest); - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vrecps", "VrecpsDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpsCode) threeEqualRegInstFp("vrecps", "VrecpsQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpsCode) vrsqrtsCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; destReg = binaryOp(fpscr, srcReg1, srcReg2, fpRSqrtsS, true, true, VfpRoundNearest); - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vrsqrts", "VrsqrtsDFp", "SimdFloatMiscOp", ("float",), 2, vrsqrtsCode) threeEqualRegInstFp("vrsqrts", "VrsqrtsQFp", "SimdFloatMiscOp", ("float",), 4, vrsqrtsCode) vabdfpCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; float mid = binaryOp(fpscr, srcReg1, srcReg2, fpSubS, true, true, VfpRoundNearest); destReg = fabs(mid); - Fpscr = fpscr; + FpscrExc = fpscr; ''' threeEqualRegInstFp("vabd", "VabdDFp", "SimdFloatAddOp", ("float",), 2, vabdfpCode) threeEqualRegInstFp("vabd", "VabdQFp", "SimdFloatAddOp", ("float",), 4, vabdfpCode) @@ -2441,7 +2441,7 @@ let {{ twoRegShiftInst("vsli", "NVsliQ", "SimdShiftOp", unsignedTypes, 4, vsliCode, True) vqshlCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; if (imm >= sizeof(Element) * 8) { if (srcElem1 != 0) { destElem = (Element)1 << (sizeof(Element) * 8 - 1); @@ -2465,13 +2465,13 @@ let {{ } else { destElem = srcElem1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegShiftInst("vqshl", "NVqshlD", "SimdShiftOp", signedTypes, 2, vqshlCode) twoRegShiftInst("vqshl", "NVqshlQ", "SimdShiftOp", signedTypes, 4, vqshlCode) vqshluCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; if (imm >= sizeof(Element) * 8) { if (srcElem1 != 0) { destElem = mask(sizeof(Element) * 8); @@ -2491,13 +2491,13 @@ let {{ } else { destElem = srcElem1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegShiftInst("vqshlu", "NVqshluD", "SimdShiftOp", unsignedTypes, 2, vqshluCode) twoRegShiftInst("vqshlu", "NVqshluQ", "SimdShiftOp", unsignedTypes, 4, vqshluCode) vqshlusCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; if (imm >= sizeof(Element) * 8) { if (srcElem1 < 0) { destElem = 0; @@ -2528,7 +2528,7 @@ let {{ destElem = srcElem1; } } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegShiftInst("vqshlus", "NVqshlusD", "SimdShiftOp", signedTypes, 2, vqshlusCode) twoRegShiftInst("vqshlus", "NVqshlusQ", "SimdShiftOp", signedTypes, 4, vqshlusCode) @@ -2555,7 +2555,7 @@ let {{ twoRegNarrowShiftInst("vrshrn", "NVrshrn", "SimdShiftOp", smallUnsignedTypes, vrshrnCode) vqshrnCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; if (imm > sizeof(srcElem1) * 8) { if (srcElem1 != 0 && srcElem1 != -1) fpscr.qc = 1; @@ -2575,12 +2575,12 @@ let {{ } else { destElem = srcElem1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegNarrowShiftInst("vqshrn", "NVqshrn", "SimdShiftOp", smallSignedTypes, vqshrnCode) vqshrunCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; if (imm > sizeof(srcElem1) * 8) { if (srcElem1 != 0) fpscr.qc = 1; @@ -2596,13 +2596,13 @@ let {{ } else { destElem = srcElem1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegNarrowShiftInst("vqshrun", "NVqshrun", "SimdShiftOp", smallUnsignedTypes, vqshrunCode) vqshrunsCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; if (imm > sizeof(srcElem1) * 8) { if (srcElem1 != 0) fpscr.qc = 1; @@ -2623,13 +2623,13 @@ let {{ } else { destElem = srcElem1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegNarrowShiftInst("vqshrun", "NVqshruns", "SimdShiftOp", smallSignedTypes, vqshrunsCode) vqrshrnCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; if (imm > sizeof(srcElem1) * 8) { if (srcElem1 != 0 && srcElem1 != -1) fpscr.qc = 1; @@ -2659,13 +2659,13 @@ let {{ destElem = srcElem1; } } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegNarrowShiftInst("vqrshrn", "NVqrshrn", "SimdShiftOp", smallSignedTypes, vqrshrnCode) vqrshrunCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; if (imm > sizeof(srcElem1) * 8) { if (srcElem1 != 0) fpscr.qc = 1; @@ -2689,13 +2689,13 @@ let {{ destElem = srcElem1; } } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegNarrowShiftInst("vqrshrun", "NVqrshrun", "SimdShiftOp", smallUnsignedTypes, vqrshrunCode) vqrshrunsCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; if (imm > sizeof(srcElem1) * 8) { if (srcElem1 != 0) fpscr.qc = 1; @@ -2726,7 +2726,7 @@ let {{ destElem = srcElem1; } } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegNarrowShiftInst("vqrshrun", "NVqrshruns", "SimdShiftOp", smallSignedTypes, vqrshrunsCode) @@ -2746,7 +2746,7 @@ let {{ twoRegLongShiftInst("vmovl", "NVmovl", "SimdMiscOp", smallTypes, vmovlCode) vcvt2ufxCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; if (flushToZero(srcElem1)) fpscr.idc = 1; VfpSavedState state = prepFpState(VfpRoundNearest); @@ -2754,7 +2754,7 @@ let {{ destReg = vfpFpSToFixed(srcElem1, false, false, imm); __asm__ __volatile__("" :: "m" (destReg)); finishVfp(fpscr, state, true); - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegShiftInst("vcvt", "NVcvt2ufxD", "SimdCvtOp", ("float",), 2, vcvt2ufxCode, toInt = True) @@ -2762,7 +2762,7 @@ let {{ 4, vcvt2ufxCode, toInt = True) vcvt2sfxCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; if (flushToZero(srcElem1)) fpscr.idc = 1; VfpSavedState state = prepFpState(VfpRoundNearest); @@ -2770,7 +2770,7 @@ let {{ destReg = vfpFpSToFixed(srcElem1, true, false, imm); __asm__ __volatile__("" :: "m" (destReg)); finishVfp(fpscr, state, true); - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegShiftInst("vcvt", "NVcvt2sfxD", "SimdCvtOp", ("float",), 2, vcvt2sfxCode, toInt = True) @@ -2778,13 +2778,13 @@ let {{ 4, vcvt2sfxCode, toInt = True) vcvtu2fpCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(VfpRoundNearest); __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm); __asm__ __volatile__("" :: "m" (destElem)); finishVfp(fpscr, state, true); - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegShiftInst("vcvt", "NVcvtu2fpD", "SimdCvtOp", ("float",), 2, vcvtu2fpCode, fromInt = True) @@ -2792,13 +2792,13 @@ let {{ 4, vcvtu2fpCode, fromInt = True) vcvts2fpCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(VfpRoundNearest); __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm); __asm__ __volatile__("" :: "m" (destElem)); finishVfp(fpscr, state, true); - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegShiftInst("vcvt", "NVcvts2fpD", "SimdCvtOp", ("float",), 2, vcvts2fpCode, fromInt = True) @@ -2806,7 +2806,7 @@ let {{ 4, vcvts2fpCode, fromInt = True) vcvts2hCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; float srcFp1 = bitsToFp(srcElem1, (float)0.0); if (flushToZero(srcFp1)) fpscr.idc = 1; @@ -2817,19 +2817,19 @@ let {{ fpscr.ahp, srcFp1); __asm__ __volatile__("" :: "m" (destElem)); finishVfp(fpscr, state, true); - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegNarrowMiscInst("vcvt", "NVcvts2h", "SimdCvtOp", ("uint16_t",), vcvts2hCode) vcvth2sCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(VfpRoundNearest); __asm__ __volatile__("" : "=m" (srcElem1), "=m" (destElem) : "m" (srcElem1), "m" (destElem)); destElem = fpToBits(vcvtFpHFpS(fpscr, true, fpscr.ahp, srcElem1)); __asm__ __volatile__("" :: "m" (destElem)); finishVfp(fpscr, state, true); - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode) @@ -2840,11 +2840,11 @@ let {{ twoRegMiscInst("vrsqrte", "NVrsqrteQ", "SimdSqrtOp", ("uint32_t",), 4, vrsqrteCode) vrsqrtefpCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; if (flushToZero(srcReg1)) fpscr.idc = 1; destReg = fprSqrtEstimate(fpscr, srcReg1); - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegMiscInstFp("vrsqrte", "NVrsqrteDFp", "SimdFloatSqrtOp", ("float",), 2, vrsqrtefpCode) twoRegMiscInstFp("vrsqrte", "NVrsqrteQFp", "SimdFloatSqrtOp", ("float",), 4, vrsqrtefpCode) @@ -2856,11 +2856,11 @@ let {{ twoRegMiscInst("vrecpe", "NVrecpeQ", "SimdMultAccOp", ("uint32_t",), 4, vrecpeCode) vrecpefpCode = ''' - FPSCR fpscr = Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; if (flushToZero(srcReg1)) fpscr.idc = 1; destReg = fpRecipEstimate(fpscr, srcReg1); - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegMiscInstFp("vrecpe", "NVrecpeDFp", "SimdFloatMultAccOp", ("float",), 2, vrecpefpCode) twoRegMiscInstFp("vrecpe", "NVrecpeQFp", "SimdFloatMultAccOp", ("float",), 4, vrecpefpCode) @@ -2954,7 +2954,7 @@ let {{ twoRegMiscInst("vmvn", "NVmvnQ", "SimdAluOp", ("uint64_t",), 4, vmvnCode) vqabsCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) { fpscr.qc = 1; destElem = ~srcElem1; @@ -2963,20 +2963,20 @@ let {{ } else { destElem = srcElem1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegMiscInst("vqabs", "NVqabsD", "SimdAluOp", signedTypes, 2, vqabsCode) twoRegMiscInst("vqabs", "NVqabsQ", "SimdAluOp", signedTypes, 4, vqabsCode) vqnegCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) { fpscr.qc = 1; destElem = ~srcElem1; } else { destElem = -srcElem1; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegMiscInst("vqneg", "NVqnegD", "SimdAluOp", signedTypes, 2, vqnegCode) twoRegMiscInst("vqneg", "NVqnegQ", "SimdAluOp", signedTypes, 4, vqnegCode) @@ -3019,13 +3019,13 @@ let {{ twoRegMiscInst("vcgt", "NVcgtD", "SimdCmpOp", signedTypes, 2, vcgtCode) twoRegMiscInst("vcgt", "NVcgtQ", "SimdCmpOp", signedTypes, 4, vcgtCode) vcgtfpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgtFunc, true, true, VfpRoundNearest); destReg = (res == 0) ? -1 : 0; if (res == 2.0) fpscr.ioc = 1; - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegMiscInstFp("vcgt", "NVcgtDFp", "SimdFloatCmpOp", ("float",), 2, vcgtfpCode, toInt = True) @@ -3036,13 +3036,13 @@ let {{ twoRegMiscInst("vcge", "NVcgeD", "SimdCmpOp", signedTypes, 2, vcgeCode) twoRegMiscInst("vcge", "NVcgeQ", "SimdCmpOp", signedTypes, 4, vcgeCode) vcgefpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcgeFunc, true, true, VfpRoundNearest); destReg = (res == 0) ? -1 : 0; if (res == 2.0) fpscr.ioc = 1; - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegMiscInstFp("vcge", "NVcgeDFp", "SimdFloatCmpOp", ("float",), 2, vcgefpCode, toInt = True) @@ -3053,13 +3053,13 @@ let {{ twoRegMiscInst("vceq", "NVceqD", "SimdCmpOp", signedTypes, 2, vceqCode) twoRegMiscInst("vceq", "NVceqQ", "SimdCmpOp", signedTypes, 4, vceqCode) vceqfpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vceqFunc, true, true, VfpRoundNearest); destReg = (res == 0) ? -1 : 0; if (res == 2.0) fpscr.ioc = 1; - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegMiscInstFp("vceq", "NVceqDFp", "SimdFloatCmpOp", ("float",), 2, vceqfpCode, toInt = True) @@ -3070,13 +3070,13 @@ let {{ twoRegMiscInst("vcle", "NVcleD", "SimdCmpOp", signedTypes, 2, vcleCode) twoRegMiscInst("vcle", "NVcleQ", "SimdCmpOp", signedTypes, 4, vcleCode) vclefpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcleFunc, true, true, VfpRoundNearest); destReg = (res == 0) ? -1 : 0; if (res == 2.0) fpscr.ioc = 1; - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegMiscInstFp("vcle", "NVcleDFp", "SimdFloatCmpOp", ("float",), 2, vclefpCode, toInt = True) @@ -3087,13 +3087,13 @@ let {{ twoRegMiscInst("vclt", "NVcltD", "SimdCmpOp", signedTypes, 2, vcltCode) twoRegMiscInst("vclt", "NVcltQ", "SimdCmpOp", signedTypes, 4, vcltCode) vcltfpCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrExc; float res = binaryOp(fpscr, srcReg1, (FloatReg)0.0, vcltFunc, true, true, VfpRoundNearest); destReg = (res == 0) ? -1 : 0; if (res == 2.0) fpscr.ioc = 1; - Fpscr = fpscr; + FpscrExc = fpscr; ''' twoRegMiscInstFp("vclt", "NVcltDFp", "SimdFloatCmpOp", ("float",), 2, vcltfpCode, toInt = True) @@ -3203,7 +3203,7 @@ let {{ oneRegImmInst("vbic", "NVbiciQ", "SimdAluOp", ("uint64_t",), 4, vbicCode, True) vqmovnCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; destElem = srcElem1; if ((BigElement)destElem != srcElem1) { fpscr.qc = 1; @@ -3211,24 +3211,24 @@ let {{ if (srcElem1 < 0) destElem = ~destElem; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegNarrowMiscInst("vqmovn", "NVqmovn", "SimdMiscOp", smallSignedTypes, vqmovnCode) vqmovunCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; destElem = srcElem1; if ((BigElement)destElem != srcElem1) { fpscr.qc = 1; destElem = mask(sizeof(Element) * 8); } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegNarrowMiscInst("vqmovun", "NVqmovun", "SimdMiscOp", smallUnsignedTypes, vqmovunCode) vqmovunsCode = ''' - FPSCR fpscr = (FPSCR)Fpscr; + FPSCR fpscr = (FPSCR) FpscrQc; destElem = srcElem1; if (srcElem1 < 0 || ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) { @@ -3237,7 +3237,7 @@ let {{ if (srcElem1 < 0) destElem = ~destElem; } - Fpscr = fpscr; + FpscrQc = fpscr; ''' twoRegNarrowMiscInst("vqmovun", "NVqmovuns", "SimdMiscOp", smallSignedTypes, vqmovunsCode) diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa index 3c32d98d1..dfbf173b8 100644 --- a/src/arch/arm/isa/operands.isa +++ b/src/arch/arm/isa/operands.isa @@ -203,6 +203,8 @@ def operands {{ 'Fpsr': ('ControlReg', 'uw', 'MISCREG_FPSR', None, 3), 'Fpsid': ('ControlReg', 'uw', 'MISCREG_FPSID', None, 3), 'Fpscr': ('ControlReg', 'uw', 'MISCREG_FPSCR', None, 3), + 'FpscrQc': ('ControlReg', 'uw', 'MISCREG_FPSCR_QC', None, 3), + 'FpscrExc': ('ControlReg', 'uw', 'MISCREG_FPSCR_EXC', None, 3), 'Cpacr': ('ControlReg', 'uw', 'MISCREG_CPACR', (None, None, 'IsControl'), 3), 'Fpexc': ('ControlReg', 'uw', 'MISCREG_FPEXC', None, 3), 'Sctlr': ('ControlReg', 'uw', 'MISCREG_SCTLR', None, 3), diff --git a/src/arch/arm/miscregs.hh b/src/arch/arm/miscregs.hh index 2bb1cdb15..cf9da428a 100644 --- a/src/arch/arm/miscregs.hh +++ b/src/arch/arm/miscregs.hh @@ -78,6 +78,8 @@ namespace ArmISA MISCREG_FPSR, MISCREG_FPSID, MISCREG_FPSCR, + MISCREG_FPSCR_QC, // Cumulative saturation flag + MISCREG_FPSCR_EXC, // Cumulative FP exception flags MISCREG_FPEXC, MISCREG_MVFR0, MISCREG_MVFR1, @@ -206,7 +208,8 @@ namespace ArmISA const char * const miscRegName[NUM_MISCREGS] = { "cpsr", "itstate", "spsr", "spsr_fiq", "spsr_irq", "spsr_svc", "spsr_mon", "spsr_und", "spsr_abt", - "fpsr", "fpsid", "fpscr", "fpexc", "mvfr0", "mvfr1", + "fpsr", "fpsid", "fpscr", "fpscr_qc", "fpscr_exc", "fpexc", + "mvfr0", "mvfr1", "sctlr_rst", "sev_mailbox", "sctlr", "dccisw", "dccimvac", "dccmvac", "contextidr", "tpidrurw", "tpidruro", "tpidrprw", @@ -362,7 +365,11 @@ namespace ArmISA // This mask selects bits of the FPSCR that actually go in the FpCondCodes // integer register to allow renaming. - static const uint32_t FpCondCodesMask = 0xF800009F; + static const uint32_t FpCondCodesMask = 0xF0000000; + // This mask selects the cumulative FP exception flags of the FPSCR. + static const uint32_t FpscrExcMask = 0x0000009F; + // This mask selects the cumulative saturation flag of the FPSCR. + static const uint32_t FpscrQcMask = 0x08000000; BitUnion32(FPEXC) Bitfield<31> ex; diff --git a/src/arch/mips/locked_mem.hh b/src/arch/mips/locked_mem.hh index 21777e440..ddda47a0a 100644 --- a/src/arch/mips/locked_mem.hh +++ b/src/arch/mips/locked_mem.hh @@ -49,8 +49,8 @@ template inline void handleLockedRead(XC *xc, Request *req) { - xc->setMiscRegNoEffect(MISCREG_LLADDR, req->getPaddr() & ~0xf); - xc->setMiscRegNoEffect(MISCREG_LLFLAG, true); + xc->setMiscReg(MISCREG_LLADDR, req->getPaddr() & ~0xf); + xc->setMiscReg(MISCREG_LLFLAG, true); DPRINTF(LLSC, "[tid:%i]: Load-Link Flag Set & Load-Link" " Address set to %x.\n", req->threadId(), req->getPaddr() & ~0xf); @@ -66,14 +66,14 @@ handleLockedWrite(XC *xc, Request *req) req->setExtraData(2); } else { // standard store conditional - bool lock_flag = xc->readMiscRegNoEffect(MISCREG_LLFLAG); - Addr lock_addr = xc->readMiscRegNoEffect(MISCREG_LLADDR); + bool lock_flag = xc->readMiscReg(MISCREG_LLFLAG); + Addr lock_addr = xc->readMiscReg(MISCREG_LLADDR); if (!lock_flag || (req->getPaddr() & ~0xf) != lock_addr) { // Lock flag not set or addr mismatch in CPU; // don't even bother sending to memory system req->setExtraData(0); - xc->setMiscRegNoEffect(MISCREG_LLFLAG, false); + xc->setMiscReg(MISCREG_LLFLAG, false); // the rest of this code is not architectural; // it's just a debugging aid to help detect diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 638ee33c1..bd2d9fa95 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -859,7 +859,8 @@ BaseDynInst::readBytes(Addr addr, uint8_t *data, if (fault != NoFault) { // Return a fixed value to keep simulation deterministic even // along misspeculated paths. - bzero(data, size); + if (data) + bzero(data, size); } if (traceData) { diff --git a/src/cpu/inorder/inorder_dyn_inst.cc b/src/cpu/inorder/inorder_dyn_inst.cc index d848226c4..70fd59418 100644 --- a/src/cpu/inorder/inorder_dyn_inst.cc +++ b/src/cpu/inorder/inorder_dyn_inst.cc @@ -419,18 +419,12 @@ InOrderDynInst::readMiscReg(int misc_reg) return this->cpu->readMiscReg(misc_reg, threadNumber); } + /** Reads a misc. register, including any side-effects the read * might have as defined by the architecture. */ MiscReg -InOrderDynInst::readMiscRegNoEffect(int misc_reg) -{ - return this->cpu->readMiscRegNoEffect(misc_reg, threadNumber); -} - -/** Reads a miscellaneous register. */ -MiscReg -InOrderDynInst::readMiscRegOperandNoEffect(const StaticInst *si, int idx) +InOrderDynInst::readMiscRegOperand(const StaticInst *si, int idx) { DPRINTF(InOrderDynInst, "[tid:%i]: [sn:%i] Misc. Reg Source Value %i" " read as %#x.\n", threadNumber, seqNum, idx, @@ -438,22 +432,13 @@ InOrderDynInst::readMiscRegOperandNoEffect(const StaticInst *si, int idx) return instSrc[idx].integer; } -/** Reads a misc. register, including any side-effects the read + +/** Sets a misc. register, including any side-effects the write * might have as defined by the architecture. */ -MiscReg -InOrderDynInst::readMiscRegOperand(const StaticInst *si, int idx) -{ - // For In-Order, the side-effect of reading a register happens - // when explicitly executing a "ReadSrc" command. This simply returns - // a value. - return readMiscRegOperandNoEffect(si, idx); -} - -/** Sets a misc. register. */ void -InOrderDynInst::setMiscRegOperandNoEffect(const StaticInst * si, int idx, - const MiscReg &val) +InOrderDynInst::setMiscRegOperand(const StaticInst *si, int idx, + const MiscReg &val) { instResult[idx].type = Integer; instResult[idx].val.integer = val; @@ -463,19 +448,6 @@ InOrderDynInst::setMiscRegOperandNoEffect(const StaticInst * si, int idx, "being set to %#x.\n", threadNumber, seqNum, idx, val); } -/** Sets a misc. register, including any side-effects the write - * might have as defined by the architecture. - */ -void -InOrderDynInst::setMiscRegOperand(const StaticInst *si, int idx, - const MiscReg &val) -{ - // For In-Order, the side-effect of setting a register happens - // when explicitly writing back the register value. This - // simply maintains the operand value. - setMiscRegOperandNoEffect(si, idx, val); -} - MiscReg InOrderDynInst::readRegOtherThread(unsigned reg_idx, ThreadID tid) { @@ -534,14 +506,6 @@ InOrderDynInst::setFloatRegOperandBits(const StaticInst *si, int idx, threadNumber, seqNum, idx, val, instResult[idx].tick); } -/** Sets a misc. register. */ -/* Alter this when wanting to *speculate* on Miscellaneous registers */ -void -InOrderDynInst::setMiscRegNoEffect(int misc_reg, const MiscReg &val) -{ - this->cpu->setMiscRegNoEffect(misc_reg, val, threadNumber); -} - /** Sets a misc. register, including any side-effects the write * might have as defined by the architecture. */ diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 8d3edfb19..1a44c64ee 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2010 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * @@ -900,6 +912,9 @@ DefaultCommit::commitInsts() cpu->instDone(tid); } + // Updates misc. registers. + head_inst->updateMiscRegs(); + TheISA::advancePC(pc[tid], head_inst->staticInst); int count = 0; diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index 6bd1f9fad..b62068111 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2010 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * @@ -99,13 +111,18 @@ class BaseO3DynInst : public BaseDynInst /** Initializes variables. */ void initVars(); - public: - /** Reads a miscellaneous register. */ - MiscReg readMiscRegNoEffect(int misc_reg) - { - return this->cpu->readMiscRegNoEffect(misc_reg, this->threadNumber); - } + protected: + /** Indexes of the destination misc. registers. They are needed to defer + * the write accesses to the misc. registers until the commit stage, when + * the instruction is out of its speculative state. + */ + int _destMiscRegIdx[MaxInstDestRegs]; + /** Values to be written to the destination misc. registers. */ + MiscReg _destMiscRegVal[MaxInstDestRegs]; + /** Number of destination misc. registers. */ + int _numDestMiscRegs; + public: /** Reads a misc. register, including any side-effects the read * might have as defined by the architecture. */ @@ -114,28 +131,17 @@ class BaseO3DynInst : public BaseDynInst return this->cpu->readMiscReg(misc_reg, this->threadNumber); } - /** Sets a misc. register. */ - void setMiscRegNoEffect(int misc_reg, const MiscReg &val) - { - this->instResult.integer = val; - return this->cpu->setMiscRegNoEffect(misc_reg, val, this->threadNumber); - } - /** Sets a misc. register, including any side-effects the write * might have as defined by the architecture. */ void setMiscReg(int misc_reg, const MiscReg &val) { - return this->cpu->setMiscReg(misc_reg, val, - this->threadNumber); - } - - /** Reads a miscellaneous register. */ - TheISA::MiscReg readMiscRegOperandNoEffect(const StaticInst *si, int idx) - { - return this->cpu->readMiscRegNoEffect( - si->srcRegIdx(idx) - TheISA::Ctrl_Base_DepTag, - this->threadNumber); + /** Writes to misc. registers are recorded and deferred until the + * commit stage, when updateMiscRegs() is called. + */ + _destMiscRegIdx[_numDestMiscRegs] = misc_reg; + _destMiscRegVal[_numDestMiscRegs] = val; + _numDestMiscRegs++; } /** Reads a misc. register, including any side-effects the read @@ -148,24 +154,31 @@ class BaseO3DynInst : public BaseDynInst this->threadNumber); } - /** Sets a misc. register. */ - void setMiscRegOperandNoEffect(const StaticInst * si, int idx, const MiscReg &val) - { - this->instResult.integer = val; - return this->cpu->setMiscRegNoEffect( - si->destRegIdx(idx) - TheISA::Ctrl_Base_DepTag, - val, this->threadNumber); - } - /** Sets a misc. register, including any side-effects the write * might have as defined by the architecture. */ void setMiscRegOperand(const StaticInst *si, int idx, const MiscReg &val) { - return this->cpu->setMiscReg( - si->destRegIdx(idx) - TheISA::Ctrl_Base_DepTag, - val, this->threadNumber); + int misc_reg = si->destRegIdx(idx) - TheISA::Ctrl_Base_DepTag; + setMiscReg(misc_reg, val); + } + + /** Called at the commit stage to update the misc. registers. */ + void updateMiscRegs() + { + // @todo: Pretty convoluted way to avoid squashing from happening when + // using the TC during an instruction's execution (specifically for + // instructions that have side-effects that use the TC). Fix this. + // See cpu/o3/dyn_inst_impl.hh. + bool in_syscall = this->thread->inSyscall; + this->thread->inSyscall = true; + + for (int i = 0; i < _numDestMiscRegs; i++) + this->cpu->setMiscReg( + _destMiscRegIdx[i], _destMiscRegVal[i], this->threadNumber); + + this->thread->inSyscall = in_syscall; } #if FULL_SYSTEM diff --git a/src/cpu/o3/dyn_inst_impl.hh b/src/cpu/o3/dyn_inst_impl.hh index 268746655..89d6528a1 100644 --- a/src/cpu/o3/dyn_inst_impl.hh +++ b/src/cpu/o3/dyn_inst_impl.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2010 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * @@ -71,6 +83,8 @@ BaseO3DynInst::initVars() this->_srcRegIdx[i] = this->staticInst->srcRegIdx(i); this->_readySrcRegIdx[i] = 0; } + + _numDestMiscRegs = 0; } template diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index f7dcd4a86..bd967b185 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -297,34 +297,17 @@ class BaseSimpleCPU : public BaseCPU return thread->readMiscReg(misc_reg); } - void setMiscRegNoEffect(int misc_reg, const MiscReg &val) - { - return thread->setMiscRegNoEffect(misc_reg, val); - } - void setMiscReg(int misc_reg, const MiscReg &val) { return thread->setMiscReg(misc_reg, val); } - MiscReg readMiscRegOperandNoEffect(const StaticInst *si, int idx) - { - int reg_idx = si->srcRegIdx(idx) - TheISA::Ctrl_Base_DepTag; - return thread->readMiscRegNoEffect(reg_idx); - } - MiscReg readMiscRegOperand(const StaticInst *si, int idx) { int reg_idx = si->srcRegIdx(idx) - TheISA::Ctrl_Base_DepTag; return thread->readMiscReg(reg_idx); } - void setMiscRegOperandNoEffect(const StaticInst *si, int idx, const MiscReg &val) - { - int reg_idx = si->destRegIdx(idx) - TheISA::Ctrl_Base_DepTag; - return thread->setMiscRegNoEffect(reg_idx, val); - } - void setMiscRegOperand( const StaticInst *si, int idx, const MiscReg &val) { -- 2.30.2