From 5d584934ada15446123d5dc97a9de820a87fd4cf Mon Sep 17 00:00:00 2001
From: Andreas Sandberg
Date: Tue, 18 Jun 2013 16:10:42 +0200
Subject: [PATCH] x86: Make fprem like the fprem on a real x87

The current implementation of fprem simply does an fmod and doesn't
simulate any of the iterative behavior in a real fprem. This isn't
normally a problem, however, it can lead to problems when switching
between CPU models. If switching from a real CPU in the middle of an
fprem loop to a simulated CPU, the output of the fprem loop becomes
corrupted. This changeset changes the fprem implementation to work
like the one on real hardware.
---
 .../insts/x87/arithmetic/partial_remainder.py |  4 +--
 src/arch/x86/isa/microops/fpop.isa            | 34 +++++++++++++++++--
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/src/arch/x86/isa/insts/x87/arithmetic/partial_remainder.py b/src/arch/x86/isa/insts/x87/arithmetic/partial_remainder.py
index b02184e1a..42aabfdf6 100644
--- a/src/arch/x86/isa/insts/x87/arithmetic/partial_remainder.py
+++ b/src/arch/x86/isa/insts/x87/arithmetic/partial_remainder.py
@@ -37,10 +37,10 @@
 
 microcode = '''
 def macroop FPREM {
-    premfp st(0), st(1), st(0)
+    premfp st(0), st(1), st(0), SetStatus=True
 };
 
 def macroop FPREM1 {
-    premfp st(0), st(1), st(0)
+    premfp st(0), st(1), st(0), SetStatus=True
 };
 '''
diff --git a/src/arch/x86/isa/microops/fpop.isa b/src/arch/x86/isa/microops/fpop.isa
index e6372ba6b..22d6fbcda 100644
--- a/src/arch/x86/isa/microops/fpop.isa
+++ b/src/arch/x86/isa/microops/fpop.isa
@@ -330,11 +330,41 @@ let {{
 
     class PremFp(FpBinaryOp):
         code = '''
-            FpDestReg = fmod(FpSrcReg1, FpSrcReg2);
-            DPRINTF(X86, "src1: %lf, src2: %lf, dest: %lf\\n", FpSrcReg1, FpSrcReg2, FpDestReg);
+            // x87-style partial remainder of FpSrcReg2 by FpSrcReg1; the
+            // resulting condition codes are accumulated in new_fsw.
+            MiscReg new_fsw(FSW);
+            int src1_exp;
+            int src2_exp;
+            std::frexp(FpSrcReg1, &src1_exp);
+            std::frexp(FpSrcReg2, &src2_exp);
+
+            const int d(src2_exp - src1_exp);
+            if (d < 64) {
+                const int64_t q(std::trunc(FpSrcReg2 / FpSrcReg1));
+                FpDestReg = FpSrcReg2 - FpSrcReg1 * q;
+                // Clear C0-C3, then report the three low quotient bits:
+                // bit 0 in C1, bit 1 in C3 and bit 2 in C0. C2 stays clear
+                // (the x87 "reduction complete" indication).
+                new_fsw &= ~(CC0Bit | CC1Bit | CC2Bit | CC3Bit);
+                new_fsw |= (q & 0x1) ? CC1Bit : 0;
+                new_fsw |= (q & 0x2) ? CC3Bit : 0;
+                new_fsw |= (q & 0x4) ? CC0Bit : 0;
+            } else {
+                // Exponents differ by 64 or more: only reduce partially
+                // and set C2 (the x87 "reduction incomplete" indication).
+                const int n(42);
+                const int64_t qq(std::trunc(
+                    FpSrcReg2 / std::ldexp(FpSrcReg1, d - n)));
+                FpDestReg = FpSrcReg2 - std::ldexp(FpSrcReg1 * qq, d - n);
+                new_fsw |= CC2Bit;
+            }
+            DPRINTF(X86, "src1: %lf, src2: %lf, dest: %lf, FSW: 0x%x\\n",
+                    FpSrcReg1, FpSrcReg2, FpDestReg, new_fsw);
         '''
         op_class = 'FloatDivOp'
 
+        flag_code = 'FSW = new_fsw;'
+
     class Compfp(FpBinaryOp):
         def __init__(self, src1, src2, spm=0, setStatus=False, \
                 dataSize="env.dataSize"):
-- 
2.30.2