X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Farch%2Fx86%2Fisa%2Fmicroops%2Fmediaop.isa;h=4052f254d7d601eafc4a45a9b0a9b591ed270f2a;hb=8f6744c19c7d6cf87a207e901503c3435c1ff7a9;hp=334944222fffe615e7d7b33652c058752d3a73e2;hpb=df163284fc29805d4eb3745b9a4dca49a3688b4c;p=gem5.git diff --git a/src/arch/x86/isa/microops/mediaop.isa b/src/arch/x86/isa/microops/mediaop.isa index 334944222..4052f254d 100644 --- a/src/arch/x86/isa/microops/mediaop.isa +++ b/src/arch/x86/isa/microops/mediaop.isa @@ -190,7 +190,7 @@ let {{ typeQual = "" if match.group("typeQual"): typeQual = match.group("typeQual") - src2_name = "%spsrc2%s" % (match.group("prefix"), typeQual) + src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual) self.buildCppClasses(name, Name, suffix, matcher.sub(src2_name, code)) self.buildCppClasses(name + "i", Name, suffix + "Imm", @@ -289,24 +289,122 @@ let {{ return allocator class Mov2int(MediaOp): - def __init__(self, dest, src, \ + def __init__(self, dest, src1, src2 = 0, \ size = None, destSize = None, srcSize = None, ext = None): - super(Mov2int, self).__init__(dest, src,\ - "InstRegIndex(0)", size, destSize, srcSize, ext) + super(Mov2int, self).__init__(dest, src1,\ + src2, size, destSize, srcSize, ext) code = ''' - uint64_t fpSrcReg1 = bits(FpSrcReg1.uqw, srcSize * 8 - 1, 0); - DestReg = merge(DestReg, fpSrcReg1, destSize); + int items = sizeof(FloatRegBits) / srcSize; + int offset = imm8; + if (bits(src1, 0) && (ext & 0x1)) + offset -= items; + if (offset >= 0 && offset < items) { + uint64_t fpSrcReg1 = + bits(FpSrcReg1.uqw, + (offset + 1) * srcSize * 8 - 1, + (offset + 0) * srcSize * 8); + DestReg = merge(0, fpSrcReg1, destSize); + } else { + DestReg = DestReg; + } ''' class Mov2fp(MediaOp): + def __init__(self, dest, src1, src2 = 0, \ + size = None, destSize = None, srcSize = None, ext = None): + super(Mov2fp, self).__init__(dest, src1,\ + src2, size, destSize, srcSize, ext) + code = ''' + int items = sizeof(FloatRegBits) / destSize; + int offset = imm8; + if (bits(dest, 0) && (ext & 0x1)) + offset -= items; + if (offset >= 0 && offset < items) { + uint64_t srcReg1 = pick(SrcReg1, 0, srcSize); + FpDestReg.uqw = + insertBits(FpDestReg.uqw, + (offset + 1) * destSize * 8 - 1, + (offset + 0) * destSize * 8, srcReg1); + } else { + FpDestReg.uqw = FpDestReg.uqw; + } + ''' + + class Movsign(MediaOp): def __init__(self, dest, src, \ size = None, destSize = None, srcSize = None, ext = None): - super(Mov2fp, self).__init__(dest, src,\ + super(Movsign, self).__init__(dest, src,\ "InstRegIndex(0)", size, destSize, srcSize, ext) code = ''' - uint64_t srcReg1 = pick(SrcReg1, 0, srcSize); - FpDestReg.uqw = - insertBits(FpDestReg.uqw, destSize * 8 - 1, 0, srcReg1); + int items = sizeof(FloatRegBits) / srcSize; + uint64_t result = 0; + int offset = (ext & 0x1) ? items : 0; + for (int i = 0; i < items; i++) { + uint64_t picked = + bits(FpSrcReg1.uqw, (i + 1) * 8 * srcSize - 1); + result = insertBits(result, i + offset, i + offset, picked); + } + DestReg = DestReg | result; + ''' + + class Maskmov(MediaOp): + code = ''' + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + if (bits(FpSrcReg2.uqw, hiIndex)) + result = insertBits(result, hiIndex, loIndex, arg1Bits); + } + FpDestReg.uqw = result; + ''' + + class shuffle(MediaOp): + code = ''' + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = sizeof(FloatRegBits) / size; + int options; + int optionBits; + if (size == 8) { + options = 2; + optionBits = 1; + } else { + options = 4; + optionBits = 2; + } + + uint64_t result = 0; + uint8_t sel = ext; + + for (int i = 0; i < items; i++) { + uint64_t resBits; + uint8_t lsel = sel & mask(optionBits); + if (lsel * size >= sizeof(FloatRegBits)) { + lsel -= options / 2; + resBits = bits(FpSrcReg2.uqw, + (lsel + 1) * sizeBits - 1, + (lsel + 0) * sizeBits); + } else { + resBits = bits(FpSrcReg1.uqw, + (lsel + 1) * sizeBits - 1, + (lsel + 0) * sizeBits); + } + + sel >>= optionBits; + + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; ''' class Unpack(MediaOp): @@ -354,7 +452,7 @@ let {{ if (signBit) { if (overflow != mask(destBits - srcBits + 1)) { if (ext & 0x1) - picked = (1 << (destBits - 1)); + picked = (ULL(1) << (destBits - 1)); else picked = 0; } @@ -382,7 +480,7 @@ let {{ if (signBit) { if (overflow != mask(destBits - srcBits + 1)) { if (ext & 0x1) - picked = (1 << (destBits - 1)); + picked = (ULL(1) << (destBits - 1)); else picked = 0; } @@ -530,6 +628,82 @@ let {{ FpDestReg.uqw = result; ''' + class Mmini(MediaOp): + code = ''' + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + int64_t arg1 = arg1Bits | + (0 - (arg1Bits & (ULL(1) << (sizeBits - 1)))); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + int64_t arg2 = arg2Bits | + (0 - (arg2Bits & (ULL(1) << (sizeBits - 1)))); + uint64_t resBits; + + if (ext & 0x2) { + if (arg1 < arg2) { + resBits = arg1Bits; + } else { + resBits = arg2Bits; + } + } else { + if (arg1Bits < arg2Bits) { + resBits = arg1Bits; + } else { + resBits = arg2Bits; + } + } + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Mmaxi(MediaOp): + code = ''' + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + int64_t arg1 = arg1Bits | + (0 - (arg1Bits & (ULL(1) << (sizeBits - 1)))); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + int64_t arg2 = arg2Bits | + (0 - (arg2Bits & (ULL(1) << (sizeBits - 1)))); + uint64_t resBits; + + if (ext & 0x2) { + if (arg1 > arg2) { + resBits = arg1Bits; + } else { + resBits = arg2Bits; + } + } else { + if (arg1Bits > arg2Bits) { + resBits = arg1Bits; + } else { + resBits = arg2Bits; + } + } + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + class Msqrt(MediaOp): def __init__(self, dest, src, \ size = None, destSize = None, srcSize = None, ext = None): @@ -620,4 +794,762 @@ let {{ } FpDestReg.uqw = result; ''' + + class Msubf(MediaOp): + code = ''' + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + assert(srcSize == 4 || srcSize == 8); + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + uint64_t resBits; + + if (size == 4) { + floatInt arg1, arg2, res; + arg1.i = arg1Bits; + arg2.i = arg2Bits; + res.f = arg1.f - arg2.f; + resBits = res.i; + } else { + doubleInt arg1, arg2, res; + arg1.i = arg1Bits; + arg2.i = arg2Bits; + res.d = arg1.d - arg2.d; + resBits = res.i; + } + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Mmulf(MediaOp): + code = ''' + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + assert(srcSize == 4 || srcSize == 8); + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + uint64_t resBits; + + if (size == 4) { + floatInt arg1, arg2, res; + arg1.i = arg1Bits; + arg2.i = arg2Bits; + res.f = arg1.f * arg2.f; + resBits = res.i; + } else { + doubleInt arg1, arg2, res; + arg1.i = arg1Bits; + arg2.i = arg2Bits; + res.d = arg1.d * arg2.d; + resBits = res.i; + } + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Mdivf(MediaOp): + code = ''' + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + assert(srcSize == 4 || srcSize == 8); + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + uint64_t resBits; + + if (size == 4) { + floatInt arg1, arg2, res; + arg1.i = arg1Bits; + arg2.i = arg2Bits; + res.f = arg1.f / arg2.f; + resBits = res.i; + } else { + doubleInt arg1, arg2, res; + arg1.i = arg1Bits; + arg2.i = arg2Bits; + res.d = arg1.d / arg2.d; + resBits = res.i; + } + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Maddi(MediaOp): + code = ''' + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + uint64_t resBits = arg1Bits + arg2Bits; + + if (ext & 0x2) { + if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits)) + resBits = mask(sizeBits); + } else if (ext & 0x4) { + int arg1Sign = bits(arg1Bits, sizeBits - 1); + int arg2Sign = bits(arg2Bits, sizeBits - 1); + int resSign = bits(resBits, sizeBits - 1); + if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) { + if (resSign == 0) + resBits = (ULL(1) << (sizeBits - 1)); + else + resBits = mask(sizeBits - 1); + } + } + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Msubi(MediaOp): + code = ''' + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + uint64_t resBits = arg1Bits - arg2Bits; + + if (ext & 0x2) { + if (arg2Bits > arg1Bits) { + resBits = 0; + } else if (!findCarry(sizeBits, resBits, + arg1Bits, ~arg2Bits)) { + resBits = mask(sizeBits); + } + } else if (ext & 0x4) { + int arg1Sign = bits(arg1Bits, sizeBits - 1); + int arg2Sign = !bits(arg2Bits, sizeBits - 1); + int resSign = bits(resBits, sizeBits - 1); + if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) { + if (resSign == 0) + resBits = (ULL(1) << (sizeBits - 1)); + else + resBits = mask(sizeBits - 1); + } + } + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Mmuli(MediaOp): + code = ''' + int srcBits = srcSize * 8; + int destBits = destSize * 8; + assert(destBits <= 64); + assert(destSize >= srcSize); + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / destSize); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int offset = 0; + if (ext & 16) { + if (ext & 32) + offset = i * (destBits - srcBits); + else + offset = i * (destBits - srcBits) + srcBits; + } + int srcHiIndex = (i + 1) * srcBits - 1 + offset; + int srcLoIndex = (i + 0) * srcBits + offset; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex); + uint64_t resBits; + + if (ext & 0x2) { + int64_t arg1 = arg1Bits | + (0 - (arg1Bits & (ULL(1) << (srcBits - 1)))); + int64_t arg2 = arg2Bits | + (0 - (arg2Bits & (ULL(1) << (srcBits - 1)))); + resBits = (uint64_t)(arg1 * arg2); + } else { + resBits = arg1Bits * arg2Bits; + } + + if (ext & 0x4) + resBits += (ULL(1) << (destBits - 1)); + + if (ext & 0x8) + resBits >>= destBits; + + int destHiIndex = (i + 1) * destBits - 1; + int destLoIndex = (i + 0) * destBits; + result = insertBits(result, destHiIndex, destLoIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Mavg(MediaOp): + code = ''' + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2; + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Msad(MediaOp): + code = ''' + int srcBits = srcSize * 8; + int items = sizeof(FloatRegBits) / srcSize; + + uint64_t sum = 0; + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * srcBits - 1; + int loIndex = (i + 0) * srcBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + int64_t resBits = arg1Bits - arg2Bits; + if (resBits < 0) + resBits = -resBits; + sum += resBits; + } + FpDestReg.uqw = sum & mask(destSize * 8); + ''' + + class Msrl(MediaOp): + code = ''' + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t shiftAmt = op2.uqw; + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t resBits; + if (shiftAmt >= sizeBits) { + resBits = 0; + } else { + resBits = (arg1Bits >> shiftAmt) & + mask(sizeBits - shiftAmt); + } + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Msra(MediaOp): + code = ''' + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t shiftAmt = op2.uqw; + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t resBits; + if (shiftAmt >= sizeBits) { + if (bits(arg1Bits, sizeBits - 1)) + resBits = mask(sizeBits); + else + resBits = 0; + } else { + resBits = (arg1Bits >> shiftAmt); + resBits = resBits | + (0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt)))); + } + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Msll(MediaOp): + code = ''' + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t shiftAmt = op2.uqw; + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t resBits; + if (shiftAmt >= sizeBits) { + resBits = 0; + } else { + resBits = (arg1Bits << shiftAmt); + } + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Cvtf2i(MediaOp): + def __init__(self, dest, src, \ + size = None, destSize = None, srcSize = None, ext = None): + super(Cvtf2i, self).__init__(dest, src,\ + "InstRegIndex(0)", size, destSize, srcSize, ext) + code = ''' + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + assert(destSize == 4 || destSize == 8); + assert(srcSize == 4 || srcSize == 8); + int srcSizeBits = srcSize * 8; + int destSizeBits = destSize * 8; + int items; + int srcStart = 0; + int destStart = 0; + if (srcSize == 2 * destSize) { + items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize; + if (ext & 0x2) + destStart = destSizeBits * items; + } else if (destSize == 2 * srcSize) { + items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize; + if (ext & 0x2) + srcStart = srcSizeBits * items; + } else { + items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize; + } + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1; + int srcLoIndex = srcStart + (i + 0) * srcSizeBits; + uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex); + double arg; + + if (srcSize == 4) { + floatInt fi; + fi.i = argBits; + arg = fi.f; + } else { + doubleInt di; + di.i = argBits; + arg = di.d; + } + + if (ext & 0x4) { + if (arg >= 0) + arg += 0.5; + else + arg -= 0.5; + } + + if (destSize == 4) { + argBits = (uint32_t)arg; + } else { + argBits = (uint64_t)arg; + } + int destHiIndex = destStart + (i + 1) * destSizeBits - 1; + int destLoIndex = destStart + (i + 0) * destSizeBits; + result = insertBits(result, destHiIndex, destLoIndex, argBits); + } + FpDestReg.uqw = result; + ''' + + class Cvti2f(MediaOp): + def __init__(self, dest, src, \ + size = None, destSize = None, srcSize = None, ext = None): + super(Cvti2f, self).__init__(dest, src,\ + "InstRegIndex(0)", size, destSize, srcSize, ext) + code = ''' + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + assert(destSize == 4 || destSize == 8); + assert(srcSize == 4 || srcSize == 8); + int srcSizeBits = srcSize * 8; + int destSizeBits = destSize * 8; + int items; + int srcStart = 0; + int destStart = 0; + if (srcSize == 2 * destSize) { + items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize; + if (ext & 0x2) + destStart = destSizeBits * items; + } else if (destSize == 2 * srcSize) { + items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize; + if (ext & 0x2) + srcStart = srcSizeBits * items; + } else { + items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize; + } + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1; + int srcLoIndex = srcStart + (i + 0) * srcSizeBits; + uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex); + + int64_t sArg = argBits | (0 - (argBits & (ULL(1) << srcHiIndex))); + double arg = sArg; + + if (destSize == 4) { + floatInt fi; + fi.f = arg; + argBits = fi.i; + } else { + doubleInt di; + di.d = arg; + argBits = di.i; + } + int destHiIndex = destStart + (i + 1) * destSizeBits - 1; + int destLoIndex = destStart + (i + 0) * destSizeBits; + result = insertBits(result, destHiIndex, destLoIndex, argBits); + } + FpDestReg.uqw = result; + ''' + + class Cvtf2f(MediaOp): + def __init__(self, dest, src, \ + size = None, destSize = None, srcSize = None, ext = None): + super(Cvtf2f, self).__init__(dest, src,\ + "InstRegIndex(0)", size, destSize, srcSize, ext) + code = ''' + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + assert(destSize == 4 || destSize == 8); + assert(srcSize == 4 || srcSize == 8); + int srcSizeBits = srcSize * 8; + int destSizeBits = destSize * 8; + int items; + int srcStart = 0; + int destStart = 0; + if (srcSize == 2 * destSize) { + items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize; + if (ext & 0x2) + destStart = destSizeBits * items; + } else if (destSize == 2 * srcSize) { + items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize; + if (ext & 0x2) + srcStart = srcSizeBits * items; + } else { + items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize; + } + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1; + int srcLoIndex = srcStart + (i + 0) * srcSizeBits; + uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex); + double arg; + + if (srcSize == 4) { + floatInt fi; + fi.i = argBits; + arg = fi.f; + } else { + doubleInt di; + di.i = argBits; + arg = di.d; + } + if (destSize == 4) { + floatInt fi; + fi.f = arg; + argBits = fi.i; + } else { + doubleInt di; + di.d = arg; + argBits = di.i; + } + int destHiIndex = destStart + (i + 1) * destSizeBits - 1; + int destLoIndex = destStart + (i + 0) * destSizeBits; + result = insertBits(result, destHiIndex, destLoIndex, argBits); + } + FpDestReg.uqw = result; + ''' + + class Mcmpi2r(MediaOp): + code = ''' + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + int64_t arg1 = arg1Bits | + (0 - (arg1Bits & (ULL(1) << (sizeBits - 1)))); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + int64_t arg2 = arg2Bits | + (0 - (arg2Bits & (ULL(1) << (sizeBits - 1)))); + + uint64_t resBits = 0; + if (((ext & 0x2) == 0 && arg1 == arg2) || + ((ext & 0x2) == 0x2 && arg1 > arg2)) + resBits = mask(sizeBits); + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Mcmpf2r(MediaOp): + code = ''' + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + assert(srcSize == destSize); + int size = srcSize; + int sizeBits = size * 8; + int items = (ext & 0x8) ? 1: (sizeof(FloatRegBits) / size); + uint64_t result = FpDestReg.uqw; + + for (int i = 0; i < items; i++) { + int hiIndex = (i + 1) * sizeBits - 1; + int loIndex = (i + 0) * sizeBits; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex); + double arg1, arg2; + + if (size == 4) { + floatInt fi; + fi.i = arg1Bits; + arg1 = fi.f; + fi.i = arg2Bits; + arg2 = fi.f; + } else { + doubleInt di; + di.i = arg1Bits; + arg1 = di.d; + di.i = arg2Bits; + arg2 = di.d; + } + + uint64_t resBits = 0; + bool nanop = isnan(arg1) || isnan(arg2); + switch (ext & mask(3)) { + case 0: + if (arg1 == arg2 && !nanop) + resBits = mask(sizeBits); + break; + case 1: + if (arg1 < arg2 && !nanop) + resBits = mask(sizeBits); + break; + case 2: + if (arg1 <= arg2 && !nanop) + resBits = mask(sizeBits); + break; + case 3: + if (nanop) + resBits = mask(sizeBits); + break; + case 4: + if (arg1 != arg2 || nanop) + resBits = mask(sizeBits); + break; + case 5: + if (!(arg1 < arg2) || nanop) + resBits = mask(sizeBits); + break; + case 6: + if (!(arg1 <= arg2) || nanop) + resBits = mask(sizeBits); + break; + case 7: + if (!nanop) + resBits = mask(sizeBits); + break; + }; + + result = insertBits(result, hiIndex, loIndex, resBits); + } + FpDestReg.uqw = result; + ''' + + class Mcmpf2rf(MediaOp): + def __init__(self, src1, src2,\ + size = None, destSize = None, srcSize = None, ext = None): + super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,\ + src2, size, destSize, srcSize, ext) + code = ''' + union floatInt + { + float f; + uint32_t i; + }; + union doubleInt + { + double d; + uint64_t i; + }; + + assert(srcSize == destSize); + assert(srcSize == 4 || srcSize == 8); + int size = srcSize; + int sizeBits = size * 8; + + double arg1, arg2; + uint64_t arg1Bits = bits(FpSrcReg1.uqw, sizeBits - 1, 0); + uint64_t arg2Bits = bits(FpSrcReg2.uqw, sizeBits - 1, 0); + if (size == 4) { + floatInt fi; + fi.i = arg1Bits; + arg1 = fi.f; + fi.i = arg2Bits; + arg2 = fi.f; + } else { + doubleInt di; + di.i = arg1Bits; + arg1 = di.d; + di.i = arg2Bits; + arg2 = di.d; + } + + // ZF PF CF + // Unordered 1 1 1 + // Greater than 0 0 0 + // Less than 0 0 1 + // Equal 1 0 0 + // OF = SF = AF = 0 + ccFlagBits = ccFlagBits & ~(OFBit | SFBit | AFBit | + ZFBit | PFBit | CFBit); + if (isnan(arg1) || isnan(arg2)) + ccFlagBits = ccFlagBits | (ZFBit | PFBit | CFBit); + else if(arg1 < arg2) + ccFlagBits = ccFlagBits | CFBit; + else if(arg1 == arg2) + ccFlagBits = ccFlagBits | ZFBit; + ''' }};