typeQual = ""
if match.group("typeQual"):
typeQual = match.group("typeQual")
- src2_name = "%spsrc2%s" % (match.group("prefix"), typeQual)
+ src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
self.buildCppClasses(name, Name, suffix,
matcher.sub(src2_name, code))
self.buildCppClasses(name + "i", Name, suffix + "Imm",
return allocator
class Mov2int(MediaOp):
+ # Microop whose generated code copies one item of FpSrcReg1 into DestReg.
+ # The item index starts as imm8; when bit 0 of src1 and bit 0 of ext are
+ # both set, the index is lowered by the number of srcSize-byte items in a
+ # FloatRegBits. An index inside [0, items) selects the item, which is
+ # merged over zero with destSize; any other index leaves DestReg as is.
+ # NOTE(review): imm8 presumably carries the src2 argument (default 0) --
+ # confirm against MediaOp.
- def __init__(self, dest, src, \
+ def __init__(self, dest, src1, src2 = 0, \
size = None, destSize = None, srcSize = None, ext = None):
- super(Mov2int, self).__init__(dest, src,\
- "InstRegIndex(0)", size, destSize, srcSize, ext)
+ super(Mov2int, self).__init__(dest, src1,\
+ src2, size, destSize, srcSize, ext)
code = '''
- uint64_t fpSrcReg1 = bits(FpSrcReg1.uqw, srcSize * 8 - 1, 0);
- DestReg = merge(DestReg, fpSrcReg1, destSize);
+ int items = sizeof(FloatRegBits) / srcSize;
+ int offset = imm8;
+ if (bits(src1, 0) && (ext & 0x1))
+ offset -= items;
+ if (offset >= 0 && offset < items) {
+ uint64_t fpSrcReg1 =
+ bits(FpSrcReg1.uqw,
+ (offset + 1) * srcSize * 8 - 1,
+ (offset + 0) * srcSize * 8);
+ DestReg = merge(0, fpSrcReg1, destSize);
+ } else {
+ DestReg = DestReg;
+ }
'''
class Mov2fp(MediaOp):
+ # Microop whose generated code writes a value picked from SrcReg1 into
+ # one destSize-byte item of FpDestReg. The item index starts as imm8;
+ # when bit 0 of dest and bit 0 of ext are both set, the index is lowered
+ # by the number of destSize-byte items in a FloatRegBits. An index
+ # outside [0, items) leaves FpDestReg unchanged.
+ # NOTE(review): imm8 presumably carries the src2 argument (default 0) --
+ # confirm against MediaOp.
+ def __init__(self, dest, src1, src2 = 0, \
+ size = None, destSize = None, srcSize = None, ext = None):
+ super(Mov2fp, self).__init__(dest, src1,\
+ src2, size, destSize, srcSize, ext)
+ code = '''
+ int items = sizeof(FloatRegBits) / destSize;
+ int offset = imm8;
+ if (bits(dest, 0) && (ext & 0x1))
+ offset -= items;
+ if (offset >= 0 && offset < items) {
+ uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
+ FpDestReg.uqw =
+ insertBits(FpDestReg.uqw,
+ (offset + 1) * destSize * 8 - 1,
+ (offset + 0) * destSize * 8, srcReg1);
+ } else {
+ FpDestReg.uqw = FpDestReg.uqw;
+ }
+ '''
+
+ class Movsign(MediaOp):
+ # Microop whose generated code gathers the top bit of every srcSize-byte
+ # item of FpSrcReg1 into a bit mask and ORs that mask into DestReg.
+ # Item i lands in bit (i + offset), where offset is the item count when
+ # bit 0 of ext is set and 0 otherwise. The second source passed to the
+ # base class is the fixed string "InstRegIndex(0)".
def __init__(self, dest, src, \
size = None, destSize = None, srcSize = None, ext = None):
- super(Mov2fp, self).__init__(dest, src,\
+ super(Movsign, self).__init__(dest, src,\
"InstRegIndex(0)", size, destSize, srcSize, ext)
code = '''
- uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
- FpDestReg.uqw =
- insertBits(FpDestReg.uqw, destSize * 8 - 1, 0, srcReg1);
+ int items = sizeof(FloatRegBits) / srcSize;
+ uint64_t result = 0;
+ int offset = (ext & 0x1) ? items : 0;
+ for (int i = 0; i < items; i++) {
+ uint64_t picked =
+ bits(FpSrcReg1.uqw, (i + 1) * 8 * srcSize - 1);
+ result = insertBits(result, i + offset, i + offset, picked);
+ }
+ DestReg = DestReg | result;
+ '''
+
+ class Maskmov(MediaOp):
+ # Microop whose generated code conditionally copies items from
+ # FpSrcReg1 into FpDestReg. An item is copied only when the top bit of
+ # the same item in FpSrcReg2 is set; other items keep the value
+ # FpDestReg already had. Only item 0 is processed when bit 0 of ext is
+ # set. Source and destination sizes must match (asserted).
+ code = '''
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ if (bits(FpSrcReg2.uqw, hiIndex))
+ result = insertBits(result, hiIndex, loIndex, arg1Bits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class shuffle(MediaOp):
+ # Microop whose generated code builds FpDestReg item by item from
+ # selector fields held in ext. Each destination item consumes
+ # optionBits bits of the selector (1 bit for 8-byte items, 2 bits
+ # otherwise), lowest bits first. A selector whose byte offset
+ # (lsel * size) reaches sizeof(FloatRegBits) picks from FpSrcReg2 after
+ # subtracting options / 2; any other selector picks from FpSrcReg1.
+ # Source and destination sizes must match (asserted).
+ code = '''
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ int items = sizeof(FloatRegBits) / size;
+ int options;
+ int optionBits;
+ if (size == 8) {
+ options = 2;
+ optionBits = 1;
+ } else {
+ options = 4;
+ optionBits = 2;
+ }
+
+ uint64_t result = 0;
+ uint8_t sel = ext;
+
+ for (int i = 0; i < items; i++) {
+ uint64_t resBits;
+ uint8_t lsel = sel & mask(optionBits);
+ if (lsel * size >= sizeof(FloatRegBits)) {
+ lsel -= options / 2;
+ resBits = bits(FpSrcReg2.uqw,
+ (lsel + 1) * sizeBits - 1,
+ (lsel + 0) * sizeBits);
+ } else {
+ resBits = bits(FpSrcReg1.uqw,
+ (lsel + 1) * sizeBits - 1,
+ (lsel + 0) * sizeBits);
+ }
+
+ sel >>= optionBits;
+
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
'''
class Unpack(MediaOp):
FpDestReg.uqw = result;
'''
+ class Pack(MediaOp):
+ code = '''
+ assert(srcSize == destSize * 2);
+ int items = (sizeof(FloatRegBits) / destSize);
+ int destBits = destSize * 8;
+ int srcBits = srcSize * 8;
+ uint64_t result = 0;
+ int i;
+ for (i = 0; i < items / 2; i++) {
+ uint64_t picked =
+ bits(FpSrcReg1.uqw, (i + 1) * srcBits - 1,
+ (i + 0) * srcBits);
+ unsigned signBit = bits(picked, srcBits - 1);
+ uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
+
+ // Handle saturation.
+ if (signBit) {
+ if (overflow != mask(destBits - srcBits + 1)) {
+ if (ext & 0x1)
+ picked = (ULL(1) << (destBits - 1));
+ else
+ picked = 0;
+ }
+ } else {
+ if (overflow != 0) {
+ if (ext & 0x1)
+ picked = mask(destBits - 1);
+ else
+ picked = mask(destBits);
+ }
+ }
+ result = insertBits(result,
+ (i + 1) * destBits - 1,
+ (i + 0) * destBits,
+ picked);
+ }
+ for (;i < items; i++) {
+ uint64_t picked =
+ bits(FpSrcReg2.uqw, (i - items + 1) * srcBits - 1,
+ (i - items + 0) * srcBits);
+ unsigned signBit = bits(picked, srcBits - 1);
+ uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
+
+ // Handle saturation.
+ if (signBit) {
+ if (overflow != mask(destBits - srcBits + 1)) {
+ if (ext & 0x1)
+ picked = (ULL(1) << (destBits - 1));
+ else
+ picked = 0;
+ }
+ } else {
+ if (overflow != 0) {
+ if (ext & 0x1)
+ picked = mask(destBits - 1);
+ else
+ picked = mask(destBits);
+ }
+ }
+ result = insertBits(result,
+ (i + 1) * destBits - 1,
+ (i + 0) * destBits,
+ picked);
+ }
+ FpDestReg.uqw = result;
+ '''
+
class Mxor(MediaOp):
def __init__(self, dest, src1, src2):
super(Mxor, self).__init__(dest, src1, src2, 1)
code = '''
FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
'''
+
+ class Mminf(MediaOp):
+ # Microop whose generated code computes a per-item floating point
+ # minimum of FpSrcReg1 and FpSrcReg2 into FpDestReg. Items are 4-byte
+ # floats or 8-byte doubles (asserted). The FpSrcReg1 item is chosen only
+ # when it compares strictly less; in every other case the FpSrcReg2 item
+ # is chosen. Only item 0 is processed when bit 0 of ext is set;
+ # unprocessed items keep the value FpDestReg already had.
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ assert(srcSize == 4 || srcSize == 8);
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ double arg1, arg2;
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+
+ if (size == 4) {
+ floatInt fi;
+ fi.i = arg1Bits;
+ arg1 = fi.f;
+ fi.i = arg2Bits;
+ arg2 = fi.f;
+ } else {
+ doubleInt di;
+ di.i = arg1Bits;
+ arg1 = di.d;
+ di.i = arg2Bits;
+ arg2 = di.d;
+ }
+
+ if (arg1 < arg2) {
+ result = insertBits(result, hiIndex, loIndex, arg1Bits);
+ } else {
+ result = insertBits(result, hiIndex, loIndex, arg2Bits);
+ }
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Mmaxf(MediaOp):
+ # Microop whose generated code computes a per-item floating point
+ # maximum of FpSrcReg1 and FpSrcReg2 into FpDestReg. Items are 4-byte
+ # floats or 8-byte doubles (asserted). The FpSrcReg1 item is chosen only
+ # when it compares strictly greater; in every other case the FpSrcReg2
+ # item is chosen. Only item 0 is processed when bit 0 of ext is set;
+ # unprocessed items keep the value FpDestReg already had.
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ assert(srcSize == 4 || srcSize == 8);
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ double arg1, arg2;
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+
+ if (size == 4) {
+ floatInt fi;
+ fi.i = arg1Bits;
+ arg1 = fi.f;
+ fi.i = arg2Bits;
+ arg2 = fi.f;
+ } else {
+ doubleInt di;
+ di.i = arg1Bits;
+ arg1 = di.d;
+ di.i = arg2Bits;
+ arg2 = di.d;
+ }
+
+ if (arg1 > arg2) {
+ result = insertBits(result, hiIndex, loIndex, arg1Bits);
+ } else {
+ result = insertBits(result, hiIndex, loIndex, arg2Bits);
+ }
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Mmini(MediaOp):
+ # Microop whose generated code computes a per-item integer minimum of
+ # FpSrcReg1 and FpSrcReg2 into FpDestReg. Bit 1 of ext selects a signed
+ # comparison (items are sign-extended to 64 bits first); otherwise the
+ # raw item bits are compared as unsigned. Only item 0 is processed when
+ # bit 0 of ext is set. Source and destination sizes must match
+ # (asserted).
+ code = '''
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ int64_t arg1 = arg1Bits |
+ (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ int64_t arg2 = arg2Bits |
+ (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
+ uint64_t resBits;
+
+ if (ext & 0x2) {
+ if (arg1 < arg2) {
+ resBits = arg1Bits;
+ } else {
+ resBits = arg2Bits;
+ }
+ } else {
+ if (arg1Bits < arg2Bits) {
+ resBits = arg1Bits;
+ } else {
+ resBits = arg2Bits;
+ }
+ }
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Mmaxi(MediaOp):
+ # Microop whose generated code computes a per-item integer maximum of
+ # FpSrcReg1 and FpSrcReg2 into FpDestReg. Bit 1 of ext selects a signed
+ # comparison (items are sign-extended to 64 bits first); otherwise the
+ # raw item bits are compared as unsigned. Only item 0 is processed when
+ # bit 0 of ext is set. Source and destination sizes must match
+ # (asserted).
+ code = '''
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ int64_t arg1 = arg1Bits |
+ (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ int64_t arg2 = arg2Bits |
+ (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
+ uint64_t resBits;
+
+ if (ext & 0x2) {
+ if (arg1 > arg2) {
+ resBits = arg1Bits;
+ } else {
+ resBits = arg2Bits;
+ }
+ } else {
+ if (arg1Bits > arg2Bits) {
+ resBits = arg1Bits;
+ } else {
+ resBits = arg2Bits;
+ }
+ }
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Msqrt(MediaOp):
+ # Microop whose generated code replaces each item of FpDestReg with
+ # the square root of the matching FpSrcReg1 item. Items are 4-byte
+ # floats or 8-byte doubles (asserted). Only item 0 is processed when
+ # bit 0 of ext is set; unprocessed items keep the value FpDestReg
+ # already had. The second source passed to the base class is the fixed
+ # string "InstRegIndex(0)".
+ def __init__(self, dest, src, \
+ size = None, destSize = None, srcSize = None, ext = None):
+ super(Msqrt, self).__init__(dest, src,\
+ "InstRegIndex(0)", size, destSize, srcSize, ext)
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ assert(srcSize == 4 || srcSize == 8);
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+
+ if (size == 4) {
+ floatInt fi;
+ fi.i = argBits;
+ fi.f = sqrt(fi.f);
+ argBits = fi.i;
+ } else {
+ doubleInt di;
+ di.i = argBits;
+ di.d = sqrt(di.d);
+ argBits = di.i;
+ }
+ result = insertBits(result, hiIndex, loIndex, argBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Maddf(MediaOp):
+ # Microop whose generated code adds matching floating point items of
+ # FpSrcReg1 and FpSrcReg2 and stores the sums in FpDestReg. Items are
+ # 4-byte floats or 8-byte doubles (asserted), and the arithmetic is done
+ # in the item's own precision. Only item 0 is processed when bit 0 of
+ # ext is set; unprocessed items keep the value FpDestReg already had.
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ assert(srcSize == 4 || srcSize == 8);
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ uint64_t resBits;
+
+ if (size == 4) {
+ floatInt arg1, arg2, res;
+ arg1.i = arg1Bits;
+ arg2.i = arg2Bits;
+ res.f = arg1.f + arg2.f;
+ resBits = res.i;
+ } else {
+ doubleInt arg1, arg2, res;
+ arg1.i = arg1Bits;
+ arg2.i = arg2Bits;
+ res.d = arg1.d + arg2.d;
+ resBits = res.i;
+ }
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Msubf(MediaOp):
+ # Microop whose generated code subtracts each floating point item of
+ # FpSrcReg2 from the matching item of FpSrcReg1 and stores the
+ # differences in FpDestReg. Items are 4-byte floats or 8-byte doubles
+ # (asserted). Only item 0 is processed when bit 0 of ext is set;
+ # unprocessed items keep the value FpDestReg already had.
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ assert(srcSize == 4 || srcSize == 8);
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ uint64_t resBits;
+
+ if (size == 4) {
+ floatInt arg1, arg2, res;
+ arg1.i = arg1Bits;
+ arg2.i = arg2Bits;
+ res.f = arg1.f - arg2.f;
+ resBits = res.i;
+ } else {
+ doubleInt arg1, arg2, res;
+ arg1.i = arg1Bits;
+ arg2.i = arg2Bits;
+ res.d = arg1.d - arg2.d;
+ resBits = res.i;
+ }
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Mmulf(MediaOp):
+ # Microop whose generated code multiplies matching floating point items
+ # of FpSrcReg1 and FpSrcReg2 and stores the products in FpDestReg.
+ # Items are 4-byte floats or 8-byte doubles (asserted). Only item 0 is
+ # processed when bit 0 of ext is set; unprocessed items keep the value
+ # FpDestReg already had.
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ assert(srcSize == 4 || srcSize == 8);
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ uint64_t resBits;
+
+ if (size == 4) {
+ floatInt arg1, arg2, res;
+ arg1.i = arg1Bits;
+ arg2.i = arg2Bits;
+ res.f = arg1.f * arg2.f;
+ resBits = res.i;
+ } else {
+ doubleInt arg1, arg2, res;
+ arg1.i = arg1Bits;
+ arg2.i = arg2Bits;
+ res.d = arg1.d * arg2.d;
+ resBits = res.i;
+ }
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Mdivf(MediaOp):
+ # Microop whose generated code divides each floating point item of
+ # FpSrcReg1 by the matching item of FpSrcReg2 and stores the quotients
+ # in FpDestReg. Items are 4-byte floats or 8-byte doubles (asserted).
+ # Only item 0 is processed when bit 0 of ext is set; unprocessed items
+ # keep the value FpDestReg already had.
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ assert(srcSize == 4 || srcSize == 8);
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ uint64_t resBits;
+
+ if (size == 4) {
+ floatInt arg1, arg2, res;
+ arg1.i = arg1Bits;
+ arg2.i = arg2Bits;
+ res.f = arg1.f / arg2.f;
+ resBits = res.i;
+ } else {
+ doubleInt arg1, arg2, res;
+ arg1.i = arg1Bits;
+ arg2.i = arg2Bits;
+ res.d = arg1.d / arg2.d;
+ resBits = res.i;
+ }
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Maddi(MediaOp):
+ # Microop whose generated code adds matching integer items of
+ # FpSrcReg1 and FpSrcReg2 into FpDestReg. Saturation is selected by
+ # ext: with bit 1 set, a carry out of the item (per findCarry) replaces
+ # the sum with all ones; otherwise with bit 2 set, equal operand signs
+ # that differ from the sum's sign replace the sum with the most
+ # negative or most positive signed value. With neither bit set the sum
+ # wraps. Only item 0 is processed when bit 0 of ext is set.
+ code = '''
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ uint64_t resBits = arg1Bits + arg2Bits;
+
+ if (ext & 0x2) {
+ if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
+ resBits = mask(sizeBits);
+ } else if (ext & 0x4) {
+ int arg1Sign = bits(arg1Bits, sizeBits - 1);
+ int arg2Sign = bits(arg2Bits, sizeBits - 1);
+ int resSign = bits(resBits, sizeBits - 1);
+ if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
+ if (resSign == 0)
+ resBits = (ULL(1) << (sizeBits - 1));
+ else
+ resBits = mask(sizeBits - 1);
+ }
+ }
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Msubi(MediaOp):
+ # Microop whose generated code subtracts each integer item of FpSrcReg2
+ # from the matching item of FpSrcReg1 into FpDestReg. Saturation is
+ # selected by ext: with bit 1 set, the result is 0 when the FpSrcReg2
+ # item is the larger unsigned value, or all ones when findCarry on
+ # (arg1, ~arg2) reports no carry; otherwise with bit 2 set, a signed
+ # overflow check (using the inverted sign of arg2) clamps to the most
+ # negative or most positive signed value. Only item 0 is processed when
+ # bit 0 of ext is set.
+ code = '''
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ uint64_t resBits = arg1Bits - arg2Bits;
+
+ if (ext & 0x2) {
+ if (arg2Bits > arg1Bits) {
+ resBits = 0;
+ } else if (!findCarry(sizeBits, resBits,
+ arg1Bits, ~arg2Bits)) {
+ resBits = mask(sizeBits);
+ }
+ } else if (ext & 0x4) {
+ int arg1Sign = bits(arg1Bits, sizeBits - 1);
+ int arg2Sign = !bits(arg2Bits, sizeBits - 1);
+ int resSign = bits(resBits, sizeBits - 1);
+ if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
+ if (resSign == 0)
+ resBits = (ULL(1) << (sizeBits - 1));
+ else
+ resBits = mask(sizeBits - 1);
+ }
+ }
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Mmuli(MediaOp):
+ # Microop whose generated code multiplies matching integer items of
+ # FpSrcReg1 and FpSrcReg2 into destSize-byte items of FpDestReg
+ # (destSize >= srcSize, asserted). ext bits as read by the code:
+ # 0x1 - process only item 0
+ # 0x2 - sign-extend both operands before multiplying
+ # 0x4 - add 1 << (destBits - 1) to the product
+ # 0x8 - shift the product right by destBits
+ # 0x10 - space source items destBits apart; 0x20 then selects
+ # whether the extra srcBits offset is omitted (set) or
+ # added (clear)
+ # NOTE(review): with 0x8 set and destBits == 64 the shift count equals
+ # the operand width -- confirm that combination is never generated.
+ code = '''
+ int srcBits = srcSize * 8;
+ int destBits = destSize * 8;
+ assert(destBits <= 64);
+ assert(destSize >= srcSize);
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / destSize);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int offset = 0;
+ if (ext & 16) {
+ if (ext & 32)
+ offset = i * (destBits - srcBits);
+ else
+ offset = i * (destBits - srcBits) + srcBits;
+ }
+ int srcHiIndex = (i + 1) * srcBits - 1 + offset;
+ int srcLoIndex = (i + 0) * srcBits + offset;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex);
+ uint64_t resBits;
+
+ if (ext & 0x2) {
+ int64_t arg1 = arg1Bits |
+ (0 - (arg1Bits & (ULL(1) << (srcBits - 1))));
+ int64_t arg2 = arg2Bits |
+ (0 - (arg2Bits & (ULL(1) << (srcBits - 1))));
+ resBits = (uint64_t)(arg1 * arg2);
+ } else {
+ resBits = arg1Bits * arg2Bits;
+ }
+
+ if (ext & 0x4)
+ resBits += (ULL(1) << (destBits - 1));
+
+ if (ext & 0x8)
+ resBits >>= destBits;
+
+ int destHiIndex = (i + 1) * destBits - 1;
+ int destLoIndex = (i + 0) * destBits;
+ result = insertBits(result, destHiIndex, destLoIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Mavg(MediaOp):
+ # Microop whose generated code stores, for each item, the unsigned
+ # average of the FpSrcReg1 and FpSrcReg2 items rounded up:
+ # (a + b + 1) / 2. Only item 0 is processed when bit 0 of ext is set;
+ # unprocessed items keep the value FpDestReg already had. Source and
+ # destination sizes must match (asserted).
+ code = '''
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Msad(MediaOp):
+ # Microop whose generated code sums the absolute differences of all
+ # matching srcSize-byte items of FpSrcReg1 and FpSrcReg2. The sum,
+ # truncated to destSize bytes, replaces the whole of FpDestReg.
+ code = '''
+ int srcBits = srcSize * 8;
+ int items = sizeof(FloatRegBits) / srcSize;
+
+ uint64_t sum = 0;
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * srcBits - 1;
+ int loIndex = (i + 0) * srcBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ int64_t resBits = arg1Bits - arg2Bits;
+ if (resBits < 0)
+ resBits = -resBits;
+ sum += resBits;
+ }
+ FpDestReg.uqw = sum & mask(destSize * 8);
+ '''
+
+ class Msrl(MediaOp):
+ # Microop whose generated code logically shifts each item of FpSrcReg1
+ # right by the amount in op2.uqw and stores the results in FpDestReg.
+ # A shift amount of sizeBits or more yields 0. Only item 0 is processed
+ # when bit 0 of ext is set.
+ # NOTE(review): "op2" is presumably rewritten by the class generator to
+ # the register or immediate form of the second operand -- confirm.
+ code = '''
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t shiftAmt = op2.uqw;
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t resBits;
+ if (shiftAmt >= sizeBits) {
+ resBits = 0;
+ } else {
+ resBits = (arg1Bits >> shiftAmt) &
+ mask(sizeBits - shiftAmt);
+ }
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Msra(MediaOp):
+ # Microop whose generated code arithmetically shifts each item of
+ # FpSrcReg1 right by the amount in op2.uqw and stores the results in
+ # FpDestReg. A shift amount of sizeBits or more yields all ones for an
+ # item whose top bit is set and 0 otherwise; smaller amounts shift and
+ # then replicate the moved sign bit upward. Only item 0 is processed
+ # when bit 0 of ext is set.
+ # NOTE(review): "op2" is presumably rewritten by the class generator to
+ # the register or immediate form of the second operand -- confirm.
+ code = '''
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t shiftAmt = op2.uqw;
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t resBits;
+ if (shiftAmt >= sizeBits) {
+ if (bits(arg1Bits, sizeBits - 1))
+ resBits = mask(sizeBits);
+ else
+ resBits = 0;
+ } else {
+ resBits = (arg1Bits >> shiftAmt);
+ resBits = resBits |
+ (0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt))));
+ }
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Msll(MediaOp):
+ # Microop whose generated code shifts each item of FpSrcReg1 left by
+ # the amount in op2.uqw and stores the results in FpDestReg. A shift
+ # amount of sizeBits or more yields 0. Only item 0 is processed when
+ # bit 0 of ext is set.
+ # NOTE(review): "op2" is presumably rewritten by the class generator to
+ # the register or immediate form of the second operand -- confirm.
+ code = '''
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t shiftAmt = op2.uqw;
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t resBits;
+ if (shiftAmt >= sizeBits) {
+ resBits = 0;
+ } else {
+ resBits = (arg1Bits << shiftAmt);
+ }
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Cvtf2i(MediaOp):
+ def __init__(self, dest, src, \
+ size = None, destSize = None, srcSize = None, ext = None):
+ super(Cvtf2i, self).__init__(dest, src,\
+ "InstRegIndex(0)", size, destSize, srcSize, ext)
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(destSize == 4 || destSize == 8);
+ assert(srcSize == 4 || srcSize == 8);
+ int srcSizeBits = srcSize * 8;
+ int destSizeBits = destSize * 8;
+ int items;
+ int srcStart = 0;
+ int destStart = 0;
+ if (srcSize == 2 * destSize) {
+ items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
+ if (ext & 0x2)
+ destStart = destSizeBits * items;
+ } else if (destSize == 2 * srcSize) {
+ items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
+ if (ext & 0x2)
+ srcStart = srcSizeBits * items;
+ } else {
+ items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
+ }
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
+ int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
+ uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
+ double arg;
+
+ if (srcSize == 4) {
+ floatInt fi;
+ fi.i = argBits;
+ arg = fi.f;
+ } else {
+ doubleInt di;
+ di.i = argBits;
+ arg = di.d;
+ }
+
+ if (ext & 0x4) {
+ if (arg >= 0)
+ arg += 0.5;
+ else
+ arg -= 0.5;
+ }
+
+ if (destSize == 4) {
+ argBits = (uint32_t)arg;
+ } else {
+ argBits = (uint64_t)arg;
+ }
+ int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
+ int destLoIndex = destStart + (i + 0) * destSizeBits;
+ result = insertBits(result, destHiIndex, destLoIndex, argBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Cvti2f(MediaOp):
+ def __init__(self, dest, src, \
+ size = None, destSize = None, srcSize = None, ext = None):
+ super(Cvti2f, self).__init__(dest, src,\
+ "InstRegIndex(0)", size, destSize, srcSize, ext)
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(destSize == 4 || destSize == 8);
+ assert(srcSize == 4 || srcSize == 8);
+ int srcSizeBits = srcSize * 8;
+ int destSizeBits = destSize * 8;
+ int items;
+ int srcStart = 0;
+ int destStart = 0;
+ if (srcSize == 2 * destSize) {
+ items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
+ if (ext & 0x2)
+ destStart = destSizeBits * items;
+ } else if (destSize == 2 * srcSize) {
+ items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
+ if (ext & 0x2)
+ srcStart = srcSizeBits * items;
+ } else {
+ items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
+ }
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
+ int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
+ uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
+
+ int64_t sArg = argBits | (0 - (argBits & (ULL(1) << srcHiIndex)));
+ double arg = sArg;
+
+ if (destSize == 4) {
+ floatInt fi;
+ fi.f = arg;
+ argBits = fi.i;
+ } else {
+ doubleInt di;
+ di.d = arg;
+ argBits = di.i;
+ }
+ int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
+ int destLoIndex = destStart + (i + 0) * destSizeBits;
+ result = insertBits(result, destHiIndex, destLoIndex, argBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Cvtf2f(MediaOp):
+ # Microop whose generated code converts floating point items of
+ # FpSrcReg1 between 4-byte float and 8-byte double precision and
+ # stores them in FpDestReg. Each value is widened to double and then
+ # stored at the destination precision. ext bits as read by the code:
+ # 0x1 - process only one item
+ # 0x2 - when the sizes differ, use the upper part of the narrower
+ # register (destination when narrowing, source when widening)
+ # The second source passed to the base class is the fixed string
+ # "InstRegIndex(0)".
+ def __init__(self, dest, src, \
+ size = None, destSize = None, srcSize = None, ext = None):
+ super(Cvtf2f, self).__init__(dest, src,\
+ "InstRegIndex(0)", size, destSize, srcSize, ext)
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(destSize == 4 || destSize == 8);
+ assert(srcSize == 4 || srcSize == 8);
+ int srcSizeBits = srcSize * 8;
+ int destSizeBits = destSize * 8;
+ int items;
+ int srcStart = 0;
+ int destStart = 0;
+ if (srcSize == 2 * destSize) {
+ items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
+ if (ext & 0x2)
+ destStart = destSizeBits * items;
+ } else if (destSize == 2 * srcSize) {
+ items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
+ if (ext & 0x2)
+ srcStart = srcSizeBits * items;
+ } else {
+ items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
+ }
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
+ int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
+ uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
+ double arg;
+
+ if (srcSize == 4) {
+ floatInt fi;
+ fi.i = argBits;
+ arg = fi.f;
+ } else {
+ doubleInt di;
+ di.i = argBits;
+ arg = di.d;
+ }
+ if (destSize == 4) {
+ floatInt fi;
+ fi.f = arg;
+ argBits = fi.i;
+ } else {
+ doubleInt di;
+ di.d = arg;
+ argBits = di.i;
+ }
+ int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
+ int destLoIndex = destStart + (i + 0) * destSizeBits;
+ result = insertBits(result, destHiIndex, destLoIndex, argBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Mcmpi2r(MediaOp):
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ int64_t arg1 = arg1Bits |
+ (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ int64_t arg2 = arg2Bits |
+ (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
+
+ uint64_t resBits = 0;
+ if (((ext & 0x2) == 0 && arg1 == arg2) ||
+ ((ext & 0x2) == 0x2 && arg1 > arg2))
+ resBits = mask(sizeBits);
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Mcmpf2r(MediaOp):
+ # Microop whose generated code compares matching floating point items
+ # of FpSrcReg1 and FpSrcReg2 and writes an all-ones item to FpDestReg
+ # where the selected predicate holds and an all-zero item otherwise.
+ # The low three bits of ext select the predicate:
+ # 0 equal 4 not equal, or unordered
+ # 1 less than 5 not less than, or unordered
+ # 2 less than or equal 6 not less than or equal, or unordered
+ # 3 unordered (a NaN) 7 ordered (no NaN)
+ # Only item 0 is processed when bit 3 (0x8) of ext is set.
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(srcSize == destSize);
+ int size = srcSize;
+ int sizeBits = size * 8;
+ int items = (ext & 0x8) ? 1: (sizeof(FloatRegBits) / size);
+ uint64_t result = FpDestReg.uqw;
+
+ for (int i = 0; i < items; i++) {
+ int hiIndex = (i + 1) * sizeBits - 1;
+ int loIndex = (i + 0) * sizeBits;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
+ double arg1, arg2;
+
+ if (size == 4) {
+ floatInt fi;
+ fi.i = arg1Bits;
+ arg1 = fi.f;
+ fi.i = arg2Bits;
+ arg2 = fi.f;
+ } else {
+ doubleInt di;
+ di.i = arg1Bits;
+ arg1 = di.d;
+ di.i = arg2Bits;
+ arg2 = di.d;
+ }
+
+ uint64_t resBits = 0;
+ bool nanop = isnan(arg1) || isnan(arg2);
+ switch (ext & mask(3)) {
+ case 0:
+ if (arg1 == arg2 && !nanop)
+ resBits = mask(sizeBits);
+ break;
+ case 1:
+ if (arg1 < arg2 && !nanop)
+ resBits = mask(sizeBits);
+ break;
+ case 2:
+ if (arg1 <= arg2 && !nanop)
+ resBits = mask(sizeBits);
+ break;
+ case 3:
+ if (nanop)
+ resBits = mask(sizeBits);
+ break;
+ case 4:
+ if (arg1 != arg2 || nanop)
+ resBits = mask(sizeBits);
+ break;
+ case 5:
+ if (!(arg1 < arg2) || nanop)
+ resBits = mask(sizeBits);
+ break;
+ case 6:
+ if (!(arg1 <= arg2) || nanop)
+ resBits = mask(sizeBits);
+ break;
+ case 7:
+ if (!nanop)
+ resBits = mask(sizeBits);
+ break;
+ };
+
+ result = insertBits(result, hiIndex, loIndex, resBits);
+ }
+ FpDestReg.uqw = result;
+ '''
+
+ class Mcmpf2rf(MediaOp):
+ # Microop whose generated code compares the lowest floating point item
+ # of FpSrcReg1 and FpSrcReg2 and records the outcome in ccFlagBits
+ # instead of a register. OF, SF, AF, ZF, PF and CF are cleared first;
+ # then ZF, PF and CF are all set for an unordered compare, CF alone for
+ # less than, and ZF alone for equal. The destination passed to the base
+ # class is the fixed string "InstRegIndex(0)". Items are 4-byte floats
+ # or 8-byte doubles (asserted).
+ def __init__(self, src1, src2,\
+ size = None, destSize = None, srcSize = None, ext = None):
+ super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,\
+ src2, size, destSize, srcSize, ext)
+ code = '''
+ union floatInt
+ {
+ float f;
+ uint32_t i;
+ };
+ union doubleInt
+ {
+ double d;
+ uint64_t i;
+ };
+
+ assert(srcSize == destSize);
+ assert(srcSize == 4 || srcSize == 8);
+ int size = srcSize;
+ int sizeBits = size * 8;
+
+ double arg1, arg2;
+ uint64_t arg1Bits = bits(FpSrcReg1.uqw, sizeBits - 1, 0);
+ uint64_t arg2Bits = bits(FpSrcReg2.uqw, sizeBits - 1, 0);
+ if (size == 4) {
+ floatInt fi;
+ fi.i = arg1Bits;
+ arg1 = fi.f;
+ fi.i = arg2Bits;
+ arg2 = fi.f;
+ } else {
+ doubleInt di;
+ di.i = arg1Bits;
+ arg1 = di.d;
+ di.i = arg2Bits;
+ arg2 = di.d;
+ }
+
+ // ZF PF CF
+ // Unordered 1 1 1
+ // Greater than 0 0 0
+ // Less than 0 0 1
+ // Equal 1 0 0
+ // OF = SF = AF = 0
+ ccFlagBits = ccFlagBits & ~(OFBit | SFBit | AFBit |
+ ZFBit | PFBit | CFBit);
+ if (isnan(arg1) || isnan(arg2))
+ ccFlagBits = ccFlagBits | (ZFBit | PFBit | CFBit);
+ else if(arg1 < arg2)
+ ccFlagBits = ccFlagBits | CFBit;
+ else if(arg1 == arg2)
+ ccFlagBits = ccFlagBits | ZFBit;
+ '''
}};