# Generates definitions for SVE floating-point conversions (always
# unary, constructive, merging)
def sveCvtInst(name, Name, opClass, types, op, direction=CvtDir.Narrow,
- decoder='Generic'):
+ decoder='Generic', signed=False):
global header_output, exec_output, decoders
+
+ if signed:
+ mask = "SElement msk = mask(sizeof(DElement)*8);"
+ assign_code = '''
+ int sign_bit = bits(destElem, sizeof(DElement)*8 -1);
+ AA64FpDest_x%(bigElemSuffix)s[i] =
+ sign_bit? (destElem|~msk): destElem;
+ ''' % {
+ 'bigElemSuffix': 's' if direction == CvtDir.Narrow else 'd'
+ }
+ else:
+ mask = "";
+ assign_code = '''
+ AA64FpDest_x%(bigElemSuffix)s[i] = destElem;
+ ''' % {
+ 'bigElemSuffix': 's' if direction == CvtDir.Narrow else 'd'
+ }
+
code = sveEnabledCheckCode + '''
unsigned eCount = ArmStaticInst::getCurSveVecLen<%(bigElemType)s>(
xc->tcBase());
+ %(mask)s
for (unsigned i = 0; i < eCount; i++) {
SElement srcElem1 = AA64FpOp1_x%(bigElemSuffix)s[i] &
mask(sizeof(SElement) * 8);
DElement destElem = 0;
if (GpOp_x%(bigElemSuffix)s[i]) {
%(op)s
- AA64FpDest_x%(bigElemSuffix)s[i] = destElem;
+ %(assign)s;
} else {
AA64FpDest_x%(bigElemSuffix)s[i] =
AA64FpDestMerge_x%(bigElemSuffix)s[i];
}
}
- ''' % {'op': op,
- 'bigElemType': 'SElement' if direction == CvtDir.Narrow
+ ''' % {'bigElemType': 'SElement' if direction == CvtDir.Narrow
else 'DElement',
- 'bigElemSuffix': 's' if direction == CvtDir.Narrow else 'd'}
+ 'op': op, 'mask': mask,
+ 'bigElemSuffix': 's' if direction == CvtDir.Narrow else 'd',
+ 'assign': assign_code
+ }
+
iop = InstObjParams(name, 'Sve' + Name, 'SveUnaryPredOp',
{'code': code, 'op_class': opClass}, [])
header_output += SveWideningUnaryPredOpDeclare.subst(iop)
code = sveEnabledCheckCode + '''
unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
xc->tcBase());
+
ArmISA::VecRegContainer tmpVecC;
auto auxDest = tmpVecC.as<Element>();
int firstelem = -1, lastelem = -2;
'uint32_t, uint32_t',
'uint64_t, uint32_t',
'uint64_t, uint64_t'),
- fcvtzsCode, CvtDir.Narrow)
+ fcvtzsCode, CvtDir.Narrow, signed=True)
sveCvtInst('fcvtzs', 'FcvtzsWiden', 'SimdCvtOp',
('uint16_t, uint32_t',
'uint16_t, uint64_t',