}
else
{
- Value *src0 = EXTRACT2_F(vSrc, 0);
- Value *src1 = EXTRACT2_F(vSrc, 1);
+ Value *src0 = EXTRACT2(vSrc, 0);
+ Value *src1 = EXTRACT2(vSrc, 1);
- Value *indices0 = EXTRACT2_I(vIndices, 0);
- Value *indices1 = EXTRACT2_I(vIndices, 1);
+ Value *indices0 = EXTRACT2(vIndices, 0);
+ Value *indices1 = EXTRACT2(vIndices, 1);
- Value *vmask16 = VMASK2(vMask);
-
- Value *mask0 = MASK(EXTRACT2_I(vmask16, 0)); // TODO: do this better..
- Value *mask1 = MASK(EXTRACT2_I(vmask16, 1));
+ Value *mask0 = EXTRACT2(vMask, 0);
+ Value *mask1 = EXTRACT2(vMask, 1);
Value *gather0 = GATHERPS(src0, pBase, indices0, mask0, scale);
Value *gather1 = GATHERPS(src1, pBase, indices1, mask1, scale);
}
else
{
- Value *src0 = EXTRACT2_F(vSrc, 0);
- Value *src1 = EXTRACT2_F(vSrc, 1);
-
- Value *indices0 = EXTRACT2_I(vIndices, 0);
- Value *indices1 = EXTRACT2_I(vIndices, 1);
+ Value *src0 = EXTRACT2(vSrc, 0);
+ Value *src1 = EXTRACT2(vSrc, 1);
- Value *vmask16 = VMASK2(vMask);
+ Value *indices0 = EXTRACT2(vIndices, 0);
+ Value *indices1 = EXTRACT2(vIndices, 1);
- Value *mask0 = MASK(EXTRACT2_I(vmask16, 0)); // TODO: do this better..
- Value *mask1 = MASK(EXTRACT2_I(vmask16, 1));
+ Value *mask0 = EXTRACT2(vMask, 0);
+ Value *mask1 = EXTRACT2(vMask, 1);
Value *gather0 = GATHERDD(src0, pBase, indices0, mask0, scale);
Value *gather1 = GATHERDD(src1, pBase, indices1, mask1, scale);
}
#if USE_SIMD16_BUILDER
- //////////////////////////////////////////////////////////////////////////
- /// @brief
- Value *Builder::EXTRACT2_F(Value *a2, uint32_t imm)
- {
- const uint32_t i0 = (imm > 0) ? mVWidth : 0;
-
- Value *result = VUNDEF_F();
-
- for (uint32_t i = 0; i < mVWidth; i += 1)
- {
-#if 1
- if (!a2->getType()->getScalarType()->isFloatTy())
- {
- a2 = BITCAST(a2, mSimd2FP32Ty);
- }
-
-#endif
- Value *temp = VEXTRACT(a2, C(i0 + i));
-
- result = VINSERT(result, temp, C(i));
- }
-
- return result;
- }
-
- Value *Builder::EXTRACT2_I(Value *a2, uint32_t imm)
+ Value *Builder::EXTRACT2(Value *x, uint32_t imm) // Extracts one 8-lane half of x: imm == 0 selects lanes 0-7, any other imm selects lanes 8-15. No bitcast is applied here (unlike the removed EXTRACT2_F/EXTRACT2_I pair).
{
- return BITCAST(EXTRACT2_F(a2, imm), mSimdInt32Ty);
+ if (imm == 0) // lower half requested
+ return VSHUFFLE(x, UndefValue::get(x->getType()), {0, 1, 2, 3, 4, 5, 6, 7}); // second operand is undef; only indices 0-7 of x are referenced
+ else // every non-zero imm is treated as a request for the upper half
+ return VSHUFFLE(x, UndefValue::get(x->getType()), {8, 9, 10, 11, 12, 13, 14, 15}); // NOTE(review): indices 8-15 assume x has 16 lanes; presumably VSHUFFLE wraps LLVM shufflevector - confirm
}
Value *Builder::JOIN2(Value *a, Value *b)
vOffsets16 = ADD(vOffsets16, vInstanceStride16);
// TODO: remove the following simd8 interop stuff once all code paths are fully widened to SIMD16..
- Value *vmask16 = VMASK2(vGatherMask16);
- Value *vGatherMask = MASK(EXTRACT2_I(vmask16, 0));
- Value *vGatherMask2 = MASK(EXTRACT2_I(vmask16, 1));
-
- Value *vOffsets = EXTRACT2_I(vOffsets16, 0);
- Value *vOffsets2 = EXTRACT2_I(vOffsets16, 1);
+ Value *vGatherMask = EXTRACT2(vGatherMask16, 0);
+ Value *vGatherMask2 = EXTRACT2(vGatherMask16, 1);
+ Value *vOffsets = EXTRACT2(vOffsets16, 0);
+ Value *vOffsets2 = EXTRACT2(vOffsets16, 1);
#else
// override cur indices with 0 if pitch is 0
Value* pZeroPitchMask = ICMP_EQ(vStride, VIMMED1(0));
// SIMD16 PSHUFB isnt part of AVX-512F, so split into SIMD8 for the sake of KNL, for now..
- Value *vGatherResult_lo = EXTRACT2_I(vGatherResult, 0);
- Value *vGatherResult_hi = EXTRACT2_I(vGatherResult, 1);
+ Value *vGatherResult_lo = EXTRACT2(vGatherResult, 0);
+ Value *vGatherResult_hi = EXTRACT2(vGatherResult, 1);
Value *vShufResult_lo = BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask), vGatherTy);
Value *vShufResult_hi = BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask), vGatherTy);
break;
}
- Value *vGatherResult_lo = EXTRACT2_I(vGatherResult, 0);
- Value *vGatherResult_hi = EXTRACT2_I(vGatherResult, 1);
+ Value *vGatherResult_lo = EXTRACT2(vGatherResult, 0);
+ Value *vGatherResult_hi = EXTRACT2(vGatherResult, 1);
Value *temp_lo = BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask), vGatherTy);
Value *temp_hi = BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask), vGatherTy);
{
// SIMD16 PSHUFB isnt part of AVX-512F, so split into SIMD8 for the sake of KNL, for now..
- Value *vGatherResult_lo = EXTRACT2_I(vGatherResult[0], 0);
- Value *vGatherResult_hi = EXTRACT2_I(vGatherResult[0], 1);
+ Value *vGatherResult_lo = EXTRACT2(vGatherResult[0], 0);
+ Value *vGatherResult_hi = EXTRACT2(vGatherResult[0], 1);
Value *vShufResult_lo = BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask), vGatherTy);
Value *vShufResult_hi = BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask), vGatherTy);
Value *vi128ZW_hi = nullptr;
if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3))
{
- Value *vGatherResult_lo = EXTRACT2_I(vGatherResult[1], 0);
- Value *vGatherResult_hi = EXTRACT2_I(vGatherResult[1], 1);
+ Value *vGatherResult_lo = EXTRACT2(vGatherResult[1], 0);
+ Value *vGatherResult_hi = EXTRACT2(vGatherResult[1], 1);
Value *vShufResult_lo = BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask), vGatherTy);
Value *vShufResult_hi = BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask), vGatherTy);
// SIMD16 PSHUFB isnt part of AVX-512F, so split into SIMD8 for the sake of KNL, for now..
- Value *vGatherResult_lo = EXTRACT2_I(vGatherResult[selectedGather], 0);
- Value *vGatherResult_hi = EXTRACT2_I(vGatherResult[selectedGather], 1);
+ Value *vGatherResult_lo = EXTRACT2(vGatherResult[selectedGather], 0);
+ Value *vGatherResult_hi = EXTRACT2(vGatherResult[selectedGather], 1);
Value *temp_lo = BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask[selectedMask]), vGatherTy);
Value *temp_hi = BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask[selectedMask]), vGatherTy);