Value *gather0 = GATHERPS(src0, pBase, indices0, mask0, scale);
Value *gather1 = GATHERPS(src1, pBase, indices1, mask1, scale);
- vGather = INSERT2_F(vGather, gather0, 0);
- vGather = INSERT2_F(vGather, gather1, 1);
+ vGather = JOIN2(gather0, gather1);
}
return vGather;
Value *result0 = PSRLI(a0, imm);
Value *result1 = PSRLI(a1, imm);
- result = INSERT2_I(result, result0, 0);
- result = INSERT2_I(result, result1, 1);
+ result = JOIN2(result0, result1);
}
return result;
return BITCAST(EXTRACT2_F(a2, imm), mSimdInt32Ty);
}
- //////////////////////////////////////////////////////////////////////////
- /// @brief
- Value *Builder::INSERT2_F(Value *a2, Value *b, uint32_t imm)
+ Value *Builder::JOIN2(Value *a, Value *b) // Combines a and b with one VSHUFFLE; replaces the INSERT2_F/INSERT2_I pair removed in this hunk.
{ // NOTE(review): the @brief header that preceded INSERT2_F is deleted and JOIN2 gets none of its own.
- const uint32_t i0 = (imm > 0) ? mVWidth : 0;
-
- Value *result = BITCAST(a2, mSimd2FP32Ty);
-
- for (uint32_t i = 0; i < mVWidth; i += 1)
- {
-#if 1
- if (!b->getType()->getScalarType()->isFloatTy())
- {
- b = BITCAST(b, mSimdFP32Ty);
- }
-
-#endif
- Value *temp = VEXTRACT(b, C(i));
-
- result = VINSERT(result, temp, C(i0 + i));
- }
-
- return result;
+ return VSHUFFLE(a, b, // Unlike the removed INSERT2_F/INSERT2_I, no BITCAST is applied to the operands or to the result here.
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); // 16 indices are hard-coded where the old loop used mVWidth; presumably 0-7 pick lanes of a and 8-15 lanes of b (assumes mVWidth == 8) - TODO confirm.
}
-
- Value *Builder::INSERT2_I(Value *a2, Value *b, uint32_t imm)
- {
- return BITCAST(INSERT2_F(a2, b, imm), mSimd2Int32Ty);
- }
-
#endif
+
//////////////////////////////////////////////////////////////////////////
/// @brief convert x86 <N x float> mask to llvm <N x i1> mask
Value *Builder::MASK(Value *vmask)
// offset indices by baseVertex
#if USE_SIMD16_GATHERS
#if USE_SIMD16_BUILDER
- Value *vIndices16 = VUNDEF2_I();
-
- vIndices16 = INSERT2_I(vIndices16, vIndices, 0);
- vIndices16 = INSERT2_I(vIndices16, vIndices2, 1);
+ Value *vIndices16 = JOIN2(vIndices, vIndices2);
vCurIndices16 = ADD(vIndices16, vBaseVertex16);
#else
// offset indices by baseVertex
#if USE_SIMD16_GATHERS
#if USE_SIMD16_BUILDER
- Value *vIndices16 = VUNDEF2_I();
-
- vIndices16 = INSERT2_I(vIndices16, vIndices, 0);
- vIndices16 = INSERT2_I(vIndices16, vIndices2, 1);
+ Value *vIndices16 = JOIN2(vIndices, vIndices2);
vCurIndices16 = ADD(vIndices16, vBaseVertex16);
#else
{
#if USE_SIMD16_BUILDER
// pack adjacent pairs of SIMD8s into SIMD16s
- pVtxSrc2[currentVertexElement] = VUNDEF2_F();
- pVtxSrc2[currentVertexElement] = INSERT2_F(pVtxSrc2[currentVertexElement], pResults[c], 0);
- pVtxSrc2[currentVertexElement] = INSERT2_F(pVtxSrc2[currentVertexElement], pResults2[c], 1);
+ pVtxSrc2[currentVertexElement] = JOIN2(pResults[c], pResults2[c]);
#else
vVertexElements[currentVertexElement] = pResults[c];
#else
Value *gatherResult[2];
- gatherResult[0] = VUNDEF2_I();
- gatherResult[1] = VUNDEF2_I();
-
- gatherResult[0] = INSERT2_I(gatherResult[0], vGatherResult[0], 0);
- gatherResult[0] = INSERT2_I(gatherResult[0], vGatherResult2[0], 1);
-
- gatherResult[1] = INSERT2_I(gatherResult[1], vGatherResult[1], 0);
- gatherResult[1] = INSERT2_I(gatherResult[1], vGatherResult2[1], 1);
+ gatherResult[0] = JOIN2(vGatherResult[0], vGatherResult2[0]);
+ gatherResult[1] = JOIN2(vGatherResult[1], vGatherResult2[1]);
#endif
Value *pVtxOut2 = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, mVWidth2), 0));
#if USE_SIMD16_BUILDER
// pack adjacent pairs of SIMD8s into SIMD16s
- pVtxSrc2[currentVertexElement] = VUNDEF2_F();
- pVtxSrc2[currentVertexElement] = INSERT2_F(pVtxSrc2[currentVertexElement], vVertexElements[currentVertexElement], 0);
- pVtxSrc2[currentVertexElement] = INSERT2_F(pVtxSrc2[currentVertexElement], vVertexElements2[currentVertexElement], 1);
+ pVtxSrc2[currentVertexElement] = JOIN2(vVertexElements[currentVertexElement],
+ vVertexElements2[currentVertexElement]);
#endif
#endif
#if USE_SIMD16_BUILDER
// pack adjacent pairs of SIMD8s into SIMD16s
- pVtxSrc2[currentVertexElement] = VUNDEF2_F();
- pVtxSrc2[currentVertexElement] = INSERT2_F(pVtxSrc2[currentVertexElement], vVertexElements[currentVertexElement], 0);
- pVtxSrc2[currentVertexElement] = INSERT2_F(pVtxSrc2[currentVertexElement], vVertexElements2[currentVertexElement], 1);
+ pVtxSrc2[currentVertexElement] = JOIN2(vVertexElements[currentVertexElement],
+ vVertexElements2[currentVertexElement]);
#endif
#endif
#if USE_SIMD16_BUILDER
// pack adjacent pairs of SIMD8s into SIMD16s
- pVtxSrc2[currentVertexElement] = VUNDEF2_F();
- pVtxSrc2[currentVertexElement] = INSERT2_F(pVtxSrc2[currentVertexElement], pGather, 0);
- pVtxSrc2[currentVertexElement] = INSERT2_F(pVtxSrc2[currentVertexElement], pGather2, 1);
+ pVtxSrc2[currentVertexElement] = JOIN2(pGather, pGather2);
#else
vVertexElements[currentVertexElement] = pGather;
// xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw
#if USE_SIMD16_BUILDER
- Value *gatherResult = VUNDEF2_I();
-
- gatherResult = INSERT2_I(gatherResult, vGatherResult, 0);
- gatherResult = INSERT2_I(gatherResult, vGatherResult2, 1);
+ Value *gatherResult = JOIN2(vGatherResult, vGatherResult2);
Value *pVtxOut2 = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, mVWidth2), 0));
#if USE_SIMD16_BUILDER
Value *gatherResult[2];
- gatherResult[0] = VUNDEF2_I();
- gatherResult[1] = VUNDEF2_I();
-
- gatherResult[0] = INSERT2_I(gatherResult[0], vGatherResult[0], 0);
- gatherResult[0] = INSERT2_I(gatherResult[0], vGatherResult2[0], 1);
-
- gatherResult[1] = INSERT2_I(gatherResult[1], vGatherResult[1], 0);
- gatherResult[1] = INSERT2_I(gatherResult[1], vGatherResult2[1], 1);
+ gatherResult[0] = JOIN2(vGatherResult[0], vGatherResult2[0]);
+ gatherResult[1] = JOIN2(vGatherResult[1], vGatherResult2[1]);
Value *pVtxOut2 = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, mVWidth2), 0));
#if USE_SIMD16_BUILDER
// pack adjacent pairs of SIMD8s into SIMD16s
- pVtxSrc2[currentVertexElement] = VUNDEF2_F();
- pVtxSrc2[currentVertexElement] = INSERT2_F(pVtxSrc2[currentVertexElement], pGather, 0);
- pVtxSrc2[currentVertexElement] = INSERT2_F(pVtxSrc2[currentVertexElement], pGather2, 1);
+ pVtxSrc2[currentVertexElement] = JOIN2(pGather, pGather2);
#else
vVertexElements[currentVertexElement] = pGather;
temp_hi = FMUL(CAST(fpCast, temp_hi, mSimdFP32Ty), conversionFactor);
}
- vVertexElements[currentVertexElement] = VUNDEF2_F();
- vVertexElements[currentVertexElement] = INSERT2_F(vVertexElements[currentVertexElement], temp_lo, 0);
- vVertexElements[currentVertexElement] = INSERT2_F(vVertexElements[currentVertexElement], temp_hi, 1);
+ vVertexElements[currentVertexElement] = JOIN2(temp_lo, temp_hi);
currentVertexElement += 1;
}
temp_hi = FMUL(CAST(fpCast, temp_hi, mSimdFP32Ty), conversionFactor);
}
- vVertexElements[currentVertexElement] = VUNDEF2_F();
- vVertexElements[currentVertexElement] = INSERT2_F(vVertexElements[currentVertexElement], temp_lo, 0);
- vVertexElements[currentVertexElement] = INSERT2_F(vVertexElements[currentVertexElement], temp_hi, 1);
+ vVertexElements[currentVertexElement] = JOIN2(temp_lo, temp_hi);
currentVertexElement += 1;
}
// 256i - 0 1 2 3 4 5 6 7
// xxxx xxxx xxxx xxxx yyyy yyyy yyyy yyyy
#if 0
-
- vi128XY = VUNDEF2_I();
- vi128XY = INSERT2_I(vi128XY, vi128XY_lo, 0);
- vi128XY = INSERT2_I(vi128XY, vi128XY_hi, 1);
+ vi128XY = JOIN2(vi128XY_lo, vi128XY_hi);
#endif
}
vi128ZW_lo = BITCAST(PERMD(vShufResult_lo, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
vi128ZW_hi = BITCAST(PERMD(vShufResult_hi, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
#if 0
-
- vi128ZW = VUNDEF2_I();
- vi128ZW = INSERT2_I(vi128ZW, vi128ZW_lo, 0);
- vi128ZW = INSERT2_I(vi128ZW, vi128ZW_hi, 1);
+ vi128ZW = JOIN2(vi128ZW_lo, vi128ZW_hi);
#endif
}
Value *temp_lo = CVTPH2PS(BITCAST(VEXTRACT(selectedPermute_lo, C(lane)), v8x16Ty));
Value *temp_hi = CVTPH2PS(BITCAST(VEXTRACT(selectedPermute_hi, C(lane)), v8x16Ty));
- vVertexElements[currentVertexElement] = VUNDEF2_F();
- vVertexElements[currentVertexElement] = INSERT2_F(vVertexElements[currentVertexElement], temp_lo, 0);
- vVertexElements[currentVertexElement] = INSERT2_F(vVertexElements[currentVertexElement], temp_hi, 1);
+ vVertexElements[currentVertexElement] = JOIN2(temp_lo, temp_hi);
}
else
{
temp_hi = FMUL(CAST(IntToFpCast, temp_hi, mSimdFP32Ty), conversionFactor);
}
- vVertexElements[currentVertexElement] = VUNDEF2_F();
- vVertexElements[currentVertexElement] = INSERT2_F(vVertexElements[currentVertexElement], temp_lo, 0);
- vVertexElements[currentVertexElement] = INSERT2_F(vVertexElements[currentVertexElement], temp_hi, 1);
+ vVertexElements[currentVertexElement] = JOIN2(temp_lo, temp_hi);
}
currentVertexElement += 1;
temp_hi = FMUL(CAST(fpCast, temp_hi, mSimdFP32Ty), conversionFactor);
}
- vVertexElements[currentVertexElement] = VUNDEF2_F();
- vVertexElements[currentVertexElement] = INSERT2_F(vVertexElements[currentVertexElement], temp_lo, 0);
- vVertexElements[currentVertexElement] = INSERT2_F(vVertexElements[currentVertexElement], temp_hi, 1);
+ vVertexElements[currentVertexElement] = JOIN2(temp_lo, temp_hi);
currentVertexElement += 1;
}
case Store1Int: return VIMMED2_1(1);
case StoreVertexId:
{
- Value* pId = VUNDEF2_F();
-
Value* pId_lo = BITCAST(LOAD(GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID })), mSimdFP32Ty);
Value* pId_hi = BITCAST(LOAD(GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID2 })), mSimdFP32Ty);
- pId = INSERT2_F(pId, pId_lo, 0);
- pId = INSERT2_F(pId, pId_hi, 1);
+ Value *pId = JOIN2(pId_lo, pId_hi);
return VBROADCAST2(pId);
}