: mpJitMgr(pJitMgr)
{
mVWidth = pJitMgr->mVWidth;
+#if USE_SIMD16_BUILDER
+ mVWidth2 = pJitMgr->mVWidth * 2;
+#endif
mpIRBuilder = &pJitMgr->mBuilder;
mSimdFP32Ty = VectorType::get(mFP32Ty, mVWidth);
mSimdVectorTy = ArrayType::get(mSimdFP32Ty, 4);
mSimdVectorTRTy = ArrayType::get(mSimdFP32Ty, 5);
+#if USE_SIMD16_BUILDER
+ mSimd2Int1Ty = VectorType::get(mInt1Ty, mVWidth2);
+ mSimd2Int16Ty = VectorType::get(mInt16Ty, mVWidth2);
+ mSimd2Int32Ty = VectorType::get(mInt32Ty, mVWidth2);
+ mSimd2Int64Ty = VectorType::get(mInt64Ty, mVWidth2);
+ mSimd2FP16Ty = VectorType::get(mFP16Ty, mVWidth2);
+ mSimd2FP32Ty = VectorType::get(mFP32Ty, mVWidth2);
+ mSimd2VectorTy = ArrayType::get(mSimd2FP32Ty, 4);
+ mSimd2VectorTRTy = ArrayType::get(mSimd2FP32Ty, 5);
+#endif
if (sizeof(uint32_t*) == 4)
{
mIntPtrTy = mInt32Ty;
mSimdIntPtrTy = mSimdInt32Ty;
+#if USE_SIMD16_BUILDER
+ mSimd2IntPtrTy = mSimd2Int32Ty;
+#endif
}
else
{
SWR_ASSERT(sizeof(uint32_t*) == 8);
+
mIntPtrTy = mInt64Ty;
mSimdIntPtrTy = mSimdInt64Ty;
+#if USE_SIMD16_BUILDER
+ mSimd2IntPtrTy = mSimd2Int64Ty;
+#endif
}
}
}
#include "JitManager.h"
#include "common/formats.h"
+// Compile-time switch for the code wrapped in "#if USE_SIMD16_BUILDER".
+// It is defined as 0 here, so wherever this definition is visible those
+// sections are compiled out.
+#define USE_SIMD16_BUILDER 0
+
namespace SwrJit
{
using namespace llvm;
IRBuilder<>* mpIRBuilder;
uint32_t mVWidth;
+#if USE_SIMD16_BUILDER
+ uint32_t mVWidth2;
+#endif
// Built in types.
Type* mVoidTy;
Type* mSimdIntPtrTy;
Type* mSimdVectorTy;
Type* mSimdVectorTRTy;
+#if USE_SIMD16_BUILDER
+ Type* mSimd2FP16Ty;
+ Type* mSimd2FP32Ty;
+ Type* mSimd2Int1Ty;
+ Type* mSimd2Int16Ty;
+ Type* mSimd2Int32Ty;
+ Type* mSimd2Int64Ty;
+ Type* mSimd2IntPtrTy;
+ Type* mSimd2VectorTy;
+ Type* mSimd2VectorTRTy;
+#endif
#include "gen_builder.hpp"
#include "gen_builder_x86.hpp"
return UndefValue::get(VectorType::get(mFP32Ty, mVWidth));
}
+#if USE_SIMD16_BUILDER
+ /// @brief Returns an undef value whose type is a vector of mVWidth2 FP32 lanes.
+ Value *Builder::VUNDEF2_F()
+ {
+     Type *pWideFloatTy = VectorType::get(mFP32Ty, mVWidth2);
+
+     return UndefValue::get(pWideFloatTy);
+ }
+
+#endif
Value *Builder::VUNDEF(Type* t)
{
return UndefValue::get(VectorType::get(t, mVWidth));
return vGather;
}
+#if USE_SIMD16_BUILDER
+ //////////////////////////////////////////////////////////////////////////
+ /// @brief Builds an mVWidth-lane FP32 vector out of one half of a
+ ///        double-width (mSimd2FP32Ty sized) vector.
+ /// @param a2 - double-width source vector; it is bitcast to mSimd2FP32Ty
+ ///        before any lane is read, mirroring what INSERT does with its
+ ///        operands, so the extracted lanes always match the FP32 result.
+ /// @param imm - 0 selects lanes [0, mVWidth); any non-zero value selects
+ ///        lanes [mVWidth, 2 * mVWidth).
+ /// @return the vector obtained by inserting the selected lanes, in order,
+ ///         into VUNDEF_F().
+ Value *Builder::EXTRACT(Value *a2, uint32_t imm)
+ {
+     // first source lane of the requested half
+     const uint32_t i0 = (imm > 0) ? mVWidth : 0;
+
+     // The result is built on top of VUNDEF_F(), so the lanes fed to VINSERT
+     // must be FP32 as well; normalize the source once up front.
+     Value *source = BITCAST(a2, mSimd2FP32Ty);
+
+     Value *result = VUNDEF_F();
+
+     for (uint32_t i = 0; i < mVWidth; i += 1)
+     {
+         Value *temp = VEXTRACT(source, C(i0 + i));
+
+         result = VINSERT(result, temp, C(i));
+     }
+
+     return result;
+ }
+
+ //////////////////////////////////////////////////////////////////////////
+ /// @brief Overwrites one half of a double-width vector with the lanes of a
+ ///        single-width vector.
+ /// @param a2 - double-width destination vector; it is bitcast to
+ ///        mSimd2FP32Ty and used as the starting value of the result.
+ /// @param b - single-width source vector; if its scalar type is not float
+ ///        it is bitcast to mSimdFP32Ty before its lanes are read.
+ /// @param imm - 0 writes lanes [0, mVWidth) of the result; any non-zero
+ ///        value writes lanes [mVWidth, 2 * mVWidth).
+ /// @return the mSimd2FP32Ty vector with the selected half replaced by b.
+ Value *Builder::INSERT(Value *a2, Value * b, uint32_t imm)
+ {
+     // first destination lane of the requested half
+     const uint32_t i0 = (imm > 0) ? mVWidth : 0;
+
+     Value *result = BITCAST(a2, mSimd2FP32Ty);
+
+     // The source type does not change between lanes, so normalize it to
+     // FP32 once here instead of re-checking it on every loop iteration.
+     if (!b->getType()->getScalarType()->isFloatTy())
+     {
+         b = BITCAST(b, mSimdFP32Ty);
+     }
+
+     for (uint32_t i = 0; i < mVWidth; i += 1)
+     {
+         Value *temp = VEXTRACT(b, C(i));
+
+         result = VINSERT(result, temp, C(i0 + i));
+     }
+
+     return result;
+ }
+
+#endif
//////////////////////////////////////////////////////////////////////////
/// @brief convert x86 <N x float> mask to llvm <N x i1> mask
Value* Builder::MASK(Value* vmask)
Value *VIMMED1(bool i);
Value *VUNDEF(Type* t);
Value *VUNDEF_F();
+#if USE_SIMD16_BUILDER
+Value *VUNDEF2_F();
+#endif
Value *VUNDEF_I();
Value *VUNDEF(Type* ty, uint32_t size);
Value *VUNDEF_IPTR();
/// @brief functions that build IR to call x86 intrinsics directly, or
/// emulate them with other instructions if not available on the host
//////////////////////////////////////////////////////////////////////////
+
+#if USE_SIMD16_BUILDER
+Value *EXTRACT(Value *a, uint32_t imm);
+Value *INSERT(Value *a, Value *b, uint32_t imm);
+
+#endif
Value *MASKLOADD(Value* src, Value* mask);
void Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets,
#endif
void StoreVertexElements(Value* pVtxOut, const uint32_t outputElt, const uint32_t numEltsToStore, Value* (&vVertexElements)[4]);
+#if USE_SIMD16_BUILDER
+ void StoreVertexElements2(Value* pVtxOut, const uint32_t outputElt, const uint32_t numEltsToStore, Value* (&vVertexElements)[4]);
+#endif
#if USE_SIMD16_SHADERS
Value* GenerateCompCtrlVector(const ComponentControl ctrl, bool useVertexID2);
// GEP
pVtxOut = GEP(pVtxOut, C(0));
#if USE_SIMD16_SHADERS
-#if 0
- pVtxOut = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, mVWidth * 2), 0));
+#if 0// USE_SIMD16_BUILDER
+ pVtxOut = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, mVWidth2), 0));
#else
pVtxOut = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, mVWidth), 0));
#endif
if (currentVertexElement > 3)
{
+#if USE_SIMD16_BUILDER
+ Value *pVtxSrc2[4];
+
+ // pack adjacent pairs of SIMD8s into SIMD16s
+ for (uint32_t i = 0; i < 4; i += 1)
+ {
+ pVtxSrc2[i] = VUNDEF2_F();
+
+ pVtxSrc2[i] = INSERT(pVtxSrc2[i], vVertexElements[i], 0);
+ pVtxSrc2[i] = INSERT(pVtxSrc2[i], vVertexElements2[i], 1);
+ }
+
+ // store SIMD16s
+ Value *pVtxOut2 = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, mVWidth2), 0));
+ StoreVertexElements2(pVtxOut2, outputElt, 4, pVtxSrc2);
+
+#else
StoreVertexElements(pVtxOut, outputElt, 4, vVertexElements);
StoreVertexElements(GEP(pVtxOut, C(1)), outputElt, 4, vVertexElements2);
+#endif
outputElt += 1;
// reset to the next vVertexElement to output
for(uint32_t c = 0; c < numEltsToStore; ++c)
{
// STORE expects FP32 x vWidth type, just bitcast if needed
- if(!vVertexElements[c]->getType()->getScalarType()->isFloatTy()){
+ if(!vVertexElements[c]->getType()->getScalarType()->isFloatTy())
+ {
#if FETCH_DUMP_VERTEX
PRINT("vVertexElements[%d]: 0x%x\n", {C(c), vVertexElements[c]});
#endif
}
}
+#if USE_SIMD16_BUILDER
+/// @brief Emits one STORE per vertex element vector, bitcasting any element
+///        whose scalar type is not float to mSimd2FP32Ty beforehand.
+/// @param pVtxOut - pointer the destination GEPs are computed from
+/// @param outputElt - output slot; entry c is stored at index outputElt * 4 + c
+/// @param numEltsToStore - how many entries of vVertexElements to store (at most 4)
+/// @param vVertexElements - element vectors to store; entries that needed a
+///        bitcast are replaced in place by the bitcast value
+void FetchJit::StoreVertexElements2(Value* pVtxOut, const uint32_t outputElt, const uint32_t numEltsToStore, Value* (&vVertexElements)[4])
+{
+    SWR_ASSERT(numEltsToStore <= 4, "Invalid element count.");
+
+    // index of the first row belonging to this output slot
+    const uint32_t firstRow = outputElt * 4;
+
+    for (uint32_t elt = 0; elt < numEltsToStore; elt += 1)
+    {
+        // alias into the caller's array so the bitcast result is written back
+        Value*& vElement = vVertexElements[elt];
+
+        const bool isFloat = vElement->getType()->getScalarType()->isFloatTy();
+
+#if FETCH_DUMP_VERTEX
+        // dump the element as it was passed in, before any bitcast
+        if (isFloat)
+        {
+            PRINT("vVertexElements[%d]: %f\n", { C(elt), vElement });
+        }
+        else
+        {
+            PRINT("vVertexElements[%d]: 0x%x\n", { C(elt), vElement });
+        }
+#endif
+        // the value handed to STORE is always an mSimd2FP32Ty vector
+        if (!isFloat)
+        {
+            vElement = BITCAST(vElement, mSimd2FP32Ty);
+        }
+
+        // firstRow selects the output slot, elt the row within that slot
+        Value* pDest = GEP(pVtxOut, C(firstRow + elt), "destGEP");
+        STORE(vElement, pDest);
+    }
+}
+
+#endif
//////////////////////////////////////////////////////////////////////////
/// @brief Generates a constant vector of values based on the
/// ComponentControl value