#include "swr_state.h"
#include "swr_screen.h"
-#if HAVE_LLVM < 0x0500
-namespace llvm {
-typedef AttributeSet AttributeList;
-}
-#endif
-
using namespace SwrJit;
using namespace llvm;
gallivm_free_ir(gallivm);
}
+ // Emits a store of one vertex-shader output value (pVal) into the vertex
+ // output pointed to by pVtxOutput, addressed by attribute slot and channel.
+ // pVsContext is only read when USE_SIMD16_FRONTEND is enabled.
+ void WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput,
+ unsigned slot, unsigned channel);
+
struct gallivm_state *gallivm;
PFN_VERTEX_FUNC CompileVS(struct swr_context *ctx, swr_jit_vs_key &key);
PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_fs_key &key);
IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
+#if USE_SIMD16_FRONTEND
+ const uint32_t simdVertexStride = sizeof(simdvertex) * 2;
+ const uint32_t numSimdBatches = (pGS->maxNumVerts + (mVWidth * 2) - 1) / (mVWidth * 2);
+#else
const uint32_t simdVertexStride = sizeof(simdvertex);
- const uint32_t numSimdBatches = (pGS->maxNumVerts + 7) / 8;
+ const uint32_t numSimdBatches = (pGS->maxNumVerts + mVWidth - 1) / mVWidth;
+#endif
const uint32_t inputPrimStride = numSimdBatches * simdVertexStride;
Value *pStream = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_pStream });
inputPrimStride * 6,
inputPrimStride * 7 } );
- Value *vVertexSlot = ASHR(unwrap(emitted_vertices_vec), 3);
- Value *vSimdSlot = AND(unwrap(emitted_vertices_vec), 7);
+#if USE_SIMD16_FRONTEND
+ const uint32_t simdShift = log2(mVWidth * 2);
+ Value *vSimdSlot = AND(unwrap(emitted_vertices_vec), (mVWidth * 2) - 1);
+#else
+ const uint32_t simdShift = log2(mVWidth);
+ Value *vSimdSlot = AND(unwrap(emitted_vertices_vec), mVWidth - 1);
+#endif
+ Value *vVertexSlot = ASHR(unwrap(emitted_vertices_vec), simdShift);
for (uint32_t attrib = 0; attrib < iface->num_outputs; ++attrib) {
uint32_t attribSlot = attrib;
else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_LAYER)
attribSlot = VERTEX_RTAI_SLOT;
+#if USE_SIMD16_FRONTEND
+ Value *vOffsetsAttrib =
+ ADD(vOffsets, MUL(vVertexSlot, VIMMED1((uint32_t)sizeof(simdvertex) * 2)));
+ vOffsetsAttrib =
+ ADD(vOffsetsAttrib, VIMMED1((uint32_t)(attribSlot*sizeof(simdvector) * 2)));
+#else
Value *vOffsetsAttrib =
ADD(vOffsets, MUL(vVertexSlot, VIMMED1((uint32_t)sizeof(simdvertex))));
vOffsetsAttrib =
ADD(vOffsetsAttrib, VIMMED1((uint32_t)(attribSlot*sizeof(simdvector))));
+#endif
vOffsetsAttrib =
ADD(vOffsetsAttrib, MUL(vSimdSlot, VIMMED1((uint32_t)sizeof(float))));
MASKED_SCATTER(vData, vPtrs, 32, vMask1);
+#if USE_SIMD16_FRONTEND
+ vOffsetsAttrib =
+ ADD(vOffsetsAttrib, VIMMED1((uint32_t)sizeof(simdscalar) * 2));
+#else
vOffsetsAttrib =
ADD(vOffsetsAttrib, VIMMED1((uint32_t)sizeof(simdscalar)));
+#endif
}
}
}
AttrBuilder attrBuilder;
attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
- AttributeList attrSet = AttributeList::get(
- JM()->mContext, AttributeList::FunctionIndex, attrBuilder);
std::vector<Type *> gsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)};
GlobalValue::ExternalLinkage,
"GS",
JM()->mpCurrentModule);
- pFunction->addAttributes(AttributeList::FunctionIndex, attrSet);
+#if HAVE_LLVM < 0x0500
+ AttributeSet attrSet = AttributeSet::get(
+ JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
+ pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
+#else
+ pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
+#endif
BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
IRB()->SetInsertPoint(block);
return func;
}
+// Emits IR that stores one vertex-shader output value into the vertex
+// output structure.
+//
+//   pVal        value to store
+//   pVsContext  VS context pointer; only read in the USE_SIMD16_FRONTEND path
+//   pVtxOutput  pointer to the vertex output being written
+//   slot        attribute slot index within the vertex output
+//   channel     component index within the slot
+//
+// NOTE(review): the exact element types reached by the GEP/STORE index lists
+// depend on the generated vertex/context type layouts, which are not visible
+// here -- confirm against those definitions.
+void
+BuilderSWR::WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput, unsigned slot, unsigned channel)
+{
+#if USE_SIMD16_FRONTEND
+ // interleave the simdvertex components into the dest simd16vertex
+ // slot16offset = slot8offset * 2
+ // comp16offset = comp8offset * 2 + alternateOffset
+
+ // Runtime alternate offset read from the VS context; it is used below as
+ // the last GEP index, i.e. the "+ alternateOffset" term of comp16offset.
+ Value *offset = LOAD(pVsContext, { 0, SWR_VS_CONTEXT_AlternateOffset });
+ // Address slot * 2 (slot16offset), then step by the alternate offset.
+ Value *pOut = GEP(pVtxOutput, { C(0), C(0), C(slot * 2), offset } );
+ // Index by channel * 2 from that address (the "comp8offset * 2" term).
+ STORE(pVal, pOut, {channel * 2});
+#else
+ // Address the attribute slot first, then store at the channel within it;
+ // together the two index lists address {0, 0, slot, channel}.
+ Value *pOut = GEP(pVtxOutput, {0, 0, slot});
+ STORE(pVal, pOut, {0, channel});
+#endif
+}
+
PFN_VERTEX_FUNC
BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
{
AttrBuilder attrBuilder;
attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
- AttributeList attrSet = AttributeList::get(
- JM()->mContext, AttributeList::FunctionIndex, attrBuilder);
std::vector<Type *> vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
GlobalValue::ExternalLinkage,
"VS",
JM()->mpCurrentModule);
- pFunction->addAttributes(AttributeList::FunctionIndex, attrSet);
+#if HAVE_LLVM < 0x0500
+ AttributeSet attrSet = AttributeSet::get(
+ JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
+ pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
+#else
+ pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
+#endif
BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
IRB()->SetInsertPoint(block);
uint32_t outSlot = attrib;
if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE)
outSlot = VERTEX_POINT_SIZE_SLOT;
- STORE(val, vtxOutput, {0, 0, outSlot, channel});
+ WriteVS(val, pVsCtx, vtxOutput, outSlot, channel);
}
}
&swr_vs->info.base);
if (val < 4) {
LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val], "");
- STORE(unwrap(dist), vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_LO_SLOT, val});
+ WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val);
} else {
LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val - 4], "");
- STORE(unwrap(dist), vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4});
+ WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4);
}
continue;
}
FMUL(unwrap(cw), VBROADCAST(pw)))));
if (val < 4)
- STORE(dist, vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_LO_SLOT, val});
+ WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val);
else
- STORE(dist, vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4});
+ WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4);
}
}
AttrBuilder attrBuilder;
attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
- AttributeList attrSet = AttributeList::get(
- JM()->mContext, AttributeList::FunctionIndex, attrBuilder);
std::vector<Type *> fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
GlobalValue::ExternalLinkage,
"FS",
JM()->mpCurrentModule);
- pFunction->addAttributes(AttributeList::FunctionIndex, attrSet);
+#if HAVE_LLVM < 0x0500
+ AttributeSet attrSet = AttributeSet::get(
+ JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
+ pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
+#else
+ pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
+#endif
BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
IRB()->SetInsertPoint(block);