pState->pfnBlendFunc[renderTarget] = pfnBlendFunc;
}
-void SwrSetLinkage(
- HANDLE hContext,
- uint32_t mask,
- const uint8_t* pMap)
-{
- API_STATE* pState = GetDrawState(GetContext(hContext));
-
- static const uint8_t IDENTITY_MAP[] =
- {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- };
- static_assert(sizeof(IDENTITY_MAP) == sizeof(pState->linkageMap),
- "Update for new value of MAX_ATTRIBUTES");
-
- pState->linkageMask = mask;
- pState->linkageCount = _mm_popcnt_u32(mask);
-
- if (!pMap)
- {
- pMap = IDENTITY_MAP;
- }
- memcpy(pState->linkageMap, pMap, pState->linkageCount);
-}
-
// update guardband multipliers for the viewport
void updateGuardband(API_STATE *pState)
{
(pState->state.depthStencilState.depthWriteEnable == FALSE) &&
(pState->state.depthStencilState.stencilTestEnable == FALSE) &&
(pState->state.depthStencilState.stencilWriteEnable == FALSE) &&
- (pState->state.linkageCount == 0))
+ (pState->state.backendState.numAttributes == 0))
{
pState->pfnProcessPrims = nullptr;
- pState->state.linkageMask = 0;
}
if (pState->state.soState.rasterizerDisable == true)
{
pState->pfnProcessPrims = nullptr;
- pState->state.linkageMask = 0;
}
- // set up the frontend attrib mask
- pState->state.feAttribMask = pState->state.linkageMask;
+ // set up the frontend attribute count
+ pState->state.feNumAttributes = 0;
+ const SWR_BACKEND_STATE& backendState = pState->state.backendState;
+ if (backendState.swizzleEnable)
+ {
+ // attribute swizzling is enabled, iterate over the map and record the max attribute used
+ for (uint32_t i = 0; i < backendState.numAttributes; ++i)
+ {
+ pState->state.feNumAttributes = std::max(pState->state.feNumAttributes, (uint32_t)backendState.swizzleMap[i].sourceAttrib + 1);
+ }
+ }
+ else
+ {
+ pState->state.feNumAttributes = pState->state.backendState.numAttributes;
+ }
+
if (pState->state.soState.soEnable)
{
+ uint32_t streamMasks = 0;
for (uint32_t i = 0; i < 4; ++i)
{
- pState->state.feAttribMask |= pState->state.soState.streamMasks[i];
+ streamMasks |= pState->state.soState.streamMasks[i];
+ }
+
+ DWORD maxAttrib;
+ if (_BitScanReverse(&maxAttrib, streamMasks))
+ {
+ pState->state.feNumAttributes = std::max(pState->state.feNumAttributes, (uint32_t)(maxAttrib + 1));
}
}
uint8_t* pBase = pInstanceBase + instance * instanceStride;
uint8_t* pCutBase = pCutBufferBase + instance * cutInstanceStride;
- DWORD numAttribs;
- if (_BitScanReverse(&numAttribs, state.feAttribMask))
- {
- numAttribs++;
- }
- else
- {
- numAttribs = 0;
- }
+ uint32_t numAttribs = state.feNumAttributes;
for (uint32_t stream = 0; stream < MAX_SO_STREAMS; ++stream)
{
return TemplateArgUnroller<FEDrawChooser>::GetFunc(IsIndexed, IsCutIndexEnabled, HasTessellation, HasGeometryShader, HasStreamOut, HasRasterization);
}
-
//////////////////////////////////////////////////////////////////////////
/// @brief Processes attributes for the backend based on linkage mask and
/// linkage map. Essentially just doing an SOA->AOS conversion and pack.
/// @param pLinkageMap - maps VS attribute slot to PS slot
/// @param triIndex - Triangle to process attributes for
/// @param pBuffer - Output result
-template<uint32_t NumVerts>
+template<typename NumVertsT, typename IsSwizzledT, typename HasConstantInterpT>
INLINE void ProcessAttributes(
DRAW_CONTEXT *pDC,
PA_STATE&pa,
- uint32_t linkageMask,
- const uint8_t* pLinkageMap,
uint32_t triIndex,
+ uint32_t primId,
float *pBuffer)
{
- DWORD slot = 0;
- uint32_t mapIdx = 0;
- LONG constantInterpMask = pDC->pState->state.backendState.constantInterpolationMask;
+ static_assert(NumVertsT::value > 0 && NumVertsT::value <= 3, "Invalid value for NumVertsT");
+ const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
+ LONG constantInterpMask = backendState.constantInterpolationMask;
const uint32_t provokingVertex = pDC->pState->state.frontendState.topologyProvokingVertex;
const PRIMITIVE_TOPOLOGY topo = pDC->pState->state.topology;
- while (_BitScanForward(&slot, linkageMask))
+ static const float constTable[3][4] = {
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 0.0f, 0.0f, 1.0f},
+ {1.0f, 1.0f, 1.0f, 1.0f}
+ };
+
+ for (uint32_t i = 0; i < backendState.numAttributes; ++i)
{
- linkageMask &= ~(1 << slot); // done with this bit.
+ uint32_t inputSlot;
+ if (IsSwizzledT::value)
+ {
+ SWR_ATTRIB_SWIZZLE attribSwizzle = backendState.swizzleMap[i];
+ inputSlot = VERTEX_ATTRIB_START_SLOT + attribSwizzle.sourceAttrib;
- // compute absolute slot in vertex attrib array
- uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + pLinkageMap[mapIdx];
+ }
+ else
+ {
+ inputSlot = VERTEX_ATTRIB_START_SLOT + i;
+ }
__m128 attrib[3]; // triangle attribs (always 4 wide)
+ static const uint32_t numVerts = NumVertsT::value < 3 ? NumVertsT::value : 3;
+ float* pAttribStart = pBuffer;
- if (_bittest(&constantInterpMask, mapIdx))
+ if (HasConstantInterpT::value)
{
- uint32_t vid;
- static const uint32_t tristripProvokingVertex[] = {0, 2, 1};
- static const int32_t quadProvokingTri[2][4] = {{0, 0, 0, 1}, {0, -1, 0, 0}};
- static const uint32_t quadProvokingVertex[2][4] = {{0, 1, 2, 2}, {0, 1, 1, 2}};
- static const int32_t qstripProvokingTri[2][4] = {{0, 0, 0, 1}, {-1, 0, 0, 0}};
- static const uint32_t qstripProvokingVertex[2][4] = {{0, 1, 2, 1}, {0, 0, 2, 1}};
-
- switch (topo) {
- case TOP_QUAD_LIST:
- pa.AssembleSingle(inputSlot,
- triIndex + quadProvokingTri[triIndex & 1][provokingVertex],
- attrib);
- vid = quadProvokingVertex[triIndex & 1][provokingVertex];
- break;
- case TOP_QUAD_STRIP:
- pa.AssembleSingle(inputSlot,
- triIndex + qstripProvokingTri[triIndex & 1][provokingVertex],
- attrib);
- vid = qstripProvokingVertex[triIndex & 1][provokingVertex];
- break;
- case TOP_TRIANGLE_STRIP:
- pa.AssembleSingle(inputSlot, triIndex, attrib);
- vid = (triIndex & 1)
- ? tristripProvokingVertex[provokingVertex]
- : provokingVertex;
- break;
- default:
- pa.AssembleSingle(inputSlot, triIndex, attrib);
- vid = provokingVertex;
- break;
- }
+ if (_bittest(&constantInterpMask, i))
+ {
+ uint32_t vid;
+ uint32_t adjustedTriIndex;
+ static const uint32_t tristripProvokingVertex[] = { 0, 2, 1 };
+ static const int32_t quadProvokingTri[2][4] = { {0, 0, 0, 1}, {0, -1, 0, 0} };
+ static const uint32_t quadProvokingVertex[2][4] = { {0, 1, 2, 2}, {0, 1, 1, 2} };
+ static const int32_t qstripProvokingTri[2][4] = { {0, 0, 0, 1}, {-1, 0, 0, 0} };
+ static const uint32_t qstripProvokingVertex[2][4] = { {0, 1, 2, 1}, {0, 0, 2, 1} };
+
+ switch (topo) {
+ case TOP_QUAD_LIST:
+ adjustedTriIndex = triIndex + quadProvokingTri[triIndex & 1][provokingVertex];
+ vid = quadProvokingVertex[triIndex & 1][provokingVertex];
+ break;
+ case TOP_QUAD_STRIP:
+ adjustedTriIndex = triIndex + qstripProvokingTri[triIndex & 1][provokingVertex];
+ vid = qstripProvokingVertex[triIndex & 1][provokingVertex];
+ break;
+ case TOP_TRIANGLE_STRIP:
+ adjustedTriIndex = triIndex;
+ vid = (triIndex & 1)
+ ? tristripProvokingVertex[provokingVertex]
+ : provokingVertex;
+ break;
+ default:
+ adjustedTriIndex = triIndex;
+ vid = provokingVertex;
+ break;
+ }
+
+ pa.AssembleSingle(inputSlot, adjustedTriIndex, attrib);
- for (uint32_t i = 0; i < NumVerts; ++i)
+ for (uint32_t i = 0; i < numVerts; ++i)
+ {
+ _mm_store_ps(pBuffer, attrib[vid]);
+ pBuffer += 4;
+ }
+ }
+ else
{
- _mm_store_ps(pBuffer, attrib[vid]);
- pBuffer += 4;
+ pa.AssembleSingle(inputSlot, triIndex, attrib);
+
+ for (uint32_t i = 0; i < numVerts; ++i)
+ {
+ _mm_store_ps(pBuffer, attrib[i]);
+ pBuffer += 4;
+ }
}
}
else
{
pa.AssembleSingle(inputSlot, triIndex, attrib);
- for (uint32_t i = 0; i < NumVerts; ++i)
+ for (uint32_t i = 0; i < numVerts; ++i)
{
_mm_store_ps(pBuffer, attrib[i]);
pBuffer += 4;
// interpolation code in the pixel shader works correctly for the
// 3 topologies - point, line, tri. This effectively zeros out the
// effect of the missing vertices in the triangle interpolation.
- for (uint32_t i = NumVerts; i < 3; ++i)
+ for (uint32_t v = numVerts; v < 3; ++v)
{
- _mm_store_ps(pBuffer, attrib[NumVerts - 1]);
+ _mm_store_ps(pBuffer, attrib[numVerts - 1]);
pBuffer += 4;
}
- mapIdx++;
+ // check for constant source overrides
+ if (IsSwizzledT::value)
+ {
+ uint32_t mask = backendState.swizzleMap[i].componentOverrideMask;
+ if (mask)
+ {
+ DWORD comp;
+ while (_BitScanForward(&comp, mask))
+ {
+ mask &= ~(1 << comp);
+
+ float constantValue = 0.0f;
+ switch ((SWR_CONSTANT_SOURCE)backendState.swizzleMap[i].constantSource)
+ {
+ case SWR_CONSTANT_SOURCE_CONST_0000:
+ case SWR_CONSTANT_SOURCE_CONST_0001_FLOAT:
+ case SWR_CONSTANT_SOURCE_CONST_1111_FLOAT:
+ constantValue = constTable[backendState.swizzleMap[i].constantSource][comp];
+ break;
+ case SWR_CONSTANT_SOURCE_PRIM_ID:
+ constantValue = *(float*)&primId;
+ break;
+ }
+
+ // apply constant value to all 3 vertices
+ for (uint32_t v = 0; v < 3; ++v)
+ {
+ pAttribStart[comp + v * 4] = constantValue;
+ }
+ }
+ }
+ }
}
}
+
+typedef void(*PFN_PROCESS_ATTRIBUTES)(DRAW_CONTEXT*, PA_STATE&, uint32_t, uint32_t, float*);
+
+struct ProcessAttributesChooser
+{
+ typedef PFN_PROCESS_ATTRIBUTES FuncType;
+
+ template <typename... ArgsB>
+ static FuncType GetFunc()
+ {
+ return ProcessAttributes<ArgsB...>;
+ }
+};
+
+PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts, bool IsSwizzled, bool HasConstantInterp)
+{
+ return TemplateArgUnroller<ProcessAttributesChooser>::GetFunc(NumVerts, IsSwizzled, HasConstantInterp);
+}
+
//////////////////////////////////////////////////////////////////////////
/// @brief Processes enabled user clip distances. Loads the active clip
/// distances from the PA, sets up barycentric equations, and
const SWR_GS_STATE& gsState = state.gsState;
MacroTileMgr *pTileMgr = pDC->pTileMgr;
+ // Select attribute processor
+ PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(3,
+ state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
+
simdscalar vRecipW0 = _simd_set1_ps(1.0f);
simdscalar vRecipW1 = _simd_set1_ps(1.0f);
// scan remaining valid triangles and bin each separately
while (_BitScanForward(&triIndex, triMask))
{
- uint32_t linkageCount = state.linkageCount;
- uint32_t linkageMask = state.linkageMask;
+ uint32_t linkageCount = state.backendState.numAttributes;
uint32_t numScalarAttribs = linkageCount * 4;
BE_WORK work;
float *pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
desc.pAttribs = pAttribs;
desc.numAttribs = linkageCount;
- ProcessAttributes<3>(pDC, pa, linkageMask, state.linkageMap, triIndex, desc.pAttribs);
+ pfnProcessAttribs(pDC, pa, triIndex, pPrimID[triIndex], desc.pAttribs);
// store triangle vertex data
desc.pTriBuffer = (float*)pArena->AllocAligned(4 * 4 * sizeof(float), 16);
const SWR_GS_STATE& gsState = state.gsState;
const SWR_RASTSTATE& rastState = state.rastState;
+ // Select attribute processor
+ PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(1,
+ state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
+
if (!feState.vpTransformDisable)
{
// perspective divide
uint32_t *pPrimID = (uint32_t *)&primID;
DWORD primIndex = 0;
+
+ const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
+
// scan remaining valid triangles and bin each separately
while (_BitScanForward(&primIndex, primMask))
{
- uint32_t linkageCount = state.linkageCount;
- uint32_t linkageMask = state.linkageMask;
-
+ uint32_t linkageCount = backendState.numAttributes;
uint32_t numScalarAttribs = linkageCount * 4;
BE_WORK work;
desc.pAttribs = pAttribs;
desc.numAttribs = linkageCount;
- ProcessAttributes<1>(pDC, pa, linkageMask, state.linkageMap, primIndex, pAttribs);
+ pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], pAttribs);
// store raster tile aligned x, y, perspective correct z
float *pTriBuffer = (float*)pArena->AllocAligned(4 * sizeof(float), 16);
_simd_store_ps((float*)aPrimVertsZ, primVerts.z);
// scan remaining valid prims and bin each separately
+ const SWR_BACKEND_STATE& backendState = state.backendState;
DWORD primIndex;
while (_BitScanForward(&primIndex, primMask))
{
- uint32_t linkageCount = state.linkageCount;
- uint32_t linkageMask = state.linkageMask;
+ uint32_t linkageCount = backendState.numAttributes;
uint32_t numScalarAttribs = linkageCount * 4;
BE_WORK work;
// store active attribs
desc.pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
desc.numAttribs = linkageCount;
- ProcessAttributes<1>(pDC, pa, linkageMask, state.linkageMap, primIndex, desc.pAttribs);
+ pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], desc.pAttribs);
// store point vertex data
float *pTriBuffer = (float*)pArena->AllocAligned(4 * sizeof(float), 16);
const SWR_FRONTEND_STATE& feState = state.frontendState;
const SWR_GS_STATE& gsState = state.gsState;
+ // Select attribute processor
+ PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(2,
+ state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
+
simdscalar vRecipW0 = _simd_set1_ps(1.0f);
simdscalar vRecipW1 = _simd_set1_ps(1.0f);
DWORD primIndex;
while (_BitScanForward(&primIndex, primMask))
{
- uint32_t linkageCount = state.linkageCount;
- uint32_t linkageMask = state.linkageMask;
+ uint32_t linkageCount = state.backendState.numAttributes;
uint32_t numScalarAttribs = linkageCount * 4;
BE_WORK work;
// store active attribs
desc.pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
desc.numAttribs = linkageCount;
- ProcessAttributes<2>(pDC, pa, linkageMask, state.linkageMap, primIndex, desc.pAttribs);
+ pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], desc.pAttribs);
// store line vertex data
desc.pTriBuffer = (float*)pArena->AllocAligned(4 * 4 * sizeof(float), 16);