uint32_t indexSize = 0;
uint32_t endVertex = work.numVerts;
- const int32_t* pLastRequestedIndex = nullptr;
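+ // track the end of the requested index range as a gfx byte address instead of a host pointer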
+ gfxptr_t xpLastRequestedIndex = 0;
if (IsIndexedT::value)
{
switch (work.type)
{
case R32_UINT:
indexSize = sizeof(uint32_t);
- pLastRequestedIndex = &(work.pIB[endVertex]);
break;
case R16_UINT:
indexSize = sizeof(uint16_t);
- // nasty address offset to last index
- pLastRequestedIndex = (int32_t*)(&(((uint16_t*)work.pIB)[endVertex]));
break;
case R8_UINT:
indexSize = sizeof(uint8_t);
- // nasty address offset to last index
- pLastRequestedIndex = (int32_t*)(&(((uint8_t*)work.pIB)[endVertex]));
break;
default:
SWR_INVALID("Invalid work.type: %d", work.type);
}
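+ // a single byte-offset computation covers all three index sizes, replacing the per-type pointer casts above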
+ xpLastRequestedIndex = work.xpIB + endVertex * indexSize;
}
else
{
// if the entire index buffer isn't being consumed, set the last index
// so that fetches < a SIMD wide will be masked off
- fetchInfo_lo.pLastIndex = (const int32_t*)(((uint8_t*)state.indexBuffer.pIndices) + state.indexBuffer.size);
- if (pLastRequestedIndex < fetchInfo_lo.pLastIndex)
+ fetchInfo_lo.xpLastIndex = state.indexBuffer.xpIndices + state.indexBuffer.size;
+ if (xpLastRequestedIndex < fetchInfo_lo.xpLastIndex)
{
- fetchInfo_lo.pLastIndex = pLastRequestedIndex;
+ fetchInfo_lo.xpLastIndex = xpLastRequestedIndex;
}
}
else
if (IsIndexedT::value)
{
- fetchInfo_lo.pIndices = work.pIB;
- fetchInfo_hi.pIndices = (int32_t *)((uint8_t *)fetchInfo_lo.pIndices + KNOB_SIMD_WIDTH * indexSize); // 1/2 of KNOB_SIMD16_WIDTH
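+ // gfxptr_t arithmetic is in bytes, so the hi half starts KNOB_SIMD_WIDTH indices, scaled by indexSize, past the lo half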
+ fetchInfo_lo.xpIndices = work.xpIB;
+ fetchInfo_hi.xpIndices = fetchInfo_lo.xpIndices + KNOB_SIMD_WIDTH * indexSize; // 1/2 of KNOB_SIMD16_WIDTH
}
else
{
vIndex = _simd16_add_epi32(_simd16_set1_epi32(work.startVertexID), vScale);
- fetchInfo_lo.pIndices = (const int32_t *)&vIndex;
- fetchInfo_hi.pIndices = (const int32_t *)&vIndex + KNOB_SIMD_WIDTH; // 1/2 of KNOB_SIMD16_WIDTH
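+ // the old int32_t* arithmetic advanced by elements; gfxptr_t is a byte address, so scale by sizeof(int32_t) explicitly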
+ fetchInfo_lo.xpIndices = (gfxptr_t)&vIndex;
+ fetchInfo_hi.xpIndices = (gfxptr_t)&vIndex + KNOB_SIMD_WIDTH * sizeof(int32_t); // 1/2 of KNOB_SIMD16_WIDTH
}
fetchInfo_lo.CurInstance = instanceNum;
{
if (!IsIndexedT::value)
{
- fetchInfo_lo.pLastIndex = fetchInfo_lo.pIndices;
+ fetchInfo_lo.xpLastIndex = fetchInfo_lo.xpIndices;
uint32_t offset;
offset = std::min(endVertex-i, (uint32_t) KNOB_SIMD16_WIDTH);
#if USE_SIMD16_SHADERS
offset *= 4; // convert from index to address
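+ // 4 == sizeof(int32_t); the auto-generated vertex indices are 32-bit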
- fetchInfo_lo.pLastIndex += offset;
+ fetchInfo_lo.xpLastIndex += offset;
#else
- fetchInfo_lo.pLastIndex += std::min(offset, (uint32_t) KNOB_SIMD_WIDTH) * 4; // * 4 for converting index to address
+ fetchInfo_lo.xpLastIndex += std::min(offset, (uint32_t) KNOB_SIMD_WIDTH) * 4; // * 4 for converting index to address
uint32_t offset2 = std::min(offset, (uint32_t) KNOB_SIMD16_WIDTH)-KNOB_SIMD_WIDTH;
assert(offset >= 0);
- fetchInfo_hi.pLastIndex = fetchInfo_hi.pIndices;
- fetchInfo_hi.pLastIndex += offset2 * 4; // * 4 for converting index to address
+ fetchInfo_hi.xpLastIndex = fetchInfo_hi.xpIndices;
+ fetchInfo_hi.xpLastIndex += offset2 * 4; // * 4 for converting index to address
#endif
}
// 1. Execute FS/VS for a single SIMD.
if (IsIndexedT::value)
{
- fetchInfo_lo.pIndices = (int32_t *)((uint8_t*)fetchInfo_lo.pIndices + KNOB_SIMD16_WIDTH * indexSize);
- fetchInfo_hi.pIndices = (int32_t *)((uint8_t*)fetchInfo_hi.pIndices + KNOB_SIMD16_WIDTH * indexSize);
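+ // advance both halves a full SIMD16's worth of indices (in bytes) each iteration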
+ fetchInfo_lo.xpIndices += KNOB_SIMD16_WIDTH * indexSize;
+ fetchInfo_hi.xpIndices += KNOB_SIMD16_WIDTH * indexSize;
}
else
{
// if the entire index buffer isn't being consumed, set the last index
// so that fetches < a SIMD wide will be masked off
- fetchInfo.pLastIndex = (const int32_t*)(((uint8_t*)state.indexBuffer.pIndices) + state.indexBuffer.size);
- if (pLastRequestedIndex < fetchInfo.pLastIndex)
+ fetchInfo.xpLastIndex = state.indexBuffer.xpIndices + state.indexBuffer.size;
+ if (xpLastRequestedIndex < fetchInfo.xpLastIndex)
{
- fetchInfo.pLastIndex = pLastRequestedIndex;
+ fetchInfo.xpLastIndex = xpLastRequestedIndex;
}
}
else