uint32_t threadGroupCountY,
uint32_t threadGroupCountZ);
+/// @note this enum needs to be kept in sync with HOTTILE_STATE!
enum SWR_TILE_STATE
{
SWR_TILE_INVALID = 0, // tile is in unitialized state and should be loaded with surface contents
pHotTile->pBuffer);
}
- if (pHotTile->state == HOTTILE_DIRTY || pHotTile->state == HOTTILE_RESOLVED)
+ if (pHotTile->state == HOTTILE_DIRTY || pHotTile->state == HOTTILE_RESOLVED)
{
if (!(pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY &&
pHotTile->state == HOTTILE_RESOLVED))
******************************************************************************/
#pragma once
+#include "tilemgr.h"
+
// Declaration only (no body here). The table is taken by reference to a fixed-size
// array, so its [SWR_INPUT_COVERAGE_COUNT][2][2] shape is part of the signature.
// NOTE(review): by name this presumably fills in the table entries - confirm in the definition.
void InitBackendSingleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_INPUT_COVERAGE_COUNT][2][2]);
// Declaration only (no body here). Same table-by-reference form as the single-sample
// variant, with an extra leading SWR_MULTISAMPLE_TYPE_COUNT dimension.
// NOTE(review): by name this presumably fills in the table entries - confirm in the definition.
void InitBackendSampleFuncTable(
PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2]);
}
}
+/// @brief Marks every enabled color hot tile referenced by renderBuffers as HOTTILE_DIRTY.
+/// @param pDC - draw context whose API state supplies the color hot tile enable mask.
+/// @param renderBuffers - output buffers; pColorHotTile[] must be valid for each enabled slot.
+INLINE void SetRenderHotTilesDirty(DRAW_CONTEXT* pDC, RenderOutputBuffers& renderBuffers)
+{
+    unsigned long slot = 0;
+
+    // Visit each set bit of the enable mask, retiring it once its hot tile has been flagged.
+    for (uint32_t pendingMask = GetApiState(pDC).colorHottileEnable;
+         _BitScanForward(&slot, pendingMask);
+         pendingMask &= ~(1 << slot))
+    {
+        renderBuffers.pColorHotTile[slot]->state = HOTTILE_DIRTY;
+    }
+}
+
template <typename T>
void SetupPixelShaderContext(SWR_PS_CONTEXT* psContext,
const SWR_MULTISAMPLE_POS& samplePos,
state.colorHottileEnable,
renderBuffers);
+ bool isTileDirty = false;
+
RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 0);
PixelRateZTestLoop<T> PixelRateZTest(pDC,
goto Endtile;
};
+ isTileDirty = true;
+
// late-Z
if (!T::bCanEarlyZ && !T::bForcedSampleCount)
{
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
}
+ if (isTileDirty)
+ {
+ SetRenderHotTilesDirty(pDC, renderBuffers);
+ }
+
RDTSC_END(pDC->pContext->pBucketMgr, BEPixelRateBackend, 0);
}
state.colorHottileEnable,
renderBuffers);
+ bool isTileDirty = false;
+
RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 0);
psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
vCoverageMask = _simd_castsi_ps(psContext.activeMask);
+ if (_simd_movemask_ps(vCoverageMask))
+ {
+ isTileDirty = true;
+ }
+
// late-Z
if (!T::bCanEarlyZ)
{
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
}
+ if (isTileDirty)
+ {
+ SetRenderHotTilesDirty(pDC, renderBuffers);
+ }
+
RDTSC_END(pDC->pContext->pBucketMgr, BESampleRateBackend, 0);
}
state.colorHottileEnable,
renderBuffers);
+ // Indicates backend rendered something to the color buffer
+ bool isTileDirty = false;
+
RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 1);
psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
vCoverageMask = _simd_castsi_ps(psContext.activeMask);
+ if (_simd_movemask_ps(vCoverageMask))
+ {
+ isTileDirty = true;
+ }
+
// late-Z
if (!T::bCanEarlyZ)
{
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
}
+ if (isTileDirty)
+ {
+ SetRenderHotTilesDirty(pDC, renderBuffers);
+ }
+
RDTSC_END(pDC->pContext->pBucketMgr, BESingleSampleBackend, 0);
}
class MacroTileMgr;
class DispatchQueue;
+class HOTTILE;
struct RenderOutputBuffers // output pointers handed to the backend, plus the hot tiles behind them
{
uint8_t* pColor[SWR_NUM_RENDERTARGETS]; // per render-target slot: address inside that slot's color hot tile buffer
uint8_t* pDepth; // address inside the depth hot tile buffer
uint8_t* pStencil; // address inside the stencil hot tile buffer
+
+ HOTTILE* pColorHotTile[SWR_NUM_RENDERTARGETS]; // hot tile that pColor[slot] points into; used to update its state
+ HOTTILE* pDepthHotTile; // hot tile that pDepth points into
+ HOTTILE* pStencilHotTile; // hot tile that pStencil points into
};
// Plane equation A/B/C coeffs used to evaluate I/J barycentric coords
true,
numSamples,
renderTargetArrayIndex);
- pColor->state = HOTTILE_DIRTY;
renderBuffers.pColor[rtSlot] = pColor->pBuffer + offset;
+ renderBuffers.pColorHotTile[rtSlot] = pColor;
colorHottileEnableMask &= ~(1 << rtSlot);
}
pDepth->state = HOTTILE_DIRTY;
SWR_ASSERT(pDepth->pBuffer != nullptr);
renderBuffers.pDepth = pDepth->pBuffer + offset;
+ renderBuffers.pDepthHotTile = pDepth;
}
if (state.stencilHottileEnable)
{
pStencil->state = HOTTILE_DIRTY;
SWR_ASSERT(pStencil->pBuffer != nullptr);
renderBuffers.pStencil = pStencil->pBuffer + offset;
+ renderBuffers.pStencilHotTile = pStencil;
}
}
hotTile.pBuffer);
hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
- hotTile.state = HOTTILE_DIRTY;
+ hotTile.state = HOTTILE_RESOLVED;
}
}
return &tile.Attachment[attachment];
y,
pHotTile->renderTargetArrayIndex,
pHotTile->pBuffer);
- pHotTile->state = HOTTILE_DIRTY;
+ pHotTile->state = HOTTILE_RESOLVED;
RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
}
else if (pHotTile->state == HOTTILE_CLEAR)
OSALIGNLINE(volatile long) mTasksOutstanding{0};
};
+/// @note this enum needs to be kept in sync with SWR_TILE_STATE!
enum HOTTILE_STATE
{
HOTTILE_INVALID, // tile is in uninitialized state and should be loaded with surface contents
// before rendering
HOTTILE_CLEAR, // tile should be cleared
HOTTILE_DIRTY, // tile has been rendered to
- HOTTILE_RESOLVED, // tile has been stored to memory
+ HOTTILE_RESOLVED, // tile is consistent with memory (either loaded or stored)
};
struct HOTTILE
mInt8Ty = Type::getInt8Ty(pJitMgr->mContext);
mInt16Ty = Type::getInt16Ty(pJitMgr->mContext);
mInt32Ty = Type::getInt32Ty(pJitMgr->mContext);
+ mInt64Ty = Type::getInt64Ty(pJitMgr->mContext);
mInt8PtrTy = PointerType::get(mInt8Ty, 0);
mInt16PtrTy = PointerType::get(mInt16Ty, 0);
mInt32PtrTy = PointerType::get(mInt32Ty, 0);
- mInt64Ty = Type::getInt64Ty(pJitMgr->mContext);
+ mInt64PtrTy = PointerType::get(mInt64Ty, 0);
mSimd4FP64Ty = VectorType::get(mDoubleTy, 4);
Type* mInt8PtrTy;
Type* mInt16PtrTy;
Type* mInt32PtrTy;
+ Type* mInt64PtrTy;
Type* mSimd4FP64Ty;
Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage)
{
AssertMemoryUsageParams(pDst, usage);
-
+// if (vSrc->getType() != mSimdFP32Ty)
+// {
+// vSrc = BITCAST(vSrc, mSimdFP32Ty);
+// }
SWR_ASSERT(vSrc->getType()->getVectorElementType()->isFloatTy());
VSCATTERPS(pDst, vMask, vOffsets, vSrc, C(1));
return;
GFX_MEM_CLIENT_FETCH,
GFX_MEM_CLIENT_SAMPLER,
GFX_MEM_CLIENT_SHADER,
- GFX_MEM_CLIENT_STREAMOUT
+ GFX_MEM_CLIENT_STREAMOUT,
+ GFX_MEM_CLIENT_URB
};
protected: