#include "common/simdintrin.h"
#include "core/threads.h"
#include "ringbuffer.h"
+#include "archrast/archrast.h"
// x.8 fixed point precision values
#define FIXED_POINT_SHIFT 8
float pointSize;
uint32_t primID;
uint32_t renderTargetArrayIndex;
+ uint32_t viewportIndex;
};
//////////////////////////////////////////////////////////////////////////
struct CLEAR_DESC
{
+ SWR_RECT rect;
CLEAR_FLAGS flags;
float clearRTColor[4]; // RGBA_32F
float clearDepth; // [0..1]
struct STORE_TILES_DESC
{
- SWR_RENDERTARGET_ATTACHMENT attachment;
+ uint32_t attachmentMask;
SWR_TILE_STATE postStoreTileState;
+ SWR_RECT rect;
};
struct COMPUTE_DESC
CLEAR,
DISCARDINVALIDATETILES,
STORETILES,
+ SHUTDOWN,
};
-struct BE_WORK
+OSALIGNSIMD(struct) BE_WORK
{
WORK_TYPE type;
PFN_WORK_FUNC pfnWork;
} desc;
};
-struct GUARDBAND
+struct GUARDBANDS
{
- float left, right, top, bottom;
+ float left[KNOB_NUM_VIEWPORTS_SCISSORS];
+ float right[KNOB_NUM_VIEWPORTS_SCISSORS];
+ float top[KNOB_NUM_VIEWPORTS_SCISSORS];
+ float bottom[KNOB_NUM_VIEWPORTS_SCISSORS];
};
struct PA_STATE;
// function signature for pipeline stages that execute after primitive assembly
typedef void(*PFN_PROCESS_PRIMS)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[],
- uint32_t primMask, simdscalari primID);
+ uint32_t primMask, simdscalari primID, simdscalari viewportIdx);
OSALIGNLINE(struct) API_STATE
{
// floating point multisample offsets
float samplePos[SWR_MAX_NUM_MULTISAMPLES * 2];
- GUARDBAND gbState;
+ GUARDBANDS gbState;
SWR_VIEWPORT vp[KNOB_NUM_VIEWPORTS_SCISSORS];
SWR_VIEWPORT_MATRICES vpMatrices;
- BBOX scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS];
- BBOX scissorInFixedPoint;
+ SWR_RECT scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS];
+ SWR_RECT scissorsInFixedPoint[KNOB_NUM_VIEWPORTS_SCISSORS];
+ bool scissorsTileAligned;
// Backend state
SWR_BACKEND_STATE backendState;
+ SWR_DEPTH_BOUNDS_STATE depthBoundsState;
+
// PS - Pixel shader state
SWR_PS_STATE psState;
struct DRAW_DYNAMIC_STATE
{
+ void Reset(uint32_t numThreads)
+ {
+ SWR_STATS* pSavePtr = pStats;
+ memset(this, 0, sizeof(*this));
+ pStats = pSavePtr;
+ memset(pStats, 0, sizeof(SWR_STATS) * numThreads);
+ }
///@todo Currently assumes only a single FE can do stream output for a draw.
uint32_t SoWriteOffset[4];
bool SoWriteOffsetDirty[4];
- SWR_STATS stats[KNOB_MAX_NUM_THREADS];
+ SWR_STATS_FE statsFE; // Only one FE thread per DC.
+ SWR_STATS* pStats;
};
// Draw Context
CachingArena* pArena;
uint32_t drawId;
- bool dependent;
+ bool dependentFE; // Frontend work is dependent on all previous FE
+ bool dependent; // Backend work is dependent on all previous BE
bool isCompute; // Is this DC a compute context?
bool cleanupState; // True if this is the last draw using an entry in the state ring.
volatile bool doneFE; // Is FE work done for this draw?
HotTileMgr *pHotTileMgr;
// Callback functions, passed in at create context time
- PFN_LOAD_TILE pfnLoadTile;
- PFN_STORE_TILE pfnStoreTile;
- PFN_CLEAR_TILE pfnClearTile;
- PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
- PFN_UPDATE_STATS pfnUpdateStats;
+ PFN_LOAD_TILE pfnLoadTile;
+ PFN_STORE_TILE pfnStoreTile;
+ PFN_CLEAR_TILE pfnClearTile;
+ PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
+ PFN_UPDATE_STATS pfnUpdateStats;
+ PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
// Global Stats
- SWR_STATS stats[KNOB_MAX_NUM_THREADS];
+ SWR_STATS* pStats;
// Scratch space for workers.
- uint8_t* pScratch[KNOB_MAX_NUM_THREADS];
+ uint8_t** ppScratch;
volatile int32_t drawsOutstandingFE;
CachingAllocator cachingArenaAllocator;
uint32_t frameCount;
-};
-void WaitForDependencies(SWR_CONTEXT *pContext, uint64_t drawId);
-void WakeAllThreads(SWR_CONTEXT *pContext);
+ uint32_t lastFrameChecked;
+ uint64_t lastDrawChecked;
+ TileSet singleThreadLockedTiles;
+
+ // ArchRast thread contexts.
+ HANDLE* pArContext;
+};
-#define UPDATE_STAT(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.stats[workerId].name += count; }
+#define UPDATE_STAT(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.pStats[workerId].name += count; }
+#define UPDATE_STAT_FE(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.statsFE.name += count; }
+
+// ArchRast instrumentation framework
+#define AR_WORKER_CTX pContext->pArContext[workerId]
+#define AR_API_CTX pContext->pArContext[pContext->NumWorkerThreads]
+
+#ifdef KNOB_ENABLE_AR
+ #define _AR_BEGIN(ctx, type, id) ArchRast::dispatch(ctx, ArchRast::Start(ArchRast::type, id))
+ #define _AR_END(ctx, type, count) ArchRast::dispatch(ctx, ArchRast::End(ArchRast::type, count))
+ #define _AR_EVENT(ctx, event) ArchRast::dispatch(ctx, ArchRast::event)
+#else
+ #ifdef KNOB_ENABLE_RDTSC
+ #define _AR_BEGIN(ctx, type, id) (void)ctx; RDTSC_START(type)
+ #define _AR_END(ctx, type, id) RDTSC_STOP(type, id, 0)
+ #else
+ #define _AR_BEGIN(ctx, type, id) (void)ctx
+ #define _AR_END(ctx, type, id)
+ #endif
+ #define _AR_EVENT(ctx, event)
+#endif
+
+// Use these macros for api thread.
+#define AR_API_BEGIN(type, id) _AR_BEGIN(AR_API_CTX, type, id)
+#define AR_API_END(type, count) _AR_END(AR_API_CTX, type, count)
+#define AR_API_EVENT(event) _AR_EVENT(AR_API_CTX, event)
+
+// Use these macros for worker threads.
+#define AR_BEGIN(type, id) _AR_BEGIN(AR_WORKER_CTX, type, id)
+#define AR_END(type, count) _AR_END(AR_WORKER_CTX, type, count)
+#define AR_EVENT(event) _AR_EVENT(AR_WORKER_CTX, event)