#include "common/simdintrin.h"
#include "core/threads.h"
#include "ringbuffer.h"
+#include "archrast/archrast.h"
// x.8 fixed point precision values
#define FIXED_POINT_SHIFT 8
TRI_FLAGS triFlags;
};
-union CLEAR_FLAGS
-{
- struct
- {
- uint32_t mask : 3;
- };
- uint32_t bits;
-};
-
struct CLEAR_DESC
{
SWR_RECT rect;
- CLEAR_FLAGS flags;
+ uint32_t attachmentMask;
float clearRTColor[4]; // RGBA_32F
float clearDepth; // [0..1]
uint8_t clearStencil;
struct STORE_TILES_DESC // Parameters for a tile-store request; presumably paired with WORK_TYPE::STORETILES — confirm
{
- SWR_RENDERTARGET_ATTACHMENT attachment;
+ uint32_t attachmentMask; // Replaces the single `attachment` field; presumably one bit per SWR_RENDERTARGET_ATTACHMENT — confirm
SWR_TILE_STATE postStoreTileState; // NOTE(review): by name, the tile state to apply after the store — confirm
SWR_RECT rect;
};
CLEAR,
DISCARDINVALIDATETILES,
STORETILES,
+ SHUTDOWN,
};
-struct BE_WORK
+OSALIGNSIMD(struct) BE_WORK
{
WORK_TYPE type;
PFN_WORK_FUNC pfnWork;
} desc;
};
-struct GUARDBAND
+struct GUARDBANDS // Guardband extents: one array per edge, each with KNOB_NUM_VIEWPORTS_SCISSORS entries
{
- float left, right, top, bottom;
+ float left[KNOB_NUM_VIEWPORTS_SCISSORS]; // presumably indexed by viewport/scissor index — confirm against users of gbState
+ float right[KNOB_NUM_VIEWPORTS_SCISSORS];
+ float top[KNOB_NUM_VIEWPORTS_SCISSORS];
+ float bottom[KNOB_NUM_VIEWPORTS_SCISSORS];
};
struct PA_STATE;
// floating point multisample offsets
float samplePos[SWR_MAX_NUM_MULTISAMPLES * 2];
- GUARDBAND gbState;
+ GUARDBANDS gbState;
SWR_VIEWPORT vp[KNOB_NUM_VIEWPORTS_SCISSORS];
SWR_VIEWPORT_MATRICES vpMatrices;
// Backend state
SWR_BACKEND_STATE backendState;
+ SWR_DEPTH_BOUNDS_STATE depthBoundsState;
+
// PS - Pixel shader state
SWR_PS_STATE psState;
SWR_BLEND_STATE blendState;
PFN_BLEND_JIT_FUNC pfnBlendFunc[SWR_NUM_RENDERTARGETS];
- // Stats are incremented when this is true.
- bool enableStats;
-
struct
{
- uint32_t colorHottileEnable : 8;
- uint32_t depthHottileEnable: 1;
- uint32_t stencilHottileEnable : 1;
+ uint32_t enableStatsFE : 1; // Enable frontend pipeline stats
+ uint32_t enableStatsBE : 1; // Enable backend pipeline stats
+ uint32_t colorHottileEnable : 8; // Bitmask of enabled color hottiles
+ uint32_t depthHottileEnable: 1; // Enable depth buffer hottile
+ uint32_t stencilHottileEnable : 1; // Enable stencil buffer hottile
};
PFN_QUANTIZE_DEPTH pfnQuantizeDepth;
struct DRAW_DYNAMIC_STATE // Accessed as pDC->dynState by the UPDATE_STAT_* macros; holds stream-output offsets and stats
{
+ void Reset(uint32_t numThreads) // Zero this struct (keeping the pStats pointer) and the first numThreads entries pStats points to
+ {
+ SWR_STATS* pSavePtr = pStats; // Save the pointer first: the memset below would otherwise wipe it
+ memset(this, 0, sizeof(*this)); // Clears every member, including pStats
+ pStats = pSavePtr; // Restore the saved pointer
+ memset(pStats, 0, sizeof(SWR_STATS) * numThreads); // NOTE(review): no null check; pStats must reference at least numThreads entries — confirm at the allocation site
+ }
///@todo Currently assumes only a single FE can do stream output for a draw.
uint32_t SoWriteOffset[4];
bool SoWriteOffsetDirty[4];
SWR_STATS_FE statsFE; // Only one FE thread per DC.
- SWR_STATS stats[KNOB_MAX_NUM_THREADS];
+ SWR_STATS* pStats; // Indexed by workerId in UPDATE_STAT_BE; replaces the fixed-size array, storage presumably allocated elsewhere — confirm
};
// Draw Context
CachingArena* pArena;
uint32_t drawId;
- bool dependent;
+ bool dependentFE; // Frontend work is dependent on all previous FE
+ bool dependent; // Backend work is dependent on all previous BE
bool isCompute; // Is this DC a compute context?
bool cleanupState; // True if this is the last draw using an entry in the state ring.
volatile bool doneFE; // Is FE work done for this draw?
volatile int32_t threadsDone;
SYNC_DESC retireCallback; // Call this func when this DC is retired.
+
+
};
static_assert((sizeof(DRAW_CONTEXT) & 63) == 0, "Invalid size for DRAW_CONTEXT");
std::condition_variable FifosNotEmpty;
std::mutex WaitLock;
- DRIVER_TYPE driverType;
-
uint32_t privateStateSize;
HotTileMgr *pHotTileMgr;
PFN_UPDATE_STATS pfnUpdateStats;
PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
+
// Global Stats
- SWR_STATS stats[KNOB_MAX_NUM_THREADS];
+ SWR_STATS* pStats;
// Scratch space for workers.
- uint8_t* pScratch[KNOB_MAX_NUM_THREADS];
+ uint8_t** ppScratch;
volatile int32_t drawsOutstandingFE;
uint32_t lastFrameChecked;
uint64_t lastDrawChecked;
TileSet singleThreadLockedTiles;
+
+ // ArchRast thread contexts.
+ HANDLE* pArContext;
};
-#define UPDATE_STAT(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.stats[workerId].name += count; }
-#define UPDATE_STAT_FE(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.statsFE.name += count; }
+#define UPDATE_STAT_BE(name, count) if (GetApiState(pDC).enableStatsBE) { pDC->dynState.pStats[workerId].name += count; }
+#define UPDATE_STAT_FE(name, count) if (GetApiState(pDC).enableStatsFE) { pDC->dynState.statsFE.name += count; }
+
+// ArchRast instrumentation framework
+#define AR_WORKER_CTX pContext->pArContext[workerId] // Expects `pContext` and `workerId` in scope at the expansion site
+#define AR_API_CTX pContext->pArContext[pContext->NumWorkerThreads] // presumably the slot after the worker entries — confirm pArContext holds NumWorkerThreads + 1 handles
+
+#ifdef KNOB_ENABLE_AR
+ #define _AR_BEGIN(ctx, type, id) ArchRast::Dispatch(ctx, ArchRast::Start(ArchRast::type, id)) // Dispatches an ArchRast::Start built from type and id
+ #define _AR_END(ctx, type, count) ArchRast::Dispatch(ctx, ArchRast::End(ArchRast::type, count)) // Dispatches an ArchRast::End built from type and count
+ #define _AR_EVENT(ctx, event) ArchRast::Dispatch(ctx, ArchRast::event) // `event` is pasted after ArchRast:: and dispatched as-is
+#else // !KNOB_ENABLE_AR
+ #ifdef KNOB_ENABLE_RDTSC
+ #define _AR_BEGIN(ctx, type, id) (void)ctx; RDTSC_START(type) // NOTE(review): expands to two statements; unsafe as the body of an unbraced if/else
+ #define _AR_END(ctx, type, id) RDTSC_STOP(type, id, 0) // The `id` parameter receives what the AR_*_END wrappers call `count`
+ #else // !KNOB_ENABLE_RDTSC
+ #define _AR_BEGIN(ctx, type, id) (void)ctx // Only references ctx; type and id are dropped
+ #define _AR_END(ctx, type, id) // Expands to nothing
+ #endif // KNOB_ENABLE_RDTSC
+ #define _AR_EVENT(ctx, event) // Expands to nothing when KNOB_ENABLE_AR is not defined
+#endif // KNOB_ENABLE_AR
+
+// Use these macros for api thread. They forward to the _AR_* macros with AR_API_CTX, which references `pContext`.
+#define AR_API_BEGIN(type, id) _AR_BEGIN(AR_API_CTX, type, id)
+#define AR_API_END(type, count) _AR_END(AR_API_CTX, type, count)
+#define AR_API_EVENT(event) _AR_EVENT(AR_API_CTX, event)
+
+// Use these macros for worker threads. They forward to the _AR_* macros with AR_WORKER_CTX, which references `pContext` and `workerId`.
+#define AR_BEGIN(type, id) _AR_BEGIN(AR_WORKER_CTX, type, id)
+#define AR_END(type, count) _AR_END(AR_WORKER_CTX, type, count)
+#define AR_EVENT(event) _AR_EVENT(AR_WORKER_CTX, event)