X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fswr%2Frasterizer%2Fcore%2Fapi.cpp;h=6bdb8f4b41041214a8fd55d0951e42b80101b6f5;hb=175052507ccc8569802ae2ee0b0b094c03698ff7;hp=21b9e3f8c7d98747db5c5dee07698f87258d35de;hpb=57b07498d239745c13d4b8db0e9bf32516a770c0;p=mesa.git diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index 21b9e3f8c7d..6bdb8f4b410 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -1,5 +1,5 @@ /**************************************************************************** -* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. +* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -46,6 +46,10 @@ #include "common/simdintrin.h" #include "common/os.h" +#include "archrast/archrast.h" + +static const SWR_RECT g_MaxScissorRect = { 0, 0, KNOB_MAX_SCISSOR_X, KNOB_MAX_SCISSOR_Y }; + void SetupDefaultState(SWR_CONTEXT *pContext); static INLINE SWR_CONTEXT* GetContext(HANDLE hContext) @@ -84,25 +88,31 @@ HANDLE SwrCreateContext( pContext->dsRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator); } - if (!KNOB_SINGLE_THREADED) - { - memset(&pContext->WaitLock, 0, sizeof(pContext->WaitLock)); - memset(&pContext->FifosNotEmpty, 0, sizeof(pContext->FifosNotEmpty)); - new (&pContext->WaitLock) std::mutex(); - new (&pContext->FifosNotEmpty) std::condition_variable(); - - CreateThreadPool(pContext, &pContext->threadPool); - } + pContext->threadInfo.MAX_WORKER_THREADS = KNOB_MAX_WORKER_THREADS; + pContext->threadInfo.MAX_NUMA_NODES = KNOB_MAX_NUMA_NODES; + pContext->threadInfo.MAX_CORES_PER_NUMA_NODE = KNOB_MAX_CORES_PER_NUMA_NODE; + pContext->threadInfo.MAX_THREADS_PER_CORE = KNOB_MAX_THREADS_PER_CORE; + 
pContext->threadInfo.SINGLE_THREADED = KNOB_SINGLE_THREADED; - // Calling createThreadPool() above can set SINGLE_THREADED - if (KNOB_SINGLE_THREADED) + if (pCreateInfo->pThreadInfo) { - SET_KNOB(HYPERTHREADED_FE, false); - pContext->NumWorkerThreads = 1; - pContext->NumFEThreads = 1; - pContext->NumBEThreads = 1; + pContext->threadInfo = *pCreateInfo->pThreadInfo; } + memset(&pContext->WaitLock, 0, sizeof(pContext->WaitLock)); + memset(&pContext->FifosNotEmpty, 0, sizeof(pContext->FifosNotEmpty)); + new (&pContext->WaitLock) std::mutex(); + new (&pContext->FifosNotEmpty) std::condition_variable(); + + CreateThreadPool(pContext, &pContext->threadPool); + + pContext->ppScratch = new uint8_t*[pContext->NumWorkerThreads]; + pContext->pStats = new SWR_STATS[pContext->NumWorkerThreads]; + + // Setup ArchRast thread contexts which includes +1 for API thread. + pContext->pArContext = new HANDLE[pContext->NumWorkerThreads+1]; + pContext->pArContext[pContext->NumWorkerThreads] = ArchRast::CreateThreadContext(); + // Allocate scratch space for workers. ///@note We could lazily allocate this but its rather small amount of memory. for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i) @@ -110,13 +120,16 @@ HANDLE SwrCreateContext( #if defined(_WIN32) uint32_t numaNode = pContext->threadPool.pThreadData ? pContext->threadPool.pThreadData[i].numaId : 0; - pContext->pScratch[i] = (uint8_t*)VirtualAllocExNuma( + pContext->ppScratch[i] = (uint8_t*)VirtualAllocExNuma( GetCurrentProcess(), nullptr, 32 * sizeof(KILOBYTE), MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE, numaNode); #else - pContext->pScratch[i] = (uint8_t*)AlignedMalloc(32 * sizeof(KILOBYTE), KNOB_SIMD_WIDTH * 4); + pContext->ppScratch[i] = (uint8_t*)AlignedMalloc(32 * sizeof(KILOBYTE), KNOB_SIMD_WIDTH * 4); #endif + + // Initialize worker thread context for ArchRast. 
+ pContext->pArContext[i] = ArchRast::CreateThreadContext(); } // State setup AFTER context is fully initialized @@ -128,10 +141,13 @@ HANDLE SwrCreateContext( // initialize function pointer tables InitClearTilesTable(); - // initialize store tiles function + // initialize callback functions pContext->pfnLoadTile = pCreateInfo->pfnLoadTile; pContext->pfnStoreTile = pCreateInfo->pfnStoreTile; pContext->pfnClearTile = pCreateInfo->pfnClearTile; + pContext->pfnUpdateSoWriteOffset = pCreateInfo->pfnUpdateSoWriteOffset; + pContext->pfnUpdateStats = pCreateInfo->pfnUpdateStats; + pContext->pfnUpdateStatsFE = pCreateInfo->pfnUpdateStatsFE; // pass pointer to bucket manager back to caller #ifdef KNOB_ENABLE_RDTSC @@ -151,6 +167,7 @@ void SwrDestroyContext(HANDLE hContext) // free the fifos for (uint32_t i = 0; i < KNOB_MAX_DRAWS_IN_FLIGHT; ++i) { + delete [] pContext->dcRing[i].dynState.pStats; delete pContext->dcRing[i].pArena; delete pContext->dsRing[i].pArena; pContext->pMacroTileManagerArray[i].~MacroTileMgr(); @@ -164,12 +181,18 @@ void SwrDestroyContext(HANDLE hContext) for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i) { #if defined(_WIN32) - VirtualFree(pContext->pScratch[i], 0, MEM_RELEASE); + VirtualFree(pContext->ppScratch[i], 0, MEM_RELEASE); #else - AlignedFree(pContext->pScratch[i]); + AlignedFree(pContext->ppScratch[i]); #endif + + ArchRast::DestroyThreadContext(pContext->pArContext[i]); } + delete [] pContext->ppScratch; + delete [] pContext->pArContext; + delete [] pContext->pStats; + delete(pContext->pHotTileMgr); pContext->~SWR_CONTEXT(); @@ -186,8 +209,6 @@ void WakeAllThreads(SWR_CONTEXT *pContext) pContext->FifosNotEmpty.notify_all(); } -static TileSet gSingleThreadLockedTiles; - template void QueueWork(SWR_CONTEXT *pContext) { @@ -206,13 +227,18 @@ void QueueWork(SWR_CONTEXT *pContext) // then moved on if all work is done.) 
pContext->pCurDrawContext->threadsDone = pContext->NumFEThreads + pContext->NumBEThreads; + if (IsDraw) + { + InterlockedIncrement((volatile LONG*)&pContext->drawsOutstandingFE); + } + _ReadWriteBarrier(); { std::unique_lock lock(pContext->WaitLock); pContext->dcRing.Enqueue(); } - if (KNOB_SINGLE_THREADED) + if (pContext->threadInfo.SINGLE_THREADED) { // flush denormals to 0 uint32_t mxcsr = _mm_getcsr(); @@ -222,7 +248,7 @@ void QueueWork(SWR_CONTEXT *pContext) { uint32_t curDraw[2] = { pContext->pCurDrawContext->drawId, pContext->pCurDrawContext->drawId }; WorkOnFifoFE(pContext, 0, curDraw[0]); - WorkOnFifoBE(pContext, 0, curDraw[1], gSingleThreadLockedTiles, 0, 0); + WorkOnFifoBE(pContext, 0, curDraw[1], pContext->singleThreadLockedTiles, 0, 0); } else { @@ -273,16 +299,14 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false) uint64_t curDraw = pContext->dcRing.GetHead(); uint32_t dcIndex = curDraw % KNOB_MAX_DRAWS_IN_FLIGHT; - static uint64_t lastDrawChecked; - static uint32_t lastFrameChecked; - if ((pContext->frameCount - lastFrameChecked) > 2 || - (curDraw - lastDrawChecked) > 0x10000) + if ((pContext->frameCount - pContext->lastFrameChecked) > 2 || + (curDraw - pContext->lastDrawChecked) > 0x10000) { // Take this opportunity to clean-up old arena allocations pContext->cachingArenaAllocator.FreeOldBlocks(); - lastFrameChecked = pContext->frameCount; - lastDrawChecked = curDraw; + pContext->lastFrameChecked = pContext->frameCount; + pContext->lastDrawChecked = curDraw; } DRAW_CONTEXT* pCurDrawContext = &pContext->dcRing[dcIndex]; @@ -336,6 +360,8 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false) pCurDrawContext->threadsDone = 0; pCurDrawContext->retireCallback.pfnCallbackFunc = nullptr; + pCurDrawContext->dynState.Reset(pContext->NumWorkerThreads); + // Assign unique drawId for this DC pCurDrawContext->drawId = pContext->dcRing.GetHead(); @@ -428,6 +454,20 @@ void SwrWaitForIdle(HANDLE hContext) 
RDTSC_STOP(APIWaitForIdle, 1, 0); } +void SwrWaitForIdleFE(HANDLE hContext) +{ + SWR_CONTEXT *pContext = GetContext(hContext); + + RDTSC_START(APIWaitForIdle); + + while (pContext->drawsOutstandingFE > 0) + { + _mm_pause(); + } + + RDTSC_STOP(APIWaitForIdle, 1, 0); +} + void SwrSetVertexBuffers( HANDLE hContext, uint32_t numBuffers, @@ -607,13 +647,18 @@ void SwrSetBlendFunc( } // update guardband multipliers for the viewport -void updateGuardband(API_STATE *pState) +void updateGuardbands(API_STATE *pState) { - // guardband center is viewport center - pState->gbState.left = KNOB_GUARDBAND_WIDTH / pState->vp[0].width; - pState->gbState.right = KNOB_GUARDBAND_WIDTH / pState->vp[0].width; - pState->gbState.top = KNOB_GUARDBAND_HEIGHT / pState->vp[0].height; - pState->gbState.bottom = KNOB_GUARDBAND_HEIGHT / pState->vp[0].height; + uint32_t numGbs = pState->gsState.emitsRenderTargetArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1; + + for(uint32_t i = 0; i < numGbs; ++i) + { + // guardband center is viewport center + pState->gbState.left[i] = KNOB_GUARDBAND_WIDTH / pState->vp[i].width; + pState->gbState.right[i] = KNOB_GUARDBAND_WIDTH / pState->vp[i].width; + pState->gbState.top[i] = KNOB_GUARDBAND_HEIGHT / pState->vp[i].height; + pState->gbState.bottom[i] = KNOB_GUARDBAND_HEIGHT / pState->vp[i].height; + } } void SwrSetRastState( @@ -630,7 +675,7 @@ void SwrSetViewports( HANDLE hContext, uint32_t numViewports, const SWR_VIEWPORT* pViewports, - const SWR_VIEWPORT_MATRIX* pMatrices) + const SWR_VIEWPORT_MATRICES* pMatrices) { SWR_ASSERT(numViewports <= KNOB_NUM_VIEWPORTS_SCISSORS, "Invalid number of viewports."); @@ -642,7 +687,8 @@ void SwrSetViewports( if (pMatrices != nullptr) { - memcpy(&pState->vpMatrix[0], pMatrices, sizeof(SWR_VIEWPORT_MATRIX) * numViewports); + // @todo Faster to copy portions of the SOA or just copy all of it? 
+ memcpy(&pState->vpMatrices, pMatrices, sizeof(SWR_VIEWPORT_MATRICES)); } else { @@ -651,22 +697,22 @@ void SwrSetViewports( { if (pContext->driverType == DX) { - pState->vpMatrix[i].m00 = pState->vp[i].width / 2.0f; - pState->vpMatrix[i].m11 = -pState->vp[i].height / 2.0f; - pState->vpMatrix[i].m22 = pState->vp[i].maxZ - pState->vp[i].minZ; - pState->vpMatrix[i].m30 = pState->vp[i].x + pState->vpMatrix[i].m00; - pState->vpMatrix[i].m31 = pState->vp[i].y - pState->vpMatrix[i].m11; - pState->vpMatrix[i].m32 = pState->vp[i].minZ; + pState->vpMatrices.m00[i] = pState->vp[i].width / 2.0f; + pState->vpMatrices.m11[i] = -pState->vp[i].height / 2.0f; + pState->vpMatrices.m22[i] = pState->vp[i].maxZ - pState->vp[i].minZ; + pState->vpMatrices.m30[i] = pState->vp[i].x + pState->vpMatrices.m00[i]; + pState->vpMatrices.m31[i] = pState->vp[i].y - pState->vpMatrices.m11[i]; + pState->vpMatrices.m32[i] = pState->vp[i].minZ; } else { // Standard, with the exception that Y is inverted. - pState->vpMatrix[i].m00 = (pState->vp[i].width - pState->vp[i].x) / 2.0f; - pState->vpMatrix[i].m11 = (pState->vp[i].y - pState->vp[i].height) / 2.0f; - pState->vpMatrix[i].m22 = (pState->vp[i].maxZ - pState->vp[i].minZ) / 2.0f; - pState->vpMatrix[i].m30 = pState->vp[i].x + pState->vpMatrix[i].m00; - pState->vpMatrix[i].m31 = pState->vp[i].height + pState->vpMatrix[i].m11; - pState->vpMatrix[i].m32 = pState->vp[i].minZ + pState->vpMatrix[i].m22; + pState->vpMatrices.m00[i] = (pState->vp[i].width - pState->vp[i].x) / 2.0f; + pState->vpMatrices.m11[i] = (pState->vp[i].y - pState->vp[i].height) / 2.0f; + pState->vpMatrices.m22[i] = (pState->vp[i].maxZ - pState->vp[i].minZ) / 2.0f; + pState->vpMatrices.m30[i] = pState->vp[i].x + pState->vpMatrices.m00[i]; + pState->vpMatrices.m31[i] = pState->vp[i].height + pState->vpMatrices.m11[i]; + pState->vpMatrices.m32[i] = pState->vp[i].minZ + pState->vpMatrices.m22[i]; // Now that the matrix is calculated, clip the view coords to screen size. 
// OpenGL allows for -ve x,y in the viewport. @@ -676,69 +722,75 @@ void SwrSetViewports( } } - updateGuardband(pState); + updateGuardbands(pState); } void SwrSetScissorRects( HANDLE hContext, uint32_t numScissors, - const BBOX* pScissors) + const SWR_RECT* pScissors) { SWR_ASSERT(numScissors <= KNOB_NUM_VIEWPORTS_SCISSORS, "Invalid number of scissor rects."); API_STATE* pState = GetDrawState(GetContext(hContext)); - memcpy(&pState->scissorRects[0], pScissors, numScissors * sizeof(BBOX)); + memcpy(&pState->scissorRects[0], pScissors, numScissors * sizeof(pScissors[0])); }; void SetupMacroTileScissors(DRAW_CONTEXT *pDC) { API_STATE *pState = &pDC->pState->state; - uint32_t left, right, top, bottom; + uint32_t numScissors = pState->gsState.emitsViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1; + pState->scissorsTileAligned = true; - // Set up scissor dimensions based on scissor or viewport - if (pState->rastState.scissorEnable) - { - // scissor rect right/bottom edge are exclusive, core expects scissor dimensions to be inclusive, so subtract one pixel from right/bottom edges - left = pState->scissorRects[0].left; - right = pState->scissorRects[0].right; - top = pState->scissorRects[0].top; - bottom = pState->scissorRects[0].bottom; - } - else + for (uint32_t index = 0; index < numScissors; ++index) { - // the vp width and height must be added to origin un-rounded then the result round to -inf. - // The cast to int works for rounding assuming all [left, right, top, bottom] are positive. 
- left = (int32_t)pState->vp[0].x; - right = (int32_t)(pState->vp[0].x + pState->vp[0].width); - top = (int32_t)pState->vp[0].y; - bottom = (int32_t)(pState->vp[0].y + pState->vp[0].height); - } + SWR_RECT &scissorInFixedPoint = pState->scissorsInFixedPoint[index]; - right = std::min(right, KNOB_MAX_SCISSOR_X); - bottom = std::min(bottom, KNOB_MAX_SCISSOR_Y); + // Set up scissor dimensions based on scissor or viewport + if (pState->rastState.scissorEnable) + { + scissorInFixedPoint = pState->scissorRects[index]; + } + else + { + // the vp width and height must be added to origin un-rounded then the result round to -inf. + // The cast to int works for rounding assuming all [left, right, top, bottom] are positive. + scissorInFixedPoint.xmin = (int32_t)pState->vp[index].x; + scissorInFixedPoint.xmax = (int32_t)(pState->vp[index].x + pState->vp[index].width); + scissorInFixedPoint.ymin = (int32_t)pState->vp[index].y; + scissorInFixedPoint.ymax = (int32_t)(pState->vp[index].y + pState->vp[index].height); + } - if (left > KNOB_MAX_SCISSOR_X || top > KNOB_MAX_SCISSOR_Y) - { - pState->scissorInFixedPoint.left = 0; - pState->scissorInFixedPoint.right = 0; - pState->scissorInFixedPoint.top = 0; - pState->scissorInFixedPoint.bottom = 0; - } - else - { - pState->scissorInFixedPoint.left = left * FIXED_POINT_SCALE; - pState->scissorInFixedPoint.right = right * FIXED_POINT_SCALE - 1; - pState->scissorInFixedPoint.top = top * FIXED_POINT_SCALE; - pState->scissorInFixedPoint.bottom = bottom * FIXED_POINT_SCALE - 1; + // Clamp to max rect + scissorInFixedPoint &= g_MaxScissorRect; + + // Test for tile alignment + bool tileAligned; + tileAligned = (scissorInFixedPoint.xmin % KNOB_TILE_X_DIM) == 0; + tileAligned &= (scissorInFixedPoint.ymin % KNOB_TILE_Y_DIM) == 0; + tileAligned &= (scissorInFixedPoint.xmax % KNOB_TILE_X_DIM) == 0; + tileAligned &= (scissorInFixedPoint.ymax % KNOB_TILE_Y_DIM) == 0; + + pState->scissorsTileAligned &= tileAligned; + + // Scale to fixed point + 
scissorInFixedPoint.xmin *= FIXED_POINT_SCALE; + scissorInFixedPoint.xmax *= FIXED_POINT_SCALE; + scissorInFixedPoint.ymin *= FIXED_POINT_SCALE; + scissorInFixedPoint.ymax *= FIXED_POINT_SCALE; + + // Make scissor inclusive + scissorInFixedPoint.xmax -= 1; + scissorInFixedPoint.ymax -= 1; } } // templated backend function tables extern PFN_BACKEND_FUNC gBackendNullPs[SWR_MULTISAMPLE_TYPE_COUNT]; -extern PFN_BACKEND_FUNC gBackendSingleSample[2][2][2]; -extern PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT][SWR_MSAA_SAMPLE_PATTERN_COUNT][2][2][2][2]; -extern PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_COUNT][2][2][2]; +extern PFN_BACKEND_FUNC gBackendSingleSample[SWR_INPUT_COVERAGE_COUNT][2][2]; +extern PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT][SWR_MSAA_SAMPLE_PATTERN_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2][2]; +extern PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2]; void SetupPipeline(DRAW_CONTEXT *pDC) { DRAW_STATE* pState = pDC->pState; @@ -757,8 +809,7 @@ void SetupPipeline(DRAW_CONTEXT *pDC) const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || rastState.forcedSampleCount) ? 1 : 0; const uint32_t centroid = ((psState.barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0) ? 1 : 0; const uint32_t canEarlyZ = (psState.forceEarlyZ || (!psState.writesODepth && !psState.usesSourceDepth && !psState.usesUAV)) ? 1 : 0; - const uint32_t inputCoverage = (psState.inputCoverage != SWR_INPUT_COVERAGE_NONE) ? 
1 : 0; - + SWR_BARYCENTRICS_MASK barycentricsMask = (SWR_BARYCENTRICS_MASK)psState.barycentricsMask; // select backend function @@ -769,20 +820,20 @@ void SetupPipeline(DRAW_CONTEXT *pDC) { // always need to generate I & J per sample for Z interpolation barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK); - backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.samplePattern][inputCoverage][centroid][forcedSampleCount][canEarlyZ]; + backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.samplePattern][psState.inputCoverage][centroid][forcedSampleCount][canEarlyZ]; } else { // always need to generate I & J per pixel for Z interpolation barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_PIXEL_MASK); - backendFuncs.pfnBackend = gBackendSingleSample[inputCoverage][centroid][canEarlyZ]; + backendFuncs.pfnBackend = gBackendSingleSample[psState.inputCoverage][centroid][canEarlyZ]; } break; case SWR_SHADING_RATE_SAMPLE: SWR_ASSERT(rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN); // always need to generate I & J per sample for Z interpolation barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK); - backendFuncs.pfnBackend = gBackendSampleRateTable[rastState.sampleCount][inputCoverage][centroid][canEarlyZ]; + backendFuncs.pfnBackend = gBackendSampleRateTable[rastState.sampleCount][psState.inputCoverage][centroid][canEarlyZ]; break; default: SWR_ASSERT(0 && "Invalid shading rate"); @@ -1155,6 +1206,9 @@ void DrawIndexedInstance( DRAW_CONTEXT* pDC = GetDrawContext(pContext); API_STATE* pState = &pDC->pState->state; + AR_BEGIN(AR_API_CTX, APIDrawIndexed, pDC->drawId); + AR_EVENT(AR_API_CTX, DrawIndexedInstance(topology, numIndices, indexOffset, baseVertex, numInstances, startInstance)); + uint32_t maxIndicesPerDraw = MaxVertsPerDraw(pDC, numIndices, topology); uint32_t primsPerDraw = GetNumPrims(topology, 
maxIndicesPerDraw); uint32_t remainingIndices = numIndices; @@ -1226,6 +1280,7 @@ void DrawIndexedInstance( pDC = GetDrawContext(pContext); pDC->pState->state.rastState.cullMode = oldCullMode; + AR_END(AR_API_CTX, APIDrawIndexed, numIndices * numInstances); RDTSC_STOP(APIDrawIndexed, numIndices * numInstances, 0); } @@ -1273,9 +1328,12 @@ void SwrDrawIndexedInstanced( /// @brief SwrInvalidateTiles /// @param hContext - Handle passed back from SwrCreateContext /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to invalidate. -void SwrInvalidateTiles( +/// @param invalidateRect - The pixel-coordinate rectangle to invalidate. This will be expanded to +/// be hottile size-aligned. +void SWR_API SwrInvalidateTiles( HANDLE hContext, - uint32_t attachmentMask) + uint32_t attachmentMask, + const SWR_RECT& invalidateRect) { if (KNOB_TOSS_DRAW) { @@ -1288,7 +1346,8 @@ void SwrInvalidateTiles( pDC->FeWork.type = DISCARDINVALIDATETILES; pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles; pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask; - memset(&pDC->FeWork.desc.discardInvalidateTiles.rect, 0, sizeof(SWR_RECT)); + pDC->FeWork.desc.discardInvalidateTiles.rect = invalidateRect; + pDC->FeWork.desc.discardInvalidateTiles.rect &= g_MaxScissorRect; pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_INVALID; pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = false; pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = false; @@ -1301,11 +1360,12 @@ void SwrInvalidateTiles( /// @brief SwrDiscardRect /// @param hContext - Handle passed back from SwrCreateContext /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard. -/// @param rect - if rect is all zeros, the entire attachment surface will be discarded -void SwrDiscardRect( +/// @param rect - The pixel-coordinate rectangle to discard. Only fully-covered hottiles will be +/// discarded. 
+void SWR_API SwrDiscardRect( HANDLE hContext, uint32_t attachmentMask, - SWR_RECT rect) + const SWR_RECT& rect) { if (KNOB_TOSS_DRAW) { @@ -1320,6 +1380,7 @@ void SwrDiscardRect( pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles; pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask; pDC->FeWork.desc.discardInvalidateTiles.rect = rect; + pDC->FeWork.desc.discardInvalidateTiles.rect &= g_MaxScissorRect; pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_RESOLVED; pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = true; pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = true; @@ -1368,10 +1429,11 @@ void SwrDispatch( // Deswizzles, converts and stores current contents of the hot tiles to surface // described by pState -void SwrStoreTiles( +void SWR_API SwrStoreTiles( HANDLE hContext, SWR_RENDERTARGET_ATTACHMENT attachment, - SWR_TILE_STATE postStoreTileState) + SWR_TILE_STATE postStoreTileState, + const SWR_RECT& storeRect) { if (KNOB_TOSS_DRAW) { @@ -1383,12 +1445,12 @@ void SwrStoreTiles( SWR_CONTEXT *pContext = GetContext(hContext); DRAW_CONTEXT* pDC = GetDrawContext(pContext); - SetupMacroTileScissors(pDC); - pDC->FeWork.type = STORETILES; pDC->FeWork.pfnWork = ProcessStoreTiles; pDC->FeWork.desc.storeTiles.attachment = attachment; pDC->FeWork.desc.storeTiles.postStoreTileState = postStoreTileState; + pDC->FeWork.desc.storeTiles.rect = storeRect; + pDC->FeWork.desc.storeTiles.rect &= g_MaxScissorRect; //enqueue QueueDraw(pContext); @@ -1396,12 +1458,21 @@ void SwrStoreTiles( RDTSC_STOP(APIStoreTiles, 0, 0); } -void SwrClearRenderTarget( +////////////////////////////////////////////////////////////////////////// +/// @brief SwrClearRenderTarget - Clear attached render targets / depth / stencil +/// @param hContext - Handle passed back from SwrCreateContext +/// @param clearMask - combination of SWR_CLEAR_COLOR / SWR_CLEAR_DEPTH / SWR_CLEAR_STENCIL flags (or SWR_CLEAR_NONE) +/// @param clearColor - color use for clearing 
render targets +/// @param z - depth value use for clearing depth buffer +/// @param stencil - stencil value used for clearing stencil buffer +/// @param clearRect - The pixel-coordinate rectangle to clear in all cleared buffers +void SWR_API SwrClearRenderTarget( HANDLE hContext, uint32_t clearMask, const float clearColor[4], float z, - uint8_t stencil) + uint8_t stencil, + const SWR_RECT& clearRect) { if (KNOB_TOSS_DRAW) { @@ -1411,16 +1482,16 @@ void SwrClearRenderTarget( RDTSC_START(APIClearRenderTarget); SWR_CONTEXT *pContext = GetContext(hContext); - DRAW_CONTEXT* pDC = GetDrawContext(pContext); - SetupMacroTileScissors(pDC); - CLEAR_FLAGS flags; + flags.bits = 0; flags.mask = clearMask; pDC->FeWork.type = CLEAR; pDC->FeWork.pfnWork = ProcessClear; + pDC->FeWork.desc.clear.rect = clearRect; + pDC->FeWork.desc.clear.rect &= g_MaxScissorRect; pDC->FeWork.desc.clear.flags = flags; pDC->FeWork.desc.clear.clearDepth = z; pDC->FeWork.desc.clear.clearRTColor[0] = clearColor[0]; @@ -1475,33 +1546,6 @@ VOID* SwrAllocDrawContextMemory( return pDC->pState->pArena->AllocAligned(size, align); } -////////////////////////////////////////////////////////////////////////// -/// @brief Returns pointer to SWR stats. -/// @note The counters are atomically incremented by multiple threads. -/// When calling this, you need to ensure all previous operations -/// have completed. -/// @todo If necessary, add a callback to avoid stalling the pipe to -/// sample the counters. -/// @param hContext - Handle passed back from SwrCreateContext -/// @param pStats - SWR will fill this out for caller. 
-void SwrGetStats( - HANDLE hContext, - SWR_STATS* pStats) -{ - SWR_CONTEXT *pContext = GetContext(hContext); - DRAW_CONTEXT* pDC = GetDrawContext(pContext); - - pDC->FeWork.type = QUERYSTATS; - pDC->FeWork.pfnWork = ProcessQueryStats; - pDC->FeWork.desc.queryStats.pStats = pStats; - - // cannot execute until all previous draws have completed - pDC->dependent = true; - - //enqueue - QueueDraw(pContext); -} - ////////////////////////////////////////////////////////////////////////// /// @brief Enables stats counting /// @param hContext - Handle passed back from SwrCreateContext