From: George Kyriazis Date: Tue, 10 Apr 2018 06:05:19 +0000 (-0500) Subject: swr/rast: Optimize late/bindless JIT of samplers X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0899122c03f06eba89889090b1fb1ab1d4d3ddff;p=mesa.git swr/rast: Optimize late/bindless JIT of samplers Add per-worker thread private data to all shader calls Add per-worker sampler cache and jit context Add late LoadTexel JIT support Add per-worker-thread Sampler / LoadTexel JIT Reviewed-by: Bruce Cherniak --- diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index 3141db69ef1..e37e2e4a538 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -1,5 +1,5 @@ /**************************************************************************** -* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved. +* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -122,6 +122,11 @@ HANDLE SwrCreateContext( pContext->apiThreadInfo.numAPIThreadsPerCore = 1; } + if (pCreateInfo->pWorkerPrivateState) + { + pContext->workerPrivateState = *pCreateInfo->pWorkerPrivateState; + } + memset(&pContext->WaitLock, 0, sizeof(pContext->WaitLock)); memset(&pContext->FifosNotEmpty, 0, sizeof(pContext->FifosNotEmpty)); new (&pContext->WaitLock) std::mutex(); diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h index 7247fa4215f..b171188c927 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.h +++ b/src/gallium/drivers/swr/rasterizer/core/api.h @@ -1,5 +1,5 @@ /**************************************************************************** -* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved. +* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -115,7 +115,8 @@ struct SWR_RECT /// @param x - destination x coordinate /// @param y - destination y coordinate /// @param pDstHotTile - pointer to the hot tile surface -typedef void(SWR_API *PFN_LOAD_TILE)(HANDLE hPrivateContext, SWR_FORMAT dstFormat, +typedef void(SWR_API *PFN_LOAD_TILE)(HANDLE hPrivateContext, HANDLE hWorkerPrivateData, + SWR_FORMAT dstFormat, SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, uint8_t *pDstHotTile); @@ -127,7 +128,8 @@ typedef void(SWR_API *PFN_LOAD_TILE)(HANDLE hPrivateContext, SWR_FORMAT dstForma /// @param x - destination x coordinate /// @param y - destination y coordinate /// @param pSrcHotTile - pointer to the hot tile surface -typedef void(SWR_API *PFN_STORE_TILE)(HANDLE hPrivateContext, SWR_FORMAT srcFormat, +typedef void(SWR_API *PFN_STORE_TILE)(HANDLE hPrivateContext, HANDLE hWorkerPrivateData, + SWR_FORMAT srcFormat, SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, uint8_t *pSrcHotTile); @@ -139,7 +141,7 @@ typedef void(SWR_API *PFN_STORE_TILE)(HANDLE hPrivateContext, SWR_FORMAT srcForm /// @param y - destination y coordinate /// @param renderTargetArrayIndex - render target array offset from arrayIndex /// @param pClearColor - pointer to the hot tile's clear value -typedef void(SWR_API *PFN_CLEAR_TILE)(HANDLE hPrivateContext, +typedef void(SWR_API *PFN_CLEAR_TILE)(HANDLE hPrivateContext, HANDLE hWorkerPrivateData, SWR_RENDERTARGET_ATTACHMENT rtIndex, uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, const float* pClearColor); @@ -208,6 +210,21 @@ struct SWR_API_THREADING_INFO // Independent of KNOB_MAX_THREADS_PER_CORE. }; +////////////////////////////////////////////////////////////////////////// +/// SWR_WORKER_PRIVATE_STATE +/// Data used to allocate per-worker thread private data. A pointer +/// to this data will be passed in to each shader function. +///////////////////////////////////////////////////////////////////////// +struct SWR_WORKER_PRIVATE_STATE +{ + typedef void (SWR_API *PFN_WORKER_DATA)(HANDLE hWorkerPrivateData, uint32_t iWorkerNum); + + size_t perWorkerPrivateStateSize; ///< Amount of data to allocate per-worker + PFN_WORKER_DATA pfnInitWorkerData; ///< Init function for worker data. If null + ///< worker data will be initialized to 0. + PFN_WORKER_DATA pfnFinishWorkerData; ///< Finish / destroy function for worker data. + ///< Can be null. +}; ////////////////////////////////////////////////////////////////////////// /// SWR_CREATECONTEXT_INFO @@ -216,7 +233,10 @@ struct SWR_CREATECONTEXT_INFO { // External functions (e.g. sampler) need per draw context state. // Use SwrGetPrivateContextState() to access private state. - uint32_t privateStateSize; + size_t privateStateSize; + + // Optional per-worker state, can be NULL for no worker-private data + SWR_WORKER_PRIVATE_STATE* pWorkerPrivateState; // Callback functions PFN_LOAD_TILE pfnLoadTile; @@ -229,23 +249,23 @@ struct SWR_CREATECONTEXT_INFO // Pointer to rdtsc buckets mgr returned to the caller. // Only populated when KNOB_ENABLE_RDTSC is set - BucketManager* pBucketMgr; + BucketManager* pBucketMgr; // Output: size required memory passed to for SwrSaveState / SwrRestoreState - size_t contextSaveSize; + size_t contextSaveSize; // ArchRast event manager. - HANDLE hArEventManager; + HANDLE hArEventManager; // Input (optional): Threading info that overrides any set KNOB values. - SWR_THREADING_INFO* pThreadInfo; + SWR_THREADING_INFO* pThreadInfo; - // Input (optional}: Info for reserving API threads - SWR_API_THREADING_INFO* pApiThreadInfo; + // Input (optional): Info for reserving API threads + SWR_API_THREADING_INFO* pApiThreadInfo; // Input: if set to non-zero value, overrides KNOB value for maximum // number of draws in flight - uint32_t MAX_DRAWS_IN_FLIGHT; + uint32_t MAX_DRAWS_IN_FLIGHT; }; ////////////////////////////////////////////////////////////////////////// @@ -714,6 +734,7 @@ SWR_FUNC(void, SwrInit); /// @param x, y - Coordinates to raster tile. /// @param pDstHotTile - Pointer to Hot Tile SWR_FUNC(void, SwrLoadHotTile, + HANDLE hWorkerPrivateData, const SWR_SURFACE_STATE *pSrcSurface, SWR_FORMAT dstFormat, SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, @@ -728,6 +749,7 @@ SWR_FUNC(void, SwrLoadHotTile, /// @param x, y - Coordinates to raster tile. /// @param pSrcHotTile - Pointer to Hot Tile SWR_FUNC(void, SwrStoreHotTileToSurface, + HANDLE hWorkerPrivateData, SWR_SURFACE_STATE *pDstSurface, SWR_FORMAT srcFormat, SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, @@ -741,6 +763,7 @@ SWR_FUNC(void, SwrStoreHotTileToSurface, /// @param x, y - Coordinates to raster tile. /// @param pClearColor - Pointer to clear color SWR_FUNC(void, SwrStoreHotTileClear, + HANDLE hWorkerPrivateData, SWR_SURFACE_STATE *pDstSurface, SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, uint32_t x, diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp index 1e0769ae574..5ac9ceb165e 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp @@ -1,5 +1,5 @@ /**************************************************************************** -* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. +* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -78,7 +78,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup csContext.pScratchSpace = (uint8_t*)pScratchSpace; csContext.scratchSpacePerSimd = pDC->pState->state.scratchSpaceSize; - state.pfnCsFunc(GetPrivateState(pDC), &csContext); + state.pfnCsFunc(GetPrivateState(pDC), pContext->threadPool.pThreadData[workerId].pWorkerPrivateData, &csContext); UPDATE_STAT_BE(CsInvocations, state.totalThreadsInGroup); AR_EVENT(CSStats(csContext.stats.numInstExecuted)); @@ -107,6 +107,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile SWR_RENDERTARGET_ATTACHMENT attachment) { SWR_CONTEXT *pContext = pDC->pContext; + HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData; RDTSC_BEGIN(BEStoreTiles, pDC->drawId); @@ -139,7 +140,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[srcFormat]; SWR_ASSERT(pfnClearTiles != nullptr); - pfnClearTiles(pDC, attachment, macroTile, pHotTile->renderTargetArrayIndex, pHotTile->clearData, pDesc->rect); + pfnClearTiles(pDC, hWorkerPrivateData, attachment, macroTile, pHotTile->renderTargetArrayIndex, pHotTile->clearData, pDesc->rect); } if (pHotTile->state == HOTTILE_DIRTY || pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY) @@ -147,7 +148,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile int32_t destX = KNOB_MACROTILE_X_DIM * x; int32_t destY = KNOB_MACROTILE_Y_DIM * y; - pContext->pfnStoreTile(GetPrivateState(pDC), srcFormat, + pContext->pfnStoreTile(GetPrivateState(pDC), hWorkerPrivateData, srcFormat, attachment, destX, destY, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); } diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.h b/src/gallium/drivers/swr/rasterizer/core/backend.h index c8c37e65257..7a842fe0e20 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend.h +++ b/src/gallium/drivers/swr/rasterizer/core/backend.h @@ -1,5 +1,5 @@ /**************************************************************************** -* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. +* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -41,7 +41,7 @@ void ProcessStoreTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTil void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData); void ProcessShutdownBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData); -typedef void(*PFN_CLEAR_TILES)(DRAW_CONTEXT*, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t, uint32_t, DWORD[4], const SWR_RECT& rect); +typedef void(*PFN_CLEAR_TILES)(DRAW_CONTEXT*, HANDLE hWorkerData, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t, uint32_t, DWORD[4], const SWR_RECT& rect); extern PFN_CLEAR_TILES gClearTilesTable[NUM_SWR_FORMATS]; extern PFN_BACKEND_FUNC gBackendNullPs[SWR_MULTISAMPLE_TYPE_COUNT]; diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp index baaa7e61f75..af031f9f9d7 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp @@ -1,5 +1,5 @@ /**************************************************************************** -* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. +* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -76,7 +76,7 @@ void ClearRasterTile(uint8_t *pTileBuffer, simd16vector &value) #endif template -INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, uint32_t renderTargetArrayIndex, DWORD clear[4], const SWR_RECT& rect) +INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, HANDLE hWorkerPrivateData, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, uint32_t renderTargetArrayIndex, DWORD clear[4], const SWR_RECT& rect) { // convert clear color to hottile format // clear color is in RGBA float/uint32 @@ -146,7 +146,7 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, ui const uint32_t macroTileRowStep = (KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * rasterTileStep; const uint32_t pitch = (FormatTraits::bpp * KNOB_MACROTILE_X_DIM / 8); - HOTTILE *pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext, pDC, macroTile, rt, true, numSamples, renderTargetArrayIndex); + HOTTILE *pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext, pDC, hWorkerPrivateData, macroTile, rt, true, numSamples, renderTargetArrayIndex); uint32_t rasterTileStartOffset = (ComputeTileOffset2D< TilingTraits::bpp > >(pitch, clearTile.xmin, clearTile.ymin)) * numSamples; uint8_t* pRasterTileRow = pHotTile->pBuffer + rasterTileStartOffset; //(ComputeTileOffset2D< TilingTraits::bpp > >(pitch, x, y)) * numSamples; @@ -172,6 +172,7 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, ui void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData) { SWR_CONTEXT *pContext = pDC->pContext; + HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData; if (KNOB_FAST_CLEAR) { @@ -191,7 +192,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo { mask &= ~(1 << rt); - HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, (SWR_RENDERTARGET_ATTACHMENT)rt, true, numSamples, pClear->renderTargetArrayIndex); + HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroTile, (SWR_RENDERTARGET_ATTACHMENT)rt, true, numSamples, pClear->renderTargetArrayIndex); // All we want to do here is to mark the hot tile as being in a "needs clear" state. pHotTile->clearData[0] = *(DWORD*)&(pClear->clearRTColor[0]); @@ -204,14 +205,14 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT) { - HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, SWR_ATTACHMENT_DEPTH, true, numSamples, pClear->renderTargetArrayIndex); + HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroTile, SWR_ATTACHMENT_DEPTH, true, numSamples, pClear->renderTargetArrayIndex); pHotTile->clearData[0] = *(DWORD*)&pClear->clearDepth; pHotTile->state = HOTTILE_CLEAR; } if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT) { - HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, SWR_ATTACHMENT_STENCIL, true, numSamples, pClear->renderTargetArrayIndex); + HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroTile, SWR_ATTACHMENT_STENCIL, true, numSamples, pClear->renderTargetArrayIndex); pHotTile->clearData[0] = pClear->clearStencil; pHotTile->state = HOTTILE_CLEAR; @@ -242,7 +243,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo { mask &= ~(1 << rt); - pfnClearTiles(pDC, (SWR_RENDERTARGET_ATTACHMENT)rt, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect); + pfnClearTiles(pDC, hWorkerPrivateData, (SWR_RENDERTARGET_ATTACHMENT)rt, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect); } } @@ -253,7 +254,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_DEPTH_HOT_TILE_FORMAT]; SWR_ASSERT(pfnClearTiles != nullptr); - pfnClearTiles(pDC, SWR_ATTACHMENT_DEPTH, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect); + pfnClearTiles(pDC, hWorkerPrivateData, SWR_ATTACHMENT_DEPTH, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect); } if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT) @@ -262,7 +263,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo clearData[0] = pClear->clearStencil; PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_STENCIL_HOT_TILE_FORMAT]; - pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect); + pfnClearTiles(pDC, hWorkerPrivateData, SWR_ATTACHMENT_STENCIL, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect); } RDTSC_END(BEClear, 1); diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h index 20b2ec58287..05234c21822 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h +++ b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h @@ -1,5 +1,5 @@ /**************************************************************************** -* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. +* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -884,6 +884,9 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t BarycentricCoeffs coeffs; SetupBarycentricCoeffs(&coeffs, work); + SWR_CONTEXT *pContext = pDC->pContext; + void* pWorkerData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData; + SWR_PS_CONTEXT psContext; const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions; SetupPixelShaderContext(&psContext, samplePos, work); @@ -964,7 +967,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t // execute pixel shader RDTSC_BEGIN(BEPixelShader, pDC->drawId); - state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext); + state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext); UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes))); RDTSC_END(BEPixelShader, 0); diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp index c7c6c533e37..5940aa7ba45 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp @@ -1,5 +1,5 @@ /**************************************************************************** -* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. +* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -43,6 +43,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ RDTSC_BEGIN(BESampleRateBackend, pDC->drawId); RDTSC_BEGIN(BESetup, pDC->drawId); + void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData; const API_STATE &state = GetApiState(pDC); BarycentricCoeffs coeffs; @@ -163,7 +164,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ // execute pixel shader RDTSC_BEGIN(BEPixelShader, pDC->drawId); - state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext); + state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext); RDTSC_END(BEPixelShader, 0); // update stats diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp index 26d5a75bd12..aaaba636ed3 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp @@ -1,5 +1,5 @@ /**************************************************************************** -* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. +* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -43,6 +43,8 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 RDTSC_BEGIN(BESingleSampleBackend, pDC->drawId); RDTSC_BEGIN(BESetup, pDC->drawId); + void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData; + const API_STATE &state = GetApiState(pDC); BarycentricCoeffs coeffs; @@ -146,7 +148,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 // execute pixel shader RDTSC_BEGIN(BEPixelShader, pDC->drawId); - state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext); + state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext); RDTSC_END(BEPixelShader, 0); // update stats diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index d31fd37095d..9f8dc887aa6 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -1,5 +1,5 @@ /**************************************************************************** -* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. +* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h index 7bc69f50723..af8f4b8db4f 100644 --- a/src/gallium/drivers/swr/rasterizer/core/context.h +++ b/src/gallium/drivers/swr/rasterizer/core/context.h @@ -1,5 +1,5 @@ /**************************************************************************** -* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved. +* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -483,6 +483,7 @@ struct SWR_CONTEXT THREAD_POOL threadPool; // Thread pool associated with this context SWR_THREADING_INFO threadInfo; SWR_API_THREADING_INFO apiThreadInfo; + SWR_WORKER_PRIVATE_STATE workerPrivateState; uint32_t MAX_DRAWS_IN_FLIGHT; diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index 30c2e7bab51..9630afa036d 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -1,5 +1,5 @@ /**************************************************************************** -* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. +* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -799,6 +799,8 @@ static void GeometryShaderStage( { RDTSC_BEGIN(FEGeometryShader, pDC->drawId); + void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData; + const API_STATE& state = GetApiState(pDC); const SWR_GS_STATE* pState = &state.gsState; SWR_GS_CONTEXT gsContext; @@ -850,7 +852,7 @@ static void GeometryShaderStage( gsContext.mask = GenerateMask(numInputPrims); // execute the geometry shader - state.pfnGsFunc(GetPrivateState(pDC), &gsContext); + state.pfnGsFunc(GetPrivateState(pDC), pWorkerData, &gsContext); AR_EVENT(GSStats(gsContext.stats.numInstExecuted)); for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i) @@ -1169,6 +1171,7 @@ static void TessellationStages( { const API_STATE& state = GetApiState(pDC); const SWR_TS_STATE& tsState = state.tsState; + void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData; SWR_ASSERT(gt_pTessellationThreadData); @@ -1250,7 +1253,7 @@ static void TessellationStages( // Run the HS RDTSC_BEGIN(FEHullShader, pDC->drawId); - state.pfnHsFunc(GetPrivateState(pDC), &hsContext); + state.pfnHsFunc(GetPrivateState(pDC), pWorkerData, &hsContext); RDTSC_END(FEHullShader, 0); UPDATE_STAT_FE(HsInvocations, numPrims); @@ -1315,7 +1318,7 @@ static void TessellationStages( dsContext.mask = GenerateMask(tsData.NumDomainPoints - dsInvocations); RDTSC_BEGIN(FEDomainShader, pDC->drawId); - state.pfnDsFunc(GetPrivateState(pDC), &dsContext); + state.pfnDsFunc(GetPrivateState(pDC), pWorkerData, &dsContext); RDTSC_END(FEDomainShader, 0); AR_EVENT(DSStats(dsContext.stats.numInstExecuted)); @@ -1521,6 +1524,8 @@ void ProcessDraw( RDTSC_BEGIN(FEProcessDraw, pDC->drawId); + void* pWorkerData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData; + DRAW_WORK& work = *(DRAW_WORK*)pUserData; const API_STATE& state = GetApiState(pDC); @@ -1738,13 +1743,13 @@ void ProcessDraw( // 1. Execute FS/VS for a single SIMD. RDTSC_BEGIN(FEFetchShader, pDC->drawId); #if USE_SIMD16_SHADERS - state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo_lo, vin); + state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_lo, vin); #else - state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo_lo, vin_lo); + state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_lo, vin_lo); if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH { - state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo_hi, vin_hi); + state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_hi, vin_hi); } #endif RDTSC_END(FEFetchShader, 0); @@ -1793,15 +1798,15 @@ void ProcessDraw( { RDTSC_BEGIN(FEVertexShader, pDC->drawId); #if USE_SIMD16_VS - state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_lo); + state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_lo); AR_EVENT(VSStats(vsContext_lo.stats.numInstExecuted)); #else - state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_lo); + state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_lo); AR_EVENT(VSStats(vsContext_lo.stats.numInstExecuted)); if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH { - state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_hi); + state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_hi); AR_EVENT(VSStats(vsContext_hi.stats.numInstExecuted)); } #endif @@ -1994,7 +1999,7 @@ void ProcessDraw( // 1. Execute FS/VS for a single SIMD. RDTSC_BEGIN(FEFetchShader, pDC->drawId); - state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo, vout); + state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo, vout); RDTSC_END(FEFetchShader, 0); // forward fetch generated vertex IDs to the vertex shader @@ -2016,7 +2021,7 @@ void ProcessDraw( #endif { RDTSC_BEGIN(FEVertexShader, pDC->drawId); - state.pfnVertexFunc(GetPrivateState(pDC), &vsContext); + state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext); RDTSC_END(FEVertexShader, 0); UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex)); diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp index 08dd51bf731..67c28ad97c4 100644 --- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp @@ -1,5 +1,5 @@ /**************************************************************************** -* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. +* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -302,7 +302,7 @@ void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTi triDesc.Z[0] = triDesc.Z[1] = triDesc.Z[2] = z; RenderOutputBuffers renderBuffers; - GetRenderHotTiles(pDC, macroTile, tileAlignedX >> KNOB_TILE_X_DIM_SHIFT , tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT, + GetRenderHotTiles(pDC, workerId, macroTile, tileAlignedX >> KNOB_TILE_X_DIM_SHIFT , tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT, renderBuffers, triDesc.triFlags.renderTargetArrayIndex); RDTSC_BEGIN(BEPixelBackend, pDC->drawId); diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h b/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h index 7f9b3788c7f..ca39d7c38f8 100644 --- a/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h +++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h @@ -1,5 +1,5 @@ /**************************************************************************** -* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. +* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -40,7 +40,7 @@ extern PFN_WORK_FUNC gRasterizerFuncs[SWR_MULTISAMPLE_TYPE_COUNT][2][2][SWR_INPUT_COVERAGE_COUNT][STATE_VALID_TRI_EDGE_COUNT][2]; template -void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t x, uint32_t y, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex); +void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroID, uint32_t x, uint32_t y, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex); template void StepRasterTileX(uint32_t colorHotTileMask, RenderOutputBuffers &buffers); template @@ -1145,7 +1145,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, uint32_t maxX = maxTileX; RenderOutputBuffers renderBuffers, currentRenderBufferRow; - GetRenderHotTiles(pDC, macroTile, minTileX, minTileY, renderBuffers, triDesc.triFlags.renderTargetArrayIndex); + GetRenderHotTiles(pDC, workerId, macroTile, minTileX, minTileY, renderBuffers, triDesc.triFlags.renderTargetArrayIndex); currentRenderBufferRow = renderBuffers; // rasterize and generate coverage masks per sample @@ -1297,10 +1297,11 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, // Get pointers to hot tile memory for color RT, depth, stencil template -void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t tileX, uint32_t tileY, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex) +void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroID, uint32_t tileX, uint32_t tileY, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex) { const API_STATE& state = GetApiState(pDC); SWR_CONTEXT *pContext = pDC->pContext; + HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData; uint32_t mx, my; MacroTileMgr::getTileIndices(macroID, mx, my); @@ -1316,7 +1317,7 @@ void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t tileX, uint uint32_t colorHottileEnableMask = state.colorHottileEnable; while(_BitScanForward(&rtSlot, colorHottileEnableMask)) { - HOTTILE *pColor = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true, + HOTTILE *pColor = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true, numSamples, renderTargetArrayIndex); pColor->state = HOTTILE_DIRTY; renderBuffers.pColor[rtSlot] = pColor->pBuffer + offset; @@ -1328,7 +1329,7 @@ void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t tileX, uint const uint32_t pitch = KNOB_MACROTILE_X_DIM * FormatTraits::bpp / 8; uint32_t offset = ComputeTileOffset2D::bpp> >(pitch, tileX, tileY); offset*=numSamples; - HOTTILE *pDepth = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_DEPTH, true, + HOTTILE *pDepth = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples, renderTargetArrayIndex); pDepth->state = HOTTILE_DIRTY; SWR_ASSERT(pDepth->pBuffer != nullptr); @@ -1339,7 +1340,7 @@ void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t tileX, uint const uint32_t pitch = KNOB_MACROTILE_X_DIM * FormatTraits::bpp / 8; uint32_t offset = ComputeTileOffset2D::bpp> >(pitch, tileX, tileY); offset*=numSamples; - HOTTILE* pStencil = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_STENCIL, true, + HOTTILE* pStencil = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples, renderTargetArrayIndex); pStencil->state = HOTTILE_DIRTY; SWR_ASSERT(pStencil->pBuffer != nullptr); diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index cdb30f60fdf..217cf44c58f 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -911,18 +911,18 @@ struct SWR_BLEND_CONTEXT /// FUNCTION POINTERS FOR SHADERS #if USE_SIMD16_SHADERS -typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out); +typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out); #else -typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out); +typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out); #endif -typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, SWR_VS_CONTEXT* pVsContext); -typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, SWR_HS_CONTEXT* pHsContext); -typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, SWR_DS_CONTEXT* pDsContext); -typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, SWR_GS_CONTEXT* pGsContext); -typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, SWR_CS_CONTEXT* pCsContext); +typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_VS_CONTEXT* pVsContext); +typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_HS_CONTEXT* pHsContext); +typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_DS_CONTEXT* pDsContext); +typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_GS_CONTEXT* pGsContext); +typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_CS_CONTEXT* pCsContext); typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext); -typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext); -typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext); +typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT *pContext); +typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT *pContext); typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(SWR_BLEND_CONTEXT*); typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar const &); diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp index 3eb20abcbfc..9e16246c3f4 100644 --- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp @@ -1,5 +1,5 @@ /**************************************************************************** -* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved. +* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -42,6 +42,7 @@ #endif #include "common/os.h" +#include "core/api.h" #include "context.h" #include "frontend.h" #include "backend.h" @@ -1128,7 +1129,8 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool) if (pContext->threadInfo.SINGLE_THREADED) { - return; + numAPIReservedThreads = 0; + numThreads = 1; } if (numAPIReservedThreads) @@ -1139,6 +1141,10 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool) { numAPIReservedThreads = 0; } + else + { + memset(pPool->pApiThreadData, 0, sizeof(THREAD_DATA) * numAPIReservedThreads); + } } pPool->numReservedThreads = numAPIReservedThreads; @@ -1147,8 +1153,37 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool) pPool->pThreadData = new (std::nothrow) THREAD_DATA[pPool->numThreads]; SWR_ASSERT(pPool->pThreadData); + memset(pPool->pThreadData, 0, sizeof(THREAD_DATA) * pPool->numThreads); pPool->numaMask = 0; + // Allocate worker private data + pPool->pWorkerPrivateDataArray = nullptr; + if (pContext->workerPrivateState.perWorkerPrivateStateSize) + { + size_t perWorkerSize = AlignUpPow2(pContext->workerPrivateState.perWorkerPrivateStateSize, 64); + size_t totalSize = perWorkerSize * pPool->numThreads; + if (totalSize) + { + pPool->pWorkerPrivateDataArray = AlignedMalloc(totalSize, 64); + SWR_ASSERT(pPool->pWorkerPrivateDataArray); + + void* pWorkerData = pPool->pWorkerPrivateDataArray; + for (uint32_t i = 0; i < pPool->numThreads; ++i) + { + pPool->pThreadData[i].pWorkerPrivateData = pWorkerData; + if (pContext->workerPrivateState.pfnInitWorkerData) + { + pContext->workerPrivateState.pfnInitWorkerData(pWorkerData, i); + } + pWorkerData = PtrAdd(pWorkerData, perWorkerSize); + } + } + } + + if (pContext->threadInfo.SINGLE_THREADED) + { + return; + } pPool->pThreads = new (std::nothrow) THREAD_PTR[pPool->numThreads]; SWR_ASSERT(pPool->pThreads); @@ -1293,13 +1328,13 @@ void StartThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool) /// @param pPool - pointer to thread pool object. void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool) { - if (!pContext->threadInfo.SINGLE_THREADED) - { - // Wait for all threads to finish - SwrWaitForIdle(pContext); + // Wait for all threads to finish + SwrWaitForIdle(pContext); - // Wait for threads to finish and destroy them - for (uint32_t t = 0; t < pPool->numThreads; ++t) + // Wait for threads to finish and destroy them + for (uint32_t t = 0; t < pPool->numThreads; ++t) + { + if (!pContext->threadInfo.SINGLE_THREADED) { // Detach from thread. Cannot join() due to possibility (in Windows) of code // in some DLLMain(THREAD_DETATCH case) blocking the thread until after this returns. @@ -1307,10 +1342,17 @@ void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool) delete(pPool->pThreads[t]); } - delete[] pPool->pThreads; - - // Clean up data used by threads - delete[] pPool->pThreadData; - delete[] pPool->pApiThreadData; + if (pContext->workerPrivateState.pfnFinishWorkerData) + { + pContext->workerPrivateState.pfnFinishWorkerData(pPool->pThreadData[t].pWorkerPrivateData, t); + } } + + delete[] pPool->pThreads; + + // Clean up data used by threads + delete[] pPool->pThreadData; + delete[] pPool->pApiThreadData; + + AlignedFree(pPool->pWorkerPrivateDataArray); } diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.h b/src/gallium/drivers/swr/rasterizer/core/threads.h index 2e53265f424..cb918ddb60d 100644 --- a/src/gallium/drivers/swr/rasterizer/core/threads.h +++ b/src/gallium/drivers/swr/rasterizer/core/threads.h @@ -1,5 +1,5 @@ /**************************************************************************** -* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved. +* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -35,9 +35,11 @@ typedef std::thread* THREAD_PTR; struct SWR_CONTEXT; struct DRAW_CONTEXT; +struct SWR_WORKER_PRIVATE_STATE; struct THREAD_DATA { + void* pWorkerPrivateData;// Pointer to per-worker private data uint32_t procGroupId; // Will always be 0 for non-Windows OS uint32_t threadId; // within the procGroup for Windows uint32_t numaId; // NUMA node id @@ -55,6 +57,7 @@ struct THREAD_POOL uint32_t numThreads; uint32_t numaMask; THREAD_DATA *pThreadData; + void* pWorkerPrivateDataArray; // All memory for worker private data uint32_t numReservedThreads; // Number of threads reserved for API use THREAD_DATA *pApiThreadData; }; diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp index f4686703291..28fa7877114 100644 --- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp @@ -1,5 +1,5 @@ /**************************************************************************** -* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. +* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -83,7 +83,7 @@ void MacroTileMgr::markTileComplete(uint32_t id) tile.mWorkItemsBE = 0; } -HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples, +HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, HANDLE hWorkerPrivateData, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples, uint32_t renderTargetArrayIndex) { uint32_t x, y; @@ -163,11 +163,11 @@ HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32 if (hotTile.state == HOTTILE_DIRTY) { - pContext->pfnStoreTile(GetPrivateState(pDC), format, attachment, + pContext->pfnStoreTile(GetPrivateState(pDC), hWorkerPrivateData, format, attachment, x * KNOB_MACROTILE_X_DIM, y * KNOB_MACROTILE_Y_DIM, hotTile.renderTargetArrayIndex, hotTile.pBuffer); } - pContext->pfnLoadTile(GetPrivateState(pDC), format, attachment, + pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, format, attachment, x * KNOB_MACROTILE_X_DIM, y * KNOB_MACROTILE_Y_DIM, renderTargetArrayIndex, hotTile.pBuffer); hotTile.renderTargetArrayIndex = renderTargetArrayIndex; @@ -379,6 +379,7 @@ void HotTileMgr::ClearStencilHotTile(const HOTTILE* pHotTile) void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID) { const API_STATE& state = GetApiState(pDC); + HANDLE hWorkerPrivateData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData; uint32_t x, y; MacroTileMgr::getTileIndices(macroID, x, y); @@ -392,13 +393,13 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui uint32_t colorHottileEnableMask = state.colorHottileEnable; while (_BitScanForward(&rtSlot, colorHottileEnableMask)) { - HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true, numSamples); + HOTTILE* pHotTile = GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true, numSamples); if (pHotTile->state == HOTTILE_INVALID) { RDTSC_BEGIN(BELoadTiles, pDC->drawId); // invalid hottile before draw requires a load from surface before we can draw to it - pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); + pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); pHotTile->state = HOTTILE_DIRTY; RDTSC_END(BELoadTiles, 0); } @@ -416,12 +417,12 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui // check depth if enabled if (state.depthHottileEnable) { - HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples); + HOTTILE* pHotTile = GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples); if (pHotTile->state == HOTTILE_INVALID) { RDTSC_BEGIN(BELoadTiles, pDC->drawId); // invalid hottile before draw requires a load from surface before we can draw to it - pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); + pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); pHotTile->state = HOTTILE_DIRTY; RDTSC_END(BELoadTiles, 0); } @@ -438,12 +439,12 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui // check stencil if enabled if (state.stencilHottileEnable) { - HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples); + HOTTILE* pHotTile = GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples); if (pHotTile->state == HOTTILE_INVALID) { RDTSC_BEGIN(BELoadTiles, pDC->drawId); // invalid hottile before draw requires a load from surface before we can draw to it - pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); + pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer); pHotTile->state = HOTTILE_DIRTY; RDTSC_END(BELoadTiles, 0); } diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h index 8ef74a7f4b9..2831010b12f 100644 --- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h +++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h @@ -1,5 +1,5 @@ /**************************************************************************** -* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. +* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -305,7 +305,7 @@ public: void InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID); - HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1, + HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, HANDLE hWorkerData, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1, uint32_t renderTargetArrayIndex = 0); HOTTILE *GetHotTileNoLoad(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1); diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp index 7f9c9dd9d7b..284eb27a7d3 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp @@ -1,5 +1,5 @@ /**************************************************************************** -* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. +* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -19,13 +19,13 @@ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. -* +* * @file JitManager.cpp -* +* * @brief Implementation if the Jit Manager. -* +* * Notes: -* +* ******************************************************************************/ #include "jit_pch.hpp" @@ -66,7 +66,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core) InitializeNativeTargetAsmPrinter(); InitializeNativeTargetDisassembler(); - + TargetOptions tOpts; tOpts.AllowFPOpFusion = FPOpFusion::Fast; tOpts.NoInfsFPMath = false; @@ -125,6 +125,8 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core) // llvm5 is picky and does not take a void * type fsArgs.push_back(PointerType::get(Gen_SWR_FETCH_CONTEXT(this), 0)); + fsArgs.push_back(Type::getInt8PtrTy(mContext)); + fsArgs.push_back(PointerType::get(Gen_SWR_FETCH_CONTEXT(this), 0)); #if USE_SIMD16_SHADERS fsArgs.push_back(PointerType::get(Gen_simd16vertex(this), 0)); @@ -158,7 +160,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core) void JitManager::SetupNewModule() { SWR_ASSERT(mIsModuleFinalized == true && "Current module is not finalized!"); - + std::unique_ptr newModule(new Module("", mContext)); mpCurrentModule = newModule.get(); #if defined(_WIN32) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index af97b83cb2d..09590b71047 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -1,5 +1,5 @@ /**************************************************************************** -* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. +* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -91,6 +91,7 @@ struct FetchJit : public BuilderGfxMem void CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pBase, Value* offsets, Value* result[4]); void ConvertFormat(SWR_FORMAT format, Value *texels[4]); + Value* mpWorkerData; Value* mpFetchInfo; }; @@ -113,6 +114,8 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState) privateContext->setName("privateContext"); SetPrivateContext(privateContext); + mpWorkerData = &*argitr; ++argitr; + mpWorkerData->setName("pWorkerData"); mpFetchInfo = &*argitr; ++argitr; mpFetchInfo->setName("fetchInfo"); Value* pVtxOut = &*argitr; @@ -1097,8 +1100,7 @@ Value* FetchJit::GetSimdValid32bitIndices(Value* pIndices, Value* pLastIndex) Value* vIndexMask = ICMP_SGT(vMaxIndex, vIndexOffsets); // Load the indices; OOB loads 0 - pIndices = BITCAST(pIndices, PointerType::get(mSimdInt32Ty, 0)); - return MASKED_LOAD(pIndices, 4, vIndexMask, VIMMED1(0)); + return MASKED_LOAD(pIndices, 4, vIndexMask, VIMMED1(0), "vIndices", PointerType::get(mSimdInt32Ty, 0), GFX_MEM_CLIENT_FETCH); } ////////////////////////////////////////////////////////////////////////// diff --git a/src/gallium/drivers/swr/rasterizer/memory/ClearTile.cpp b/src/gallium/drivers/swr/rasterizer/memory/ClearTile.cpp index 98bf28b21d1..6a528b6a0f2 100644 --- a/src/gallium/drivers/swr/rasterizer/memory/ClearTile.cpp +++ b/src/gallium/drivers/swr/rasterizer/memory/ClearTile.cpp @@ -153,6 +153,7 @@ struct StoreMacroTileClear /// @param x, y - Coordinates to raster tile. /// @param pClearColor - Pointer to clear color void SwrStoreHotTileClear( + HANDLE hWorkerPrivateData, SWR_SURFACE_STATE *pDstSurface, SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, UINT x, diff --git a/src/gallium/drivers/swr/rasterizer/memory/LoadTile.cpp b/src/gallium/drivers/swr/rasterizer/memory/LoadTile.cpp index 9dbc16ad9ae..8033304d20c 100644 --- a/src/gallium/drivers/swr/rasterizer/memory/LoadTile.cpp +++ b/src/gallium/drivers/swr/rasterizer/memory/LoadTile.cpp @@ -54,6 +54,7 @@ static std::mutex sBucketMutex; /// @param x, y - Coordinates to raster tile. /// @param pDstHotTile - Pointer to Hot Tile void SwrLoadHotTile( + HANDLE hWorkerPrivateData, const SWR_SURFACE_STATE *pSrcSurface, SWR_FORMAT dstFormat, SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp index 9c20669f77b..53b82c4c12c 100644 --- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp +++ b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp @@ -59,6 +59,7 @@ static std::vector sBuckets(NUM_SWR_FORMATS, -1); /// @param x, y - Coordinates to raster tile. /// @param pSrcHotTile - Pointer to Hot Tile void SwrStoreHotTileToSurface( + HANDLE hWorkerPrivateData, SWR_SURFACE_STATE *pDstSurface, SWR_FORMAT srcFormat, SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, diff --git a/src/gallium/drivers/swr/swr_memory.h b/src/gallium/drivers/swr/swr_memory.h index fc5561680cb..bab7800c604 100644 --- a/src/gallium/drivers/swr/swr_memory.h +++ b/src/gallium/drivers/swr/swr_memory.h @@ -25,6 +25,7 @@ INLINE void swr_LoadHotTile(HANDLE hPrivateContext, + HANDLE hWorkerPrivateData, SWR_FORMAT dstFormat, SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, UINT x, UINT y, @@ -34,11 +35,12 @@ swr_LoadHotTile(HANDLE hPrivateContext, swr_draw_context *pDC = (swr_draw_context*)hPrivateContext; SWR_SURFACE_STATE *pSrcSurface = &pDC->renderTargets[renderTargetIndex]; - pDC->pAPI->pfnSwrLoadHotTile(pSrcSurface, dstFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pDstHotTile); + pDC->pAPI->pfnSwrLoadHotTile(hWorkerPrivateData, pSrcSurface, dstFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pDstHotTile); } INLINE void swr_StoreHotTile(HANDLE hPrivateContext, + HANDLE hWorkerPrivateData, SWR_FORMAT srcFormat, SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, UINT x, UINT y, @@ -48,11 +50,12 @@ swr_StoreHotTile(HANDLE hPrivateContext, swr_draw_context *pDC = (swr_draw_context*)hPrivateContext; SWR_SURFACE_STATE *pDstSurface = &pDC->renderTargets[renderTargetIndex]; - pDC->pAPI->pfnSwrStoreHotTileToSurface(pDstSurface, srcFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pSrcHotTile); + pDC->pAPI->pfnSwrStoreHotTileToSurface(hWorkerPrivateData, pDstSurface, srcFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pSrcHotTile); } INLINE void swr_StoreHotTileClear(HANDLE hPrivateContext, + HANDLE hWorkerPrivateData, SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, UINT x, UINT y, @@ -63,5 +66,5 @@ swr_StoreHotTileClear(HANDLE hPrivateContext, swr_draw_context *pDC = (swr_draw_context*)hPrivateContext; SWR_SURFACE_STATE *pDstSurface = &pDC->renderTargets[renderTargetIndex]; - pDC->pAPI->pfnSwrStoreHotTileClear(pDstSurface, renderTargetIndex, x, y, renderTargetArrayIndex, pClearColor); + pDC->pAPI->pfnSwrStoreHotTileClear(hWorkerPrivateData, pDstSurface, renderTargetIndex, x, y, renderTargetArrayIndex, pClearColor); } diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp index 6ea021a987e..13d89863fd7 100644 --- a/src/gallium/drivers/swr/swr_shader.cpp +++ b/src/gallium/drivers/swr/swr_shader.cpp @@ -586,6 +586,7 @@ BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key) attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); std::vector gsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), + PointerType::get(mInt8Ty, 0), PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)}; FunctionType *vsFuncType = FunctionType::get(Type::getVoidTy(JM()->mContext), gsArgs, false); @@ -610,6 +611,8 @@ BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key) auto argitr = pFunction->arg_begin(); Value *hPrivateData = &*argitr++; hPrivateData->setName("hPrivateData"); + Value *pWorkerData = &*argitr++; + pWorkerData->setName("pWorkerData"); Value *pGsCtx = &*argitr++; pGsCtx->setName("gsCtx"); @@ -754,6 +757,7 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key) attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); std::vector vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), + PointerType::get(mInt8Ty, 0), PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)}; FunctionType *vsFuncType = FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false); @@ -778,6 +782,8 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key) auto argitr = pFunction->arg_begin(); Value *hPrivateData = &*argitr++; hPrivateData->setName("hPrivateData"); + Value *pWorkerData = &*argitr++; + pWorkerData->setName("pWorkerData"); Value *pVsCtx = &*argitr++; pVsCtx->setName("vsCtx"); @@ -1037,6 +1043,7 @@ BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key) attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); std::vector fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), + PointerType::get(mInt8Ty, 0), PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)}; FunctionType *funcType = FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false); @@ -1060,6 +1067,8 @@ BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key) auto args = pFunction->arg_begin(); Value *hPrivateData = &*args++; hPrivateData->setName("hPrivateData"); + Value *pWorkerData = &*args++; + pWorkerData->setName("pWorkerData"); Value *pPS = &*args++; pPS->setName("psCtx");