From b62217780a1a5a7cb7eb940a2b9bf265af1a91a0 Mon Sep 17 00:00:00 2001 From: Tomasz Pyra Date: Thu, 12 Dec 2019 15:38:43 +0100 Subject: [PATCH] gallium/swr: Fix arb_transform_feedback2 Added support for pause/resume transform feedback. Fixed DrawTransformFeedback. Reviewed-by: Jan Zielinski Reviewed-by: Krzysztof Raszkowski --- .../drivers/swr/rasterizer/core/api.cpp | 13 +++++++++-- src/gallium/drivers/swr/rasterizer/core/api.h | 7 ++++++ .../drivers/swr/rasterizer/core/context.h | 3 +++ .../drivers/swr/rasterizer/core/frontend.cpp | 2 ++ .../drivers/swr/rasterizer/core/threads.cpp | 3 +++ src/gallium/drivers/swr/swr_context.cpp | 13 +++++++++++ src/gallium/drivers/swr/swr_context.h | 3 +++ src/gallium/drivers/swr/swr_draw.cpp | 10 +++++++++ src/gallium/drivers/swr/swr_screen.cpp | 4 ++-- src/gallium/drivers/swr/swr_state.cpp | 22 ++++++++++--------- 10 files changed, 66 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index a6f86b36f98..5405bf2d8ba 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -82,6 +82,7 @@ HANDLE SwrCreateContext(SWR_CREATECONTEXT_INFO* pCreateInfo) pContext->pfnUpdateSoWriteOffset = pCreateInfo->pfnUpdateSoWriteOffset; pContext->pfnUpdateStats = pCreateInfo->pfnUpdateStats; pContext->pfnUpdateStatsFE = pCreateInfo->pfnUpdateStatsFE; + pContext->pfnUpdateStreamOut = pCreateInfo->pfnUpdateStreamOut; pContext->hExternalMemory = pCreateInfo->hExternalMemory; @@ -616,9 +617,17 @@ void SwrSetSoBuffers(HANDLE hContext, SWR_STREAMOUT_BUFFER* pSoBuffer, uint32_t { API_STATE* pState = GetDrawState(GetContext(hContext)); - SWR_ASSERT((slot < 4), "There are only 4 SO buffer slots [0, 3]\nSlot requested: %d", slot); + SWR_ASSERT((slot < MAX_SO_STREAMS), "There are only 4 SO buffer slots [0, 3]\nSlot requested: %d", slot); - pState->soBuffer[slot] = *pSoBuffer; + // remember buffer status in 
case of future resume StreamOut + if ((pState->soBuffer[slot].pBuffer != 0) && (pSoBuffer->pBuffer == 0)) + pState->soPausedBuffer[slot] = pState->soBuffer[slot]; + + // resume + if (pState->soPausedBuffer[slot].pBuffer == pSoBuffer->pBuffer) + pState->soBuffer[slot] = pState->soPausedBuffer[slot]; + else + pState->soBuffer[slot] = *pSoBuffer; } void SwrSetVertexFunc(HANDLE hContext, PFN_VERTEX_FUNC pfnVertexFunc) diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h index 93ea0d42535..29651c9beed 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.h +++ b/src/gallium/drivers/swr/rasterizer/core/api.h @@ -187,6 +187,12 @@ typedef void(SWR_API* PFN_UPDATE_STATS)(HANDLE hPrivateContext, const SWR_STATS* /// @param pStats - pointer to draw stats typedef void(SWR_API* PFN_UPDATE_STATS_FE)(HANDLE hPrivateContext, const SWR_STATS_FE* pStats); +////////////////////////////////////////////////////////////////////////// +/// @brief Callback to allow driver to update StreamOut status +/// @param hPrivateContext - handle to private data +/// @param numPrims - number of primitives written to StreamOut buffer +typedef void(SWR_API* PFN_UPDATE_STREAMOUT)(HANDLE hPrivateContext, uint64_t numPrims); + ////////////////////////////////////////////////////////////////////////// /// BucketManager /// Forward Declaration (see rdtsc_buckets.h for full definition) @@ -272,6 +278,7 @@ struct SWR_CREATECONTEXT_INFO PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset; PFN_UPDATE_STATS pfnUpdateStats; PFN_UPDATE_STATS_FE pfnUpdateStatsFE; + PFN_UPDATE_STREAMOUT pfnUpdateStreamOut; // Pointer to rdtsc buckets mgr returned to the caller. 
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h index 13cb7c8b856..8f74f135b63 100644 --- a/src/gallium/drivers/swr/rasterizer/core/context.h +++ b/src/gallium/drivers/swr/rasterizer/core/context.h @@ -276,6 +276,7 @@ OSALIGNLINE(struct) API_STATE // Streamout state SWR_STREAMOUT_STATE soState; mutable SWR_STREAMOUT_BUFFER soBuffer[MAX_SO_STREAMS]; + mutable SWR_STREAMOUT_BUFFER soPausedBuffer[MAX_SO_STREAMS]; // Tessellation State PFN_HS_FUNC pfnHsFunc; @@ -422,6 +423,7 @@ struct DRAW_DYNAMIC_STATE SWR_STATS_FE statsFE; // Only one FE thread per DC. SWR_STATS* pStats; + uint64_t soPrims; // number of primitives written to StreamOut buffer }; // Draw Context @@ -540,6 +542,7 @@ struct SWR_CONTEXT PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset; PFN_UPDATE_STATS pfnUpdateStats; PFN_UPDATE_STATS_FE pfnUpdateStatsFE; + PFN_UPDATE_STREAMOUT pfnUpdateStreamOut; // Global Stats diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp index ab079ab4aa0..45bc545b164 100644 --- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp @@ -589,6 +589,8 @@ static void StreamOut( } } + pDC->dynState.soPrims += soContext.numPrimsWritten; + UPDATE_STAT_FE(SoPrimStorageNeeded[streamIndex], soContext.numPrimStorageNeeded); UPDATE_STAT_FE(SoNumPrimsWritten[streamIndex], soContext.numPrimsWritten); diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp index 987469340d2..113a31ee0c6 100644 --- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp @@ -714,6 +714,9 @@ INLINE void CompleteDrawFE(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEX } } + if (pContext->pfnUpdateStreamOut) + pContext->pfnUpdateStreamOut(GetPrivateState(pDC), pDC->dynState.soPrims); + // Ensure all
streaming writes are globally visible before marking this FE done _mm_mfence(); pDC->doneFE = true; diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp index 74516c80f9b..dbc4487fbda 100644 --- a/src/gallium/drivers/swr/swr_context.cpp +++ b/src/gallium/drivers/swr/swr_context.cpp @@ -472,6 +472,18 @@ swr_UpdateStatsFE(HANDLE hPrivateContext, const SWR_STATS_FE *pStats) } } +static void +swr_UpdateStreamOut(HANDLE hPrivateContext, uint64_t numPrims) +{ + swr_draw_context *pDC = (swr_draw_context*)hPrivateContext; + + if (!pDC) + return; + + if (pDC->soPrims) + *pDC->soPrims += numPrims; +} + struct pipe_context * swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags) { @@ -496,6 +508,7 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags) createInfo.pfnStoreTile = swr_StoreHotTile; createInfo.pfnUpdateStats = swr_UpdateStats; createInfo.pfnUpdateStatsFE = swr_UpdateStatsFE; + createInfo.pfnUpdateStreamOut = swr_UpdateStreamOut; createInfo.pfnMakeGfxPtr = swr_MakeGfxPtr; SWR_THREADING_INFO threadingInfo {0}; diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h index 55de8e04663..82e6a6692f7 100644 --- a/src/gallium/drivers/swr/swr_context.h +++ b/src/gallium/drivers/swr/swr_context.h @@ -107,6 +107,8 @@ struct swr_draw_context { struct swr_query_result *pStats; // @llvm_struct SWR_INTERFACE *pAPI; // @llvm_struct - Needed for the swr_memory callbacks SWR_TILE_INTERFACE *pTileAPI; // @llvm_struct - Needed for the swr_memory callbacks + + uint64_t* soPrims; //number of primitives written to StreamOut buffer }; /* gen_llvm_types FINI */ @@ -160,6 +162,7 @@ struct swr_context { // streamout pipe_stream_output_target *so_targets[MAX_SO_STREAMS]; uint32_t num_so_targets; + uint64_t so_primCounter; // number of primitives written to StreamOut buffer /* Temp storage for user_buffer constants */ struct swr_scratch_buffers *scratch; diff --git 
a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp index a94cdd6da0b..0377861b7a4 100644 --- a/src/gallium/drivers/swr/swr_draw.cpp +++ b/src/gallium/drivers/swr/swr_draw.cpp @@ -62,6 +62,16 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) swr_update_draw_context(ctx); + struct pipe_draw_info resolved_info; + /* DrawTransformFeedback */ + if (info->count_from_stream_output) { + // trick copied from softpipe to modify const struct *info + memcpy(&resolved_info, (void*)info, sizeof(struct pipe_draw_info)); + resolved_info.count = ctx->so_primCounter * resolved_info.vertices_per_patch; + resolved_info.max_index = resolved_info.count - 1; + info = &resolved_info; + } + if (ctx->vs->pipe.stream_output.num_outputs) { if (!ctx->vs->soFunc[info->mode]) { STREAMOUT_COMPILE_STATE state = {0}; diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp index ac53fc518f0..e54be2dc35b 100644 --- a/src/gallium/drivers/swr/swr_screen.cpp +++ b/src/gallium/drivers/swr/swr_screen.cpp @@ -277,6 +277,8 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: case PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE: + case PIPE_CAP_QUERY_SO_OVERFLOW: + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: return 1; /* MSAA support @@ -347,7 +349,6 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS: case PIPE_CAP_TGSI_ARRAY_COMPONENTS: case PIPE_CAP_TGSI_CAN_READ_OUTPUTS: - case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: case PIPE_CAP_NATIVE_FENCE_FD: case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: case PIPE_CAP_FBFETCH: @@ -365,7 +366,6 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_POST_DEPTH_COVERAGE: case PIPE_CAP_BINDLESS_TEXTURE: case PIPE_CAP_NIR_SAMPLERS_AS_DEREF: - case PIPE_CAP_QUERY_SO_OVERFLOW: case PIPE_CAP_MEMOBJ: case 
PIPE_CAP_LOAD_CONSTBUF: case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS: diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp index 345fef4c856..3a007db4c1c 100644 --- a/src/gallium/drivers/swr/swr_state.cpp +++ b/src/gallium/drivers/swr/swr_state.cpp @@ -1763,22 +1763,23 @@ swr_update_derived(struct pipe_context *pipe, pipe_stream_output_info *stream_output = &ctx->vs->pipe.stream_output; - for (uint32_t i = 0; i < ctx->num_so_targets; i++) { + for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) { SWR_STREAMOUT_BUFFER buffer = {0}; - if (!ctx->so_targets[i]) - continue; - buffer.enable = true; - buffer.pBuffer = - (gfxptr_t)(swr_resource_data(ctx->so_targets[i]->buffer) + - ctx->so_targets[i]->buffer_offset); - buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2; - buffer.pitch = stream_output->stride[i]; - buffer.streamOffset = 0; + if (ctx->so_targets[i]) { + buffer.enable = true; + buffer.pBuffer = + (gfxptr_t)(swr_resource_data(ctx->so_targets[i]->buffer) + + ctx->so_targets[i]->buffer_offset); + buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2; + buffer.pitch = stream_output->stride[i]; + buffer.streamOffset = 0; + } ctx->api.pfnSwrSetSoBuffers(ctx->swrContext, &buffer, i); } } + if (ctx->dirty & (SWR_NEW_CLIP | SWR_NEW_RASTERIZER | SWR_NEW_VS)) { // shader exporting clip distances overrides all user clip planes if (ctx->rasterizer->clip_plane_enable && @@ -1902,6 +1903,7 @@ swr_set_so_targets(struct pipe_context *pipe, } swr->num_so_targets = num_targets; + swr->swrDC.soPrims = &swr->so_primCounter; swr->dirty |= SWR_NEW_SO; } -- 2.30.2