gallium/swr: Fix arb_transform_feedback2
author Tomasz Pyra <tomasz.pyra@intel.com>
Thu, 12 Dec 2019 14:38:43 +0000 (15:38 +0100)
committer Jan Zielinski <jan.zielinski@intel.com>
Fri, 13 Dec 2019 10:58:36 +0000 (10:58 +0000)
Added support for pause/resume transform feedback.
Fixed DrawTransformFeedback.

Reviewed-by: Jan Zielinski <jan.zielinski@intel.com>
Reviewed-by: Krzysztof Raszkowski <krzysztof.raszkowski@intel.com>
src/gallium/drivers/swr/rasterizer/core/api.cpp
src/gallium/drivers/swr/rasterizer/core/api.h
src/gallium/drivers/swr/rasterizer/core/context.h
src/gallium/drivers/swr/rasterizer/core/frontend.cpp
src/gallium/drivers/swr/rasterizer/core/threads.cpp
src/gallium/drivers/swr/swr_context.cpp
src/gallium/drivers/swr/swr_context.h
src/gallium/drivers/swr/swr_draw.cpp
src/gallium/drivers/swr/swr_screen.cpp
src/gallium/drivers/swr/swr_state.cpp

index a6f86b36f9805087216c79b8cdd7f0b526d186f4..5405bf2d8ba476bdc39e47d572b8144544722135 100644 (file)
@@ -82,6 +82,7 @@ HANDLE SwrCreateContext(SWR_CREATECONTEXT_INFO* pCreateInfo)
     pContext->pfnUpdateSoWriteOffset     = pCreateInfo->pfnUpdateSoWriteOffset;
     pContext->pfnUpdateStats             = pCreateInfo->pfnUpdateStats;
     pContext->pfnUpdateStatsFE           = pCreateInfo->pfnUpdateStatsFE;
+    pContext->pfnUpdateStreamOut         = pCreateInfo->pfnUpdateStreamOut;
 
 
     pContext->hExternalMemory = pCreateInfo->hExternalMemory;
@@ -616,9 +617,17 @@ void SwrSetSoBuffers(HANDLE hContext, SWR_STREAMOUT_BUFFER* pSoBuffer, uint32_t
 {
     API_STATE* pState = GetDrawState(GetContext(hContext));
 
-    SWR_ASSERT((slot < 4), "There are only 4 SO buffer slots [0, 3]\nSlot requested: %d", slot);
+    SWR_ASSERT((slot < MAX_SO_STREAMS), "There are only 4 SO buffer slots [0, 3]\nSlot requested: %d", slot);
 
-    pState->soBuffer[slot] = *pSoBuffer;
+    // remember buffer status in case of future resume StreamOut
+    if ((pState->soBuffer[slot].pBuffer != 0) && (pSoBuffer->pBuffer == 0))
+       pState->soPausedBuffer[slot] = pState->soBuffer[slot];
+
+    // resume
+    if (pState->soPausedBuffer[slot].pBuffer == pSoBuffer->pBuffer)
+       pState->soBuffer[slot] = pState->soPausedBuffer[slot];
+    else
+        pState->soBuffer[slot] = *pSoBuffer;
 }
 
 void SwrSetVertexFunc(HANDLE hContext, PFN_VERTEX_FUNC pfnVertexFunc)
index 93ea0d42535a51fcae3f209ecfbdd6569554a5ce..29651c9beedaf9a5a53d8e26e994fee35e54251c 100644 (file)
@@ -187,6 +187,12 @@ typedef void(SWR_API* PFN_UPDATE_STATS)(HANDLE hPrivateContext, const SWR_STATS*
 /// @param pStats - pointer to draw stats
 typedef void(SWR_API* PFN_UPDATE_STATS_FE)(HANDLE hPrivateContext, const SWR_STATS_FE* pStats);
 
+//////////////////////////////////////////////////////////////////////////
+/// @brief Callback to allow driver to update StreamOut status
+/// @param hPrivateContext - handle to private data
+/// @param numPrims - number of primitives written to StreamOut buffer
+typedef void(SWR_API* PFN_UPDATE_STREAMOUT)(HANDLE hPrivateContext, uint64_t numPrims);
+
 //////////////////////////////////////////////////////////////////////////
 /// BucketManager
 /// Forward Declaration (see rdtsc_buckets.h for full definition)
@@ -272,6 +278,7 @@ struct SWR_CREATECONTEXT_INFO
     PFN_UPDATE_SO_WRITE_OFFSET     pfnUpdateSoWriteOffset;
     PFN_UPDATE_STATS               pfnUpdateStats;
     PFN_UPDATE_STATS_FE            pfnUpdateStatsFE;
+    PFN_UPDATE_STREAMOUT           pfnUpdateStreamOut;
 
 
     // Pointer to rdtsc buckets mgr returned to the caller.
index 13cb7c8b856b2e20502846a3d525e254637d25c2..8f74f135b63cfd58f664ed9063f565c21e7aba31 100644 (file)
@@ -276,6 +276,7 @@ OSALIGNLINE(struct) API_STATE
     // Streamout state
     SWR_STREAMOUT_STATE          soState;
     mutable SWR_STREAMOUT_BUFFER soBuffer[MAX_SO_STREAMS];
+    mutable SWR_STREAMOUT_BUFFER soPausedBuffer[MAX_SO_STREAMS];
 
     // Tessellation State
     PFN_HS_FUNC  pfnHsFunc;
@@ -422,6 +423,7 @@ struct DRAW_DYNAMIC_STATE
 
     SWR_STATS_FE statsFE; // Only one FE thread per DC.
     SWR_STATS*   pStats;
+    uint64_t     soPrims; // number of primitives written to StreamOut buffer
 };
 
 // Draw Context
@@ -540,6 +542,7 @@ struct SWR_CONTEXT
     PFN_UPDATE_SO_WRITE_OFFSET     pfnUpdateSoWriteOffset;
     PFN_UPDATE_STATS               pfnUpdateStats;
     PFN_UPDATE_STATS_FE            pfnUpdateStatsFE;
+    PFN_UPDATE_STREAMOUT           pfnUpdateStreamOut;
 
 
     // Global Stats
index ab079ab4aa02751831d9bd1ff154c9305ababa1c..45bc545b164381a3ba626c6cecb98cceee7b75eb 100644 (file)
@@ -589,6 +589,8 @@ static void StreamOut(
         }
     }
 
+    pDC->dynState.soPrims += soContext.numPrimsWritten;
+
     UPDATE_STAT_FE(SoPrimStorageNeeded[streamIndex], soContext.numPrimStorageNeeded);
     UPDATE_STAT_FE(SoNumPrimsWritten[streamIndex], soContext.numPrimsWritten);
 
index 987469340d2b7a89b3c0462af6726fb74f038c03..113a31ee0c63e6242313c3911cbfc7700a75889f 100644 (file)
@@ -714,6 +714,9 @@ INLINE void CompleteDrawFE(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEX
         }
     }
 
+    if (pContext->pfnUpdateStreamOut)
+        pContext->pfnUpdateStreamOut(GetPrivateState(pDC),  pDC->dynState.soPrims);
+
     // Ensure all streaming writes are globally visible before marking this FE done
     _mm_mfence();
     pDC->doneFE = true;
index 74516c80f9b703ba02833c6efa91dcca39b57256..dbc4487fbdaaaca984d3885ad551d825f5229880 100644 (file)
@@ -472,6 +472,18 @@ swr_UpdateStatsFE(HANDLE hPrivateContext, const SWR_STATS_FE *pStats)
    }
 }
 
+static void
+swr_UpdateStreamOut(HANDLE hPrivateContext, uint64_t numPrims)
+{
+   swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
+
+   if (!pDC)
+      return;
+
+   if (pDC->soPrims)
+       *pDC->soPrims += numPrims;
+}
+
 struct pipe_context *
 swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
 {
@@ -496,6 +508,7 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
    createInfo.pfnStoreTile = swr_StoreHotTile;
    createInfo.pfnUpdateStats = swr_UpdateStats;
    createInfo.pfnUpdateStatsFE = swr_UpdateStatsFE;
+   createInfo.pfnUpdateStreamOut = swr_UpdateStreamOut;
    createInfo.pfnMakeGfxPtr = swr_MakeGfxPtr;
 
    SWR_THREADING_INFO threadingInfo {0};
index 55de8e04663df69719e0a92157f87dd3bf374110..82e6a6692f744c3ae547113cc0fe3ceb8cb92216 100644 (file)
@@ -107,6 +107,8 @@ struct swr_draw_context {
    struct swr_query_result *pStats; // @llvm_struct
    SWR_INTERFACE *pAPI; // @llvm_struct - Needed for the swr_memory callbacks
    SWR_TILE_INTERFACE *pTileAPI; // @llvm_struct - Needed for the swr_memory callbacks
+
+   uint64_t* soPrims; //number of primitives written to StreamOut buffer
 };
 
 /* gen_llvm_types FINI */
@@ -160,6 +162,7 @@ struct swr_context {
    // streamout
    pipe_stream_output_target *so_targets[MAX_SO_STREAMS];
    uint32_t num_so_targets;
+   uint64_t so_primCounter; // number of primitives written to StreamOut buffer
 
    /* Temp storage for user_buffer constants */
    struct swr_scratch_buffers *scratch;
index a94cdd6da0b89b66ee0aa64eecea22627c423255..0377861b7a49e8b791b68d747a174a9372a907e3 100644 (file)
@@ -62,6 +62,16 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
 
    swr_update_draw_context(ctx);
 
+   struct pipe_draw_info resolved_info;
+   /* DrawTransformFeedback */
+   if (info->count_from_stream_output) {
+      // trick copied from softpipe to modify const struct *info
+      memcpy(&resolved_info, (void*)info, sizeof(struct pipe_draw_info));
+      resolved_info.count = ctx->so_primCounter * resolved_info.vertices_per_patch;
+      resolved_info.max_index = resolved_info.count - 1;
+      info = &resolved_info;
+   }
+
    if (ctx->vs->pipe.stream_output.num_outputs) {
       if (!ctx->vs->soFunc[info->mode]) {
          STREAMOUT_COMPILE_STATE state = {0};
index ac53fc518f039b2ff3da1e42ecf928fc71031e64..e54be2dc35b76ec46c38dbcec5d73d4b210e4be2 100644 (file)
@@ -277,6 +277,8 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_TEXTURE_QUERY_LOD:
    case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
    case PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE:
+   case PIPE_CAP_QUERY_SO_OVERFLOW:
+   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
       return 1;
 
    /* MSAA support
@@ -347,7 +349,6 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
    case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
    case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
-   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
    case PIPE_CAP_NATIVE_FENCE_FD:
    case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
    case PIPE_CAP_FBFETCH:
@@ -365,7 +366,6 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_POST_DEPTH_COVERAGE:
    case PIPE_CAP_BINDLESS_TEXTURE:
    case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
-   case PIPE_CAP_QUERY_SO_OVERFLOW:
    case PIPE_CAP_MEMOBJ:
    case PIPE_CAP_LOAD_CONSTBUF:
    case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
index 345fef4c856be0ac09a65302da499885308e9207..3a007db4c1c0af4965bdc90408a039d732669a4e 100644 (file)
@@ -1763,22 +1763,23 @@ swr_update_derived(struct pipe_context *pipe,
 
       pipe_stream_output_info *stream_output = &ctx->vs->pipe.stream_output;
 
-      for (uint32_t i = 0; i < ctx->num_so_targets; i++) {
+      for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) {
          SWR_STREAMOUT_BUFFER buffer = {0};
-         if (!ctx->so_targets[i])
-            continue;
-         buffer.enable = true;
-         buffer.pBuffer =
-            (gfxptr_t)(swr_resource_data(ctx->so_targets[i]->buffer) +
-                         ctx->so_targets[i]->buffer_offset);
-         buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2;
-         buffer.pitch = stream_output->stride[i];
-         buffer.streamOffset = 0;
+         if (ctx->so_targets[i]) {
+             buffer.enable = true;
+             buffer.pBuffer =
+                (gfxptr_t)(swr_resource_data(ctx->so_targets[i]->buffer) +
+                             ctx->so_targets[i]->buffer_offset);
+             buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2;
+             buffer.pitch = stream_output->stride[i];
+             buffer.streamOffset = 0;
+        }
 
          ctx->api.pfnSwrSetSoBuffers(ctx->swrContext, &buffer, i);
       }
    }
 
+
    if (ctx->dirty & (SWR_NEW_CLIP | SWR_NEW_RASTERIZER | SWR_NEW_VS)) {
       // shader exporting clip distances overrides all user clip planes
       if (ctx->rasterizer->clip_plane_enable &&
@@ -1902,6 +1903,7 @@ swr_set_so_targets(struct pipe_context *pipe,
    }
 
    swr->num_so_targets = num_targets;
+   swr->swrDC.soPrims = &swr->so_primCounter;
 
    swr->dirty |= SWR_NEW_SO;
 }