Added support for pause/resume transform feedback.
Fixed DrawTransformFeedback.
Reviewed-by: Jan Zielinski <jan.zielinski@intel.com>
Reviewed-by: Krzysztof Raszkowski <krzysztof.raszkowski@intel.com>
pContext->pfnUpdateSoWriteOffset = pCreateInfo->pfnUpdateSoWriteOffset;
pContext->pfnUpdateStats = pCreateInfo->pfnUpdateStats;
pContext->pfnUpdateStatsFE = pCreateInfo->pfnUpdateStatsFE;
+ pContext->pfnUpdateStreamOut = pCreateInfo->pfnUpdateStreamOut;
pContext->hExternalMemory = pCreateInfo->hExternalMemory;
{
API_STATE* pState = GetDrawState(GetContext(hContext));
- SWR_ASSERT((slot < 4), "There are only 4 SO buffer slots [0, 3]\nSlot requested: %d", slot);
+ SWR_ASSERT((slot < MAX_SO_STREAMS), "There are only 4 SO buffer slots [0, 3]\nSlot requested: %d", slot);
- pState->soBuffer[slot] = *pSoBuffer;
+ // remember buffer status in case of future resume StreamOut
+ if ((pState->soBuffer[slot].pBuffer != 0) && (pSoBuffer->pBuffer == 0))
+ pState->soPausedBuffer[slot] = pState->soBuffer[slot];
+
+ // resume
+ if (pState->soPausedBuffer[slot].pBuffer == pSoBuffer->pBuffer)
+ pState->soBuffer[slot] = pState->soPausedBuffer[slot];
+ else
+ pState->soBuffer[slot] = *pSoBuffer;
}
void SwrSetVertexFunc(HANDLE hContext, PFN_VERTEX_FUNC pfnVertexFunc)
/// @param pStats - pointer to draw stats
typedef void(SWR_API* PFN_UPDATE_STATS_FE)(HANDLE hPrivateContext, const SWR_STATS_FE* pStats);
+//////////////////////////////////////////////////////////////////////////
+/// @brief Callback to allow driver to update StreamOut status
+/// @param hPrivateContext - handle to private data
+/// @param numPrims - number of primitives written to StreamOut buffer
+typedef void(SWR_API* PFN_UPDATE_STREAMOUT)(HANDLE hPrivateContext, uint64_t numPrims);
+
//////////////////////////////////////////////////////////////////////////
/// BucketManager
/// Forward Declaration (see rdtsc_buckets.h for full definition)
PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
PFN_UPDATE_STATS pfnUpdateStats;
PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
+ PFN_UPDATE_STREAMOUT pfnUpdateStreamOut;
// Pointer to rdtsc buckets mgr returned to the caller.
// Streamout state
SWR_STREAMOUT_STATE soState;
mutable SWR_STREAMOUT_BUFFER soBuffer[MAX_SO_STREAMS];
+ mutable SWR_STREAMOUT_BUFFER soPausedBuffer[MAX_SO_STREAMS];
// Tessellation State
PFN_HS_FUNC pfnHsFunc;
SWR_STATS_FE statsFE; // Only one FE thread per DC.
SWR_STATS* pStats;
+ uint64_t soPrims; // number of primitives written to StreamOut buffer
};
// Draw Context
PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
PFN_UPDATE_STATS pfnUpdateStats;
PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
+ PFN_UPDATE_STREAMOUT pfnUpdateStreamOut;
// Global Stats
}
}
+ pDC->dynState.soPrims += soContext.numPrimsWritten;
+
UPDATE_STAT_FE(SoPrimStorageNeeded[streamIndex], soContext.numPrimStorageNeeded);
UPDATE_STAT_FE(SoNumPrimsWritten[streamIndex], soContext.numPrimsWritten);
}
}
+ if (pContext->pfnUpdateStreamOut)
+ pContext->pfnUpdateStreamOut(GetPrivateState(pDC), pDC->dynState.soPrims);
+
// Ensure all streaming writes are globally visible before marking this FE done
_mm_mfence();
pDC->doneFE = true;
}
}
+static void // callback: adds numPrims to the counter referenced by the draw context
+swr_UpdateStreamOut(HANDLE hPrivateContext, uint64_t numPrims)
+{
+ swr_draw_context *pDC = (swr_draw_context*)hPrivateContext; // the private handle is interpreted as a swr_draw_context
+
+ if (!pDC) // no draw context supplied: nothing to update
+ return;
+
+ if (pDC->soPrims) // the counter pointer may be unset; skip the update in that case
+ *pDC->soPrims += numPrims; // accumulate into the pointed-to 64-bit counter
+}
+
struct pipe_context *
swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
{
createInfo.pfnStoreTile = swr_StoreHotTile;
createInfo.pfnUpdateStats = swr_UpdateStats;
createInfo.pfnUpdateStatsFE = swr_UpdateStatsFE;
+ createInfo.pfnUpdateStreamOut = swr_UpdateStreamOut;
createInfo.pfnMakeGfxPtr = swr_MakeGfxPtr;
SWR_THREADING_INFO threadingInfo {0};
struct swr_query_result *pStats; // @llvm_struct
SWR_INTERFACE *pAPI; // @llvm_struct - Needed for the swr_memory callbacks
SWR_TILE_INTERFACE *pTileAPI; // @llvm_struct - Needed for the swr_memory callbacks
+
+ uint64_t* soPrims; //number of primitives written to StreamOut buffer
};
/* gen_llvm_types FINI */
// streamout
pipe_stream_output_target *so_targets[MAX_SO_STREAMS];
uint32_t num_so_targets;
+ uint64_t so_primCounter; // number of primitives written to StreamOut buffer
/* Temp storage for user_buffer constants */
struct swr_scratch_buffers *scratch;
swr_update_draw_context(ctx);
+ struct pipe_draw_info resolved_info;
+ /* DrawTransformFeedback */
+ if (info->count_from_stream_output) {
+ // trick copied from softpipe to modify const struct *info
+ memcpy(&resolved_info, (void*)info, sizeof(struct pipe_draw_info));
+ resolved_info.count = ctx->so_primCounter * resolved_info.vertices_per_patch;
+ resolved_info.max_index = resolved_info.count - 1;
+ info = &resolved_info;
+ }
+
if (ctx->vs->pipe.stream_output.num_outputs) {
if (!ctx->vs->soFunc[info->mode]) {
STREAMOUT_COMPILE_STATE state = {0};
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE:
+ case PIPE_CAP_QUERY_SO_OVERFLOW:
+ case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
return 1;
/* MSAA support
case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
- case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
case PIPE_CAP_NATIVE_FENCE_FD:
case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
case PIPE_CAP_FBFETCH:
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
- case PIPE_CAP_QUERY_SO_OVERFLOW:
case PIPE_CAP_MEMOBJ:
case PIPE_CAP_LOAD_CONSTBUF:
case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
pipe_stream_output_info *stream_output = &ctx->vs->pipe.stream_output;
- for (uint32_t i = 0; i < ctx->num_so_targets; i++) {
+ for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) {
SWR_STREAMOUT_BUFFER buffer = {0};
- if (!ctx->so_targets[i])
- continue;
- buffer.enable = true;
- buffer.pBuffer =
- (gfxptr_t)(swr_resource_data(ctx->so_targets[i]->buffer) +
- ctx->so_targets[i]->buffer_offset);
- buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2;
- buffer.pitch = stream_output->stride[i];
- buffer.streamOffset = 0;
+ if (ctx->so_targets[i]) {
+ buffer.enable = true;
+ buffer.pBuffer =
+ (gfxptr_t)(swr_resource_data(ctx->so_targets[i]->buffer) +
+ ctx->so_targets[i]->buffer_offset);
+ buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2;
+ buffer.pitch = stream_output->stride[i];
+ buffer.streamOffset = 0;
+ }
ctx->api.pfnSwrSetSoBuffers(ctx->swrContext, &buffer, i);
}
}
+
if (ctx->dirty & (SWR_NEW_CLIP | SWR_NEW_RASTERIZER | SWR_NEW_VS)) {
// shader exporting clip distances overrides all user clip planes
if (ctx->rasterizer->clip_plane_enable &&
}
swr->num_so_targets = num_targets;
+ swr->swrDC.soPrims = &swr->so_primCounter;
swr->dirty |= SWR_NEW_SO;
}