swr: Remove stall waiting for core query counters.
[mesa.git] / src / gallium / drivers / swr / swr_query.cpp
index f038a6e63901ef5d4198b7d9d147f1b93b3371b3..5c599654ba90ac99bd89777bd5f2878cfefbddc3 100644 (file)
@@ -62,10 +62,8 @@ swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
    struct swr_query *pq = swr_query(q);
 
    if (pq->fence) {
-      if (!swr_is_fence_pending(pq->fence)) {
-         swr_fence_submit(swr_context(pipe), pq->fence);
+      if (swr_is_fence_pending(pq->fence))
          swr_fence_finish(pipe->screen, pq->fence, 0);
-      }
       swr_fence_reference(pipe->screen, &pq->fence, NULL);
    }
 
@@ -73,100 +71,45 @@ swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
 }
 
 
-// XXX Create a fence callback, rather than stalling SwrWaitForIdle
 static void
 swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq)
 {
    struct swr_context *ctx = swr_context(pipe);
 
    assert(pq->result);
-   union pipe_query_result *result = pq->result;
+   struct swr_query_result *result = pq->result;
    boolean enable_stats = pq->enable_stats;
-   SWR_STATS swr_stats = {0};
-
-   if (pq->fence) {
-      if (!swr_is_fence_pending(pq->fence)) {
-         swr_fence_submit(ctx, pq->fence);
-         swr_fence_finish(pipe->screen, pq->fence, 0);
-      }
-      swr_fence_reference(pipe->screen, &pq->fence, NULL);
-   }
 
-   /*
-    * These queries don't need SWR Stats enabled in the core
-    * Set and return.
-    */
+   /* A few results don't require the core, so don't involve it */
    switch (pq->type) {
    case PIPE_QUERY_TIMESTAMP:
    case PIPE_QUERY_TIME_ELAPSED:
-      result->u64 = swr_get_timestamp(pipe->screen);
-      return;
+      result->timestamp = swr_get_timestamp(pipe->screen);
       break;
    case PIPE_QUERY_TIMESTAMP_DISJOINT:
-      /* nothing to do here */
-      return;
-      break;
    case PIPE_QUERY_GPU_FINISHED:
-      result->b = TRUE; /* XXX TODO Add an api func to SWR to compare drawId
-                           vs LastRetiredId? */
-      return;
+      /* nothing to do here */
       break;
    default:
-      /* Any query that needs SwrCore stats */
-      break;
-   }
-
-   /*
-    * All other results are collected from SwrCore counters
-    */
+      /*
+       * All other results are collected from SwrCore counters via
+       * SwrGetStats. This returns immediately, but results are later filled
+       * in by the backend.  Fence status is the only indication of
+       * completion.  */
+      SwrGetStats(ctx->swrContext, &result->core);
+
+      if (!pq->fence) {
+         struct swr_screen *screen = swr_screen(pipe->screen);
+         swr_fence_reference(pipe->screen, &pq->fence, screen->flush_fence);
+      }
+      swr_fence_submit(ctx, pq->fence);
 
-   /* XXX, Should turn this into a fence callback and skip the stall */
-   SwrGetStats(ctx->swrContext, &swr_stats);
-   /* SwrGetStats returns immediately, wait for collection */
-   SwrWaitForIdle(ctx->swrContext);
+      /* Only change stat collection if there are no active queries */
+      if (ctx->active_queries == 0)
+         SwrEnableStats(ctx->swrContext, enable_stats);
 
-   switch (pq->type) {
-   case PIPE_QUERY_OCCLUSION_PREDICATE:
-   case PIPE_QUERY_OCCLUSION_COUNTER:
-      result->u64 = swr_stats.DepthPassCount;
-      break;
-   case PIPE_QUERY_PRIMITIVES_GENERATED:
-      result->u64 = swr_stats.IaPrimitives;
-      break;
-   case PIPE_QUERY_PRIMITIVES_EMITTED:
-      result->u64 = swr_stats.SoNumPrimsWritten[pq->index];
-      break;
-   case PIPE_QUERY_SO_STATISTICS:
-   case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
-      struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
-      so_stats->num_primitives_written =
-         swr_stats.SoNumPrimsWritten[pq->index];
-      so_stats->primitives_storage_needed =
-         swr_stats.SoPrimStorageNeeded[pq->index];
-   } break;
-   case PIPE_QUERY_PIPELINE_STATISTICS: {
-      struct pipe_query_data_pipeline_statistics *p_stats =
-         &result->pipeline_statistics;
-      p_stats->ia_vertices = swr_stats.IaVertices;
-      p_stats->ia_primitives = swr_stats.IaPrimitives;
-      p_stats->vs_invocations = swr_stats.VsInvocations;
-      p_stats->gs_invocations = swr_stats.GsInvocations;
-      p_stats->gs_primitives = swr_stats.GsPrimitives;
-      p_stats->c_invocations = swr_stats.CPrimitives;
-      p_stats->c_primitives = swr_stats.CPrimitives;
-      p_stats->ps_invocations = swr_stats.PsInvocations;
-      p_stats->hs_invocations = swr_stats.HsInvocations;
-      p_stats->ds_invocations = swr_stats.DsInvocations;
-      p_stats->cs_invocations = swr_stats.CsInvocations;
-   } break;
-   default:
-      assert(0 && "Unsupported query");
       break;
    }
-
-   /* Only change stat collection if there are no active queries */
-   if (ctx->active_queries == 0)
-      SwrEnableStats(ctx->swrContext, enable_stats);
 }
 
 
@@ -176,16 +119,16 @@ swr_get_query_result(struct pipe_context *pipe,
                      boolean wait,
                      union pipe_query_result *result)
 {
-   struct swr_context *ctx = swr_context(pipe);
    struct swr_query *pq = swr_query(q);
+   struct swr_query_result *start = &pq->start;
+   struct swr_query_result *end = &pq->end;
+   unsigned index = pq->index;
 
    if (pq->fence) {
-      if (!swr_is_fence_pending(pq->fence)) {
-         swr_fence_submit(ctx, pq->fence);
-         if (!wait)
-            return FALSE;
-         swr_fence_finish(pipe->screen, pq->fence, 0);
-      }
+      if (!wait && !swr_is_fence_done(pq->fence))
+         return FALSE;
+
+      swr_fence_finish(pipe->screen, pq->fence, 0);
       swr_fence_reference(pipe->screen, &pq->fence, NULL);
    }
 
@@ -194,62 +137,67 @@ swr_get_query_result(struct pipe_context *pipe,
    switch (pq->type) {
    /* Booleans */
    case PIPE_QUERY_OCCLUSION_PREDICATE:
-      result->b = pq->end.u64 != pq->start.u64 ? TRUE : FALSE;
+      result->b = end->core.DepthPassCount != start->core.DepthPassCount;
       break;
    case PIPE_QUERY_GPU_FINISHED:
-      result->b = pq->end.b;
+      result->b = TRUE;
       break;
    /* Counters */
    case PIPE_QUERY_OCCLUSION_COUNTER:
+      result->u64 = end->core.DepthPassCount - start->core.DepthPassCount;
+      break;
    case PIPE_QUERY_TIMESTAMP:
    case PIPE_QUERY_TIME_ELAPSED:
+      result->u64 = end->timestamp - start->timestamp;
+      break;
    case PIPE_QUERY_PRIMITIVES_GENERATED:
+      result->u64 = end->core.IaPrimitives - start->core.IaPrimitives;
    case PIPE_QUERY_PRIMITIVES_EMITTED:
-      result->u64 = pq->end.u64 - pq->start.u64;
+      result->u64 = end->core.SoNumPrimsWritten[index]
+         - start->core.SoNumPrimsWritten[index];
       break;
    /* Structures */
    case PIPE_QUERY_SO_STATISTICS: {
       struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
-      struct pipe_query_data_so_statistics *start = &pq->start.so_statistics;
-      struct pipe_query_data_so_statistics *end = &pq->end.so_statistics;
+      struct SWR_STATS *start = &pq->start.core;
+      struct SWR_STATS *end = &pq->end.core;
       so_stats->num_primitives_written =
-         end->num_primitives_written - start->num_primitives_written;
+         end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];
       so_stats->primitives_storage_needed =
-         end->primitives_storage_needed - start->primitives_storage_needed;
+         end->SoPrimStorageNeeded[index] - start->SoPrimStorageNeeded[index];
    } break;
-   case PIPE_QUERY_TIMESTAMP_DISJOINT: {
+   case PIPE_QUERY_TIMESTAMP_DISJOINT:
       /* os_get_time_nano returns nanoseconds */
       result->timestamp_disjoint.frequency = UINT64_C(1000000000);
       result->timestamp_disjoint.disjoint = FALSE;
-   } break;
+      break;
    case PIPE_QUERY_PIPELINE_STATISTICS: {
       struct pipe_query_data_pipeline_statistics *p_stats =
          &result->pipeline_statistics;
-      struct pipe_query_data_pipeline_statistics *start =
-         &pq->start.pipeline_statistics;
-      struct pipe_query_data_pipeline_statistics *end =
-         &pq->end.pipeline_statistics;
-      p_stats->ia_vertices = end->ia_vertices - start->ia_vertices;
-      p_stats->ia_primitives = end->ia_primitives - start->ia_primitives;
-      p_stats->vs_invocations = end->vs_invocations - start->vs_invocations;
-      p_stats->gs_invocations = end->gs_invocations - start->gs_invocations;
-      p_stats->gs_primitives = end->gs_primitives - start->gs_primitives;
-      p_stats->c_invocations = end->c_invocations - start->c_invocations;
-      p_stats->c_primitives = end->c_primitives - start->c_primitives;
-      p_stats->ps_invocations = end->ps_invocations - start->ps_invocations;
-      p_stats->hs_invocations = end->hs_invocations - start->hs_invocations;
-      p_stats->ds_invocations = end->ds_invocations - start->ds_invocations;
-      p_stats->cs_invocations = end->cs_invocations - start->cs_invocations;
-   } break;
+      struct SWR_STATS *start = &pq->start.core;
+      struct SWR_STATS *end = &pq->end.core;
+      p_stats->ia_vertices = end->IaVertices - start->IaVertices;
+      p_stats->ia_primitives = end->IaPrimitives - start->IaPrimitives;
+      p_stats->vs_invocations = end->VsInvocations - start->VsInvocations;
+      p_stats->gs_invocations = end->GsInvocations - start->GsInvocations;
+      p_stats->gs_primitives = end->GsPrimitives - start->GsPrimitives;
+      p_stats->c_invocations = end->CPrimitives - start->CPrimitives;
+      p_stats->c_primitives = end->CPrimitives - start->CPrimitives;
+      p_stats->ps_invocations = end->PsInvocations - start->PsInvocations;
+      p_stats->hs_invocations = end->HsInvocations - start->HsInvocations;
+      p_stats->ds_invocations = end->DsInvocations - start->DsInvocations;
+      p_stats->cs_invocations = end->CsInvocations - start->CsInvocations;
+    } break;
    case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
-      struct pipe_query_data_so_statistics *start = &pq->start.so_statistics;
-      struct pipe_query_data_so_statistics *end = &pq->end.so_statistics;
+      struct SWR_STATS *start = &pq->start.core;
+      struct SWR_STATS *end = &pq->end.core;
       uint64_t num_primitives_written =
-         end->num_primitives_written - start->num_primitives_written;
+         end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];
       uint64_t primitives_storage_needed =
-         end->primitives_storage_needed - start->primitives_storage_needed;
+         end->SoPrimStorageNeeded[index] - start->SoPrimStorageNeeded[index];
       result->b = num_primitives_written > primitives_storage_needed;
-   } break;
+   }
+      break;
    default:
       assert(0 && "Unsupported query");
       break;
@@ -264,6 +212,8 @@ swr_begin_query(struct pipe_context *pipe, struct pipe_query *q)
    struct swr_context *ctx = swr_context(pipe);
    struct swr_query *pq = swr_query(q);
 
+   assert(!pq->enable_stats && "swr_begin_query: Query is already active!");
+
    /* Initialize Results */
    memset(&pq->start, 0, sizeof(pq->start));
    memset(&pq->end, 0, sizeof(pq->end));
@@ -276,7 +226,7 @@ swr_begin_query(struct pipe_context *pipe, struct pipe_query *q)
 
    /* override start timestamp to 0 for TIMESTAMP query */
    if (pq->type == PIPE_QUERY_TIMESTAMP)
-      pq->start.u64 = 0;
+      pq->start.timestamp = 0;
 
    return true;
 }