zink: refcount zink_gfx_program objects

[mesa.git] / src / gallium / drivers / svga / svga_pipe_query.c
diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c

index 11e69edce82958ae111d31b2a27594e4529bff5f..77be3692ba06c6176cc36905205bd554ba488256 100644 (file)
--- a/src/gallium/drivers/svga/svga_pipe_query.c
+++ b/src/gallium/drivers/svga/svga_pipe_query.c
@@ -50,6 +50,7 @@ struct svga_query {
     SVGA3dQueryType svga_type;      /**< SVGA3D_QUERYTYPE_x or unused */
  
     unsigned id;                    /** Per-context query identifier */
+   boolean active;                 /** TRUE if query is active */
  
     struct pipe_fence_handle *fence;
  
@@ -72,16 +73,19 @@ struct svga_query {
  
  /** cast wrapper */
  static inline struct svga_query *
-svga_query( struct pipe_query *q )
+svga_query(struct pipe_query *q)
  {
     return (struct svga_query *)q;
  }
  
+/**
+ * VGPU9
+ */
  
-static boolean
+static bool
  svga_get_query_result(struct pipe_context *pipe,
                        struct pipe_query *q,
-                      boolean wait,
+                      bool wait,
                        union pipe_query_result *result);
  
  static enum pipe_error
@@ -115,11 +119,10 @@ define_query_vgpu9(struct svga_context *svga,
     return PIPE_OK;
  }
  
-static enum pipe_error
+static void
  begin_query_vgpu9(struct svga_context *svga, struct svga_query *sq)
  {
     struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
-   enum pipe_error ret = PIPE_OK;
  
     if (sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) {
        /* The application doesn't care for the pending query result.
@@ -137,36 +140,23 @@ begin_query_vgpu9(struct svga_context *svga, struct svga_query *sq)
     sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
     sws->fence_reference(sws, &sq->fence, NULL);
  
-   ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
-   if (ret != PIPE_OK) {
-      svga_context_flush(svga, NULL);
-      ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
-   }
-   return ret;
+   SVGA_RETRY(svga, SVGA3D_BeginQuery(svga->swc, sq->svga_type));
  }
  
-static enum pipe_error
+static void
  end_query_vgpu9(struct svga_context *svga, struct svga_query *sq)
  {
-   enum pipe_error ret = PIPE_OK;
-
     /* Set to PENDING before sending EndQuery. */
     sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING;
  
-   ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf);
-   if (ret != PIPE_OK) {
-      svga_context_flush(svga, NULL);
-      ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf);
-   }
-   return ret;
+   SVGA_RETRY(svga, SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf));
  }
  
-static boolean
+static bool
  get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq,
-                       boolean wait, uint64_t *result)
+                       bool wait, uint64_t *result)
  {
     struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
-   enum pipe_error ret;
     SVGA3dQueryState state;
  
     if (!sq->fence) {
@@ -174,12 +164,8 @@ get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq,
         * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause
         * a synchronous wait on the host.
         */
-      ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf);
-      if (ret != PIPE_OK) {
-         svga_context_flush(svga, NULL);
-         ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf);
-      }
-      assert (ret == PIPE_OK);
+      SVGA_RETRY(svga, SVGA3D_WaitForQuery(svga->swc, sq->svga_type,
+                                           sq->hwbuf));
        svga_context_flush(svga, &sq->fence);
        assert(sq->fence);
     }
@@ -187,8 +173,9 @@ get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq,
     state = sq->queryResult->state;
     if (state == SVGA3D_QUERYSTATE_PENDING) {
        if (!wait)
-         return FALSE;
-      sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
+         return false;
+      sws->fence_finish(sws, sq->fence, PIPE_TIMEOUT_INFINITE,
+                        SVGA_FENCE_FLAG_QUERY);
        state = sq->queryResult->state;
     }
  
@@ -196,7 +183,7 @@ get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq,
            state == SVGA3D_QUERYSTATE_FAILED);
  
     *result = (uint64_t)sq->queryResult->result32;
-   return TRUE;
+   return true;
  }
  
  
@@ -210,10 +197,10 @@ get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq,
   * will hold queries of the same type. Multiple memory blocks can be allocated
   * for a particular query type.
   *
- * Currently each memory block is of 184 bytes. We support up to 128
+ * Currently each memory block is of 184 bytes. We support up to 512
   * memory blocks. The query memory size is arbitrary right now.
   * Each occlusion query takes about 8 bytes. One memory block can accomodate
- * 23 occlusion queries. 128 of those blocks can support up to 2944 occlusion
+ * 23 occlusion queries. 512 of those blocks can support up to 11776 occlusion
   * queries. That seems reasonable for now. If we think this limit is
   * not enough, we can increase the limit or try to grow the mob in runtime.
   * Note, SVGA device does not impose one mob per context for queries,
@@ -224,7 +211,7 @@ get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq,
   * following commands: DXMoveQuery, DXBindAllQuery & DXReadbackAllQuery.
   */
  #define SVGA_QUERY_MEM_BLOCK_SIZE    (sizeof(SVGADXQueryResultUnion) * 2)
-#define SVGA_QUERY_MEM_SIZE          (128 * SVGA_QUERY_MEM_BLOCK_SIZE)
+#define SVGA_QUERY_MEM_SIZE          (512 * SVGA_QUERY_MEM_BLOCK_SIZE)
  
  struct svga_qmem_alloc_entry
  {
@@ -239,31 +226,34 @@ struct svga_qmem_alloc_entry
  
  /**
   * Allocate a memory block from the query object memory
- * \return -1 if out of memory, else index of the query memory block
+ * \return NULL if out of memory, else pointer to the query memory block
   */
-static int
+static struct svga_qmem_alloc_entry *
  allocate_query_block(struct svga_context *svga)
  {
     int index;
     unsigned offset;
+   struct svga_qmem_alloc_entry *alloc_entry = NULL;
  
     /* Find the next available query block */
     index = util_bitmask_add(svga->gb_query_alloc_mask);
  
     if (index == UTIL_BITMASK_INVALID_INDEX)
-      return -1;
+      return NULL;
  
     offset = index * SVGA_QUERY_MEM_BLOCK_SIZE;
     if (offset >= svga->gb_query_len) {
        unsigned i;
  
+      /* Deallocate the out-of-range index */
+      util_bitmask_clear(svga->gb_query_alloc_mask, index);
+      index = -1;
+
        /**
         * All the memory blocks are allocated, lets see if there is
         * any empty memory block around that can be freed up.
         */
-      index = -1;
-      for (i = 0; i < SVGA_QUERY_MAX && index == -1; i++) {
-         struct svga_qmem_alloc_entry *alloc_entry;
+      for (i = 0; i < SVGA3D_QUERYTYPE_MAX && index == -1; i++) {
           struct svga_qmem_alloc_entry *prev_alloc_entry = NULL;
  
           alloc_entry = svga->gb_query_map[i];
@@ -282,9 +272,26 @@ allocate_query_block(struct svga_context *svga)
             }
          }
       }
+
+      if (index == -1) {
+         debug_printf("Query memory object is full\n");
+         return NULL;
+      }
    }
  
-   return index;
+   /* No recycled entry is being returned: create one for the reserved index */
+   if (!alloc_entry) {
+      assert(index != -1);
+      alloc_entry = CALLOC_STRUCT(svga_qmem_alloc_entry);
+      if (!alloc_entry) {
+         /* Out of memory: release the block index reserved above */
+         util_bitmask_clear(svga->gb_query_alloc_mask, index);
+         return NULL;
+      }
+      alloc_entry->block_index = index;
+   }
+
+   return alloc_entry;
  }
  
  /**
@@ -342,17 +343,14 @@ allocate_query_block_entry(struct svga_context *svga,
                             unsigned len)
  {
     struct svga_qmem_alloc_entry *alloc_entry;
-   int block_index = -1;
  
-   block_index = allocate_query_block(svga);
-   if (block_index == -1)
-      return NULL;
-   alloc_entry = CALLOC_STRUCT(svga_qmem_alloc_entry);
+   alloc_entry = allocate_query_block(svga);
     if (!alloc_entry)
        return NULL;
  
-   alloc_entry->block_index = block_index;
-   alloc_entry->start_offset = block_index * SVGA_QUERY_MEM_BLOCK_SIZE;
+   assert(alloc_entry->block_index != -1);
+   alloc_entry->start_offset =
+      alloc_entry->block_index * SVGA_QUERY_MEM_BLOCK_SIZE;
     alloc_entry->nquery = 0;
     alloc_entry->alloc_mask = util_bitmask_create();
     alloc_entry->next = NULL;
@@ -377,7 +375,7 @@ allocate_query(struct svga_context *svga,
     int slot_index = -1;
     unsigned offset;
  
-   assert(type < SVGA_QUERY_MAX);
+   assert(type < SVGA3D_QUERYTYPE_MAX);
  
     alloc_entry = svga->gb_query_map[type];
  
@@ -450,7 +448,7 @@ destroy_gb_query_obj(struct svga_context *svga)
     struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
     unsigned i;
  
-   for (i = 0; i < SVGA_QUERY_MAX; i++) {
+   for (i = 0; i < SVGA3D_QUERYTYPE_MAX; i++) {
        struct svga_qmem_alloc_entry *alloc_entry, *next;
        alloc_entry = svga->gb_query_map[i];
        while (alloc_entry) {
@@ -494,69 +492,48 @@ define_query_vgpu10(struct svga_context *svga,
        svga->gb_query_alloc_mask = util_bitmask_create();
  
        /* Bind the query object to the context */
-      if (svga->swc->query_bind(svga->swc, svga->gb_query,
-                                SVGA_QUERY_FLAG_SET) != PIPE_OK) {
-         svga_context_flush(svga, NULL);
-         svga->swc->query_bind(svga->swc, svga->gb_query,
-                               SVGA_QUERY_FLAG_SET);
-      }
+      SVGA_RETRY(svga, svga->swc->query_bind(svga->swc, svga->gb_query,
+                                             SVGA_QUERY_FLAG_SET));
     }
  
     sq->gb_query = svga->gb_query;
  
-   /* Allocate an integer ID for this query */
-   sq->id = util_bitmask_add(svga->query_id_bm);
-   if (sq->id == UTIL_BITMASK_INVALID_INDEX)
-      return PIPE_ERROR_OUT_OF_MEMORY;
+   /* Make sure the query length is a multiple of 8 bytes */
+   qlen = align(resultLen + sizeof(SVGA3dQueryState), 8);
  
     /* Find a slot for this query in the gb object */
-   qlen = resultLen + sizeof(SVGA3dQueryState);
     sq->offset = allocate_query(svga, sq->svga_type, qlen);
     if (sq->offset == -1)
        return PIPE_ERROR_OUT_OF_MEMORY;
  
+   assert((sq->offset & 7) == 0);
+
     SVGA_DBG(DEBUG_QUERY, "   query type=%d qid=0x%x offset=%d\n",
              sq->svga_type, sq->id, sq->offset);
  
     /**
      * Send SVGA3D commands to define the query
      */
-   ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags);
-   if (ret != PIPE_OK) {
-      svga_context_flush(svga, NULL);
-      ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags);
-   }
+   SVGA_RETRY_OOM(svga, ret, SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id,
+                                                       sq->svga_type,
+                                                       sq->flags));
     if (ret != PIPE_OK)
        return PIPE_ERROR_OUT_OF_MEMORY;
  
-   ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id);
-   if (ret != PIPE_OK) {
-      svga_context_flush(svga, NULL);
-      ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id);
-   }
-   assert(ret == PIPE_OK);
-
-   ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset);
-   if (ret != PIPE_OK) {
-      svga_context_flush(svga, NULL);
-      ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset);
-   }
-   assert(ret == PIPE_OK);
+   SVGA_RETRY(svga, SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id));
+   SVGA_RETRY(svga, SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id,
+                                                 sq->offset));
  
     return PIPE_OK;
  }
  
-static enum pipe_error
+static void
  destroy_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
  {
-   enum pipe_error ret;
-
-   ret = SVGA3D_vgpu10_DestroyQuery(svga->swc, sq->id);
+   SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyQuery(svga->swc, sq->id));
  
     /* Deallocate the memory slot allocated for this query */
     deallocate_query(svga, sq);
-
-   return ret;
  }
  
  
@@ -566,13 +543,8 @@ destroy_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
  static void
  rebind_vgpu10_query(struct svga_context *svga)
  {
-   if (svga->swc->query_bind(svga->swc, svga->gb_query,
-                             SVGA_QUERY_FLAG_REF) != PIPE_OK) {
-      svga_context_flush(svga, NULL);
-      svga->swc->query_bind(svga->swc, svga->gb_query,
-                            SVGA_QUERY_FLAG_REF);
-   }
-
+   SVGA_RETRY(svga, svga->swc->query_bind(svga->swc, svga->gb_query,
+                                          SVGA_QUERY_FLAG_REF));
     svga->rebind.flags.query = FALSE;
  }
  
@@ -581,7 +553,6 @@ static enum pipe_error
  begin_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
  {
     struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
-   enum pipe_error ret = PIPE_OK;
     int status = 0;
  
     sws->fence_reference(sws, &sq->fence, NULL);
@@ -596,45 +567,23 @@ begin_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
     }
  
     /* Send the BeginQuery command to the device */
-   ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id);
-   if (ret != PIPE_OK) {
-      svga_context_flush(svga, NULL);
-      ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id);
-   }
-   return ret;
+   SVGA_RETRY(svga, SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id));
+   return PIPE_OK;
  }
  
-static enum pipe_error
+static void
  end_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
  {
-   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
-   enum pipe_error ret = PIPE_OK;
-
     if (svga->rebind.flags.query) {
        rebind_vgpu10_query(svga);
     }
  
-   ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id);
-   if (ret != PIPE_OK) {
-      svga_context_flush(svga, NULL);
-      ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id);
-   }
-
-   /* Finish fence is copied here from get_query_result_vgpu10. This helps
-    * with cases where svga_begin_query might be called again before
-    * svga_get_query_result, such as GL_TIME_ELAPSED.
-    */
-   if (!sq->fence) {
-      svga_context_flush(svga, &sq->fence);
-   }
-   sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
-
-   return ret;
+   SVGA_RETRY(svga, SVGA3D_vgpu10_EndQuery(svga->swc, sq->id));
  }
  
-static boolean
+static bool
  get_query_result_vgpu10(struct svga_context *svga, struct svga_query *sq,
-                        boolean wait, void *result, int resultLen)
+                        bool wait, void *result, int resultLen)
  {
     struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
     SVGA3dQueryState queryState;
@@ -645,17 +594,28 @@ get_query_result_vgpu10(struct svga_context *svga, struct svga_query *sq,
  
     sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen);
  
-   if (queryState == SVGA3D_QUERYSTATE_PENDING) {
+   if (queryState != SVGA3D_QUERYSTATE_SUCCEEDED && !sq->fence) {
+      /* We don't have the query result yet, and the query hasn't been
+       * submitted.  We need to submit it now since the GL spec says
+       * "Querying the state for a given occlusion query forces that
+       * occlusion query to complete within a finite amount of time."
+       */
+      svga_context_flush(svga, &sq->fence);
+   }
+
+   if (queryState == SVGA3D_QUERYSTATE_PENDING ||
+       queryState == SVGA3D_QUERYSTATE_NEW) {
        if (!wait)
-         return FALSE;
-      sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
+         return false;
+      sws->fence_finish(sws, sq->fence, PIPE_TIMEOUT_INFINITE,
+                        SVGA_FENCE_FLAG_QUERY);
        sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen);
     }
  
     assert(queryState == SVGA3D_QUERYSTATE_SUCCEEDED ||
            queryState == SVGA3D_QUERYSTATE_FAILED);
  
-   return TRUE;
+   return true;
  }
  
  static struct pipe_query *
@@ -665,6 +625,7 @@ svga_create_query(struct pipe_context *pipe,
  {
     struct svga_context *svga = svga_context(pipe);
     struct svga_query *sq;
+   enum pipe_error ret;
  
     assert(query_type < SVGA_QUERY_MAX);
  
@@ -684,7 +645,10 @@ svga_create_query(struct pipe_context *pipe,
     case PIPE_QUERY_OCCLUSION_COUNTER:
        sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSION;
        if (svga_have_vgpu10(svga)) {
-         define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionQueryResult));
+         ret = define_query_vgpu10(svga, sq,
+                                   sizeof(SVGADXOcclusionQueryResult));
+         if (ret != PIPE_OK)
+            goto fail;
  
           /**
            * In OpenGL, occlusion counter query can be used in conditional
@@ -698,37 +662,65 @@ svga_create_query(struct pipe_context *pipe,
           sq->predicate = svga_create_query(pipe, PIPE_QUERY_OCCLUSION_PREDICATE, index);
  
        } else {
-         define_query_vgpu9(svga, sq);
+         ret = define_query_vgpu9(svga, sq);
+         if (ret != PIPE_OK)
+            goto fail;
        }
        break;
     case PIPE_QUERY_OCCLUSION_PREDICATE:
-      assert(svga_have_vgpu10(svga));
-      sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE;
-      define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionPredicateQueryResult));
+   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+      if (svga_have_vgpu10(svga)) {
+         sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE;
+         ret = define_query_vgpu10(svga, sq,
+                                   sizeof(SVGADXOcclusionPredicateQueryResult));
+         if (ret != PIPE_OK)
+            goto fail;
+      } else {
+         sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSION;
+         ret = define_query_vgpu9(svga, sq);
+         if (ret != PIPE_OK)
+            goto fail;
+      }
        break;
     case PIPE_QUERY_PRIMITIVES_GENERATED:
     case PIPE_QUERY_PRIMITIVES_EMITTED:
     case PIPE_QUERY_SO_STATISTICS:
        assert(svga_have_vgpu10(svga));
-      sq->svga_type = SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS;
-      define_query_vgpu10(svga, sq,
-                          sizeof(SVGADXStreamOutStatisticsQueryResult));
+
+      /* Until the device supports the new query type for multiple streams,
+       * we will use the single stream query type for stream 0.
+       */
+      if (svga_have_sm5(svga) && index > 0) {
+         assert(index < 4);
+
+         sq->svga_type = SVGA3D_QUERYTYPE_SOSTATS_STREAM0 + index;
+      }
+      else {
+         assert(index == 0);
+         sq->svga_type = SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS;
+      }
+      ret = define_query_vgpu10(svga, sq,
+                                sizeof(SVGADXStreamOutStatisticsQueryResult));
+      if (ret != PIPE_OK)
+         goto fail;
        break;
     case PIPE_QUERY_TIMESTAMP:
        assert(svga_have_vgpu10(svga));
        sq->svga_type = SVGA3D_QUERYTYPE_TIMESTAMP;
-      define_query_vgpu10(svga, sq,
-                          sizeof(SVGADXTimestampQueryResult));
+      ret = define_query_vgpu10(svga, sq,
+                                sizeof(SVGADXTimestampQueryResult));
+      if (ret != PIPE_OK)
+         goto fail;
        break;
     case SVGA_QUERY_NUM_DRAW_CALLS:
     case SVGA_QUERY_NUM_FALLBACKS:
     case SVGA_QUERY_NUM_FLUSHES:
     case SVGA_QUERY_NUM_VALIDATIONS:
-   case SVGA_QUERY_MAP_BUFFER_TIME:
-   case SVGA_QUERY_NUM_RESOURCES_MAPPED:
+   case SVGA_QUERY_NUM_BUFFERS_MAPPED:
+   case SVGA_QUERY_NUM_TEXTURES_MAPPED:
     case SVGA_QUERY_NUM_BYTES_UPLOADED:
+   case SVGA_QUERY_NUM_COMMAND_BUFFERS:
     case SVGA_QUERY_COMMAND_BUFFER_SIZE:
-   case SVGA_QUERY_FLUSH_TIME:
     case SVGA_QUERY_SURFACE_WRITE_FLUSHES:
     case SVGA_QUERY_MEMORY_USED:
     case SVGA_QUERY_NUM_SHADERS:
@@ -737,7 +729,22 @@ svga_create_query(struct pipe_context *pipe,
     case SVGA_QUERY_NUM_SURFACE_VIEWS:
     case SVGA_QUERY_NUM_GENERATE_MIPMAP:
     case SVGA_QUERY_NUM_READBACKS:
+   case SVGA_QUERY_NUM_RESOURCE_UPDATES:
+   case SVGA_QUERY_NUM_BUFFER_UPLOADS:
+   case SVGA_QUERY_NUM_CONST_BUF_UPDATES:
+   case SVGA_QUERY_NUM_CONST_UPDATES:
+   case SVGA_QUERY_NUM_FAILED_ALLOCATIONS:
+   case SVGA_QUERY_NUM_COMMANDS_PER_DRAW:
+   case SVGA_QUERY_NUM_SHADER_RELOCATIONS:
+   case SVGA_QUERY_NUM_SURFACE_RELOCATIONS:
+   case SVGA_QUERY_SHADER_MEM_USED:
        break;
+   case SVGA_QUERY_FLUSH_TIME:
+   case SVGA_QUERY_MAP_BUFFER_TIME:
+      /* These queries need os_time_get() */
+      svga->hud.uses_time = TRUE;
+      break;
+
     default:
        assert(!"unexpected query type in svga_create_query()");
     }
@@ -770,6 +777,8 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
  
     switch (sq->type) {
     case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
        if (svga_have_vgpu10(svga)) {
           /* make sure to also destroy any associated predicate query */
           if (sq->predicate)
@@ -780,11 +789,6 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
        }
        sws->fence_reference(sws, &sq->fence, NULL);
        break;
-   case PIPE_QUERY_OCCLUSION_PREDICATE:
-      assert(svga_have_vgpu10(svga));
-      destroy_query_vgpu10(svga, sq);
-      sws->fence_reference(sws, &sq->fence, NULL);
-      break;
     case PIPE_QUERY_PRIMITIVES_GENERATED:
     case PIPE_QUERY_PRIMITIVES_EMITTED:
     case PIPE_QUERY_SO_STATISTICS:
@@ -798,8 +802,10 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
     case SVGA_QUERY_NUM_FLUSHES:
     case SVGA_QUERY_NUM_VALIDATIONS:
     case SVGA_QUERY_MAP_BUFFER_TIME:
-   case SVGA_QUERY_NUM_RESOURCES_MAPPED:
+   case SVGA_QUERY_NUM_BUFFERS_MAPPED:
+   case SVGA_QUERY_NUM_TEXTURES_MAPPED:
     case SVGA_QUERY_NUM_BYTES_UPLOADED:
+   case SVGA_QUERY_NUM_COMMAND_BUFFERS:
     case SVGA_QUERY_COMMAND_BUFFER_SIZE:
     case SVGA_QUERY_FLUSH_TIME:
     case SVGA_QUERY_SURFACE_WRITE_FLUSHES:
@@ -810,6 +816,15 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
     case SVGA_QUERY_NUM_SURFACE_VIEWS:
     case SVGA_QUERY_NUM_GENERATE_MIPMAP:
     case SVGA_QUERY_NUM_READBACKS:
+   case SVGA_QUERY_NUM_RESOURCE_UPDATES:
+   case SVGA_QUERY_NUM_BUFFER_UPLOADS:
+   case SVGA_QUERY_NUM_CONST_BUF_UPDATES:
+   case SVGA_QUERY_NUM_CONST_UPDATES:
+   case SVGA_QUERY_NUM_FAILED_ALLOCATIONS:
+   case SVGA_QUERY_NUM_COMMANDS_PER_DRAW:
+   case SVGA_QUERY_NUM_SHADER_RELOCATIONS:
+   case SVGA_QUERY_NUM_SURFACE_RELOCATIONS:
+   case SVGA_QUERY_SHADER_MEM_USED:
        /* nothing */
        break;
     default:
@@ -823,19 +838,16 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
  }
  
  
-static boolean
+static bool
  svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
  {
     struct svga_context *svga = svga_context(pipe);
     struct svga_query *sq = svga_query(q);
-   enum pipe_error ret;
+   enum pipe_error ret = PIPE_OK;
  
     assert(sq);
     assert(sq->type < SVGA_QUERY_MAX);
  
-   SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__,
-            sq, sq->id);
-
     /* Need to flush out buffered drawing commands so that they don't
      * get counted in the query results.
      */
@@ -843,6 +855,8 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
  
     switch (sq->type) {
     case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
        if (svga_have_vgpu10(svga)) {
           ret = begin_query_vgpu10(svga, sq);
           /* also need to start the associated occlusion predicate query */
@@ -853,16 +867,11 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
              (void) status;
           }
        } else {
-         ret = begin_query_vgpu9(svga, sq);
+         begin_query_vgpu9(svga, sq);
        }
        assert(ret == PIPE_OK);
        (void) ret;
        break;
-   case PIPE_QUERY_OCCLUSION_PREDICATE:
-      assert(svga_have_vgpu10(svga));
-      ret = begin_query_vgpu10(svga, sq);
-      assert(ret == PIPE_OK);
-      break;
     case PIPE_QUERY_PRIMITIVES_GENERATED:
     case PIPE_QUERY_PRIMITIVES_EMITTED:
     case PIPE_QUERY_SO_STATISTICS:
@@ -886,12 +895,18 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
     case SVGA_QUERY_MAP_BUFFER_TIME:
        sq->begin_count = svga->hud.map_buffer_time;
        break;
-   case SVGA_QUERY_NUM_RESOURCES_MAPPED:
-      sq->begin_count = svga->hud.num_resources_mapped;
+   case SVGA_QUERY_NUM_BUFFERS_MAPPED:
+      sq->begin_count = svga->hud.num_buffers_mapped;
+      break;
+   case SVGA_QUERY_NUM_TEXTURES_MAPPED:
+      sq->begin_count = svga->hud.num_textures_mapped;
        break;
     case SVGA_QUERY_NUM_BYTES_UPLOADED:
        sq->begin_count = svga->hud.num_bytes_uploaded;
        break;
+   case SVGA_QUERY_NUM_COMMAND_BUFFERS:
+      sq->begin_count = svga->swc->num_command_buffers;
+      break;
     case SVGA_QUERY_COMMAND_BUFFER_SIZE:
        sq->begin_count = svga->hud.command_buffer_size;
        break;
@@ -904,78 +919,90 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
     case SVGA_QUERY_NUM_READBACKS:
        sq->begin_count = svga->hud.num_readbacks;
        break;
+   case SVGA_QUERY_NUM_RESOURCE_UPDATES:
+      sq->begin_count = svga->hud.num_resource_updates;
+      break;
+   case SVGA_QUERY_NUM_BUFFER_UPLOADS:
+      sq->begin_count = svga->hud.num_buffer_uploads;
+      break;
+   case SVGA_QUERY_NUM_CONST_BUF_UPDATES:
+      sq->begin_count = svga->hud.num_const_buf_updates;
+      break;
+   case SVGA_QUERY_NUM_CONST_UPDATES:
+      sq->begin_count = svga->hud.num_const_updates;
+      break;
+   case SVGA_QUERY_NUM_SHADER_RELOCATIONS:
+      sq->begin_count = svga->swc->num_shader_reloc;
+      break;
+   case SVGA_QUERY_NUM_SURFACE_RELOCATIONS:
+      sq->begin_count = svga->swc->num_surf_reloc;
+      break;
     case SVGA_QUERY_MEMORY_USED:
     case SVGA_QUERY_NUM_SHADERS:
     case SVGA_QUERY_NUM_RESOURCES:
     case SVGA_QUERY_NUM_STATE_OBJECTS:
     case SVGA_QUERY_NUM_SURFACE_VIEWS:
     case SVGA_QUERY_NUM_GENERATE_MIPMAP:
+   case SVGA_QUERY_NUM_FAILED_ALLOCATIONS:
+   case SVGA_QUERY_NUM_COMMANDS_PER_DRAW:
+   case SVGA_QUERY_SHADER_MEM_USED:
        /* nothing */
        break;
     default:
        assert(!"unexpected query type in svga_begin_query()");
     }
  
-   svga->sq[sq->type] = sq;
+   SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d type=%d svga_type=%d\n",
+            __FUNCTION__, sq, sq->id, sq->type, sq->svga_type);
+
+   sq->active = TRUE;
  
     return true;
  }
  
  
-static void
+static bool
  svga_end_query(struct pipe_context *pipe, struct pipe_query *q)
  {
     struct svga_context *svga = svga_context(pipe);
     struct svga_query *sq = svga_query(q);
-   enum pipe_error ret;
  
     assert(sq);
     assert(sq->type < SVGA_QUERY_MAX);
  
-   SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__,
-            sq, sq->id);
+   SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x type=%d\n",
+            __FUNCTION__, sq, sq->type);
  
-   if (sq->type == PIPE_QUERY_TIMESTAMP && svga->sq[sq->type] != sq)
+   if (sq->type == PIPE_QUERY_TIMESTAMP && !sq->active)
        svga_begin_query(pipe, q);
  
+   SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d type=%d svga_type=%d\n",
+            __FUNCTION__, sq, sq->id, sq->type, sq->svga_type);
+
     svga_hwtnl_flush_retry(svga);
  
-   assert(svga->sq[sq->type] == sq);
+   assert(sq->active);
  
     switch (sq->type) {
     case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
        if (svga_have_vgpu10(svga)) {
-         ret = end_query_vgpu10(svga, sq);
+         end_query_vgpu10(svga, sq);
           /* also need to end the associated occlusion predicate query */
           if (sq->predicate) {
-            enum pipe_error status;
-            status = end_query_vgpu10(svga, svga_query(sq->predicate));
-            assert(status == PIPE_OK);
-            (void) status;
+            end_query_vgpu10(svga, svga_query(sq->predicate));
           }
        } else {
-         ret = end_query_vgpu9(svga, sq);
+         end_query_vgpu9(svga, sq);
        }
-      assert(ret == PIPE_OK);
-      (void) ret;
-      /* TODO: Delay flushing. We don't really need to flush here, just ensure
-       * that there is one flush before svga_get_query_result attempts to get
-       * the result.
-       */
-      svga_context_flush(svga, NULL);
-      break;
-   case PIPE_QUERY_OCCLUSION_PREDICATE:
-      assert(svga_have_vgpu10(svga));
-      ret = end_query_vgpu10(svga, sq);
-      assert(ret == PIPE_OK);
        break;
     case PIPE_QUERY_PRIMITIVES_GENERATED:
     case PIPE_QUERY_PRIMITIVES_EMITTED:
     case PIPE_QUERY_SO_STATISTICS:
     case PIPE_QUERY_TIMESTAMP:
        assert(svga_have_vgpu10(svga));
-      ret = end_query_vgpu10(svga, sq);
-      assert(ret == PIPE_OK);
+      end_query_vgpu10(svga, sq);
        break;
     case SVGA_QUERY_NUM_DRAW_CALLS:
        sq->end_count = svga->hud.num_draw_calls;
@@ -992,12 +1019,18 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q)
     case SVGA_QUERY_MAP_BUFFER_TIME:
        sq->end_count = svga->hud.map_buffer_time;
        break;
-   case SVGA_QUERY_NUM_RESOURCES_MAPPED:
-      sq->end_count = svga->hud.num_resources_mapped;
+   case SVGA_QUERY_NUM_BUFFERS_MAPPED:
+      sq->end_count = svga->hud.num_buffers_mapped;
+      break;
+   case SVGA_QUERY_NUM_TEXTURES_MAPPED:
+      sq->end_count = svga->hud.num_textures_mapped;
        break;
     case SVGA_QUERY_NUM_BYTES_UPLOADED:
        sq->end_count = svga->hud.num_bytes_uploaded;
        break;
+   case SVGA_QUERY_NUM_COMMAND_BUFFERS:
+      sq->end_count = svga->swc->num_command_buffers;
+      break;
     case SVGA_QUERY_COMMAND_BUFFER_SIZE:
        sq->end_count = svga->hud.command_buffer_size;
        break;
@@ -1010,32 +1043,54 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q)
     case SVGA_QUERY_NUM_READBACKS:
        sq->end_count = svga->hud.num_readbacks;
        break;
+   case SVGA_QUERY_NUM_RESOURCE_UPDATES:
+      sq->end_count = svga->hud.num_resource_updates;
+      break;
+   case SVGA_QUERY_NUM_BUFFER_UPLOADS:
+      sq->end_count = svga->hud.num_buffer_uploads;
+      break;
+   case SVGA_QUERY_NUM_CONST_BUF_UPDATES:
+      sq->end_count = svga->hud.num_const_buf_updates;
+      break;
+   case SVGA_QUERY_NUM_CONST_UPDATES:
+      sq->end_count = svga->hud.num_const_updates;
+      break;
+   case SVGA_QUERY_NUM_SHADER_RELOCATIONS:
+      sq->end_count = svga->swc->num_shader_reloc;
+      break;
+   case SVGA_QUERY_NUM_SURFACE_RELOCATIONS:
+      sq->end_count = svga->swc->num_surf_reloc;
+      break;
     case SVGA_QUERY_MEMORY_USED:
     case SVGA_QUERY_NUM_SHADERS:
     case SVGA_QUERY_NUM_RESOURCES:
     case SVGA_QUERY_NUM_STATE_OBJECTS:
     case SVGA_QUERY_NUM_SURFACE_VIEWS:
     case SVGA_QUERY_NUM_GENERATE_MIPMAP:
+   case SVGA_QUERY_NUM_FAILED_ALLOCATIONS:
+   case SVGA_QUERY_NUM_COMMANDS_PER_DRAW:
+   case SVGA_QUERY_SHADER_MEM_USED:
        /* nothing */
        break;
     default:
        assert(!"unexpected query type in svga_end_query()");
     }
-   svga->sq[sq->type] = NULL;
+   sq->active = FALSE;
+   return true;
  }
  
  
-static boolean
+static bool
  svga_get_query_result(struct pipe_context *pipe,
                        struct pipe_query *q,
-                      boolean wait,
+                      bool wait,
                        union pipe_query_result *vresult)
  {
     struct svga_screen *svgascreen = svga_screen(pipe->screen);
     struct svga_context *svga = svga_context(pipe);
     struct svga_query *sq = svga_query(q);
     uint64_t *result = (uint64_t *)vresult;
-   boolean ret = TRUE;
+   bool ret = true;
  
     assert(sq);
  
@@ -1050,15 +1105,21 @@ svga_get_query_result(struct pipe_context *pipe,
                                         (void *)&occResult, sizeof(occResult));
           *result = (uint64_t)occResult.samplesRendered;
        } else {
-         ret = get_query_result_vgpu9(svga, sq, wait, (uint64_t *)result);
+         ret = get_query_result_vgpu9(svga, sq, wait, result);
        }
        break;
-   case PIPE_QUERY_OCCLUSION_PREDICATE: {
-      SVGADXOcclusionPredicateQueryResult occResult;
-      assert(svga_have_vgpu10(svga));
-      ret = get_query_result_vgpu10(svga, sq, wait,
-                                    (void *)&occResult, sizeof(occResult));
-      vresult->b = occResult.anySamplesRendered != 0;
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: {
+      if (svga_have_vgpu10(svga)) {
+         SVGADXOcclusionPredicateQueryResult occResult;
+         ret = get_query_result_vgpu10(svga, sq, wait,
+                                       (void *)&occResult, sizeof(occResult));
+         vresult->b = occResult.anySamplesRendered != 0;
+      } else {
+         uint64_t count = 0;
+         ret = get_query_result_vgpu9(svga, sq, wait, &count);
+         vresult->b = count != 0;
+      }
        break;
     }
     case PIPE_QUERY_SO_STATISTICS: {
@@ -1106,12 +1167,20 @@ svga_get_query_result(struct pipe_context *pipe,
     case SVGA_QUERY_NUM_FLUSHES:
     case SVGA_QUERY_NUM_VALIDATIONS:
     case SVGA_QUERY_MAP_BUFFER_TIME:
-   case SVGA_QUERY_NUM_RESOURCES_MAPPED:
+   case SVGA_QUERY_NUM_BUFFERS_MAPPED:
+   case SVGA_QUERY_NUM_TEXTURES_MAPPED:
     case SVGA_QUERY_NUM_BYTES_UPLOADED:
+   case SVGA_QUERY_NUM_COMMAND_BUFFERS:
     case SVGA_QUERY_COMMAND_BUFFER_SIZE:
     case SVGA_QUERY_FLUSH_TIME:
     case SVGA_QUERY_SURFACE_WRITE_FLUSHES:
     case SVGA_QUERY_NUM_READBACKS:
+   case SVGA_QUERY_NUM_RESOURCE_UPDATES:
+   case SVGA_QUERY_NUM_BUFFER_UPLOADS:
+   case SVGA_QUERY_NUM_CONST_BUF_UPDATES:
+   case SVGA_QUERY_NUM_CONST_UPDATES:
+   case SVGA_QUERY_NUM_SHADER_RELOCATIONS:
+   case SVGA_QUERY_NUM_SURFACE_RELOCATIONS:
        vresult->u64 = sq->end_count - sq->begin_count;
        break;
     /* These are running total counters */
@@ -1125,7 +1194,12 @@ svga_get_query_result(struct pipe_context *pipe,
        vresult->u64 = svgascreen->hud.num_resources;
        break;
     case SVGA_QUERY_NUM_STATE_OBJECTS:
-      vresult->u64 = svga->hud.num_state_objects;
+      vresult->u64 = (svga->hud.num_blend_objects +
+                      svga->hud.num_depthstencil_objects +
+                      svga->hud.num_rasterizer_objects +
+                      svga->hud.num_sampler_objects +
+                      svga->hud.num_samplerview_objects +
+                      svga->hud.num_vertexelement_objects);
        break;
     case SVGA_QUERY_NUM_SURFACE_VIEWS:
        vresult->u64 = svga->hud.num_surface_views;
@@ -1133,6 +1207,16 @@ svga_get_query_result(struct pipe_context *pipe,
     case SVGA_QUERY_NUM_GENERATE_MIPMAP:
        vresult->u64 = svga->hud.num_generate_mipmap;
        break;
+   case SVGA_QUERY_NUM_FAILED_ALLOCATIONS:
+      vresult->u64 = svgascreen->hud.num_failed_allocations;
+      break;
+   case SVGA_QUERY_NUM_COMMANDS_PER_DRAW:
+      vresult->f = (float) svga->swc->num_commands
+         / (float) svga->swc->num_draw_commands;
+      break;
+   case SVGA_QUERY_SHADER_MEM_USED:
+      vresult->u64 = svga->hud.shader_mem_used;
+      break;
     default:
        assert(!"unexpected query type in svga_get_query_result");
     }
@@ -1144,13 +1228,12 @@ svga_get_query_result(struct pipe_context *pipe,
  
  static void
  svga_render_condition(struct pipe_context *pipe, struct pipe_query *q,
-                      boolean condition, uint mode)
+                      bool condition, enum pipe_render_cond_flag mode)
  {
     struct svga_context *svga = svga_context(pipe);
     struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
     struct svga_query *sq = svga_query(q);
     SVGA3dQueryId queryId;
-   enum pipe_error ret;
  
     SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
  
@@ -1174,17 +1257,23 @@ svga_render_condition(struct pipe_context *pipe, struct pipe_query *q,
  
        if ((mode == PIPE_RENDER_COND_WAIT ||
             mode == PIPE_RENDER_COND_BY_REGION_WAIT) && sq->fence) {
-         sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
+         sws->fence_finish(sws, sq->fence, PIPE_TIMEOUT_INFINITE,
+                           SVGA_FENCE_FLAG_QUERY);
        }
     }
-
-   ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId,
-                                      (uint32) condition);
-   if (ret != PIPE_OK) {
-      svga_context_flush(svga, NULL);
-      ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId,
-                                         (uint32) condition);
+   /*
+    * If the kernel module doesn't support the predication command,
+    * we'll just render unconditionally.
+    * This is probably acceptable for the typical case of occlusion culling.
+    */
+   if (sws->have_set_predication_cmd) {
+      SVGA_RETRY(svga, SVGA3D_vgpu10_SetPredication(svga->swc, queryId,
+                                                    (uint32) condition));
+      svga->pred.query_id = queryId;
+      svga->pred.cond = condition;
     }
+
+   svga->render_condition = (sq != NULL);
  }
  
  
@@ -1207,6 +1296,45 @@ svga_get_timestamp(struct pipe_context *pipe)
  }
  
  
+static void
+svga_set_active_query_state(struct pipe_context *pipe, bool enable)
+{  /* Empty hook for pipe.set_active_query_state; both arguments unused. */
+}
+
+
+/**
+ * \brief Turn conditional rendering on or off from the state in svga->pred
+ *
+ * \param[in] svga  The svga context
+ * \param[in] render_condition_enabled  When TRUE the call returns at once
+ * and the predication state is left as it is
+ * \param[in] on  TRUE selects the saved query id, FALSE SVGA3D_INVALID_ID
+ */
+void
+svga_toggle_render_condition(struct svga_context *svga,
+                             boolean render_condition_enabled,
+                             boolean on)
+{
+   if (render_condition_enabled) {
+      return;
+   }
+
+   /*
+    * No saved query id means there is nothing to toggle.  When one is
+    * present, svga_render_condition() stored it only after seeing
+    * sws->have_set_predication_cmd, so the command is known to be supported.
+    */
+   if (svga->pred.query_id == SVGA3D_INVALID_ID) {
+      return;
+   }
+
+   const SVGA3dQueryId target = on ? svga->pred.query_id : SVGA3D_INVALID_ID;
+
+   SVGA_RETRY(svga, SVGA3D_vgpu10_SetPredication(svga->swc, target,
+                                                 (uint32) svga->pred.cond));
+}
+
+
  void
  svga_init_query_functions(struct svga_context *svga)
  {
@@ -1215,6 +1343,7 @@ svga_init_query_functions(struct svga_context *svga)
     svga->pipe.begin_query = svga_begin_query;
     svga->pipe.end_query = svga_end_query;
     svga->pipe.get_query_result = svga_get_query_result;
+   svga->pipe.set_active_query_state = svga_set_active_query_state;
     svga->pipe.render_condition = svga_render_condition;
     svga->pipe.get_timestamp = svga_get_timestamp;
  }