diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c
index 96fb4b8e536..8922ef56e59 100644
--- a/src/gallium/drivers/svga/svga_pipe_query.c
+++ b/src/gallium/drivers/svga/svga_pipe_query.c
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ * Copyright 2008-2015 VMware, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
@@ -25,6 +25,8 @@
 
 #include "pipe/p_state.h"
 #include "pipe/p_context.h"
+
+#include "util/u_bitmask.h"
 #include "util/u_memory.h"
 
 #include "svga_cmd.h"
@@ -44,224 +46,1304 @@ struct pipe_query {
 struct svga_query {
 struct pipe_query base;
- SVGA3dQueryType type;
+ unsigned type; /**< PIPE_QUERY_x or SVGA_QUERY_x */
+ SVGA3dQueryType svga_type; /**< SVGA3D_QUERYTYPE_x or unused */
+
+ unsigned id; /** Per-context query identifier */
+
+ struct pipe_fence_handle *fence;
+
+ /** For PIPE_QUERY_OCCLUSION_COUNTER / SVGA3D_QUERYTYPE_OCCLUSION */
+
+ /* For VGPU9 */
 struct svga_winsys_buffer *hwbuf;
 volatile SVGA3dQueryResult *queryResult;
- struct pipe_fence_handle *fence;
+
+ /** For VGPU10 */
+ struct svga_winsys_gb_query *gb_query;
+ SVGA3dDXQueryFlags flags;
+ unsigned offset; /**< offset to the gb_query memory */
+ struct pipe_query *predicate; /** The associated query that can be used for predicate */
+
+ /** For non-GPU SVGA_QUERY_x queries */
+ uint64_t begin_count, end_count;
 };
 
-/***********************************************************************
- * Inline conversion functions. 
These are better-typed than the - * macros used previously: - */ -static INLINE struct svga_query * -svga_query( struct pipe_query *q ) + +/** cast wrapper */ +static inline struct svga_query * +svga_query(struct pipe_query *q) { return (struct svga_query *)q; } -static boolean svga_get_query_result(struct pipe_context *pipe, - struct pipe_query *q, - boolean wait, - uint64_t *result); - -static struct pipe_query *svga_create_query( struct pipe_context *pipe, - unsigned query_type ) -{ - struct svga_context *svga = svga_context( pipe ); - struct svga_screen *svgascreen = svga_screen(pipe->screen); - struct svga_winsys_screen *sws = svgascreen->sws; - struct svga_query *sq; - - SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); +/** + * VGPU9 + */ - sq = CALLOC_STRUCT(svga_query); - if (!sq) - goto no_sq; +static bool +svga_get_query_result(struct pipe_context *pipe, + struct pipe_query *q, + bool wait, + union pipe_query_result *result); - sq->type = SVGA3D_QUERYTYPE_OCCLUSION; +static enum pipe_error +define_query_vgpu9(struct svga_context *svga, + struct svga_query *sq) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; - sq->hwbuf = svga_winsys_buffer_create(svga, - 1, + sq->hwbuf = svga_winsys_buffer_create(svga, 1, SVGA_BUFFER_USAGE_PINNED, sizeof *sq->queryResult); - if(!sq->hwbuf) - goto no_hwbuf; - - sq->queryResult = (SVGA3dQueryResult *)sws->buffer_map(sws, - sq->hwbuf, - PIPE_TRANSFER_WRITE); - if(!sq->queryResult) - goto no_query_result; + if (!sq->hwbuf) + return PIPE_ERROR_OUT_OF_MEMORY; + + sq->queryResult = (SVGA3dQueryResult *) + sws->buffer_map(sws, sq->hwbuf, PIPE_TRANSFER_WRITE); + if (!sq->queryResult) { + sws->buffer_destroy(sws, sq->hwbuf); + return PIPE_ERROR_OUT_OF_MEMORY; + } sq->queryResult->totalSize = sizeof *sq->queryResult; sq->queryResult->state = SVGA3D_QUERYSTATE_NEW; - /* - * We request the buffer to be pinned and assume it is always mapped. - * + /* We request the buffer to be pinned and assume it is always mapped. * The reason is that we don't want to wait for fences when checking the * query status. */ sws->buffer_unmap(sws, sq->hwbuf); + return PIPE_OK; +} + +static enum pipe_error +begin_query_vgpu9(struct svga_context *svga, struct svga_query *sq) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + enum pipe_error ret = PIPE_OK; + + if (sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) { + /* The application doesn't care for the pending query result. + * We cannot let go of the existing buffer and just get a new one + * because its storage may be reused for other purposes and clobbered + * by the host when it determines the query result. So the only + * option here is to wait for the existing query's result -- not a + * big deal, given that no sane application would do this. + */ + uint64_t result; + svga_get_query_result(&svga->pipe, &sq->base, TRUE, (void*)&result); + assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING); + } + + sq->queryResult->state = SVGA3D_QUERYSTATE_NEW; + sws->fence_reference(sws, &sq->fence, NULL); + + ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type); + } + return ret; +} + +static enum pipe_error +end_query_vgpu9(struct svga_context *svga, struct svga_query *sq) +{ + enum pipe_error ret = PIPE_OK; + + /* Set to PENDING before sending EndQuery. 
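+ * The host only advances the state to SUCCEEDED or FAILED when it
+ * writes the result back, which is the transition that
+ * get_query_result_vgpu9() below polls for.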
*/
+ sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING;
+
+ ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf);
+ }
+ return ret;
+}
+
+static bool
+get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq,
+ bool wait, uint64_t *result)
+{
+ struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+ enum pipe_error ret;
+ SVGA3dQueryState state;
+
+ if (!sq->fence) {
+ /* The query status won't be updated by the host unless
+ * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause
+ * a synchronous wait on the host.
+ */
+ ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf);
+ }
+ assert(ret == PIPE_OK);
+ svga_context_flush(svga, &sq->fence);
+ assert(sq->fence);
+ }
+
+ state = sq->queryResult->state;
+ if (state == SVGA3D_QUERYSTATE_PENDING) {
+ if (!wait)
+ return false;
+ sws->fence_finish(sws, sq->fence, PIPE_TIMEOUT_INFINITE,
+ SVGA_FENCE_FLAG_QUERY);
+ state = sq->queryResult->state;
+ }
+
+ assert(state == SVGA3D_QUERYSTATE_SUCCEEDED ||
+ state == SVGA3D_QUERYSTATE_FAILED);
+
+ *result = (uint64_t)sq->queryResult->result32;
+ return true;
+}
+
+
+/**
+ * VGPU10
+ *
+ * There is one query mob allocated for each context, shared by all
+ * query types. The mob is used to hold the queries' state and results.
+ * Since each query result type has a different length, the mob is divided
+ * into memory blocks to ease allocation management. Each memory block
+ * holds queries of the same type, and multiple memory blocks can be
+ * allocated for a particular query type.
+ *
+ * Currently each memory block is 184 bytes and we support up to 128
+ * memory blocks; the query memory size is arbitrary right now.
+ * Each occlusion query takes about 8 bytes, so one memory block can
+ * accommodate 23 occlusion queries and 128 such blocks can support up to
+ * 2944 occlusion queries. That seems reasonable for now. If this limit
+ * proves too small, we can raise it or try to grow the mob at runtime.
+ * Note that the SVGA device does not impose one mob per context for
+ * queries; we could allocate multiple mobs for queries, but the wddm KMD
+ * does not currently support that.
+ *
+ * Also note that the GL guest driver does not issue any of the
+ * following commands: DXMoveQuery, DXBindAllQuery & DXReadbackAllQuery.
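+ *
+ * For reference, the sizing arithmetic implied by the constants below:
+ *   block size: sizeof(SVGADXQueryResultUnion) * 2 = 184 bytes
+ *   mob size: 128 blocks * 184 bytes = 23552 bytes
+ *   occlusion slots per block: 184 / 8 = 23
+ *   occlusion query capacity: 128 * 23 = 2944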
+ */
+#define SVGA_QUERY_MEM_BLOCK_SIZE (sizeof(SVGADXQueryResultUnion) * 2)
+#define SVGA_QUERY_MEM_SIZE (128 * SVGA_QUERY_MEM_BLOCK_SIZE)
+
+struct svga_qmem_alloc_entry
+{
+ unsigned start_offset; /* start offset of the memory block */
+ unsigned block_index; /* block index of the memory block */
+ unsigned query_size; /* query size in this memory block */
+ unsigned nquery; /* number of queries allocated */
+ struct util_bitmask *alloc_mask; /* allocation mask */
+ struct svga_qmem_alloc_entry *next; /* next memory block */
+};
+
+
+/**
+ * Allocate a memory block from the query object memory
+ * \return -1 if out of memory, else index of the query memory block
+ */
+static int
+allocate_query_block(struct svga_context *svga)
+{
+ int index;
+ unsigned offset;
+
+ /* Find the next available query block */
+ index = util_bitmask_add(svga->gb_query_alloc_mask);
+
+ if (index == UTIL_BITMASK_INVALID_INDEX)
+ return -1;
+
+ offset = index * SVGA_QUERY_MEM_BLOCK_SIZE;
+ if (offset >= svga->gb_query_len) {
+ unsigned i;
+
+ /**
+ * All the memory blocks are allocated; let's see if there is
+ * any empty memory block around that can be freed up.
+ */
+ index = -1;
+ for (i = 0; i < SVGA3D_QUERYTYPE_MAX && index == -1; i++) {
+ struct svga_qmem_alloc_entry *alloc_entry;
+ struct svga_qmem_alloc_entry *prev_alloc_entry = NULL;
+
+ alloc_entry = svga->gb_query_map[i];
+ while (alloc_entry && index == -1) {
+ if (alloc_entry->nquery == 0) {
+ /* This memory block is empty, it can be recycled. */
+ if (prev_alloc_entry) {
+ prev_alloc_entry->next = alloc_entry->next;
+ } else {
+ svga->gb_query_map[i] = alloc_entry->next;
+ }
+ index = alloc_entry->block_index;
+ } else {
+ prev_alloc_entry = alloc_entry;
+ alloc_entry = alloc_entry->next;
+ }
+ }
+ }
+ }
+
+ return index;
+}
+
+/**
+ * Allocate a slot in the specified memory block.
+ * All slots in this memory block are of the same size.
+ *
+ * \return -1 if out of memory, else index of the query slot
+ */
+static int
+allocate_query_slot(struct svga_context *svga,
+ struct svga_qmem_alloc_entry *alloc)
+{
+ int index;
+ unsigned offset;
+
+ /* Find the next available slot */
+ index = util_bitmask_add(alloc->alloc_mask);
+
+ if (index == UTIL_BITMASK_INVALID_INDEX)
+ return -1;
+
+ offset = index * alloc->query_size;
+ if (offset >= SVGA_QUERY_MEM_BLOCK_SIZE)
+ return -1;
+
+ alloc->nquery++;
+
+ return index;
+}
+
+/**
+ * Deallocate the specified slot in the memory block.
+ * If all slots are freed up, then deallocate the memory block
+ * as well, so it can be allocated for another query type.
+ */
+static void
+deallocate_query_slot(struct svga_context *svga,
+ struct svga_qmem_alloc_entry *alloc,
+ unsigned index)
+{
+ assert(index != UTIL_BITMASK_INVALID_INDEX);
+
+ util_bitmask_clear(alloc->alloc_mask, index);
+ alloc->nquery--;
+
+ /**
+ * Don't worry about deallocating the empty memory block here.
+ * The empty memory block will be recycled when no more memory blocks
+ * can be allocated.
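+ * (That recycling is the nquery == 0 scan in allocate_query_block()
+ * above.)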
+ */ +} + +static struct svga_qmem_alloc_entry * +allocate_query_block_entry(struct svga_context *svga, + unsigned len) +{ + struct svga_qmem_alloc_entry *alloc_entry; + int block_index = -1; + + block_index = allocate_query_block(svga); + if (block_index == -1) + return NULL; + alloc_entry = CALLOC_STRUCT(svga_qmem_alloc_entry); + if (!alloc_entry) + return NULL; + + alloc_entry->block_index = block_index; + alloc_entry->start_offset = block_index * SVGA_QUERY_MEM_BLOCK_SIZE; + alloc_entry->nquery = 0; + alloc_entry->alloc_mask = util_bitmask_create(); + alloc_entry->next = NULL; + alloc_entry->query_size = len; + + return alloc_entry; +} + +/** + * Allocate a memory slot for a query of the specified type. + * It will first search through the memory blocks that are allocated + * for the query type. If no memory slot is available, it will try + * to allocate another memory block within the query object memory for + * this query type. + */ +static int +allocate_query(struct svga_context *svga, + SVGA3dQueryType type, + unsigned len) +{ + struct svga_qmem_alloc_entry *alloc_entry; + int slot_index = -1; + unsigned offset; + + assert(type < SVGA3D_QUERYTYPE_MAX); + + alloc_entry = svga->gb_query_map[type]; + + if (!alloc_entry) { + /** + * No query memory block has been allocated for this query type, + * allocate one now + */ + alloc_entry = allocate_query_block_entry(svga, len); + if (!alloc_entry) + return -1; + svga->gb_query_map[type] = alloc_entry; + } + + /* Allocate a slot within the memory block allocated for this query type */ + slot_index = allocate_query_slot(svga, alloc_entry); + + if (slot_index == -1) { + /* This query memory block is full, allocate another one */ + alloc_entry = allocate_query_block_entry(svga, len); + if (!alloc_entry) + return -1; + alloc_entry->next = svga->gb_query_map[type]; + svga->gb_query_map[type] = alloc_entry; + slot_index = allocate_query_slot(svga, alloc_entry); + } + + assert(slot_index != -1); + offset = slot_index * len + alloc_entry->start_offset; + + return offset; +} + + +/** + * Deallocate memory slot allocated for the specified query + */ +static void +deallocate_query(struct svga_context *svga, + struct svga_query *sq) +{ + struct svga_qmem_alloc_entry *alloc_entry; + unsigned slot_index; + unsigned offset = sq->offset; + + alloc_entry = svga->gb_query_map[sq->svga_type]; + + while (alloc_entry) { + if (offset >= alloc_entry->start_offset && + offset < alloc_entry->start_offset + SVGA_QUERY_MEM_BLOCK_SIZE) { + + /* The slot belongs to this memory block, deallocate it */ + slot_index = (offset - alloc_entry->start_offset) / + alloc_entry->query_size; + deallocate_query_slot(svga, alloc_entry, slot_index); + alloc_entry = NULL; + } else { + alloc_entry = alloc_entry->next; + } + } +} + + +/** + * Destroy the gb query object and all the related query structures + */ +static void +destroy_gb_query_obj(struct svga_context *svga) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + unsigned i; + + for (i = 0; i < SVGA3D_QUERYTYPE_MAX; i++) { + struct svga_qmem_alloc_entry *alloc_entry, *next; + alloc_entry = svga->gb_query_map[i]; + while (alloc_entry) { + next = alloc_entry->next; + util_bitmask_destroy(alloc_entry->alloc_mask); + FREE(alloc_entry); + alloc_entry = next; + } + svga->gb_query_map[i] = NULL; + } + + if (svga->gb_query) + sws->query_destroy(sws, svga->gb_query); + svga->gb_query = NULL; + + util_bitmask_destroy(svga->gb_query_alloc_mask); +} + +/** + * Define query and create the gb query object if it is 
not already created.
+ * There is only one gb query object per context which will be shared by
+ * queries of all types.
+ */
+static enum pipe_error
+define_query_vgpu10(struct svga_context *svga,
+ struct svga_query *sq, int resultLen)
+{
+ struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+ int qlen;
+ enum pipe_error ret = PIPE_OK;
+
+ SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+
+ if (svga->gb_query == NULL) {
+ /* Create a gb query object */
+ svga->gb_query = sws->query_create(sws, SVGA_QUERY_MEM_SIZE);
+ if (!svga->gb_query)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ svga->gb_query_len = SVGA_QUERY_MEM_SIZE;
+ memset(svga->gb_query_map, 0, sizeof(svga->gb_query_map));
+ svga->gb_query_alloc_mask = util_bitmask_create();
+
+ /* Bind the query object to the context */
+ if (svga->swc->query_bind(svga->swc, svga->gb_query,
+ SVGA_QUERY_FLAG_SET) != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ svga->swc->query_bind(svga->swc, svga->gb_query,
+ SVGA_QUERY_FLAG_SET);
+ }
+ }
+
+ sq->gb_query = svga->gb_query;
+
+ /* Allocate an integer ID for this query */
+ sq->id = util_bitmask_add(svga->query_id_bm);
+ if (sq->id == UTIL_BITMASK_INVALID_INDEX)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ /* Find a slot for this query in the gb object */
+ qlen = resultLen + sizeof(SVGA3dQueryState);
+ sq->offset = allocate_query(svga, sq->svga_type, qlen);
+ if (sq->offset == -1)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ SVGA_DBG(DEBUG_QUERY, " query type=%d qid=0x%x offset=%d\n",
+ sq->svga_type, sq->id, sq->offset);
+
+ /**
+ * Send SVGA3D commands to define the query
+ */
+ ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags);
+ }
+ if (ret != PIPE_OK)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id);
+ }
+ assert(ret == PIPE_OK);
+
+ ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset);
+ }
+ assert(ret == PIPE_OK);
+
+ return PIPE_OK;
+}
+
+static enum pipe_error
+destroy_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
+{
+ enum pipe_error ret;
+
+ ret = SVGA3D_vgpu10_DestroyQuery(svga->swc, sq->id);
+
+ /* Deallocate the memory slot allocated for this query */
+ deallocate_query(svga, sq);
+
+ return ret;
+}
+
+
+/**
+ * Rebind queries to the context.
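+ * Called when svga->rebind.flags.query is set, so that the gb query
+ * object is referenced again (SVGA_QUERY_FLAG_REF) before the next
+ * query command is emitted.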
+ */ +static void +rebind_vgpu10_query(struct svga_context *svga) +{ + if (svga->swc->query_bind(svga->swc, svga->gb_query, + SVGA_QUERY_FLAG_REF) != PIPE_OK) { + svga_context_flush(svga, NULL); + svga->swc->query_bind(svga->swc, svga->gb_query, + SVGA_QUERY_FLAG_REF); + } + + svga->rebind.flags.query = FALSE; +} + + +static enum pipe_error +begin_query_vgpu10(struct svga_context *svga, struct svga_query *sq) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + enum pipe_error ret = PIPE_OK; + int status = 0; + + sws->fence_reference(sws, &sq->fence, NULL); + + /* Initialize the query state to NEW */ + status = sws->query_init(sws, sq->gb_query, sq->offset, SVGA3D_QUERYSTATE_NEW); + if (status) + return PIPE_ERROR; + + if (svga->rebind.flags.query) { + rebind_vgpu10_query(svga); + } + + /* Send the BeginQuery command to the device */ + ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id); + } + return ret; +} + +static enum pipe_error +end_query_vgpu10(struct svga_context *svga, struct svga_query *sq) +{ + enum pipe_error ret = PIPE_OK; + + if (svga->rebind.flags.query) { + rebind_vgpu10_query(svga); + } + + ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id); + } + + return ret; +} + +static bool +get_query_result_vgpu10(struct svga_context *svga, struct svga_query *sq, + bool wait, void *result, int resultLen) +{ + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + SVGA3dQueryState queryState; + + if (svga->rebind.flags.query) { + rebind_vgpu10_query(svga); + } + + sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen); + + if (queryState != SVGA3D_QUERYSTATE_SUCCEEDED && !sq->fence) { + /* We don't have the query result yet, and the query hasn't been + * submitted. We need to submit it now since the GL spec says + * "Querying the state for a given occlusion query forces that + * occlusion query to complete within a finite amount of time." + */ + svga_context_flush(svga, &sq->fence); + } + + if (queryState == SVGA3D_QUERYSTATE_PENDING || + queryState == SVGA3D_QUERYSTATE_NEW) { + if (!wait) + return false; + sws->fence_finish(sws, sq->fence, PIPE_TIMEOUT_INFINITE, + SVGA_FENCE_FLAG_QUERY); + sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen); + } + + assert(queryState == SVGA3D_QUERYSTATE_SUCCEEDED || + queryState == SVGA3D_QUERYSTATE_FAILED); + + return true; +} + +static struct pipe_query * +svga_create_query(struct pipe_context *pipe, + unsigned query_type, + unsigned index) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_query *sq; + + assert(query_type < SVGA_QUERY_MAX); + + sq = CALLOC_STRUCT(svga_query); + if (!sq) + goto fail; + + /* Allocate an integer ID for the query */ + sq->id = util_bitmask_add(svga->query_id_bm); + if (sq->id == UTIL_BITMASK_INVALID_INDEX) + goto fail; + + SVGA_DBG(DEBUG_QUERY, "%s type=%d sq=0x%x id=%d\n", __FUNCTION__, + query_type, sq, sq->id); + + switch (query_type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSION; + if (svga_have_vgpu10(svga)) { + define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionQueryResult)); + + /** + * In OpenGL, occlusion counter query can be used in conditional + * rendering; however, in DX10, only OCCLUSION_PREDICATE query can + * be used for predication. 
Hence, we need to create an occlusion + * predicate query along with the occlusion counter query. So when + * the occlusion counter query is used for predication, the associated + * query of occlusion predicate type will be used + * in the SetPredication command. + */ + sq->predicate = svga_create_query(pipe, PIPE_QUERY_OCCLUSION_PREDICATE, index); + + } else { + define_query_vgpu9(svga, sq); + } + break; + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: + if (svga_have_vgpu10(svga)) { + sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE; + define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionPredicateQueryResult)); + } else { + sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSION; + define_query_vgpu9(svga, sq); + } + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_SO_STATISTICS: + assert(svga_have_vgpu10(svga)); + sq->svga_type = SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS; + define_query_vgpu10(svga, sq, + sizeof(SVGADXStreamOutStatisticsQueryResult)); + break; + case PIPE_QUERY_TIMESTAMP: + assert(svga_have_vgpu10(svga)); + sq->svga_type = SVGA3D_QUERYTYPE_TIMESTAMP; + define_query_vgpu10(svga, sq, + sizeof(SVGADXTimestampQueryResult)); + break; + case SVGA_QUERY_NUM_DRAW_CALLS: + case SVGA_QUERY_NUM_FALLBACKS: + case SVGA_QUERY_NUM_FLUSHES: + case SVGA_QUERY_NUM_VALIDATIONS: + case SVGA_QUERY_NUM_BUFFERS_MAPPED: + case SVGA_QUERY_NUM_TEXTURES_MAPPED: + case SVGA_QUERY_NUM_BYTES_UPLOADED: + case SVGA_QUERY_COMMAND_BUFFER_SIZE: + case SVGA_QUERY_SURFACE_WRITE_FLUSHES: + case SVGA_QUERY_MEMORY_USED: + case SVGA_QUERY_NUM_SHADERS: + case SVGA_QUERY_NUM_RESOURCES: + case SVGA_QUERY_NUM_STATE_OBJECTS: + case SVGA_QUERY_NUM_SURFACE_VIEWS: + case SVGA_QUERY_NUM_GENERATE_MIPMAP: + case SVGA_QUERY_NUM_READBACKS: + case SVGA_QUERY_NUM_RESOURCE_UPDATES: + case SVGA_QUERY_NUM_BUFFER_UPLOADS: + case SVGA_QUERY_NUM_CONST_BUF_UPDATES: + case SVGA_QUERY_NUM_CONST_UPDATES: + case SVGA_QUERY_NUM_FAILED_ALLOCATIONS: + case SVGA_QUERY_NUM_COMMANDS_PER_DRAW: + break; + case SVGA_QUERY_FLUSH_TIME: + case SVGA_QUERY_MAP_BUFFER_TIME: + /* These queries need os_time_get() */ + svga->hud.uses_time = TRUE; + break; + default: + assert(!"unexpected query type in svga_create_query()"); + } + + sq->type = query_type; + return &sq->base; -no_query_result: - sws->buffer_destroy(sws, sq->hwbuf); -no_hwbuf: +fail: FREE(sq); -no_sq: return NULL; } -static void svga_destroy_query(struct pipe_context *pipe, - struct pipe_query *q) +static void +svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q) { - struct svga_screen *svgascreen = svga_screen(pipe->screen); - struct svga_winsys_screen *sws = svgascreen->sws; - struct svga_query *sq = svga_query( q ); + struct svga_context *svga = svga_context(pipe); + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + struct svga_query *sq; + + if (!q) { + destroy_gb_query_obj(svga); + return; + } + + sq = svga_query(q); + + SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__, + sq, sq->id); + + switch (sq->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: + if (svga_have_vgpu10(svga)) { + /* make sure to also destroy any associated predicate query */ + if (sq->predicate) + svga_destroy_query(pipe, sq->predicate); + destroy_query_vgpu10(svga, sq); + } else { + sws->buffer_destroy(sws, sq->hwbuf); + } + sws->fence_reference(sws, &sq->fence, NULL); + break; + case 
PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_TIMESTAMP: + assert(svga_have_vgpu10(svga)); + destroy_query_vgpu10(svga, sq); + sws->fence_reference(sws, &sq->fence, NULL); + break; + case SVGA_QUERY_NUM_DRAW_CALLS: + case SVGA_QUERY_NUM_FALLBACKS: + case SVGA_QUERY_NUM_FLUSHES: + case SVGA_QUERY_NUM_VALIDATIONS: + case SVGA_QUERY_MAP_BUFFER_TIME: + case SVGA_QUERY_NUM_BUFFERS_MAPPED: + case SVGA_QUERY_NUM_TEXTURES_MAPPED: + case SVGA_QUERY_NUM_BYTES_UPLOADED: + case SVGA_QUERY_COMMAND_BUFFER_SIZE: + case SVGA_QUERY_FLUSH_TIME: + case SVGA_QUERY_SURFACE_WRITE_FLUSHES: + case SVGA_QUERY_MEMORY_USED: + case SVGA_QUERY_NUM_SHADERS: + case SVGA_QUERY_NUM_RESOURCES: + case SVGA_QUERY_NUM_STATE_OBJECTS: + case SVGA_QUERY_NUM_SURFACE_VIEWS: + case SVGA_QUERY_NUM_GENERATE_MIPMAP: + case SVGA_QUERY_NUM_READBACKS: + case SVGA_QUERY_NUM_RESOURCE_UPDATES: + case SVGA_QUERY_NUM_BUFFER_UPLOADS: + case SVGA_QUERY_NUM_CONST_BUF_UPDATES: + case SVGA_QUERY_NUM_CONST_UPDATES: + case SVGA_QUERY_NUM_FAILED_ALLOCATIONS: + case SVGA_QUERY_NUM_COMMANDS_PER_DRAW: + /* nothing */ + break; + default: + assert(!"svga: unexpected query type in svga_destroy_query()"); + } + + /* Free the query id */ + util_bitmask_clear(svga->query_id_bm, sq->id); - SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); - sws->buffer_destroy(sws, sq->hwbuf); - sws->fence_reference(sws, &sq->fence, NULL); FREE(sq); } -static void svga_begin_query(struct pipe_context *pipe, - struct pipe_query *q) + +static bool +svga_begin_query(struct pipe_context *pipe, struct pipe_query *q) { - struct svga_screen *svgascreen = svga_screen(pipe->screen); - struct svga_winsys_screen *sws = svgascreen->sws; - struct svga_context *svga = svga_context( pipe ); - struct svga_query *sq = svga_query( q ); + struct svga_context *svga = svga_context(pipe); + struct svga_query *sq = svga_query(q); enum pipe_error ret; - SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); - - assert(!svga->sq); + assert(sq); + assert(sq->type < SVGA_QUERY_MAX); + + SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__, + sq, sq->id); /* Need to flush out buffered drawing commands so that they don't * get counted in the query results. */ svga_hwtnl_flush_retry(svga); - - if(sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) { - /* The application doesn't care for the pending query result. We cannot - * let go the existing buffer and just get a new one because its storage - * may be reused for other purposes and clobbered by the host when it - * determines the query result. So the only option here is to wait for - * the existing query's result -- not a big deal, given that no sane - * application would do this. 
- */ - uint64_t result; - svga_get_query_result(pipe, q, TRUE, &result); - - assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING); - } - - sq->queryResult->state = SVGA3D_QUERYSTATE_NEW; - sws->fence_reference(sws, &sq->fence, NULL); - - ret = SVGA3D_BeginQuery(svga->swc, sq->type); - if(ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_BeginQuery(svga->swc, sq->type); + switch (sq->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: + if (svga_have_vgpu10(svga)) { + ret = begin_query_vgpu10(svga, sq); + /* also need to start the associated occlusion predicate query */ + if (sq->predicate) { + enum pipe_error status; + status = begin_query_vgpu10(svga, svga_query(sq->predicate)); + assert(status == PIPE_OK); + (void) status; + } + } else { + ret = begin_query_vgpu9(svga, sq); + } + assert(ret == PIPE_OK); + (void) ret; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_TIMESTAMP: + assert(svga_have_vgpu10(svga)); + ret = begin_query_vgpu10(svga, sq); assert(ret == PIPE_OK); + break; + case SVGA_QUERY_NUM_DRAW_CALLS: + sq->begin_count = svga->hud.num_draw_calls; + break; + case SVGA_QUERY_NUM_FALLBACKS: + sq->begin_count = svga->hud.num_fallbacks; + break; + case SVGA_QUERY_NUM_FLUSHES: + sq->begin_count = svga->hud.num_flushes; + break; + case SVGA_QUERY_NUM_VALIDATIONS: + sq->begin_count = svga->hud.num_validations; + break; + case SVGA_QUERY_MAP_BUFFER_TIME: + sq->begin_count = svga->hud.map_buffer_time; + break; + case SVGA_QUERY_NUM_BUFFERS_MAPPED: + sq->begin_count = svga->hud.num_buffers_mapped; + break; + case SVGA_QUERY_NUM_TEXTURES_MAPPED: + sq->begin_count = svga->hud.num_textures_mapped; + break; + case SVGA_QUERY_NUM_BYTES_UPLOADED: + sq->begin_count = svga->hud.num_bytes_uploaded; + break; + case SVGA_QUERY_COMMAND_BUFFER_SIZE: + sq->begin_count = svga->hud.command_buffer_size; + break; + case SVGA_QUERY_FLUSH_TIME: + sq->begin_count = svga->hud.flush_time; + break; + case SVGA_QUERY_SURFACE_WRITE_FLUSHES: + sq->begin_count = svga->hud.surface_write_flushes; + break; + case SVGA_QUERY_NUM_READBACKS: + sq->begin_count = svga->hud.num_readbacks; + break; + case SVGA_QUERY_NUM_RESOURCE_UPDATES: + sq->begin_count = svga->hud.num_resource_updates; + break; + case SVGA_QUERY_NUM_BUFFER_UPLOADS: + sq->begin_count = svga->hud.num_buffer_uploads; + break; + case SVGA_QUERY_NUM_CONST_BUF_UPDATES: + sq->begin_count = svga->hud.num_const_buf_updates; + break; + case SVGA_QUERY_NUM_CONST_UPDATES: + sq->begin_count = svga->hud.num_const_updates; + break; + case SVGA_QUERY_MEMORY_USED: + case SVGA_QUERY_NUM_SHADERS: + case SVGA_QUERY_NUM_RESOURCES: + case SVGA_QUERY_NUM_STATE_OBJECTS: + case SVGA_QUERY_NUM_SURFACE_VIEWS: + case SVGA_QUERY_NUM_GENERATE_MIPMAP: + case SVGA_QUERY_NUM_FAILED_ALLOCATIONS: + case SVGA_QUERY_NUM_COMMANDS_PER_DRAW: + /* nothing */ + break; + default: + assert(!"unexpected query type in svga_begin_query()"); } - svga->sq = sq; + svga->sq[sq->type] = sq; + + return true; } -static void svga_end_query(struct pipe_context *pipe, - struct pipe_query *q) + +static bool +svga_end_query(struct pipe_context *pipe, struct pipe_query *q) { - struct svga_context *svga = svga_context( pipe ); - struct svga_query *sq = svga_query( q ); + struct svga_context *svga = svga_context(pipe); + struct svga_query *sq = svga_query(q); enum pipe_error ret; - SVGA_DBG(DEBUG_QUERY, "%s\n", 
__FUNCTION__); - assert(svga->sq == sq); + assert(sq); + assert(sq->type < SVGA_QUERY_MAX); + + SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__, + sq, sq->id); + + if (sq->type == PIPE_QUERY_TIMESTAMP && svga->sq[sq->type] != sq) + svga_begin_query(pipe, q); svga_hwtnl_flush_retry(svga); - - /* Set to PENDING before sending EndQuery. */ - sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING; - ret = SVGA3D_EndQuery( svga->swc, sq->type, sq->hwbuf); - if(ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_EndQuery( svga->swc, sq->type, sq->hwbuf); + assert(svga->sq[sq->type] == sq); + + switch (sq->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: + if (svga_have_vgpu10(svga)) { + ret = end_query_vgpu10(svga, sq); + /* also need to end the associated occlusion predicate query */ + if (sq->predicate) { + enum pipe_error status; + status = end_query_vgpu10(svga, svga_query(sq->predicate)); + assert(status == PIPE_OK); + (void) status; + } + } else { + ret = end_query_vgpu9(svga, sq); + } assert(ret == PIPE_OK); + (void) ret; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_TIMESTAMP: + assert(svga_have_vgpu10(svga)); + ret = end_query_vgpu10(svga, sq); + assert(ret == PIPE_OK); + break; + case SVGA_QUERY_NUM_DRAW_CALLS: + sq->end_count = svga->hud.num_draw_calls; + break; + case SVGA_QUERY_NUM_FALLBACKS: + sq->end_count = svga->hud.num_fallbacks; + break; + case SVGA_QUERY_NUM_FLUSHES: + sq->end_count = svga->hud.num_flushes; + break; + case SVGA_QUERY_NUM_VALIDATIONS: + sq->end_count = svga->hud.num_validations; + break; + case SVGA_QUERY_MAP_BUFFER_TIME: + sq->end_count = svga->hud.map_buffer_time; + break; + case SVGA_QUERY_NUM_BUFFERS_MAPPED: + sq->end_count = svga->hud.num_buffers_mapped; + break; + case SVGA_QUERY_NUM_TEXTURES_MAPPED: + sq->end_count = svga->hud.num_textures_mapped; + break; + case SVGA_QUERY_NUM_BYTES_UPLOADED: + sq->end_count = svga->hud.num_bytes_uploaded; + break; + case SVGA_QUERY_COMMAND_BUFFER_SIZE: + sq->end_count = svga->hud.command_buffer_size; + break; + case SVGA_QUERY_FLUSH_TIME: + sq->end_count = svga->hud.flush_time; + break; + case SVGA_QUERY_SURFACE_WRITE_FLUSHES: + sq->end_count = svga->hud.surface_write_flushes; + break; + case SVGA_QUERY_NUM_READBACKS: + sq->end_count = svga->hud.num_readbacks; + break; + case SVGA_QUERY_NUM_RESOURCE_UPDATES: + sq->end_count = svga->hud.num_resource_updates; + break; + case SVGA_QUERY_NUM_BUFFER_UPLOADS: + sq->end_count = svga->hud.num_buffer_uploads; + break; + case SVGA_QUERY_NUM_CONST_BUF_UPDATES: + sq->end_count = svga->hud.num_const_buf_updates; + break; + case SVGA_QUERY_NUM_CONST_UPDATES: + sq->end_count = svga->hud.num_const_updates; + break; + case SVGA_QUERY_MEMORY_USED: + case SVGA_QUERY_NUM_SHADERS: + case SVGA_QUERY_NUM_RESOURCES: + case SVGA_QUERY_NUM_STATE_OBJECTS: + case SVGA_QUERY_NUM_SURFACE_VIEWS: + case SVGA_QUERY_NUM_GENERATE_MIPMAP: + case SVGA_QUERY_NUM_FAILED_ALLOCATIONS: + case SVGA_QUERY_NUM_COMMANDS_PER_DRAW: + /* nothing */ + break; + default: + assert(!"unexpected query type in svga_end_query()"); + } + svga->sq[sq->type] = NULL; + return true; +} + + +static bool +svga_get_query_result(struct pipe_context *pipe, + struct pipe_query *q, + bool wait, + union pipe_query_result *vresult) +{ + struct svga_screen *svgascreen = svga_screen(pipe->screen); + struct svga_context *svga = 
svga_context(pipe); + struct svga_query *sq = svga_query(q); + uint64_t *result = (uint64_t *)vresult; + bool ret = true; + + assert(sq); + + SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d wait: %d\n", + __FUNCTION__, sq, sq->id, wait); + + switch (sq->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + if (svga_have_vgpu10(svga)) { + SVGADXOcclusionQueryResult occResult; + ret = get_query_result_vgpu10(svga, sq, wait, + (void *)&occResult, sizeof(occResult)); + *result = (uint64_t)occResult.samplesRendered; + } else { + ret = get_query_result_vgpu9(svga, sq, wait, result); + } + break; + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: { + if (svga_have_vgpu10(svga)) { + SVGADXOcclusionPredicateQueryResult occResult; + ret = get_query_result_vgpu10(svga, sq, wait, + (void *)&occResult, sizeof(occResult)); + vresult->b = occResult.anySamplesRendered != 0; + } else { + uint64_t count = 0; + ret = get_query_result_vgpu9(svga, sq, wait, &count); + vresult->b = count != 0; + } + break; + } + case PIPE_QUERY_SO_STATISTICS: { + SVGADXStreamOutStatisticsQueryResult sResult; + struct pipe_query_data_so_statistics *pResult = + (struct pipe_query_data_so_statistics *)vresult; + + assert(svga_have_vgpu10(svga)); + ret = get_query_result_vgpu10(svga, sq, wait, + (void *)&sResult, sizeof(sResult)); + pResult->num_primitives_written = sResult.numPrimitivesWritten; + pResult->primitives_storage_needed = sResult.numPrimitivesRequired; + break; + } + case PIPE_QUERY_TIMESTAMP: { + SVGADXTimestampQueryResult sResult; + + assert(svga_have_vgpu10(svga)); + ret = get_query_result_vgpu10(svga, sq, wait, + (void *)&sResult, sizeof(sResult)); + *result = (uint64_t)sResult.timestamp; + break; + } + case PIPE_QUERY_PRIMITIVES_GENERATED: { + SVGADXStreamOutStatisticsQueryResult sResult; + + assert(svga_have_vgpu10(svga)); + ret = get_query_result_vgpu10(svga, sq, wait, + (void *)&sResult, sizeof sResult); + *result = (uint64_t)sResult.numPrimitivesRequired; + break; + } + case PIPE_QUERY_PRIMITIVES_EMITTED: { + SVGADXStreamOutStatisticsQueryResult sResult; + + assert(svga_have_vgpu10(svga)); + ret = get_query_result_vgpu10(svga, sq, wait, + (void *)&sResult, sizeof sResult); + *result = (uint64_t)sResult.numPrimitivesWritten; + break; + } + /* These are per-frame counters */ + case SVGA_QUERY_NUM_DRAW_CALLS: + case SVGA_QUERY_NUM_FALLBACKS: + case SVGA_QUERY_NUM_FLUSHES: + case SVGA_QUERY_NUM_VALIDATIONS: + case SVGA_QUERY_MAP_BUFFER_TIME: + case SVGA_QUERY_NUM_BUFFERS_MAPPED: + case SVGA_QUERY_NUM_TEXTURES_MAPPED: + case SVGA_QUERY_NUM_BYTES_UPLOADED: + case SVGA_QUERY_COMMAND_BUFFER_SIZE: + case SVGA_QUERY_FLUSH_TIME: + case SVGA_QUERY_SURFACE_WRITE_FLUSHES: + case SVGA_QUERY_NUM_READBACKS: + case SVGA_QUERY_NUM_RESOURCE_UPDATES: + case SVGA_QUERY_NUM_BUFFER_UPLOADS: + case SVGA_QUERY_NUM_CONST_BUF_UPDATES: + case SVGA_QUERY_NUM_CONST_UPDATES: + vresult->u64 = sq->end_count - sq->begin_count; + break; + /* These are running total counters */ + case SVGA_QUERY_MEMORY_USED: + vresult->u64 = svgascreen->hud.total_resource_bytes; + break; + case SVGA_QUERY_NUM_SHADERS: + vresult->u64 = svga->hud.num_shaders; + break; + case SVGA_QUERY_NUM_RESOURCES: + vresult->u64 = svgascreen->hud.num_resources; + break; + case SVGA_QUERY_NUM_STATE_OBJECTS: + vresult->u64 = (svga->hud.num_blend_objects + + svga->hud.num_depthstencil_objects + + svga->hud.num_rasterizer_objects + + svga->hud.num_sampler_objects + + svga->hud.num_samplerview_objects + + svga->hud.num_vertexelement_objects); + break; + case 
SVGA_QUERY_NUM_SURFACE_VIEWS: + vresult->u64 = svga->hud.num_surface_views; + break; + case SVGA_QUERY_NUM_GENERATE_MIPMAP: + vresult->u64 = svga->hud.num_generate_mipmap; + break; + case SVGA_QUERY_NUM_FAILED_ALLOCATIONS: + vresult->u64 = svgascreen->hud.num_failed_allocations; + break; + case SVGA_QUERY_NUM_COMMANDS_PER_DRAW: + vresult->f = (float) svga->swc->num_commands + / (float) svga->swc->num_draw_commands; + break; + default: + assert(!"unexpected query type in svga_get_query_result"); } - - /* TODO: Delay flushing. We don't really need to flush here, just ensure - * that there is one flush before svga_get_query_result attempts to get the - * result */ - svga_context_flush(svga, NULL); - svga->sq = NULL; + SVGA_DBG(DEBUG_QUERY, "%s result %d\n", __FUNCTION__, *((uint64_t *)vresult)); + + return ret; } -static boolean svga_get_query_result(struct pipe_context *pipe, - struct pipe_query *q, - boolean wait, - uint64_t *result) +static void +svga_render_condition(struct pipe_context *pipe, struct pipe_query *q, + bool condition, enum pipe_render_cond_flag mode) { - struct svga_context *svga = svga_context( pipe ); - struct svga_screen *svgascreen = svga_screen( pipe->screen ); - struct svga_winsys_screen *sws = svgascreen->sws; - struct svga_query *sq = svga_query( q ); - SVGA3dQueryState state; - - SVGA_DBG(DEBUG_QUERY, "%s wait: %d\n", __FUNCTION__); + struct svga_context *svga = svga_context(pipe); + struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; + struct svga_query *sq = svga_query(q); + SVGA3dQueryId queryId; + enum pipe_error ret; - /* The query status won't be updated by the host unless - * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause a - * synchronous wait on the host */ - if(!sq->fence) { - enum pipe_error ret; + SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__); + + assert(svga_have_vgpu10(svga)); + if (sq == NULL) { + queryId = SVGA3D_INVALID_ID; + } + else { + assert(sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION || + sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE); - ret = SVGA3D_WaitForQuery( svga->swc, sq->type, sq->hwbuf); - if(ret != PIPE_OK) { + if (sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION) { + assert(sq->predicate); + /** + * For conditional rendering, make sure to use the associated + * predicate query. + */ + sq = svga_query(sq->predicate); + } + queryId = sq->id; + + if ((mode == PIPE_RENDER_COND_WAIT || + mode == PIPE_RENDER_COND_BY_REGION_WAIT) && sq->fence) { + sws->fence_finish(sws, sq->fence, PIPE_TIMEOUT_INFINITE, + SVGA_FENCE_FLAG_QUERY); + } + } + /* + * if the kernel module doesn't support the predication command, + * we'll just render unconditionally. + * This is probably acceptable for the typical case of occlusion culling. 
+ */ + if (sws->have_set_predication_cmd) { + ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId, + (uint32) condition); + if (ret != PIPE_OK) { svga_context_flush(svga, NULL); - ret = SVGA3D_WaitForQuery( svga->swc, sq->type, sq->hwbuf); - assert(ret == PIPE_OK); + ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId, + (uint32) condition); } - - svga_context_flush(svga, &sq->fence); - - assert(sq->fence); + svga->pred.query_id = queryId; + svga->pred.cond = condition; } - state = sq->queryResult->state; - if(state == SVGA3D_QUERYSTATE_PENDING) { - if(!wait) - return FALSE; - - sws->fence_finish(sws, sq->fence, 0); - - state = sq->queryResult->state; - } + svga->render_condition = (sq != NULL); +} - assert(state == SVGA3D_QUERYSTATE_SUCCEEDED || - state == SVGA3D_QUERYSTATE_FAILED); - - *result = (uint64_t)sq->queryResult->result32; - SVGA_DBG(DEBUG_QUERY, "%s result %d\n", __FUNCTION__, (unsigned)*result); +/* + * This function is a workaround because we lack the ability to query + * renderer's time synchornously. + */ +static uint64_t +svga_get_timestamp(struct pipe_context *pipe) +{ + struct pipe_query *q = svga_create_query(pipe, PIPE_QUERY_TIMESTAMP, 0); + union pipe_query_result result; + + svga_begin_query(pipe, q); + svga_end_query(pipe,q); + svga_get_query_result(pipe, q, TRUE, &result); + svga_destroy_query(pipe, q); + + return result.u64; +} + - return TRUE; +static void +svga_set_active_query_state(struct pipe_context *pipe, bool enable) +{ } +/** + * \brief Toggle conditional rendering if already enabled + * + * \param svga[in] The svga context + * \param render_condition_enabled[in] Whether to ignore requests to turn + * conditional rendering off + * \param on[in] Whether to turn conditional rendering on or off + */ +void +svga_toggle_render_condition(struct svga_context *svga, + boolean render_condition_enabled, + boolean on) +{ + SVGA3dQueryId query_id; + enum pipe_error ret; + + if (render_condition_enabled || + svga->pred.query_id == SVGA3D_INVALID_ID) { + return; + } + + /* + * If we get here, it means that the system supports + * conditional rendering since svga->pred.query_id has already been + * modified for this context and thus support has already been + * verified. + */ + query_id = on ? svga->pred.query_id : SVGA3D_INVALID_ID; + + ret = SVGA3D_vgpu10_SetPredication(svga->swc, query_id, + (uint32) svga->pred.cond); + if (ret == PIPE_ERROR_OUT_OF_MEMORY) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_SetPredication(svga->swc, query_id, + (uint32) svga->pred.cond); + assert(ret == PIPE_OK); + } +} + -void svga_init_query_functions( struct svga_context *svga ) +void +svga_init_query_functions(struct svga_context *svga) { svga->pipe.create_query = svga_create_query; svga->pipe.destroy_query = svga_destroy_query; svga->pipe.begin_query = svga_begin_query; svga->pipe.end_query = svga_end_query; svga->pipe.get_query_result = svga_get_query_result; + svga->pipe.set_active_query_state = svga_set_active_query_state; + svga->pipe.render_condition = svga_render_condition; + svga->pipe.get_timestamp = svga_get_timestamp; }