anv: Put everything about queries in genX_query.c
authorJason Ekstrand <jason.ekstrand@intel.com>
Sat, 18 Feb 2017 23:58:40 +0000 (15:58 -0800)
committerJason Ekstrand <jason.ekstrand@intel.com>
Tue, 21 Feb 2017 20:26:35 +0000 (12:26 -0800)
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
src/intel/vulkan/Makefile.sources
src/intel/vulkan/anv_query.c [deleted file]
src/intel/vulkan/genX_cmd_buffer.c
src/intel/vulkan/genX_query.c [new file with mode: 0644]

index 0000b99f15da19c0199daf6f0906abff9e9b1e00..21c04cd9c46f5b4b0e7092000215d6d8f65f9104 100644 (file)
@@ -40,7 +40,6 @@ VULKAN_FILES := \
        anv_pipeline.c \
        anv_pipeline_cache.c \
        anv_private.h \
-       anv_query.c \
        anv_util.c \
        anv_wsi.c \
        vk_format_info.h
@@ -68,6 +67,7 @@ GEN7_FILES := \
        genX_blorp_exec.c \
        genX_gpu_memcpy.c \
        genX_pipeline.c \
+       genX_query.c \
        genX_state.c
 
 GEN75_FILES := \
@@ -76,6 +76,7 @@ GEN75_FILES := \
        genX_blorp_exec.c \
        genX_gpu_memcpy.c \
        genX_pipeline.c \
+       genX_query.c \
        genX_state.c
 
 GEN8_FILES := \
@@ -84,6 +85,7 @@ GEN8_FILES := \
        genX_blorp_exec.c \
        genX_gpu_memcpy.c \
        genX_pipeline.c \
+       genX_query.c \
        genX_state.c
 
 GEN9_FILES := \
@@ -92,4 +94,5 @@ GEN9_FILES := \
        genX_blorp_exec.c \
        genX_gpu_memcpy.c \
        genX_pipeline.c \
+       genX_query.c \
        genX_state.c
diff --git a/src/intel/vulkan/anv_query.c b/src/intel/vulkan/anv_query.c
deleted file mode 100644 (file)
index 6fe94b0..0000000
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright © 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include <assert.h>
-#include <stdbool.h>
-#include <string.h>
-#include <unistd.h>
-#include <fcntl.h>
-
-#include "anv_private.h"
-
-VkResult anv_CreateQueryPool(
-    VkDevice                                    _device,
-    const VkQueryPoolCreateInfo*                pCreateInfo,
-    const VkAllocationCallbacks*                pAllocator,
-    VkQueryPool*                                pQueryPool)
-{
-   ANV_FROM_HANDLE(anv_device, device, _device);
-   struct anv_query_pool *pool;
-   VkResult result;
-   uint32_t slot_size;
-   uint64_t size;
-
-   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO);
-
-   switch (pCreateInfo->queryType) {
-   case VK_QUERY_TYPE_OCCLUSION:
-   case VK_QUERY_TYPE_TIMESTAMP:
-      break;
-   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
-      return VK_ERROR_INCOMPATIBLE_DRIVER;
-   default:
-      assert(!"Invalid query type");
-   }
-
-   slot_size = sizeof(struct anv_query_pool_slot);
-   pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
-                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-   if (pool == NULL)
-      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-
-   pool->type = pCreateInfo->queryType;
-   pool->slots = pCreateInfo->queryCount;
-
-   size = pCreateInfo->queryCount * slot_size;
-   result = anv_bo_init_new(&pool->bo, device, size);
-   if (result != VK_SUCCESS)
-      goto fail;
-
-   pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size, 0);
-
-   *pQueryPool = anv_query_pool_to_handle(pool);
-
-   return VK_SUCCESS;
-
- fail:
-   vk_free2(&device->alloc, pAllocator, pool);
-
-   return result;
-}
-
-void anv_DestroyQueryPool(
-    VkDevice                                    _device,
-    VkQueryPool                                 _pool,
-    const VkAllocationCallbacks*                pAllocator)
-{
-   ANV_FROM_HANDLE(anv_device, device, _device);
-   ANV_FROM_HANDLE(anv_query_pool, pool, _pool);
-
-   if (!pool)
-      return;
-
-   anv_gem_munmap(pool->bo.map, pool->bo.size);
-   anv_gem_close(device, pool->bo.gem_handle);
-   vk_free2(&device->alloc, pAllocator, pool);
-}
-
-VkResult anv_GetQueryPoolResults(
-    VkDevice                                    _device,
-    VkQueryPool                                 queryPool,
-    uint32_t                                    firstQuery,
-    uint32_t                                    queryCount,
-    size_t                                      dataSize,
-    void*                                       pData,
-    VkDeviceSize                                stride,
-    VkQueryResultFlags                          flags)
-{
-   ANV_FROM_HANDLE(anv_device, device, _device);
-   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
-   int64_t timeout = INT64_MAX;
-   uint64_t result;
-   int ret;
-
-   assert(pool->type == VK_QUERY_TYPE_OCCLUSION ||
-          pool->type == VK_QUERY_TYPE_TIMESTAMP);
-
-   if (pData == NULL)
-      return VK_SUCCESS;
-
-   if (flags & VK_QUERY_RESULT_WAIT_BIT) {
-      ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout);
-      if (ret == -1) {
-         /* We don't know the real error. */
-         return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
-                          "gem_wait failed %m");
-      }
-   }
-
-   void *data_end = pData + dataSize;
-   struct anv_query_pool_slot *slot = pool->bo.map;
-
-   if (!device->info.has_llc)
-      anv_invalidate_range(slot, MIN2(queryCount * sizeof(*slot), pool->bo.size));
-
-   for (uint32_t i = 0; i < queryCount; i++) {
-      switch (pool->type) {
-      case VK_QUERY_TYPE_OCCLUSION: {
-         result = slot[firstQuery + i].end - slot[firstQuery + i].begin;
-         break;
-      }
-      case VK_QUERY_TYPE_PIPELINE_STATISTICS:
-         unreachable("pipeline stats not supported");
-      case VK_QUERY_TYPE_TIMESTAMP: {
-         result = slot[firstQuery + i].begin;
-         break;
-      }
-      default:
-         unreachable("invalid pool type");
-      }
-
-      if (flags & VK_QUERY_RESULT_64_BIT) {
-         uint64_t *dst = pData;
-         dst[0] = result;
-         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
-            dst[1] = slot[firstQuery + i].available;
-      } else {
-         uint32_t *dst = pData;
-         if (result > UINT32_MAX)
-            result = UINT32_MAX;
-         dst[0] = result;
-         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
-            dst[1] = slot[firstQuery + i].available;
-      }
-
-      pData += stride;
-      if (pData >= data_end)
-         break;
-   }
-
-   return VK_SUCCESS;
-}
index 12ff410d6544ab4f664def8107a24d482fda55ec..f45b714dfb2f3d941b28d41d222c1a5820a567fb 100644 (file)
@@ -2482,309 +2482,3 @@ void genX(CmdEndRenderPass)(
    anv_dump_add_framebuffer(cmd_buffer, cmd_buffer->state.framebuffer);
 #endif
 }
-
-static void
-emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer,
-                    struct anv_bo *bo, uint32_t offset)
-{
-   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
-      pc.DestinationAddressType  = DAT_PPGTT;
-      pc.PostSyncOperation       = WritePSDepthCount;
-      pc.DepthStallEnable        = true;
-      pc.Address                 = (struct anv_address) { bo, offset };
-
-      if (GEN_GEN == 9 && cmd_buffer->device->info.gt == 4)
-         pc.CommandStreamerStallEnable = true;
-   }
-}
-
-static void
-emit_query_availability(struct anv_cmd_buffer *cmd_buffer,
-                        struct anv_bo *bo, uint32_t offset)
-{
-   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
-      pc.DestinationAddressType  = DAT_PPGTT;
-      pc.PostSyncOperation       = WriteImmediateData;
-      pc.Address                 = (struct anv_address) { bo, offset };
-      pc.ImmediateData           = 1;
-   }
-}
-
-void genX(CmdResetQueryPool)(
-    VkCommandBuffer                             commandBuffer,
-    VkQueryPool                                 queryPool,
-    uint32_t                                    firstQuery,
-    uint32_t                                    queryCount)
-{
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
-   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
-
-   for (uint32_t i = 0; i < queryCount; i++) {
-      switch (pool->type) {
-      case VK_QUERY_TYPE_OCCLUSION:
-      case VK_QUERY_TYPE_TIMESTAMP: {
-         anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdm) {
-            sdm.Address = (struct anv_address) {
-               .bo = &pool->bo,
-               .offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot) +
-                         offsetof(struct anv_query_pool_slot, available),
-            };
-            sdm.DataDWord0 = 0;
-            sdm.DataDWord1 = 0;
-         }
-         break;
-      }
-      default:
-         assert(!"Invalid query type");
-      }
-   }
-}
-
-void genX(CmdBeginQuery)(
-    VkCommandBuffer                             commandBuffer,
-    VkQueryPool                                 queryPool,
-    uint32_t                                    query,
-    VkQueryControlFlags                         flags)
-{
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
-   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
-
-   /* Workaround: When meta uses the pipeline with the VS disabled, it seems
-    * that the pipelining of the depth write breaks. What we see is that
-    * samples from the render pass clear leaks into the first query
-    * immediately after the clear. Doing a pipecontrol with a post-sync
-    * operation and DepthStallEnable seems to work around the issue.
-    */
-   if (cmd_buffer->state.need_query_wa) {
-      cmd_buffer->state.need_query_wa = false;
-      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
-         pc.DepthCacheFlushEnable   = true;
-         pc.DepthStallEnable        = true;
-      }
-   }
-
-   switch (pool->type) {
-   case VK_QUERY_TYPE_OCCLUSION:
-      emit_ps_depth_count(cmd_buffer, &pool->bo,
-                          query * sizeof(struct anv_query_pool_slot));
-      break;
-
-   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
-   default:
-      unreachable("");
-   }
-}
-
-void genX(CmdEndQuery)(
-    VkCommandBuffer                             commandBuffer,
-    VkQueryPool                                 queryPool,
-    uint32_t                                    query)
-{
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
-   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
-
-   switch (pool->type) {
-   case VK_QUERY_TYPE_OCCLUSION:
-      emit_ps_depth_count(cmd_buffer, &pool->bo,
-                          query * sizeof(struct anv_query_pool_slot) + 8);
-
-      emit_query_availability(cmd_buffer, &pool->bo,
-                              query * sizeof(struct anv_query_pool_slot) + 16);
-      break;
-
-   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
-   default:
-      unreachable("");
-   }
-}
-
-#define TIMESTAMP 0x2358
-
-void genX(CmdWriteTimestamp)(
-    VkCommandBuffer                             commandBuffer,
-    VkPipelineStageFlagBits                     pipelineStage,
-    VkQueryPool                                 queryPool,
-    uint32_t                                    query)
-{
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
-   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
-   uint32_t offset = query * sizeof(struct anv_query_pool_slot);
-
-   assert(pool->type == VK_QUERY_TYPE_TIMESTAMP);
-
-   switch (pipelineStage) {
-   case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
-      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) {
-         srm.RegisterAddress  = TIMESTAMP;
-         srm.MemoryAddress    = (struct anv_address) { &pool->bo, offset };
-      }
-      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) {
-         srm.RegisterAddress  = TIMESTAMP + 4;
-         srm.MemoryAddress    = (struct anv_address) { &pool->bo, offset + 4 };
-      }
-      break;
-
-   default:
-      /* Everything else is bottom-of-pipe */
-      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
-         pc.DestinationAddressType  = DAT_PPGTT;
-         pc.PostSyncOperation       = WriteTimestamp;
-         pc.Address = (struct anv_address) { &pool->bo, offset };
-
-         if (GEN_GEN == 9 && cmd_buffer->device->info.gt == 4)
-            pc.CommandStreamerStallEnable = true;
-      }
-      break;
-   }
-
-   emit_query_availability(cmd_buffer, &pool->bo, query + 16);
-}
-
-#if GEN_GEN > 7 || GEN_IS_HASWELL
-
-#define alu_opcode(v)   __gen_uint((v),  20, 31)
-#define alu_operand1(v) __gen_uint((v),  10, 19)
-#define alu_operand2(v) __gen_uint((v),   0,  9)
-#define alu(opcode, operand1, operand2) \
-   alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2)
-
-#define OPCODE_NOOP      0x000
-#define OPCODE_LOAD      0x080
-#define OPCODE_LOADINV   0x480
-#define OPCODE_LOAD0     0x081
-#define OPCODE_LOAD1     0x481
-#define OPCODE_ADD       0x100
-#define OPCODE_SUB       0x101
-#define OPCODE_AND       0x102
-#define OPCODE_OR        0x103
-#define OPCODE_XOR       0x104
-#define OPCODE_STORE     0x180
-#define OPCODE_STOREINV  0x580
-
-#define OPERAND_R0   0x00
-#define OPERAND_R1   0x01
-#define OPERAND_R2   0x02
-#define OPERAND_R3   0x03
-#define OPERAND_R4   0x04
-#define OPERAND_SRCA 0x20
-#define OPERAND_SRCB 0x21
-#define OPERAND_ACCU 0x31
-#define OPERAND_ZF   0x32
-#define OPERAND_CF   0x33
-
-#define CS_GPR(n) (0x2600 + (n) * 8)
-
-static void
-emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg,
-                      struct anv_bo *bo, uint32_t offset)
-{
-   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
-      lrm.RegisterAddress  = reg,
-      lrm.MemoryAddress    = (struct anv_address) { bo, offset };
-   }
-   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
-      lrm.RegisterAddress  = reg + 4;
-      lrm.MemoryAddress    = (struct anv_address) { bo, offset + 4 };
-   }
-}
-
-static void
-store_query_result(struct anv_batch *batch, uint32_t reg,
-                   struct anv_bo *bo, uint32_t offset, VkQueryResultFlags flags)
-{
-   anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), srm) {
-      srm.RegisterAddress  = reg;
-      srm.MemoryAddress    = (struct anv_address) { bo, offset };
-   }
-
-   if (flags & VK_QUERY_RESULT_64_BIT) {
-      anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), srm) {
-         srm.RegisterAddress  = reg + 4;
-         srm.MemoryAddress    = (struct anv_address) { bo, offset + 4 };
-      }
-   }
-}
-
-void genX(CmdCopyQueryPoolResults)(
-    VkCommandBuffer                             commandBuffer,
-    VkQueryPool                                 queryPool,
-    uint32_t                                    firstQuery,
-    uint32_t                                    queryCount,
-    VkBuffer                                    destBuffer,
-    VkDeviceSize                                destOffset,
-    VkDeviceSize                                destStride,
-    VkQueryResultFlags                          flags)
-{
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
-   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
-   ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
-   uint32_t slot_offset, dst_offset;
-
-   if (flags & VK_QUERY_RESULT_WAIT_BIT) {
-      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
-         pc.CommandStreamerStallEnable = true;
-         pc.StallAtPixelScoreboard     = true;
-      }
-   }
-
-   dst_offset = buffer->offset + destOffset;
-   for (uint32_t i = 0; i < queryCount; i++) {
-
-      slot_offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot);
-      switch (pool->type) {
-      case VK_QUERY_TYPE_OCCLUSION:
-         emit_load_alu_reg_u64(&cmd_buffer->batch,
-                               CS_GPR(0), &pool->bo, slot_offset);
-         emit_load_alu_reg_u64(&cmd_buffer->batch,
-                               CS_GPR(1), &pool->bo, slot_offset + 8);
-
-         /* FIXME: We need to clamp the result for 32 bit. */
-
-         uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH));
-         dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1);
-         dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0);
-         dw[3] = alu(OPCODE_SUB, 0, 0);
-         dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU);
-         break;
-
-      case VK_QUERY_TYPE_TIMESTAMP:
-         emit_load_alu_reg_u64(&cmd_buffer->batch,
-                               CS_GPR(2), &pool->bo, slot_offset);
-         break;
-
-      default:
-         unreachable("unhandled query type");
-      }
-
-      store_query_result(&cmd_buffer->batch,
-                         CS_GPR(2), buffer->bo, dst_offset, flags);
-
-      if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
-         emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0),
-                               &pool->bo, slot_offset + 16);
-         if (flags & VK_QUERY_RESULT_64_BIT)
-            store_query_result(&cmd_buffer->batch,
-                               CS_GPR(0), buffer->bo, dst_offset + 8, flags);
-         else
-            store_query_result(&cmd_buffer->batch,
-                               CS_GPR(0), buffer->bo, dst_offset + 4, flags);
-      }
-
-      dst_offset += destStride;
-   }
-}
-
-#else
-void genX(CmdCopyQueryPoolResults)(
-    VkCommandBuffer                             commandBuffer,
-    VkQueryPool                                 queryPool,
-    uint32_t                                    firstQuery,
-    uint32_t                                    queryCount,
-    VkBuffer                                    destBuffer,
-    VkDeviceSize                                destOffset,
-    VkDeviceSize                                destStride,
-    VkQueryResultFlags                          flags)
-{
-   anv_finishme("Queries not yet supported on Ivy Bridge");
-}
-#endif
diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
new file mode 100644 (file)
index 0000000..830f867
--- /dev/null
@@ -0,0 +1,480 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "anv_private.h"
+
+#include "genxml/gen_macros.h"
+#include "genxml/genX_pack.h"
+
+VkResult genX(CreateQueryPool)(
+    VkDevice                                    _device,
+    const VkQueryPoolCreateInfo*                pCreateInfo,
+    const VkAllocationCallbacks*                pAllocator,
+    VkQueryPool*                                pQueryPool)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   struct anv_query_pool *pool;
+   VkResult result;
+   uint32_t slot_size;
+   uint64_t size;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO);
+
+   switch (pCreateInfo->queryType) {
+   case VK_QUERY_TYPE_OCCLUSION:
+   case VK_QUERY_TYPE_TIMESTAMP:
+      break;
+   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+      return VK_ERROR_INCOMPATIBLE_DRIVER;
+   default:
+      assert(!"Invalid query type");
+   }
+
+   slot_size = sizeof(struct anv_query_pool_slot);
+   pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
+                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (pool == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   pool->type = pCreateInfo->queryType;
+   pool->slots = pCreateInfo->queryCount;
+
+   size = pCreateInfo->queryCount * slot_size;
+   result = anv_bo_init_new(&pool->bo, device, size);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size, 0);
+
+   *pQueryPool = anv_query_pool_to_handle(pool);
+
+   return VK_SUCCESS;
+
+ fail:
+   vk_free2(&device->alloc, pAllocator, pool);
+
+   return result;
+}
+
+void genX(DestroyQueryPool)(
+    VkDevice                                    _device,
+    VkQueryPool                                 _pool,
+    const VkAllocationCallbacks*                pAllocator)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_query_pool, pool, _pool);
+
+   if (!pool)
+      return;
+
+   anv_gem_munmap(pool->bo.map, pool->bo.size);
+   anv_gem_close(device, pool->bo.gem_handle);
+   vk_free2(&device->alloc, pAllocator, pool);
+}
+
+VkResult genX(GetQueryPoolResults)(
+    VkDevice                                    _device,
+    VkQueryPool                                 queryPool,
+    uint32_t                                    firstQuery,
+    uint32_t                                    queryCount,
+    size_t                                      dataSize,
+    void*                                       pData,
+    VkDeviceSize                                stride,
+    VkQueryResultFlags                          flags)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
+   int64_t timeout = INT64_MAX;
+   uint64_t result;
+   int ret;
+
+   assert(pool->type == VK_QUERY_TYPE_OCCLUSION ||
+          pool->type == VK_QUERY_TYPE_TIMESTAMP);
+
+   if (pData == NULL)
+      return VK_SUCCESS;
+
+   if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+      ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout);
+      if (ret == -1) {
+         /* We don't know the real error. */
+         return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                          "gem_wait failed %m");
+      }
+   }
+
+   void *data_end = pData + dataSize;
+   struct anv_query_pool_slot *slot = pool->bo.map;
+
+   if (!device->info.has_llc)
+      anv_invalidate_range(slot, MIN2(queryCount * sizeof(*slot), pool->bo.size));
+
+   for (uint32_t i = 0; i < queryCount; i++) {
+      switch (pool->type) {
+      case VK_QUERY_TYPE_OCCLUSION: {
+         result = slot[firstQuery + i].end - slot[firstQuery + i].begin;
+         break;
+      }
+      case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+         unreachable("pipeline stats not supported");
+      case VK_QUERY_TYPE_TIMESTAMP: {
+         result = slot[firstQuery + i].begin;
+         break;
+      }
+      default:
+         unreachable("invalid pool type");
+      }
+
+      if (flags & VK_QUERY_RESULT_64_BIT) {
+         uint64_t *dst = pData;
+         dst[0] = result;
+         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
+            dst[1] = slot[firstQuery + i].available;
+      } else {
+         uint32_t *dst = pData;
+         if (result > UINT32_MAX)
+            result = UINT32_MAX;
+         dst[0] = result;
+         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
+            dst[1] = slot[firstQuery + i].available;
+      }
+
+      pData += stride;
+      if (pData >= data_end)
+         break;
+   }
+
+   return VK_SUCCESS;
+}
+
+static void
+emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer,
+                    struct anv_bo *bo, uint32_t offset)
+{
+   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+      pc.DestinationAddressType  = DAT_PPGTT;
+      pc.PostSyncOperation       = WritePSDepthCount;
+      pc.DepthStallEnable        = true;
+      pc.Address                 = (struct anv_address) { bo, offset };
+
+      if (GEN_GEN == 9 && cmd_buffer->device->info.gt == 4)
+         pc.CommandStreamerStallEnable = true;
+   }
+}
+
+static void
+emit_query_availability(struct anv_cmd_buffer *cmd_buffer,
+                        struct anv_bo *bo, uint32_t offset)
+{
+   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+      pc.DestinationAddressType  = DAT_PPGTT;
+      pc.PostSyncOperation       = WriteImmediateData;
+      pc.Address                 = (struct anv_address) { bo, offset };
+      pc.ImmediateData           = 1;
+   }
+}
+
+void genX(CmdResetQueryPool)(
+    VkCommandBuffer                             commandBuffer,
+    VkQueryPool                                 queryPool,
+    uint32_t                                    firstQuery,
+    uint32_t                                    queryCount)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
+
+   for (uint32_t i = 0; i < queryCount; i++) {
+      switch (pool->type) {
+      case VK_QUERY_TYPE_OCCLUSION:
+      case VK_QUERY_TYPE_TIMESTAMP: {
+         anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdm) {
+            sdm.Address = (struct anv_address) {
+               .bo = &pool->bo,
+               .offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot) +
+                         offsetof(struct anv_query_pool_slot, available),
+            };
+            sdm.DataDWord0 = 0;
+            sdm.DataDWord1 = 0;
+         }
+         break;
+      }
+      default:
+         assert(!"Invalid query type");
+      }
+   }
+}
+
+void genX(CmdBeginQuery)(
+    VkCommandBuffer                             commandBuffer,
+    VkQueryPool                                 queryPool,
+    uint32_t                                    query,
+    VkQueryControlFlags                         flags)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
+
+   /* Workaround: When meta uses the pipeline with the VS disabled, it seems
+    * that the pipelining of the depth write breaks. What we see is that
+    * samples from the render pass clear leaks into the first query
+    * immediately after the clear. Doing a pipecontrol with a post-sync
+    * operation and DepthStallEnable seems to work around the issue.
+    */
+   if (cmd_buffer->state.need_query_wa) {
+      cmd_buffer->state.need_query_wa = false;
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+         pc.DepthCacheFlushEnable   = true;
+         pc.DepthStallEnable        = true;
+      }
+   }
+
+   switch (pool->type) {
+   case VK_QUERY_TYPE_OCCLUSION:
+      emit_ps_depth_count(cmd_buffer, &pool->bo,
+                          query * sizeof(struct anv_query_pool_slot));
+      break;
+
+   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+   default:
+      unreachable("");
+   }
+}
+
+void genX(CmdEndQuery)(
+    VkCommandBuffer                             commandBuffer,
+    VkQueryPool                                 queryPool,
+    uint32_t                                    query)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
+
+   switch (pool->type) {
+   case VK_QUERY_TYPE_OCCLUSION:
+      emit_ps_depth_count(cmd_buffer, &pool->bo,
+                          query * sizeof(struct anv_query_pool_slot) + 8);
+
+      emit_query_availability(cmd_buffer, &pool->bo,
+                              query * sizeof(struct anv_query_pool_slot) + 16);
+      break;
+
+   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+   default:
+      unreachable("");
+   }
+}
+
+#define TIMESTAMP 0x2358
+
+void genX(CmdWriteTimestamp)(
+    VkCommandBuffer                             commandBuffer,
+    VkPipelineStageFlagBits                     pipelineStage,
+    VkQueryPool                                 queryPool,
+    uint32_t                                    query)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
+   uint32_t offset = query * sizeof(struct anv_query_pool_slot);
+
+   assert(pool->type == VK_QUERY_TYPE_TIMESTAMP);
+
+   switch (pipelineStage) {
+   case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
+      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) {
+         srm.RegisterAddress  = TIMESTAMP;
+         srm.MemoryAddress    = (struct anv_address) { &pool->bo, offset };
+      }
+      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) {
+         srm.RegisterAddress  = TIMESTAMP + 4;
+         srm.MemoryAddress    = (struct anv_address) { &pool->bo, offset + 4 };
+      }
+      break;
+
+   default:
+      /* Everything else is bottom-of-pipe */
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+         pc.DestinationAddressType  = DAT_PPGTT;
+         pc.PostSyncOperation       = WriteTimestamp;
+         pc.Address = (struct anv_address) { &pool->bo, offset };
+
+         if (GEN_GEN == 9 && cmd_buffer->device->info.gt == 4)
+            pc.CommandStreamerStallEnable = true;
+      }
+      break;
+   }
+
+   emit_query_availability(cmd_buffer, &pool->bo, query + 16);
+}
+
+#if GEN_GEN > 7 || GEN_IS_HASWELL
+
+#define alu_opcode(v)   __gen_uint((v),  20, 31)
+#define alu_operand1(v) __gen_uint((v),  10, 19)
+#define alu_operand2(v) __gen_uint((v),   0,  9)
+#define alu(opcode, operand1, operand2) \
+   alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2)
+
+#define OPCODE_NOOP      0x000
+#define OPCODE_LOAD      0x080
+#define OPCODE_LOADINV   0x480
+#define OPCODE_LOAD0     0x081
+#define OPCODE_LOAD1     0x481
+#define OPCODE_ADD       0x100
+#define OPCODE_SUB       0x101
+#define OPCODE_AND       0x102
+#define OPCODE_OR        0x103
+#define OPCODE_XOR       0x104
+#define OPCODE_STORE     0x180
+#define OPCODE_STOREINV  0x580
+
+#define OPERAND_R0   0x00
+#define OPERAND_R1   0x01
+#define OPERAND_R2   0x02
+#define OPERAND_R3   0x03
+#define OPERAND_R4   0x04
+#define OPERAND_SRCA 0x20
+#define OPERAND_SRCB 0x21
+#define OPERAND_ACCU 0x31
+#define OPERAND_ZF   0x32
+#define OPERAND_CF   0x33
+
+#define CS_GPR(n) (0x2600 + (n) * 8)
+
+static void
+emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg,
+                      struct anv_bo *bo, uint32_t offset)
+{
+   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
+      lrm.RegisterAddress  = reg,
+      lrm.MemoryAddress    = (struct anv_address) { bo, offset };
+   }
+   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
+      lrm.RegisterAddress  = reg + 4;
+      lrm.MemoryAddress    = (struct anv_address) { bo, offset + 4 };
+   }
+}
+
+static void
+store_query_result(struct anv_batch *batch, uint32_t reg,
+                   struct anv_bo *bo, uint32_t offset, VkQueryResultFlags flags)
+{
+   anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), srm) {
+      srm.RegisterAddress  = reg;
+      srm.MemoryAddress    = (struct anv_address) { bo, offset };
+   }
+
+   if (flags & VK_QUERY_RESULT_64_BIT) {
+      anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), srm) {
+         srm.RegisterAddress  = reg + 4;
+         srm.MemoryAddress    = (struct anv_address) { bo, offset + 4 };
+      }
+   }
+}
+
+void genX(CmdCopyQueryPoolResults)(
+    VkCommandBuffer                             commandBuffer,
+    VkQueryPool                                 queryPool,
+    uint32_t                                    firstQuery,
+    uint32_t                                    queryCount,
+    VkBuffer                                    destBuffer,
+    VkDeviceSize                                destOffset,
+    VkDeviceSize                                destStride,
+    VkQueryResultFlags                          flags)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
+   ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
+   uint32_t slot_offset, dst_offset;
+
+   if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+         pc.CommandStreamerStallEnable = true;
+         pc.StallAtPixelScoreboard     = true;
+      }
+   }
+
+   dst_offset = buffer->offset + destOffset;
+   for (uint32_t i = 0; i < queryCount; i++) {
+
+      slot_offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot);
+      switch (pool->type) {
+      case VK_QUERY_TYPE_OCCLUSION:
+         emit_load_alu_reg_u64(&cmd_buffer->batch,
+                               CS_GPR(0), &pool->bo, slot_offset);
+         emit_load_alu_reg_u64(&cmd_buffer->batch,
+                               CS_GPR(1), &pool->bo, slot_offset + 8);
+
+         /* FIXME: We need to clamp the result for 32 bit. */
+
+         uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH));
+         dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1);
+         dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0);
+         dw[3] = alu(OPCODE_SUB, 0, 0);
+         dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU);
+         break;
+
+      case VK_QUERY_TYPE_TIMESTAMP:
+         emit_load_alu_reg_u64(&cmd_buffer->batch,
+                               CS_GPR(2), &pool->bo, slot_offset);
+         break;
+
+      default:
+         unreachable("unhandled query type");
+      }
+
+      store_query_result(&cmd_buffer->batch,
+                         CS_GPR(2), buffer->bo, dst_offset, flags);
+
+      if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+         emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0),
+                               &pool->bo, slot_offset + 16);
+         if (flags & VK_QUERY_RESULT_64_BIT)
+            store_query_result(&cmd_buffer->batch,
+                               CS_GPR(0), buffer->bo, dst_offset + 8, flags);
+         else
+            store_query_result(&cmd_buffer->batch,
+                               CS_GPR(0), buffer->bo, dst_offset + 4, flags);
+      }
+
+      dst_offset += destStride;
+   }
+}
+
+#else
+void genX(CmdCopyQueryPoolResults)(
+    VkCommandBuffer                             commandBuffer,
+    VkQueryPool                                 queryPool,
+    uint32_t                                    firstQuery,
+    uint32_t                                    queryCount,
+    VkBuffer                                    destBuffer,
+    VkDeviceSize                                destOffset,
+    VkDeviceSize                                destStride,
+    VkQueryResultFlags                          flags)
+{
+   anv_finishme("Queries not yet supported on Ivy Bridge");
+}
+#endif