vk: Make occlusion query work, both copy and get functions
authorKristian Høgsberg <kristian.h.kristensen@intel.com>
Tue, 19 May 2015 00:03:58 +0000 (17:03 -0700)
committerKristian Høgsberg <kristian.h.kristensen@intel.com>
Tue, 19 May 2015 03:52:43 +0000 (20:52 -0700)
src/vulkan/device.c
src/vulkan/private.h

index b3fa69015c15263bca86e221d29a4726dcfc02d4..5cefc0ade6c09142e31872c848f10340cf736cef 100644 (file)
@@ -956,6 +956,19 @@ anv_fence_destructor(struct anv_device *   device,
    return VK_SUCCESS;
 }
 
+static VkResult
+anv_query_pool_destructor(struct anv_device *   device,
+                          VkObject              object)
+{
+   struct anv_query_pool *pool = (struct anv_query_pool *) object;
+
+   anv_gem_munmap(pool->bo.map, pool->bo.size);
+   anv_gem_close(device, pool->bo.gem_handle);
+   anv_device_free(device, pool);
+
+   return VK_SUCCESS;
+}
+
 static VkResult (*anv_object_destructors[])(struct anv_device *device,
                                             VkObject object) = {
    [VK_OBJECT_TYPE_INSTANCE] =        anv_instance_destructor,
@@ -968,7 +981,8 @@ static VkResult (*anv_object_destructors[])(struct anv_device *device,
    [VK_OBJECT_TYPE_BUFFER] =          anv_free_destructor,
    [VK_OBJECT_TYPE_IMAGE] =           anv_free_destructor,
    [VK_OBJECT_TYPE_RENDER_PASS] =     anv_free_destructor,
-   [VK_OBJECT_TYPE_FENCE] =           anv_fence_destructor
+   [VK_OBJECT_TYPE_FENCE] =           anv_fence_destructor,
+   [VK_OBJECT_TYPE_QUERY_POOL] =      anv_query_pool_destructor
 };
 
 VkResult anv_DestroyObject(
@@ -1299,12 +1313,6 @@ VkResult anv_ResetEvent(
 
 // Query functions
 
-struct anv_query_pool {
-   VkQueryType                                 type;
-   uint32_t                                    slots;
-   struct anv_bo bo;
-};
-
 VkResult anv_CreateQueryPool(
     VkDevice                                    _device,
     const VkQueryPoolCreateInfo*                pCreateInfo,
@@ -1313,19 +1321,32 @@ VkResult anv_CreateQueryPool(
    struct anv_device *device = (struct anv_device *) _device;
    struct anv_query_pool *pool;
    VkResult result;
+   size_t size;
 
    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO);
    
+   switch (pCreateInfo->queryType) {
+   case VK_QUERY_TYPE_OCCLUSION:
+      break;
+   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+      return VK_UNSUPPORTED;
+   default:
+      unreachable("");
+   }
+
    pool = anv_device_alloc(device, sizeof(*pool), 8,
                             VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
    if (pool == NULL)
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
    pool->type = pCreateInfo->queryType;
-   result = anv_bo_init_new(&pool->bo, device, pCreateInfo->slots * 16);
+   size = pCreateInfo->slots * sizeof(struct anv_query_pool_slot);
+   result = anv_bo_init_new(&pool->bo, device, size);
    if (result != VK_SUCCESS)
       goto fail;
 
+   pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size);
+
    *pQueryPool = (VkQueryPool) pool;
 
    return VK_SUCCESS;
@@ -1337,7 +1358,7 @@ VkResult anv_CreateQueryPool(
 }
 
 VkResult anv_GetQueryPoolResults(
-    VkDevice                                    device,
+    VkDevice                                    _device,
     VkQueryPool                                 queryPool,
     uint32_t                                    startQuery,
     uint32_t                                    queryCount,
@@ -1345,7 +1366,49 @@ VkResult anv_GetQueryPoolResults(
     void*                                       pData,
     VkQueryResultFlags                          flags)
 {
-   stub_return(VK_UNSUPPORTED);
+   struct anv_device *device = (struct anv_device *) _device;
+   struct anv_query_pool *pool = (struct anv_query_pool *) queryPool;
+   struct anv_query_pool_slot *slot = pool->bo.map;
+   int64_t timeout = INT64_MAX;
+   uint32_t *dst32 = pData;
+   uint64_t *dst64 = pData;
+   uint64_t result;
+   int ret;
+
+   if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+      /* Where is the availabilty info supposed to go? */
+      anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT");
+      return VK_UNSUPPORTED;
+   }
+
+   assert(pool->type == VK_QUERY_TYPE_OCCLUSION);
+
+   if (flags & VK_QUERY_RESULT_64_BIT)
+      *pDataSize = queryCount * sizeof(uint64_t);
+   else
+      *pDataSize = queryCount * sizeof(uint32_t);
+
+   if (pData == NULL)
+      return VK_SUCCESS;
+
+   if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+      ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout);
+      if (ret == -1)
+         return vk_error(VK_ERROR_UNKNOWN);
+   }
+
+   for (uint32_t i = 0; i < queryCount; i++) {
+      result = slot[startQuery + i].end - slot[startQuery + i].begin;
+      if (flags & VK_QUERY_RESULT_64_BIT) {
+         *dst64++ = result;
+      } else {
+         if (result > UINT32_MAX)
+            result = UINT32_MAX;
+         *dst32++ = result;
+      }
+   }
+
+   return VK_SUCCESS;
 }
 
 // Buffer functions
@@ -2698,14 +2761,13 @@ void anv_CmdBeginQuery(
 
    switch (pool->type) {
    case VK_QUERY_TYPE_OCCLUSION:
-      anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, slot * 16);
+      anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
+                                    slot * sizeof(struct anv_query_pool_slot));
       break;
 
    case VK_QUERY_TYPE_PIPELINE_STATISTICS:
-      break;
-
    default:
-      break;
+      unreachable("");
    }
 }
 
@@ -2719,14 +2781,13 @@ void anv_CmdEndQuery(
 
    switch (pool->type) {
    case VK_QUERY_TYPE_OCCLUSION:
-      anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, slot * 16 + 8);
+      anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
+                                    slot * sizeof(struct anv_query_pool_slot) + 8);
       break;
 
    case VK_QUERY_TYPE_PIPELINE_STATISTICS:
-      break;
-
    default:
-      break;
+      unreachable("");
    }
 }
 
@@ -2771,6 +2832,50 @@ void anv_CmdWriteTimestamp(
    }
 }
 
+#define alu_opcode(v)   __gen_field((v),  20, 31)
+#define alu_operand1(v) __gen_field((v),  10, 19)
+#define alu_operand2(v) __gen_field((v),   0,  9)
+#define alu(opcode, operand1, operand2) \
+   alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2)
+
+#define OPCODE_NOOP      0x000
+#define OPCODE_LOAD      0x080
+#define OPCODE_LOADINV   0x480
+#define OPCODE_LOAD0     0x081
+#define OPCODE_LOAD1     0x481
+#define OPCODE_ADD       0x100
+#define OPCODE_SUB       0x101
+#define OPCODE_AND       0x102
+#define OPCODE_OR        0x103
+#define OPCODE_XOR       0x104
+#define OPCODE_STORE     0x180
+#define OPCODE_STOREINV  0x580
+
+#define OPERAND_R0   0x00
+#define OPERAND_R1   0x01
+#define OPERAND_R2   0x02
+#define OPERAND_R3   0x03
+#define OPERAND_R4   0x04
+#define OPERAND_SRCA 0x20
+#define OPERAND_SRCB 0x21
+#define OPERAND_ACCU 0x31
+#define OPERAND_ZF   0x32
+#define OPERAND_CF   0x33
+
+#define CS_GPR(n) (0x2600 + (n) * 8)
+
+static void
+emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg,
+                      struct anv_bo *bo, uint32_t offset)
+{
+   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
+                  .RegisterAddress = reg,
+                  .MemoryAddress = { bo, offset });
+   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
+                  .RegisterAddress = reg + 4,
+                  .MemoryAddress = { bo, offset + 4 });
+}
+
 void anv_CmdCopyQueryPoolResults(
     VkCmdBuffer                                 cmdBuffer,
     VkQueryPool                                 queryPool,
@@ -2781,7 +2886,53 @@ void anv_CmdCopyQueryPoolResults(
     VkDeviceSize                                destStride,
     VkQueryResultFlags                          flags)
 {
-   stub();
+   struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
+   struct anv_query_pool *pool = (struct anv_query_pool *) queryPool;
+   struct anv_buffer *buffer = (struct anv_buffer *) destBuffer;
+   uint32_t slot_offset, dst_offset;
+
+   if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+      /* Where is the availabilty info supposed to go? */
+      anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT");
+      return;
+   }
+
+   assert(pool->type == VK_QUERY_TYPE_OCCLUSION);
+
+   /* FIXME: If we're not waiting, should we just do this on the CPU? */
+   if (flags & VK_QUERY_RESULT_WAIT_BIT)
+      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
+                     .CommandStreamerStallEnable = true);
+
+   dst_offset = buffer->offset + destOffset;
+   for (uint32_t i = 0; i < queryCount; i++) {
+
+      slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot);
+
+      emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), &pool->bo, slot_offset);
+      emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(1), &pool->bo, slot_offset + 8);
+
+      /* FIXME: We need to clamp the result for 32 bit. */
+
+      uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GEN8_MI_MATH);
+      dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1);
+      dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0);
+      dw[3] = alu(OPCODE_SUB, 0, 0);
+      dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU);
+
+      anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM,
+                     .RegisterAddress = CS_GPR(2),
+                     /* FIXME: This is only lower 32 bits */
+                     .MemoryAddress = { buffer->bo, dst_offset });
+
+      if (flags & VK_QUERY_RESULT_64_BIT)
+         anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM,
+                        .RegisterAddress = CS_GPR(2) + 4,
+                        /* FIXME: This is only lower 32 bits */
+                        .MemoryAddress = { buffer->bo, dst_offset + 4 });
+
+      dst_offset += destStride;
+   }
 }
 
 void anv_CmdInitAtomicCounters(
index 554ec66d74831a35cbd46c21027d1eb79d89848f..b79dea3ddc480295b34929b299dcf02d6b6d87b9 100644 (file)
@@ -452,6 +452,18 @@ struct anv_dynamic_cb_state {
    uint32_t blend_offset;
 };
 
+struct anv_query_pool_slot {
+   uint64_t begin;
+   uint64_t end;
+   uint64_t available;
+};
+
+struct anv_query_pool {
+   VkQueryType                                  type;
+   uint32_t                                     slots;
+   struct anv_bo                                bo;
+};
+
 struct anv_descriptor_set_layout {
    struct {
       uint32_t surface_count;