turnip: make the struct slot_value of queries get 2 values
authorHyunjun Ko <zzoon@igalia.com>
Fri, 17 Apr 2020 06:39:14 +0000 (06:39 +0000)
committerMarge Bot <eric+marge@anholt.net>
Thu, 23 Apr 2020 01:14:19 +0000 (01:14 +0000)
In the case of a transform feedback query, two integer values are
written: one for the number of primitives written and another for the
number of primitives generated.

To handle this, the second member of struct slot_value should be
treated as a real value rather than as padding.

In addition, we also need to modify get/copy_result to access both
values.

This patch is the prep work for the transform feedback query support.

Tested with
dEQP-VK.pipeline.timestamp.*
dEQP-VK.query_pool.occlusion_query.*

Signed-off-by: Hyunjun Ko <zzoon@igalia.com>
Reviewed-by: Brian Ho <brian@brkho.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4604>

src/freedreno/vulkan/tu_query.c

index b11306ddf3be2040e4bbaaf66e48aefe65f2f71c..9b1d1db4a4f626860576afe0050849d40898a176 100644 (file)
 #define NSEC_PER_SEC 1000000000ull
 #define WAIT_TIMEOUT 5
 
-/* It seems like sample counts need to be copied over to 16-byte aligned
- * memory. */
+/* Depending on the query type, there might be 2 integer values.
+ * eg. VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT
+ *   values[0] : primitives written, values[1]: primitives generated
+ */
 struct PACKED slot_value {
-   uint64_t value;
-   uint64_t __padding;
+   uint64_t values[2];
 };
 
 struct PACKED query_slot {
@@ -63,20 +64,20 @@ struct PACKED occlusion_query_slot {
 
 /* Returns the IOVA of a given uint64_t field in a given slot of a query
  * pool. */
-#define query_iova(type, pool, query, field)                         \
+#define query_iova(type, pool, query, field, value_index)            \
    pool->bo.iova + pool->stride * query + offsetof(type, field) +    \
-         offsetof(struct slot_value, value)
+         offsetof(struct slot_value, values[value_index])
 
 #define occlusion_query_iova(pool, query, field)                     \
-   query_iova(struct occlusion_query_slot, pool, query, field)
+   query_iova(struct occlusion_query_slot, pool, query, field, 0)
 
 #define query_available_iova(pool, query)                            \
-   query_iova(struct query_slot, pool, query, available)
+   query_iova(struct query_slot, pool, query, available, 0)
 
-#define query_result_iova(pool, query)                               \
-   query_iova(struct query_slot, pool, query, result)
+#define query_result_iova(pool, query, i)                            \
+   query_iova(struct query_slot, pool, query, result, i)
 
-#define query_is_available(slot) slot->available.value
+#define query_is_available(slot) slot->available.values[0]
 
 /*
  * Returns a pointer to a given slot in a query pool.
@@ -158,6 +159,23 @@ tu_DestroyQueryPool(VkDevice _device,
    vk_free2(&device->alloc, pAllocator, pool);
 }
 
+static uint32_t
+get_result_count(struct tu_query_pool *pool)
+{
+   switch (pool->type) {
+   /* Occlusion and timestamp queries write one integer value */
+   case VK_QUERY_TYPE_OCCLUSION:
+   case VK_QUERY_TYPE_TIMESTAMP:
+      return 1;
+   /* Transform feedback queries write two integer values */
+   case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+      return 2;
+   default:
+      assert(!"Invalid query type");
+      return 0;
+   }
+}
+
 /* Wait on the availability status of a query up until a timeout. */
 static VkResult
 wait_for_available(struct tu_device *device, struct tu_query_pool *pool,
@@ -208,6 +226,8 @@ get_query_pool_results(struct tu_device *device,
       uint32_t query = firstQuery + i;
       struct query_slot *slot = slot_address(pool, query);
       bool available = query_is_available(slot);
+      uint32_t result_count = get_result_count(pool);
+
       if ((flags & VK_QUERY_RESULT_WAIT_BIT) && !available) {
          VkResult wait_result = wait_for_available(device, pool, query);
          if (wait_result != VK_SUCCESS)
@@ -230,19 +250,21 @@ get_query_pool_results(struct tu_device *device,
          }
       }
 
-      if (available)
-         write_query_value_cpu(result_base, 0, slot->result.value, flags);
-      else if (flags & VK_QUERY_RESULT_PARTIAL_BIT)
-          /* From the Vulkan 1.1.130 spec:
-           *
-           *   If VK_QUERY_RESULT_PARTIAL_BIT is set, VK_QUERY_RESULT_WAIT_BIT
-           *   is not set, and the query’s status is unavailable, an
-           *   intermediate result value between zero and the final result
-           *   value is written to pData for that query.
-           *
-           * Just return 0 here for simplicity since it's a valid result.
-           */
-         write_query_value_cpu(result_base, 0, 0, flags);
+      for (uint32_t k = 0; k < result_count; k++) {
+         if (available)
+            write_query_value_cpu(result_base, k, slot->result.values[k], flags);
+         else if (flags & VK_QUERY_RESULT_PARTIAL_BIT)
+             /* From the Vulkan 1.1.130 spec:
+              *
+              *   If VK_QUERY_RESULT_PARTIAL_BIT is set, VK_QUERY_RESULT_WAIT_BIT
+              *   is not set, and the query’s status is unavailable, an
+              *   intermediate result value between zero and the final result
+              *   value is written to pData for that query.
+              *
+              * Just return 0 here for simplicity since it's a valid result.
+              */
+            write_query_value_cpu(result_base, k, 0, flags);
+      }
 
       if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
          /* From the Vulkan 1.1.130 spec:
@@ -251,7 +273,7 @@ get_query_pool_results(struct tu_device *device,
           *    integer value written for each query is non-zero if the query’s
           *    status was available or zero if the status was unavailable.
           */
-         write_query_value_cpu(result_base, 1, available, flags);
+         write_query_value_cpu(result_base, result_count, available, flags);
 
       result_base += stride;
    }
@@ -330,8 +352,8 @@ emit_copy_query_pool_results(struct tu_cmd_buffer *cmdbuf,
    for (uint32_t i = 0; i < queryCount; i++) {
       uint32_t query = firstQuery + i;
       uint64_t available_iova = query_available_iova(pool, query);
-      uint64_t result_iova = query_result_iova(pool, query);
       uint64_t buffer_iova = tu_buffer_iova(buffer) + dstOffset + i * stride;
+      uint32_t result_count = get_result_count(pool);
 
       /* Wait for the available bit to be set if executed with the
        * VK_QUERY_RESULT_WAIT_BIT flag. */
@@ -345,38 +367,42 @@ emit_copy_query_pool_results(struct tu_cmd_buffer *cmdbuf,
          tu_cs_emit(cs, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));
       }
 
-      if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
-         /* Unconditionally copying the bo->result into the buffer here is
-          * valid because we only set bo->result on vkCmdEndQuery. Thus, even
-          * if the query is unavailable, this will copy the correct partial
-          * value of 0.
-          */
-         copy_query_value_gpu(cmdbuf, cs, result_iova, buffer_iova,
-                              0 /* offset */, flags);
-      } else {
-         /* Conditionally copy bo->result into the buffer based on whether the
-          * query is available.
-          *
-          * NOTE: For the conditional packets to be executed, CP_COND_EXEC
-          * tests that ADDR0 != 0 and ADDR1 < REF. The packet here simply tests
-          * that 0 < available < 2, aka available == 1.
-          */
-         tu_cs_reserve(cs, 7 + 6);
-         tu_cs_emit_pkt7(cs, CP_COND_EXEC, 6);
-         tu_cs_emit_qw(cs, available_iova);
-         tu_cs_emit_qw(cs, available_iova);
-         tu_cs_emit(cs, CP_COND_EXEC_4_REF(0x2));
-         tu_cs_emit(cs, 6); /* Cond execute the next 6 DWORDS */
-
-         /* Start of conditional execution */
-         copy_query_value_gpu(cmdbuf, cs, result_iova, buffer_iova,
-                              0 /* offset */, flags);
-         /* End of conditional execution */
+      for (uint32_t k = 0; k < result_count; k++) {
+         uint64_t result_iova = query_result_iova(pool, query, k);
+
+         if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
+            /* Unconditionally copying the bo->result into the buffer here is
+             * valid because we only set bo->result on vkCmdEndQuery. Thus, even
+             * if the query is unavailable, this will copy the correct partial
+             * value of 0.
+             */
+            copy_query_value_gpu(cmdbuf, cs, result_iova, buffer_iova,
+                                 k /* offset */, flags);
+         } else {
+            /* Conditionally copy bo->result into the buffer based on whether the
+             * query is available.
+             *
+             * NOTE: For the conditional packets to be executed, CP_COND_EXEC
+             * tests that ADDR0 != 0 and ADDR1 < REF. The packet here simply tests
+             * that 0 < available < 2, aka available == 1.
+             */
+            tu_cs_reserve(cs, 7 + 6);
+            tu_cs_emit_pkt7(cs, CP_COND_EXEC, 6);
+            tu_cs_emit_qw(cs, available_iova);
+            tu_cs_emit_qw(cs, available_iova);
+            tu_cs_emit(cs, CP_COND_EXEC_4_REF(0x2));
+            tu_cs_emit(cs, 6); /* Cond execute the next 6 DWORDS */
+
+            /* Start of conditional execution */
+            copy_query_value_gpu(cmdbuf, cs, result_iova, buffer_iova,
+                              k /* offset */, flags);
+            /* End of conditional execution */
+         }
       }
 
       if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
          copy_query_value_gpu(cmdbuf, cs, available_iova, buffer_iova,
-                              1 /* offset */, flags);
+                              result_count /* offset */, flags);
       }
    }
 
@@ -427,7 +453,10 @@ emit_reset_query_pool(struct tu_cmd_buffer *cmdbuf,
       tu_cs_emit_qw(cs, 0x0);
 
       tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 4);
-      tu_cs_emit_qw(cs, query_result_iova(pool, query));
+      tu_cs_emit_qw(cs, query_result_iova(pool, query, 0));
+      tu_cs_emit_qw(cs, 0x0);
+      tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 4);
+      tu_cs_emit_qw(cs, query_result_iova(pool, query, 1));
       tu_cs_emit_qw(cs, 0x0);
    }
 }
@@ -540,7 +569,7 @@ emit_end_occlusion_query(struct tu_cmd_buffer *cmdbuf,
    uint64_t available_iova = query_available_iova(pool, query);
    uint64_t begin_iova = occlusion_query_iova(pool, query, begin);
    uint64_t end_iova = occlusion_query_iova(pool, query, end);
-   uint64_t result_iova = query_result_iova(pool, query);
+   uint64_t result_iova = query_result_iova(pool, query, 0);
    tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 4);
    tu_cs_emit_qw(cs, end_iova);
    tu_cs_emit_qw(cs, 0xffffffffffffffffull);
@@ -628,7 +657,7 @@ tu_CmdWriteTimestamp(VkCommandBuffer commandBuffer,
    tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_CP_ALWAYS_ON_COUNTER_LO) |
                   CP_REG_TO_MEM_0_CNT(2) |
                   CP_REG_TO_MEM_0_64B);
-   tu_cs_emit_qw(cs, query_result_iova(pool, query));
+   tu_cs_emit_qw(cs, query_result_iova(pool, query, 0));
 
    tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 4);
    tu_cs_emit_qw(cs, query_available_iova(pool, query));