anv/hsw: Move query code to genX file for Haswell
authorJordan Justen <jordan.l.justen@intel.com>
Sat, 5 Mar 2016 08:54:54 +0000 (00:54 -0800)
committerJordan Justen <jordan.l.justen@intel.com>
Sat, 5 Mar 2016 09:08:07 +0000 (01:08 -0800)
This fixes many CTS cases, but will require an update to the kernel
command parser register whitelist. (The CS GPRs and TIMESTAMP
registers need to be whitelisted.)

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
src/intel/vulkan/gen8_cmd_buffer.c
src/intel/vulkan/genX_cmd_buffer.c

index 8972a8db6fc8ede8ec48e33b2ac6b6ff5aaca489..8d8775fb01dface6acc284276408236396ff1335 100644 (file)
@@ -601,246 +601,6 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
    cmd_buffer->state.compute_dirty = 0;
 }
 
-static void
-emit_ps_depth_count(struct anv_batch *batch,
-                    struct anv_bo *bo, uint32_t offset)
-{
-   anv_batch_emit(batch, GENX(PIPE_CONTROL),
-                  .DestinationAddressType = DAT_PPGTT,
-                  .PostSyncOperation = WritePSDepthCount,
-                  .DepthStallEnable = true,
-                  .Address = { bo, offset });
-}
-
-static void
-emit_query_availability(struct anv_batch *batch,
-                        struct anv_bo *bo, uint32_t offset)
-{
-   anv_batch_emit(batch, GENX(PIPE_CONTROL),
-                  .DestinationAddressType = DAT_PPGTT,
-                  .PostSyncOperation = WriteImmediateData,
-                  .Address = { bo, offset },
-                  .ImmediateData = 1);
-}
-
-void genX(CmdBeginQuery)(
-    VkCommandBuffer                             commandBuffer,
-    VkQueryPool                                 queryPool,
-    uint32_t                                    query,
-    VkQueryControlFlags                         flags)
-{
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
-   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
-
-   /* Workaround: When meta uses the pipeline with the VS disabled, it seems
-    * that the pipelining of the depth write breaks. What we see is that
-    * samples from the render pass clear leaks into the first query
-    * immediately after the clear. Doing a pipecontrol with a post-sync
-    * operation and DepthStallEnable seems to work around the issue.
-    */
-   if (cmd_buffer->state.need_query_wa) {
-      cmd_buffer->state.need_query_wa = false;
-      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
-                     .DepthCacheFlushEnable = true,
-                     .DepthStallEnable = true);
-   }
-
-   switch (pool->type) {
-   case VK_QUERY_TYPE_OCCLUSION:
-      emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
-                          query * sizeof(struct anv_query_pool_slot));
-      break;
-
-   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
-   default:
-      unreachable("");
-   }
-}
-
-void genX(CmdEndQuery)(
-    VkCommandBuffer                             commandBuffer,
-    VkQueryPool                                 queryPool,
-    uint32_t                                    query)
-{
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
-   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
-
-   switch (pool->type) {
-   case VK_QUERY_TYPE_OCCLUSION:
-      emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
-                          query * sizeof(struct anv_query_pool_slot) + 8);
-
-      emit_query_availability(&cmd_buffer->batch, &pool->bo,
-                              query * sizeof(struct anv_query_pool_slot) + 16);
-      break;
-
-   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
-   default:
-      unreachable("");
-   }
-}
-
-#define TIMESTAMP 0x2358
-
-void genX(CmdWriteTimestamp)(
-    VkCommandBuffer                             commandBuffer,
-    VkPipelineStageFlagBits                     pipelineStage,
-    VkQueryPool                                 queryPool,
-    uint32_t                                    query)
-{
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
-   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
-   uint32_t offset = query * sizeof(struct anv_query_pool_slot);
-
-   assert(pool->type == VK_QUERY_TYPE_TIMESTAMP);
-
-   switch (pipelineStage) {
-   case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
-      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM),
-                     .RegisterAddress = TIMESTAMP,
-                     .MemoryAddress = { &pool->bo, offset });
-      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM),
-                     .RegisterAddress = TIMESTAMP + 4,
-                     .MemoryAddress = { &pool->bo, offset + 4 });
-      break;
-
-   default:
-      /* Everything else is bottom-of-pipe */
-      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
-                     .DestinationAddressType = DAT_PPGTT,
-                     .PostSyncOperation = WriteTimestamp,
-                     .Address = { &pool->bo, offset });
-      break;
-   }
-
-   emit_query_availability(&cmd_buffer->batch, &pool->bo, query + 16);
-}
-
-#define alu_opcode(v)   __gen_uint((v),  20, 31)
-#define alu_operand1(v) __gen_uint((v),  10, 19)
-#define alu_operand2(v) __gen_uint((v),   0,  9)
-#define alu(opcode, operand1, operand2) \
-   alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2)
-
-#define OPCODE_NOOP      0x000
-#define OPCODE_LOAD      0x080
-#define OPCODE_LOADINV   0x480
-#define OPCODE_LOAD0     0x081
-#define OPCODE_LOAD1     0x481
-#define OPCODE_ADD       0x100
-#define OPCODE_SUB       0x101
-#define OPCODE_AND       0x102
-#define OPCODE_OR        0x103
-#define OPCODE_XOR       0x104
-#define OPCODE_STORE     0x180
-#define OPCODE_STOREINV  0x580
-
-#define OPERAND_R0   0x00
-#define OPERAND_R1   0x01
-#define OPERAND_R2   0x02
-#define OPERAND_R3   0x03
-#define OPERAND_R4   0x04
-#define OPERAND_SRCA 0x20
-#define OPERAND_SRCB 0x21
-#define OPERAND_ACCU 0x31
-#define OPERAND_ZF   0x32
-#define OPERAND_CF   0x33
-
-#define CS_GPR(n) (0x2600 + (n) * 8)
-
-static void
-emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg,
-                      struct anv_bo *bo, uint32_t offset)
-{
-   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM),
-                  .RegisterAddress = reg,
-                  .MemoryAddress = { bo, offset });
-   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM),
-                  .RegisterAddress = reg + 4,
-                  .MemoryAddress = { bo, offset + 4 });
-}
-
-static void
-store_query_result(struct anv_batch *batch, uint32_t reg,
-                   struct anv_bo *bo, uint32_t offset, VkQueryResultFlags flags)
-{
-      anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM),
-                     .RegisterAddress = reg,
-                     .MemoryAddress = { bo, offset });
-
-      if (flags & VK_QUERY_RESULT_64_BIT)
-         anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM),
-                        .RegisterAddress = reg + 4,
-                        .MemoryAddress = { bo, offset + 4 });
-}
-
-void genX(CmdCopyQueryPoolResults)(
-    VkCommandBuffer                             commandBuffer,
-    VkQueryPool                                 queryPool,
-    uint32_t                                    firstQuery,
-    uint32_t                                    queryCount,
-    VkBuffer                                    destBuffer,
-    VkDeviceSize                                destOffset,
-    VkDeviceSize                                destStride,
-    VkQueryResultFlags                          flags)
-{
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
-   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
-   ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
-   uint32_t slot_offset, dst_offset;
-
-   if (flags & VK_QUERY_RESULT_WAIT_BIT)
-      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
-                     .CommandStreamerStallEnable = true,
-                     .StallAtPixelScoreboard = true);
-
-   dst_offset = buffer->offset + destOffset;
-   for (uint32_t i = 0; i < queryCount; i++) {
-
-      slot_offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot);
-      switch (pool->type) {
-      case VK_QUERY_TYPE_OCCLUSION:
-         emit_load_alu_reg_u64(&cmd_buffer->batch,
-                               CS_GPR(0), &pool->bo, slot_offset);
-         emit_load_alu_reg_u64(&cmd_buffer->batch,
-                               CS_GPR(1), &pool->bo, slot_offset + 8);
-
-         /* FIXME: We need to clamp the result for 32 bit. */
-
-         uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH));
-         dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1);
-         dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0);
-         dw[3] = alu(OPCODE_SUB, 0, 0);
-         dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU);
-         break;
-
-      case VK_QUERY_TYPE_TIMESTAMP:
-         emit_load_alu_reg_u64(&cmd_buffer->batch,
-                               CS_GPR(2), &pool->bo, slot_offset);
-         break;
-
-      default:
-         unreachable("unhandled query type");
-      }
-
-      store_query_result(&cmd_buffer->batch,
-                         CS_GPR(2), buffer->bo, dst_offset, flags);
-
-      if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
-         emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0),
-                               &pool->bo, slot_offset + 16);
-         if (flags & VK_QUERY_RESULT_64_BIT)
-            store_query_result(&cmd_buffer->batch,
-                               CS_GPR(0), buffer->bo, dst_offset + 8, flags);
-         else
-            store_query_result(&cmd_buffer->batch,
-                               CS_GPR(0), buffer->bo, dst_offset + 4, flags);
-      }
-
-      dst_offset += destStride;
-   }
-}
-
 void genX(CmdSetEvent)(
     VkCommandBuffer                             commandBuffer,
     VkEvent                                     _event,
index a31ecc398e729626d889baef0a7255db15d34766..82959f3abf6850c98766f36577f24313217f60eb 100644 (file)
@@ -754,3 +754,247 @@ void genX(CmdEndRenderPass)(
 
    anv_cmd_buffer_resolve_subpass(cmd_buffer);
 }
+
+static void
+emit_ps_depth_count(struct anv_batch *batch,
+                    struct anv_bo *bo, uint32_t offset)
+{
+   anv_batch_emit(batch, GENX(PIPE_CONTROL),
+                  .DestinationAddressType = DAT_PPGTT,
+                  .PostSyncOperation = WritePSDepthCount,
+                  .DepthStallEnable = true,
+                  .Address = { bo, offset });
+}
+
+static void
+emit_query_availability(struct anv_batch *batch,
+                        struct anv_bo *bo, uint32_t offset)
+{
+   anv_batch_emit(batch, GENX(PIPE_CONTROL),
+                  .DestinationAddressType = DAT_PPGTT,
+                  .PostSyncOperation = WriteImmediateData,
+                  .Address = { bo, offset },
+                  .ImmediateData = 1);
+}
+
+void genX(CmdBeginQuery)(
+    VkCommandBuffer                             commandBuffer,
+    VkQueryPool                                 queryPool,
+    uint32_t                                    query,
+    VkQueryControlFlags                         flags)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
+
+   /* Workaround: When meta uses the pipeline with the VS disabled, it seems
+    * that the pipelining of the depth write breaks. What we see is that
+    * samples from the render pass clear leaks into the first query
+    * immediately after the clear. Doing a pipecontrol with a post-sync
+    * operation and DepthStallEnable seems to work around the issue.
+    */
+   if (cmd_buffer->state.need_query_wa) {
+      cmd_buffer->state.need_query_wa = false;
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
+                     .DepthCacheFlushEnable = true,
+                     .DepthStallEnable = true);
+   }
+
+   switch (pool->type) {
+   case VK_QUERY_TYPE_OCCLUSION:
+      emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
+                          query * sizeof(struct anv_query_pool_slot));
+      break;
+
+   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+   default:
+      unreachable("");
+   }
+}
+
+void genX(CmdEndQuery)(
+    VkCommandBuffer                             commandBuffer,
+    VkQueryPool                                 queryPool,
+    uint32_t                                    query)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
+
+   switch (pool->type) {
+   case VK_QUERY_TYPE_OCCLUSION:
+      emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
+                          query * sizeof(struct anv_query_pool_slot) + 8);
+
+      emit_query_availability(&cmd_buffer->batch, &pool->bo,
+                              query * sizeof(struct anv_query_pool_slot) + 16);
+      break;
+
+   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+   default:
+      unreachable("");
+   }
+}
+
+#define TIMESTAMP 0x2358
+
+void genX(CmdWriteTimestamp)(
+    VkCommandBuffer                             commandBuffer,
+    VkPipelineStageFlagBits                     pipelineStage,
+    VkQueryPool                                 queryPool,
+    uint32_t                                    query)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
+   uint32_t offset = query * sizeof(struct anv_query_pool_slot);
+
+   assert(pool->type == VK_QUERY_TYPE_TIMESTAMP);
+
+   switch (pipelineStage) {
+   case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
+      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM),
+                     .RegisterAddress = TIMESTAMP,
+                     .MemoryAddress = { &pool->bo, offset });
+      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM),
+                     .RegisterAddress = TIMESTAMP + 4,
+                     .MemoryAddress = { &pool->bo, offset + 4 });
+      break;
+
+   default:
+      /* Everything else is bottom-of-pipe */
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
+                     .DestinationAddressType = DAT_PPGTT,
+                     .PostSyncOperation = WriteTimestamp,
+                     .Address = { &pool->bo, offset });
+      break;
+   }
+
+   emit_query_availability(&cmd_buffer->batch, &pool->bo, query + 16);
+}
+
+#if GEN_GEN > 7 || GEN_IS_HASWELL
+
+#define alu_opcode(v)   __gen_uint((v),  20, 31)
+#define alu_operand1(v) __gen_uint((v),  10, 19)
+#define alu_operand2(v) __gen_uint((v),   0,  9)
+#define alu(opcode, operand1, operand2) \
+   alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2)
+
+#define OPCODE_NOOP      0x000
+#define OPCODE_LOAD      0x080
+#define OPCODE_LOADINV   0x480
+#define OPCODE_LOAD0     0x081
+#define OPCODE_LOAD1     0x481
+#define OPCODE_ADD       0x100
+#define OPCODE_SUB       0x101
+#define OPCODE_AND       0x102
+#define OPCODE_OR        0x103
+#define OPCODE_XOR       0x104
+#define OPCODE_STORE     0x180
+#define OPCODE_STOREINV  0x580
+
+#define OPERAND_R0   0x00
+#define OPERAND_R1   0x01
+#define OPERAND_R2   0x02
+#define OPERAND_R3   0x03
+#define OPERAND_R4   0x04
+#define OPERAND_SRCA 0x20
+#define OPERAND_SRCB 0x21
+#define OPERAND_ACCU 0x31
+#define OPERAND_ZF   0x32
+#define OPERAND_CF   0x33
+
+#define CS_GPR(n) (0x2600 + (n) * 8)
+
+static void
+emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg,
+                      struct anv_bo *bo, uint32_t offset)
+{
+   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM),
+                  .RegisterAddress = reg,
+                  .MemoryAddress = { bo, offset });
+   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM),
+                  .RegisterAddress = reg + 4,
+                  .MemoryAddress = { bo, offset + 4 });
+}
+
+static void
+store_query_result(struct anv_batch *batch, uint32_t reg,
+                   struct anv_bo *bo, uint32_t offset, VkQueryResultFlags flags)
+{
+      anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM),
+                     .RegisterAddress = reg,
+                     .MemoryAddress = { bo, offset });
+
+      if (flags & VK_QUERY_RESULT_64_BIT)
+         anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM),
+                        .RegisterAddress = reg + 4,
+                        .MemoryAddress = { bo, offset + 4 });
+}
+
+void genX(CmdCopyQueryPoolResults)(
+    VkCommandBuffer                             commandBuffer,
+    VkQueryPool                                 queryPool,
+    uint32_t                                    firstQuery,
+    uint32_t                                    queryCount,
+    VkBuffer                                    destBuffer,
+    VkDeviceSize                                destOffset,
+    VkDeviceSize                                destStride,
+    VkQueryResultFlags                          flags)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
+   ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
+   uint32_t slot_offset, dst_offset;
+
+   if (flags & VK_QUERY_RESULT_WAIT_BIT)
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
+                     .CommandStreamerStallEnable = true,
+                     .StallAtPixelScoreboard = true);
+
+   dst_offset = buffer->offset + destOffset;
+   for (uint32_t i = 0; i < queryCount; i++) {
+
+      slot_offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot);
+      switch (pool->type) {
+      case VK_QUERY_TYPE_OCCLUSION:
+         emit_load_alu_reg_u64(&cmd_buffer->batch,
+                               CS_GPR(0), &pool->bo, slot_offset);
+         emit_load_alu_reg_u64(&cmd_buffer->batch,
+                               CS_GPR(1), &pool->bo, slot_offset + 8);
+
+         /* FIXME: We need to clamp the result for 32 bit. */
+
+         uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH));
+         dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1);
+         dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0);
+         dw[3] = alu(OPCODE_SUB, 0, 0);
+         dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU);
+         break;
+
+      case VK_QUERY_TYPE_TIMESTAMP:
+         emit_load_alu_reg_u64(&cmd_buffer->batch,
+                               CS_GPR(2), &pool->bo, slot_offset);
+         break;
+
+      default:
+         unreachable("unhandled query type");
+      }
+
+      store_query_result(&cmd_buffer->batch,
+                         CS_GPR(2), buffer->bo, dst_offset, flags);
+
+      if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+         emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0),
+                               &pool->bo, slot_offset + 16);
+         if (flags & VK_QUERY_RESULT_64_BIT)
+            store_query_result(&cmd_buffer->batch,
+                               CS_GPR(0), buffer->bo, dst_offset + 8, flags);
+         else
+            store_query_result(&cmd_buffer->batch,
+                               CS_GPR(0), buffer->bo, dst_offset + 4, flags);
+      }
+
+      dst_offset += destStride;
+   }
+}
+
+#endif