anv: Implement VK_KHR_performance_query
[mesa.git] / src / intel / vulkan / anv_perf.c
index 133315b2c8d45b021cbf558a2a1d1e96ea7912c1..e8575b1bd702378c15b95b786f0f39043174bc08 100644 (file)
 #include <stdint.h>
 
 #include "anv_private.h"
+#include "vk_util.h"
 
 #include "perf/gen_perf.h"
 #include "perf/gen_perf_mdapi.h"
 
+#include "util/mesa-sha1.h"
+
 struct gen_perf_config *
 anv_get_perf(const struct gen_device_info *devinfo, int fd)
 {
+   /* We need self modifying batches. The i915 parser prevents it on
+    * Gen7.5 :( maybe one day.
+    */
+   if (devinfo->gen < 8)
+      return NULL;
+
    struct gen_perf_config *perf = gen_perf_new(NULL);
 
    gen_perf_init_metrics(perf, devinfo, fd, false /* pipeline statistics */);
 
+   if (!perf->n_queries) {
+      if (perf->platform_supported)
+         intel_logw("Performance support disabled, "
+                    "consider sysctl dev.i915.perf_stream_paranoid=0\n");
+      goto err;
+   }
+
    /* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available in
     * perf revision 2.
     */
@@ -103,6 +119,7 @@ anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
    return stream_fd;
 }
 
+/* VK_INTEL_performance_query */
 VkResult anv_InitializePerformanceApiINTEL(
     VkDevice                                    _device,
     const VkInitializePerformanceApiInfoINTEL*  pInitializeInfo)
@@ -226,3 +243,175 @@ void anv_UninitializePerformanceApiINTEL(
       device->perf_fd = -1;
    }
 }
+
+/* VK_KHR_performance_query */
+/* Translation from gen_perf counter units to the units exposed through
+ * VK_KHR_performance_query, indexed by the counter's units value.
+ *
+ * Quantities without a dedicated Vulkan unit (pixels, texels, threads,
+ * messages, ...) are reported as GENERIC.
+ */
+static const VkPerformanceCounterUnitKHR
+gen_perf_counter_unit_to_vk_unit[] = {
+   [GEN_PERF_COUNTER_UNITS_BYTES]                                = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR,
+   [GEN_PERF_COUNTER_UNITS_HZ]                                   = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR,
+   [GEN_PERF_COUNTER_UNITS_NS]                                   = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,
+   /* Microsecond counters are advertised as nanoseconds;
+    * anv_perf_write_pass_results() multiplies their values by 1000.
+    */
+   [GEN_PERF_COUNTER_UNITS_US]                                   = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,
+   [GEN_PERF_COUNTER_UNITS_PIXELS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_TEXELS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_THREADS]                              = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_PERCENT]                              = VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR,
+   [GEN_PERF_COUNTER_UNITS_MESSAGES]                             = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_NUMBER]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   /* Vulkan has a dedicated unit for clock cycles. */
+   [GEN_PERF_COUNTER_UNITS_CYCLES]                               = VK_PERFORMANCE_COUNTER_UNIT_CYCLES_KHR,
+   [GEN_PERF_COUNTER_UNITS_EVENTS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_UTILIZATION]                          = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES]           = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES]        = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE]           = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+};
+
+/* Storage type reported to Vulkan for each gen_perf counter data type,
+ * indexed by the counter's data_type value.
+ */
+static const VkPerformanceCounterStorageKHR
+gen_perf_counter_data_type_to_vk_storage[] = {
+   /* Integer counters; BOOL32 is reported with UINT32 storage. */
+   [GEN_PERF_COUNTER_DATA_TYPE_UINT64] =
+      VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+   [GEN_PERF_COUNTER_DATA_TYPE_UINT32] =
+      VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
+   [GEN_PERF_COUNTER_DATA_TYPE_BOOL32] =
+      VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
+
+   /* Floating point counters. */
+   [GEN_PERF_COUNTER_DATA_TYPE_DOUBLE] =
+      VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR,
+   [GEN_PERF_COUNTER_DATA_TYPE_FLOAT] =
+      VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR,
+};
+
+/* Enumerates the performance counters of the physical device along with
+ * their descriptions.
+ *
+ * One VkPerformanceCounterKHR and one VkPerformanceCounterDescriptionKHR is
+ * appended per entry of the device's gen_perf counter list; nothing is
+ * appended when the device has no perf configuration. queueFamilyIndex is
+ * not consulted. The return value is the status of the counter out-array.
+ */
+VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
+    VkPhysicalDevice                            physicalDevice,
+    uint32_t                                    queueFamilyIndex,
+    uint32_t*                                   pCounterCount,
+    VkPerformanceCounterKHR*                    pCounters,
+    VkPerformanceCounterDescriptionKHR*         pCounterDescriptions)
+{
+   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
+   struct gen_perf_config *perf = pdevice->perf;
+
+   /* The description array has no count of its own, so it gets a private
+    * copy of the caller's count. The copy is taken before pCounterCount is
+    * handed to the out-array helper (presumably because the helper rewrites
+    * it -- confirm against vk_util.h).
+    */
+   uint32_t desc_count = *pCounterCount;
+
+   VK_OUTARRAY_MAKE(out, pCounters, pCounterCount);
+   VK_OUTARRAY_MAKE(out_desc, pCounterDescriptions, &desc_count);
+
+   /* No perf configuration: report whatever the empty array yields. */
+   if (perf == NULL)
+      return vk_outarray_status(&out);
+
+   for (int idx = 0; idx < perf->n_counters; idx++) {
+      const struct gen_perf_query_counter *info = perf->counters[idx];
+
+      vk_outarray_append(&out, vk_counter) {
+         vk_counter->unit = gen_perf_counter_unit_to_vk_unit[info->units];
+         vk_counter->scope = VK_QUERY_SCOPE_COMMAND_KHR;
+         vk_counter->storage =
+            gen_perf_counter_data_type_to_vk_storage[info->data_type];
+
+         /* The counter UUID is the leading bytes of the SHA-1 of its symbol
+          * name.
+          */
+         unsigned char digest[20];
+         _mesa_sha1_compute(info->symbol_name, strlen(info->symbol_name),
+                            digest);
+         memcpy(vk_counter->uuid, digest, sizeof(vk_counter->uuid));
+      }
+
+      vk_outarray_append(&out_desc, vk_desc) {
+         vk_desc->flags = 0; /* None so far. */
+         snprintf(vk_desc->name, sizeof(vk_desc->name),
+                  "%s", info->name);
+         snprintf(vk_desc->category, sizeof(vk_desc->category),
+                  "%s", info->category);
+         snprintf(vk_desc->description, sizeof(vk_desc->description),
+                  "%s", info->desc);
+      }
+   }
+
+   return vk_outarray_status(&out);
+}
+
+/* Reports how many passes are needed to collect the counters selected in
+ * pPerformanceQueryCreateInfo.
+ *
+ * Writes 0 to pNumPasses when the physical device has no perf
+ * configuration; otherwise writes the value computed by
+ * gen_perf_get_n_passes() for the requested counter indices.
+ */
+void anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
+    VkPhysicalDevice                            physicalDevice,
+    const VkQueryPoolPerformanceCreateInfoKHR*  pPerformanceQueryCreateInfo,
+    uint32_t*                                   pNumPasses)
+{
+   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
+   struct gen_perf_config *perf = pdevice->perf;
+   uint32_t n_passes = 0;
+
+   if (perf != NULL) {
+      n_passes =
+         gen_perf_get_n_passes(perf,
+                               pPerformanceQueryCreateInfo->pCounterIndices,
+                               pPerformanceQueryCreateInfo->counterIndexCount,
+                               NULL);
+   }
+
+   *pNumPasses = n_passes;
+}
+
+/* Takes the profiling lock by opening a perf stream on the device.
+ *
+ * The stream is opened with the metric set id of the first query of the
+ * physical device's perf configuration and stored in device->perf_fd.
+ * pInfo is not consulted. Returns VK_SUCCESS once the stream is open and
+ * VK_TIMEOUT for any failure to open it.
+ */
+VkResult anv_AcquireProfilingLockKHR(
+    VkDevice                                    _device,
+    const VkAcquireProfilingLockInfoKHR*        pInfo)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   struct gen_perf_config *perf = device->physical->perf;
+
+   /* The lock must not already be held. */
+   assert(device->perf_fd == -1);
+
+   const int stream_fd =
+      anv_device_perf_open(device, perf->queries[0].oa_metrics_set_id);
+   if (stream_fd >= 0) {
+      device->perf_fd = stream_fd;
+      return VK_SUCCESS;
+   }
+
+   return VK_TIMEOUT;
+}
+
+/* Drops the profiling lock: closes the perf stream held in device->perf_fd
+ * and resets the field to -1. The lock is expected to be held on entry.
+ */
+void anv_ReleaseProfilingLockKHR(
+    VkDevice                                    _device)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   const int stream_fd = device->perf_fd;
+
+   assert(stream_fd >= 0);
+
+   close(stream_fd);
+   device->perf_fd = -1;
+}
+
+/* Converts the accumulated results of one pass into Vulkan counter values.
+ *
+ * Every counter of the pool that is scheduled in the given pass has its value
+ * written to results at the counter's own index; entries belonging to other
+ * passes are left untouched.
+ *
+ * perf                 perf configuration handed to the OA read callbacks
+ * pool                 query pool providing the counter/pass assignment
+ * pass                 index of the pass whose results are being written
+ * accumulated_results  accumulator filled for that pass
+ * results              output array, indexed like the pool's counters
+ */
+void
+anv_perf_write_pass_results(struct gen_perf_config *perf,
+                            struct anv_query_pool *pool, uint32_t pass,
+                            const struct gen_perf_query_result *accumulated_results,
+                            union VkPerformanceCounterResultKHR *results)
+{
+   for (uint32_t c = 0; c < pool->n_counters; c++) {
+      const struct gen_perf_counter_pass *counter_pass = &pool->counter_pass[c];
+
+      if (counter_pass->pass != pass)
+         continue;
+
+      /* Remembers which union member was written so the unit conversion
+       * below scales the right one.
+       */
+      bool wrote_float32 = false;
+
+      switch (pool->pass_query[pass]->kind) {
+      case GEN_PERF_QUERY_TYPE_PIPELINE: {
+         /* Pipeline statistics are read straight out of the accumulator. */
+         assert(counter_pass->counter->data_type == GEN_PERF_COUNTER_DATA_TYPE_UINT64);
+         uint32_t accu_offset = counter_pass->counter->offset / sizeof(uint64_t);
+         results[c].uint64 = accumulated_results->accumulator[accu_offset];
+         break;
+      }
+
+      case GEN_PERF_QUERY_TYPE_OA:
+      case GEN_PERF_QUERY_TYPE_RAW:
+         /* OA counters are computed by the counter's read callback. */
+         switch (counter_pass->counter->data_type) {
+         case GEN_PERF_COUNTER_DATA_TYPE_UINT64:
+            results[c].uint64 =
+               counter_pass->counter->oa_counter_read_uint64(perf,
+                                                             counter_pass->query,
+                                                             accumulated_results->accumulator);
+            break;
+         case GEN_PERF_COUNTER_DATA_TYPE_FLOAT:
+            results[c].float32 =
+               counter_pass->counter->oa_counter_read_float(perf,
+                                                            counter_pass->query,
+                                                            accumulated_results->accumulator);
+            wrote_float32 = true;
+            break;
+         default:
+            /* So far we aren't using uint32, double or bool32... */
+            unreachable("unexpected counter data type");
+         }
+         break;
+
+      default:
+         unreachable("invalid query type");
+      }
+
+      /* The Vulkan extension only has nanoseconds as a unit, so microsecond
+       * counters are scaled up. The scaling is applied to the member that
+       * was actually written: multiplying the uint64 view of a float32
+       * result would corrupt it.
+       */
+      if (counter_pass->counter->units == GEN_PERF_COUNTER_UNITS_US) {
+         if (wrote_float32)
+            results[c].float32 *= 1000.0f;
+         else
+            results[c].uint64 *= 1000;
+      }
+   }
+}