#include <stdint.h>
#include "anv_private.h"
+#include "vk_util.h"
#include "perf/gen_perf.h"
#include "perf/gen_perf_mdapi.h"
+#include "util/mesa-sha1.h"
+
struct gen_perf_config *
anv_get_perf(const struct gen_device_info *devinfo, int fd)
{
+ /* We need self-modifying batches. The i915 parser prevents them on
+ * Gen7.5, so that generation is unsupported for now (maybe one day).
+ */
+ if (devinfo->gen < 8)
+ return NULL;
+
struct gen_perf_config *perf = gen_perf_new(NULL);
gen_perf_init_metrics(perf, devinfo, fd, false /* pipeline statistics */);
+ if (!perf->n_queries) {
+ if (perf->platform_supported)
+ intel_logw("Performance support disabled, "
+ "consider sysctl dev.i915.perf_stream_paranoid=0\n");
+ goto err;
+ }
+
/* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available in
* perf revision 2.
*/
return stream_fd;
}
+/* VK_INTEL_performance_query */
VkResult anv_InitializePerformanceApiINTEL(
VkDevice _device,
const VkInitializePerformanceApiInfoINTEL* pInitializeInfo)
device->perf_fd = -1;
}
}
+
+/* VK_KHR_performance_query */
+/* Lookup table translating a GEN_PERF_COUNTER_UNITS_* value (used as the
+ * array index) into the VkPerformanceCounterUnitKHR reported to
+ * applications.
+ */
+static const VkPerformanceCounterUnitKHR
+gen_perf_counter_unit_to_vk_unit[] = {
+   /* Units with a dedicated Vulkan enumerant. */
+   [GEN_PERF_COUNTER_UNITS_BYTES]   = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR,
+   [GEN_PERF_COUNTER_UNITS_HZ]      = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR,
+   [GEN_PERF_COUNTER_UNITS_PERCENT] = VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR,
+
+   /* Time units. Microseconds are advertised as nanoseconds;
+    * anv_perf_write_pass_results() multiplies such values by 1000.
+    */
+   [GEN_PERF_COUNTER_UNITS_NS]      = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,
+   [GEN_PERF_COUNTER_UNITS_US]      = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR, /* todo */
+
+   /* Everything else is exposed as a generic, unit-less value. */
+   [GEN_PERF_COUNTER_UNITS_PIXELS]      = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_TEXELS]      = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_THREADS]     = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_MESSAGES]    = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_NUMBER]      = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_CYCLES]      = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_EVENTS]      = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_UTILIZATION] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES] =
+      VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES] =
+      VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES] =
+      VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE] =
+      VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+};
+
+/* Lookup table translating a GEN_PERF_COUNTER_DATA_TYPE_* value (used as the
+ * array index) into the VkPerformanceCounterStorageKHR describing how the
+ * counter value is stored in the query results.
+ */
+static const VkPerformanceCounterStorageKHR
+gen_perf_counter_data_type_to_vk_storage[] = {
+   /* 32-bit integer storage; booleans share it. */
+   [GEN_PERF_COUNTER_DATA_TYPE_UINT32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
+   [GEN_PERF_COUNTER_DATA_TYPE_BOOL32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
+
+   /* 64-bit integer storage. */
+   [GEN_PERF_COUNTER_DATA_TYPE_UINT64] = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+
+   /* Floating-point storage. */
+   [GEN_PERF_COUNTER_DATA_TYPE_FLOAT]  = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR,
+   [GEN_PERF_COUNTER_DATA_TYPE_DOUBLE] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR,
+};
+
+/**
+ * Enumerates the performance counters of a physical device.
+ *
+ * One VkPerformanceCounterKHR and one VkPerformanceCounterDescriptionKHR are
+ * appended per entry of the device's gen_perf counter list. When the
+ * physical device has no perf configuration, nothing is appended.
+ *
+ * queueFamilyIndex is not consulted.
+ *
+ * Returns the status of the pCounters output array as computed by
+ * vk_outarray_status().
+ */
+VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
+    VkPhysicalDevice                            physicalDevice,
+    uint32_t                                    queueFamilyIndex,
+    uint32_t*                                   pCounterCount,
+    VkPerformanceCounterKHR*                    pCounters,
+    VkPerformanceCounterDescriptionKHR*         pCounterDescriptions)
+{
+   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
+   struct gen_perf_config *perf = pdevice->perf;
+
+   /* The description array gets its own copy of the count so that filling
+    * it does not interfere with the value written back through
+    * pCounterCount by the counter array.
+    */
+   uint32_t description_count = *pCounterCount;
+
+   VK_OUTARRAY_MAKE(counters_out, pCounters, pCounterCount);
+   VK_OUTARRAY_MAKE(descriptions_out, pCounterDescriptions, &description_count);
+
+   if (perf != NULL) {
+      for (int idx = 0; idx < perf->n_counters; idx++) {
+         const struct gen_perf_query_counter *src = perf->counters[idx];
+
+         vk_outarray_append(&counters_out, vk_counter) {
+            unsigned char digest[20];
+
+            vk_counter->unit = gen_perf_counter_unit_to_vk_unit[src->units];
+            vk_counter->scope = VK_QUERY_SCOPE_COMMAND_KHR;
+            vk_counter->storage =
+               gen_perf_counter_data_type_to_vk_storage[src->data_type];
+
+            /* The UUID is the leading sizeof(uuid) bytes of the SHA-1 of
+             * the counter's symbol name.
+             */
+            _mesa_sha1_compute(src->symbol_name,
+                               strlen(src->symbol_name),
+                               digest);
+            memcpy(vk_counter->uuid, digest, sizeof(vk_counter->uuid));
+         }
+
+         vk_outarray_append(&descriptions_out, vk_desc) {
+            vk_desc->flags = 0; /* None so far. */
+            snprintf(vk_desc->name, sizeof(vk_desc->name),
+                     "%s", src->name);
+            snprintf(vk_desc->category, sizeof(vk_desc->category),
+                     "%s", src->category);
+            snprintf(vk_desc->description, sizeof(vk_desc->description),
+                     "%s", src->desc);
+         }
+      }
+   }
+
+   return vk_outarray_status(&counters_out);
+}
+
+/**
+ * Reports through pNumPasses the number of passes computed by
+ * gen_perf_get_n_passes() for the counter indices listed in
+ * pPerformanceQueryCreateInfo.
+ *
+ * When the physical device has no perf configuration, 0 is written.
+ */
+void anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
+    VkPhysicalDevice                            physicalDevice,
+    const VkQueryPoolPerformanceCreateInfoKHR*  pPerformanceQueryCreateInfo,
+    uint32_t*                                   pNumPasses)
+{
+   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
+   struct gen_perf_config *perf_cfg = pdevice->perf;
+
+   if (perf_cfg) {
+      /* The last argument is passed as NULL; only the pass count is used. */
+      *pNumPasses =
+         gen_perf_get_n_passes(perf_cfg,
+                               pPerformanceQueryCreateInfo->pCounterIndices,
+                               pPerformanceQueryCreateInfo->counterIndexCount,
+                               NULL);
+   } else {
+      *pNumPasses = 0;
+   }
+}
+
+/**
+ * Acquires the profiling lock by opening a perf stream on the device with
+ * the metric set id of the first query in the perf configuration, and
+ * stores the resulting file descriptor in device->perf_fd.
+ *
+ * pInfo is not consulted. The device must not already hold a perf stream
+ * (asserted).
+ *
+ * Returns VK_SUCCESS when the stream was opened, VK_TIMEOUT otherwise.
+ */
+VkResult anv_AcquireProfilingLockKHR(
+    VkDevice                                    _device,
+    const VkAcquireProfilingLockInfoKHR*        pInfo)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   struct gen_perf_config *perf_cfg = device->physical->perf;
+   struct gen_perf_query_info *metric_set = &perf_cfg->queries[0];
+
+   assert(device->perf_fd == -1);
+
+   const int stream_fd =
+      anv_device_perf_open(device, metric_set->oa_metrics_set_id);
+
+   if (stream_fd >= 0) {
+      device->perf_fd = stream_fd;
+      return VK_SUCCESS;
+   }
+
+   return VK_TIMEOUT;
+}
+
+/**
+ * Releases the profiling lock: closes the file descriptor held in
+ * device->perf_fd and resets the field to -1.
+ *
+ * A valid descriptor must currently be held (asserted).
+ */
+void anv_ReleaseProfilingLockKHR(
+    VkDevice                                    _device)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   const int stream_fd = device->perf_fd;
+
+   assert(stream_fd >= 0);
+
+   close(stream_fd);
+   device->perf_fd = -1;
+}
+
+/**
+ * Writes into results[] the values of every counter of the pool that is
+ * assigned to the given pass, reading them from accumulated_results.
+ *
+ * results[] is indexed like pool->counter_pass[]; entries belonging to a
+ * different pass are left untouched.
+ */
+void
+anv_perf_write_pass_results(struct gen_perf_config *perf,
+                            struct anv_query_pool *pool, uint32_t pass,
+                            const struct gen_perf_query_result *accumulated_results,
+                            union VkPerformanceCounterResultKHR *results)
+{
+   for (uint32_t i = 0; i < pool->n_counters; i++) {
+      const struct gen_perf_counter_pass *entry = &pool->counter_pass[i];
+
+      /* Skip counters assigned to another pass. */
+      if (entry->pass != pass)
+         continue;
+
+      switch (pool->pass_query[pass]->kind) {
+      case GEN_PERF_QUERY_TYPE_PIPELINE: {
+         assert(entry->counter->data_type == GEN_PERF_COUNTER_DATA_TYPE_UINT64);
+
+         /* The counter offset divided by sizeof(uint64_t) is used as the
+          * index into the accumulator.
+          */
+         uint32_t slot = entry->counter->offset / sizeof(uint64_t);
+         results[i].uint64 = accumulated_results->accumulator[slot];
+         break;
+      }
+
+      case GEN_PERF_QUERY_TYPE_OA:
+      case GEN_PERF_QUERY_TYPE_RAW:
+         /* These counters are read through the counter's own read
+          * callback, selected by data type.
+          */
+         if (entry->counter->data_type == GEN_PERF_COUNTER_DATA_TYPE_UINT64) {
+            results[i].uint64 =
+               entry->counter->oa_counter_read_uint64(perf,
+                                                      entry->query,
+                                                      accumulated_results->accumulator);
+         } else if (entry->counter->data_type == GEN_PERF_COUNTER_DATA_TYPE_FLOAT) {
+            results[i].float32 =
+               entry->counter->oa_counter_read_float(perf,
+                                                     entry->query,
+                                                     accumulated_results->accumulator);
+         } else {
+            /* So far we aren't using uint32, double or bool32... */
+            unreachable("unexpected counter data type");
+         }
+         break;
+
+      default:
+         unreachable("invalid query type");
+      }
+
+      /* The Vulkan extension only has nanoseconds as a unit, so values
+       * expressed in microseconds are scaled by 1000.
+       */
+      if (entry->counter->units == GEN_PERF_COUNTER_UNITS_US) {
+         assert(entry->counter->data_type == GEN_PERF_COUNTER_DATA_TYPE_UINT64);
+         results[i].uint64 *= 1000;
+      }
+   }
+}