#include <stdint.h>
#include "anv_private.h"
+#include "vk_util.h"
#include "perf/gen_perf.h"
#include "perf/gen_perf_mdapi.h"
+#include "util/mesa-sha1.h"
+
struct gen_perf_config *
anv_get_perf(const struct gen_device_info *devinfo, int fd)
{
+ /* We need self modifying batches. The i915 parser prevents it on
+ * Gen7.5 :( maybe one day.
+ */
+ if (devinfo->gen < 8)
+ return NULL;
+
struct gen_perf_config *perf = gen_perf_new(NULL);
- gen_perf_init_metrics(perf, devinfo, fd);
+ gen_perf_init_metrics(perf, devinfo, fd, false /* pipeline statistics */);
+
+ if (!perf->n_queries) {
+ if (perf->platform_supported)
+ intel_logw("Performance support disabled, "
+ "consider sysctl dev.i915.perf_stream_paranoid=0\n");
+ goto err;
+ }
/* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available in
* perf revision 2.
properties[p++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION;
properties[p++] = true;
+ /* If global SSEU is available, pin it to the default. This will ensure on
+ * Gen11 for instance we use the full EU array. Initially when perf was
+ * enabled we would use only half on Gen11 because of functional
+ * requirements.
+ */
+ if (device->physical->perf->i915_perf_version >= 4) {
+ properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;
+ properties[p++] = (uintptr_t) &device->physical->perf->sseu;
+ }
+
memset(&param, 0, sizeof(param));
param.flags = 0;
param.flags |= I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK;
return stream_fd;
}
+/* VK_INTEL_performance_query */
VkResult anv_InitializePerformanceApiINTEL(
VkDevice _device,
const VkInitializePerformanceApiInfoINTEL* pInitializeInfo)
VkPerformanceConfigurationINTEL* pConfiguration)
{
ANV_FROM_HANDLE(anv_device, device, _device);
-
- struct gen_perf_registers *perf_config =
- gen_perf_load_configuration(device->physical->perf, device->fd,
- GEN_PERF_QUERY_GUID_MDAPI);
- if (!perf_config)
- return VK_INCOMPLETE;
-
- int ret = gen_perf_store_configuration(device->physical->perf, device->fd,
- perf_config, NULL /* guid */);
- if (ret < 0) {
- ralloc_free(perf_config);
- return VK_INCOMPLETE;
+ int ret = -1;
+
+ if (likely(!(INTEL_DEBUG & DEBUG_NO_OACONFIG))) {
+ struct gen_perf_registers *perf_config =
+ gen_perf_load_configuration(device->physical->perf, device->fd,
+ GEN_PERF_QUERY_GUID_MDAPI);
+ if (!perf_config)
+ return VK_INCOMPLETE;
+
+ ret = gen_perf_store_configuration(device->physical->perf, device->fd,
+ perf_config, NULL /* guid */);
+ if (ret < 0) {
+ ralloc_free(perf_config);
+ return VK_INCOMPLETE;
+ }
}
*pConfiguration = (VkPerformanceConfigurationINTEL) (uint64_t) ret;
ANV_FROM_HANDLE(anv_device, device, _device);
uint64_t config = (uint64_t) _configuration;
- gen_ioctl(device->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config);
+ if (likely(!(INTEL_DEBUG & DEBUG_NO_OACONFIG)))
+ gen_ioctl(device->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config);
return VK_SUCCESS;
}
struct anv_device *device = queue->device;
uint64_t configuration = (uint64_t) _configuration;
- if (device->perf_fd < 0) {
- device->perf_fd = anv_device_perf_open(device, configuration);
- if (device->perf_fd < 0)
- return VK_ERROR_INITIALIZATION_FAILED;
- } else {
- int ret = gen_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
+ if (likely(!(INTEL_DEBUG & DEBUG_NO_OACONFIG))) {
+ if (device->perf_fd < 0) {
+ device->perf_fd = anv_device_perf_open(device, configuration);
+ if (device->perf_fd < 0)
+ return VK_ERROR_INITIALIZATION_FAILED;
+ } else {
+ int ret = gen_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
(void *)(uintptr_t) _configuration);
- if (ret < 0)
- return anv_device_set_lost(device, "i915-perf config failed: %m");
+ if (ret < 0)
+ return anv_device_set_lost(device, "i915-perf config failed: %m");
+ }
}
return VK_SUCCESS;
device->perf_fd = -1;
}
}
+
+/* VK_KHR_performance_query */
+/* Lookup table translating a gen_perf counter unit into the Vulkan
+ * performance counter unit reported to applications. It is indexed with the
+ * GEN_PERF_COUNTER_UNITS_* value of a counter.
+ */
+static const VkPerformanceCounterUnitKHR
+gen_perf_counter_unit_to_vk_unit[] = {
+   /* Units that map onto a dedicated Vulkan unit. */
+   [GEN_PERF_COUNTER_UNITS_BYTES]   = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR,
+   [GEN_PERF_COUNTER_UNITS_HZ]      = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR,
+   [GEN_PERF_COUNTER_UNITS_NS]      = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,
+   [GEN_PERF_COUNTER_UNITS_PERCENT] = VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR,
+
+   /* todo: microseconds are advertised as nanoseconds; the values are scaled
+    * by 1000 in anv_perf_write_pass_results() to match.
+    */
+   [GEN_PERF_COUNTER_UNITS_US]      = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,
+
+   /* Everything else is reported with the generic unit. */
+   [GEN_PERF_COUNTER_UNITS_PIXELS]      = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_TEXELS]      = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_THREADS]     = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_MESSAGES]    = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_NUMBER]      = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_CYCLES]      = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_EVENTS]      = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_UTILIZATION] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES] =
+      VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES] =
+      VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES] =
+      VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+   [GEN_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE] =
+      VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+};
+
+/* Lookup table translating a gen_perf counter data type into the Vulkan
+ * storage type advertised for that counter. It is indexed with the
+ * GEN_PERF_COUNTER_DATA_TYPE_* value of a counter.
+ */
+static const VkPerformanceCounterStorageKHR
+gen_perf_counter_data_type_to_vk_storage[] = {
+   /* Integer types; booleans are exposed as 32-bit unsigned integers. */
+   [GEN_PERF_COUNTER_DATA_TYPE_BOOL32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
+   [GEN_PERF_COUNTER_DATA_TYPE_UINT32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
+   [GEN_PERF_COUNTER_DATA_TYPE_UINT64] = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+
+   /* Floating point types. */
+   [GEN_PERF_COUNTER_DATA_TYPE_FLOAT]  = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR,
+   [GEN_PERF_COUNTER_DATA_TYPE_DOUBLE] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR,
+};
+
+/* Enumerates the performance counters of a physical device.
+ *
+ * For each counter in the device's gen_perf configuration, one
+ * VkPerformanceCounterKHR and one VkPerformanceCounterDescriptionKHR are
+ * appended through the vk_outarray helpers. When the physical device has no
+ * perf configuration, nothing is appended.
+ *
+ * queueFamilyIndex is not read by this implementation.
+ *
+ * Returns the vk_outarray status of the counters array.
+ */
+VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
+    VkPhysicalDevice                            physicalDevice,
+    uint32_t                                    queueFamilyIndex,
+    uint32_t*                                   pCounterCount,
+    VkPerformanceCounterKHR*                    pCounters,
+    VkPerformanceCounterDescriptionKHR*         pCounterDescriptions)
+{
+   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
+   struct gen_perf_config *perf_cfg = pdevice->perf;
+
+   /* The descriptions array gets its own copy of the caller's count so that
+    * only the counters array writes back through pCounterCount.
+    */
+   uint32_t description_count = *pCounterCount;
+
+   VK_OUTARRAY_MAKE(counters_out, pCounters, pCounterCount);
+   VK_OUTARRAY_MAKE(descriptions_out, pCounterDescriptions, &description_count);
+
+   /* No perf configuration: report whatever the empty array yields. */
+   if (!perf_cfg)
+      return vk_outarray_status(&counters_out);
+
+   for (int idx = 0; idx < perf_cfg->n_counters; idx++) {
+      const struct gen_perf_query_counter *src = perf_cfg->counters[idx];
+
+      vk_outarray_append(&counters_out, vk_counter) {
+         vk_counter->unit = gen_perf_counter_unit_to_vk_unit[src->units];
+         vk_counter->scope = VK_QUERY_SCOPE_COMMAND_KHR;
+         vk_counter->storage =
+            gen_perf_counter_data_type_to_vk_storage[src->data_type];
+
+         /* The UUID is the leading bytes of the SHA-1 of the symbol name;
+          * the copy is bounded by the size of the uuid field.
+          */
+         unsigned char digest[20];
+         const size_t symbol_len = strlen(src->symbol_name);
+         _mesa_sha1_compute(src->symbol_name, symbol_len, digest);
+         memcpy(vk_counter->uuid, digest, sizeof(vk_counter->uuid));
+      }
+
+      vk_outarray_append(&descriptions_out, vk_desc) {
+         vk_desc->flags = 0; /* None so far. */
+         snprintf(vk_desc->name, sizeof(vk_desc->name),
+                  "%s", src->name);
+         snprintf(vk_desc->category, sizeof(vk_desc->category),
+                  "%s", src->category);
+         snprintf(vk_desc->description, sizeof(vk_desc->description),
+                  "%s", src->desc);
+      }
+   }
+
+   return vk_outarray_status(&counters_out);
+}
+
+/* Reports how many passes are needed to collect the counters selected in
+ * pPerformanceQueryCreateInfo.
+ *
+ * The count comes from gen_perf_get_n_passes() for the requested counter
+ * indices. When the physical device has no perf configuration, 0 is written
+ * to *pNumPasses.
+ */
+void anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
+    VkPhysicalDevice                            physicalDevice,
+    const VkQueryPoolPerformanceCreateInfoKHR*  pPerformanceQueryCreateInfo,
+    uint32_t*                                   pNumPasses)
+{
+   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
+   struct gen_perf_config *perf_cfg = pdevice->perf;
+
+   if (perf_cfg) {
+      /* The last argument is passed as NULL, as in the original call. */
+      *pNumPasses =
+         gen_perf_get_n_passes(perf_cfg,
+                               pPerformanceQueryCreateInfo->pCounterIndices,
+                               pPerformanceQueryCreateInfo->counterIndexCount,
+                               NULL);
+   } else {
+      *pNumPasses = 0;
+   }
+}
+
+/* Acquires the profiling lock by opening the device's i915 perf stream.
+ *
+ * The stream is opened with the metric set id of the first query in the
+ * perf configuration and its file descriptor is stored in device->perf_fd.
+ * When INTEL_DEBUG has DEBUG_NO_OACONFIG set, no stream is opened and
+ * device->perf_fd stays at -1.
+ *
+ * pInfo is not read by this implementation.
+ *
+ * Returns VK_SUCCESS, or VK_TIMEOUT when the stream could not be opened.
+ */
+VkResult anv_AcquireProfilingLockKHR(
+    VkDevice                                    _device,
+    const VkAcquireProfilingLockInfoKHR*        pInfo)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   struct gen_perf_config *perf_cfg = device->physical->perf;
+   struct gen_perf_query_info *initial_set = &perf_cfg->queries[0];
+
+   /* The lock must not already be held. */
+   assert(device->perf_fd == -1);
+
+   const bool oa_config_enabled = likely(!(INTEL_DEBUG & DEBUG_NO_OACONFIG));
+   if (!oa_config_enabled) {
+      device->perf_fd = -1;
+      return VK_SUCCESS;
+   }
+
+   const int stream_fd =
+      anv_device_perf_open(device, initial_set->oa_metrics_set_id);
+   if (stream_fd < 0)
+      return VK_TIMEOUT;
+
+   device->perf_fd = stream_fd;
+   return VK_SUCCESS;
+}
+
+/* Releases the profiling lock.
+ *
+ * Unless INTEL_DEBUG has DEBUG_NO_OACONFIG set, the perf stream file
+ * descriptor held in device->perf_fd is closed. In both cases
+ * device->perf_fd is reset to -1 afterwards.
+ */
+void anv_ReleaseProfilingLockKHR(
+    VkDevice                                    _device)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+
+   const bool oa_config_enabled = likely(!(INTEL_DEBUG & DEBUG_NO_OACONFIG));
+   if (oa_config_enabled) {
+      /* A stream is expected to be open at this point. */
+      assert(device->perf_fd >= 0);
+      close(device->perf_fd);
+   }
+
+   device->perf_fd = -1;
+}
+
+/* Writes the results of one pass into the application-visible result array.
+ *
+ * Each counter of the pool that is assigned to `pass` has its value written
+ * to results[c], where c is the counter's index in the pool. Counters that
+ * belong to other passes are left untouched.
+ *
+ * perf                 perf configuration handed to the counter read callbacks
+ * pool                 query pool providing the counter/pass assignment
+ * pass                 index of the pass whose counters are written
+ * accumulated_results  accumulator values the counters are read from
+ * results              output array, indexed like pool->counter_pass
+ */
+void
+anv_perf_write_pass_results(struct gen_perf_config *perf,
+                            struct anv_query_pool *pool, uint32_t pass,
+                            const struct gen_perf_query_result *accumulated_results,
+                            union VkPerformanceCounterResultKHR *results)
+{
+   for (uint32_t idx = 0; idx < pool->n_counters; idx++) {
+      const struct gen_perf_counter_pass *entry = &pool->counter_pass[idx];
+
+      /* Only counters collected during this pass are written. */
+      if (entry->pass != pass)
+         continue;
+
+      const struct gen_perf_query_counter *counter = entry->counter;
+      const uint64_t *accumulator = accumulated_results->accumulator;
+      union VkPerformanceCounterResultKHR *slot = &results[idx];
+
+      switch (pool->pass_query[pass]->kind) {
+      case GEN_PERF_QUERY_TYPE_OA:
+      case GEN_PERF_QUERY_TYPE_RAW:
+         /* These counters are computed by their read callback. */
+         if (counter->data_type == GEN_PERF_COUNTER_DATA_TYPE_UINT64) {
+            slot->uint64 =
+               counter->oa_counter_read_uint64(perf, entry->query, accumulator);
+         } else if (counter->data_type == GEN_PERF_COUNTER_DATA_TYPE_FLOAT) {
+            slot->float32 =
+               counter->oa_counter_read_float(perf, entry->query, accumulator);
+         } else {
+            /* So far we aren't using uint32, double or bool32... */
+            unreachable("unexpected counter data type");
+         }
+         break;
+
+      case GEN_PERF_QUERY_TYPE_PIPELINE: {
+         /* These counters are copied straight out of the accumulator; the
+          * counter offset is in bytes, the accumulator holds 64-bit slots.
+          */
+         assert(counter->data_type == GEN_PERF_COUNTER_DATA_TYPE_UINT64);
+         uint32_t accumulator_index = counter->offset / sizeof(uint64_t);
+         slot->uint64 = accumulator[accumulator_index];
+         break;
+      }
+
+      default:
+         unreachable("invalid query type");
+      }
+
+      /* The Vulkan extension only has nanoseconds as a unit */
+      if (counter->units == GEN_PERF_COUNTER_UNITS_US) {
+         assert(counter->data_type == GEN_PERF_COUNTER_DATA_TYPE_UINT64);
+         slot->uint64 *= 1000;
+      }
+   }
+}