/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
28 #include "anv_private.h"
31 #include "perf/gen_perf.h"
32 #include "perf/gen_perf_mdapi.h"
34 #include "util/mesa-sha1.h"
36 struct gen_perf_config
*
37 anv_get_perf(const struct gen_device_info
*devinfo
, int fd
)
39 /* We need self modifying batches. The i915 parser prevents it on
40 * Gen7.5 :( maybe one day.
45 struct gen_perf_config
*perf
= gen_perf_new(NULL
);
47 gen_perf_init_metrics(perf
, devinfo
, fd
, false /* pipeline statistics */);
49 if (!perf
->n_queries
) {
50 if (perf
->platform_supported
)
51 intel_logw("Performance support disabled, "
52 "consider sysctl dev.i915.perf_stream_paranoid=0\n");
56 /* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available in
59 if (perf
->i915_perf_version
< 3)
70 anv_device_perf_init(struct anv_device
*device
)
76 anv_device_perf_open(struct anv_device
*device
, uint64_t metric_id
)
78 uint64_t properties
[DRM_I915_PERF_PROP_MAX
* 2];
79 struct drm_i915_perf_open_param param
;
82 properties
[p
++] = DRM_I915_PERF_PROP_SAMPLE_OA
;
83 properties
[p
++] = true;
85 properties
[p
++] = DRM_I915_PERF_PROP_OA_METRICS_SET
;
86 properties
[p
++] = metric_id
;
88 properties
[p
++] = DRM_I915_PERF_PROP_OA_FORMAT
;
89 properties
[p
++] = device
->info
.gen
>= 8 ?
90 I915_OA_FORMAT_A32u40_A4u32_B8_C8
:
91 I915_OA_FORMAT_A45_B8_C8
;
93 properties
[p
++] = DRM_I915_PERF_PROP_OA_EXPONENT
;
94 properties
[p
++] = 31; /* slowest sampling period */
96 properties
[p
++] = DRM_I915_PERF_PROP_CTX_HANDLE
;
97 properties
[p
++] = device
->context_id
;
99 properties
[p
++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION
;
100 properties
[p
++] = true;
102 /* If global SSEU is available, pin it to the default. This will ensure on
103 * Gen11 for instance we use the full EU array. Initially when perf was
104 * enabled we would use only half on Gen11 because of functional
107 if (device
->physical
->perf
->i915_perf_version
>= 4) {
108 properties
[p
++] = DRM_I915_PERF_PROP_GLOBAL_SSEU
;
109 properties
[p
++] = (uintptr_t) &device
->physical
->perf
->sseu
;
112 memset(¶m
, 0, sizeof(param
));
114 param
.flags
|= I915_PERF_FLAG_FD_CLOEXEC
| I915_PERF_FLAG_FD_NONBLOCK
;
115 param
.properties_ptr
= (uintptr_t)properties
;
116 param
.num_properties
= p
/ 2;
118 stream_fd
= gen_ioctl(device
->fd
, DRM_IOCTL_I915_PERF_OPEN
, ¶m
);
122 /* VK_INTEL_performance_query */
123 VkResult
anv_InitializePerformanceApiINTEL(
125 const VkInitializePerformanceApiInfoINTEL
* pInitializeInfo
)
127 ANV_FROM_HANDLE(anv_device
, device
, _device
);
129 if (!device
->physical
->perf
)
130 return VK_ERROR_EXTENSION_NOT_PRESENT
;
132 /* Not much to do here */
136 VkResult
anv_GetPerformanceParameterINTEL(
138 VkPerformanceParameterTypeINTEL parameter
,
139 VkPerformanceValueINTEL
* pValue
)
141 ANV_FROM_HANDLE(anv_device
, device
, _device
);
143 if (!device
->physical
->perf
)
144 return VK_ERROR_EXTENSION_NOT_PRESENT
;
146 VkResult result
= VK_SUCCESS
;
148 case VK_PERFORMANCE_PARAMETER_TYPE_HW_COUNTERS_SUPPORTED_INTEL
:
149 pValue
->type
= VK_PERFORMANCE_VALUE_TYPE_BOOL_INTEL
;
150 pValue
->data
.valueBool
= VK_TRUE
;
153 case VK_PERFORMANCE_PARAMETER_TYPE_STREAM_MARKER_VALID_BITS_INTEL
:
154 pValue
->type
= VK_PERFORMANCE_VALUE_TYPE_UINT32_INTEL
;
155 pValue
->data
.value32
= 25;
159 result
= VK_ERROR_FEATURE_NOT_PRESENT
;
166 VkResult
anv_CmdSetPerformanceMarkerINTEL(
167 VkCommandBuffer commandBuffer
,
168 const VkPerformanceMarkerInfoINTEL
* pMarkerInfo
)
170 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
172 cmd_buffer
->intel_perf_marker
= pMarkerInfo
->marker
;
177 VkResult
anv_AcquirePerformanceConfigurationINTEL(
179 const VkPerformanceConfigurationAcquireInfoINTEL
* pAcquireInfo
,
180 VkPerformanceConfigurationINTEL
* pConfiguration
)
182 ANV_FROM_HANDLE(anv_device
, device
, _device
);
183 struct anv_performance_configuration_intel
*config
;
185 config
= vk_alloc(&device
->vk
.alloc
, sizeof(*config
), 8,
186 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE
);
188 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
190 if (likely(!(INTEL_DEBUG
& DEBUG_NO_OACONFIG
))) {
191 config
->register_config
=
192 gen_perf_load_configuration(device
->physical
->perf
, device
->fd
,
193 GEN_PERF_QUERY_GUID_MDAPI
);
194 if (!config
->register_config
) {
195 vk_free(&device
->vk
.alloc
, config
);
196 return VK_INCOMPLETE
;
200 gen_perf_store_configuration(device
->physical
->perf
, device
->fd
,
201 config
->register_config
, NULL
/* guid */);
203 ralloc_free(config
->register_config
);
204 vk_free(&device
->vk
.alloc
, config
);
205 return VK_INCOMPLETE
;
208 config
->config_id
= ret
;
211 vk_object_base_init(&device
->vk
, &config
->base
,
212 VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL
);
214 *pConfiguration
= anv_performance_configuration_intel_to_handle(config
);
219 VkResult
anv_ReleasePerformanceConfigurationINTEL(
221 VkPerformanceConfigurationINTEL _configuration
)
223 ANV_FROM_HANDLE(anv_device
, device
, _device
);
224 ANV_FROM_HANDLE(anv_performance_configuration_intel
, config
, _configuration
);
226 if (likely(!(INTEL_DEBUG
& DEBUG_NO_OACONFIG
)))
227 gen_ioctl(device
->fd
, DRM_IOCTL_I915_PERF_REMOVE_CONFIG
, &config
->config_id
);
229 ralloc_free(config
->register_config
);
230 vk_object_base_finish(&config
->base
);
231 vk_free(&device
->vk
.alloc
, config
);
236 VkResult
anv_QueueSetPerformanceConfigurationINTEL(
238 VkPerformanceConfigurationINTEL _configuration
)
240 ANV_FROM_HANDLE(anv_queue
, queue
, _queue
);
241 ANV_FROM_HANDLE(anv_performance_configuration_intel
, config
, _configuration
);
242 struct anv_device
*device
= queue
->device
;
244 if (likely(!(INTEL_DEBUG
& DEBUG_NO_OACONFIG
))) {
245 if (device
->perf_fd
< 0) {
246 device
->perf_fd
= anv_device_perf_open(device
, config
->config_id
);
247 if (device
->perf_fd
< 0)
248 return VK_ERROR_INITIALIZATION_FAILED
;
250 int ret
= gen_ioctl(device
->perf_fd
, I915_PERF_IOCTL_CONFIG
,
251 (void *)(uintptr_t) config
->config_id
);
253 return anv_device_set_lost(device
, "i915-perf config failed: %m");
260 void anv_UninitializePerformanceApiINTEL(
263 ANV_FROM_HANDLE(anv_device
, device
, _device
);
265 if (device
->perf_fd
>= 0) {
266 close(device
->perf_fd
);
267 device
->perf_fd
= -1;
271 /* VK_KHR_performance_query */
272 static const VkPerformanceCounterUnitKHR
273 gen_perf_counter_unit_to_vk_unit
[] = {
274 [GEN_PERF_COUNTER_UNITS_BYTES
] = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR
,
275 [GEN_PERF_COUNTER_UNITS_HZ
] = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR
,
276 [GEN_PERF_COUNTER_UNITS_NS
] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR
,
277 [GEN_PERF_COUNTER_UNITS_US
] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR
, /* todo */
278 [GEN_PERF_COUNTER_UNITS_PIXELS
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
279 [GEN_PERF_COUNTER_UNITS_TEXELS
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
280 [GEN_PERF_COUNTER_UNITS_THREADS
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
281 [GEN_PERF_COUNTER_UNITS_PERCENT
] = VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR
,
282 [GEN_PERF_COUNTER_UNITS_MESSAGES
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
283 [GEN_PERF_COUNTER_UNITS_NUMBER
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
284 [GEN_PERF_COUNTER_UNITS_CYCLES
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
285 [GEN_PERF_COUNTER_UNITS_EVENTS
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
286 [GEN_PERF_COUNTER_UNITS_UTILIZATION
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
287 [GEN_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
288 [GEN_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
289 [GEN_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
290 [GEN_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
293 static const VkPerformanceCounterStorageKHR
294 gen_perf_counter_data_type_to_vk_storage
[] = {
295 [GEN_PERF_COUNTER_DATA_TYPE_BOOL32
] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR
,
296 [GEN_PERF_COUNTER_DATA_TYPE_UINT32
] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR
,
297 [GEN_PERF_COUNTER_DATA_TYPE_UINT64
] = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR
,
298 [GEN_PERF_COUNTER_DATA_TYPE_FLOAT
] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR
,
299 [GEN_PERF_COUNTER_DATA_TYPE_DOUBLE
] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR
,
302 VkResult
anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
303 VkPhysicalDevice physicalDevice
,
304 uint32_t queueFamilyIndex
,
305 uint32_t* pCounterCount
,
306 VkPerformanceCounterKHR
* pCounters
,
307 VkPerformanceCounterDescriptionKHR
* pCounterDescriptions
)
309 ANV_FROM_HANDLE(anv_physical_device
, pdevice
, physicalDevice
);
310 struct gen_perf_config
*perf
= pdevice
->perf
;
312 uint32_t desc_count
= *pCounterCount
;
314 VK_OUTARRAY_MAKE(out
, pCounters
, pCounterCount
);
315 VK_OUTARRAY_MAKE(out_desc
, pCounterDescriptions
, &desc_count
);
317 for (int c
= 0; c
< (perf
? perf
->n_counters
: 0); c
++) {
318 const struct gen_perf_query_counter
*gen_counter
= perf
->counter_infos
[c
].counter
;
320 vk_outarray_append(&out
, counter
) {
321 counter
->unit
= gen_perf_counter_unit_to_vk_unit
[gen_counter
->units
];
322 counter
->scope
= VK_QUERY_SCOPE_COMMAND_KHR
;
323 counter
->storage
= gen_perf_counter_data_type_to_vk_storage
[gen_counter
->data_type
];
325 unsigned char sha1_result
[20];
326 _mesa_sha1_compute(gen_counter
->symbol_name
,
327 strlen(gen_counter
->symbol_name
),
329 memcpy(counter
->uuid
, sha1_result
, sizeof(counter
->uuid
));
332 vk_outarray_append(&out_desc
, desc
) {
333 desc
->flags
= 0; /* None so far. */
334 snprintf(desc
->name
, sizeof(desc
->name
), "%s", gen_counter
->name
);
335 snprintf(desc
->category
, sizeof(desc
->category
), "%s", gen_counter
->category
);
336 snprintf(desc
->description
, sizeof(desc
->description
), "%s", gen_counter
->desc
);
340 return vk_outarray_status(&out
);
343 void anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
344 VkPhysicalDevice physicalDevice
,
345 const VkQueryPoolPerformanceCreateInfoKHR
* pPerformanceQueryCreateInfo
,
346 uint32_t* pNumPasses
)
348 ANV_FROM_HANDLE(anv_physical_device
, pdevice
, physicalDevice
);
349 struct gen_perf_config
*perf
= pdevice
->perf
;
356 *pNumPasses
= gen_perf_get_n_passes(perf
,
357 pPerformanceQueryCreateInfo
->pCounterIndices
,
358 pPerformanceQueryCreateInfo
->counterIndexCount
,
362 VkResult
anv_AcquireProfilingLockKHR(
364 const VkAcquireProfilingLockInfoKHR
* pInfo
)
366 ANV_FROM_HANDLE(anv_device
, device
, _device
);
367 struct gen_perf_config
*perf
= device
->physical
->perf
;
368 struct gen_perf_query_info
*first_metric_set
= &perf
->queries
[0];
371 assert(device
->perf_fd
== -1);
373 if (likely(!(INTEL_DEBUG
& DEBUG_NO_OACONFIG
))) {
374 fd
= anv_device_perf_open(device
, first_metric_set
->oa_metrics_set_id
);
379 device
->perf_fd
= fd
;
383 void anv_ReleaseProfilingLockKHR(
386 ANV_FROM_HANDLE(anv_device
, device
, _device
);
388 if (likely(!(INTEL_DEBUG
& DEBUG_NO_OACONFIG
))) {
389 assert(device
->perf_fd
>= 0);
390 close(device
->perf_fd
);
392 device
->perf_fd
= -1;
396 anv_perf_write_pass_results(struct gen_perf_config
*perf
,
397 struct anv_query_pool
*pool
, uint32_t pass
,
398 const struct gen_perf_query_result
*accumulated_results
,
399 union VkPerformanceCounterResultKHR
*results
)
401 for (uint32_t c
= 0; c
< pool
->n_counters
; c
++) {
402 const struct gen_perf_counter_pass
*counter_pass
= &pool
->counter_pass
[c
];
404 if (counter_pass
->pass
!= pass
)
407 switch (pool
->pass_query
[pass
]->kind
) {
408 case GEN_PERF_QUERY_TYPE_PIPELINE
: {
409 assert(counter_pass
->counter
->data_type
== GEN_PERF_COUNTER_DATA_TYPE_UINT64
);
410 uint32_t accu_offset
= counter_pass
->counter
->offset
/ sizeof(uint64_t);
411 results
[c
].uint64
= accumulated_results
->accumulator
[accu_offset
];
415 case GEN_PERF_QUERY_TYPE_OA
:
416 case GEN_PERF_QUERY_TYPE_RAW
:
417 switch (counter_pass
->counter
->data_type
) {
418 case GEN_PERF_COUNTER_DATA_TYPE_UINT64
:
420 counter_pass
->counter
->oa_counter_read_uint64(perf
,
422 accumulated_results
->accumulator
);
424 case GEN_PERF_COUNTER_DATA_TYPE_FLOAT
:
426 counter_pass
->counter
->oa_counter_read_float(perf
,
428 accumulated_results
->accumulator
);
431 /* So far we aren't using uint32, double or bool32... */
432 unreachable("unexpected counter data type");
437 unreachable("invalid query type");
440 /* The Vulkan extension only has nanoseconds as a unit */
441 if (counter_pass
->counter
->units
== GEN_PERF_COUNTER_UNITS_US
) {
442 assert(counter_pass
->counter
->data_type
== GEN_PERF_COUNTER_DATA_TYPE_UINT64
);
443 results
[c
].uint64
*= 1000;