/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
28 #include "anv_private.h"
31 #include "perf/gen_perf.h"
32 #include "perf/gen_perf_mdapi.h"
34 #include "util/mesa-sha1.h"
36 struct gen_perf_config
*
37 anv_get_perf(const struct gen_device_info
*devinfo
, int fd
)
39 /* We need self modifying batches. The i915 parser prevents it on
40 * Gen7.5 :( maybe one day.
45 struct gen_perf_config
*perf
= gen_perf_new(NULL
);
47 gen_perf_init_metrics(perf
, devinfo
, fd
, false /* pipeline statistics */);
49 if (!perf
->n_queries
) {
50 if (perf
->platform_supported
)
51 intel_logw("Performance support disabled, "
52 "consider sysctl dev.i915.perf_stream_paranoid=0\n");
56 /* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available in
59 if (perf
->i915_perf_version
< 3)
70 anv_device_perf_init(struct anv_device
*device
)
76 anv_device_perf_open(struct anv_device
*device
, uint64_t metric_id
)
78 uint64_t properties
[DRM_I915_PERF_PROP_MAX
* 2];
79 struct drm_i915_perf_open_param param
;
82 properties
[p
++] = DRM_I915_PERF_PROP_SAMPLE_OA
;
83 properties
[p
++] = true;
85 properties
[p
++] = DRM_I915_PERF_PROP_OA_METRICS_SET
;
86 properties
[p
++] = metric_id
;
88 properties
[p
++] = DRM_I915_PERF_PROP_OA_FORMAT
;
89 properties
[p
++] = device
->info
.gen
>= 8 ?
90 I915_OA_FORMAT_A32u40_A4u32_B8_C8
:
91 I915_OA_FORMAT_A45_B8_C8
;
93 properties
[p
++] = DRM_I915_PERF_PROP_OA_EXPONENT
;
94 properties
[p
++] = 31; /* slowest sampling period */
96 properties
[p
++] = DRM_I915_PERF_PROP_CTX_HANDLE
;
97 properties
[p
++] = device
->context_id
;
99 properties
[p
++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION
;
100 properties
[p
++] = true;
102 /* If global SSEU is available, pin it to the default. This will ensure on
103 * Gen11 for instance we use the full EU array. Initially when perf was
104 * enabled we would use only half on Gen11 because of functional
107 if (device
->physical
->perf
->i915_perf_version
>= 4) {
108 properties
[p
++] = DRM_I915_PERF_PROP_GLOBAL_SSEU
;
109 properties
[p
++] = (uintptr_t) &device
->physical
->perf
->sseu
;
112 memset(¶m
, 0, sizeof(param
));
114 param
.flags
|= I915_PERF_FLAG_FD_CLOEXEC
| I915_PERF_FLAG_FD_NONBLOCK
;
115 param
.properties_ptr
= (uintptr_t)properties
;
116 param
.num_properties
= p
/ 2;
118 stream_fd
= gen_ioctl(device
->fd
, DRM_IOCTL_I915_PERF_OPEN
, ¶m
);
/* VK_INTEL_performance_query */
123 VkResult
anv_InitializePerformanceApiINTEL(
125 const VkInitializePerformanceApiInfoINTEL
* pInitializeInfo
)
127 ANV_FROM_HANDLE(anv_device
, device
, _device
);
129 if (!device
->physical
->perf
)
130 return VK_ERROR_EXTENSION_NOT_PRESENT
;
132 /* Not much to do here */
136 VkResult
anv_GetPerformanceParameterINTEL(
138 VkPerformanceParameterTypeINTEL parameter
,
139 VkPerformanceValueINTEL
* pValue
)
141 ANV_FROM_HANDLE(anv_device
, device
, _device
);
143 if (!device
->physical
->perf
)
144 return VK_ERROR_EXTENSION_NOT_PRESENT
;
146 VkResult result
= VK_SUCCESS
;
148 case VK_PERFORMANCE_PARAMETER_TYPE_HW_COUNTERS_SUPPORTED_INTEL
:
149 pValue
->type
= VK_PERFORMANCE_VALUE_TYPE_BOOL_INTEL
;
150 pValue
->data
.valueBool
= VK_TRUE
;
153 case VK_PERFORMANCE_PARAMETER_TYPE_STREAM_MARKER_VALID_BITS_INTEL
:
154 pValue
->type
= VK_PERFORMANCE_VALUE_TYPE_UINT32_INTEL
;
155 pValue
->data
.value32
= 25;
159 result
= VK_ERROR_FEATURE_NOT_PRESENT
;
166 VkResult
anv_CmdSetPerformanceMarkerINTEL(
167 VkCommandBuffer commandBuffer
,
168 const VkPerformanceMarkerInfoINTEL
* pMarkerInfo
)
170 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
172 cmd_buffer
->intel_perf_marker
= pMarkerInfo
->marker
;
177 VkResult
anv_AcquirePerformanceConfigurationINTEL(
179 const VkPerformanceConfigurationAcquireInfoINTEL
* pAcquireInfo
,
180 VkPerformanceConfigurationINTEL
* pConfiguration
)
182 ANV_FROM_HANDLE(anv_device
, device
, _device
);
185 if (likely(!(INTEL_DEBUG
& DEBUG_NO_OACONFIG
))) {
186 struct gen_perf_registers
*perf_config
=
187 gen_perf_load_configuration(device
->physical
->perf
, device
->fd
,
188 GEN_PERF_QUERY_GUID_MDAPI
);
190 return VK_INCOMPLETE
;
192 ret
= gen_perf_store_configuration(device
->physical
->perf
, device
->fd
,
193 perf_config
, NULL
/* guid */);
195 ralloc_free(perf_config
);
196 return VK_INCOMPLETE
;
200 *pConfiguration
= (VkPerformanceConfigurationINTEL
) (uint64_t) ret
;
205 VkResult
anv_ReleasePerformanceConfigurationINTEL(
207 VkPerformanceConfigurationINTEL _configuration
)
209 ANV_FROM_HANDLE(anv_device
, device
, _device
);
210 uint64_t config
= (uint64_t) _configuration
;
212 if (likely(!(INTEL_DEBUG
& DEBUG_NO_OACONFIG
)))
213 gen_ioctl(device
->fd
, DRM_IOCTL_I915_PERF_REMOVE_CONFIG
, &config
);
218 VkResult
anv_QueueSetPerformanceConfigurationINTEL(
220 VkPerformanceConfigurationINTEL _configuration
)
222 ANV_FROM_HANDLE(anv_queue
, queue
, _queue
);
223 struct anv_device
*device
= queue
->device
;
224 uint64_t configuration
= (uint64_t) _configuration
;
226 if (likely(!(INTEL_DEBUG
& DEBUG_NO_OACONFIG
))) {
227 if (device
->perf_fd
< 0) {
228 device
->perf_fd
= anv_device_perf_open(device
, configuration
);
229 if (device
->perf_fd
< 0)
230 return VK_ERROR_INITIALIZATION_FAILED
;
232 int ret
= gen_ioctl(device
->perf_fd
, I915_PERF_IOCTL_CONFIG
,
233 (void *)(uintptr_t) _configuration
);
235 return anv_device_set_lost(device
, "i915-perf config failed: %m");
242 void anv_UninitializePerformanceApiINTEL(
245 ANV_FROM_HANDLE(anv_device
, device
, _device
);
247 if (device
->perf_fd
>= 0) {
248 close(device
->perf_fd
);
249 device
->perf_fd
= -1;
/* VK_KHR_performance_query */
254 static const VkPerformanceCounterUnitKHR
255 gen_perf_counter_unit_to_vk_unit
[] = {
256 [GEN_PERF_COUNTER_UNITS_BYTES
] = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR
,
257 [GEN_PERF_COUNTER_UNITS_HZ
] = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR
,
258 [GEN_PERF_COUNTER_UNITS_NS
] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR
,
259 [GEN_PERF_COUNTER_UNITS_US
] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR
, /* todo */
260 [GEN_PERF_COUNTER_UNITS_PIXELS
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
261 [GEN_PERF_COUNTER_UNITS_TEXELS
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
262 [GEN_PERF_COUNTER_UNITS_THREADS
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
263 [GEN_PERF_COUNTER_UNITS_PERCENT
] = VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR
,
264 [GEN_PERF_COUNTER_UNITS_MESSAGES
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
265 [GEN_PERF_COUNTER_UNITS_NUMBER
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
266 [GEN_PERF_COUNTER_UNITS_CYCLES
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
267 [GEN_PERF_COUNTER_UNITS_EVENTS
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
268 [GEN_PERF_COUNTER_UNITS_UTILIZATION
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
269 [GEN_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
270 [GEN_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
271 [GEN_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
272 [GEN_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE
] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR
,
275 static const VkPerformanceCounterStorageKHR
276 gen_perf_counter_data_type_to_vk_storage
[] = {
277 [GEN_PERF_COUNTER_DATA_TYPE_BOOL32
] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR
,
278 [GEN_PERF_COUNTER_DATA_TYPE_UINT32
] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR
,
279 [GEN_PERF_COUNTER_DATA_TYPE_UINT64
] = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR
,
280 [GEN_PERF_COUNTER_DATA_TYPE_FLOAT
] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR
,
281 [GEN_PERF_COUNTER_DATA_TYPE_DOUBLE
] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR
,
284 VkResult
anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
285 VkPhysicalDevice physicalDevice
,
286 uint32_t queueFamilyIndex
,
287 uint32_t* pCounterCount
,
288 VkPerformanceCounterKHR
* pCounters
,
289 VkPerformanceCounterDescriptionKHR
* pCounterDescriptions
)
291 ANV_FROM_HANDLE(anv_physical_device
, pdevice
, physicalDevice
);
292 struct gen_perf_config
*perf
= pdevice
->perf
;
294 uint32_t desc_count
= *pCounterCount
;
296 VK_OUTARRAY_MAKE(out
, pCounters
, pCounterCount
);
297 VK_OUTARRAY_MAKE(out_desc
, pCounterDescriptions
, &desc_count
);
299 for (int c
= 0; c
< (perf
? perf
->n_counters
: 0); c
++) {
300 const struct gen_perf_query_counter
*gen_counter
= perf
->counter_infos
[c
].counter
;
302 vk_outarray_append(&out
, counter
) {
303 counter
->unit
= gen_perf_counter_unit_to_vk_unit
[gen_counter
->units
];
304 counter
->scope
= VK_QUERY_SCOPE_COMMAND_KHR
;
305 counter
->storage
= gen_perf_counter_data_type_to_vk_storage
[gen_counter
->data_type
];
307 unsigned char sha1_result
[20];
308 _mesa_sha1_compute(gen_counter
->symbol_name
,
309 strlen(gen_counter
->symbol_name
),
311 memcpy(counter
->uuid
, sha1_result
, sizeof(counter
->uuid
));
314 vk_outarray_append(&out_desc
, desc
) {
315 desc
->flags
= 0; /* None so far. */
316 snprintf(desc
->name
, sizeof(desc
->name
), "%s", gen_counter
->name
);
317 snprintf(desc
->category
, sizeof(desc
->category
), "%s", gen_counter
->category
);
318 snprintf(desc
->description
, sizeof(desc
->description
), "%s", gen_counter
->desc
);
322 return vk_outarray_status(&out
);
325 void anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
326 VkPhysicalDevice physicalDevice
,
327 const VkQueryPoolPerformanceCreateInfoKHR
* pPerformanceQueryCreateInfo
,
328 uint32_t* pNumPasses
)
330 ANV_FROM_HANDLE(anv_physical_device
, pdevice
, physicalDevice
);
331 struct gen_perf_config
*perf
= pdevice
->perf
;
338 *pNumPasses
= gen_perf_get_n_passes(perf
,
339 pPerformanceQueryCreateInfo
->pCounterIndices
,
340 pPerformanceQueryCreateInfo
->counterIndexCount
,
344 VkResult
anv_AcquireProfilingLockKHR(
346 const VkAcquireProfilingLockInfoKHR
* pInfo
)
348 ANV_FROM_HANDLE(anv_device
, device
, _device
);
349 struct gen_perf_config
*perf
= device
->physical
->perf
;
350 struct gen_perf_query_info
*first_metric_set
= &perf
->queries
[0];
353 assert(device
->perf_fd
== -1);
355 if (likely(!(INTEL_DEBUG
& DEBUG_NO_OACONFIG
))) {
356 fd
= anv_device_perf_open(device
, first_metric_set
->oa_metrics_set_id
);
361 device
->perf_fd
= fd
;
365 void anv_ReleaseProfilingLockKHR(
368 ANV_FROM_HANDLE(anv_device
, device
, _device
);
370 if (likely(!(INTEL_DEBUG
& DEBUG_NO_OACONFIG
))) {
371 assert(device
->perf_fd
>= 0);
372 close(device
->perf_fd
);
374 device
->perf_fd
= -1;
378 anv_perf_write_pass_results(struct gen_perf_config
*perf
,
379 struct anv_query_pool
*pool
, uint32_t pass
,
380 const struct gen_perf_query_result
*accumulated_results
,
381 union VkPerformanceCounterResultKHR
*results
)
383 for (uint32_t c
= 0; c
< pool
->n_counters
; c
++) {
384 const struct gen_perf_counter_pass
*counter_pass
= &pool
->counter_pass
[c
];
386 if (counter_pass
->pass
!= pass
)
389 switch (pool
->pass_query
[pass
]->kind
) {
390 case GEN_PERF_QUERY_TYPE_PIPELINE
: {
391 assert(counter_pass
->counter
->data_type
== GEN_PERF_COUNTER_DATA_TYPE_UINT64
);
392 uint32_t accu_offset
= counter_pass
->counter
->offset
/ sizeof(uint64_t);
393 results
[c
].uint64
= accumulated_results
->accumulator
[accu_offset
];
397 case GEN_PERF_QUERY_TYPE_OA
:
398 case GEN_PERF_QUERY_TYPE_RAW
:
399 switch (counter_pass
->counter
->data_type
) {
400 case GEN_PERF_COUNTER_DATA_TYPE_UINT64
:
402 counter_pass
->counter
->oa_counter_read_uint64(perf
,
404 accumulated_results
->accumulator
);
406 case GEN_PERF_COUNTER_DATA_TYPE_FLOAT
:
408 counter_pass
->counter
->oa_counter_read_float(perf
,
410 accumulated_results
->accumulator
);
413 /* So far we aren't using uint32, double or bool32... */
414 unreachable("unexpected counter data type");
419 unreachable("invalid query type");
422 /* The Vulkan extension only has nanoseconds as a unit */
423 if (counter_pass
->counter
->units
== GEN_PERF_COUNTER_UNITS_US
) {
424 assert(counter_pass
->counter
->data_type
== GEN_PERF_COUNTER_DATA_TYPE_UINT64
);
425 results
[c
].uint64
*= 1000;