intel/perf: move query_mask and location out of gen_perf_query_counter
[mesa.git] src/intel/vulkan/anv_perf.c
/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#include "anv_private.h"
#include "vk_util.h"

#include "perf/gen_perf.h"
#include "perf/gen_perf_mdapi.h"

#include "util/mesa-sha1.h"

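/* Probe i915 perf support and load the available metric sets.  Returns NULL
 * when performance queries cannot be supported on this platform or kernel.
 */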
struct gen_perf_config *
anv_get_perf(const struct gen_device_info *devinfo, int fd)
{
   /* We need self-modifying batches. The i915 command parser prevents it on
    * Gen7.5 :( maybe one day.
    */
   if (devinfo->gen < 8)
      return NULL;

   struct gen_perf_config *perf = gen_perf_new(NULL);

   gen_perf_init_metrics(perf, devinfo, fd, false /* pipeline statistics */);

   if (!perf->n_queries) {
      if (perf->platform_supported)
         intel_logw("Performance support disabled, "
                    "consider sysctl dev.i915.perf_stream_paranoid=0\n");
      goto err;
   }

   /* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available in
    * perf revision 3.
    */
   if (perf->i915_perf_version < 3)
      goto err;

   return perf;

 err:
   ralloc_free(perf);
   return NULL;
}

void
anv_device_perf_init(struct anv_device *device)
{
   device->perf_fd = -1;
}

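/* Open an i915 perf stream on the device's GEM context for the given metric
 * set.  Properties are passed to the kernel as (key, value) pairs: OA
 * sampling with a per-generation report format, the slowest sampling
 * exponent, preemption hold, and, on kernels exposing perf revision 4, a
 * pinned global SSEU configuration.
 */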
static int
anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
{
   uint64_t properties[DRM_I915_PERF_PROP_MAX * 2];
   struct drm_i915_perf_open_param param;
   int p = 0, stream_fd;

   properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA;
   properties[p++] = true;

   properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET;
   properties[p++] = metric_id;

   properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
   properties[p++] = device->info.gen >= 8 ?
      I915_OA_FORMAT_A32u40_A4u32_B8_C8 :
      I915_OA_FORMAT_A45_B8_C8;

   properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
   properties[p++] = 31; /* slowest sampling period */

   properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE;
   properties[p++] = device->context_id;

   properties[p++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION;
   properties[p++] = true;

   /* If global SSEU is available, pin it to the default. This will ensure on
    * Gen11 for instance we use the full EU array. Initially when perf was
    * enabled we would use only half on Gen11 because of functional
    * requirements.
    */
   if (device->physical->perf->i915_perf_version >= 4) {
      properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;
      properties[p++] = (uintptr_t) &device->physical->perf->sseu;
   }

   memset(&param, 0, sizeof(param));
   param.flags = 0;
   param.flags |= I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK;
   param.properties_ptr = (uintptr_t)properties;
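   /* Each property above is a (key, value) pair, so the kernel sees p / 2
    * properties.
    */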
   param.num_properties = p / 2;

   stream_fd = gen_ioctl(device->fd, DRM_IOCTL_I915_PERF_OPEN, &param);
   return stream_fd;
}

/* VK_INTEL_performance_query */
VkResult anv_InitializePerformanceApiINTEL(
    VkDevice _device,
    const VkInitializePerformanceApiInfoINTEL* pInitializeInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!device->physical->perf)
      return VK_ERROR_EXTENSION_NOT_PRESENT;

   /* Not much to do here */
   return VK_SUCCESS;
}

VkResult anv_GetPerformanceParameterINTEL(
    VkDevice _device,
    VkPerformanceParameterTypeINTEL parameter,
    VkPerformanceValueINTEL* pValue)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!device->physical->perf)
      return VK_ERROR_EXTENSION_NOT_PRESENT;

   VkResult result = VK_SUCCESS;
   switch (parameter) {
   case VK_PERFORMANCE_PARAMETER_TYPE_HW_COUNTERS_SUPPORTED_INTEL:
      pValue->type = VK_PERFORMANCE_VALUE_TYPE_BOOL_INTEL;
      pValue->data.valueBool = VK_TRUE;
      break;

   case VK_PERFORMANCE_PARAMETER_TYPE_STREAM_MARKER_VALID_BITS_INTEL:
      pValue->type = VK_PERFORMANCE_VALUE_TYPE_UINT32_INTEL;
      pValue->data.value32 = 25;
      break;

   default:
      result = VK_ERROR_FEATURE_NOT_PRESENT;
      break;
   }

   return result;
}

VkResult anv_CmdSetPerformanceMarkerINTEL(
    VkCommandBuffer commandBuffer,
    const VkPerformanceMarkerInfoINTEL* pMarkerInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   cmd_buffer->intel_perf_marker = pMarkerInfo->marker;

   return VK_SUCCESS;
}

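/* Acquiring a configuration loads the register list of the MDAPI metric set
 * and registers it with i915 as a new configuration; the returned handle is
 * the kernel's metric set ID (or -1 when OA configs are disabled for
 * debugging).
 */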
VkResult anv_AcquirePerformanceConfigurationINTEL(
    VkDevice _device,
    const VkPerformanceConfigurationAcquireInfoINTEL* pAcquireInfo,
    VkPerformanceConfigurationINTEL* pConfiguration)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   int ret = -1;

   if (likely(!(INTEL_DEBUG & DEBUG_NO_OACONFIG))) {
      struct gen_perf_registers *perf_config =
         gen_perf_load_configuration(device->physical->perf, device->fd,
                                     GEN_PERF_QUERY_GUID_MDAPI);
      if (!perf_config)
         return VK_INCOMPLETE;

      ret = gen_perf_store_configuration(device->physical->perf, device->fd,
                                         perf_config, NULL /* guid */);
      if (ret < 0) {
         ralloc_free(perf_config);
         return VK_INCOMPLETE;
      }
   }

   *pConfiguration = (VkPerformanceConfigurationINTEL) (uint64_t) ret;

   return VK_SUCCESS;
}

VkResult anv_ReleasePerformanceConfigurationINTEL(
    VkDevice _device,
    VkPerformanceConfigurationINTEL _configuration)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   uint64_t config = (uint64_t) _configuration;

   if (likely(!(INTEL_DEBUG & DEBUG_NO_OACONFIG)))
      gen_ioctl(device->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config);

   return VK_SUCCESS;
}

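/* The first configuration set on a queue opens the i915 perf stream with
 * that metric set; subsequent calls switch the already open stream to the
 * new configuration through I915_PERF_IOCTL_CONFIG.
 */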
VkResult anv_QueueSetPerformanceConfigurationINTEL(
    VkQueue _queue,
    VkPerformanceConfigurationINTEL _configuration)
{
   ANV_FROM_HANDLE(anv_queue, queue, _queue);
   struct anv_device *device = queue->device;
   uint64_t configuration = (uint64_t) _configuration;

   if (likely(!(INTEL_DEBUG & DEBUG_NO_OACONFIG))) {
      if (device->perf_fd < 0) {
         device->perf_fd = anv_device_perf_open(device, configuration);
         if (device->perf_fd < 0)
            return VK_ERROR_INITIALIZATION_FAILED;
      } else {
         int ret = gen_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
                             (void *)(uintptr_t) _configuration);
         if (ret < 0)
            return anv_device_set_lost(device, "i915-perf config failed: %m");
      }
   }

   return VK_SUCCESS;
}

void anv_UninitializePerformanceApiINTEL(
    VkDevice _device)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (device->perf_fd >= 0) {
      close(device->perf_fd);
      device->perf_fd = -1;
   }
}

/* VK_KHR_performance_query */
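/* Mappings from gen_perf units/data types to the Vulkan enums.  Units with
 * no Vulkan equivalent are reported as GENERIC; microsecond counters are
 * advertised as nanoseconds and scaled at result write-out (see
 * anv_perf_write_pass_results).
 */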
static const VkPerformanceCounterUnitKHR
gen_perf_counter_unit_to_vk_unit[] = {
   [GEN_PERF_COUNTER_UNITS_BYTES] = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR,
   [GEN_PERF_COUNTER_UNITS_HZ] = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR,
   [GEN_PERF_COUNTER_UNITS_NS] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,
   [GEN_PERF_COUNTER_UNITS_US] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR, /* todo */
   [GEN_PERF_COUNTER_UNITS_PIXELS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [GEN_PERF_COUNTER_UNITS_TEXELS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [GEN_PERF_COUNTER_UNITS_THREADS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [GEN_PERF_COUNTER_UNITS_PERCENT] = VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR,
   [GEN_PERF_COUNTER_UNITS_MESSAGES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [GEN_PERF_COUNTER_UNITS_NUMBER] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [GEN_PERF_COUNTER_UNITS_CYCLES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [GEN_PERF_COUNTER_UNITS_EVENTS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [GEN_PERF_COUNTER_UNITS_UTILIZATION] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [GEN_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [GEN_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [GEN_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
   [GEN_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
};

static const VkPerformanceCounterStorageKHR
gen_perf_counter_data_type_to_vk_storage[] = {
   [GEN_PERF_COUNTER_DATA_TYPE_BOOL32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
   [GEN_PERF_COUNTER_DATA_TYPE_UINT32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
   [GEN_PERF_COUNTER_DATA_TYPE_UINT64] = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
   [GEN_PERF_COUNTER_DATA_TYPE_FLOAT] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR,
   [GEN_PERF_COUNTER_DATA_TYPE_DOUBLE] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR,
};

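/* Expose every counter of every metric set as one flat list.  Counter UUIDs
 * are derived from a SHA1 of the counter's symbol name, so they stay stable
 * across runs and driver builds.
 */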
VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
    VkPhysicalDevice physicalDevice,
    uint32_t queueFamilyIndex,
    uint32_t* pCounterCount,
    VkPerformanceCounterKHR* pCounters,
    VkPerformanceCounterDescriptionKHR* pCounterDescriptions)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   struct gen_perf_config *perf = pdevice->perf;

   uint32_t desc_count = *pCounterCount;

   VK_OUTARRAY_MAKE(out, pCounters, pCounterCount);
   VK_OUTARRAY_MAKE(out_desc, pCounterDescriptions, &desc_count);

   for (int c = 0; c < (perf ? perf->n_counters : 0); c++) {
      const struct gen_perf_query_counter *gen_counter = perf->counter_infos[c].counter;

      vk_outarray_append(&out, counter) {
         counter->unit = gen_perf_counter_unit_to_vk_unit[gen_counter->units];
         counter->scope = VK_QUERY_SCOPE_COMMAND_KHR;
         counter->storage = gen_perf_counter_data_type_to_vk_storage[gen_counter->data_type];

         unsigned char sha1_result[20];
         _mesa_sha1_compute(gen_counter->symbol_name,
                            strlen(gen_counter->symbol_name),
                            sha1_result);
         memcpy(counter->uuid, sha1_result, sizeof(counter->uuid));
      }

      vk_outarray_append(&out_desc, desc) {
         desc->flags = 0; /* None so far. */
         snprintf(desc->name, sizeof(desc->name), "%s", gen_counter->name);
         snprintf(desc->category, sizeof(desc->category), "%s", gen_counter->category);
         snprintf(desc->description, sizeof(desc->description), "%s", gen_counter->desc);
      }
   }

   return vk_outarray_status(&out);
}

void anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
    VkPhysicalDevice physicalDevice,
    const VkQueryPoolPerformanceCreateInfoKHR* pPerformanceQueryCreateInfo,
    uint32_t* pNumPasses)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   struct gen_perf_config *perf = pdevice->perf;

   if (!perf) {
      *pNumPasses = 0;
      return;
   }

   *pNumPasses = gen_perf_get_n_passes(perf,
                                       pPerformanceQueryCreateInfo->pCounterIndices,
                                       pPerformanceQueryCreateInfo->counterIndexCount,
                                       NULL);
}

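/* Taking the profiling lock opens the i915 perf stream using the first
 * available metric set; failure to open the stream is reported as
 * VK_TIMEOUT, matching the spec's provision for a lock that cannot be
 * acquired.
 */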
VkResult anv_AcquireProfilingLockKHR(
    VkDevice _device,
    const VkAcquireProfilingLockInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct gen_perf_config *perf = device->physical->perf;
   struct gen_perf_query_info *first_metric_set = &perf->queries[0];
   int fd = -1;

   assert(device->perf_fd == -1);

   if (likely(!(INTEL_DEBUG & DEBUG_NO_OACONFIG))) {
      fd = anv_device_perf_open(device, first_metric_set->oa_metrics_set_id);
      if (fd < 0)
         return VK_TIMEOUT;
   }

   device->perf_fd = fd;
   return VK_SUCCESS;
}

void anv_ReleaseProfilingLockKHR(
    VkDevice _device)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (likely(!(INTEL_DEBUG & DEBUG_NO_OACONFIG))) {
      assert(device->perf_fd >= 0);
      close(device->perf_fd);
   }
   device->perf_fd = -1;
}

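/* Translate the accumulated HW deltas of one pass into the
 * VkPerformanceCounterResultKHR array handed back to the application,
 * skipping counters that were not captured during this pass.
 */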
void
anv_perf_write_pass_results(struct gen_perf_config *perf,
                            struct anv_query_pool *pool, uint32_t pass,
                            const struct gen_perf_query_result *accumulated_results,
                            union VkPerformanceCounterResultKHR *results)
{
   for (uint32_t c = 0; c < pool->n_counters; c++) {
      const struct gen_perf_counter_pass *counter_pass = &pool->counter_pass[c];

      if (counter_pass->pass != pass)
         continue;

      switch (pool->pass_query[pass]->kind) {
      case GEN_PERF_QUERY_TYPE_PIPELINE: {
         assert(counter_pass->counter->data_type == GEN_PERF_COUNTER_DATA_TYPE_UINT64);
         uint32_t accu_offset = counter_pass->counter->offset / sizeof(uint64_t);
         results[c].uint64 = accumulated_results->accumulator[accu_offset];
         break;
      }

      case GEN_PERF_QUERY_TYPE_OA:
      case GEN_PERF_QUERY_TYPE_RAW:
         switch (counter_pass->counter->data_type) {
         case GEN_PERF_COUNTER_DATA_TYPE_UINT64:
            results[c].uint64 =
               counter_pass->counter->oa_counter_read_uint64(perf,
                                                             counter_pass->query,
                                                             accumulated_results->accumulator);
            break;
         case GEN_PERF_COUNTER_DATA_TYPE_FLOAT:
            results[c].float32 =
               counter_pass->counter->oa_counter_read_float(perf,
                                                            counter_pass->query,
                                                            accumulated_results->accumulator);
            break;
         default:
            /* So far we aren't using uint32, double or bool32... */
            unreachable("unexpected counter data type");
         }
         break;

      default:
         unreachable("invalid query type");
      }

      /* The Vulkan extension only has nanoseconds as a time unit, so convert
       * microsecond counters.
       */
      if (counter_pass->counter->units == GEN_PERF_COUNTER_UNITS_US) {
         assert(counter_pass->counter->data_type == GEN_PERF_COUNTER_DATA_TYPE_UINT64);
         results[c].uint64 *= 1000;
      }
   }
}