1bf2424e108f46fa129686fd977fd8a02a29a547
[mesa.git] / src / intel / perf / gen_perf.c
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <dirent.h>
25
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 #include <fcntl.h>
29 #include <unistd.h>
30 #include <errno.h>
31
32 #ifndef HAVE_DIRENT_D_TYPE
33 #include <limits.h> // PATH_MAX
34 #endif
35
36 #include <drm-uapi/i915_drm.h>
37
38 #include "common/gen_gem.h"
39
40 #include "dev/gen_debug.h"
41 #include "dev/gen_device_info.h"
42
43 #include "perf/gen_perf.h"
44 #include "perf/gen_perf_regs.h"
45 #include "perf/gen_perf_mdapi.h"
46 #include "perf/gen_perf_metrics.h"
47 #include "perf/gen_perf_private.h"
48
49 #include "util/bitscan.h"
50 #include "util/mesa-sha1.h"
51 #include "util/u_math.h"
52
53 #define FILE_DEBUG_FLAG DEBUG_PERFMON
54
55 #define OA_REPORT_INVALID_CTX_ID (0xffffffff)
56
/* Convert a CPU pointer into the 64-bit integer form used by the i915
 * uAPI (the *_ptr fields are u64 regardless of CPU word size).
 */
static inline uint64_t to_user_pointer(void *ptr)
{
   uintptr_t bits = (uintptr_t) ptr;
   return bits;
}
61
62 static bool
63 is_dir_or_link(const struct dirent *entry, const char *parent_dir)
64 {
65 #ifdef HAVE_DIRENT_D_TYPE
66 return entry->d_type == DT_DIR || entry->d_type == DT_LNK;
67 #else
68 struct stat st;
69 char path[PATH_MAX + 1];
70 snprintf(path, sizeof(path), "%s/%s", parent_dir, entry->d_name);
71 lstat(path, &st);
72 return S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode);
73 #endif
74 }
75
/* Resolve the sysfs directory of the DRM card backing the given DRM fd
 * (e.g. "/sys/dev/char/<maj>:<min>/device/drm/card0") into
 * perf->sysfs_dev_dir.
 *
 * Returns false if the fd cannot be stat-ed, is not a character device,
 * the path overflows the buffer, or no "cardX" entry is found.
 */
static bool
get_sysfs_dev_dir(struct gen_perf_config *perf, int fd)
{
   struct stat sb;
   int min, maj;
   DIR *drmdir;
   struct dirent *drm_entry;
   int len;

   perf->sysfs_dev_dir[0] = '\0';

   if (fstat(fd, &sb)) {
      DBG("Failed to stat DRM fd\n");
      return false;
   }

   maj = major(sb.st_rdev);
   min = minor(sb.st_rdev);

   if (!S_ISCHR(sb.st_mode)) {
      DBG("DRM fd is not a character device as expected\n");
      return false;
   }

   /* First build the path to the device's drm/ directory from the char
    * device's major:minor numbers.
    */
   len = snprintf(perf->sysfs_dev_dir,
                  sizeof(perf->sysfs_dev_dir),
                  "/sys/dev/char/%d:%d/device/drm", maj, min);
   if (len < 0 || len >= sizeof(perf->sysfs_dev_dir)) {
      DBG("Failed to concatenate sysfs path to drm device\n");
      return false;
   }

   drmdir = opendir(perf->sysfs_dev_dir);
   if (!drmdir) {
      DBG("Failed to open %s: %m\n", perf->sysfs_dev_dir);
      return false;
   }

   /* Then scan for the "cardX" entry underneath it and keep the full path
    * to that entry (overwriting the drm/ path built above).
    */
   while ((drm_entry = readdir(drmdir))) {
      if (is_dir_or_link(drm_entry, perf->sysfs_dev_dir) &&
          strncmp(drm_entry->d_name, "card", 4) == 0)
      {
         len = snprintf(perf->sysfs_dev_dir,
                        sizeof(perf->sysfs_dev_dir),
                        "/sys/dev/char/%d:%d/device/drm/%s",
                        maj, min, drm_entry->d_name);
         closedir(drmdir);
         if (len < 0 || len >= sizeof(perf->sysfs_dev_dir))
            return false;
         else
            return true;
      }
   }

   closedir(drmdir);

   DBG("Failed to find cardX directory under /sys/dev/char/%d:%d/device/drm\n",
       maj, min);

   return false;
}
137
/* Read an unsigned 64-bit integer from a file (any strtoull base, so
 * "0x..." hex works too).
 *
 * Returns false if the file cannot be opened or read; *val is written
 * only on success.
 */
static bool
read_file_uint64(const char *file, uint64_t *val)
{
   char buf[32];
   int fd, n;

   /* Be explicit about the access mode rather than relying on
    * O_RDONLY == 0.
    */
   fd = open(file, O_RDONLY);
   if (fd < 0)
      return false;
   /* Retry the read if it was interrupted by a signal. */
   while ((n = read(fd, buf, sizeof (buf) - 1)) < 0 &&
          errno == EINTR);
   close(fd);
   if (n < 0)
      return false;

   buf[n] = '\0';
   *val = strtoull(buf, NULL, 0);

   return true;
}
158
159 static bool
160 read_sysfs_drm_device_file_uint64(struct gen_perf_config *perf,
161 const char *file,
162 uint64_t *value)
163 {
164 char buf[512];
165 int len;
166
167 len = snprintf(buf, sizeof(buf), "%s/%s", perf->sysfs_dev_dir, file);
168 if (len < 0 || len >= sizeof(buf)) {
169 DBG("Failed to concatenate sys filename to read u64 from\n");
170 return false;
171 }
172
173 return read_file_uint64(buf, value);
174 }
175
176 static void
177 register_oa_config(struct gen_perf_config *perf,
178 const struct gen_perf_query_info *query,
179 uint64_t config_id)
180 {
181 struct gen_perf_query_info *registered_query =
182 gen_perf_append_query_info(perf, 0);
183
184 *registered_query = *query;
185 registered_query->oa_metrics_set_id = config_id;
186 DBG("metric set registered: id = %" PRIu64", guid = %s\n",
187 registered_query->oa_metrics_set_id, query->guid);
188 }
189
/* Walk the kernel's metrics/ sysfs directory and register every metric
 * set that mesa also knows about (matched by GUID directory name).
 * Used when configs cannot be uploaded through the ADD_CONFIG ioctl
 * (see load_oa_metrics()).
 */
static void
enumerate_sysfs_metrics(struct gen_perf_config *perf)
{
   DIR *metricsdir = NULL;
   struct dirent *metric_entry;
   char buf[256];
   int len;

   len = snprintf(buf, sizeof(buf), "%s/metrics", perf->sysfs_dev_dir);
   if (len < 0 || len >= sizeof(buf)) {
      DBG("Failed to concatenate path to sysfs metrics/ directory\n");
      return;
   }

   metricsdir = opendir(buf);
   if (!metricsdir) {
      DBG("Failed to open %s: %m\n", buf);
      return;
   }

   while ((metric_entry = readdir(metricsdir))) {
      struct hash_entry *entry;
      /* Skip anything that isn't a directory/symlink, and hidden entries
       * ("." and "..").
       */
      if (!is_dir_or_link(metric_entry, buf) ||
          metric_entry->d_name[0] == '.')
         continue;

      DBG("metric set: %s\n", metric_entry->d_name);
      /* Only register metric sets mesa has counter definitions for. */
      entry = _mesa_hash_table_search(perf->oa_metrics_table,
                                      metric_entry->d_name);
      if (entry) {
         uint64_t id;
         if (!gen_perf_load_metric_id(perf, metric_entry->d_name, &id)) {
            DBG("Failed to read metric set id from %s: %m", buf);
            continue;
         }

         register_oa_config(perf, (const struct gen_perf_query_info *)entry->data, id);
      } else
         DBG("metric set not known by mesa (skipping)\n");
   }

   closedir(metricsdir);
}
233
234 static bool
235 kernel_has_dynamic_config_support(struct gen_perf_config *perf, int fd)
236 {
237 uint64_t invalid_config_id = UINT64_MAX;
238
239 return gen_ioctl(fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG,
240 &invalid_config_id) < 0 && errno == ENOENT;
241 }
242
243 static int
244 i915_query_items(struct gen_perf_config *perf, int fd,
245 struct drm_i915_query_item *items, uint32_t n_items)
246 {
247 struct drm_i915_query q = {
248 .num_items = n_items,
249 .items_ptr = to_user_pointer(items),
250 };
251 return gen_ioctl(fd, DRM_IOCTL_I915_QUERY, &q);
252 }
253
254 static bool
255 i915_query_perf_config_supported(struct gen_perf_config *perf, int fd)
256 {
257 struct drm_i915_query_item item = {
258 .query_id = DRM_I915_QUERY_PERF_CONFIG,
259 .flags = DRM_I915_QUERY_PERF_CONFIG_LIST,
260 };
261
262 return i915_query_items(perf, fd, &item, 1) == 0 && item.length > 0;
263 }
264
265 static bool
266 i915_query_perf_config_data(struct gen_perf_config *perf,
267 int fd, const char *guid,
268 struct drm_i915_perf_oa_config *config)
269 {
270 struct {
271 struct drm_i915_query_perf_config query;
272 struct drm_i915_perf_oa_config config;
273 } item_data;
274 struct drm_i915_query_item item = {
275 .query_id = DRM_I915_QUERY_PERF_CONFIG,
276 .flags = DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID,
277 .data_ptr = to_user_pointer(&item_data),
278 .length = sizeof(item_data),
279 };
280
281 memset(&item_data, 0, sizeof(item_data));
282 memcpy(item_data.query.uuid, guid, sizeof(item_data.query.uuid));
283 memcpy(&item_data.config, config, sizeof(item_data.config));
284
285 if (!(i915_query_items(perf, fd, &item, 1) == 0 && item.length > 0))
286 return false;
287
288 memcpy(config, &item_data.config, sizeof(item_data.config));
289
290 return true;
291 }
292
293 bool
294 gen_perf_load_metric_id(struct gen_perf_config *perf_cfg,
295 const char *guid,
296 uint64_t *metric_id)
297 {
298 char config_path[280];
299
300 snprintf(config_path, sizeof(config_path), "%s/metrics/%s/id",
301 perf_cfg->sysfs_dev_dir, guid);
302
303 /* Don't recreate already loaded configs. */
304 return read_file_uint64(config_path, metric_id);
305 }
306
307 static uint64_t
308 i915_add_config(struct gen_perf_config *perf, int fd,
309 const struct gen_perf_registers *config,
310 const char *guid)
311 {
312 struct drm_i915_perf_oa_config i915_config = { 0, };
313
314 memcpy(i915_config.uuid, guid, sizeof(i915_config.uuid));
315
316 i915_config.n_mux_regs = config->n_mux_regs;
317 i915_config.mux_regs_ptr = to_user_pointer(config->mux_regs);
318
319 i915_config.n_boolean_regs = config->n_b_counter_regs;
320 i915_config.boolean_regs_ptr = to_user_pointer(config->b_counter_regs);
321
322 i915_config.n_flex_regs = config->n_flex_regs;
323 i915_config.flex_regs_ptr = to_user_pointer(config->flex_regs);
324
325 int ret = gen_ioctl(fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &i915_config);
326 return ret > 0 ? ret : 0;
327 }
328
329 static void
330 init_oa_configs(struct gen_perf_config *perf, int fd)
331 {
332 hash_table_foreach(perf->oa_metrics_table, entry) {
333 const struct gen_perf_query_info *query = entry->data;
334 uint64_t config_id;
335
336 if (gen_perf_load_metric_id(perf, query->guid, &config_id)) {
337 DBG("metric set: %s (already loaded)\n", query->guid);
338 register_oa_config(perf, query, config_id);
339 continue;
340 }
341
342 int ret = i915_add_config(perf, fd, &query->config, query->guid);
343 if (ret < 0) {
344 DBG("Failed to load \"%s\" (%s) metrics set in kernel: %s\n",
345 query->name, query->guid, strerror(errno));
346 continue;
347 }
348
349 register_oa_config(perf, query, ret);
350 DBG("metric set: %s (added)\n", query->guid);
351 }
352 }
353
354 static void
355 compute_topology_builtins(struct gen_perf_config *perf,
356 const struct gen_device_info *devinfo)
357 {
358 perf->sys_vars.slice_mask = devinfo->slice_masks;
359 perf->sys_vars.n_eu_slices = devinfo->num_slices;
360
361 for (int i = 0; i < sizeof(devinfo->subslice_masks[i]); i++) {
362 perf->sys_vars.n_eu_sub_slices +=
363 __builtin_popcount(devinfo->subslice_masks[i]);
364 }
365
366 for (int i = 0; i < sizeof(devinfo->eu_masks); i++)
367 perf->sys_vars.n_eus += __builtin_popcount(devinfo->eu_masks[i]);
368
369 perf->sys_vars.eu_threads_count = devinfo->num_thread_per_eu;
370
371 /* The subslice mask builtin contains bits for all slices. Prior to Gen11
372 * it had groups of 3bits for each slice, on Gen11 it's 8bits for each
373 * slice.
374 *
375 * Ideally equations would be updated to have a slice/subslice query
376 * function/operator.
377 */
378 perf->sys_vars.subslice_mask = 0;
379
380 int bits_per_subslice = devinfo->gen == 11 ? 8 : 3;
381
382 for (int s = 0; s < util_last_bit(devinfo->slice_masks); s++) {
383 for (int ss = 0; ss < (devinfo->subslice_slice_stride * 8); ss++) {
384 if (gen_device_info_subslice_available(devinfo, s, ss))
385 perf->sys_vars.subslice_mask |= 1ULL << (s * bits_per_subslice + ss);
386 }
387 }
388 }
389
390 static bool
391 init_oa_sys_vars(struct gen_perf_config *perf, const struct gen_device_info *devinfo)
392 {
393 uint64_t min_freq_mhz = 0, max_freq_mhz = 0;
394
395 if (!read_sysfs_drm_device_file_uint64(perf, "gt_min_freq_mhz", &min_freq_mhz))
396 return false;
397
398 if (!read_sysfs_drm_device_file_uint64(perf, "gt_max_freq_mhz", &max_freq_mhz))
399 return false;
400
401 memset(&perf->sys_vars, 0, sizeof(perf->sys_vars));
402 perf->sys_vars.gt_min_freq = min_freq_mhz * 1000000;
403 perf->sys_vars.gt_max_freq = max_freq_mhz * 1000000;
404 perf->sys_vars.timestamp_frequency = devinfo->timestamp_frequency;
405 perf->sys_vars.revision = devinfo->revision;
406 compute_topology_builtins(perf, devinfo);
407
408 return true;
409 }
410
411 typedef void (*perf_register_oa_queries_t)(struct gen_perf_config *);
412
413 static perf_register_oa_queries_t
414 get_register_queries_function(const struct gen_device_info *devinfo)
415 {
416 if (devinfo->is_haswell)
417 return gen_oa_register_queries_hsw;
418 if (devinfo->is_cherryview)
419 return gen_oa_register_queries_chv;
420 if (devinfo->is_broadwell)
421 return gen_oa_register_queries_bdw;
422 if (devinfo->is_broxton)
423 return gen_oa_register_queries_bxt;
424 if (devinfo->is_skylake) {
425 if (devinfo->gt == 2)
426 return gen_oa_register_queries_sklgt2;
427 if (devinfo->gt == 3)
428 return gen_oa_register_queries_sklgt3;
429 if (devinfo->gt == 4)
430 return gen_oa_register_queries_sklgt4;
431 }
432 if (devinfo->is_kabylake) {
433 if (devinfo->gt == 2)
434 return gen_oa_register_queries_kblgt2;
435 if (devinfo->gt == 3)
436 return gen_oa_register_queries_kblgt3;
437 }
438 if (devinfo->is_geminilake)
439 return gen_oa_register_queries_glk;
440 if (devinfo->is_coffeelake) {
441 if (devinfo->gt == 2)
442 return gen_oa_register_queries_cflgt2;
443 if (devinfo->gt == 3)
444 return gen_oa_register_queries_cflgt3;
445 }
446 if (devinfo->is_cannonlake)
447 return gen_oa_register_queries_cnl;
448 if (devinfo->gen == 11) {
449 if (devinfo->is_elkhartlake)
450 return gen_oa_register_queries_lkf;
451 return gen_oa_register_queries_icl;
452 }
453 if (devinfo->gen == 12)
454 return gen_oa_register_queries_tgl;
455
456 return NULL;
457 }
458
/* Register the "Pipeline Statistics Registers" query: a fixed set of
 * pipeline statistic counters (vertices, primitives, shader invocations,
 * depth-test passes, ...) read from dedicated statistics registers.
 * Register sets vary per generation, hence the devinfo checks below.
 */
static void
load_pipeline_statistic_metrics(struct gen_perf_config *perf_cfg,
                                const struct gen_device_info *devinfo)
{
   struct gen_perf_query_info *query =
      gen_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS);

   query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
   query->name = "Pipeline Statistics Registers";

   gen_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT,
                                     "N vertices submitted");
   gen_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
                                     "N primitives submitted");
   gen_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
                                     "N vertex shader invocations");

   /* Gen6 has single stream-out counters; Gen7+ has per-stream ones. */
   if (devinfo->gen == 6) {
      gen_perf_query_add_stat_reg(query, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1,
                                  "SO_PRIM_STORAGE_NEEDED",
                                  "N geometry shader stream-out primitives (total)");
      gen_perf_query_add_stat_reg(query, GEN6_SO_NUM_PRIMS_WRITTEN, 1, 1,
                                  "SO_NUM_PRIMS_WRITTEN",
                                  "N geometry shader stream-out primitives (written)");
   } else {
      gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(0), 1, 1,
                                  "SO_PRIM_STORAGE_NEEDED (Stream 0)",
                                  "N stream-out (stream 0) primitives (total)");
      gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(1), 1, 1,
                                  "SO_PRIM_STORAGE_NEEDED (Stream 1)",
                                  "N stream-out (stream 1) primitives (total)");
      gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(2), 1, 1,
                                  "SO_PRIM_STORAGE_NEEDED (Stream 2)",
                                  "N stream-out (stream 2) primitives (total)");
      gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(3), 1, 1,
                                  "SO_PRIM_STORAGE_NEEDED (Stream 3)",
                                  "N stream-out (stream 3) primitives (total)");
      gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(0), 1, 1,
                                  "SO_NUM_PRIMS_WRITTEN (Stream 0)",
                                  "N stream-out (stream 0) primitives (written)");
      gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(1), 1, 1,
                                  "SO_NUM_PRIMS_WRITTEN (Stream 1)",
                                  "N stream-out (stream 1) primitives (written)");
      gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(2), 1, 1,
                                  "SO_NUM_PRIMS_WRITTEN (Stream 2)",
                                  "N stream-out (stream 2) primitives (written)");
      gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(3), 1, 1,
                                  "SO_NUM_PRIMS_WRITTEN (Stream 3)",
                                  "N stream-out (stream 3) primitives (written)");
   }

   gen_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
                                     "N TCS shader invocations");
   gen_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
                                     "N TES shader invocations");

   gen_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
                                     "N geometry shader invocations");
   gen_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
                                     "N geometry shader primitives emitted");

   gen_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
                                     "N primitives entering clipping");
   gen_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
                                     "N primitives leaving clipping");

   /* On HSW and Gen8 the counter increments per-sample (divide by 4 via
    * the numerator/denominator arguments); elsewhere it's per-invocation.
    */
   if (devinfo->is_haswell || devinfo->gen == 8) {
      gen_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
                                  "N fragment shader invocations",
                                  "N fragment shader invocations");
   } else {
      gen_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
                                        "N fragment shader invocations");
   }

   gen_perf_query_add_basic_stat_reg(query, PS_DEPTH_COUNT,
                                     "N z-pass fragments");

   if (devinfo->gen >= 7) {
      gen_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
                                        "N compute shader invocations");
   }

   /* One u64 result slot per registered counter. */
   query->data_size = sizeof(uint64_t) * query->n_counters;
}
544
545 static int
546 i915_perf_version(int drm_fd)
547 {
548 int tmp;
549 drm_i915_getparam_t gp = {
550 .param = I915_PARAM_PERF_REVISION,
551 .value = &tmp,
552 };
553
554 int ret = gen_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp);
555
556 /* Return 0 if this getparam is not supported, the first version supported
557 * is 1.
558 */
559 return ret < 0 ? 0 : tmp;
560 }
561
/* Probe kernel support for the i915 perf/OA interface and, when usable,
 * register all OA metric sets for this device.
 *
 * Returns false when OA metrics cannot be used: no kernel support,
 * insufficient privilege (perf_stream_paranoid), unknown device, or a
 * sysfs lookup failure.
 */
static bool
load_oa_metrics(struct gen_perf_config *perf, int fd,
                const struct gen_device_info *devinfo)
{
   perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo);
   bool i915_perf_oa_available = false;
   struct stat sb;

   perf->i915_query_supported = i915_query_perf_config_supported(perf, fd);
   perf->i915_perf_version = i915_perf_version(fd);

   /* The existence of this sysctl parameter implies the kernel supports
    * the i915 perf interface.
    */
   if (stat("/proc/sys/dev/i915/perf_stream_paranoid", &sb) == 0) {

      /* If _paranoid == 1 then on Gen8+ we won't be able to access OA
       * metrics unless running as root.
       */
      if (devinfo->is_haswell)
         i915_perf_oa_available = true;
      else {
         uint64_t paranoid = 1;

         /* Best effort: on read failure, keep the conservative default. */
         read_file_uint64("/proc/sys/dev/i915/perf_stream_paranoid", &paranoid);

         if (paranoid == 0 || geteuid() == 0)
            i915_perf_oa_available = true;
      }
   }

   if (!i915_perf_oa_available ||
       !oa_register ||
       !get_sysfs_dev_dir(perf, fd) ||
       !init_oa_sys_vars(perf, devinfo))
      return false;

   perf->oa_metrics_table =
      _mesa_hash_table_create(perf, _mesa_hash_string,
                              _mesa_key_string_equal);

   /* Index all the metric sets mesa knows about before looking to see what
    * the kernel is advertising.
    */
   oa_register(perf);

   /* Prefer uploading configs through the ADD_CONFIG ioctl; otherwise fall
    * back to matching against what sysfs advertises (older kernels, or
    * when disabled through INTEL_DEBUG for debugging).
    */
   if (likely((INTEL_DEBUG & DEBUG_NO_OACONFIG) == 0) &&
       kernel_has_dynamic_config_support(perf, fd))
      init_oa_configs(perf, fd);
   else
      enumerate_sysfs_metrics(perf);

   return true;
}
616
/* Fetch the register programming of a kernel OA config identified by GUID.
 *
 * Returns a freshly ralloc-ed gen_perf_registers (free with ralloc_free()),
 * or NULL when the i915 query interface is unsupported or the config is
 * unknown to the kernel.
 */
struct gen_perf_registers *
gen_perf_load_configuration(struct gen_perf_config *perf_cfg, int fd, const char *guid)
{
   if (!perf_cfg->i915_query_supported)
      return NULL;

   /* First pass: query with no register buffers just to learn how many
    * registers of each kind the config contains.
    */
   struct drm_i915_perf_oa_config i915_config = { 0, };
   if (!i915_query_perf_config_data(perf_cfg, fd, guid, &i915_config))
      return NULL;

   struct gen_perf_registers *config = rzalloc(NULL, struct gen_perf_registers);
   config->n_flex_regs = i915_config.n_flex_regs;
   config->flex_regs = rzalloc_array(config, struct gen_perf_query_register_prog, config->n_flex_regs);
   config->n_mux_regs = i915_config.n_mux_regs;
   config->mux_regs = rzalloc_array(config, struct gen_perf_query_register_prog, config->n_mux_regs);
   config->n_b_counter_regs = i915_config.n_boolean_regs;
   config->b_counter_regs = rzalloc_array(config, struct gen_perf_query_register_prog, config->n_b_counter_regs);

   /*
    * struct gen_perf_query_register_prog maps exactly to the tuple of
    * (register offset, register value) returned by the i915.
    */
   i915_config.flex_regs_ptr = to_user_pointer(config->flex_regs);
   i915_config.mux_regs_ptr = to_user_pointer(config->mux_regs);
   i915_config.boolean_regs_ptr = to_user_pointer(config->b_counter_regs);
   /* Second pass: have the kernel fill the allocated register arrays. */
   if (!i915_query_perf_config_data(perf_cfg, fd, guid, &i915_config)) {
      ralloc_free(config);
      return NULL;
   }

   return config;
}
649
/* Upload a register configuration to the kernel and return its metric
 * set id (0 denotes failure, see i915_add_config()).
 *
 * When guid is NULL, a deterministic GUID is derived from a SHA1 of the
 * register programming, so identical configs map to the same kernel
 * entry and are only stored once.
 */
uint64_t
gen_perf_store_configuration(struct gen_perf_config *perf_cfg, int fd,
                             const struct gen_perf_registers *config,
                             const char *guid)
{
   if (guid)
      return i915_add_config(perf_cfg, fd, config, guid);

   /* Hash all the register programming to derive a stable GUID. */
   struct mesa_sha1 sha1_ctx;
   _mesa_sha1_init(&sha1_ctx);

   if (config->flex_regs) {
      _mesa_sha1_update(&sha1_ctx, config->flex_regs,
                        sizeof(config->flex_regs[0]) *
                        config->n_flex_regs);
   }
   if (config->mux_regs) {
      _mesa_sha1_update(&sha1_ctx, config->mux_regs,
                        sizeof(config->mux_regs[0]) *
                        config->n_mux_regs);
   }
   if (config->b_counter_regs) {
      _mesa_sha1_update(&sha1_ctx, config->b_counter_regs,
                        sizeof(config->b_counter_regs[0]) *
                        config->n_b_counter_regs);
   }

   uint8_t hash[20];
   _mesa_sha1_final(&sha1_ctx, hash);

   char formatted_hash[41];
   _mesa_sha1_format(formatted_hash, hash);

   /* Lay the first 32 hex digits out in 8-4-4-4-12 UUID form. */
   char generated_guid[37];
   snprintf(generated_guid, sizeof(generated_guid),
            "%.8s-%.4s-%.4s-%.4s-%.12s",
            &formatted_hash[0], &formatted_hash[8],
            &formatted_hash[8 + 4], &formatted_hash[8 + 4 + 4],
            &formatted_hash[8 + 4 + 4 + 4]);

   /* Check if already present. */
   uint64_t id;
   if (gen_perf_load_metric_id(perf_cfg, generated_guid, &id))
      return id;

   return i915_add_config(perf_cfg, fd, config, generated_guid);
}
697
/* Accumulate the delta between two 32-bit OA counter snapshots; the
 * unsigned subtraction handles a single counter wrap-around.
 */
static inline void
accumulate_uint32(const uint32_t *report0,
                  const uint32_t *report1,
                  uint64_t *accumulator)
{
   uint32_t delta = *report1 - *report0;
   *accumulator += delta;
}
706
/* Accumulate the delta of the a_index-th 40-bit A counter between two OA
 * reports. The low 32 bits live at dword (a_index + 4); the high bytes
 * of all A counters are packed after the first 40 dwords of the report.
 */
static inline void
accumulate_uint40(int a_index,
                  const uint32_t *report0,
                  const uint32_t *report1,
                  uint64_t *accumulator)
{
   const uint8_t *high0 = (const uint8_t *)(report0 + 40);
   const uint8_t *high1 = (const uint8_t *)(report1 + 40);
   uint64_t value0 = ((uint64_t) high0[a_index] << 32) | report0[a_index + 4];
   uint64_t value1 = ((uint64_t) high1[a_index] << 32) | report1[a_index + 4];

   /* Handle a single wrap of the 40-bit counter. */
   if (value1 < value0)
      *accumulator += (1ULL << 40) + value1 - value0;
   else
      *accumulator += value1 - value0;
}
729
/* Decode the slice/unslice clock frequencies embedded in the RPT_ID dword
 * (dword 0) of a Gen8+ OA report.
 *
 * The report captures bits of the RP_FREQ_NORMAL register, split as:
 *
 *   RPT_ID[31:25]: RP_FREQ_NORMAL[20:14] (low squashed_slice_clock_frequency)
 *   RPT_ID[10:9]:  RP_FREQ_NORMAL[22:21] (high squashed_slice_clock_frequency)
 *   RPT_ID[8:0]:   RP_FREQ_NORMAL[31:23] (squashed_unslice_clock_frequency)
 *
 * Each ratio unit corresponds to a multiple of 33.33MHz 2xclk
 * (16.666667MHz 1xclk).
 */
static void
gen8_read_report_clock_ratios(const uint32_t *report,
                              uint64_t *slice_freq_hz,
                              uint64_t *unslice_freq_hz)
{
   const uint32_t rpt_id = report[0];
   const uint32_t unslice_ratio = rpt_id & 0x1ff;
   const uint32_t slice_ratio = ((rpt_id >> 25) & 0x7f) |
                                (((rpt_id >> 9) & 0x3) << 7);

   *slice_freq_hz = (uint64_t) slice_ratio * 16666667ULL;
   *unslice_freq_hz = (uint64_t) unslice_ratio * 16666667ULL;
}
758
759 void
760 gen_perf_query_result_read_frequencies(struct gen_perf_query_result *result,
761 const struct gen_device_info *devinfo,
762 const uint32_t *start,
763 const uint32_t *end)
764 {
765 /* Slice/Unslice frequency is only available in the OA reports when the
766 * "Disable OA reports due to clock ratio change" field in
767 * OA_DEBUG_REGISTER is set to 1. This is how the kernel programs this
768 * global register (see drivers/gpu/drm/i915/i915_perf.c)
769 *
770 * Documentation says this should be available on Gen9+ but experimentation
771 * shows that Gen8 reports similar values, so we enable it there too.
772 */
773 if (devinfo->gen < 8)
774 return;
775
776 gen8_read_report_clock_ratios(start,
777 &result->slice_frequency[0],
778 &result->unslice_frequency[0]);
779 gen8_read_report_clock_ratios(end,
780 &result->slice_frequency[1],
781 &result->unslice_frequency[1]);
782 }
783
/* Accumulate the counter deltas between two OA reports (start/end) into
 * result->accumulator according to the query's OA report format.
 *
 * Accumulator layout for A32u40_A4u32_B8_C8:
 *   [0] timestamp, [1] clock, [2..33] 40-bit A counters,
 *   [34..37] 32-bit A counters, [38..53] B then C counters.
 * For A45_B8_C8: [0] timestamp, [1..61] counters.
 */
void
gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
                                 const struct gen_perf_query_info *query,
                                 const uint32_t *start,
                                 const uint32_t *end)
{
   int i, idx = 0;

   /* Latch the first valid HW context id seen (report dword 2). */
   if (result->hw_id == OA_REPORT_INVALID_CTX_ID &&
       start[2] != OA_REPORT_INVALID_CTX_ID)
      result->hw_id = start[2];
   if (result->reports_accumulated == 0)
      result->begin_timestamp = start[1];
   result->reports_accumulated++;

   switch (query->oa_format) {
   case I915_OA_FORMAT_A32u40_A4u32_B8_C8:
      accumulate_uint32(start + 1, end + 1, result->accumulator + idx++); /* timestamp */
      accumulate_uint32(start + 3, end + 3, result->accumulator + idx++); /* clock */

      /* 32x 40bit A counters... */
      for (i = 0; i < 32; i++)
         accumulate_uint40(i, start, end, result->accumulator + idx++);

      /* 4x 32bit A counters... */
      for (i = 0; i < 4; i++)
         accumulate_uint32(start + 36 + i, end + 36 + i, result->accumulator + idx++);

      /* 8x 32bit B counters + 8x 32bit C counters... */
      for (i = 0; i < 16; i++)
         accumulate_uint32(start + 48 + i, end + 48 + i, result->accumulator + idx++);
      break;

   case I915_OA_FORMAT_A45_B8_C8:
      accumulate_uint32(start + 1, end + 1, result->accumulator); /* timestamp */

      for (i = 0; i < 61; i++)
         accumulate_uint32(start + 3 + i, end + 3 + i, result->accumulator + 1 + i);
      break;

   default:
      unreachable("Can't accumulate OA counters in unknown format");
   }

}
829
830 void
831 gen_perf_query_result_clear(struct gen_perf_query_result *result)
832 {
833 memset(result, 0, sizeof(*result));
834 result->hw_id = OA_REPORT_INVALID_CTX_ID; /* invalid */
835 }
836
/* Register every metric query available on this device: pipeline
 * statistics always, and the MDAPI/OA queries when the kernel's OA
 * interface is usable.
 */
void
gen_perf_init_metrics(struct gen_perf_config *perf_cfg,
                      const struct gen_device_info *devinfo,
                      int drm_fd)
{
   load_pipeline_statistic_metrics(perf_cfg, devinfo);
   gen_perf_register_mdapi_statistic_query(perf_cfg, devinfo);

   bool oa_loaded = load_oa_metrics(perf_cfg, drm_fd, devinfo);
   if (oa_loaded)
      gen_perf_register_mdapi_oa_query(perf_cfg, devinfo);
}