intel/perf: store default sseu configuration
[mesa.git] / src / intel / perf / gen_perf.c
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <dirent.h>
25
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 #include <fcntl.h>
29 #include <unistd.h>
30 #include <errno.h>
31
32 #ifndef HAVE_DIRENT_D_TYPE
33 #include <limits.h> // PATH_MAX
34 #endif
35
36 #include <drm-uapi/i915_drm.h>
37
38 #include "common/gen_gem.h"
39
40 #include "dev/gen_debug.h"
41 #include "dev/gen_device_info.h"
42
43 #include "perf/gen_perf.h"
44 #include "perf/gen_perf_regs.h"
45 #include "perf/gen_perf_mdapi.h"
46 #include "perf/gen_perf_metrics.h"
47 #include "perf/gen_perf_private.h"
48
49 #include "util/bitscan.h"
50 #include "util/mesa-sha1.h"
51 #include "util/u_math.h"
52
53 #define FILE_DEBUG_FLAG DEBUG_PERFMON
54
55 #define OA_REPORT_INVALID_CTX_ID (0xffffffff)
56
/* Convert a CPU pointer into the 64-bit integer representation expected
 * by the i915 uAPI (pointers in ioctl payloads are passed as u64).
 */
static inline uint64_t to_user_pointer(void *ptr)
{
   uintptr_t addr = (uintptr_t) ptr;
   return (uint64_t) addr;
}
61
62 static bool
63 is_dir_or_link(const struct dirent *entry, const char *parent_dir)
64 {
65 #ifdef HAVE_DIRENT_D_TYPE
66 return entry->d_type == DT_DIR || entry->d_type == DT_LNK;
67 #else
68 struct stat st;
69 char path[PATH_MAX + 1];
70 snprintf(path, sizeof(path), "%s/%s", parent_dir, entry->d_name);
71 lstat(path, &st);
72 return S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode);
73 #endif
74 }
75
/* Locate the sysfs directory of the DRM device behind `fd`
 * (/sys/dev/char/<maj>:<min>/device/drm/cardX) and store it in
 * perf->sysfs_dev_dir.
 *
 * Returns false if the fd is not a character device, path construction
 * overflows the buffer, or no "card*" entry is found.
 */
static bool
get_sysfs_dev_dir(struct gen_perf_config *perf, int fd)
{
   struct stat sb;
   int min, maj;
   DIR *drmdir;
   struct dirent *drm_entry;
   int len;

   perf->sysfs_dev_dir[0] = '\0';

   if (fstat(fd, &sb)) {
      DBG("Failed to stat DRM fd\n");
      return false;
   }

   maj = major(sb.st_rdev);
   min = minor(sb.st_rdev);

   if (!S_ISCHR(sb.st_mode)) {
      DBG("DRM fd is not a character device as expected\n");
      return false;
   }

   len = snprintf(perf->sysfs_dev_dir,
                  sizeof(perf->sysfs_dev_dir),
                  "/sys/dev/char/%d:%d/device/drm", maj, min);
   if (len < 0 || len >= sizeof(perf->sysfs_dev_dir)) {
      DBG("Failed to concatenate sysfs path to drm device\n");
      return false;
   }

   drmdir = opendir(perf->sysfs_dev_dir);
   if (!drmdir) {
      DBG("Failed to open %s: %m\n", perf->sysfs_dev_dir);
      return false;
   }

   /* Keep the first "card*" directory/symlink found; other entries in
    * the drm/ directory are skipped. */
   while ((drm_entry = readdir(drmdir))) {
      if (is_dir_or_link(drm_entry, perf->sysfs_dev_dir) &&
          strncmp(drm_entry->d_name, "card", 4) == 0)
      {
         len = snprintf(perf->sysfs_dev_dir,
                        sizeof(perf->sysfs_dev_dir),
                        "/sys/dev/char/%d:%d/device/drm/%s",
                        maj, min, drm_entry->d_name);
         closedir(drmdir);
         if (len < 0 || len >= sizeof(perf->sysfs_dev_dir))
            return false;
         else
            return true;
      }
   }

   closedir(drmdir);

   DBG("Failed to find cardX directory under /sys/dev/char/%d:%d/device/drm\n",
       maj, min);

   return false;
}
137
/* Read a uint64 value from `file` into `*val`.
 *
 * The value is parsed with strtoull in base 0, so decimal, octal and
 * hex ("0x...") representations are all accepted. Returns false if the
 * file cannot be opened or read; `*val` is untouched in that case.
 */
static bool
read_file_uint64(const char *file, uint64_t *val)
{
   char buf[32];
   int fd, n;

   /* Use the named O_RDONLY constant instead of a bare 0 (same value). */
   fd = open(file, O_RDONLY);
   if (fd < 0)
      return false;
   /* Retry reads interrupted by signals. */
   while ((n = read(fd, buf, sizeof(buf) - 1)) < 0 &&
          errno == EINTR);
   close(fd);
   if (n < 0)
      return false;

   buf[n] = '\0';
   *val = strtoull(buf, NULL, 0);

   return true;
}
158
159 static bool
160 read_sysfs_drm_device_file_uint64(struct gen_perf_config *perf,
161 const char *file,
162 uint64_t *value)
163 {
164 char buf[512];
165 int len;
166
167 len = snprintf(buf, sizeof(buf), "%s/%s", perf->sysfs_dev_dir, file);
168 if (len < 0 || len >= sizeof(buf)) {
169 DBG("Failed to concatenate sys filename to read u64 from\n");
170 return false;
171 }
172
173 return read_file_uint64(buf, value);
174 }
175
176 static void
177 register_oa_config(struct gen_perf_config *perf,
178 const struct gen_perf_query_info *query,
179 uint64_t config_id)
180 {
181 struct gen_perf_query_info *registered_query =
182 gen_perf_append_query_info(perf, 0);
183
184 *registered_query = *query;
185 registered_query->oa_metrics_set_id = config_id;
186 DBG("metric set registered: id = %" PRIu64", guid = %s\n",
187 registered_query->oa_metrics_set_id, query->guid);
188 }
189
/* Walk <sysfs_dev_dir>/metrics and register every metric set that the
 * kernel advertises AND that mesa knows about (i.e. whose GUID is a key
 * in perf->oa_metrics_table). Used as fallback when configs cannot be
 * added dynamically.
 */
static void
enumerate_sysfs_metrics(struct gen_perf_config *perf)
{
   DIR *metricsdir = NULL;
   struct dirent *metric_entry;
   char buf[256];
   int len;

   len = snprintf(buf, sizeof(buf), "%s/metrics", perf->sysfs_dev_dir);
   if (len < 0 || len >= sizeof(buf)) {
      DBG("Failed to concatenate path to sysfs metrics/ directory\n");
      return;
   }

   metricsdir = opendir(buf);
   if (!metricsdir) {
      DBG("Failed to open %s: %m\n", buf);
      return;
   }

   while ((metric_entry = readdir(metricsdir))) {
      struct hash_entry *entry;
      /* Each metric set is a subdirectory named by its GUID; skip hidden
       * entries and anything that is not a directory or symlink. */
      if (!is_dir_or_link(metric_entry, buf) ||
          metric_entry->d_name[0] == '.')
         continue;

      DBG("metric set: %s\n", metric_entry->d_name);
      entry = _mesa_hash_table_search(perf->oa_metrics_table,
                                      metric_entry->d_name);
      if (entry) {
         uint64_t id;
         if (!gen_perf_load_metric_id(perf, metric_entry->d_name, &id)) {
            DBG("Failed to read metric set id from %s: %m", buf);
            continue;
         }

         register_oa_config(perf, (const struct gen_perf_query_info *)entry->data, id);
      } else
         DBG("metric set not known by mesa (skipping)\n");
   }

   closedir(metricsdir);
}
233
234 static bool
235 kernel_has_dynamic_config_support(struct gen_perf_config *perf, int fd)
236 {
237 uint64_t invalid_config_id = UINT64_MAX;
238
239 return gen_ioctl(fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG,
240 &invalid_config_id) < 0 && errno == ENOENT;
241 }
242
243 static int
244 i915_query_items(struct gen_perf_config *perf, int fd,
245 struct drm_i915_query_item *items, uint32_t n_items)
246 {
247 struct drm_i915_query q = {
248 .num_items = n_items,
249 .items_ptr = to_user_pointer(items),
250 };
251 return gen_ioctl(fd, DRM_IOCTL_I915_QUERY, &q);
252 }
253
254 static bool
255 i915_query_perf_config_supported(struct gen_perf_config *perf, int fd)
256 {
257 struct drm_i915_query_item item = {
258 .query_id = DRM_I915_QUERY_PERF_CONFIG,
259 .flags = DRM_I915_QUERY_PERF_CONFIG_LIST,
260 };
261
262 return i915_query_items(perf, fd, &item, 1) == 0 && item.length > 0;
263 }
264
/* Query the kernel's description of the OA config identified by `guid`
 * via DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID.
 *
 * The caller drives a two-step protocol (see gen_perf_load_configuration):
 * first call with the n_*_regs fields of *config zeroed so the kernel
 * fills in the register counts, then again with the counts and the
 * *_regs_ptr destinations set so it copies the registers out.
 * Returns false if the query fails.
 */
static bool
i915_query_perf_config_data(struct gen_perf_config *perf,
                            int fd, const char *guid,
                            struct drm_i915_perf_oa_config *config)
{
   /* The uAPI expects the drm_i915_perf_oa_config to immediately follow
    * the query header in a single contiguous buffer. */
   struct {
      struct drm_i915_query_perf_config query;
      struct drm_i915_perf_oa_config config;
   } item_data;
   struct drm_i915_query_item item = {
      .query_id = DRM_I915_QUERY_PERF_CONFIG,
      .flags = DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID,
      .data_ptr = to_user_pointer(&item_data),
      .length = sizeof(item_data),
   };

   memset(&item_data, 0, sizeof(item_data));
   memcpy(item_data.query.uuid, guid, sizeof(item_data.query.uuid));
   memcpy(&item_data.config, config, sizeof(item_data.config));

   if (!(i915_query_items(perf, fd, &item, 1) == 0 && item.length > 0))
      return false;

   /* Copy the kernel-updated config description back to the caller. */
   memcpy(config, &item_data.config, sizeof(item_data.config));

   return true;
}
292
293 bool
294 gen_perf_load_metric_id(struct gen_perf_config *perf_cfg,
295 const char *guid,
296 uint64_t *metric_id)
297 {
298 char config_path[280];
299
300 snprintf(config_path, sizeof(config_path), "%s/metrics/%s/id",
301 perf_cfg->sysfs_dev_dir, guid);
302
303 /* Don't recreate already loaded configs. */
304 return read_file_uint64(config_path, metric_id);
305 }
306
/* Register an OA configuration with the kernel through
 * DRM_IOCTL_I915_PERF_ADD_CONFIG.
 *
 * Returns the kernel's (positive) metric set id on success, or 0 on
 * failure (errno is left set by the ioctl).
 */
static uint64_t
i915_add_config(struct gen_perf_config *perf, int fd,
                const struct gen_perf_registers *config,
                const char *guid)
{
   struct drm_i915_perf_oa_config i915_config = { 0, };

   memcpy(i915_config.uuid, guid, sizeof(i915_config.uuid));

   i915_config.n_mux_regs = config->n_mux_regs;
   i915_config.mux_regs_ptr = to_user_pointer(config->mux_regs);

   /* Note: mesa calls these "b_counter" regs, the uAPI "boolean" regs. */
   i915_config.n_boolean_regs = config->n_b_counter_regs;
   i915_config.boolean_regs_ptr = to_user_pointer(config->b_counter_regs);

   i915_config.n_flex_regs = config->n_flex_regs;
   i915_config.flex_regs_ptr = to_user_pointer(config->flex_regs);

   /* The ioctl returns the new config id on success, negative on error. */
   int ret = gen_ioctl(fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &i915_config);
   return ret > 0 ? ret : 0;
}
328
329 static void
330 init_oa_configs(struct gen_perf_config *perf, int fd)
331 {
332 hash_table_foreach(perf->oa_metrics_table, entry) {
333 const struct gen_perf_query_info *query = entry->data;
334 uint64_t config_id;
335
336 if (gen_perf_load_metric_id(perf, query->guid, &config_id)) {
337 DBG("metric set: %s (already loaded)\n", query->guid);
338 register_oa_config(perf, query, config_id);
339 continue;
340 }
341
342 int ret = i915_add_config(perf, fd, &query->config, query->guid);
343 if (ret < 0) {
344 DBG("Failed to load \"%s\" (%s) metrics set in kernel: %s\n",
345 query->name, query->guid, strerror(errno));
346 continue;
347 }
348
349 register_oa_config(perf, query, ret);
350 DBG("metric set: %s (added)\n", query->guid);
351 }
352 }
353
354 static void
355 compute_topology_builtins(struct gen_perf_config *perf,
356 const struct gen_device_info *devinfo)
357 {
358 perf->sys_vars.slice_mask = devinfo->slice_masks;
359 perf->sys_vars.n_eu_slices = devinfo->num_slices;
360
361 for (int i = 0; i < sizeof(devinfo->subslice_masks[i]); i++) {
362 perf->sys_vars.n_eu_sub_slices +=
363 __builtin_popcount(devinfo->subslice_masks[i]);
364 }
365
366 for (int i = 0; i < sizeof(devinfo->eu_masks); i++)
367 perf->sys_vars.n_eus += __builtin_popcount(devinfo->eu_masks[i]);
368
369 perf->sys_vars.eu_threads_count = devinfo->num_thread_per_eu;
370
371 /* The subslice mask builtin contains bits for all slices. Prior to Gen11
372 * it had groups of 3bits for each slice, on Gen11 it's 8bits for each
373 * slice.
374 *
375 * Ideally equations would be updated to have a slice/subslice query
376 * function/operator.
377 */
378 perf->sys_vars.subslice_mask = 0;
379
380 int bits_per_subslice = devinfo->gen == 11 ? 8 : 3;
381
382 for (int s = 0; s < util_last_bit(devinfo->slice_masks); s++) {
383 for (int ss = 0; ss < (devinfo->subslice_slice_stride * 8); ss++) {
384 if (gen_device_info_subslice_available(devinfo, s, ss))
385 perf->sys_vars.subslice_mask |= 1ULL << (s * bits_per_subslice + ss);
386 }
387 }
388 }
389
390 static bool
391 init_oa_sys_vars(struct gen_perf_config *perf, const struct gen_device_info *devinfo)
392 {
393 uint64_t min_freq_mhz = 0, max_freq_mhz = 0;
394
395 if (!read_sysfs_drm_device_file_uint64(perf, "gt_min_freq_mhz", &min_freq_mhz))
396 return false;
397
398 if (!read_sysfs_drm_device_file_uint64(perf, "gt_max_freq_mhz", &max_freq_mhz))
399 return false;
400
401 memset(&perf->sys_vars, 0, sizeof(perf->sys_vars));
402 perf->sys_vars.gt_min_freq = min_freq_mhz * 1000000;
403 perf->sys_vars.gt_max_freq = max_freq_mhz * 1000000;
404 perf->sys_vars.timestamp_frequency = devinfo->timestamp_frequency;
405 perf->sys_vars.revision = devinfo->revision;
406 compute_topology_builtins(perf, devinfo);
407
408 return true;
409 }
410
411 typedef void (*perf_register_oa_queries_t)(struct gen_perf_config *);
412
413 static perf_register_oa_queries_t
414 get_register_queries_function(const struct gen_device_info *devinfo)
415 {
416 if (devinfo->is_haswell)
417 return gen_oa_register_queries_hsw;
418 if (devinfo->is_cherryview)
419 return gen_oa_register_queries_chv;
420 if (devinfo->is_broadwell)
421 return gen_oa_register_queries_bdw;
422 if (devinfo->is_broxton)
423 return gen_oa_register_queries_bxt;
424 if (devinfo->is_skylake) {
425 if (devinfo->gt == 2)
426 return gen_oa_register_queries_sklgt2;
427 if (devinfo->gt == 3)
428 return gen_oa_register_queries_sklgt3;
429 if (devinfo->gt == 4)
430 return gen_oa_register_queries_sklgt4;
431 }
432 if (devinfo->is_kabylake) {
433 if (devinfo->gt == 2)
434 return gen_oa_register_queries_kblgt2;
435 if (devinfo->gt == 3)
436 return gen_oa_register_queries_kblgt3;
437 }
438 if (devinfo->is_geminilake)
439 return gen_oa_register_queries_glk;
440 if (devinfo->is_coffeelake) {
441 if (devinfo->gt == 2)
442 return gen_oa_register_queries_cflgt2;
443 if (devinfo->gt == 3)
444 return gen_oa_register_queries_cflgt3;
445 }
446 if (devinfo->is_cannonlake)
447 return gen_oa_register_queries_cnl;
448 if (devinfo->gen == 11) {
449 if (devinfo->is_elkhartlake)
450 return gen_oa_register_queries_lkf;
451 return gen_oa_register_queries_icl;
452 }
453 if (devinfo->gen == 12)
454 return gen_oa_register_queries_tgl;
455
456 return NULL;
457 }
458
/* Register the "Pipeline Statistics Registers" query, built from the
 * pipeline statistics counter registers. The exact register set varies
 * with the hardware generation.
 */
static void
load_pipeline_statistic_metrics(struct gen_perf_config *perf_cfg,
                                const struct gen_device_info *devinfo)
{
   struct gen_perf_query_info *query =
      gen_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS);

   query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
   query->name = "Pipeline Statistics Registers";

   gen_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT,
                                     "N vertices submitted");
   gen_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
                                     "N primitives submitted");
   gen_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
                                     "N vertex shader invocations");

   /* Gen6 has a single pair of stream-out registers; Gen7+ exposes one
    * pair per stream. */
   if (devinfo->gen == 6) {
      gen_perf_query_add_stat_reg(query, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1,
                                  "SO_PRIM_STORAGE_NEEDED",
                                  "N geometry shader stream-out primitives (total)");
      gen_perf_query_add_stat_reg(query, GEN6_SO_NUM_PRIMS_WRITTEN, 1, 1,
                                  "SO_NUM_PRIMS_WRITTEN",
                                  "N geometry shader stream-out primitives (written)");
   } else {
      gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(0), 1, 1,
                                  "SO_PRIM_STORAGE_NEEDED (Stream 0)",
                                  "N stream-out (stream 0) primitives (total)");
      gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(1), 1, 1,
                                  "SO_PRIM_STORAGE_NEEDED (Stream 1)",
                                  "N stream-out (stream 1) primitives (total)");
      gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(2), 1, 1,
                                  "SO_PRIM_STORAGE_NEEDED (Stream 2)",
                                  "N stream-out (stream 2) primitives (total)");
      gen_perf_query_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(3), 1, 1,
                                  "SO_PRIM_STORAGE_NEEDED (Stream 3)",
                                  "N stream-out (stream 3) primitives (total)");
      gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(0), 1, 1,
                                  "SO_NUM_PRIMS_WRITTEN (Stream 0)",
                                  "N stream-out (stream 0) primitives (written)");
      gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(1), 1, 1,
                                  "SO_NUM_PRIMS_WRITTEN (Stream 1)",
                                  "N stream-out (stream 1) primitives (written)");
      gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(2), 1, 1,
                                  "SO_NUM_PRIMS_WRITTEN (Stream 2)",
                                  "N stream-out (stream 2) primitives (written)");
      gen_perf_query_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(3), 1, 1,
                                  "SO_NUM_PRIMS_WRITTEN (Stream 3)",
                                  "N stream-out (stream 3) primitives (written)");
   }

   gen_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
                                     "N TCS shader invocations");
   gen_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
                                     "N TES shader invocations");

   gen_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
                                     "N geometry shader invocations");
   gen_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
                                     "N geometry shader primitives emitted");

   gen_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
                                     "N primitives entering clipping");
   gen_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
                                     "N primitives leaving clipping");

   /* NOTE(review): the (1, 4) factor presumably scales the raw
    * PS_INVOCATION_COUNT on HSW/Gen8 — confirm against the
    * gen_perf_query_add_stat_reg numerator/denominator semantics. */
   if (devinfo->is_haswell || devinfo->gen == 8) {
      gen_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
                                  "N fragment shader invocations",
                                  "N fragment shader invocations");
   } else {
      gen_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
                                        "N fragment shader invocations");
   }

   gen_perf_query_add_basic_stat_reg(query, PS_DEPTH_COUNT,
                                     "N z-pass fragments");

   /* Compute shader invocation counter only exists on Gen7+. */
   if (devinfo->gen >= 7) {
      gen_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
                                        "N compute shader invocations");
   }

   /* One 64-bit value per registered counter. */
   query->data_size = sizeof(uint64_t) * query->n_counters;
}
544
545 static int
546 i915_perf_version(int drm_fd)
547 {
548 int tmp;
549 drm_i915_getparam_t gp = {
550 .param = I915_PARAM_PERF_REVISION,
551 .value = &tmp,
552 };
553
554 int ret = gen_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp);
555
556 /* Return 0 if this getparam is not supported, the first version supported
557 * is 1.
558 */
559 return ret < 0 ? 0 : tmp;
560 }
561
562 static void
563 i915_get_sseu(int drm_fd, struct drm_i915_gem_context_param_sseu *sseu)
564 {
565 struct drm_i915_gem_context_param arg = {
566 .param = I915_CONTEXT_PARAM_SSEU,
567 .size = sizeof(*sseu),
568 .value = to_user_pointer(sseu)
569 };
570
571 gen_ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg);
572 }
573
/* Detect i915 perf/OA support and register all OA metric sets usable on
 * this device. Returns false when OA metrics are unavailable (no kernel
 * support, insufficient privileges, unknown device, or sysfs failure).
 */
static bool
load_oa_metrics(struct gen_perf_config *perf, int fd,
                const struct gen_device_info *devinfo)
{
   perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo);
   bool i915_perf_oa_available = false;
   struct stat sb;

   perf->i915_query_supported = i915_query_perf_config_supported(perf, fd);
   perf->i915_perf_version = i915_perf_version(fd);

   /* Record the default SSEU configuration. */
   i915_get_sseu(fd, &perf->sseu);

   /* The existence of this sysctl parameter implies the kernel supports
    * the i915 perf interface.
    */
   if (stat("/proc/sys/dev/i915/perf_stream_paranoid", &sb) == 0) {

      /* If _paranoid == 1 then on Gen8+ we won't be able to access OA
       * metrics unless running as root.
       */
      if (devinfo->is_haswell)
         i915_perf_oa_available = true;
      else {
         uint64_t paranoid = 1;

         /* Best effort: if the read fails, stay with the conservative
          * default of paranoid == 1. */
         read_file_uint64("/proc/sys/dev/i915/perf_stream_paranoid", &paranoid);

         if (paranoid == 0 || geteuid() == 0)
            i915_perf_oa_available = true;
      }
   }

   if (!i915_perf_oa_available ||
       !oa_register ||
       !get_sysfs_dev_dir(perf, fd) ||
       !init_oa_sys_vars(perf, devinfo))
      return false;

   /* GUID -> gen_perf_query_info table. */
   perf->oa_metrics_table =
      _mesa_hash_table_create(perf, _mesa_hash_string,
                              _mesa_key_string_equal);

   /* Index all the metric sets mesa knows about before looking to see what
    * the kernel is advertising.
    */
   oa_register(perf);

   /* Prefer adding configs dynamically; fall back to enumerating what is
    * already exposed in sysfs (or when debugging with NO_OACONFIG). */
   if (likely((INTEL_DEBUG & DEBUG_NO_OACONFIG) == 0) &&
       kernel_has_dynamic_config_support(perf, fd))
      init_oa_configs(perf, fd);
   else
      enumerate_sysfs_metrics(perf);

   return true;
}
631
/* Read back the register programming of the OA config `guid` from the
 * kernel.
 *
 * Returns a newly allocated gen_perf_registers owned by the caller
 * (release with ralloc_free()), or NULL when the kernel lacks the perf
 * config query or the query fails.
 */
struct gen_perf_registers *
gen_perf_load_configuration(struct gen_perf_config *perf_cfg, int fd, const char *guid)
{
   if (!perf_cfg->i915_query_supported)
      return NULL;

   /* First query, with zeroed counts: the kernel fills in the number of
    * registers of each kind. */
   struct drm_i915_perf_oa_config i915_config = { 0, };
   if (!i915_query_perf_config_data(perf_cfg, fd, guid, &i915_config))
      return NULL;

   struct gen_perf_registers *config = rzalloc(NULL, struct gen_perf_registers);
   config->n_flex_regs = i915_config.n_flex_regs;
   config->flex_regs = rzalloc_array(config, struct gen_perf_query_register_prog, config->n_flex_regs);
   config->n_mux_regs = i915_config.n_mux_regs;
   config->mux_regs = rzalloc_array(config, struct gen_perf_query_register_prog, config->n_mux_regs);
   config->n_b_counter_regs = i915_config.n_boolean_regs;
   config->b_counter_regs = rzalloc_array(config, struct gen_perf_query_register_prog, config->n_b_counter_regs);

   /*
    * struct gen_perf_query_register_prog maps exactly to the tuple of
    * (register offset, register value) returned by the i915.
    */
   i915_config.flex_regs_ptr = to_user_pointer(config->flex_regs);
   i915_config.mux_regs_ptr = to_user_pointer(config->mux_regs);
   i915_config.boolean_regs_ptr = to_user_pointer(config->b_counter_regs);
   /* Second query: with counts and destination pointers set, the kernel
    * copies the registers into the arrays allocated above. */
   if (!i915_query_perf_config_data(perf_cfg, fd, guid, &i915_config)) {
      ralloc_free(config);
      return NULL;
   }

   return config;
}
664
/* Register `config` with the kernel and return its metric set id (0 on
 * failure, matching i915_add_config()).
 *
 * When `guid` is NULL, a deterministic UUID-shaped name is derived from
 * a SHA1 of the register programming, so an identical config maps to the
 * same GUID and is never added twice.
 */
uint64_t
gen_perf_store_configuration(struct gen_perf_config *perf_cfg, int fd,
                             const struct gen_perf_registers *config,
                             const char *guid)
{
   if (guid)
      return i915_add_config(perf_cfg, fd, config, guid);

   /* Hash all (offset, value) register pairs of the config. */
   struct mesa_sha1 sha1_ctx;
   _mesa_sha1_init(&sha1_ctx);

   if (config->flex_regs) {
      _mesa_sha1_update(&sha1_ctx, config->flex_regs,
                        sizeof(config->flex_regs[0]) *
                        config->n_flex_regs);
   }
   if (config->mux_regs) {
      _mesa_sha1_update(&sha1_ctx, config->mux_regs,
                        sizeof(config->mux_regs[0]) *
                        config->n_mux_regs);
   }
   if (config->b_counter_regs) {
      _mesa_sha1_update(&sha1_ctx, config->b_counter_regs,
                        sizeof(config->b_counter_regs[0]) *
                        config->n_b_counter_regs);
   }

   uint8_t hash[20];
   _mesa_sha1_final(&sha1_ctx, hash);

   char formatted_hash[41];
   _mesa_sha1_format(formatted_hash, hash);

   /* Lay out the first 32 hex digits of the hash as 8-4-4-4-12, i.e. the
    * textual shape of a UUID. */
   char generated_guid[37];
   snprintf(generated_guid, sizeof(generated_guid),
            "%.8s-%.4s-%.4s-%.4s-%.12s",
            &formatted_hash[0], &formatted_hash[8],
            &formatted_hash[8 + 4], &formatted_hash[8 + 4 + 4],
            &formatted_hash[8 + 4 + 4 + 4]);

   /* Check if already present. */
   uint64_t id;
   if (gen_perf_load_metric_id(perf_cfg, generated_guid, &id))
      return id;

   return i915_add_config(perf_cfg, fd, config, generated_guid);
}
712
713 /* Accumulate 32bits OA counters */
714 static inline void
715 accumulate_uint32(const uint32_t *report0,
716 const uint32_t *report1,
717 uint64_t *accumulator)
718 {
719 *accumulator += (uint32_t)(*report1 - *report0);
720 }
721
/* Accumulate the delta of a 40-bit OA counter between two reports.
 *
 * In this report layout the low 32 bits of A-counter `a_index` live at
 * report[4 + a_index] and the high 8 bits are packed one byte per
 * counter starting at report[40]. Wraparound at 2^40 is handled.
 */
static inline void
accumulate_uint40(int a_index,
                  const uint32_t *report0,
                  const uint32_t *report1,
                  uint64_t *accumulator)
{
   const uint8_t *high0 = (const uint8_t *)(report0 + 40);
   const uint8_t *high1 = (const uint8_t *)(report1 + 40);
   uint64_t value0 = ((uint64_t)high0[a_index] << 32) | report0[a_index + 4];
   uint64_t value1 = ((uint64_t)high1[a_index] << 32) | report1[a_index + 4];

   uint64_t delta = (value1 >= value0)
      ? value1 - value0
      : (1ULL << 40) + value1 - value0;

   *accumulator += delta;
}
744
static void
gen8_read_report_clock_ratios(const uint32_t *report,
                              uint64_t *slice_freq_hz,
                              uint64_t *unslice_freq_hz)
{
   /* The lower 16bits of the RPT_ID field of the OA reports contains a
    * snapshot of the bits coming from the RP_FREQ_NORMAL register and is
    * divided this way :
    *
    * RPT_ID[31:25]: RP_FREQ_NORMAL[20:14] (low squashed_slice_clock_frequency)
    * RPT_ID[10:9]:  RP_FREQ_NORMAL[22:21] (high squashed_slice_clock_frequency)
    * RPT_ID[8:0]:   RP_FREQ_NORMAL[31:23] (squashed_unslice_clock_frequency)
    *
    * RP_FREQ_NORMAL[31:23]: Software Unslice Ratio Request
    *                        Multiple of 33.33MHz 2xclk (16 MHz 1xclk)
    *
    * RP_FREQ_NORMAL[22:14]: Software Slice Ratio Request
    *                        Multiple of 33.33MHz 2xclk (16 MHz 1xclk)
    */
   const uint32_t rpt_id = report[0];

   uint32_t unslice_ratio = rpt_id & 0x1ff;
   uint32_t slice_ratio =
      ((rpt_id >> 25) & 0x7f) | (((rpt_id >> 9) & 0x3) << 7);

   /* Each ratio step is ~16.67MHz (33.33MHz 2xclk). */
   *slice_freq_hz = (uint64_t)slice_ratio * 16666667ULL;
   *unslice_freq_hz = (uint64_t)unslice_ratio * 16666667ULL;
}
773
774 void
775 gen_perf_query_result_read_frequencies(struct gen_perf_query_result *result,
776 const struct gen_device_info *devinfo,
777 const uint32_t *start,
778 const uint32_t *end)
779 {
780 /* Slice/Unslice frequency is only available in the OA reports when the
781 * "Disable OA reports due to clock ratio change" field in
782 * OA_DEBUG_REGISTER is set to 1. This is how the kernel programs this
783 * global register (see drivers/gpu/drm/i915/i915_perf.c)
784 *
785 * Documentation says this should be available on Gen9+ but experimentation
786 * shows that Gen8 reports similar values, so we enable it there too.
787 */
788 if (devinfo->gen < 8)
789 return;
790
791 gen8_read_report_clock_ratios(start,
792 &result->slice_frequency[0],
793 &result->unslice_frequency[0]);
794 gen8_read_report_clock_ratios(end,
795 &result->slice_frequency[1],
796 &result->unslice_frequency[1]);
797 }
798
/* Accumulate the counter deltas between two OA reports (start/end) into
 * result->accumulator, according to the report format of `query`.
 *
 * Also captures the hw context id from the first report carrying a valid
 * one (start[2]) and the timestamp (start[1]) of the very first report.
 */
void
gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
                                 const struct gen_perf_query_info *query,
                                 const uint32_t *start,
                                 const uint32_t *end)
{
   int i, idx = 0;

   if (result->hw_id == OA_REPORT_INVALID_CTX_ID &&
       start[2] != OA_REPORT_INVALID_CTX_ID)
      result->hw_id = start[2];
   if (result->reports_accumulated == 0)
      result->begin_timestamp = start[1];
   result->reports_accumulated++;

   switch (query->oa_format) {
   case I915_OA_FORMAT_A32u40_A4u32_B8_C8:
      /* Accumulator layout: [0] timestamp, [1] gpu clock, then the
       * counters in report order. */
      accumulate_uint32(start + 1, end + 1, result->accumulator + idx++); /* timestamp */
      accumulate_uint32(start + 3, end + 3, result->accumulator + idx++); /* clock */

      /* 32x 40bit A counters... */
      for (i = 0; i < 32; i++)
         accumulate_uint40(i, start, end, result->accumulator + idx++);

      /* 4x 32bit A counters... */
      for (i = 0; i < 4; i++)
         accumulate_uint32(start + 36 + i, end + 36 + i, result->accumulator + idx++);

      /* 8x 32bit B counters + 8x 32bit C counters... */
      for (i = 0; i < 16; i++)
         accumulate_uint32(start + 48 + i, end + 48 + i, result->accumulator + idx++);
      break;

   case I915_OA_FORMAT_A45_B8_C8:
      /* Accumulator layout: [0] timestamp, then 61 32bit counters. */
      accumulate_uint32(start + 1, end + 1, result->accumulator); /* timestamp */

      for (i = 0; i < 61; i++)
         accumulate_uint32(start + 3 + i, end + 3 + i, result->accumulator + 1 + i);
      break;

   default:
      unreachable("Can't accumulate OA counters in unknown format");
   }

}
844
845 void
846 gen_perf_query_result_clear(struct gen_perf_query_result *result)
847 {
848 memset(result, 0, sizeof(*result));
849 result->hw_id = OA_REPORT_INVALID_CTX_ID; /* invalid */
850 }
851
/* Entry point: register all performance queries available on this
 * device/kernel combination.
 */
void
gen_perf_init_metrics(struct gen_perf_config *perf_cfg,
                      const struct gen_device_info *devinfo,
                      int drm_fd)
{
   /* Pipeline statistics queries don't depend on i915 perf support. */
   load_pipeline_statistic_metrics(perf_cfg, devinfo);
   gen_perf_register_mdapi_statistic_query(perf_cfg, devinfo);

   /* OA queries (and the MDAPI queries layered on them) additionally
    * require working kernel OA support. */
   const bool have_oa = load_oa_metrics(perf_cfg, drm_fd, devinfo);
   if (have_oa)
      gen_perf_register_mdapi_oa_query(perf_cfg, devinfo);
}