From 134e750e16bfc53480e0bba6f0ae3e1d2a7fb87c Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Sun, 27 May 2018 20:33:25 +0100 Subject: [PATCH] i965: extract performance query metrics We would like to reuse performance query metrics in other APIs. Let's make the query code dealing with the processing of raw counters into human readable values API agnostic. Signed-off-by: Lionel Landwerlin Reviewed-by: Mark Janes Reviewed-by: Kenneth Graunke --- src/intel/Android.mk | 1 + src/intel/Android.perf.mk | 52 ++ src/intel/Makefile.perf.am | 41 ++ src/intel/Makefile.sources | 24 + src/intel/meson.build | 1 + src/intel/perf/gen_perf.c | 435 ++++++++++++ src/intel/perf/gen_perf.h | 258 +++++++ .../i965/brw_oa.py => intel/perf/gen_perf.py} | 79 +-- src/intel/perf/meson.build | 40 ++ .../brw_oa_bdw.xml => intel/perf/oa-bdw.xml} | 0 .../brw_oa_bxt.xml => intel/perf/oa-bxt.xml} | 0 .../perf/oa-cflgt2.xml} | 0 .../perf/oa-cflgt3.xml} | 0 .../brw_oa_chv.xml => intel/perf/oa-chv.xml} | 0 .../brw_oa_cnl.xml => intel/perf/oa-cnl.xml} | 0 .../brw_oa_glk.xml => intel/perf/oa-glk.xml} | 0 .../brw_oa_hsw.xml => intel/perf/oa-hsw.xml} | 0 .../brw_oa_icl.xml => intel/perf/oa-icl.xml} | 0 .../perf/oa-kblgt2.xml} | 0 .../perf/oa-kblgt3.xml} | 0 .../perf/oa-sklgt2.xml} | 0 .../perf/oa-sklgt3.xml} | 0 .../perf/oa-sklgt4.xml} | 0 src/mesa/drivers/dri/i965/Android.mk | 3 +- src/mesa/drivers/dri/i965/Makefile.sources | 20 - src/mesa/drivers/dri/i965/brw_context.h | 73 +- .../drivers/dri/i965/brw_performance_query.c | 653 ++++-------------- .../drivers/dri/i965/brw_performance_query.h | 80 +-- .../dri/i965/brw_performance_query_mdapi.c | 114 ++- .../dri/i965/brw_performance_query_metrics.h | 57 -- src/mesa/drivers/dri/i965/meson.build | 33 +- 31 files changed, 1098 insertions(+), 866 deletions(-) create mode 100644 src/intel/Android.perf.mk create mode 100644 src/intel/Makefile.perf.am create mode 100644 src/intel/perf/gen_perf.c create mode 100644 src/intel/perf/gen_perf.h rename 
src/{mesa/drivers/dri/i965/brw_oa.py => intel/perf/gen_perf.py} (89%) create mode 100644 src/intel/perf/meson.build rename src/{mesa/drivers/dri/i965/brw_oa_bdw.xml => intel/perf/oa-bdw.xml} (100%) rename src/{mesa/drivers/dri/i965/brw_oa_bxt.xml => intel/perf/oa-bxt.xml} (100%) rename src/{mesa/drivers/dri/i965/brw_oa_cflgt2.xml => intel/perf/oa-cflgt2.xml} (100%) rename src/{mesa/drivers/dri/i965/brw_oa_cflgt3.xml => intel/perf/oa-cflgt3.xml} (100%) rename src/{mesa/drivers/dri/i965/brw_oa_chv.xml => intel/perf/oa-chv.xml} (100%) rename src/{mesa/drivers/dri/i965/brw_oa_cnl.xml => intel/perf/oa-cnl.xml} (100%) rename src/{mesa/drivers/dri/i965/brw_oa_glk.xml => intel/perf/oa-glk.xml} (100%) rename src/{mesa/drivers/dri/i965/brw_oa_hsw.xml => intel/perf/oa-hsw.xml} (100%) rename src/{mesa/drivers/dri/i965/brw_oa_icl.xml => intel/perf/oa-icl.xml} (100%) rename src/{mesa/drivers/dri/i965/brw_oa_kblgt2.xml => intel/perf/oa-kblgt2.xml} (100%) rename src/{mesa/drivers/dri/i965/brw_oa_kblgt3.xml => intel/perf/oa-kblgt3.xml} (100%) rename src/{mesa/drivers/dri/i965/brw_oa_sklgt2.xml => intel/perf/oa-sklgt2.xml} (100%) rename src/{mesa/drivers/dri/i965/brw_oa_sklgt3.xml => intel/perf/oa-sklgt3.xml} (100%) rename src/{mesa/drivers/dri/i965/brw_oa_sklgt4.xml => intel/perf/oa-sklgt4.xml} (100%) delete mode 100644 src/mesa/drivers/dri/i965/brw_performance_query_metrics.h diff --git a/src/intel/Android.mk b/src/intel/Android.mk index 380473d0acb..96498141dd2 100644 --- a/src/intel/Android.mk +++ b/src/intel/Android.mk @@ -31,4 +31,5 @@ include $(LOCAL_PATH)/Android.compiler.mk include $(LOCAL_PATH)/Android.dev.mk include $(LOCAL_PATH)/Android.genxml.mk include $(LOCAL_PATH)/Android.isl.mk +include $(LOCAL_PATH)/Android.perf.mk include $(LOCAL_PATH)/Android.vulkan.mk diff --git a/src/intel/Android.perf.mk b/src/intel/Android.perf.mk new file mode 100644 index 00000000000..0d7d746a632 --- /dev/null +++ b/src/intel/Android.perf.mk @@ -0,0 +1,52 @@ +# Copyright © 2018 Intel 
Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+# + +# --------------------------------------- +# Build libmesa_intel_perf +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_MODULE := libmesa_intel_perf + +LOCAL_MODULE_CLASS := STATIC_LIBRARIES + +intermediates := $(call local-generated-sources-dir) + +LOCAL_C_INCLUDES := $(MESA_TOP)/include/drm-uapi + +LOCAL_SRC_FILES := $(GEN_PERF_FILES) + +LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \ + $(GEN_PERF_GENERATED_FILES)) + +$(intermediates)/perf/gen_perf_metrics.c: $(LOCAL_PATH)/perf/gen_perf.py $(addprefix $(MESA_TOP)/src/intel/,$(GEN_PERF_XML_FILES)) + @echo "target Generated: $(PRIVATE_MODULE) <= $(notdir $(@))" + @mkdir -p $(dir $@) + $(hide) $(MESA_PYTHON2) $< \ + --code=$@ \ + --header=$(@:%.c=%.h) \ + $(addprefix $(MESA_TOP)/src/intel/,$(GEN_PERF_XML_FILES)) + +$(intermediates)/perf/gen_perf_metrics.h: $(intermediates)/perf/gen_perf_metrics.c + +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/intel/Makefile.perf.am b/src/intel/Makefile.perf.am new file mode 100644 index 00000000000..a9d896c458d --- /dev/null +++ b/src/intel/Makefile.perf.am @@ -0,0 +1,41 @@ +# Copyright © 2018 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +BUILT_SOURCES += \ + $(GEN_PERF_GENERATED_FILES) + +noinst_LTLIBRARIES += perf/libintel_perf.la + +perf_libintel_perf_la_SOURCES = $(GEN_PERF_FILES) $(GEN_PERF_GENERATED_FILES) +perf_libintel_perf_la_CFLAGS = $(AM_CFLAGS) + +perf/gen_perf_metrics.c: perf/gen_perf.py $(GEN_PERF_XML_FILES) + $(MKDIR_GEN) + $(PYTHON_GEN) $(PYTHON_FLAGS) $(srcdir)/perf/gen_perf.py \ + --code=$(builddir)/perf/gen_perf_metrics.c \ + --header=$(builddir)/perf/gen_perf_metrics.h \ + $(GEN_PERF_XML_FILES:%=$(srcdir)/%) + +perf/gen_perf_metrics.h: perf/gen_perf_metrics.c + +EXTRA_DIST += \ + $(GEN_PERF_XML_FILES) \ + perf/gen_perf.py diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources index ffe51f87698..b5915a0d9a4 100644 --- a/src/intel/Makefile.sources +++ b/src/intel/Makefile.sources @@ -314,3 +314,27 @@ VULKAN_GEN10_FILES := \ VULKAN_GEN11_FILES := \ vulkan/gen8_cmd_buffer.c \ $(VULKAN_GENX_FILES) + +GEN_PERF_XML_FILES = \ + perf/oa-hsw.xml \ + perf/oa-bdw.xml \ + perf/oa-chv.xml \ + perf/oa-sklgt2.xml \ + perf/oa-sklgt3.xml \ + perf/oa-sklgt4.xml \ + perf/oa-bxt.xml \ + perf/oa-kblgt2.xml \ + perf/oa-kblgt3.xml \ + perf/oa-glk.xml \ + perf/oa-cflgt2.xml \ + perf/oa-cflgt3.xml \ + perf/oa-cnl.xml \ + perf/oa-icl.xml + +GEN_PERF_FILES = \ + perf/gen_perf.c \ + perf/gen_perf.h + +GEN_PERF_GENERATED_FILES = \ + perf/gen_perf_metrics.c \ + perf/gen_perf_metrics.h diff --git a/src/intel/meson.build b/src/intel/meson.build index a5bb03e314a..7b6aa40ba8d 100644 --- a/src/intel/meson.build +++ 
b/src/intel/meson.build @@ -27,6 +27,7 @@ subdir('dev') subdir('isl') subdir('common') subdir('compiler') +subdir('perf') if with_tools.contains('intel') or with_tools.contains('intel-ui') subdir('tools') endif diff --git a/src/intel/perf/gen_perf.c b/src/intel/perf/gen_perf.c new file mode 100644 index 00000000000..bc26cff9c99 --- /dev/null +++ b/src/intel/perf/gen_perf.c @@ -0,0 +1,435 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <dirent.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> + +#include <drm-uapi/i915_drm.h> + +#include "gen_perf.h" +#include "perf/gen_perf_metrics.h" + +#include "dev/gen_debug.h" +#include "dev/gen_device_info.h" +#include "util/bitscan.h" + +#define FILE_DEBUG_FLAG DEBUG_PERFMON + +static bool +get_sysfs_dev_dir(struct gen_perf *perf, int fd) +{ + struct stat sb; + int min, maj; + DIR *drmdir; + struct dirent *drm_entry; + int len; + + perf->sysfs_dev_dir[0] = '\0'; + + if (fstat(fd, &sb)) { + DBG("Failed to stat DRM fd\n"); + return false; + } + + maj = major(sb.st_rdev); + min = minor(sb.st_rdev); + + if (!S_ISCHR(sb.st_mode)) { + DBG("DRM fd is not a character device as expected\n"); + return false; + } + + len = snprintf(perf->sysfs_dev_dir, + sizeof(perf->sysfs_dev_dir), + "/sys/dev/char/%d:%d/device/drm", maj, min); + if (len < 0 || len >= sizeof(perf->sysfs_dev_dir)) { + DBG("Failed to concatenate sysfs path to drm device\n"); + return false; + } + + drmdir = opendir(perf->sysfs_dev_dir); + if (!drmdir) { + DBG("Failed to open %s: %m\n", perf->sysfs_dev_dir); + return false; + } + + while ((drm_entry = readdir(drmdir))) { + if ((drm_entry->d_type == DT_DIR || + drm_entry->d_type == DT_LNK) && + strncmp(drm_entry->d_name, "card", 4) == 0) + { + len = snprintf(perf->sysfs_dev_dir, + sizeof(perf->sysfs_dev_dir), + "/sys/dev/char/%d:%d/device/drm/%s", + maj, min, drm_entry->d_name); + closedir(drmdir); + if (len < 0 || len >= sizeof(perf->sysfs_dev_dir)) + return false; + else + return true; + } + } + + closedir(drmdir); + + DBG("Failed to find cardX directory under /sys/dev/char/%d:%d/device/drm\n", + maj, min); + + return false; +} + +static bool +read_file_uint64(const char *file, uint64_t *val) +{ + char buf[32]; + int fd, n; + + fd = open(file, 0); + if (fd < 0) + return false; + while ((n = read(fd, buf, sizeof (buf) - 1)) < 0 && + errno == EINTR); + close(fd); + if (n < 0) + return false; + + buf[n] = '\0'; + *val = strtoull(buf, NULL, 0); + + return true; +} +
+static bool +read_sysfs_drm_device_file_uint64(struct gen_perf *perf, + const char *file, + uint64_t *value) +{ + char buf[512]; + int len; + + len = snprintf(buf, sizeof(buf), "%s/%s", perf->sysfs_dev_dir, file); + if (len < 0 || len >= sizeof(buf)) { + DBG("Failed to concatenate sys filename to read u64 from\n"); + return false; + } + + return read_file_uint64(buf, value); +} + +static void +register_oa_config(struct gen_perf *perf, + const struct gen_perf_query_info *query, + uint64_t config_id) +{ + struct gen_perf_query_info *registred_query = + gen_perf_query_append_query_info(perf, 0); + + *registred_query = *query; + registred_query->oa_metrics_set_id = config_id; + DBG("metric set registred: id = %" PRIu64", guid = %s\n", + registred_query->oa_metrics_set_id, query->guid); +} + +static void +enumerate_sysfs_metrics(struct gen_perf *perf) +{ + DIR *metricsdir = NULL; + struct dirent *metric_entry; + char buf[256]; + int len; + + len = snprintf(buf, sizeof(buf), "%s/metrics", perf->sysfs_dev_dir); + if (len < 0 || len >= sizeof(buf)) { + DBG("Failed to concatenate path to sysfs metrics/ directory\n"); + return; + } + + metricsdir = opendir(buf); + if (!metricsdir) { + DBG("Failed to open %s: %m\n", buf); + return; + } + + while ((metric_entry = readdir(metricsdir))) { + struct hash_entry *entry; + + if ((metric_entry->d_type != DT_DIR && + metric_entry->d_type != DT_LNK) || + metric_entry->d_name[0] == '.') + continue; + + DBG("metric set: %s\n", metric_entry->d_name); + entry = _mesa_hash_table_search(perf->oa_metrics_table, + metric_entry->d_name); + if (entry) { + uint64_t id; + + len = snprintf(buf, sizeof(buf), "%s/metrics/%s/id", + perf->sysfs_dev_dir, metric_entry->d_name); + if (len < 0 || len >= sizeof(buf)) { + DBG("Failed to concatenate path to sysfs metric id file\n"); + continue; + } + + if (!read_file_uint64(buf, &id)) { + DBG("Failed to read metric set id from %s: %m", buf); + continue; + } + + register_oa_config(perf, (const struct 
gen_perf_query_info *)entry->data, id); + } else + DBG("metric set not known by mesa (skipping)\n"); + } + + closedir(metricsdir); +} + +static bool +kernel_has_dynamic_config_support(struct gen_perf *perf, int fd) +{ + hash_table_foreach(perf->oa_metrics_table, entry) { + struct gen_perf_query_info *query = entry->data; + char config_path[280]; + uint64_t config_id; + + snprintf(config_path, sizeof(config_path), "%s/metrics/%s/id", + perf->sysfs_dev_dir, query->guid); + + /* Look for the test config, which we know we can't replace. */ + if (read_file_uint64(config_path, &config_id) && config_id == 1) { + return perf->ioctl(fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, + &config_id) < 0 && errno == ENOENT; + } + } + + return false; +} + +bool +gen_perf_load_metric_id(struct gen_perf *perf, const char *guid, + uint64_t *metric_id) +{ + char config_path[280]; + + snprintf(config_path, sizeof(config_path), "%s/metrics/%s/id", + perf->sysfs_dev_dir, guid); + + /* Don't recreate already loaded configs. 
*/ + return read_file_uint64(config_path, metric_id); +} + +static void +init_oa_configs(struct gen_perf *perf, int fd) +{ + hash_table_foreach(perf->oa_metrics_table, entry) { + const struct gen_perf_query_info *query = entry->data; + struct drm_i915_perf_oa_config config; + uint64_t config_id; + int ret; + + if (gen_perf_load_metric_id(perf, query->guid, &config_id)) { + DBG("metric set: %s (already loaded)\n", query->guid); + register_oa_config(perf, query, config_id); + continue; + } + + memset(&config, 0, sizeof(config)); + + memcpy(config.uuid, query->guid, sizeof(config.uuid)); + + config.n_mux_regs = query->n_mux_regs; + config.mux_regs_ptr = (uintptr_t) query->mux_regs; + + config.n_boolean_regs = query->n_b_counter_regs; + config.boolean_regs_ptr = (uintptr_t) query->b_counter_regs; + + config.n_flex_regs = query->n_flex_regs; + config.flex_regs_ptr = (uintptr_t) query->flex_regs; + + ret = perf->ioctl(fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config); + if (ret < 0) { + DBG("Failed to load \"%s\" (%s) metrics set in kernel: %s\n", + query->name, query->guid, strerror(errno)); + continue; + } + + register_oa_config(perf, query, ret); + DBG("metric set: %s (added)\n", query->guid); + } +} + +static void +compute_topology_builtins(struct gen_perf *perf, + const struct gen_device_info *devinfo) +{ + perf->sys_vars.slice_mask = devinfo->slice_masks; + perf->sys_vars.n_eu_slices = devinfo->num_slices; + + for (int i = 0; i < sizeof(devinfo->subslice_masks); i++) { + perf->sys_vars.n_eu_sub_slices += + __builtin_popcount(devinfo->subslice_masks[i]); + } + + for (int i = 0; i < sizeof(devinfo->eu_masks); i++) + perf->sys_vars.n_eus += __builtin_popcount(devinfo->eu_masks[i]); + + perf->sys_vars.eu_threads_count = + perf->sys_vars.n_eus * devinfo->num_thread_per_eu; + + /* The subslice mask builtin contains bits for all slices. Prior to Gen11 + * it had groups of 3bits for each slice, on Gen11 it's 8bits for each + * slice.
+ * + * Ideally equations would be updated to have a slice/subslice query + * function/operator. + */ + perf->sys_vars.subslice_mask = 0; + + int bits_per_subslice = devinfo->gen == 11 ? 8 : 3; + + for (int s = 0; s < util_last_bit(devinfo->slice_masks); s++) { + for (int ss = 0; ss < (devinfo->subslice_slice_stride * 8); ss++) { + if (gen_device_info_subslice_available(devinfo, s, ss)) + perf->sys_vars.subslice_mask |= 1ULL << (s * bits_per_subslice + ss); + } + } +} + +static bool +init_oa_sys_vars(struct gen_perf *perf, const struct gen_device_info *devinfo) +{ + uint64_t min_freq_mhz = 0, max_freq_mhz = 0; + + if (!read_sysfs_drm_device_file_uint64(perf, "gt_min_freq_mhz", &min_freq_mhz)) + return false; + + if (!read_sysfs_drm_device_file_uint64(perf, "gt_max_freq_mhz", &max_freq_mhz)) + return false; + + memset(&perf->sys_vars, 0, sizeof(perf->sys_vars)); + perf->sys_vars.gt_min_freq = min_freq_mhz * 1000000; + perf->sys_vars.gt_max_freq = max_freq_mhz * 1000000; + perf->sys_vars.timestamp_frequency = devinfo->timestamp_frequency; + perf->sys_vars.revision = devinfo->revision; + compute_topology_builtins(perf, devinfo); + + return true; +} + +typedef void (*perf_register_oa_queries_t)(struct gen_perf *); + +static perf_register_oa_queries_t +get_register_queries_function(const struct gen_device_info *devinfo) +{ + if (devinfo->is_haswell) + return gen_oa_register_queries_hsw; + if (devinfo->is_cherryview) + return gen_oa_register_queries_chv; + if (devinfo->is_broadwell) + return gen_oa_register_queries_bdw; + if (devinfo->is_broxton) + return gen_oa_register_queries_bxt; + if (devinfo->is_skylake) { + if (devinfo->gt == 2) + return gen_oa_register_queries_sklgt2; + if (devinfo->gt == 3) + return gen_oa_register_queries_sklgt3; + if (devinfo->gt == 4) + return gen_oa_register_queries_sklgt4; + } + if (devinfo->is_kabylake) { + if (devinfo->gt == 2) + return gen_oa_register_queries_kblgt2; + if (devinfo->gt == 3) + return gen_oa_register_queries_kblgt3; + } +
if (devinfo->is_geminilake) + return gen_oa_register_queries_glk; + if (devinfo->is_coffeelake) { + if (devinfo->gt == 2) + return gen_oa_register_queries_cflgt2; + if (devinfo->gt == 3) + return gen_oa_register_queries_cflgt3; + } + if (devinfo->is_cannonlake) + return gen_oa_register_queries_cnl; + + return NULL; +} + +bool +gen_perf_load_oa_metrics(struct gen_perf *perf, int fd, + const struct gen_device_info *devinfo) +{ + perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo); + bool i915_perf_oa_available = false; + struct stat sb; + + /* The existence of this sysctl parameter implies the kernel supports + * the i915 perf interface. + */ + if (stat("/proc/sys/dev/i915/perf_stream_paranoid", &sb) == 0) { + + /* If _paranoid == 1 then on Gen8+ we won't be able to access OA + * metrics unless running as root. + */ + if (devinfo->is_haswell) + i915_perf_oa_available = true; + else { + uint64_t paranoid = 1; + + read_file_uint64("/proc/sys/dev/i915/perf_stream_paranoid", &paranoid); + + if (paranoid == 0 || geteuid() == 0) + i915_perf_oa_available = true; + } + } + + if (!i915_perf_oa_available || + !oa_register || + !get_sysfs_dev_dir(perf, fd) || + !init_oa_sys_vars(perf, devinfo)) + return false; + + perf->oa_metrics_table = + _mesa_hash_table_create(perf, _mesa_key_hash_string, + _mesa_key_string_equal); + + /* Index all the metric sets mesa knows about before looking to see what + * the kernel is advertising.
+ */ + oa_register(perf); + + if (likely((INTEL_DEBUG & DEBUG_NO_OACONFIG) == 0) && + kernel_has_dynamic_config_support(perf, fd)) + init_oa_configs(perf, fd); + else + enumerate_sysfs_metrics(perf); + + return true; +} diff --git a/src/intel/perf/gen_perf.h b/src/intel/perf/gen_perf.h new file mode 100644 index 00000000000..5d47ebd2925 --- /dev/null +++ b/src/intel/perf/gen_perf.h @@ -0,0 +1,258 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef GEN_PERF_H +#define GEN_PERF_H + +#include <stdio.h> +#include <stdint.h> +#include <string.h> + +#include <sys/sysmacros.h> + +#include "util/hash_table.h" +#include "util/ralloc.h" + +struct gen_device_info; + +struct gen_perf; +struct gen_perf_query_info; + +enum gen_perf_counter_type { + GEN_PERF_COUNTER_TYPE_EVENT, + GEN_PERF_COUNTER_TYPE_DURATION_NORM, + GEN_PERF_COUNTER_TYPE_DURATION_RAW, + GEN_PERF_COUNTER_TYPE_THROUGHPUT, + GEN_PERF_COUNTER_TYPE_RAW, + GEN_PERF_COUNTER_TYPE_TIMESTAMP, +}; + +enum gen_perf_counter_data_type { + GEN_PERF_COUNTER_DATA_TYPE_BOOL32, + GEN_PERF_COUNTER_DATA_TYPE_UINT32, + GEN_PERF_COUNTER_DATA_TYPE_UINT64, + GEN_PERF_COUNTER_DATA_TYPE_FLOAT, + GEN_PERF_COUNTER_DATA_TYPE_DOUBLE, +}; + +struct gen_pipeline_stat { + uint32_t reg; + uint32_t numerator; + uint32_t denominator; +}; + +struct gen_perf_query_counter { + const char *name; + const char *desc; + enum gen_perf_counter_type type; + enum gen_perf_counter_data_type data_type; + uint64_t raw_max; + size_t offset; + size_t size; + + union { + uint64_t (*oa_counter_read_uint64)(struct gen_perf *perf, + const struct gen_perf_query_info *query, + uint64_t *accumulator); + float (*oa_counter_read_float)(struct gen_perf *perf, + const struct gen_perf_query_info *query, + uint64_t *accumulator); + struct gen_pipeline_stat pipeline_stat; + }; +}; + +struct gen_perf_query_register_prog { + uint32_t reg; + uint32_t val; +}; + +struct gen_perf_query_info { + enum gen_perf_query_type { + GEN_PERF_QUERY_TYPE_OA, + GEN_PERF_QUERY_TYPE_RAW, + GEN_PERF_QUERY_TYPE_PIPELINE, + } kind; + const char *name; + const char *guid; + struct gen_perf_query_counter *counters; + int n_counters; + int max_counters; + size_t data_size; + + /* OA specific */ + uint64_t oa_metrics_set_id; + int oa_format; + + /* For indexing into the accumulator[] ...
*/ + int gpu_time_offset; + int gpu_clock_offset; + int a_offset; + int b_offset; + int c_offset; + + /* Register programming for a given query */ + struct gen_perf_query_register_prog *flex_regs; + uint32_t n_flex_regs; + + struct gen_perf_query_register_prog *mux_regs; + uint32_t n_mux_regs; + + struct gen_perf_query_register_prog *b_counter_regs; + uint32_t n_b_counter_regs; +}; + +struct gen_perf { + struct gen_perf_query_info *queries; + int n_queries; + + /* Variables referenced in the XML meta data for OA performance + * counters, e.g in the normalization equations. + * + * All uint64_t for consistent operand types in generated code + */ + struct { + uint64_t timestamp_frequency; /** $GpuTimestampFrequency */ + uint64_t n_eus; /** $EuCoresTotalCount */ + uint64_t n_eu_slices; /** $EuSlicesTotalCount */ + uint64_t n_eu_sub_slices; /** $EuSubslicesTotalCount */ + uint64_t eu_threads_count; /** $EuThreadsCount */ + uint64_t slice_mask; /** $SliceMask */ + uint64_t subslice_mask; /** $SubsliceMask */ + uint64_t gt_min_freq; /** $GpuMinFrequency */ + uint64_t gt_max_freq; /** $GpuMaxFrequency */ + uint64_t revision; /** $SkuRevisionId */ + } sys_vars; + + /* OA metric sets, indexed by GUID, as know by Mesa at build time, to + * cross-reference with the GUIDs of configs advertised by the kernel at + * runtime + */ + struct hash_table *oa_metrics_table; + + /* Location of the device's sysfs entry. 
*/ + char sysfs_dev_dir[256]; + + int (*ioctl)(int, unsigned long, void *); +}; + +static inline struct gen_perf_query_info * +gen_perf_query_append_query_info(struct gen_perf *perf, int max_counters) +{ + struct gen_perf_query_info *query; + + perf->queries = reralloc(perf, perf->queries, + struct gen_perf_query_info, + ++perf->n_queries); + query = &perf->queries[perf->n_queries - 1]; + memset(query, 0, sizeof(*query)); + + if (max_counters > 0) { + query->max_counters = max_counters; + query->counters = + rzalloc_array(perf, struct gen_perf_query_counter, max_counters); + } + + return query; +} + +static inline void +gen_perf_query_info_add_stat_reg(struct gen_perf_query_info *query, + uint32_t reg, + uint32_t numerator, + uint32_t denominator, + const char *name, + const char *description) +{ + struct gen_perf_query_counter *counter; + + assert(query->n_counters < query->max_counters); + + counter = &query->counters[query->n_counters]; + counter->name = name; + counter->desc = description; + counter->type = GEN_PERF_COUNTER_TYPE_RAW; + counter->data_type = GEN_PERF_COUNTER_DATA_TYPE_UINT64; + counter->size = sizeof(uint64_t); + counter->offset = sizeof(uint64_t) * query->n_counters; + counter->pipeline_stat.reg = reg; + counter->pipeline_stat.numerator = numerator; + counter->pipeline_stat.denominator = denominator; + + query->n_counters++; +} + +static inline void +gen_perf_query_info_add_basic_stat_reg(struct gen_perf_query_info *query, + uint32_t reg, const char *name) +{ + gen_perf_query_info_add_stat_reg(query, reg, 1, 1, name, name); +} + +/* Accumulate 32bits OA counters */ +static inline void +gen_perf_query_accumulate_uint32(const uint32_t *report0, + const uint32_t *report1, + uint64_t *accumulator) +{ + *accumulator += (uint32_t)(*report1 - *report0); +} + +/* Accumulate 40bits OA counters */ +static inline void +gen_perf_query_accumulate_uint40(int a_index, + const uint32_t *report0, + const uint32_t *report1, + uint64_t *accumulator) +{ + const 
uint8_t *high_bytes0 = (uint8_t *)(report0 + 40); + const uint8_t *high_bytes1 = (uint8_t *)(report1 + 40); + uint64_t high0 = (uint64_t)(high_bytes0[a_index]) << 32; + uint64_t high1 = (uint64_t)(high_bytes1[a_index]) << 32; + uint64_t value0 = report0[a_index + 4] | high0; + uint64_t value1 = report1[a_index + 4] | high1; + uint64_t delta; + + if (value0 > value1) + delta = (1ULL << 40) + value1 - value0; + else + delta = value1 - value0; + + *accumulator += delta; +} + +static inline struct gen_perf * +gen_perf_new(void *ctx, int (*ioctl_cb)(int, unsigned long, void *)) +{ + struct gen_perf *perf = rzalloc(ctx, struct gen_perf); + + perf->ioctl = ioctl_cb; + + return perf; +} + +bool gen_perf_load_oa_metrics(struct gen_perf *perf, int fd, + const struct gen_device_info *devinfo); +bool gen_perf_load_metric_id(struct gen_perf *perf, const char *guid, + uint64_t *metric_id); + +#endif /* GEN_PERF_H */ diff --git a/src/mesa/drivers/dri/i965/brw_oa.py b/src/intel/perf/gen_perf.py similarity index 89% rename from src/mesa/drivers/dri/i965/brw_oa.py rename to src/intel/perf/gen_perf.py index 75382558e6d..c41e8f3eb11 100644 --- a/src/mesa/drivers/dri/i965/brw_oa.py +++ b/src/intel/perf/gen_perf.py @@ -176,16 +176,16 @@ exp_ops["&&"] = (2, splice_logical_and) hw_vars = {} -hw_vars["$EuCoresTotalCount"] = "brw->perfquery.sys_vars.n_eus" -hw_vars["$EuSlicesTotalCount"] = "brw->perfquery.sys_vars.n_eu_slices" -hw_vars["$EuSubslicesTotalCount"] = "brw->perfquery.sys_vars.n_eu_sub_slices" -hw_vars["$EuThreadsCount"] = "brw->perfquery.sys_vars.eu_threads_count" -hw_vars["$SliceMask"] = "brw->perfquery.sys_vars.slice_mask" -hw_vars["$SubsliceMask"] = "brw->perfquery.sys_vars.subslice_mask" -hw_vars["$GpuTimestampFrequency"] = "brw->perfquery.sys_vars.timestamp_frequency" -hw_vars["$GpuMinFrequency"] = "brw->perfquery.sys_vars.gt_min_freq" -hw_vars["$GpuMaxFrequency"] = "brw->perfquery.sys_vars.gt_max_freq" -hw_vars["$SkuRevisionId"] = "brw->perfquery.sys_vars.revision" 
+hw_vars["$EuCoresTotalCount"] = "perf->sys_vars.n_eus" +hw_vars["$EuSlicesTotalCount"] = "perf->sys_vars.n_eu_slices" +hw_vars["$EuSubslicesTotalCount"] = "perf->sys_vars.n_eu_sub_slices" +hw_vars["$EuThreadsCount"] = "perf->sys_vars.eu_threads_count" +hw_vars["$SliceMask"] = "perf->sys_vars.slice_mask" +hw_vars["$SubsliceMask"] = "perf->sys_vars.subslice_mask" +hw_vars["$GpuTimestampFrequency"] = "perf->sys_vars.timestamp_frequency" +hw_vars["$GpuMinFrequency"] = "perf->sys_vars.gt_min_freq" +hw_vars["$GpuMaxFrequency"] = "perf->sys_vars.gt_max_freq" +hw_vars["$SkuRevisionId"] = "perf->sys_vars.revision" def output_rpn_equation_code(set, counter, equation): c("/* RPN equation: " + equation + " */") @@ -207,7 +207,7 @@ def output_rpn_equation_code(set, counter, equation): operand = hw_vars[operand] elif operand in set.counter_vars: reference = set.counter_vars[operand] - operand = set.read_funcs[operand[1:]] + "(brw, query, accumulator)" + operand = set.read_funcs[operand[1:]] + "(perf, query, accumulator)" else: raise Exception("Failed to resolve variable " + operand + " in equation " + equation + " for " + set.name + " :: " + counter.get('name')); args.append(operand) @@ -227,7 +227,7 @@ def output_rpn_equation_code(set, counter, equation): if value in hw_vars: value = hw_vars[value] if value in set.counter_vars: - value = set.read_funcs[value[1:]] + "(brw, query, accumulator)" + value = set.read_funcs[value[1:]] + "(perf, query, accumulator)" c("\nreturn " + value + ";") @@ -278,9 +278,9 @@ def output_counter_read(gen, set, counter): read_eq = counter.get('equation') c("static " + ret_type) - c(counter.read_sym + "(MAYBE_UNUSED struct brw_context *brw,\n") + c(counter.read_sym + "(MAYBE_UNUSED struct gen_perf *perf,\n") c_indent(len(counter.read_sym) + 1) - c("const struct brw_perf_query_info *query,\n") + c("const struct gen_perf_query_info *query,\n") c("uint64_t *accumulator)\n") c_outdent(len(counter.read_sym) + 1) @@ -313,7 +313,7 @@ def 
output_counter_max(gen, set, counter): ret_type = "uint64_t" c("static " + ret_type) - c(counter.max_sym() + "(struct brw_context *brw)\n") + c(counter.max_sym() + "(struct gen_perf *perf)\n") c("{") c_indent(3) output_rpn_equation_code(set, counter, max_eq) @@ -375,8 +375,8 @@ def output_counter_report(set, counter, current_offset): c("counter->oa_counter_read_" + data_type + " = " + set.read_funcs[counter.get('symbol_name')] + ";\n") c("counter->name = \"" + counter.get('name') + "\";\n") c("counter->desc = \"" + counter.get('description') + "\";\n") - c("counter->type = GL_PERFQUERY_COUNTER_" + semantic_type_uc + "_INTEL;\n") - c("counter->data_type = GL_PERFQUERY_COUNTER_DATA_" + data_type_uc + "_INTEL;\n") + c("counter->type = GEN_PERF_COUNTER_TYPE_" + semantic_type_uc + ";\n") + c("counter->data_type = GEN_PERF_COUNTER_DATA_TYPE_" + data_type_uc + ";\n") c("counter->raw_max = " + set.max_values[counter.get('symbol_name')] + ";\n") current_offset = pot_align(current_offset, sizeof(c_type)) @@ -420,7 +420,7 @@ def generate_register_configs(set): c_indent(3) for register in register_config.findall('register'): - c("query->%s[query->n_%s++] = (struct brw_perf_query_register_prog) { .reg = %s, .val = %s };" % + c("query->%s[query->n_%s++] = (struct gen_perf_query_register_prog) { .reg = %s, .val = %s };" % (t, t, register.get('address'), register.get('value'))) if availability: @@ -429,7 +429,7 @@ def generate_register_configs(set): c("\n") -# Wraps a <counter> element from the brw_oa_*.xml files. +# Wraps a <counter> element from the oa-*.xml files.
class Counter: def __init__(self, set, xml): self.xml = xml @@ -501,11 +501,11 @@ class Counter: if token[0] == '$' and token not in hw_vars: return "0 /* unsupported (varies over time) */" - return "{0}__{1}__{2}__max(brw)".format(self.set.gen.chipset, - self.set.underscore_name, - self.xml.get('underscore_name')) + return "{0}__{1}__{2}__max(perf)".format(self.set.gen.chipset, + self.set.underscore_name, + self.xml.get('underscore_name')) -# Wraps a <set> element from the brw_oa_*.xml files. +# Wraps a <set> element from the oa-*.xml files. class Set: def __init__(self, gen, xml): self.gen = gen @@ -550,7 +550,7 @@ class Set: return self.xml.find(path) -# Wraps an entire brw_oa_*.xml file. +# Wraps an entire oa-*.xml file. class Gen: def __init__(self, filename): self.filename = filename @@ -573,8 +573,8 @@ def main(): args = parser.parse_args() - header_file = open(args.header, 'w') c_file = open(args.code, 'w') + header_file = open(args.header, 'w') gens = [] for xml_file in args.xml_files: @@ -612,7 +612,7 @@ def main(): h(textwrap.dedent("""\ #pragma once - struct brw_context; + struct gen_perf; """)) @@ -621,6 +621,8 @@ def main(): #include #include + #include + #include "util/hash_table.h" """)) @@ -628,8 +630,7 @@ def main(): c("#include \"" + os.path.basename(args.header) + "\"") c(textwrap.dedent("""\ - #include "brw_context.h" - #include "brw_performance_query_metrics.h" + #include "perf/gen_perf.h" #define MIN(a, b) ((a < b) ? 
(a) : (b)) @@ -654,15 +655,15 @@ def main(): c("\n") register_lengths = compute_register_lengths(set); for reg_type, reg_length in register_lengths.items(): - c("static struct brw_perf_query_register_prog {0}_{1}_{2}[{3}];".format(gen.chipset, + c("static struct gen_perf_query_register_prog {0}_{1}_{2}[{3}];".format(gen.chipset, set.underscore_name, reg_type, reg_length)) - c("\nstatic struct brw_perf_query_counter {0}_{1}_query_counters[{2}];\n".format(gen.chipset, set.underscore_name, len(counters))) - c("static struct brw_perf_query_info " + gen.chipset + "_" + set.underscore_name + "_query = {\n") + c("\nstatic struct gen_perf_query_counter {0}_{1}_query_counters[{2}];\n".format(gen.chipset, set.underscore_name, len(counters))) + c("static struct gen_perf_query_info " + gen.chipset + "_" + set.underscore_name + "_query = {\n") c_indent(3) - c(".kind = OA_COUNTERS,\n") + c(".kind = GEN_PERF_QUERY_TYPE_OA,\n") c(".name = \"" + set.name + "\",\n") c(".guid = \"" + set.hw_config_guid + "\",\n") @@ -700,12 +701,12 @@ def main(): c("};\n") c("\nstatic void\n") - c("{0}_register_{1}_counter_query(struct brw_context *brw)\n".format(gen.chipset, set.underscore_name)) + c("{0}_register_{1}_counter_query(struct gen_perf *perf)\n".format(gen.chipset, set.underscore_name)) c("{\n") c_indent(3) - c("static struct brw_perf_query_info *query = &" + gen.chipset + "_" + set.underscore_name + "_query;\n") - c("struct brw_perf_query_counter *counter;\n") + c("static struct gen_perf_query_info *query = &" + gen.chipset + "_" + set.underscore_name + "_query;\n") + c("struct gen_perf_query_counter *counter;\n") c("\n") c("/* Note: we're assuming there can't be any variation in the definition ") @@ -726,20 +727,20 @@ def main(): c_outdent(3) c("}"); - c("\n_mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);") + c("\n_mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);") c_outdent(3) c("}\n") - h("void brw_oa_register_queries_" + gen.chipset + 
"(struct brw_context *brw);\n") + h("void gen_oa_register_queries_" + gen.chipset + "(struct gen_perf *perf);\n") c("\nvoid") - c("brw_oa_register_queries_" + gen.chipset + "(struct brw_context *brw)") + c("gen_oa_register_queries_" + gen.chipset + "(struct gen_perf *perf)") c("{") c_indent(3) for set in gen.sets: - c("{0}_register_{1}_counter_query(brw);".format(gen.chipset, set.underscore_name)) + c("{0}_register_{1}_counter_query(perf);".format(gen.chipset, set.underscore_name)) c_outdent(3) c("}") diff --git a/src/intel/perf/meson.build b/src/intel/perf/meson.build new file mode 100644 index 00000000000..3620f6885a4 --- /dev/null +++ b/src/intel/perf/meson.build @@ -0,0 +1,40 @@ +gen_hw_metrics = [ + 'hsw', + 'bdw', 'chv', + 'sklgt2', 'sklgt3', 'sklgt4', + 'kblgt2', 'kblgt3', + 'cflgt2', 'cflgt3', + 'bxt', 'glk', + 'cnl', + 'icl', +] + +gen_hw_metrics_xml_files = [] +foreach hw : gen_hw_metrics + gen_hw_metrics_xml_files += 'oa-@0@.xml'.format(hw) +endforeach + +gen_perf_sources = [ + 'gen_perf.c' +] + +gen_perf_sources += custom_target( + 'intel-perf-sources', + input : gen_hw_metrics_xml_files, + output : [ 'gen_perf_metrics.c', 'gen_perf_metrics.h' ], + command : [ + prog_python, files('gen_perf.py'), + '--code', '@OUTPUT0@', '--header', '@OUTPUT1@', + '@INPUT@', + ], +) + +libintel_perf = static_library( + 'intel_perf', + gen_perf_sources, + include_directories : [ + inc_common, inc_intel, inc_util, + ], + c_args : [c_vis_args, no_override_init_args, '-msse2'], + cpp_args : [cpp_vis_args, '-msse2'], +) diff --git a/src/mesa/drivers/dri/i965/brw_oa_bdw.xml b/src/intel/perf/oa-bdw.xml similarity index 100% rename from src/mesa/drivers/dri/i965/brw_oa_bdw.xml rename to src/intel/perf/oa-bdw.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_bxt.xml b/src/intel/perf/oa-bxt.xml similarity index 100% rename from src/mesa/drivers/dri/i965/brw_oa_bxt.xml rename to src/intel/perf/oa-bxt.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_cflgt2.xml 
b/src/intel/perf/oa-cflgt2.xml similarity index 100% rename from src/mesa/drivers/dri/i965/brw_oa_cflgt2.xml rename to src/intel/perf/oa-cflgt2.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_cflgt3.xml b/src/intel/perf/oa-cflgt3.xml similarity index 100% rename from src/mesa/drivers/dri/i965/brw_oa_cflgt3.xml rename to src/intel/perf/oa-cflgt3.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_chv.xml b/src/intel/perf/oa-chv.xml similarity index 100% rename from src/mesa/drivers/dri/i965/brw_oa_chv.xml rename to src/intel/perf/oa-chv.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_cnl.xml b/src/intel/perf/oa-cnl.xml similarity index 100% rename from src/mesa/drivers/dri/i965/brw_oa_cnl.xml rename to src/intel/perf/oa-cnl.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_glk.xml b/src/intel/perf/oa-glk.xml similarity index 100% rename from src/mesa/drivers/dri/i965/brw_oa_glk.xml rename to src/intel/perf/oa-glk.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_hsw.xml b/src/intel/perf/oa-hsw.xml similarity index 100% rename from src/mesa/drivers/dri/i965/brw_oa_hsw.xml rename to src/intel/perf/oa-hsw.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_icl.xml b/src/intel/perf/oa-icl.xml similarity index 100% rename from src/mesa/drivers/dri/i965/brw_oa_icl.xml rename to src/intel/perf/oa-icl.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_kblgt2.xml b/src/intel/perf/oa-kblgt2.xml similarity index 100% rename from src/mesa/drivers/dri/i965/brw_oa_kblgt2.xml rename to src/intel/perf/oa-kblgt2.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_kblgt3.xml b/src/intel/perf/oa-kblgt3.xml similarity index 100% rename from src/mesa/drivers/dri/i965/brw_oa_kblgt3.xml rename to src/intel/perf/oa-kblgt3.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_sklgt2.xml b/src/intel/perf/oa-sklgt2.xml similarity index 100% rename from src/mesa/drivers/dri/i965/brw_oa_sklgt2.xml rename to src/intel/perf/oa-sklgt2.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_sklgt3.xml 
b/src/intel/perf/oa-sklgt3.xml similarity index 100% rename from src/mesa/drivers/dri/i965/brw_oa_sklgt3.xml rename to src/intel/perf/oa-sklgt3.xml diff --git a/src/mesa/drivers/dri/i965/brw_oa_sklgt4.xml b/src/intel/perf/oa-sklgt4.xml similarity index 100% rename from src/mesa/drivers/dri/i965/brw_oa_sklgt4.xml rename to src/intel/perf/oa-sklgt4.xml diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index b9ce93f7a97..29b46147f39 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -289,7 +289,8 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \ libmesa_intel_common \ libmesa_isl \ libmesa_blorp \ - libmesa_intel_compiler + libmesa_intel_compiler \ + libmesa_intel_perf ifeq ($(ARCH_X86_HAVE_SSE4_1),true) LOCAL_CFLAGS += \ diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index ae609361989..01aeae89980 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -172,23 +172,3 @@ i965_gen11_FILES = \ genX_boilerplate.h \ genX_pipe_control.c \ genX_state_upload.c - -i965_oa_GENERATED_FILES = \ - brw_oa_metrics.c \ - brw_oa_metrics.h - -i965_oa_xml_FILES = \ - brw_oa_hsw.xml \ - brw_oa_bdw.xml \ - brw_oa_chv.xml \ - brw_oa_sklgt2.xml \ - brw_oa_sklgt3.xml \ - brw_oa_sklgt4.xml \ - brw_oa_bxt.xml \ - brw_oa_kblgt2.xml \ - brw_oa_kblgt3.xml \ - brw_oa_glk.xml \ - brw_oa_cflgt2.xml \ - brw_oa_cflgt3.xml \ - brw_oa_cnl.xml \ - brw_oa_icl.xml diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index d6187541a8c..23048428f3e 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -681,48 +681,7 @@ enum brw_predicate_state { struct shader_times; struct gen_l3_config; - -enum brw_query_kind { - OA_COUNTERS, - OA_COUNTERS_RAW, - PIPELINE_STATS, -}; - -struct brw_perf_query_register_prog { - uint32_t reg; - uint32_t val; -}; - 
-struct brw_perf_query_info -{ - enum brw_query_kind kind; - const char *name; - const char *guid; - struct brw_perf_query_counter *counters; - int n_counters; - size_t data_size; - - /* OA specific */ - uint64_t oa_metrics_set_id; - int oa_format; - - /* For indexing into the accumulator[] ... */ - int gpu_time_offset; - int gpu_clock_offset; - int a_offset; - int b_offset; - int c_offset; - - /* Register programming for a given query */ - struct brw_perf_query_register_prog *flex_regs; - uint32_t n_flex_regs; - - struct brw_perf_query_register_prog *mux_regs; - uint32_t n_mux_regs; - - struct brw_perf_query_register_prog *b_counter_regs; - uint32_t n_b_counter_regs; -}; +struct gen_perf; struct brw_uploader { struct brw_bufmgr *bufmgr; @@ -1203,35 +1162,7 @@ struct brw_context } predicate; struct { - /* Variables referenced in the XML meta data for OA performance - * counters, e.g in the normalization equations. - * - * All uint64_t for consistent operand types in generated code - */ - struct { - uint64_t timestamp_frequency; /** $GpuTimestampFrequency */ - uint64_t n_eus; /** $EuCoresTotalCount */ - uint64_t n_eu_slices; /** $EuSlicesTotalCount */ - uint64_t n_eu_sub_slices; /** $EuSubslicesTotalCount */ - uint64_t eu_threads_count; /** $EuThreadsCount */ - uint64_t slice_mask; /** $SliceMask */ - uint64_t subslice_mask; /** $SubsliceMask */ - uint64_t gt_min_freq; /** $GpuMinFrequency */ - uint64_t gt_max_freq; /** $GpuMaxFrequency */ - uint64_t revision; /** $SkuRevisionId */ - } sys_vars; - - /* OA metric sets, indexed by GUID, as know by Mesa at build time, - * to cross-reference with the GUIDs of configs advertised by the - * kernel at runtime - */ - struct hash_table *oa_metrics_table; - - /* Location of the device's sysfs entry. 
*/ - char sysfs_dev_dir[256]; - - struct brw_perf_query_info *queries; - int n_queries; + struct gen_perf *perf; /* The i915 perf stream we open to setup + enable the OA counters */ int oa_stream_fd; diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 7676b2bdd6b..3207be11569 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -40,7 +40,6 @@ */ #include -#include /* put before sys/types.h to silence glibc warnings */ #ifdef MAJOR_IN_MKDEV @@ -72,9 +71,10 @@ #include "brw_context.h" #include "brw_defines.h" #include "brw_performance_query.h" -#include "brw_oa_metrics.h" #include "intel_batchbuffer.h" +#include "perf/gen_perf.h" + #define FILE_DEBUG_FLAG DEBUG_PERFMON #define OAREPORT_REASON_MASK 0x3f @@ -222,62 +222,21 @@ brw_perf_query(struct gl_perf_query_object *o) /******************************************************************************/ -static bool -read_file_uint64(const char *file, uint64_t *val) -{ - char buf[32]; - int fd, n; - - fd = open(file, 0); - if (fd < 0) - return false; - while ((n = read(fd, buf, sizeof (buf) - 1)) < 0 && - errno == EINTR); - close(fd); - if (n < 0) - return false; - - buf[n] = '\0'; - *val = strtoull(buf, NULL, 0); - - return true; -} - -static bool -read_sysfs_drm_device_file_uint64(struct brw_context *brw, - const char *file, - uint64_t *value) -{ - char buf[512]; - int len; - - len = snprintf(buf, sizeof(buf), "%s/%s", - brw->perfquery.sysfs_dev_dir, file); - if (len < 0 || len >= sizeof(buf)) { - DBG("Failed to concatenate sys filename to read u64 from\n"); - return false; - } - - return read_file_uint64(buf, value); -} - -/******************************************************************************/ - static bool brw_is_perf_query_ready(struct gl_context *ctx, struct gl_perf_query_object *o); static uint64_t brw_perf_query_get_metric_id(struct brw_context *brw, - const struct 
brw_perf_query_info *query) + const struct gen_perf_query_info *query) { /* These queries are know not to ever change, their config ID has been * loaded upon the first query creation. No need to look them up again. */ - if (query->kind == OA_COUNTERS) + if (query->kind == GEN_PERF_QUERY_TYPE_OA) return query->oa_metrics_set_id; - assert(query->kind == OA_COUNTERS_RAW); + assert(query->kind == GEN_PERF_QUERY_TYPE_RAW); /* Raw queries can be reprogrammed up by an external application/library. * When a raw query is used for the first time it's id is set to a value != @@ -290,12 +249,9 @@ brw_perf_query_get_metric_id(struct brw_context *brw, return query->oa_metrics_set_id; } - char metric_id_file[280]; - snprintf(metric_id_file, sizeof(metric_id_file), - "%s/metrics/%s/id", brw->perfquery.sysfs_dev_dir, query->guid); - - struct brw_perf_query_info *raw_query = (struct brw_perf_query_info *)query; - if (!read_file_uint64(metric_id_file, &raw_query->oa_metrics_set_id)) { + struct gen_perf_query_info *raw_query = (struct gen_perf_query_info *)query; + if (!gen_perf_load_metric_id(brw->perfquery.perf, query->guid, + &raw_query->oa_metrics_set_id)) { DBG("Unable to read query guid=%s ID, falling back to test config\n", query->guid); raw_query->oa_metrics_set_id = 1ULL; } else { @@ -313,8 +269,8 @@ dump_perf_query_callback(GLuint id, void *query_void, void *brw_void) struct brw_perf_query_object *obj = query_void; switch (obj->query->kind) { - case OA_COUNTERS: - case OA_COUNTERS_RAW: + case GEN_PERF_QUERY_TYPE_OA: + case GEN_PERF_QUERY_TYPE_RAW: DBG("%4d: %-6s %-8s BO: %-4s OA data: %-10s %-15s\n", id, o->Used ? "Dirty," : "New,", @@ -323,7 +279,7 @@ dump_perf_query_callback(GLuint id, void *query_void, void *brw_void) brw_is_perf_query_ready(ctx, o) ? "ready," : "not ready,", obj->oa.results_accumulated ? "accumulated" : "not accumulated"); break; - case PIPELINE_STATS: + case GEN_PERF_QUERY_TYPE_PIPELINE: DBG("%4d: %-6s %-8s BO: %-4s\n", id, o->Used ? 
"Dirty," : "New,", @@ -414,20 +370,20 @@ brw_get_perf_query_info(struct gl_context *ctx, GLuint *n_active) { struct brw_context *brw = brw_context(ctx); - const struct brw_perf_query_info *query = - &brw->perfquery.queries[query_index]; + const struct gen_perf_query_info *query = + &brw->perfquery.perf->queries[query_index]; *name = query->name; *data_size = query->data_size; *n_counters = query->n_counters; switch (query->kind) { - case OA_COUNTERS: - case OA_COUNTERS_RAW: + case GEN_PERF_QUERY_TYPE_OA: + case GEN_PERF_QUERY_TYPE_RAW: *n_active = brw->perfquery.n_active_oa_queries; break; - case PIPELINE_STATS: + case GEN_PERF_QUERY_TYPE_PIPELINE: *n_active = brw->perfquery.n_active_pipeline_stats_queries; break; @@ -437,6 +393,35 @@ brw_get_perf_query_info(struct gl_context *ctx, } } +static GLuint +gen_counter_type_enum_to_gl_type(enum gen_perf_counter_type type) +{ + switch (type) { + case GEN_PERF_COUNTER_TYPE_EVENT: return GL_PERFQUERY_COUNTER_EVENT_INTEL; + case GEN_PERF_COUNTER_TYPE_DURATION_NORM: return GL_PERFQUERY_COUNTER_DURATION_NORM_INTEL; + case GEN_PERF_COUNTER_TYPE_DURATION_RAW: return GL_PERFQUERY_COUNTER_DURATION_RAW_INTEL; + case GEN_PERF_COUNTER_TYPE_THROUGHPUT: return GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL; + case GEN_PERF_COUNTER_TYPE_RAW: return GL_PERFQUERY_COUNTER_RAW_INTEL; + case GEN_PERF_COUNTER_TYPE_TIMESTAMP: return GL_PERFQUERY_COUNTER_TIMESTAMP_INTEL; + default: + unreachable("Unknown counter type"); + } +} + +static GLuint +gen_counter_data_type_to_gl_type(enum gen_perf_counter_data_type type) +{ + switch (type) { + case GEN_PERF_COUNTER_DATA_TYPE_BOOL32: return GL_PERFQUERY_COUNTER_DATA_BOOL32_INTEL; + case GEN_PERF_COUNTER_DATA_TYPE_UINT32: return GL_PERFQUERY_COUNTER_DATA_UINT32_INTEL; + case GEN_PERF_COUNTER_DATA_TYPE_UINT64: return GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL; + case GEN_PERF_COUNTER_DATA_TYPE_FLOAT: return GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL; + case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE: return 
GL_PERFQUERY_COUNTER_DATA_DOUBLE_INTEL; + default: + unreachable("Unknown counter data type"); + } +} + /** * Driver hook for glGetPerfCounterInfoINTEL(). */ @@ -453,17 +438,17 @@ brw_get_perf_counter_info(struct gl_context *ctx, GLuint64 *raw_max) { struct brw_context *brw = brw_context(ctx); - const struct brw_perf_query_info *query = - &brw->perfquery.queries[query_index]; - const struct brw_perf_query_counter *counter = + const struct gen_perf_query_info *query = + &brw->perfquery.perf->queries[query_index]; + const struct gen_perf_query_counter *counter = &query->counters[counter_index]; *name = counter->name; *desc = counter->desc; *offset = counter->offset; *data_size = counter->size; - *type_enum = counter->type; - *data_type_enum = counter->data_type; + *type_enum = gen_counter_type_enum_to_gl_type(counter->type); + *data_type_enum = gen_counter_data_type_to_gl_type(counter->data_type); *raw_max = counter->raw_max; } @@ -478,13 +463,13 @@ snapshot_statistics_registers(struct brw_context *brw, struct brw_perf_query_object *obj, uint32_t offset_in_bytes) { - const struct brw_perf_query_info *query = obj->query; + const struct gen_perf_query_info *query = obj->query; const int n_counters = query->n_counters; for (int i = 0; i < n_counters; i++) { - const struct brw_perf_query_counter *counter = &query->counters[i]; + const struct gen_perf_query_counter *counter = &query->counters[i]; - assert(counter->data_type == GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL); + assert(counter->data_type == GEN_PERF_COUNTER_DATA_TYPE_UINT64); brw_store_register_mem64(brw, obj->pipeline_stats.bo, counter->pipeline_stat.reg, @@ -567,7 +552,7 @@ add_deltas(struct brw_context *brw, const uint32_t *start, const uint32_t *end) { - const struct brw_perf_query_info *query = obj->query; + const struct gen_perf_query_info *query = obj->query; uint64_t *accumulator = obj->oa.accumulator; int idx = 0; int i; @@ -576,29 +561,29 @@ add_deltas(struct brw_context *brw, switch (query->oa_format) { 
case I915_OA_FORMAT_A32u40_A4u32_B8_C8: - brw_perf_query_accumulate_uint32(start + 1, end + 1, accumulator + idx++); /* timestamp */ - brw_perf_query_accumulate_uint32(start + 3, end + 3, accumulator + idx++); /* clock */ + gen_perf_query_accumulate_uint32(start + 1, end + 1, accumulator + idx++); /* timestamp */ + gen_perf_query_accumulate_uint32(start + 3, end + 3, accumulator + idx++); /* clock */ /* 32x 40bit A counters... */ for (i = 0; i < 32; i++) - brw_perf_query_accumulate_uint40(i, start, end, accumulator + idx++); + gen_perf_query_accumulate_uint40(i, start, end, accumulator + idx++); /* 4x 32bit A counters... */ for (i = 0; i < 4; i++) - brw_perf_query_accumulate_uint32(start + 36 + i, end + 36 + i, + gen_perf_query_accumulate_uint32(start + 36 + i, end + 36 + i, accumulator + idx++); /* 8x 32bit B counters + 8x 32bit C counters... */ for (i = 0; i < 16; i++) - brw_perf_query_accumulate_uint32(start + 48 + i, end + 48 + i, + gen_perf_query_accumulate_uint32(start + 48 + i, end + 48 + i, accumulator + idx++); break; case I915_OA_FORMAT_A45_B8_C8: - brw_perf_query_accumulate_uint32(start + 1, end + 1, accumulator); /* timestamp */ + gen_perf_query_accumulate_uint32(start + 1, end + 1, accumulator); /* timestamp */ for (i = 0; i < 61; i++) - brw_perf_query_accumulate_uint32(start + 3 + i, end + 3 + i, accumulator + 1 + i); + gen_perf_query_accumulate_uint32(start + 3 + i, end + 3 + i, accumulator + 1 + i); break; default: @@ -982,15 +967,15 @@ open_i915_perf_oa_stream(struct brw_context *brw, static void close_perf(struct brw_context *brw, - const struct brw_perf_query_info *query) + const struct gen_perf_query_info *query) { if (brw->perfquery.oa_stream_fd != -1) { close(brw->perfquery.oa_stream_fd); brw->perfquery.oa_stream_fd = -1; } - if (query->kind == OA_COUNTERS_RAW) { - struct brw_perf_query_info *raw_query = - (struct brw_perf_query_info *) query; + if (query->kind == GEN_PERF_QUERY_TYPE_RAW) { + struct gen_perf_query_info *raw_query = + (struct 
gen_perf_query_info *) query; raw_query->oa_metrics_set_id = 0; } } @@ -1019,7 +1004,7 @@ brw_begin_perf_query(struct gl_context *ctx, { struct brw_context *brw = brw_context(ctx); struct brw_perf_query_object *obj = brw_perf_query(o); - const struct brw_perf_query_info *query = obj->query; + const struct gen_perf_query_info *query = obj->query; /* We can assume the frontend hides mistaken attempts to Begin a * query object multiple times before its End. Similarly if an @@ -1079,8 +1064,8 @@ brw_begin_perf_query(struct gl_context *ctx, brw_emit_mi_flush(brw); switch (query->kind) { - case OA_COUNTERS: - case OA_COUNTERS_RAW: { + case GEN_PERF_QUERY_TYPE_OA: + case GEN_PERF_QUERY_TYPE_RAW: { /* Opening an i915 perf stream implies exclusive access to the OA unit * which will generate counter reports for a specific counter set with a @@ -1130,12 +1115,12 @@ brw_begin_perf_query(struct gl_context *ctx, a_counter_in_bits = 40; uint64_t overflow_period = pow(2, a_counter_in_bits) / - (brw->perfquery.sys_vars.n_eus * + (brw->perfquery.perf->sys_vars.n_eus * /* drop 1GHz freq to have units in nanoseconds */ 2); DBG("A counter overflow period: %"PRIu64"ns, %"PRIu64"ms (n_eus=%"PRIu64")\n", - overflow_period, overflow_period / 1000000ul, brw->perfquery.sys_vars.n_eus); + overflow_period, overflow_period / 1000000ul, brw->perfquery.perf->sys_vars.n_eus); int period_exponent = 0; uint64_t prev_sample_period, next_sample_period; @@ -1234,7 +1219,7 @@ brw_begin_perf_query(struct gl_context *ctx, break; } - case PIPELINE_STATS: + case GEN_PERF_QUERY_TYPE_PIPELINE: if (obj->pipeline_stats.bo) { brw_bo_unreference(obj->pipeline_stats.bo); obj->pipeline_stats.bo = NULL; @@ -1282,8 +1267,8 @@ brw_end_perf_query(struct gl_context *ctx, brw_emit_mi_flush(brw); switch (obj->query->kind) { - case OA_COUNTERS: - case OA_COUNTERS_RAW: + case GEN_PERF_QUERY_TYPE_OA: + case GEN_PERF_QUERY_TYPE_RAW: /* NB: It's possible that the query will have already been marked * as 'accumulated' if an 
error was seen while reading samples @@ -1306,7 +1291,7 @@ brw_end_perf_query(struct gl_context *ctx, */ break; - case PIPELINE_STATS: + case GEN_PERF_QUERY_TYPE_PIPELINE: snapshot_statistics_registers(brw, obj, STATS_BO_END_OFFSET_BYTES); --brw->perfquery.n_active_pipeline_stats_queries; @@ -1328,12 +1313,12 @@ brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o) assert(!o->Ready); switch (obj->query->kind) { - case OA_COUNTERS: - case OA_COUNTERS_RAW: + case GEN_PERF_QUERY_TYPE_OA: + case GEN_PERF_QUERY_TYPE_RAW: bo = obj->oa.bo; break; - case PIPELINE_STATS: + case GEN_PERF_QUERY_TYPE_PIPELINE: bo = obj->pipeline_stats.bo; break; @@ -1358,8 +1343,8 @@ brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o) * we need to wait for all the reports to come in before we can * read them. */ - if (obj->query->kind == OA_COUNTERS || - obj->query->kind == OA_COUNTERS_RAW) { + if (obj->query->kind == GEN_PERF_QUERY_TYPE_OA || + obj->query->kind == GEN_PERF_QUERY_TYPE_RAW) { while (!read_oa_samples_for_query(brw, obj)) ; } @@ -1376,14 +1361,14 @@ brw_is_perf_query_ready(struct gl_context *ctx, return true; switch (obj->query->kind) { - case OA_COUNTERS: - case OA_COUNTERS_RAW: + case GEN_PERF_QUERY_TYPE_OA: + case GEN_PERF_QUERY_TYPE_RAW: return (obj->oa.results_accumulated || (obj->oa.bo && !brw_batch_references(&brw->batch, obj->oa.bo) && !brw_bo_busy(obj->oa.bo) && read_oa_samples_for_query(brw, obj))); - case PIPELINE_STATS: + case GEN_PERF_QUERY_TYPE_PIPELINE: return (obj->pipeline_stats.bo && !brw_batch_references(&brw->batch, obj->pipeline_stats.bo) && !brw_bo_busy(obj->pipeline_stats.bo)); @@ -1489,25 +1474,26 @@ get_oa_counter_data(struct brw_context *brw, size_t data_size, uint8_t *data) { - const struct brw_perf_query_info *query = obj->query; + struct gen_perf *perf = brw->perfquery.perf; + const struct gen_perf_query_info *query = obj->query; int n_counters = query->n_counters; int written = 0; for (int i = 0; i < 
n_counters; i++) { - const struct brw_perf_query_counter *counter = &query->counters[i]; + const struct gen_perf_query_counter *counter = &query->counters[i]; uint64_t *out_uint64; float *out_float; if (counter->size) { switch (counter->data_type) { - case GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL: + case GEN_PERF_COUNTER_DATA_TYPE_UINT64: out_uint64 = (uint64_t *)(data + counter->offset); - *out_uint64 = counter->oa_counter_read_uint64(brw, query, + *out_uint64 = counter->oa_counter_read_uint64(perf, query, obj->oa.accumulator); break; - case GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL: + case GEN_PERF_COUNTER_DATA_TYPE_FLOAT: out_float = (float *)(data + counter->offset); - *out_float = counter->oa_counter_read_float(brw, query, + *out_float = counter->oa_counter_read_float(perf, query, obj->oa.accumulator); break; default: @@ -1528,7 +1514,7 @@ get_pipeline_stats_data(struct brw_context *brw, uint8_t *data) { - const struct brw_perf_query_info *query = obj->query; + const struct gen_perf_query_info *query = obj->query; int n_counters = obj->query->n_counters; uint8_t *p = data; @@ -1536,7 +1522,7 @@ get_pipeline_stats_data(struct brw_context *brw, uint64_t *end = start + (STATS_BO_END_OFFSET_BYTES / sizeof(uint64_t)); for (int i = 0; i < n_counters; i++) { - const struct brw_perf_query_counter *counter = &query->counters[i]; + const struct gen_perf_query_counter *counter = &query->counters[i]; uint64_t value = end[i] - start[i]; if (counter->pipeline_stat.numerator != @@ -1581,8 +1567,8 @@ brw_get_perf_query_data(struct gl_context *ctx, assert(o->Ready); switch (obj->query->kind) { - case OA_COUNTERS: - case OA_COUNTERS_RAW: + case GEN_PERF_QUERY_TYPE_OA: + case GEN_PERF_QUERY_TYPE_RAW: if (!obj->oa.results_accumulated) { read_gt_frequency(brw, obj); read_slice_unslice_frequencies(brw, obj); @@ -1592,13 +1578,13 @@ brw_get_perf_query_data(struct gl_context *ctx, brw_bo_unmap(obj->oa.bo); obj->oa.map = NULL; } - if (obj->query->kind == OA_COUNTERS) + if (obj->query->kind 
== GEN_PERF_QUERY_TYPE_OA) written = get_oa_counter_data(brw, obj, data_size, (uint8_t *)data); else written = brw_perf_query_get_mdapi_oa_data(brw, obj, data_size, (uint8_t *)data); break; - case PIPELINE_STATS: + case GEN_PERF_QUERY_TYPE_PIPELINE: written = get_pipeline_stats_data(brw, obj, data_size, (uint8_t *)data); break; @@ -1615,8 +1601,8 @@ static struct gl_perf_query_object * brw_new_perf_query_object(struct gl_context *ctx, unsigned query_index) { struct brw_context *brw = brw_context(ctx); - const struct brw_perf_query_info *query = - &brw->perfquery.queries[query_index]; + const struct gen_perf_query_info *query = + &brw->perfquery.perf->queries[query_index]; struct brw_perf_query_object *obj = calloc(1, sizeof(struct brw_perf_query_object)); @@ -1650,8 +1636,8 @@ brw_delete_perf_query(struct gl_context *ctx, DBG("Delete(%d)\n", o->Id); switch (obj->query->kind) { - case OA_COUNTERS: - case OA_COUNTERS_RAW: + case GEN_PERF_QUERY_TYPE_OA: + case GEN_PERF_QUERY_TYPE_RAW: if (obj->oa.bo) { if (!obj->oa.results_accumulated) { drop_from_unaccumulated_query_list(brw, obj); @@ -1665,7 +1651,7 @@ brw_delete_perf_query(struct gl_context *ctx, obj->oa.results_accumulated = false; break; - case PIPELINE_STATS: + case GEN_PERF_QUERY_TYPE_PIPELINE: if (obj->pipeline_stats.bo) { brw_bo_unreference(obj->pipeline_stats.bo); obj->pipeline_stats.bo = NULL; @@ -1695,223 +1681,87 @@ static void init_pipeline_statistic_query_registers(struct brw_context *brw) { const struct gen_device_info *devinfo = &brw->screen->devinfo; - struct brw_perf_query_info *query = brw_perf_query_append_query_info(brw); + struct gen_perf *perf = brw->perfquery.perf; + struct gen_perf_query_info *query = + gen_perf_query_append_query_info(perf, MAX_STAT_COUNTERS); - query->kind = PIPELINE_STATS; + query->kind = GEN_PERF_QUERY_TYPE_PIPELINE; query->name = "Pipeline Statistics Registers"; - query->n_counters = 0; - query->counters = - rzalloc_array(brw, struct brw_perf_query_counter, 
MAX_STAT_COUNTERS); - brw_perf_query_info_add_basic_stat_reg(query, IA_VERTICES_COUNT, - "N vertices submitted"); - brw_perf_query_info_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT, - "N primitives submitted"); - brw_perf_query_info_add_basic_stat_reg(query, VS_INVOCATION_COUNT, - "N vertex shader invocations"); + gen_perf_query_info_add_basic_stat_reg(query, IA_VERTICES_COUNT, + "N vertices submitted"); + gen_perf_query_info_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT, + "N primitives submitted"); + gen_perf_query_info_add_basic_stat_reg(query, VS_INVOCATION_COUNT, + "N vertex shader invocations"); if (devinfo->gen == 6) { - brw_perf_query_info_add_stat_reg(query, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1, + gen_perf_query_info_add_stat_reg(query, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1, "SO_PRIM_STORAGE_NEEDED", "N geometry shader stream-out primitives (total)"); - brw_perf_query_info_add_stat_reg(query, GEN6_SO_NUM_PRIMS_WRITTEN, 1, 1, + gen_perf_query_info_add_stat_reg(query, GEN6_SO_NUM_PRIMS_WRITTEN, 1, 1, "SO_NUM_PRIMS_WRITTEN", "N geometry shader stream-out primitives (written)"); } else { - brw_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(0), 1, 1, + gen_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(0), 1, 1, "SO_PRIM_STORAGE_NEEDED (Stream 0)", "N stream-out (stream 0) primitives (total)"); - brw_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(1), 1, 1, + gen_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(1), 1, 1, "SO_PRIM_STORAGE_NEEDED (Stream 1)", "N stream-out (stream 1) primitives (total)"); - brw_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(2), 1, 1, + gen_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(2), 1, 1, "SO_PRIM_STORAGE_NEEDED (Stream 2)", "N stream-out (stream 2) primitives (total)"); - brw_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(3), 1, 1, + gen_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(3), 1, 1, 
"SO_PRIM_STORAGE_NEEDED (Stream 3)", "N stream-out (stream 3) primitives (total)"); - brw_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(0), 1, 1, + gen_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(0), 1, 1, "SO_NUM_PRIMS_WRITTEN (Stream 0)", "N stream-out (stream 0) primitives (written)"); - brw_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(1), 1, 1, + gen_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(1), 1, 1, "SO_NUM_PRIMS_WRITTEN (Stream 1)", "N stream-out (stream 1) primitives (written)"); - brw_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(2), 1, 1, + gen_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(2), 1, 1, "SO_NUM_PRIMS_WRITTEN (Stream 2)", "N stream-out (stream 2) primitives (written)"); - brw_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(3), 1, 1, + gen_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(3), 1, 1, "SO_NUM_PRIMS_WRITTEN (Stream 3)", "N stream-out (stream 3) primitives (written)"); } - brw_perf_query_info_add_basic_stat_reg(query, HS_INVOCATION_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, HS_INVOCATION_COUNT, "N TCS shader invocations"); - brw_perf_query_info_add_basic_stat_reg(query, DS_INVOCATION_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, DS_INVOCATION_COUNT, "N TES shader invocations"); - brw_perf_query_info_add_basic_stat_reg(query, GS_INVOCATION_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, GS_INVOCATION_COUNT, "N geometry shader invocations"); - brw_perf_query_info_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT, "N geometry shader primitives emitted"); - brw_perf_query_info_add_basic_stat_reg(query, CL_INVOCATION_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, CL_INVOCATION_COUNT, "N primitives entering clipping"); - brw_perf_query_info_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT, + 
gen_perf_query_info_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT, "N primitives leaving clipping"); - if (devinfo->is_haswell || devinfo->gen == 8) - brw_perf_query_info_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4, + if (devinfo->is_haswell || devinfo->gen == 8) { + gen_perf_query_info_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4, "N fragment shader invocations", "N fragment shader invocations"); - else - brw_perf_query_info_add_basic_stat_reg(query, PS_INVOCATION_COUNT, + } else { + gen_perf_query_info_add_basic_stat_reg(query, PS_INVOCATION_COUNT, "N fragment shader invocations"); - - brw_perf_query_info_add_basic_stat_reg(query, PS_DEPTH_COUNT, "N z-pass fragments"); - - if (devinfo->gen >= 7) - brw_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT, - "N compute shader invocations"); - - query->data_size = sizeof(uint64_t) * query->n_counters; -} - -static void -register_oa_config(struct brw_context *brw, - const struct brw_perf_query_info *query, - uint64_t config_id) -{ - struct brw_perf_query_info *registred_query = - brw_perf_query_append_query_info(brw); - - *registred_query = *query; - registred_query->oa_metrics_set_id = config_id; - DBG("metric set registred: id = %" PRIu64", guid = %s\n", - registred_query->oa_metrics_set_id, query->guid); -} - -static void -enumerate_sysfs_metrics(struct brw_context *brw) -{ - char buf[256]; - DIR *metricsdir = NULL; - struct dirent *metric_entry; - int len; - - len = snprintf(buf, sizeof(buf), "%s/metrics", brw->perfquery.sysfs_dev_dir); - if (len < 0 || len >= sizeof(buf)) { - DBG("Failed to concatenate path to sysfs metrics/ directory\n"); - return; - } - - metricsdir = opendir(buf); - if (!metricsdir) { - DBG("Failed to open %s: %m\n", buf); - return; - } - - while ((metric_entry = readdir(metricsdir))) { - struct hash_entry *entry; - - if ((metric_entry->d_type != DT_DIR && - metric_entry->d_type != DT_LNK) || - metric_entry->d_name[0] == '.') - continue; - - DBG("metric set: %s\n", 
metric_entry->d_name); - entry = _mesa_hash_table_search(brw->perfquery.oa_metrics_table, - metric_entry->d_name); - if (entry) { - uint64_t id; - - len = snprintf(buf, sizeof(buf), "%s/metrics/%s/id", - brw->perfquery.sysfs_dev_dir, metric_entry->d_name); - if (len < 0 || len >= sizeof(buf)) { - DBG("Failed to concatenate path to sysfs metric id file\n"); - continue; - } - - if (!read_file_uint64(buf, &id)) { - DBG("Failed to read metric set id from %s: %m", buf); - continue; - } - - register_oa_config(brw, (const struct brw_perf_query_info *)entry->data, id); - } else - DBG("metric set not known by mesa (skipping)\n"); } - closedir(metricsdir); -} - -static bool -kernel_has_dynamic_config_support(struct brw_context *brw) -{ - __DRIscreen *screen = brw->screen->driScrnPriv; - - hash_table_foreach(brw->perfquery.oa_metrics_table, entry) { - struct brw_perf_query_info *query = entry->data; - char config_path[280]; - uint64_t config_id; + gen_perf_query_info_add_basic_stat_reg(query, PS_DEPTH_COUNT, + "N z-pass fragments"); - snprintf(config_path, sizeof(config_path), "%s/metrics/%s/id", - brw->perfquery.sysfs_dev_dir, query->guid); - - /* Look for the test config, which we know we can't replace. 
*/ - if (read_file_uint64(config_path, &config_id) && config_id == 1) { - return drmIoctl(screen->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, - &config_id) < 0 && errno == ENOENT; - } + if (devinfo->gen >= 7) { + gen_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT, + "N compute shader invocations"); } - return false; -} - -static void -init_oa_configs(struct brw_context *brw) -{ - __DRIscreen *screen = brw->screen->driScrnPriv; - - hash_table_foreach(brw->perfquery.oa_metrics_table, entry) { - const struct brw_perf_query_info *query = entry->data; - struct drm_i915_perf_oa_config config; - char config_path[280]; - uint64_t config_id; - int ret; - - snprintf(config_path, sizeof(config_path), "%s/metrics/%s/id", - brw->perfquery.sysfs_dev_dir, query->guid); - - /* Don't recreate already loaded configs. */ - if (read_file_uint64(config_path, &config_id)) { - DBG("metric set: %s (already loaded)\n", query->guid); - register_oa_config(brw, query, config_id); - continue; - } - - memset(&config, 0, sizeof(config)); - - memcpy(config.uuid, query->guid, sizeof(config.uuid)); - - config.n_mux_regs = query->n_mux_regs; - config.mux_regs_ptr = (uintptr_t) query->mux_regs; - - config.n_boolean_regs = query->n_b_counter_regs; - config.boolean_regs_ptr = (uintptr_t) query->b_counter_regs; - - config.n_flex_regs = query->n_flex_regs; - config.flex_regs_ptr = (uintptr_t) query->flex_regs; - - ret = drmIoctl(screen->fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config); - if (ret < 0) { - DBG("Failed to load \"%s\" (%s) metrics set in kernel: %s\n", - query->name, query->guid, strerror(errno)); - continue; - } - - register_oa_config(brw, query, ret); - DBG("metric set: %s (added)\n", query->guid); - } + query->data_size = sizeof(uint64_t) * query->n_counters; } static bool @@ -1974,55 +1824,20 @@ getparam_topology(struct brw_context *brw) return true; } -static void -compute_topology_builtins(struct brw_context *brw) +static unsigned +brw_init_perf_query_info(struct gl_context *ctx) 
{ + struct brw_context *brw = brw_context(ctx); const struct gen_device_info *devinfo = &brw->screen->devinfo; + __DRIscreen *screen = brw->screen->driScrnPriv; - brw->perfquery.sys_vars.slice_mask = devinfo->slice_masks; - brw->perfquery.sys_vars.n_eu_slices = devinfo->num_slices; - - for (int i = 0; i < sizeof(devinfo->subslice_masks[i]); i++) { - brw->perfquery.sys_vars.n_eu_sub_slices += - util_bitcount(devinfo->subslice_masks[i]); - } - - for (int i = 0; i < sizeof(devinfo->eu_masks); i++) - brw->perfquery.sys_vars.n_eus += util_bitcount(devinfo->eu_masks[i]); - - brw->perfquery.sys_vars.eu_threads_count = - brw->perfquery.sys_vars.n_eus * devinfo->num_thread_per_eu; - - /* The subslice mask builtin contains bits for all slices. Prior to Gen11 - * it had groups of 3bits for each slice, on Gen11 it's 8bits for each - * slice. - * - * Ideally equations would be updated to have a slice/subslice query - * function/operator. - */ - brw->perfquery.sys_vars.subslice_mask = 0; - - int bits_per_subslice = devinfo->gen == 11 ? 
8 : 3; - - for (int s = 0; s < util_last_bit(devinfo->slice_masks); s++) { - for (int ss = 0; ss < (devinfo->subslice_slice_stride * 8); ss++) { - if (gen_device_info_subslice_available(devinfo, s, ss)) - brw->perfquery.sys_vars.subslice_mask |= 1UL << (s * bits_per_subslice + ss); - } - } -} - -static bool -init_oa_sys_vars(struct brw_context *brw) -{ - const struct gen_device_info *devinfo = &brw->screen->devinfo; - uint64_t min_freq_mhz = 0, max_freq_mhz = 0; + if (brw->perfquery.perf) + return brw->perfquery.perf->n_queries; - if (!read_sysfs_drm_device_file_uint64(brw, "gt_min_freq_mhz", &min_freq_mhz)) - return false; + brw->perfquery.perf = gen_perf_new(brw, drmIoctl); - if (!read_sysfs_drm_device_file_uint64(brw, "gt_max_freq_mhz", &max_freq_mhz)) - return false; + init_pipeline_statistic_query_registers(brw); + brw_perf_query_register_mdapi_statistic_query(brw); if (!query_topology(brw)) { /* We need the i915 query uAPI on CNL+ (kernel 4.17+). */ @@ -2040,182 +1855,8 @@ init_oa_sys_vars(struct brw_context *brw) } } - memset(&brw->perfquery.sys_vars, 0, sizeof(brw->perfquery.sys_vars)); - brw->perfquery.sys_vars.gt_min_freq = min_freq_mhz * 1000000; - brw->perfquery.sys_vars.gt_max_freq = max_freq_mhz * 1000000; - brw->perfquery.sys_vars.timestamp_frequency = devinfo->timestamp_frequency; - brw->perfquery.sys_vars.revision = devinfo->revision; - compute_topology_builtins(brw); - - return true; -} - -static bool -get_sysfs_dev_dir(struct brw_context *brw) -{ - __DRIscreen *screen = brw->screen->driScrnPriv; - struct stat sb; - int min, maj; - DIR *drmdir; - struct dirent *drm_entry; - int len; - - brw->perfquery.sysfs_dev_dir[0] = '\0'; - - if (fstat(screen->fd, &sb)) { - DBG("Failed to stat DRM fd\n"); - return false; - } - - maj = major(sb.st_rdev); - min = minor(sb.st_rdev); - - if (!S_ISCHR(sb.st_mode)) { - DBG("DRM fd is not a character device as expected\n"); - return false; - } - - len = snprintf(brw->perfquery.sysfs_dev_dir, - 
sizeof(brw->perfquery.sysfs_dev_dir), - "/sys/dev/char/%d:%d/device/drm", maj, min); - if (len < 0 || len >= sizeof(brw->perfquery.sysfs_dev_dir)) { - DBG("Failed to concatenate sysfs path to drm device\n"); - return false; - } - - drmdir = opendir(brw->perfquery.sysfs_dev_dir); - if (!drmdir) { - DBG("Failed to open %s: %m\n", brw->perfquery.sysfs_dev_dir); - return false; - } - - while ((drm_entry = readdir(drmdir))) { - if ((drm_entry->d_type == DT_DIR || - drm_entry->d_type == DT_LNK) && - strncmp(drm_entry->d_name, "card", 4) == 0) - { - len = snprintf(brw->perfquery.sysfs_dev_dir, - sizeof(brw->perfquery.sysfs_dev_dir), - "/sys/dev/char/%d:%d/device/drm/%s", - maj, min, drm_entry->d_name); - closedir(drmdir); - if (len < 0 || len >= sizeof(brw->perfquery.sysfs_dev_dir)) - return false; - else - return true; - } - } - - closedir(drmdir); - - DBG("Failed to find cardX directory under /sys/dev/char/%d:%d/device/drm\n", - maj, min); - - return false; -} - -typedef void (*perf_register_oa_queries_t)(struct brw_context *); - -static perf_register_oa_queries_t -get_register_queries_function(const struct gen_device_info *devinfo) -{ - if (devinfo->is_haswell) - return brw_oa_register_queries_hsw; - if (devinfo->is_cherryview) - return brw_oa_register_queries_chv; - if (devinfo->is_broadwell) - return brw_oa_register_queries_bdw; - if (devinfo->is_broxton) - return brw_oa_register_queries_bxt; - if (devinfo->is_skylake) { - if (devinfo->gt == 2) - return brw_oa_register_queries_sklgt2; - if (devinfo->gt == 3) - return brw_oa_register_queries_sklgt3; - if (devinfo->gt == 4) - return brw_oa_register_queries_sklgt4; - } - if (devinfo->is_kabylake) { - if (devinfo->gt == 2) - return brw_oa_register_queries_kblgt2; - if (devinfo->gt == 3) - return brw_oa_register_queries_kblgt3; - } - if (devinfo->is_geminilake) - return brw_oa_register_queries_glk; - if (devinfo->is_coffeelake) { - if (devinfo->gt == 2) - return brw_oa_register_queries_cflgt2; - if (devinfo->gt == 3) - 
return brw_oa_register_queries_cflgt3; - } - if (devinfo->is_cannonlake) - return brw_oa_register_queries_cnl; - if (devinfo->gen == 11) - return brw_oa_register_queries_icl; - - return NULL; -} - -static unsigned -brw_init_perf_query_info(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - const struct gen_device_info *devinfo = &brw->screen->devinfo; - bool i915_perf_oa_available = false; - struct stat sb; - perf_register_oa_queries_t oa_register; - - if (brw->perfquery.n_queries) - return brw->perfquery.n_queries; - - init_pipeline_statistic_query_registers(brw); - brw_perf_query_register_mdapi_statistic_query(brw); - - oa_register = get_register_queries_function(devinfo); - - /* The existence of this sysctl parameter implies the kernel supports - * the i915 perf interface. - */ - if (stat("/proc/sys/dev/i915/perf_stream_paranoid", &sb) == 0) { - - /* If _paranoid == 1 then on Gen8+ we won't be able to access OA - * metrics unless running as root. - */ - if (devinfo->is_haswell) - i915_perf_oa_available = true; - else { - uint64_t paranoid = 1; - - read_file_uint64("/proc/sys/dev/i915/perf_stream_paranoid", ¶noid); - - if (paranoid == 0 || geteuid() == 0) - i915_perf_oa_available = true; - } - } - - if (i915_perf_oa_available && - oa_register && - get_sysfs_dev_dir(brw) && - init_oa_sys_vars(brw)) - { - brw->perfquery.oa_metrics_table = - _mesa_hash_table_create(NULL, _mesa_key_hash_string, - _mesa_key_string_equal); - - /* Index all the metric sets mesa knows about before looking to see what - * the kernel is advertising. 
- */ - oa_register(brw); - - if (likely((INTEL_DEBUG & DEBUG_NO_OACONFIG) == 0) && - kernel_has_dynamic_config_support(brw)) - init_oa_configs(brw); - else - enumerate_sysfs_metrics(brw); - + if (gen_perf_load_oa_metrics(brw->perfquery.perf, screen->fd, devinfo)) brw_perf_query_register_mdapi_oa_query(brw); - } brw->perfquery.unaccumulated = ralloc_array(brw, struct brw_perf_query_object *, 2); @@ -2237,7 +1878,7 @@ brw_init_perf_query_info(struct gl_context *ctx) brw->perfquery.next_query_start_report_id = 1000; - return brw->perfquery.n_queries; + return brw->perfquery.perf->n_queries; } void diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.h b/src/mesa/drivers/dri/i965/brw_performance_query.h index 66b32c0490b..ca0503422ca 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.h +++ b/src/mesa/drivers/dri/i965/brw_performance_query.h @@ -27,7 +27,8 @@ #include #include "brw_context.h" -#include "brw_performance_query_metrics.h" + +struct gen_perf_query_info; /* * When currently allocate only one page for pipeline statistics queries. Here @@ -57,7 +58,7 @@ struct brw_perf_query_object { struct gl_perf_query_object base; - const struct brw_perf_query_info *query; + const struct gen_perf_query_info *query; /* See query->kind to know which state below is in use... 
*/ union { @@ -142,81 +143,6 @@ struct brw_perf_query_object }; }; -static inline struct brw_perf_query_info * -brw_perf_query_append_query_info(struct brw_context *brw) -{ - brw->perfquery.queries = - reralloc(brw, brw->perfquery.queries, - struct brw_perf_query_info, ++brw->perfquery.n_queries); - - return &brw->perfquery.queries[brw->perfquery.n_queries - 1]; -} - -static inline void -brw_perf_query_info_add_stat_reg(struct brw_perf_query_info *query, - uint32_t reg, - uint32_t numerator, - uint32_t denominator, - const char *name, - const char *description) -{ - struct brw_perf_query_counter *counter; - - assert(query->n_counters < MAX_STAT_COUNTERS); - - counter = &query->counters[query->n_counters]; - counter->name = name; - counter->desc = description; - counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL; - counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL; - counter->size = sizeof(uint64_t); - counter->offset = sizeof(uint64_t) * query->n_counters; - counter->pipeline_stat.reg = reg; - counter->pipeline_stat.numerator = numerator; - counter->pipeline_stat.denominator = denominator; - - query->n_counters++; -} - -static inline void -brw_perf_query_info_add_basic_stat_reg(struct brw_perf_query_info *query, - uint32_t reg, const char *name) -{ - brw_perf_query_info_add_stat_reg(query, reg, 1, 1, name, name); -} - -/* Accumulate 32bits OA counters */ -static inline void -brw_perf_query_accumulate_uint32(const uint32_t *report0, - const uint32_t *report1, - uint64_t *accumulator) -{ - *accumulator += (uint32_t)(*report1 - *report0); -} - -/* Accumulate 40bits OA counters */ -static inline void -brw_perf_query_accumulate_uint40(int a_index, - const uint32_t *report0, - const uint32_t *report1, - uint64_t *accumulator) -{ - const uint8_t *high_bytes0 = (uint8_t *)(report0 + 40); - const uint8_t *high_bytes1 = (uint8_t *)(report1 + 40); - uint64_t high0 = (uint64_t)(high_bytes0[a_index]) << 32; - uint64_t high1 = (uint64_t)(high_bytes1[a_index]) << 32; - 
uint64_t value0 = report0[a_index + 4] | high0; - uint64_t value1 = report1[a_index + 4] | high1; - uint64_t delta; - - if (value0 > value1) - delta = (1ULL << 40) + value1 - value0; - else - delta = value1 - value0; - - *accumulator += delta; -} - int brw_perf_query_get_mdapi_oa_data(struct brw_context *brw, struct brw_perf_query_object *obj, size_t data_size, diff --git a/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c b/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c index 70f69debe98..0676e868b81 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c @@ -24,6 +24,8 @@ #include "brw_defines.h" #include "brw_performance_query.h" +#include "perf/gen_perf.h" + /** * Data format expected by MDAPI. */ @@ -203,16 +205,19 @@ brw_perf_query_get_mdapi_oa_data(struct brw_context *brw, } static void -fill_mdapi_perf_query_counter(struct brw_perf_query_info *query, +fill_mdapi_perf_query_counter(struct gen_perf_query_info *query, const char *name, uint32_t data_offset, uint32_t data_size, - GLenum data_type) + enum gen_perf_counter_data_type data_type) { - struct brw_perf_query_counter *counter = &query->counters[query->n_counters]; + struct gen_perf_query_counter *counter = &query->counters[query->n_counters]; + + assert(query->n_counters <= query->max_counters); counter->name = name; counter->desc = "Raw counter value"; + counter->type = GEN_PERF_COUNTER_TYPE_RAW; counter->data_type = data_type; counter->offset = data_offset; counter->size = data_size; @@ -226,19 +231,21 @@ fill_mdapi_perf_query_counter(struct brw_perf_query_info *query, (uint8_t *) &struct_name.field_name - \ (uint8_t *) &struct_name, \ sizeof(struct_name.field_name), \ - GL_PERFQUERY_COUNTER_DATA_##type_name##_INTEL) + GEN_PERF_COUNTER_DATA_TYPE_##type_name) #define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \ fill_mdapi_perf_query_counter(query, \ ralloc_asprintf(ctx, 
"%s%i", #field_name, idx), \ (uint8_t *) &struct_name.field_name[idx] - \ (uint8_t *) &struct_name, \ sizeof(struct_name.field_name[0]), \ - GL_PERFQUERY_COUNTER_DATA_##type_name##_INTEL) + GEN_PERF_COUNTER_DATA_TYPE_##type_name) void brw_perf_query_register_mdapi_oa_query(struct brw_context *brw) { const struct gen_device_info *devinfo = &brw->screen->devinfo; + struct gen_perf *perf = brw->perfquery.perf; + struct gen_perf_query_info *query = NULL; /* MDAPI requires different structures for pretty much every generation * (right now we have definitions for gen 7 to 11). @@ -246,36 +253,22 @@ brw_perf_query_register_mdapi_oa_query(struct brw_context *brw) if (!(devinfo->gen >= 7 && devinfo->gen <= 11)) return; - struct brw_perf_query_info *query = brw_perf_query_append_query_info(brw); - - query->kind = OA_COUNTERS_RAW; - query->name = "Intel_Raw_Hardware_Counters_Set_0_Query"; - /* Guid has to matches with MDAPI's. */ - query->guid = "2f01b241-7014-42a7-9eb6-a925cad3daba"; - query->n_counters = 0; - query->oa_metrics_set_id = 0; /* Set by MDAPI */ - - int n_counters; switch (devinfo->gen) { case 7: { + query = gen_perf_query_append_query_info(perf, 1 + 45 + 16 + 7); query->oa_format = I915_OA_FORMAT_A45_B8_C8; struct mdapi_gen7_metrics metric_data; query->data_size = sizeof(metric_data); - n_counters = 1 + 45 + 16 + 7; - query->counters = - rzalloc_array_size(brw->perfquery.queries, - sizeof(*query->counters), n_counters); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64); for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) { - MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries, - query, metric_data, ACounters, i, UINT64); + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, ACounters, i, UINT64); } for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) { - MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries, - query, metric_data, NOACounters, i, UINT64); + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + 
metric_data, NOACounters, i, UINT64); } MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64); MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64); @@ -287,25 +280,21 @@ brw_perf_query_register_mdapi_oa_query(struct brw_context *brw) break; } case 8: { + query = gen_perf_query_append_query_info(perf, 2 + 36 + 16 + 16); query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8; struct mdapi_gen8_metrics metric_data; query->data_size = sizeof(metric_data); - n_counters = 2 + 36 + 16 + 16; - query->counters = - rzalloc_array_size(brw->perfquery.queries, - sizeof(*query->counters), n_counters); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64); MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64); for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) { - MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries, - query, metric_data, OaCntr, i, UINT64); + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, OaCntr, i, UINT64); } for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) { - MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries, - query, metric_data, NoaCntr, i, UINT64); + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, NoaCntr, i, UINT64); } MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64); MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64); @@ -328,25 +317,21 @@ brw_perf_query_register_mdapi_oa_query(struct brw_context *brw) case 9: case 10: case 11: { + query = gen_perf_query_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2); query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8; struct mdapi_gen9_metrics metric_data; query->data_size = sizeof(metric_data); - n_counters = 2 + 36 + 16 + 16 + 16 + 2; - query->counters = - rzalloc_array_size(brw->perfquery.queries, - sizeof(*query->counters), n_counters); - MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64); MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64); for (int i = 0; i 
< ARRAY_SIZE(metric_data.OaCntr); i++) { - MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries, - query, metric_data, OaCntr, i, UINT64); + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, OaCntr, i, UINT64); } for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) { - MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries, - query, metric_data, NoaCntr, i, UINT64); + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, NoaCntr, i, UINT64); } MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64); MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64); @@ -365,8 +350,8 @@ brw_perf_query_register_mdapi_oa_query(struct brw_context *brw) MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32); MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32); for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) { - MDAPI_QUERY_ADD_ARRAY_COUNTER(brw->perfquery.queries, - query, metric_data, UserCntr, i, UINT64); + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, UserCntr, i, UINT64); } MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32); MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32); @@ -377,12 +362,15 @@ brw_perf_query_register_mdapi_oa_query(struct brw_context *brw) break; } - assert(query->n_counters <= n_counters); + query->kind = GEN_PERF_QUERY_TYPE_RAW; + query->name = "Intel_Raw_Hardware_Counters_Set_0_Query"; + /* Guid has to match MDAPI's. */ + query->guid = "2f01b241-7014-42a7-9eb6-a925cad3daba"; { /* Accumulation buffer offsets copied from an actual query... 
*/ - const struct brw_perf_query_info *copy_query = - &brw->perfquery.queries[0]; + const struct gen_perf_query_info *copy_query = + &brw->perfquery.perf->queries[0]; query->gpu_time_offset = copy_query->gpu_time_offset; query->gpu_clock_offset = copy_query->gpu_clock_offset; @@ -400,43 +388,41 @@ brw_perf_query_register_mdapi_statistic_query(struct brw_context *brw) if (!(devinfo->gen >= 7 && devinfo->gen <= 9)) return; - struct brw_perf_query_info *query = brw_perf_query_append_query_info(brw); + struct gen_perf_query_info *query = + gen_perf_query_append_query_info(brw->perfquery.perf, MAX_STAT_COUNTERS); - query->kind = PIPELINE_STATS; + query->kind = GEN_PERF_QUERY_TYPE_PIPELINE; query->name = "Intel_Raw_Pipeline_Statistics_Query"; - query->n_counters = 0; - query->counters = - rzalloc_array(brw, struct brw_perf_query_counter, MAX_STAT_COUNTERS); /* The order has to match mdapi_pipeline_metrics. */ - brw_perf_query_info_add_basic_stat_reg(query, IA_VERTICES_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, IA_VERTICES_COUNT, "N vertices submitted"); - brw_perf_query_info_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT, "N primitives submitted"); - brw_perf_query_info_add_basic_stat_reg(query, VS_INVOCATION_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, VS_INVOCATION_COUNT, "N vertex shader invocations"); - brw_perf_query_info_add_basic_stat_reg(query, GS_INVOCATION_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, GS_INVOCATION_COUNT, "N geometry shader invocations"); - brw_perf_query_info_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT, "N geometry shader primitives emitted"); - brw_perf_query_info_add_basic_stat_reg(query, CL_INVOCATION_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, CL_INVOCATION_COUNT, "N primitives entering clipping"); - brw_perf_query_info_add_basic_stat_reg(query, 
CL_PRIMITIVES_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT, "N primitives leaving clipping"); if (devinfo->is_haswell || devinfo->gen == 8) { - brw_perf_query_info_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4, + gen_perf_query_info_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4, "N fragment shader invocations", "N fragment shader invocations"); } else { - brw_perf_query_info_add_basic_stat_reg(query, PS_INVOCATION_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, PS_INVOCATION_COUNT, "N fragment shader invocations"); } - brw_perf_query_info_add_basic_stat_reg(query, HS_INVOCATION_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, HS_INVOCATION_COUNT, "N TCS shader invocations"); - brw_perf_query_info_add_basic_stat_reg(query, DS_INVOCATION_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, DS_INVOCATION_COUNT, "N TES shader invocations"); if (devinfo->gen >= 7) { - brw_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT, + gen_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT, "N compute shader invocations"); } diff --git a/src/mesa/drivers/dri/i965/brw_performance_query_metrics.h b/src/mesa/drivers/dri/i965/brw_performance_query_metrics.h deleted file mode 100644 index 80d7ddc07cf..00000000000 --- a/src/mesa/drivers/dri/i965/brw_performance_query_metrics.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright © 2018 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all 
copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef BRW_PERFORMANCE_QUERY_METRICS_H -#define BRW_PERFORMANCE_QUERY_METRICS_H - -#include - -struct brw_pipeline_stat -{ - uint32_t reg; - uint32_t numerator; - uint32_t denominator; -}; - -struct brw_perf_query_counter -{ - const char *name; - const char *desc; - GLenum type; - GLenum data_type; - uint64_t raw_max; - size_t offset; - size_t size; - - union { - uint64_t (*oa_counter_read_uint64)(struct brw_context *brw, - const struct brw_perf_query_info *query, - uint64_t *accumulator); - float (*oa_counter_read_float)(struct brw_context *brw, - const struct brw_perf_query_info *query, - uint64_t *accumulator); - struct brw_pipeline_stat pipeline_stat; - }; -}; - -#endif /* BRW_PERFORMANCE_QUERY_METRICS_H */ diff --git a/src/mesa/drivers/dri/i965/meson.build b/src/mesa/drivers/dri/i965/meson.build index ca93af0ab10..3c3637dbebc 100644 --- a/src/mesa/drivers/dri/i965/meson.build +++ b/src/mesa/drivers/dri/i965/meson.build @@ -57,7 +57,6 @@ files_i965 = files( 'brw_performance_query.h', 'brw_performance_query.c', 'brw_performance_query_mdapi.c', - 'brw_performance_query_metrics.h', 'brw_program.c', 'brw_program.h', 'brw_program_binary.c', @@ -149,37 +148,9 @@ foreach v : ['40', '45', '50', '60', '70', '75', '80', '90', '100', '110'] endforeach -i965_hw_metrics = [ - 'hsw', - 'bdw', 'chv', - 'sklgt2', 'sklgt3', 'sklgt4', - 'kblgt2', 'kblgt3', - 'cflgt2', 'cflgt3', - 'bxt', 'glk', - 'cnl', - 'icl', -] - 
-i965_hw_metrics_xml_files = [] -foreach hw : i965_hw_metrics - i965_hw_metrics_xml_files += 'brw_oa_@0@.xml'.format(hw) -endforeach - -i965_oa_sources = custom_target( - 'i965-oa-sources', - input : i965_hw_metrics_xml_files, - output : [ 'brw_oa_metrics.c', 'brw_oa_metrics.h' ], - command : [ - prog_python, files('brw_oa.py'), - '--code', '@OUTPUT0@', '--header', '@OUTPUT1@', - '@INPUT@', - ], -) - libi965 = static_library( 'i965', - [files_i965, i965_oa_sources, ir_expression_operation_h, - xmlpool_options_h], + [files_i965, ir_expression_operation_h, xmlpool_options_h], include_directories : [ inc_common, inc_intel, inc_dri_common, inc_util, inc_include, ], @@ -187,7 +158,7 @@ libi965 = static_library( cpp_args : [cpp_vis_args, c_sse2_args], link_with : [ i965_gen_libs, libintel_common, libintel_dev, libisl, libintel_compiler, - libblorp + libblorp, libintel_perf ], dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers, idep_genxml], ) -- 2.30.2