+/**
+ * qsort() comparator for arrays of struct gen_perf_query_counter.
+ *
+ * Orders the two elements by their name strings; the result is whatever
+ * strcmp() returns for those names.
+ */
+static int
+gen_perf_compare_counter_names(const void *v1, const void *v2)
+{
+   const struct gen_perf_query_counter *lhs = v1, *rhs = v2;
+   const char *lhs_name = lhs->name;
+   const char *rhs_name = rhs->name;
+
+   return strcmp(lhs_name, rhs_name);
+}
+
+/**
+ * Sort the counters array of a query in place, using
+ * gen_perf_compare_counter_names() as the ordering.
+ */
+static void
+sort_query(struct gen_perf_query_info *q)
+{
+   const size_t counter_size = sizeof(q->counters[0]);
+
+   qsort(q->counters, q->n_counters, counter_size,
+         gen_perf_compare_counter_names);
+}
+
+/**
+ * Append a "Pipeline Statistics Registers" query to perf_cfg and populate it
+ * with the statistics registers that apply to the device described by
+ * devinfo.
+ *
+ * After every register has been added, the query's data_size is set to one
+ * uint64_t per counter and the counters are sorted with sort_query().
+ */
+static void
+load_pipeline_statistic_metrics(struct gen_perf_config *perf_cfg,
+                                const struct gen_device_info *devinfo)
+{
+   struct gen_perf_query_info *stats =
+      gen_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS);
+
+   stats->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
+   stats->name = "Pipeline Statistics Registers";
+
+   gen_perf_query_add_basic_stat_reg(stats, IA_VERTICES_COUNT,
+                                     "N vertices submitted");
+   gen_perf_query_add_basic_stat_reg(stats, IA_PRIMITIVES_COUNT,
+                                     "N primitives submitted");
+   gen_perf_query_add_basic_stat_reg(stats, VS_INVOCATION_COUNT,
+                                     "N vertex shader invocations");
+
+   if (devinfo->gen == 6) {
+      /* Gen6 has a single pair of stream-out registers. */
+      gen_perf_query_add_stat_reg(stats, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1,
+                                  "SO_PRIM_STORAGE_NEEDED",
+                                  "N geometry shader stream-out primitives (total)");
+      gen_perf_query_add_stat_reg(stats, GEN6_SO_NUM_PRIMS_WRITTEN, 1, 1,
+                                  "SO_NUM_PRIMS_WRITTEN",
+                                  "N geometry shader stream-out primitives (written)");
+   } else {
+      /* Every other generation gets one pair per stream (0-3). The table
+       * keeps the registration order: all "total" counters first, then all
+       * "written" counters.
+       */
+      const struct {
+         uint32_t reg;
+         const char *name;
+         const char *desc;
+      } so_regs[] = {
+         { GEN7_SO_PRIM_STORAGE_NEEDED(0),
+           "SO_PRIM_STORAGE_NEEDED (Stream 0)",
+           "N stream-out (stream 0) primitives (total)" },
+         { GEN7_SO_PRIM_STORAGE_NEEDED(1),
+           "SO_PRIM_STORAGE_NEEDED (Stream 1)",
+           "N stream-out (stream 1) primitives (total)" },
+         { GEN7_SO_PRIM_STORAGE_NEEDED(2),
+           "SO_PRIM_STORAGE_NEEDED (Stream 2)",
+           "N stream-out (stream 2) primitives (total)" },
+         { GEN7_SO_PRIM_STORAGE_NEEDED(3),
+           "SO_PRIM_STORAGE_NEEDED (Stream 3)",
+           "N stream-out (stream 3) primitives (total)" },
+         { GEN7_SO_NUM_PRIMS_WRITTEN(0),
+           "SO_NUM_PRIMS_WRITTEN (Stream 0)",
+           "N stream-out (stream 0) primitives (written)" },
+         { GEN7_SO_NUM_PRIMS_WRITTEN(1),
+           "SO_NUM_PRIMS_WRITTEN (Stream 1)",
+           "N stream-out (stream 1) primitives (written)" },
+         { GEN7_SO_NUM_PRIMS_WRITTEN(2),
+           "SO_NUM_PRIMS_WRITTEN (Stream 2)",
+           "N stream-out (stream 2) primitives (written)" },
+         { GEN7_SO_NUM_PRIMS_WRITTEN(3),
+           "SO_NUM_PRIMS_WRITTEN (Stream 3)",
+           "N stream-out (stream 3) primitives (written)" },
+      };
+      const size_t n_so_regs = sizeof(so_regs) / sizeof(so_regs[0]);
+
+      for (size_t i = 0; i < n_so_regs; i++) {
+         gen_perf_query_add_stat_reg(stats, so_regs[i].reg, 1, 1,
+                                     so_regs[i].name, so_regs[i].desc);
+      }
+   }
+
+   gen_perf_query_add_basic_stat_reg(stats, HS_INVOCATION_COUNT,
+                                     "N TCS shader invocations");
+   gen_perf_query_add_basic_stat_reg(stats, DS_INVOCATION_COUNT,
+                                     "N TES shader invocations");
+
+   gen_perf_query_add_basic_stat_reg(stats, GS_INVOCATION_COUNT,
+                                     "N geometry shader invocations");
+   gen_perf_query_add_basic_stat_reg(stats, GS_PRIMITIVES_COUNT,
+                                     "N geometry shader primitives emitted");
+
+   gen_perf_query_add_basic_stat_reg(stats, CL_INVOCATION_COUNT,
+                                     "N primitives entering clipping");
+   gen_perf_query_add_basic_stat_reg(stats, CL_PRIMITIVES_COUNT,
+                                     "N primitives leaving clipping");
+
+   /* Haswell and gen8 register PS_INVOCATION_COUNT with the extra (1, 4)
+    * arguments -- presumably a 1/4 scaling factor; confirm against
+    * gen_perf_query_add_stat_reg().
+    */
+   const bool ps_needs_scaling = devinfo->is_haswell || devinfo->gen == 8;
+   if (ps_needs_scaling) {
+      gen_perf_query_add_stat_reg(stats, PS_INVOCATION_COUNT, 1, 4,
+                                  "N fragment shader invocations",
+                                  "N fragment shader invocations");
+   } else {
+      gen_perf_query_add_basic_stat_reg(stats, PS_INVOCATION_COUNT,
+                                        "N fragment shader invocations");
+   }
+
+   gen_perf_query_add_basic_stat_reg(stats, PS_DEPTH_COUNT,
+                                     "N z-pass fragments");
+
+   /* The compute shader counter is only registered from gen7 onwards. */
+   if (devinfo->gen >= 7) {
+      gen_perf_query_add_basic_stat_reg(stats, CS_INVOCATION_COUNT,
+                                        "N compute shader invocations");
+   }
+
+   stats->data_size = sizeof(uint64_t) * stats->n_counters;
+
+   sort_query(stats);
+}
+
+/**
+ * Ask the kernel for I915_PARAM_PERF_REVISION through the GETPARAM ioctl on
+ * drm_fd.
+ *
+ * Returns the value reported by the kernel, or 0 if the ioctl fails. The
+ * first supported version is 1, so 0 stands for "getparam not supported".
+ */
+static int
+i915_perf_version(int drm_fd)
+{
+   int revision;
+   drm_i915_getparam_t getparam = {
+      .param = I915_PARAM_PERF_REVISION,
+      .value = &revision,
+   };
+
+   /* revision is only meaningful (and only read) when the ioctl succeeded. */
+   if (gen_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &getparam) < 0)
+      return 0;
+
+   return revision;
+}
+
+/**
+ * Issue the GEM_CONTEXT_GETPARAM ioctl for I915_CONTEXT_PARAM_SSEU on drm_fd,
+ * with the request pointing at the caller-provided sseu structure.
+ *
+ * The ioctl's return value is not inspected here, so on failure *sseu is
+ * left as whatever the caller put in it.
+ */
+static void
+i915_get_sseu(int drm_fd, struct drm_i915_gem_context_param_sseu *sseu)
+{
+   struct drm_i915_gem_context_param request = {
+      .value = to_user_pointer(sseu),
+      .size = sizeof(*sseu),
+      .param = I915_CONTEXT_PARAM_SSEU,
+   };
+
+   gen_ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &request);
+}
+
+/**
+ * strcmp() variant that tolerates NULL arguments.
+ *
+ * Two NULLs compare equal (0); a NULL sorts before any non-NULL string
+ * (-1 when only s1 is NULL, 1 when only s2 is NULL). When both are non-NULL
+ * the strcmp() result is returned unchanged.
+ */
+static inline int
+compare_str_or_null(const char *s1, const char *s2)
+{
+   if (s1 != NULL && s2 != NULL)
+      return strcmp(s1, s2);
+
+   /* At least one side is NULL: each comparison yields 0 or 1, so the
+    * difference is exactly -1, 0 or 1.
+    */
+   return (s1 != NULL) - (s2 != NULL);
+}
+
+/**
+ * qsort() comparator for arrays of struct gen_perf_query_counter_info.
+ *
+ * The counter's category is the primary key and its name the secondary one.
+ * Categories go through compare_str_or_null() because pipeline counters
+ * don't have an assigned category.
+ */
+static int
+compare_counter_categories_and_names(const void *_c1, const void *_c2)
+{
+   const struct gen_perf_query_counter_info *lhs = _c1;
+   const struct gen_perf_query_counter_info *rhs = _c2;
+
+   const int by_category =
+      compare_str_or_null(lhs->counter->category, rhs->counter->category);
+
+   /* Names only break ties between counters of the same category. */
+   return by_category != 0 ? by_category
+                           : strcmp(lhs->counter->name, rhs->counter->name);
+}
+
+/**
+ * Fill perf->counter_infos / perf->n_counters with one entry per distinct
+ * counter symbol_name found across all of perf->queries.
+ *
+ * Each entry points at the first counter seen with that symbol name, records
+ * the (query index, counter index) where it was found, and accumulates in
+ * query_mask one bit per query that contains a counter with that symbol
+ * name. The resulting array is sorted by category, then by name.
+ */
+static void
+build_unique_counter_list(struct gen_perf_config *perf)
+{
+   /* query_mask has one bit per query index. */
+   assert(perf->n_queries < 64);
+
+   /* Upper bound on the result: no counter is shared between queries. */
+   size_t capacity = 0;
+
+   for (int q = 0; q < perf->n_queries; q++)
+      capacity += perf->queries[q].n_counters;
+
+   /*
+    * Size the array for the worst case up front. Growing it on demand is not
+    * an option: the hash table below stores pointers into this array, and a
+    * realloc could move it.
+    */
+   struct gen_perf_query_counter_info *infos =
+      ralloc_array_size(perf, sizeof(infos[0]), capacity);
+
+   perf->n_counters = 0;
+
+   /* Maps symbol_name -> entry in infos for the counters seen so far. */
+   struct hash_table *seen =
+      _mesa_hash_table_create(perf, _mesa_hash_string, _mesa_key_string_equal);
+
+   for (int q = 0; q < perf->n_queries; q++) {
+      struct gen_perf_query_info *query = &perf->queries[q];
+
+      for (int c = 0; c < query->n_counters; c++) {
+         struct gen_perf_query_counter *counter = &query->counters[c];
+         struct hash_entry *hit =
+            _mesa_hash_table_search(seen, counter->symbol_name);
+
+         if (hit != NULL) {
+            /* Already listed: just note that this query exposes it too. */
+            struct gen_perf_query_counter_info *known = hit->data;
+
+            known->query_mask |= BITFIELD64_BIT(q);
+         } else {
+            assert(perf->n_counters < capacity);
+
+            struct gen_perf_query_counter_info *fresh =
+               &infos[perf->n_counters++];
+
+            fresh->counter = counter;
+            fresh->query_mask = BITFIELD64_BIT(q);
+            fresh->location.group_idx = q;
+            fresh->location.counter_idx = c;
+
+            _mesa_hash_table_insert(seen, counter->symbol_name, fresh);
+         }
+      }
+   }
+
+   _mesa_hash_table_destroy(seen, NULL);
+
+   /* With the hash table gone nothing points into the array any more, so it
+    * can be resized to the number of entries actually used.
+    */
+   perf->counter_infos = reralloc_array_size(perf, infos, sizeof(infos[0]),
+                                             perf->n_counters);
+
+   qsort(perf->counter_infos, perf->n_counters,
+         sizeof(perf->counter_infos[0]),
+         compare_counter_categories_and_names);
+}
+
+static bool
+oa_metrics_available(struct gen_perf_config *perf, int fd,
+ const struct gen_device_info *devinfo)