intel/perf: create a vtable entry for emit_report_count
[mesa.git] / src / intel / perf / gen_perf.h
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef GEN_PERF_H
25 #define GEN_PERF_H
26
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <string.h>
30
31 #include <sys/sysmacros.h>
32
33 #include "util/hash_table.h"
34 #include "util/ralloc.h"
35
36 struct gen_device_info;
37
38 struct gen_perf_config;
39 struct gen_perf_query_info;
40
/**
 * Classification of a performance counter, stored in
 * gen_perf_query_counter::type.
 *
 * The numeric values are spelled out so that they stay stable if entries
 * are ever appended.
 */
enum gen_perf_counter_type {
   GEN_PERF_COUNTER_TYPE_EVENT         = 0,
   GEN_PERF_COUNTER_TYPE_DURATION_NORM = 1,
   GEN_PERF_COUNTER_TYPE_DURATION_RAW  = 2,
   GEN_PERF_COUNTER_TYPE_THROUGHPUT    = 3,
   GEN_PERF_COUNTER_TYPE_RAW           = 4,
   GEN_PERF_COUNTER_TYPE_TIMESTAMP     = 5,
};
49
/**
 * Storage type of a counter value, stored in
 * gen_perf_query_counter::data_type.
 *
 * gen_perf_query_counter_get_size() translates each entry into the byte
 * size of the corresponding C type.
 */
enum gen_perf_counter_data_type {
   GEN_PERF_COUNTER_DATA_TYPE_BOOL32 = 0,
   GEN_PERF_COUNTER_DATA_TYPE_UINT32 = 1,
   GEN_PERF_COUNTER_DATA_TYPE_UINT64 = 2,
   GEN_PERF_COUNTER_DATA_TYPE_FLOAT  = 3,
   GEN_PERF_COUNTER_DATA_TYPE_DOUBLE = 4,
};
57
/**
 * Description of a pipeline statistics counter, embedded in
 * gen_perf_query_counter for counters added through
 * gen_perf_query_info_add_stat_reg().
 */
struct gen_pipeline_stat {
   /** Register holding the statistic. */
   uint32_t reg;

   /* NOTE(review): presumably the raw register value is scaled by
    * numerator / denominator when it is read back -- confirm in the users.
    */
   uint32_t numerator;
   uint32_t denominator;
};
63
/*
 * Upper bound on the number of values held for one OA report.
 *
 * The largest OA formats we can use are:
 *
 *   Haswell: 1 timestamp + 45 A + 8 B + 8 C counters           = 62
 *   Gen8+:   1 timestamp + 1 clock + 36 A + 8 B + 8 C counters = 54
 *
 * so the Haswell layout determines the limit.
 */
#define MAX_OA_REPORT_COUNTERS 62
72
73 struct gen_perf_query_result {
74 /**
75 * Storage for the final accumulated OA counters.
76 */
77 uint64_t accumulator[MAX_OA_REPORT_COUNTERS];
78
79 /**
80 * Hw ID used by the context on which the query was running.
81 */
82 uint32_t hw_id;
83
84 /**
85 * Number of reports accumulated to produce the results.
86 */
87 uint32_t reports_accumulated;
88
89 /**
90 * Frequency in the slices of the GT at the begin and end of the
91 * query.
92 */
93 uint64_t slice_frequency[2];
94
95 /**
96 * Frequency in the unslice of the GT at the begin and end of the
97 * query.
98 */
99 uint64_t unslice_frequency[2];
100 };
101
102 struct gen_perf_query_counter {
103 const char *name;
104 const char *desc;
105 enum gen_perf_counter_type type;
106 enum gen_perf_counter_data_type data_type;
107 uint64_t raw_max;
108 size_t offset;
109
110 union {
111 uint64_t (*oa_counter_read_uint64)(struct gen_perf_config *perf,
112 const struct gen_perf_query_info *query,
113 const uint64_t *accumulator);
114 float (*oa_counter_read_float)(struct gen_perf_config *perf,
115 const struct gen_perf_query_info *query,
116 const uint64_t *accumulator);
117 struct gen_pipeline_stat pipeline_stat;
118 };
119 };
120
/**
 * A single (register, value) pair; arrays of these make up the register
 * programming of a query (see gen_perf_query_info).
 */
struct gen_perf_query_register_prog {
   uint32_t reg; /**< Register to program. */
   uint32_t val; /**< Value for that register. */
};
125
/**
 * Description of one query: its identity, its counters and, for OA queries,
 * the register programming that goes with it.
 */
struct gen_perf_query_info {
   /** Which family of query this is. */
   enum gen_perf_query_type {
      GEN_PERF_QUERY_TYPE_OA       = 0,
      GEN_PERF_QUERY_TYPE_RAW      = 1,
      GEN_PERF_QUERY_TYPE_PIPELINE = 2,
   } kind;

   const char *name;
   const char *guid;

   /** Counter array; n_counters entries are in use out of max_counters. */
   struct gen_perf_query_counter *counters;
   int n_counters;
   int max_counters;

   size_t data_size;

   /* OA specific */
   uint64_t oa_metrics_set_id;
   int oa_format;

   /* Offsets used to index into the accumulator[] array ... */
   int gpu_time_offset;
   int gpu_clock_offset;
   int a_offset;
   int b_offset;
   int c_offset;

   /* Register programming for a given query: three (pointer, count) pairs */
   struct gen_perf_query_register_prog *flex_regs;
   uint32_t n_flex_regs;

   struct gen_perf_query_register_prog *mux_regs;
   uint32_t n_mux_regs;

   struct gen_perf_query_register_prog *b_counter_regs;
   uint32_t n_b_counter_regs;
};
160
/**
 * Top-level performance query state: the list of known queries, the system
 * variables their equations refer to, and a table of callbacks.
 */
struct gen_perf_config {
   /** Array of n_queries entries, grown by
    *  gen_perf_query_append_query_info().
    */
   struct gen_perf_query_info *queries;
   int n_queries;

   /* Variables referenced in the XML metadata for OA performance counters,
    * e.g. in the normalization equations. The trailing comment on each field
    * gives the name used in the XML.
    *
    * All are uint64_t so that generated code sees consistent operand types.
    */
   struct {
      uint64_t timestamp_frequency; /** $GpuTimestampFrequency */
      uint64_t n_eus;               /** $EuCoresTotalCount */
      uint64_t n_eu_slices;         /** $EuSlicesTotalCount */
      uint64_t n_eu_sub_slices;     /** $EuSubslicesTotalCount */
      uint64_t eu_threads_count;    /** $EuThreadsCount */
      uint64_t slice_mask;          /** $SliceMask */
      uint64_t subslice_mask;       /** $SubsliceMask */
      uint64_t gt_min_freq;         /** $GpuMinFrequency */
      uint64_t gt_max_freq;         /** $GpuMaxFrequency */
      uint64_t revision;            /** $SkuRevisionId */
   } sys_vars;

   /* OA metric sets, indexed by GUID, as known by Mesa at build time, used
    * to cross-reference with the GUIDs of the configs advertised by the
    * kernel at runtime.
    */
   struct hash_table *oa_metrics_table;

   /* Location of the device's sysfs entry. */
   char sysfs_dev_dir[256];

   /* Callback table.
    *
    * NOTE(review): presumably filled in by the driver using this library so
    * that buffer management and command emission stay driver specific; the
    * void pointers would then be driver-private objects -- confirm.
    */
   struct {
      void *(*bo_alloc)(void *bufmgr, const char *name, uint64_t size);
      void (*bo_unreference)(void *bo);
      void (*emit_mi_report_perf_count)(void *ctx,
                                        void *bo,
                                        uint32_t offset_in_bytes,
                                        uint32_t report_id);
   } vtbl;
};
201
202 static inline size_t
203 gen_perf_query_counter_get_size(const struct gen_perf_query_counter *counter)
204 {
205 switch (counter->data_type) {
206 case GEN_PERF_COUNTER_DATA_TYPE_BOOL32:
207 return sizeof(uint32_t);
208 case GEN_PERF_COUNTER_DATA_TYPE_UINT32:
209 return sizeof(uint32_t);
210 case GEN_PERF_COUNTER_DATA_TYPE_UINT64:
211 return sizeof(uint64_t);
212 case GEN_PERF_COUNTER_DATA_TYPE_FLOAT:
213 return sizeof(float);
214 case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE:
215 return sizeof(double);
216 default:
217 unreachable("invalid counter data type");
218 }
219 }
220
221 static inline struct gen_perf_query_info *
222 gen_perf_query_append_query_info(struct gen_perf_config *perf, int max_counters)
223 {
224 struct gen_perf_query_info *query;
225
226 perf->queries = reralloc(perf, perf->queries,
227 struct gen_perf_query_info,
228 ++perf->n_queries);
229 query = &perf->queries[perf->n_queries - 1];
230 memset(query, 0, sizeof(*query));
231
232 if (max_counters > 0) {
233 query->max_counters = max_counters;
234 query->counters =
235 rzalloc_array(perf, struct gen_perf_query_counter, max_counters);
236 }
237
238 return query;
239 }
240
241 static inline void
242 gen_perf_query_info_add_stat_reg(struct gen_perf_query_info *query,
243 uint32_t reg,
244 uint32_t numerator,
245 uint32_t denominator,
246 const char *name,
247 const char *description)
248 {
249 struct gen_perf_query_counter *counter;
250
251 assert(query->n_counters < query->max_counters);
252
253 counter = &query->counters[query->n_counters];
254 counter->name = name;
255 counter->desc = description;
256 counter->type = GEN_PERF_COUNTER_TYPE_RAW;
257 counter->data_type = GEN_PERF_COUNTER_DATA_TYPE_UINT64;
258 counter->offset = sizeof(uint64_t) * query->n_counters;
259 counter->pipeline_stat.reg = reg;
260 counter->pipeline_stat.numerator = numerator;
261 counter->pipeline_stat.denominator = denominator;
262
263 query->n_counters++;
264 }
265
/**
 * Shorthand for gen_perf_query_info_add_stat_reg() with numerator and
 * denominator both set to 1 and \p name used as the description as well.
 */
static inline void
gen_perf_query_info_add_basic_stat_reg(struct gen_perf_query_info *query,
                                       uint32_t reg, const char *name)
{
   gen_perf_query_info_add_stat_reg(query, reg,
                                    1 /* numerator */,
                                    1 /* denominator */,
                                    name,
                                    name /* description */);
}
272
273 static inline struct gen_perf_config *
274 gen_perf_new(void *ctx)
275 {
276 struct gen_perf_config *perf = rzalloc(ctx, struct gen_perf_config);
277 return perf;
278 }
279
/* Functions implemented outside this header.
 *
 * NOTE(review): the one-line summaries below are derived from the names and
 * signatures only -- confirm against the implementations.
 */

/* Presumably populates perf with the OA metric sets usable on the device
 * behind fd.
 */
bool
gen_perf_load_oa_metrics(struct gen_perf_config *perf,
                         int fd,
                         const struct gen_device_info *devinfo);

/* Presumably looks up the metric set id matching guid and stores it in
 * *metric_id.
 */
bool
gen_perf_load_metric_id(struct gen_perf_config *perf,
                        const char *guid,
                        uint64_t *metric_id);

/* Presumably fills result->slice_frequency[] and
 * result->unslice_frequency[] from the start and end reports.
 */
void
gen_perf_query_result_read_frequencies(struct gen_perf_query_result *result,
                                       const struct gen_device_info *devinfo,
                                       const uint32_t *start,
                                       const uint32_t *end);

/* Presumably adds the counter deltas between the start and end reports to
 * result->accumulator[].
 */
void
gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
                                 const struct gen_perf_query_info *query,
                                 const uint32_t *start,
                                 const uint32_t *end);

/* Presumably resets result to its initial state. */
void
gen_perf_query_result_clear(struct gen_perf_query_result *result);
294
295
296 #endif /* GEN_PERF_H */