6e7a6d36c1570ec6a4f0e81cfeba606a33780e19
[mesa.git] / src / intel / perf / gen_perf.h
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef GEN_PERF_H
25 #define GEN_PERF_H
26
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <string.h>
30
31 #include <sys/sysmacros.h>
32
33 #include "util/hash_table.h"
34 #include "util/ralloc.h"
35
36 struct gen_device_info;
37
38 struct gen_perf;
39 struct gen_perf_query_info;
40
/**
 * Kind of quantity a performance counter reports.
 *
 * The numeric values are the same as the implicit numbering would give;
 * they are only spelled out here for readability.
 */
enum gen_perf_counter_type {
   GEN_PERF_COUNTER_TYPE_EVENT         = 0,
   GEN_PERF_COUNTER_TYPE_DURATION_NORM = 1,
   GEN_PERF_COUNTER_TYPE_DURATION_RAW  = 2,
   GEN_PERF_COUNTER_TYPE_THROUGHPUT    = 3,
   GEN_PERF_COUNTER_TYPE_RAW           = 4,
   GEN_PERF_COUNTER_TYPE_TIMESTAMP     = 5,
};
49
/**
 * C representation used for a counter's value.
 *
 * The numeric values are the same as the implicit numbering would give;
 * they are only spelled out here for readability.
 */
enum gen_perf_counter_data_type {
   GEN_PERF_COUNTER_DATA_TYPE_BOOL32 = 0,
   GEN_PERF_COUNTER_DATA_TYPE_UINT32 = 1,
   GEN_PERF_COUNTER_DATA_TYPE_UINT64 = 2,
   GEN_PERF_COUNTER_DATA_TYPE_FLOAT  = 3,
   GEN_PERF_COUNTER_DATA_TYPE_DOUBLE = 4,
};
57
/**
 * Description of a pipeline statistics counter: the register it is read
 * from plus a numerator/denominator pair.
 *
 * NOTE(review): the pair is presumably a scale factor applied to the raw
 * register value -- confirm against the code that consumes it.
 */
struct gen_pipeline_stat {
   uint32_t reg;          /* Register identifying the statistic */
   uint32_t numerator;
   uint32_t denominator;
};
63
/*
 * Upper bound on the number of values held in one OA report, taken from
 * the largest OA formats we can use:
 *
 *   Haswell: 1 timestamp + 45 A counters + 8 B counters + 8 C counters
 *            (62 values)
 *   Gen8+:   1 timestamp + 1 clock + 36 A counters + 8 B counters +
 *            8 C counters (54 values)
 */
#define MAX_OA_REPORT_COUNTERS 62
72
73 struct gen_perf_query_result {
74 /**
75 * Storage for the final accumulated OA counters.
76 */
77 uint64_t accumulator[MAX_OA_REPORT_COUNTERS];
78
79 /**
80 * Hw ID used by the context on which the query was running.
81 */
82 uint32_t hw_id;
83
84 /**
85 * Number of reports accumulated to produce the results.
86 */
87 uint32_t reports_accumulated;
88
89 /**
90 * Frequency in the slices of the GT at the begin and end of the
91 * query.
92 */
93 uint64_t slice_frequency[2];
94
95 /**
96 * Frequency in the unslice of the GT at the begin and end of the
97 * query.
98 */
99 uint64_t unslice_frequency[2];
100 };
101
102 struct gen_perf_query_counter {
103 const char *name;
104 const char *desc;
105 enum gen_perf_counter_type type;
106 enum gen_perf_counter_data_type data_type;
107 uint64_t raw_max;
108 size_t offset;
109
110 union {
111 uint64_t (*oa_counter_read_uint64)(struct gen_perf *perf,
112 const struct gen_perf_query_info *query,
113 uint64_t *accumulator);
114 float (*oa_counter_read_float)(struct gen_perf *perf,
115 const struct gen_perf_query_info *query,
116 uint64_t *accumulator);
117 struct gen_pipeline_stat pipeline_stat;
118 };
119 };
120
/**
 * One register write used to program the hardware for a query: a register
 * and the value associated with it.
 */
struct gen_perf_query_register_prog {
   uint32_t reg;   /* Register */
   uint32_t val;   /* Value for that register */
};
125
/**
 * Description of one query: its kind, identity, counters and, for OA
 * queries, the metric set and register programming that go with it.
 */
struct gen_perf_query_info {
   /* Kind of query being described */
   enum gen_perf_query_type {
      GEN_PERF_QUERY_TYPE_OA,
      GEN_PERF_QUERY_TYPE_RAW,
      GEN_PERF_QUERY_TYPE_PIPELINE,
   } kind;
   const char *name;
   const char *guid;

   /* counters[] has room for max_counters entries, of which the first
    * n_counters are in use.
    */
   struct gen_perf_query_counter *counters;
   int n_counters;
   int max_counters;
   size_t data_size;

   /* Fields below are specific to OA queries */
   uint64_t oa_metrics_set_id;
   int oa_format;

   /* Indices into the accumulator[] array of a query result */
   int gpu_time_offset;
   int gpu_clock_offset;
   int a_offset;
   int b_offset;
   int c_offset;

   /* Register programming for this query; each array is paired with its
    * element count.
    */
   struct gen_perf_query_register_prog *flex_regs;
   uint32_t n_flex_regs;

   struct gen_perf_query_register_prog *mux_regs;
   uint32_t n_mux_regs;

   struct gen_perf_query_register_prog *b_counter_regs;
   uint32_t n_b_counter_regs;
};
160
/**
 * Top-level performance query state: the list of known queries, the
 * system variables used by generated counter equations, and the hooks
 * needed to talk to the kernel.
 */
struct gen_perf {
   /* Array of n_queries query descriptions */
   struct gen_perf_query_info *queries;
   int n_queries;

   /* Variables referenced in the XML metadata for OA performance
    * counters, e.g. in the normalization equations.
    *
    * All are uint64_t so that generated code sees consistent operand
    * types.
    */
   struct {
      uint64_t timestamp_frequency; /** $GpuTimestampFrequency */
      uint64_t n_eus;               /** $EuCoresTotalCount */
      uint64_t n_eu_slices;         /** $EuSlicesTotalCount */
      uint64_t n_eu_sub_slices;     /** $EuSubslicesTotalCount */
      uint64_t eu_threads_count;    /** $EuThreadsCount */
      uint64_t slice_mask;          /** $SliceMask */
      uint64_t subslice_mask;       /** $SubsliceMask */
      uint64_t gt_min_freq;         /** $GpuMinFrequency */
      uint64_t gt_max_freq;         /** $GpuMaxFrequency */
      uint64_t revision;            /** $SkuRevisionId */
   } sys_vars;

   /* OA metric sets, indexed by GUID, as known by Mesa at build time, to
    * cross-reference with the GUIDs of configs advertised by the kernel
    * at runtime.
    */
   struct hash_table *oa_metrics_table;

   /* Location of the device's sysfs entry. */
   char sysfs_dev_dir[256];

   /* ioctl entry point supplied by the user of this structure */
   int (*ioctl)(int, unsigned long, void *);
};
194
195 static inline size_t
196 gen_perf_query_counter_get_size(const struct gen_perf_query_counter *counter)
197 {
198 switch (counter->data_type) {
199 case GEN_PERF_COUNTER_DATA_TYPE_BOOL32:
200 return sizeof(uint32_t);
201 case GEN_PERF_COUNTER_DATA_TYPE_UINT32:
202 return sizeof(uint32_t);
203 case GEN_PERF_COUNTER_DATA_TYPE_UINT64:
204 return sizeof(uint64_t);
205 case GEN_PERF_COUNTER_DATA_TYPE_FLOAT:
206 return sizeof(float);
207 case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE:
208 return sizeof(double);
209 default:
210 unreachable("invalid counter data type");
211 }
212 }
213
214 static inline struct gen_perf_query_info *
215 gen_perf_query_append_query_info(struct gen_perf *perf, int max_counters)
216 {
217 struct gen_perf_query_info *query;
218
219 perf->queries = reralloc(perf, perf->queries,
220 struct gen_perf_query_info,
221 ++perf->n_queries);
222 query = &perf->queries[perf->n_queries - 1];
223 memset(query, 0, sizeof(*query));
224
225 if (max_counters > 0) {
226 query->max_counters = max_counters;
227 query->counters =
228 rzalloc_array(perf, struct gen_perf_query_counter, max_counters);
229 }
230
231 return query;
232 }
233
234 static inline void
235 gen_perf_query_info_add_stat_reg(struct gen_perf_query_info *query,
236 uint32_t reg,
237 uint32_t numerator,
238 uint32_t denominator,
239 const char *name,
240 const char *description)
241 {
242 struct gen_perf_query_counter *counter;
243
244 assert(query->n_counters < query->max_counters);
245
246 counter = &query->counters[query->n_counters];
247 counter->name = name;
248 counter->desc = description;
249 counter->type = GEN_PERF_COUNTER_TYPE_RAW;
250 counter->data_type = GEN_PERF_COUNTER_DATA_TYPE_UINT64;
251 counter->offset = sizeof(uint64_t) * query->n_counters;
252 counter->pipeline_stat.reg = reg;
253 counter->pipeline_stat.numerator = numerator;
254 counter->pipeline_stat.denominator = denominator;
255
256 query->n_counters++;
257 }
258
/**
 * Append a pipeline statistics counter with a numerator and denominator
 * of 1, using the name as the description as well.
 */
static inline void
gen_perf_query_info_add_basic_stat_reg(struct gen_perf_query_info *query,
                                       uint32_t reg, const char *name)
{
   const uint32_t numerator = 1;
   const uint32_t denominator = 1;
   const char *description = name;

   gen_perf_query_info_add_stat_reg(query, reg, numerator, denominator,
                                    name, description);
}
265
266 static inline struct gen_perf *
267 gen_perf_new(void *ctx, int (*ioctl_cb)(int, unsigned long, void *))
268 {
269 struct gen_perf *perf = rzalloc(ctx, struct gen_perf);
270
271 perf->ioctl = ioctl_cb;
272
273 return perf;
274 }
275
/*
 * Functions implemented out of line. Their definitions are not part of
 * this header, so the notes below only restate what the signatures show.
 */

/* Load OA metrics into perf for the device described by devinfo, using
 * the file descriptor fd.
 * NOTE(review): the meaning of the bool result is presumably
 * "success" -- confirm against the implementation.
 */
bool
gen_perf_load_oa_metrics(struct gen_perf *perf,
                         int fd,
                         const struct gen_device_info *devinfo);

/* Look up the metric set identified by guid and store its ID in
 * *metric_id.
 * NOTE(review): presumably returns false when the lookup fails -- confirm.
 */
bool
gen_perf_load_metric_id(struct gen_perf *perf,
                        const char *guid,
                        uint64_t *metric_id);

/* Fill in the frequency fields of result from a start/end pair of
 * reports.
 */
void
gen_perf_query_result_read_frequencies(struct gen_perf_query_result *result,
                                       const struct gen_device_info *devinfo,
                                       const uint32_t *start,
                                       const uint32_t *end);

/* Accumulate a start/end pair of reports for query into result. */
void
gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
                                 const struct gen_perf_query_info *query,
                                 const uint32_t *start,
                                 const uint32_t *end);

/* Reset result. */
void
gen_perf_query_result_clear(struct gen_perf_query_result *result);
290
291
292 #endif /* GEN_PERF_H */