i965: move OA accumulation code to intel/perf
[mesa.git] / src / intel / perf / gen_perf.h
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef GEN_PERF_H
25 #define GEN_PERF_H
26
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <string.h>
30
31 #include <sys/sysmacros.h>
32
33 #include "util/hash_table.h"
34 #include "util/ralloc.h"
35
36 struct gen_device_info;
37
38 struct gen_perf;
39 struct gen_perf_query_info;
40
/**
 * Kind of quantity a performance counter reports.
 *
 * Stored in gen_perf_query_counter::type. The numeric values are the ones
 * C assigns implicitly (0, 1, 2, ...) in declaration order; they are spelled
 * out here so the mapping is visible at a glance.
 */
enum gen_perf_counter_type {
   GEN_PERF_COUNTER_TYPE_EVENT         = 0,
   GEN_PERF_COUNTER_TYPE_DURATION_NORM = 1,
   GEN_PERF_COUNTER_TYPE_DURATION_RAW  = 2,
   GEN_PERF_COUNTER_TYPE_THROUGHPUT    = 3,
   GEN_PERF_COUNTER_TYPE_RAW           = 4,
   GEN_PERF_COUNTER_TYPE_TIMESTAMP     = 5,
};
49
/**
 * C representation of the value a performance counter produces.
 *
 * Stored in gen_perf_query_counter::data_type. Values match the implicit
 * numbering (0, 1, 2, ...) of the declaration order.
 */
enum gen_perf_counter_data_type {
   GEN_PERF_COUNTER_DATA_TYPE_BOOL32 = 0,
   GEN_PERF_COUNTER_DATA_TYPE_UINT32 = 1,
   GEN_PERF_COUNTER_DATA_TYPE_UINT64 = 2,
   GEN_PERF_COUNTER_DATA_TYPE_FLOAT  = 3,
   GEN_PERF_COUNTER_DATA_TYPE_DOUBLE = 4,
};
57
/**
 * Description of a pipeline-statistics counter.
 *
 * Filled in by gen_perf_query_info_add_stat_reg() and stored in the
 * pipeline_stat member of struct gen_perf_query_counter.
 */
struct gen_pipeline_stat {
   /** Register the statistic is read from. */
   uint32_t reg;

   /* NOTE(review): numerator/denominator presumably form a scale factor
    * applied to the raw register value by the consumer; confirm against the
    * code that reads pipeline_stat.
    */
   uint32_t numerator;
   uint32_t denominator;
};
63
/*
 * Upper bound on the number of values held in one accumulated OA report.
 *
 * The largest OA report formats we can use contain:
 *   Haswell: 1 timestamp + 45 A + 8 B + 8 C counters            = 62 values
 *   Gen8+:   1 timestamp + 1 clock + 36 A + 8 B + 8 C counters  = 54 values
 *
 * The Haswell layout is the larger of the two, hence 62.
 */
#define MAX_OA_REPORT_COUNTERS 62
72
73 struct gen_perf_query_result {
74 /**
75 * Storage for the final accumulated OA counters.
76 */
77 uint64_t accumulator[MAX_OA_REPORT_COUNTERS];
78
79 /**
80 * Hw ID used by the context on which the query was running.
81 */
82 uint32_t hw_id;
83
84 /**
85 * Number of reports accumulated to produce the results.
86 */
87 uint32_t reports_accumulated;
88
89 /**
90 * Frequency in the slices of the GT at the begin and end of the
91 * query.
92 */
93 uint64_t slice_frequency[2];
94
95 /**
96 * Frequency in the unslice of the GT at the begin and end of the
97 * query.
98 */
99 uint64_t unslice_frequency[2];
100 };
101
102 struct gen_perf_query_counter {
103 const char *name;
104 const char *desc;
105 enum gen_perf_counter_type type;
106 enum gen_perf_counter_data_type data_type;
107 uint64_t raw_max;
108 size_t offset;
109 size_t size;
110
111 union {
112 uint64_t (*oa_counter_read_uint64)(struct gen_perf *perf,
113 const struct gen_perf_query_info *query,
114 uint64_t *accumulator);
115 float (*oa_counter_read_float)(struct gen_perf *perf,
116 const struct gen_perf_query_info *query,
117 uint64_t *accumulator);
118 struct gen_pipeline_stat pipeline_stat;
119 };
120 };
121
/**
 * A single (register, value) programming pair.
 *
 * struct gen_perf_query_info keeps arrays of these for its flex, mux and
 * B-counter register programming.
 */
struct gen_perf_query_register_prog {
   uint32_t reg; /* register to program */
   uint32_t val; /* value to program it with */
};
126
/**
 * Static description of one performance query: its counters plus, for OA
 * queries, the metric-set identity and register programming.
 *
 * Instances live in the gen_perf::queries array and are created by
 * gen_perf_query_append_query_info().
 */
struct gen_perf_query_info {
   /* Which family of query this is. */
   enum gen_perf_query_type {
      GEN_PERF_QUERY_TYPE_OA,
      GEN_PERF_QUERY_TYPE_RAW,
      GEN_PERF_QUERY_TYPE_PIPELINE,
   } kind;

   const char *name;
   const char *guid;

   /* counters[] has room for max_counters entries, of which the first
    * n_counters are in use.
    */
   struct gen_perf_query_counter *counters;
   int n_counters;
   int max_counters;

   /* NOTE(review): presumably the byte size of the result data exposed for
    * this query; nothing in this header sets it, so confirm against users.
    */
   size_t data_size;

   /* OA specific */
   uint64_t oa_metrics_set_id;
   int oa_format;

   /* Indices into gen_perf_query_result::accumulator[] */
   int gpu_time_offset;
   int gpu_clock_offset;
   int a_offset;
   int b_offset;
   int c_offset;

   /* Register programming for this query: each array is paired with the
    * number of entries it holds.
    */
   struct gen_perf_query_register_prog *flex_regs;
   uint32_t n_flex_regs;

   struct gen_perf_query_register_prog *mux_regs;
   uint32_t n_mux_regs;

   struct gen_perf_query_register_prog *b_counter_regs;
   uint32_t n_b_counter_regs;
};
161
/**
 * Top-level performance-query state, created by gen_perf_new().
 *
 * It also serves as the ralloc parent for the query array and the counter
 * arrays allocated by gen_perf_query_append_query_info().
 */
struct gen_perf {
   /* Array of n_queries query descriptions. */
   struct gen_perf_query_info *queries;
   int n_queries;

   /* Variables referenced by the XML metadata for OA performance counters,
    * e.g. in the normalization equations.
    *
    * Every field is uint64_t so that generated code sees consistent operand
    * types.
    */
   struct {
      uint64_t timestamp_frequency; /** $GpuTimestampFrequency */
      uint64_t n_eus;               /** $EuCoresTotalCount */
      uint64_t n_eu_slices;         /** $EuSlicesTotalCount */
      uint64_t n_eu_sub_slices;     /** $EuSubslicesTotalCount */
      uint64_t eu_threads_count;    /** $EuThreadsCount */
      uint64_t slice_mask;          /** $SliceMask */
      uint64_t subslice_mask;       /** $SubsliceMask */
      uint64_t gt_min_freq;         /** $GpuMinFrequency */
      uint64_t gt_max_freq;         /** $GpuMaxFrequency */
      uint64_t revision;            /** $SkuRevisionId */
   } sys_vars;

   /* OA metric sets known to Mesa at build time, indexed by GUID, so they
    * can be cross-referenced with the GUIDs of the configs the kernel
    * advertises at runtime.
    */
   struct hash_table *oa_metrics_table;

   /* Location of the device's sysfs entry. */
   char sysfs_dev_dir[256];

   /* ioctl entry point supplied by the caller of gen_perf_new(). */
   int (*ioctl)(int, unsigned long, void *);
};
195
196 static inline struct gen_perf_query_info *
197 gen_perf_query_append_query_info(struct gen_perf *perf, int max_counters)
198 {
199 struct gen_perf_query_info *query;
200
201 perf->queries = reralloc(perf, perf->queries,
202 struct gen_perf_query_info,
203 ++perf->n_queries);
204 query = &perf->queries[perf->n_queries - 1];
205 memset(query, 0, sizeof(*query));
206
207 if (max_counters > 0) {
208 query->max_counters = max_counters;
209 query->counters =
210 rzalloc_array(perf, struct gen_perf_query_counter, max_counters);
211 }
212
213 return query;
214 }
215
216 static inline void
217 gen_perf_query_info_add_stat_reg(struct gen_perf_query_info *query,
218 uint32_t reg,
219 uint32_t numerator,
220 uint32_t denominator,
221 const char *name,
222 const char *description)
223 {
224 struct gen_perf_query_counter *counter;
225
226 assert(query->n_counters < query->max_counters);
227
228 counter = &query->counters[query->n_counters];
229 counter->name = name;
230 counter->desc = description;
231 counter->type = GEN_PERF_COUNTER_TYPE_RAW;
232 counter->data_type = GEN_PERF_COUNTER_DATA_TYPE_UINT64;
233 counter->size = sizeof(uint64_t);
234 counter->offset = sizeof(uint64_t) * query->n_counters;
235 counter->pipeline_stat.reg = reg;
236 counter->pipeline_stat.numerator = numerator;
237 counter->pipeline_stat.denominator = denominator;
238
239 query->n_counters++;
240 }
241
/**
 * Append an unscaled pipeline-statistics counter to a query.
 *
 * Shorthand for gen_perf_query_info_add_stat_reg() with a numerator and
 * denominator of 1 and with \p name used as both the counter's name and its
 * description.
 */
static inline void
gen_perf_query_info_add_basic_stat_reg(struct gen_perf_query_info *query,
                                       uint32_t reg, const char *name)
{
   const uint32_t unit = 1;

   gen_perf_query_info_add_stat_reg(query, reg,
                                    unit /* numerator */,
                                    unit /* denominator */,
                                    name /* name */,
                                    name /* description */);
}
248
249 static inline struct gen_perf *
250 gen_perf_new(void *ctx, int (*ioctl_cb)(int, unsigned long, void *))
251 {
252 struct gen_perf *perf = rzalloc(ctx, struct gen_perf);
253
254 perf->ioctl = ioctl_cb;
255
256 return perf;
257 }
258
/* The functions below are only declared here; their definitions are not part
 * of this header, so the notes describe what the signatures show and hedge
 * the rest.
 */

/* NOTE(review): presumably populates perf with the OA metric sets usable on
 * the device behind fd; the meaning of the bool result should be confirmed
 * against the definition.
 */
bool
gen_perf_load_oa_metrics(struct gen_perf *perf,
                         int fd,
                         const struct gen_device_info *devinfo);

/* NOTE(review): presumably looks up the metric-set id for guid and stores it
 * in *metric_id; confirm the meaning of the bool result against the
 * definition.
 */
bool
gen_perf_load_metric_id(struct gen_perf *perf,
                        const char *guid,
                        uint64_t *metric_id);

/* Writes into result using the start and end reports of a query.
 * NOTE(review): presumably fills slice_frequency[] and unslice_frequency[];
 * confirm against the definition.
 */
void
gen_perf_query_result_read_frequencies(struct gen_perf_query_result *result,
                                       const struct gen_device_info *devinfo,
                                       const uint32_t *start,
                                       const uint32_t *end);

/* Updates result from a pair of reports (start, end) described by query.
 * NOTE(review): presumably adds the end - start counter deltas into
 * result->accumulator[]; confirm against the definition.
 */
void
gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
                                 const struct gen_perf_query_info *query,
                                 const uint32_t *start,
                                 const uint32_t *end);

/* NOTE(review): presumably resets result to an empty state before
 * accumulation starts; confirm against the definition.
 */
void
gen_perf_query_result_clear(struct gen_perf_query_result *result);

273
274
275 #endif /* GEN_PERF_H */