intel/perf: make pipeline statistic query loading optional
[mesa.git] / src / gallium / drivers / iris / iris_monitor.c
1 /*
2 * Copyright © 2019 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "iris_monitor.h"
24
25 #include <xf86drm.h>
26
27 #include "iris_screen.h"
28 #include "iris_context.h"
29 #include "iris_perf.h"
30
/* Driver-side state backing one GL_AMD_performance_monitor object. */
struct iris_monitor_object {
   int num_active_counters;      /* number of entries in active_counters */
   int *active_counters;         /* gen counter indices within the query group */

   size_t result_size;           /* size in bytes of result_buffer */
   unsigned char *result_buffer; /* raw query data filled by gen_perf_get_query_data */

   struct gen_perf_query_object *query; /* underlying gen perf query object */
};
40
41 int
42 iris_get_monitor_info(struct pipe_screen *pscreen, unsigned index,
43 struct pipe_driver_query_info *info)
44 {
45 const struct iris_screen *screen = (struct iris_screen *)pscreen;
46 assert(screen->monitor_cfg);
47 if (!screen->monitor_cfg)
48 return 0;
49
50 const struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;
51
52 if (!info) {
53 /* return the number of metrics */
54 return monitor_cfg->num_counters;
55 }
56
57 const struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;
58 const int group = monitor_cfg->counters[index].group;
59 const int counter_index = monitor_cfg->counters[index].counter;
60 struct gen_perf_query_counter *counter =
61 &perf_cfg->queries[group].counters[counter_index];
62
63 info->group_id = group;
64 info->name = counter->name;
65 info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;
66
67 if (counter->type == GEN_PERF_COUNTER_TYPE_THROUGHPUT)
68 info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
69 else
70 info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
71 switch (counter->data_type) {
72 case GEN_PERF_COUNTER_DATA_TYPE_BOOL32:
73 case GEN_PERF_COUNTER_DATA_TYPE_UINT32:
74 info->type = PIPE_DRIVER_QUERY_TYPE_UINT;
75 info->max_value.u32 = 0;
76 break;
77 case GEN_PERF_COUNTER_DATA_TYPE_UINT64:
78 info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
79 info->max_value.u64 = 0;
80 break;
81 case GEN_PERF_COUNTER_DATA_TYPE_FLOAT:
82 case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE:
83 info->type = PIPE_DRIVER_QUERY_TYPE_FLOAT;
84 info->max_value.u64 = -1;
85 break;
86 default:
87 assert(false);
88 break;
89 }
90
91 /* indicates that this is an OA query, not a pipeline statistics query */
92 info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
93 return 1;
94 }
95
96 static bool
97 iris_monitor_init_metrics(struct iris_screen *screen)
98 {
99 struct iris_monitor_config *monitor_cfg =
100 rzalloc(screen, struct iris_monitor_config);
101 struct gen_perf_config *perf_cfg = NULL;
102 if (unlikely(!monitor_cfg))
103 goto allocation_error;
104 perf_cfg = gen_perf_new(monitor_cfg);
105 if (unlikely(!perf_cfg))
106 goto allocation_error;
107
108 monitor_cfg->perf_cfg = perf_cfg;
109
110 iris_perf_init_vtbl(perf_cfg);
111
112 gen_perf_init_metrics(perf_cfg, &screen->devinfo, screen->fd,
113 true /* pipeline stats*/);
114 screen->monitor_cfg = monitor_cfg;
115
116 /* a gallium "group" is equivalent to a gen "query"
117 * a gallium "query" is equivalent to a gen "query_counter"
118 *
119 * Each gen_query supports a specific number of query_counters. To
120 * allocate the array of iris_monitor_counter, we need an upper bound
121 * (ignoring duplicate query_counters).
122 */
123 int gen_query_counters_count = 0;
124 for (int gen_query_id = 0;
125 gen_query_id < perf_cfg->n_queries;
126 ++gen_query_id) {
127 gen_query_counters_count += perf_cfg->queries[gen_query_id].n_counters;
128 }
129
130 monitor_cfg->counters = rzalloc_size(monitor_cfg,
131 sizeof(struct iris_monitor_counter) *
132 gen_query_counters_count);
133 if (unlikely(!monitor_cfg->counters))
134 goto allocation_error;
135
136 int iris_monitor_id = 0;
137 for (int group = 0; group < perf_cfg->n_queries; ++group) {
138 for (int counter = 0;
139 counter < perf_cfg->queries[group].n_counters;
140 ++counter) {
141 /* Check previously identified metrics to filter out duplicates. The
142 * user is not helped by having the same metric available in several
143 * groups. (n^2 algorithm).
144 */
145 bool duplicate = false;
146 for (int existing_group = 0;
147 existing_group < group && !duplicate;
148 ++existing_group) {
149 for (int existing_counter = 0;
150 existing_counter < perf_cfg->queries[existing_group].n_counters && !duplicate;
151 ++existing_counter) {
152 const char *current_name =
153 perf_cfg->queries[group].counters[counter].name;
154 const char *existing_name =
155 perf_cfg->queries[existing_group].counters[existing_counter].name;
156 if (strcmp(current_name, existing_name) == 0) {
157 duplicate = true;
158 }
159 }
160 }
161 if (duplicate)
162 continue;
163 monitor_cfg->counters[iris_monitor_id].group = group;
164 monitor_cfg->counters[iris_monitor_id].counter = counter;
165 ++iris_monitor_id;
166 }
167 }
168 monitor_cfg->num_counters = iris_monitor_id;
169 return monitor_cfg->num_counters;
170
171 allocation_error:
172 if (monitor_cfg)
173 free(monitor_cfg->counters);
174 free(perf_cfg);
175 free(monitor_cfg);
176 return false;
177 }
178
179 int
180 iris_get_monitor_group_info(struct pipe_screen *pscreen,
181 unsigned group_index,
182 struct pipe_driver_query_group_info *info)
183 {
184 struct iris_screen *screen = (struct iris_screen *)pscreen;
185 if (!screen->monitor_cfg) {
186 if (!iris_monitor_init_metrics(screen))
187 return 0;
188 }
189
190 const struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;
191 const struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;
192
193 if (!info) {
194 /* return the count that can be queried */
195 return perf_cfg->n_queries;
196 }
197
198 if (group_index >= perf_cfg->n_queries) {
199 /* out of range */
200 return 0;
201 }
202
203 struct gen_perf_query_info *query = &perf_cfg->queries[group_index];
204
205 info->name = query->name;
206 info->max_active_queries = query->n_counters;
207 info->num_queries = query->n_counters;
208
209 return 1;
210 }
211
212 static void
213 iris_init_monitor_ctx(struct iris_context *ice)
214 {
215 struct iris_screen *screen = (struct iris_screen *) ice->ctx.screen;
216 struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;
217
218 ice->perf_ctx = gen_perf_new_context(ice);
219 if (unlikely(!ice->perf_ctx))
220 return;
221
222 struct gen_perf_context *perf_ctx = ice->perf_ctx;
223 struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;
224 gen_perf_init_context(perf_ctx,
225 perf_cfg,
226 ice,
227 screen->bufmgr,
228 &screen->devinfo,
229 ice->batches[IRIS_BATCH_RENDER].hw_ctx_id,
230 screen->fd);
231 }
232
233 /* entry point for GenPerfMonitorsAMD */
234 struct iris_monitor_object *
235 iris_create_monitor_object(struct iris_context *ice,
236 unsigned num_queries,
237 unsigned *query_types)
238 {
239 struct iris_screen *screen = (struct iris_screen *) ice->ctx.screen;
240 struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;
241 struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;
242 struct gen_perf_query_object *query_obj = NULL;
243
244 /* initialize perf context if this has not already been done. This
245 * function is the first entry point that carries the gl context.
246 */
247 if (ice->perf_ctx == NULL) {
248 iris_init_monitor_ctx(ice);
249 }
250 struct gen_perf_context *perf_ctx = ice->perf_ctx;
251
252 assert(num_queries > 0);
253 int query_index = query_types[0] - PIPE_QUERY_DRIVER_SPECIFIC;
254 assert(query_index <= monitor_cfg->num_counters);
255 const int group = monitor_cfg->counters[query_index].group;
256
257 struct iris_monitor_object *monitor =
258 calloc(1, sizeof(struct iris_monitor_object));
259 if (unlikely(!monitor))
260 goto allocation_failure;
261
262 monitor->num_active_counters = num_queries;
263 monitor->active_counters = calloc(num_queries, sizeof(int));
264 if (unlikely(!monitor->active_counters))
265 goto allocation_failure;
266
267 for (int i = 0; i < num_queries; ++i) {
268 unsigned current_query = query_types[i];
269 unsigned current_query_index = current_query - PIPE_QUERY_DRIVER_SPECIFIC;
270
271 /* all queries must be in the same group */
272 assert(current_query_index <= monitor_cfg->num_counters);
273 assert(monitor_cfg->counters[current_query_index].group == group);
274 monitor->active_counters[i] =
275 monitor_cfg->counters[current_query_index].counter;
276 }
277
278 /* create the gen_perf_query */
279 query_obj = gen_perf_new_query(perf_ctx, group);
280 if (unlikely(!query_obj))
281 goto allocation_failure;
282
283 monitor->query = query_obj;
284 monitor->result_size = perf_cfg->queries[group].data_size;
285 monitor->result_buffer = calloc(1, monitor->result_size);
286 if (unlikely(!monitor->result_buffer))
287 goto allocation_failure;
288
289 return monitor;
290
291 allocation_failure:
292 if (monitor) {
293 free(monitor->active_counters);
294 free(monitor->result_buffer);
295 }
296 free(query_obj);
297 free(monitor);
298 return NULL;
299 }
300
301 void
302 iris_destroy_monitor_object(struct pipe_context *ctx,
303 struct iris_monitor_object *monitor)
304 {
305 struct iris_context *ice = (struct iris_context *)ctx;
306
307 gen_perf_delete_query(ice->perf_ctx, monitor->query);
308 free(monitor->result_buffer);
309 monitor->result_buffer = NULL;
310 free(monitor->active_counters);
311 monitor->active_counters = NULL;
312 free(monitor);
313 }
314
315 bool
316 iris_begin_monitor(struct pipe_context *ctx,
317 struct iris_monitor_object *monitor)
318 {
319 struct iris_context *ice = (void *) ctx;
320 struct gen_perf_context *perf_ctx = ice->perf_ctx;
321
322 return gen_perf_begin_query(perf_ctx, monitor->query);
323 }
324
325 bool
326 iris_end_monitor(struct pipe_context *ctx,
327 struct iris_monitor_object *monitor)
328 {
329 struct iris_context *ice = (void *) ctx;
330 struct gen_perf_context *perf_ctx = ice->perf_ctx;
331
332 gen_perf_end_query(perf_ctx, monitor->query);
333 return true;
334 }
335
336 bool
337 iris_get_monitor_result(struct pipe_context *ctx,
338 struct iris_monitor_object *monitor,
339 bool wait,
340 union pipe_numeric_type_union *result)
341 {
342 struct iris_context *ice = (void *) ctx;
343 struct gen_perf_context *perf_ctx = ice->perf_ctx;
344 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
345
346 bool monitor_ready =
347 gen_perf_is_query_ready(perf_ctx, monitor->query, batch);
348
349 if (!monitor_ready) {
350 if (!wait)
351 return false;
352 gen_perf_wait_query(perf_ctx, monitor->query, batch);
353 }
354
355 assert(gen_perf_is_query_ready(perf_ctx, monitor->query, batch));
356
357 unsigned bytes_written;
358 gen_perf_get_query_data(perf_ctx, monitor->query,
359 monitor->result_size,
360 (unsigned*) monitor->result_buffer,
361 &bytes_written);
362 if (bytes_written != monitor->result_size)
363 return false;
364
365 /* copy metrics into the batch result */
366 for (int i = 0; i < monitor->num_active_counters; ++i) {
367 int current_counter = monitor->active_counters[i];
368 const struct gen_perf_query_info *info =
369 gen_perf_query_info(monitor->query);
370 const struct gen_perf_query_counter *counter =
371 &info->counters[current_counter];
372 assert(gen_perf_query_counter_get_size(counter));
373 switch (counter->data_type) {
374 case GEN_PERF_COUNTER_DATA_TYPE_UINT64:
375 result[i].u64 = *(uint64_t*)(monitor->result_buffer + counter->offset);
376 break;
377 case GEN_PERF_COUNTER_DATA_TYPE_FLOAT:
378 result[i].f = *(float*)(monitor->result_buffer + counter->offset);
379 break;
380 case GEN_PERF_COUNTER_DATA_TYPE_UINT32:
381 case GEN_PERF_COUNTER_DATA_TYPE_BOOL32:
382 result[i].u64 = *(uint32_t*)(monitor->result_buffer + counter->offset);
383 break;
384 case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE: {
385 double v = *(double*)(monitor->result_buffer + counter->offset);
386 result[i].f = v;
387 break;
388 }
389 default:
390 unreachable("unexpected counter data type");
391 }
392 }
393 return true;
394 }