iris/perf: get monitor results
[mesa.git] / src / gallium / drivers / iris / iris_monitor.c
1 /*
2 * Copyright © 2019 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "iris_monitor.h"
24
25 #include <xf86drm.h>
26
27 #include "iris_screen.h"
28 #include "iris_context.h"
29
30 #include "perf/gen_perf.h"
31
/* State for one active performance-monitor object (AMD_performance_monitor
 * entry points below).  All active counters belong to a single gen perf
 * query ("group" in gallium terms); see iris_create_monitor_object.
 */
struct iris_monitor_object {
   int num_active_counters;
   int *active_counters;   /* per-entry: counter index within the group's query */

   size_t result_size;             /* bytes in result_buffer (query data_size) */
   unsigned char *result_buffer;   /* raw query data, decoded in iris_get_monitor_result */

   struct gen_perf_query_object *query;
};
41
42 int iris_get_monitor_info(struct pipe_screen *pscreen, unsigned index,
43 struct pipe_driver_query_info *info)
44 {
45 const struct iris_screen *screen = (struct iris_screen *)pscreen;
46 assert(screen->monitor_cfg);
47 if (!screen->monitor_cfg)
48 return 0;
49
50 const struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;
51 if (!info)
52 /* return the number of metrics */
53 return monitor_cfg->num_counters;
54 const struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;
55 const int group = monitor_cfg->counters[index].group;
56 const int counter_index = monitor_cfg->counters[index].counter;
57 info->group_id = group;
58 struct gen_perf_query_counter *counter =
59 &perf_cfg->queries[group].counters[counter_index];
60 info->name = counter->name;
61 info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;
62
63 if (counter->type == GEN_PERF_COUNTER_TYPE_THROUGHPUT)
64 info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
65 else
66 info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
67 switch (counter->data_type) {
68 case GEN_PERF_COUNTER_DATA_TYPE_BOOL32:
69 case GEN_PERF_COUNTER_DATA_TYPE_UINT32:
70 info->type = PIPE_DRIVER_QUERY_TYPE_UINT;
71 info->max_value.u32 = 0;
72 break;
73 case GEN_PERF_COUNTER_DATA_TYPE_UINT64:
74 info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
75 info->max_value.u64 = 0;
76 break;
77 case GEN_PERF_COUNTER_DATA_TYPE_FLOAT:
78 case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE:
79 info->type = PIPE_DRIVER_QUERY_TYPE_FLOAT;
80 info->max_value.u64 = -1;
81 break;
82 default:
83 assert(false);
84 break;
85 }
86
87 /* indicates that this is an OA query, not a pipeline statistics query */
88 info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
89 return 1;
90 }
91
/* Function-pointer typedefs matching the void*-based vtbl slots of
 * struct gen_perf_config.  The iris BO/batch helpers are cast to these
 * shapes in iris_monitor_init_metrics so the shared perf code can call
 * them without knowing the iris types.
 */
typedef void (*bo_unreference_t)(void *);
typedef void *(*bo_map_t)(void *, void *, unsigned flags);
typedef void (*bo_unmap_t)(void *);
typedef void (* emit_mi_report_t)(void *, void *, uint32_t, uint32_t);
typedef void (*emit_mi_flush_t)(void *);
typedef void (*capture_frequency_stat_register_t)(void *, void *,
                                                  uint32_t );
typedef void (*store_register_mem64_t)(void *ctx, void *bo,
                                       uint32_t reg, uint32_t offset);
typedef bool (*batch_references_t)(void *batch, void *bo);
typedef void (*bo_wait_rendering_t)(void *bo);
typedef int (*bo_busy_t)(void *bo);
104
105 static void *
106 iris_oa_bo_alloc(void *bufmgr,
107 const char *name,
108 uint64_t size)
109 {
110 return iris_bo_alloc(bufmgr, name, size, IRIS_MEMZONE_OTHER);
111 }
112
113 static void
114 iris_monitor_emit_mi_flush(struct iris_context *ice)
115 {
116 const int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH |
117 PIPE_CONTROL_INSTRUCTION_INVALIDATE |
118 PIPE_CONTROL_CONST_CACHE_INVALIDATE |
119 PIPE_CONTROL_DATA_CACHE_FLUSH |
120 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
121 PIPE_CONTROL_VF_CACHE_INVALIDATE |
122 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
123 PIPE_CONTROL_CS_STALL;
124 iris_emit_pipe_control_flush(&ice->batches[IRIS_BATCH_RENDER],
125 "OA metrics",
126 flags);
127 }
128
129 static void
130 iris_monitor_emit_mi_report_perf_count(void *c,
131 void *bo,
132 uint32_t offset_in_bytes,
133 uint32_t report_id)
134 {
135 struct iris_context *ice = c;
136 ice->vtbl.emit_mi_report_perf_count(&ice->batches[IRIS_BATCH_RENDER],
137 bo,
138 offset_in_bytes,
139 report_id);
140 }
141
142 static void
143 iris_monitor_batchbuffer_flush(void *c, const char *file, int line)
144 {
145 struct iris_context *ice = c;
146 _iris_batch_flush(&ice->batches[IRIS_BATCH_RENDER], __FILE__, __LINE__);
147 }
148
149 static void
150 iris_monitor_capture_frequency_stat_register(void *ctx,
151 void *bo,
152 uint32_t bo_offset)
153 {
154 struct iris_context *ice = ctx;
155 ice->vtbl.store_register_mem32(&ice->batches[IRIS_BATCH_RENDER],
156 GEN9_RPSTAT0, bo, bo_offset, false);
157 }
158
159 static void
160 iris_monitor_store_register_mem64(void *ctx, void *bo,
161 uint32_t reg, uint32_t offset)
162 {
163 struct iris_context *ice = ctx;
164 ice->vtbl.store_register_mem64(&ice->batches[IRIS_BATCH_RENDER], reg, bo,
165 offset, false);
166 }
167
168
169 static bool
170 iris_monitor_init_metrics(struct iris_screen *screen)
171 {
172 struct iris_monitor_config *monitor_cfg =
173 rzalloc(screen, struct iris_monitor_config);
174 struct gen_perf_config *perf_cfg = NULL;
175 if (unlikely(!monitor_cfg))
176 goto allocation_error;
177 perf_cfg = gen_perf_new(monitor_cfg);
178 if (unlikely(!perf_cfg))
179 goto allocation_error;
180
181 monitor_cfg->perf_cfg = perf_cfg;
182
183 perf_cfg->vtbl.bo_alloc = iris_oa_bo_alloc;
184 perf_cfg->vtbl.bo_unreference = (bo_unreference_t)iris_bo_unreference;
185 perf_cfg->vtbl.bo_map = (bo_map_t)iris_bo_map;
186 perf_cfg->vtbl.bo_unmap = (bo_unmap_t)iris_bo_unmap;
187 perf_cfg->vtbl.emit_mi_flush = (emit_mi_flush_t)iris_monitor_emit_mi_flush;
188
189 perf_cfg->vtbl.emit_mi_report_perf_count =
190 (emit_mi_report_t)iris_monitor_emit_mi_report_perf_count;
191 perf_cfg->vtbl.batchbuffer_flush = iris_monitor_batchbuffer_flush;
192 perf_cfg->vtbl.capture_frequency_stat_register =
193 (capture_frequency_stat_register_t) iris_monitor_capture_frequency_stat_register;
194 perf_cfg->vtbl.store_register_mem64 =
195 (store_register_mem64_t) iris_monitor_store_register_mem64;
196 perf_cfg->vtbl.batch_references = (batch_references_t)iris_batch_references;
197 perf_cfg->vtbl.bo_wait_rendering =
198 (bo_wait_rendering_t)iris_bo_wait_rendering;
199 perf_cfg->vtbl.bo_busy = (bo_busy_t)iris_bo_busy;
200
201 gen_perf_init_metrics(perf_cfg, &screen->devinfo, screen->fd);
202 screen->monitor_cfg = monitor_cfg;
203
204 /* a gallium "group" is equivalent to a gen "query"
205 * a gallium "query" is equivalent to a gen "query_counter"
206 *
207 * Each gen_query supports a specific number of query_counters. To
208 * allocate the array of iris_monitor_counter, we need an upper bound
209 * (ignoring duplicate query_counters).
210 */
211 int gen_query_counters_count = 0;
212 for (int gen_query_id = 0;
213 gen_query_id < perf_cfg->n_queries;
214 ++gen_query_id) {
215 gen_query_counters_count += perf_cfg->queries[gen_query_id].n_counters;
216 }
217
218 monitor_cfg->counters = rzalloc_size(monitor_cfg,
219 sizeof(struct iris_monitor_counter) *
220 gen_query_counters_count);
221 if (unlikely(!monitor_cfg->counters))
222 goto allocation_error;
223
224 int iris_monitor_id = 0;
225 for (int group = 0; group < perf_cfg->n_queries; ++group) {
226 for (int counter = 0;
227 counter < perf_cfg->queries[group].n_counters;
228 ++counter) {
229 /* Check previously identified metrics to filter out duplicates. The
230 * user is not helped by having the same metric available in several
231 * groups. (n^2 algorithm).
232 */
233 bool duplicate = false;
234 for (int existing_group = 0;
235 existing_group < group && !duplicate;
236 ++existing_group) {
237 for (int existing_counter = 0;
238 existing_counter < perf_cfg->queries[existing_group].n_counters && !duplicate;
239 ++existing_counter) {
240 const char *current_name = perf_cfg->queries[group].counters[counter].name;
241 const char *existing_name =
242 perf_cfg->queries[existing_group].counters[existing_counter].name;
243 if (strcmp(current_name, existing_name) == 0) {
244 duplicate = true;
245 }
246 }
247 }
248 if (duplicate)
249 continue;
250 monitor_cfg->counters[iris_monitor_id].group = group;
251 monitor_cfg->counters[iris_monitor_id].counter = counter;
252 ++iris_monitor_id;
253 }
254 }
255 monitor_cfg->num_counters = iris_monitor_id;
256 return monitor_cfg->num_counters;
257
258 allocation_error:
259 if (monitor_cfg)
260 free(monitor_cfg->counters);
261 free(perf_cfg);
262 free(monitor_cfg);
263 return false;
264 }
265
266 int iris_get_monitor_group_info(struct pipe_screen *pscreen,
267 unsigned group_index,
268 struct pipe_driver_query_group_info *info)
269 {
270 struct iris_screen *screen = (struct iris_screen *)pscreen;
271 if (!screen->monitor_cfg) {
272 if (!iris_monitor_init_metrics(screen))
273 return 0;
274 }
275
276 const struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;
277 const struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;
278 if (!info)
279 /* return the count that can be queried */
280 return perf_cfg->n_queries;
281
282 if (group_index >= perf_cfg->n_queries)
283 /* out of range */
284 return 0;
285
286 struct gen_perf_query_info *query = &perf_cfg->queries[group_index];
287 info->name = query->name;
288 info->max_active_queries = query->n_counters;
289 info->num_queries = query->n_counters;
290 return 1;
291 }
292
293 static void
294 iris_init_monitor_ctx(struct iris_context *ice)
295 {
296 struct iris_screen *screen = (struct iris_screen *) ice->ctx.screen;
297 struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;
298 ice->perf_ctx = gen_perf_new_context(ice);
299 if (unlikely(!ice->perf_ctx)) {
300 return;
301 }
302
303 struct gen_perf_context *perf_ctx = ice->perf_ctx;
304 struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;
305 gen_perf_init_context(perf_ctx,
306 perf_cfg,
307 ice,
308 screen->bufmgr,
309 &screen->devinfo,
310 ice->batches[IRIS_BATCH_RENDER].hw_ctx_id,
311 screen->fd);
312 }
313
314 /* entry point for GenPerfMonitorsAMD */
315 struct iris_monitor_object *
316 iris_create_monitor_object(struct iris_context *ice,
317 unsigned num_queries,
318 unsigned *query_types)
319 {
320 struct iris_screen *screen = (struct iris_screen *) ice->ctx.screen;
321 struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;
322 struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;
323 struct gen_perf_query_object *query_obj = NULL;
324
325 /* initialize perf context if this has not already been done. This
326 * function is the first entry point that carries the gl context.
327 */
328 if (ice->perf_ctx == NULL) {
329 iris_init_monitor_ctx(ice);
330 }
331 struct gen_perf_context *perf_ctx = ice->perf_ctx;
332
333 assert(num_queries > 0);
334 int query_index = query_types[0] - PIPE_QUERY_DRIVER_SPECIFIC;
335 assert(query_index <= monitor_cfg->num_counters);
336 const int group = monitor_cfg->counters[query_index].group;
337
338 struct iris_monitor_object *monitor =
339 calloc(1, sizeof(struct iris_monitor_object));
340 if (unlikely(!monitor))
341 goto allocation_failure;
342
343 monitor->num_active_counters = num_queries;
344 monitor->active_counters = calloc(num_queries, sizeof(int));
345 if (unlikely(!monitor->active_counters))
346 goto allocation_failure;
347
348 for (int i = 0; i < num_queries; ++i) {
349 unsigned current_query = query_types[i];
350 unsigned current_query_index = current_query - PIPE_QUERY_DRIVER_SPECIFIC;
351
352 /* all queries must be in the same group */
353 assert(current_query_index <= monitor_cfg->num_counters);
354 assert(monitor_cfg->counters[current_query_index].group == group);
355 monitor->active_counters[i] =
356 monitor_cfg->counters[current_query_index].counter;
357 }
358
359 /* create the gen_perf_query */
360 query_obj = gen_perf_new_query(perf_ctx, group);
361 if (unlikely(!query_obj))
362 goto allocation_failure;
363
364 monitor->query = query_obj;
365 monitor->result_size = perf_cfg->queries[group].data_size;
366 monitor->result_buffer = calloc(1, monitor->result_size);
367 if (unlikely(!monitor->result_buffer))
368 goto allocation_failure;
369
370 return monitor;
371
372 allocation_failure:
373 if (monitor) {
374 free(monitor->active_counters);
375 free(monitor->result_buffer);
376 }
377 free(query_obj);
378 free(monitor);
379 return NULL;
380 }
381
382 void iris_destroy_monitor_object(struct pipe_context *ctx,
383 struct iris_monitor_object *monitor)
384 {
385 struct iris_context *ice = (struct iris_context *)ctx;
386 gen_perf_delete_query(ice->perf_ctx, monitor->query);
387 free(monitor->result_buffer);
388 monitor->result_buffer = NULL;
389 free(monitor->active_counters);
390 monitor->active_counters = NULL;
391 free(monitor);
392 }
393
394 bool
395 iris_begin_monitor(struct pipe_context *ctx,
396 struct iris_monitor_object *monitor)
397 {
398 struct iris_context *ice = (void *) ctx;
399 struct gen_perf_context *perf_ctx = ice->perf_ctx;
400
401 return gen_perf_begin_query(perf_ctx, monitor->query);
402 }
403
404 bool
405 iris_end_monitor(struct pipe_context *ctx,
406 struct iris_monitor_object *monitor)
407 {
408 struct iris_context *ice = (void *) ctx;
409 struct gen_perf_context *perf_ctx = ice->perf_ctx;
410
411 gen_perf_end_query(perf_ctx, monitor->query);
412 return true;
413 }
414
415 bool
416 iris_get_monitor_result(struct pipe_context *ctx,
417 struct iris_monitor_object *monitor,
418 bool wait,
419 union pipe_numeric_type_union *result)
420 {
421 struct iris_context *ice = (void *) ctx;
422 struct gen_perf_context *perf_ctx = ice->perf_ctx;
423
424 bool monitor_ready = gen_perf_is_query_ready(perf_ctx, monitor->query,
425 &ice->batches[IRIS_BATCH_RENDER]);
426
427 if (!monitor_ready) {
428 if (!wait)
429 return false;
430 gen_perf_wait_query(perf_ctx, monitor->query,
431 &ice->batches[IRIS_BATCH_RENDER]);
432 }
433
434 assert (gen_perf_is_query_ready(perf_ctx, monitor->query,
435 &ice->batches[IRIS_BATCH_RENDER]));
436
437 unsigned bytes_written;
438 gen_perf_get_query_data(perf_ctx, monitor->query,
439 monitor->result_size,
440 (unsigned*) monitor->result_buffer,
441 &bytes_written);
442 if (bytes_written != monitor->result_size)
443 return false;
444
445 /* copy metrics into the batch result */
446 for (int i = 0; i < monitor->num_active_counters; ++i) {
447 int current_counter = monitor->active_counters[i];
448 const struct gen_perf_query_info *info =
449 gen_perf_query_info(monitor->query);
450 const struct gen_perf_query_counter *counter =
451 &info->counters[current_counter];
452 assert(gen_perf_query_counter_get_size(counter));
453 switch (counter->data_type) {
454 case GEN_PERF_COUNTER_DATA_TYPE_UINT64:
455 result[i].u64 = *(uint64_t*)(monitor->result_buffer + counter->offset);
456 break;
457 case GEN_PERF_COUNTER_DATA_TYPE_FLOAT:
458 result[i].f = *(float*)(monitor->result_buffer + counter->offset);
459 break;
460 case GEN_PERF_COUNTER_DATA_TYPE_UINT32:
461 case GEN_PERF_COUNTER_DATA_TYPE_BOOL32:
462 result[i].u64 = *(uint32_t*)(monitor->result_buffer + counter->offset);
463 break;
464 case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE: {
465 double v = *(double*)(monitor->result_buffer + counter->offset);
466 result[i].f = v;
467 }
468 default:
469 unreachable("unexpected counter data type");
470 }
471 }
472 return true;
473 }