/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "iris_monitor.h"

#include <xf86drm.h>

#include "iris_screen.h"
#include "iris_context.h"

#include "perf/gen_perf.h"

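/* State for one AMD_performance_monitor-style monitor built on top of a
 * gen_perf query.  active_counters holds, for each enabled counter, its
 * index within the group's counter list; raw query data is staged in
 * result_buffer before being unpacked for the caller.
 */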
struct iris_monitor_object {
   int num_active_counters;
   int *active_counters;

   size_t result_size;
   unsigned char *result_buffer;

   struct gen_perf_query_object *query;
};

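/* Gallium's driver-query introspection hook: when called with info == NULL,
 * return the total number of driver-specific queries (metrics); otherwise
 * fill *info for the metric at `index` and return 1.
 */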
int
iris_get_monitor_info(struct pipe_screen *pscreen, unsigned index,
                      struct pipe_driver_query_info *info)
{
   const struct iris_screen *screen = (struct iris_screen *)pscreen;
   assert(screen->monitor_cfg);
   if (!screen->monitor_cfg)
      return 0;

   const struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;

   if (!info) {
      /* return the number of metrics */
      return monitor_cfg->num_counters;
   }

   const struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;
   const int group = monitor_cfg->counters[index].group;
   const int counter_index = monitor_cfg->counters[index].counter;
   struct gen_perf_query_counter *counter =
      &perf_cfg->queries[group].counters[counter_index];

   info->group_id = group;
   info->name = counter->name;
   info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;

   if (counter->type == GEN_PERF_COUNTER_TYPE_THROUGHPUT)
      info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
   else
      info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;

   switch (counter->data_type) {
   case GEN_PERF_COUNTER_DATA_TYPE_BOOL32:
   case GEN_PERF_COUNTER_DATA_TYPE_UINT32:
      info->type = PIPE_DRIVER_QUERY_TYPE_UINT;
      info->max_value.u32 = 0;
      break;
   case GEN_PERF_COUNTER_DATA_TYPE_UINT64:
      info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
      info->max_value.u64 = 0;
      break;
   case GEN_PERF_COUNTER_DATA_TYPE_FLOAT:
   case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE:
      info->type = PIPE_DRIVER_QUERY_TYPE_FLOAT;
      info->max_value.u64 = -1;
      break;
   default:
      assert(false);
      break;
   }

   /* indicates that this is an OA query, not a pipeline statistics query */
   info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
   return 1;
}

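/* The gen_perf vtbl is expressed in terms of void pointers so it can be
 * shared between drivers.  These typedefs mirror the vtbl entry signatures
 * so the iris helpers below can be cast into place.
 */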
typedef void (*bo_unreference_t)(void *);
typedef void *(*bo_map_t)(void *, void *, unsigned flags);
typedef void (*bo_unmap_t)(void *);
typedef void (*emit_mi_report_t)(void *, void *, uint32_t, uint32_t);
typedef void (*emit_mi_flush_t)(void *);
typedef void (*capture_frequency_stat_register_t)(void *, void *, uint32_t);
typedef void (*store_register_mem64_t)(void *ctx, void *bo,
                                       uint32_t reg, uint32_t offset);
typedef bool (*batch_references_t)(void *batch, void *bo);
typedef void (*bo_wait_rendering_t)(void *bo);
typedef int (*bo_busy_t)(void *bo);

static void *
iris_oa_bo_alloc(void *bufmgr, const char *name, uint64_t size)
{
   return iris_bo_alloc(bufmgr, name, size, IRIS_MEMZONE_OTHER);
}

static void
iris_monitor_emit_mi_flush(struct iris_context *ice)
{
   const int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH |
                     PIPE_CONTROL_INSTRUCTION_INVALIDATE |
                     PIPE_CONTROL_CONST_CACHE_INVALIDATE |
                     PIPE_CONTROL_DATA_CACHE_FLUSH |
                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                     PIPE_CONTROL_VF_CACHE_INVALIDATE |
                     PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                     PIPE_CONTROL_CS_STALL;
   iris_emit_pipe_control_flush(&ice->batches[IRIS_BATCH_RENDER],
                                "OA metrics", flags);
}

static void
iris_monitor_emit_mi_report_perf_count(void *c,
                                       void *bo,
                                       uint32_t offset_in_bytes,
                                       uint32_t report_id)
{
   struct iris_context *ice = c;
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   ice->vtbl.emit_mi_report_perf_count(batch, bo, offset_in_bytes, report_id);
}

static void
iris_monitor_batchbuffer_flush(void *c, const char *file, int line)
{
   struct iris_context *ice = c;
   /* Forward the caller's file/line rather than this wrapper's, so flush
    * records point at the code that actually requested the flush.
    */
   _iris_batch_flush(&ice->batches[IRIS_BATCH_RENDER], file, line);
}

static void
iris_monitor_capture_frequency_stat_register(void *ctx,
                                             void *bo,
                                             uint32_t bo_offset)
{
   struct iris_context *ice = ctx;
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   ice->vtbl.store_register_mem32(batch, GEN9_RPSTAT0, bo, bo_offset, false);
}

static void
iris_monitor_store_register_mem64(void *ctx, void *bo,
                                  uint32_t reg, uint32_t offset)
{
   struct iris_context *ice = ctx;
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   ice->vtbl.store_register_mem64(batch, reg, bo, offset, false);
}

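/* Build the screen's monitor configuration: create the gen_perf config,
 * wire up the iris vtbl hooks, load the metric sets for this device, and
 * flatten the per-query counters into one deduplicated table.  Returns the
 * number of exposed counters, i.e. nonzero on success.
 */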
static bool
iris_monitor_init_metrics(struct iris_screen *screen)
{
   struct iris_monitor_config *monitor_cfg =
      rzalloc(screen, struct iris_monitor_config);
   struct gen_perf_config *perf_cfg = NULL;
   if (unlikely(!monitor_cfg))
      goto allocation_error;

   perf_cfg = gen_perf_new(monitor_cfg);
   if (unlikely(!perf_cfg))
      goto allocation_error;

   monitor_cfg->perf_cfg = perf_cfg;

   perf_cfg->vtbl.bo_alloc = iris_oa_bo_alloc;
   perf_cfg->vtbl.bo_unreference = (bo_unreference_t)iris_bo_unreference;
   perf_cfg->vtbl.bo_map = (bo_map_t)iris_bo_map;
   perf_cfg->vtbl.bo_unmap = (bo_unmap_t)iris_bo_unmap;
   perf_cfg->vtbl.emit_mi_flush = (emit_mi_flush_t)iris_monitor_emit_mi_flush;

   perf_cfg->vtbl.emit_mi_report_perf_count =
      (emit_mi_report_t)iris_monitor_emit_mi_report_perf_count;
   perf_cfg->vtbl.batchbuffer_flush = iris_monitor_batchbuffer_flush;
   perf_cfg->vtbl.capture_frequency_stat_register =
      (capture_frequency_stat_register_t)
      iris_monitor_capture_frequency_stat_register;
   perf_cfg->vtbl.store_register_mem64 =
      (store_register_mem64_t)iris_monitor_store_register_mem64;
   perf_cfg->vtbl.batch_references = (batch_references_t)iris_batch_references;
   perf_cfg->vtbl.bo_wait_rendering =
      (bo_wait_rendering_t)iris_bo_wait_rendering;
   perf_cfg->vtbl.bo_busy = (bo_busy_t)iris_bo_busy;

   gen_perf_init_metrics(perf_cfg, &screen->devinfo, screen->fd);
   screen->monitor_cfg = monitor_cfg;

   /* A gallium "group" is equivalent to a gen "query"; a gallium "query"
    * is equivalent to a gen "query_counter".
    *
    * Each gen_query supports a specific number of query_counters.  To
    * allocate the array of iris_monitor_counter, we need an upper bound
    * (ignoring duplicate query_counters).
    */
   int gen_query_counters_count = 0;
   for (int gen_query_id = 0;
        gen_query_id < perf_cfg->n_queries;
        ++gen_query_id) {
      gen_query_counters_count += perf_cfg->queries[gen_query_id].n_counters;
   }

   monitor_cfg->counters = rzalloc_size(monitor_cfg,
                                        sizeof(struct iris_monitor_counter) *
                                        gen_query_counters_count);
   if (unlikely(!monitor_cfg->counters))
      goto allocation_error;

   int iris_monitor_id = 0;
   for (int group = 0; group < perf_cfg->n_queries; ++group) {
      for (int counter = 0;
           counter < perf_cfg->queries[group].n_counters;
           ++counter) {
         /* Check previously identified metrics to filter out duplicates.
          * The user is not helped by having the same metric available in
          * several groups.  (n^2 algorithm.)
          */
         bool duplicate = false;
         for (int existing_group = 0;
              existing_group < group && !duplicate;
              ++existing_group) {
            for (int existing_counter = 0;
                 existing_counter <
                    perf_cfg->queries[existing_group].n_counters && !duplicate;
                 ++existing_counter) {
               const char *current_name =
                  perf_cfg->queries[group].counters[counter].name;
               const char *existing_name =
                  perf_cfg->queries[existing_group].counters[existing_counter].name;
               if (strcmp(current_name, existing_name) == 0)
                  duplicate = true;
            }
         }
         if (duplicate)
            continue;

         monitor_cfg->counters[iris_monitor_id].group = group;
         monitor_cfg->counters[iris_monitor_id].counter = counter;
         ++iris_monitor_id;
      }
   }
   monitor_cfg->num_counters = iris_monitor_id;
   return monitor_cfg->num_counters;

allocation_error:
   /* monitor_cfg, perf_cfg, and monitor_cfg->counters were all allocated
    * from the monitor_cfg ralloc context, so freeing that context releases
    * everything; plain free() on ralloc'd memory would be invalid.
    */
   ralloc_free(monitor_cfg);
   return false;
}

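/* Gallium group introspection.  Metrics are initialized lazily here, since
 * this hook can run before any other monitor entry point on the screen.
 * With info == NULL this returns the number of groups (gen queries).
 */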
int
iris_get_monitor_group_info(struct pipe_screen *pscreen,
                            unsigned group_index,
                            struct pipe_driver_query_group_info *info)
{
   struct iris_screen *screen = (struct iris_screen *)pscreen;
   if (!screen->monitor_cfg) {
      if (!iris_monitor_init_metrics(screen))
         return 0;
   }

   const struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;
   const struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;

   if (!info) {
      /* return the number of query groups */
      return perf_cfg->n_queries;
   }

   if (group_index >= perf_cfg->n_queries) {
      /* out of range */
      return 0;
   }

   const struct gen_perf_query_info *query = &perf_cfg->queries[group_index];

   info->name = query->name;
   info->max_active_queries = query->n_counters;
   info->num_queries = query->n_counters;

   return 1;
}

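/* Create and initialize this context's gen_perf state against the render
 * batch's hardware context, which is where the monitor's commands (perf
 * count reports, register snapshots) are emitted.
 */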
static void
iris_init_monitor_ctx(struct iris_context *ice)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;

   ice->perf_ctx = gen_perf_new_context(ice);
   if (unlikely(!ice->perf_ctx))
      return;

   struct gen_perf_context *perf_ctx = ice->perf_ctx;
   struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;
   gen_perf_init_context(perf_ctx,
                         perf_cfg,
                         ice,
                         screen->bufmgr,
                         &screen->devinfo,
                         ice->batches[IRIS_BATCH_RENDER].hw_ctx_id,
                         screen->fd);
}

/* entry point for GL_AMD_performance_monitor's GenPerfMonitorsAMD */
struct iris_monitor_object *
iris_create_monitor_object(struct iris_context *ice,
                           unsigned num_queries,
                           unsigned *query_types)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;
   struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;
   struct gen_perf_query_object *query_obj = NULL;

   /* Initialize the perf context if this has not already been done.  This
    * function is the first entry point that carries the GL context.
    */
   if (ice->perf_ctx == NULL)
      iris_init_monitor_ctx(ice);
   struct gen_perf_context *perf_ctx = ice->perf_ctx;

   assert(num_queries > 0);
   int query_index = query_types[0] - PIPE_QUERY_DRIVER_SPECIFIC;
   assert(query_index < monitor_cfg->num_counters);
   const int group = monitor_cfg->counters[query_index].group;

   struct iris_monitor_object *monitor =
      calloc(1, sizeof(struct iris_monitor_object));
   if (unlikely(!monitor))
      goto allocation_failure;

   monitor->num_active_counters = num_queries;
   monitor->active_counters = calloc(num_queries, sizeof(int));
   if (unlikely(!monitor->active_counters))
      goto allocation_failure;

   for (unsigned i = 0; i < num_queries; ++i) {
      unsigned current_query = query_types[i];
      unsigned current_query_index = current_query - PIPE_QUERY_DRIVER_SPECIFIC;

      /* all queries must be in the same group */
      assert(current_query_index < monitor_cfg->num_counters);
      assert(monitor_cfg->counters[current_query_index].group == group);
      monitor->active_counters[i] =
         monitor_cfg->counters[current_query_index].counter;
   }

   /* create the gen_perf_query */
   query_obj = gen_perf_new_query(perf_ctx, group);
   if (unlikely(!query_obj))
      goto allocation_failure;

   monitor->query = query_obj;
   monitor->result_size = perf_cfg->queries[group].data_size;
   monitor->result_buffer = calloc(1, monitor->result_size);
   if (unlikely(!monitor->result_buffer))
      goto allocation_failure;

   return monitor;

allocation_failure:
   if (monitor) {
      free(monitor->active_counters);
      free(monitor->result_buffer);
   }
   free(query_obj);
   free(monitor);
   return NULL;
}

void
iris_destroy_monitor_object(struct pipe_context *ctx,
                            struct iris_monitor_object *monitor)
{
   struct iris_context *ice = (struct iris_context *)ctx;

   gen_perf_delete_query(ice->perf_ctx, monitor->query);
   free(monitor->result_buffer);
   monitor->result_buffer = NULL;
   free(monitor->active_counters);
   monitor->active_counters = NULL;
   free(monitor);
}

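/* Begin/end simply forward to the gen_perf core, which brackets the
 * monitored region with counter snapshots.
 */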
bool
iris_begin_monitor(struct pipe_context *ctx,
                   struct iris_monitor_object *monitor)
{
   struct iris_context *ice = (void *) ctx;
   struct gen_perf_context *perf_ctx = ice->perf_ctx;

   return gen_perf_begin_query(perf_ctx, monitor->query);
}

bool
iris_end_monitor(struct pipe_context *ctx,
                 struct iris_monitor_object *monitor)
{
   struct iris_context *ice = (void *) ctx;
   struct gen_perf_context *perf_ctx = ice->perf_ctx;

   gen_perf_end_query(perf_ctx, monitor->query);
   return true;
}

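/* Gather the results of a monitor.  If the raw data is not ready and `wait`
 * is false, return false immediately; otherwise block until the query is
 * ready, then unpack each active counter into the caller's result array.
 */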
bool
iris_get_monitor_result(struct pipe_context *ctx,
                        struct iris_monitor_object *monitor,
                        bool wait,
                        union pipe_numeric_type_union *result)
{
   struct iris_context *ice = (void *) ctx;
   struct gen_perf_context *perf_ctx = ice->perf_ctx;
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];

   bool monitor_ready =
      gen_perf_is_query_ready(perf_ctx, monitor->query, batch);

   if (!monitor_ready) {
      if (!wait)
         return false;
      gen_perf_wait_query(perf_ctx, monitor->query, batch);
   }

   assert(gen_perf_is_query_ready(perf_ctx, monitor->query, batch));

   unsigned bytes_written;
   gen_perf_get_query_data(perf_ctx, monitor->query,
                           monitor->result_size,
                           (unsigned *)monitor->result_buffer,
                           &bytes_written);
   if (bytes_written != monitor->result_size)
      return false;

   /* copy metrics into the caller's result array */
   const struct gen_perf_query_info *info =
      gen_perf_query_info(monitor->query);
   for (int i = 0; i < monitor->num_active_counters; ++i) {
      int current_counter = monitor->active_counters[i];
      const struct gen_perf_query_counter *counter =
         &info->counters[current_counter];
      assert(gen_perf_query_counter_get_size(counter));

      switch (counter->data_type) {
      case GEN_PERF_COUNTER_DATA_TYPE_UINT64:
         result[i].u64 = *(uint64_t *)(monitor->result_buffer + counter->offset);
         break;
      case GEN_PERF_COUNTER_DATA_TYPE_FLOAT:
         result[i].f = *(float *)(monitor->result_buffer + counter->offset);
         break;
      case GEN_PERF_COUNTER_DATA_TYPE_UINT32:
      case GEN_PERF_COUNTER_DATA_TYPE_BOOL32:
         result[i].u64 = *(uint32_t *)(monitor->result_buffer + counter->offset);
         break;
      case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE: {
         double v = *(double *)(monitor->result_buffer + counter->offset);
         result[i].f = v;
         break;
      }
      default:
         unreachable("unexpected counter data type");
      }
   }
   return true;
}