/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
23 #include "iris_monitor.h"
27 #include "iris_screen.h"
28 #include "iris_context.h"
30 #include "perf/gen_perf.h"
/* Per-monitor state for the AMD_performance_monitor entry points.
 *
 * A monitor tracks one gen perf query (all counters must belong to the
 * same query/group) plus the buffer the raw results are read back into.
 */
struct iris_monitor_object {
   /* Number of entries in active_counters. */
   int num_active_counters;
   /* Indices (into the gen query's counter array) of the counters the
    * user selected; allocated with calloc in iris_create_monitor_object. */
   int *active_counters;

   /* Size in bytes of result_buffer (the gen query's data_size). */
   size_t result_size;
   /* Raw query readback area; allocated with calloc, freed on destroy. */
   unsigned char *result_buffer;

   /* Underlying perf query object owned by the gen perf context. */
   struct gen_perf_query_object *query;
};
43 iris_get_monitor_info(struct pipe_screen
*pscreen
, unsigned index
,
44 struct pipe_driver_query_info
*info
)
46 const struct iris_screen
*screen
= (struct iris_screen
*)pscreen
;
47 assert(screen
->monitor_cfg
);
48 if (!screen
->monitor_cfg
)
51 const struct iris_monitor_config
*monitor_cfg
= screen
->monitor_cfg
;
54 /* return the number of metrics */
55 return monitor_cfg
->num_counters
;
58 const struct gen_perf_config
*perf_cfg
= monitor_cfg
->perf_cfg
;
59 const int group
= monitor_cfg
->counters
[index
].group
;
60 const int counter_index
= monitor_cfg
->counters
[index
].counter
;
61 struct gen_perf_query_counter
*counter
=
62 &perf_cfg
->queries
[group
].counters
[counter_index
];
64 info
->group_id
= group
;
65 info
->name
= counter
->name
;
66 info
->query_type
= PIPE_QUERY_DRIVER_SPECIFIC
+ index
;
68 if (counter
->type
== GEN_PERF_COUNTER_TYPE_THROUGHPUT
)
69 info
->result_type
= PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE
;
71 info
->result_type
= PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE
;
72 switch (counter
->data_type
) {
73 case GEN_PERF_COUNTER_DATA_TYPE_BOOL32
:
74 case GEN_PERF_COUNTER_DATA_TYPE_UINT32
:
75 info
->type
= PIPE_DRIVER_QUERY_TYPE_UINT
;
76 info
->max_value
.u32
= 0;
78 case GEN_PERF_COUNTER_DATA_TYPE_UINT64
:
79 info
->type
= PIPE_DRIVER_QUERY_TYPE_UINT64
;
80 info
->max_value
.u64
= 0;
82 case GEN_PERF_COUNTER_DATA_TYPE_FLOAT
:
83 case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE
:
84 info
->type
= PIPE_DRIVER_QUERY_TYPE_FLOAT
;
85 info
->max_value
.u64
= -1;
92 /* indicates that this is an OA query, not a pipeline statistics query */
93 info
->flags
= PIPE_DRIVER_QUERY_FLAG_BATCH
;
/* Function-pointer types used to populate gen_perf_config::vtbl with the
 * iris implementations below. The gen perf layer is driver-agnostic and
 * traffics in void pointers; these typedefs give the casts a single,
 * named shape per slot.
 */
typedef void (*bo_unreference_t)(void *);
typedef void *(*bo_map_t)(void *, void *, unsigned flags);
typedef void (*bo_unmap_t)(void *);
typedef void (*emit_mi_report_t)(void *, void *, uint32_t, uint32_t);
typedef void (*emit_mi_flush_t)(void *);
typedef void (*capture_frequency_stat_register_t)(void *, void *,
                                                  uint32_t);
typedef void (*store_register_mem64_t)(void *ctx, void *bo,
                                       uint32_t reg, uint32_t offset);
typedef bool (*batch_references_t)(void *batch, void *bo);
typedef void (*bo_wait_rendering_t)(void *bo);
typedef int (*bo_busy_t)(void *bo);
111 iris_oa_bo_alloc(void *bufmgr
, const char *name
, uint64_t size
)
113 return iris_bo_alloc(bufmgr
, name
, size
, IRIS_MEMZONE_OTHER
);
117 iris_monitor_emit_mi_flush(struct iris_context
*ice
)
119 const int flags
= PIPE_CONTROL_RENDER_TARGET_FLUSH
|
120 PIPE_CONTROL_INSTRUCTION_INVALIDATE
|
121 PIPE_CONTROL_CONST_CACHE_INVALIDATE
|
122 PIPE_CONTROL_DATA_CACHE_FLUSH
|
123 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
124 PIPE_CONTROL_VF_CACHE_INVALIDATE
|
125 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE
|
126 PIPE_CONTROL_CS_STALL
;
127 iris_emit_pipe_control_flush(&ice
->batches
[IRIS_BATCH_RENDER
],
128 "OA metrics", flags
);
132 iris_monitor_emit_mi_report_perf_count(void *c
,
134 uint32_t offset_in_bytes
,
137 struct iris_context
*ice
= c
;
138 struct iris_batch
*batch
= &ice
->batches
[IRIS_BATCH_RENDER
];
139 ice
->vtbl
.emit_mi_report_perf_count(batch
, bo
, offset_in_bytes
, report_id
);
143 iris_monitor_batchbuffer_flush(void *c
, const char *file
, int line
)
145 struct iris_context
*ice
= c
;
146 _iris_batch_flush(&ice
->batches
[IRIS_BATCH_RENDER
], __FILE__
, __LINE__
);
150 iris_monitor_capture_frequency_stat_register(void *ctx
,
154 struct iris_context
*ice
= ctx
;
155 struct iris_batch
*batch
= &ice
->batches
[IRIS_BATCH_RENDER
];
156 ice
->vtbl
.store_register_mem32(batch
, GEN9_RPSTAT0
, bo
, bo_offset
, false);
160 iris_monitor_store_register_mem64(void *ctx
, void *bo
,
161 uint32_t reg
, uint32_t offset
)
163 struct iris_context
*ice
= ctx
;
164 struct iris_batch
*batch
= &ice
->batches
[IRIS_BATCH_RENDER
];
165 ice
->vtbl
.store_register_mem64(batch
, reg
, bo
, offset
, false);
170 iris_monitor_init_metrics(struct iris_screen
*screen
)
172 struct iris_monitor_config
*monitor_cfg
=
173 rzalloc(screen
, struct iris_monitor_config
);
174 struct gen_perf_config
*perf_cfg
= NULL
;
175 if (unlikely(!monitor_cfg
))
176 goto allocation_error
;
177 perf_cfg
= gen_perf_new(monitor_cfg
);
178 if (unlikely(!perf_cfg
))
179 goto allocation_error
;
181 monitor_cfg
->perf_cfg
= perf_cfg
;
183 perf_cfg
->vtbl
.bo_alloc
= iris_oa_bo_alloc
;
184 perf_cfg
->vtbl
.bo_unreference
= (bo_unreference_t
)iris_bo_unreference
;
185 perf_cfg
->vtbl
.bo_map
= (bo_map_t
)iris_bo_map
;
186 perf_cfg
->vtbl
.bo_unmap
= (bo_unmap_t
)iris_bo_unmap
;
187 perf_cfg
->vtbl
.emit_mi_flush
= (emit_mi_flush_t
)iris_monitor_emit_mi_flush
;
189 perf_cfg
->vtbl
.emit_mi_report_perf_count
=
190 (emit_mi_report_t
)iris_monitor_emit_mi_report_perf_count
;
191 perf_cfg
->vtbl
.batchbuffer_flush
= iris_monitor_batchbuffer_flush
;
192 perf_cfg
->vtbl
.capture_frequency_stat_register
=
193 (capture_frequency_stat_register_t
) iris_monitor_capture_frequency_stat_register
;
194 perf_cfg
->vtbl
.store_register_mem64
=
195 (store_register_mem64_t
) iris_monitor_store_register_mem64
;
196 perf_cfg
->vtbl
.batch_references
= (batch_references_t
)iris_batch_references
;
197 perf_cfg
->vtbl
.bo_wait_rendering
=
198 (bo_wait_rendering_t
)iris_bo_wait_rendering
;
199 perf_cfg
->vtbl
.bo_busy
= (bo_busy_t
)iris_bo_busy
;
201 gen_perf_init_metrics(perf_cfg
, &screen
->devinfo
, screen
->fd
);
202 screen
->monitor_cfg
= monitor_cfg
;
204 /* a gallium "group" is equivalent to a gen "query"
205 * a gallium "query" is equivalent to a gen "query_counter"
207 * Each gen_query supports a specific number of query_counters. To
208 * allocate the array of iris_monitor_counter, we need an upper bound
209 * (ignoring duplicate query_counters).
211 int gen_query_counters_count
= 0;
212 for (int gen_query_id
= 0;
213 gen_query_id
< perf_cfg
->n_queries
;
215 gen_query_counters_count
+= perf_cfg
->queries
[gen_query_id
].n_counters
;
218 monitor_cfg
->counters
= rzalloc_size(monitor_cfg
,
219 sizeof(struct iris_monitor_counter
) *
220 gen_query_counters_count
);
221 if (unlikely(!monitor_cfg
->counters
))
222 goto allocation_error
;
224 int iris_monitor_id
= 0;
225 for (int group
= 0; group
< perf_cfg
->n_queries
; ++group
) {
226 for (int counter
= 0;
227 counter
< perf_cfg
->queries
[group
].n_counters
;
229 /* Check previously identified metrics to filter out duplicates. The
230 * user is not helped by having the same metric available in several
231 * groups. (n^2 algorithm).
233 bool duplicate
= false;
234 for (int existing_group
= 0;
235 existing_group
< group
&& !duplicate
;
237 for (int existing_counter
= 0;
238 existing_counter
< perf_cfg
->queries
[existing_group
].n_counters
&& !duplicate
;
239 ++existing_counter
) {
240 const char *current_name
=
241 perf_cfg
->queries
[group
].counters
[counter
].name
;
242 const char *existing_name
=
243 perf_cfg
->queries
[existing_group
].counters
[existing_counter
].name
;
244 if (strcmp(current_name
, existing_name
) == 0) {
251 monitor_cfg
->counters
[iris_monitor_id
].group
= group
;
252 monitor_cfg
->counters
[iris_monitor_id
].counter
= counter
;
256 monitor_cfg
->num_counters
= iris_monitor_id
;
257 return monitor_cfg
->num_counters
;
261 free(monitor_cfg
->counters
);
268 iris_get_monitor_group_info(struct pipe_screen
*pscreen
,
269 unsigned group_index
,
270 struct pipe_driver_query_group_info
*info
)
272 struct iris_screen
*screen
= (struct iris_screen
*)pscreen
;
273 if (!screen
->monitor_cfg
) {
274 if (!iris_monitor_init_metrics(screen
))
278 const struct iris_monitor_config
*monitor_cfg
= screen
->monitor_cfg
;
279 const struct gen_perf_config
*perf_cfg
= monitor_cfg
->perf_cfg
;
282 /* return the count that can be queried */
283 return perf_cfg
->n_queries
;
286 if (group_index
>= perf_cfg
->n_queries
) {
291 struct gen_perf_query_info
*query
= &perf_cfg
->queries
[group_index
];
293 info
->name
= query
->name
;
294 info
->max_active_queries
= query
->n_counters
;
295 info
->num_queries
= query
->n_counters
;
301 iris_init_monitor_ctx(struct iris_context
*ice
)
303 struct iris_screen
*screen
= (struct iris_screen
*) ice
->ctx
.screen
;
304 struct iris_monitor_config
*monitor_cfg
= screen
->monitor_cfg
;
306 ice
->perf_ctx
= gen_perf_new_context(ice
);
307 if (unlikely(!ice
->perf_ctx
))
310 struct gen_perf_context
*perf_ctx
= ice
->perf_ctx
;
311 struct gen_perf_config
*perf_cfg
= monitor_cfg
->perf_cfg
;
312 gen_perf_init_context(perf_ctx
,
317 ice
->batches
[IRIS_BATCH_RENDER
].hw_ctx_id
,
321 /* entry point for GenPerfMonitorsAMD */
322 struct iris_monitor_object
*
323 iris_create_monitor_object(struct iris_context
*ice
,
324 unsigned num_queries
,
325 unsigned *query_types
)
327 struct iris_screen
*screen
= (struct iris_screen
*) ice
->ctx
.screen
;
328 struct iris_monitor_config
*monitor_cfg
= screen
->monitor_cfg
;
329 struct gen_perf_config
*perf_cfg
= monitor_cfg
->perf_cfg
;
330 struct gen_perf_query_object
*query_obj
= NULL
;
332 /* initialize perf context if this has not already been done. This
333 * function is the first entry point that carries the gl context.
335 if (ice
->perf_ctx
== NULL
) {
336 iris_init_monitor_ctx(ice
);
338 struct gen_perf_context
*perf_ctx
= ice
->perf_ctx
;
340 assert(num_queries
> 0);
341 int query_index
= query_types
[0] - PIPE_QUERY_DRIVER_SPECIFIC
;
342 assert(query_index
<= monitor_cfg
->num_counters
);
343 const int group
= monitor_cfg
->counters
[query_index
].group
;
345 struct iris_monitor_object
*monitor
=
346 calloc(1, sizeof(struct iris_monitor_object
));
347 if (unlikely(!monitor
))
348 goto allocation_failure
;
350 monitor
->num_active_counters
= num_queries
;
351 monitor
->active_counters
= calloc(num_queries
, sizeof(int));
352 if (unlikely(!monitor
->active_counters
))
353 goto allocation_failure
;
355 for (int i
= 0; i
< num_queries
; ++i
) {
356 unsigned current_query
= query_types
[i
];
357 unsigned current_query_index
= current_query
- PIPE_QUERY_DRIVER_SPECIFIC
;
359 /* all queries must be in the same group */
360 assert(current_query_index
<= monitor_cfg
->num_counters
);
361 assert(monitor_cfg
->counters
[current_query_index
].group
== group
);
362 monitor
->active_counters
[i
] =
363 monitor_cfg
->counters
[current_query_index
].counter
;
366 /* create the gen_perf_query */
367 query_obj
= gen_perf_new_query(perf_ctx
, group
);
368 if (unlikely(!query_obj
))
369 goto allocation_failure
;
371 monitor
->query
= query_obj
;
372 monitor
->result_size
= perf_cfg
->queries
[group
].data_size
;
373 monitor
->result_buffer
= calloc(1, monitor
->result_size
);
374 if (unlikely(!monitor
->result_buffer
))
375 goto allocation_failure
;
381 free(monitor
->active_counters
);
382 free(monitor
->result_buffer
);
390 iris_destroy_monitor_object(struct pipe_context
*ctx
,
391 struct iris_monitor_object
*monitor
)
393 struct iris_context
*ice
= (struct iris_context
*)ctx
;
395 gen_perf_delete_query(ice
->perf_ctx
, monitor
->query
);
396 free(monitor
->result_buffer
);
397 monitor
->result_buffer
= NULL
;
398 free(monitor
->active_counters
);
399 monitor
->active_counters
= NULL
;
404 iris_begin_monitor(struct pipe_context
*ctx
,
405 struct iris_monitor_object
*monitor
)
407 struct iris_context
*ice
= (void *) ctx
;
408 struct gen_perf_context
*perf_ctx
= ice
->perf_ctx
;
410 return gen_perf_begin_query(perf_ctx
, monitor
->query
);
414 iris_end_monitor(struct pipe_context
*ctx
,
415 struct iris_monitor_object
*monitor
)
417 struct iris_context
*ice
= (void *) ctx
;
418 struct gen_perf_context
*perf_ctx
= ice
->perf_ctx
;
420 gen_perf_end_query(perf_ctx
, monitor
->query
);
425 iris_get_monitor_result(struct pipe_context
*ctx
,
426 struct iris_monitor_object
*monitor
,
428 union pipe_numeric_type_union
*result
)
430 struct iris_context
*ice
= (void *) ctx
;
431 struct gen_perf_context
*perf_ctx
= ice
->perf_ctx
;
432 struct iris_batch
*batch
= &ice
->batches
[IRIS_BATCH_RENDER
];
435 gen_perf_is_query_ready(perf_ctx
, monitor
->query
, batch
);
437 if (!monitor_ready
) {
440 gen_perf_wait_query(perf_ctx
, monitor
->query
, batch
);
443 assert(gen_perf_is_query_ready(perf_ctx
, monitor
->query
, batch
));
445 unsigned bytes_written
;
446 gen_perf_get_query_data(perf_ctx
, monitor
->query
,
447 monitor
->result_size
,
448 (unsigned*) monitor
->result_buffer
,
450 if (bytes_written
!= monitor
->result_size
)
453 /* copy metrics into the batch result */
454 for (int i
= 0; i
< monitor
->num_active_counters
; ++i
) {
455 int current_counter
= monitor
->active_counters
[i
];
456 const struct gen_perf_query_info
*info
=
457 gen_perf_query_info(monitor
->query
);
458 const struct gen_perf_query_counter
*counter
=
459 &info
->counters
[current_counter
];
460 assert(gen_perf_query_counter_get_size(counter
));
461 switch (counter
->data_type
) {
462 case GEN_PERF_COUNTER_DATA_TYPE_UINT64
:
463 result
[i
].u64
= *(uint64_t*)(monitor
->result_buffer
+ counter
->offset
);
465 case GEN_PERF_COUNTER_DATA_TYPE_FLOAT
:
466 result
[i
].f
= *(float*)(monitor
->result_buffer
+ counter
->offset
);
468 case GEN_PERF_COUNTER_DATA_TYPE_UINT32
:
469 case GEN_PERF_COUNTER_DATA_TYPE_BOOL32
:
470 result
[i
].u64
= *(uint32_t*)(monitor
->result_buffer
+ counter
->offset
);
472 case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE
: {
473 double v
= *(double*)(monitor
->result_buffer
+ counter
->offset
);
478 unreachable("unexpected counter data type");