/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include "iris_monitor.h"

#include <stdlib.h>
#include <string.h>

#include "iris_screen.h"
#include "iris_context.h"

#include "perf/gen_perf.h"
#include "perf/gen_perf_regs.h"
/**
 * State for one AMD_performance_monitor object: the set of counters the
 * user selected (all from a single gen query group) plus the underlying
 * gen_perf query and a buffer for its raw results.
 */
struct iris_monitor_object {
   /* Number of entries in active_counters. */
   int num_active_counters;
   /* Indices into the group's counter list, one per active counter.
    * (Field is assigned in iris_create_monitor_object.) */
   int *active_counters;

   /* Size in bytes of result_buffer (the gen query's data_size). */
   size_t result_size;
   unsigned char *result_buffer;

   struct gen_perf_query_object *query;
};
44 iris_get_monitor_info(struct pipe_screen
*pscreen
, unsigned index
,
45 struct pipe_driver_query_info
*info
)
47 const struct iris_screen
*screen
= (struct iris_screen
*)pscreen
;
48 assert(screen
->monitor_cfg
);
49 if (!screen
->monitor_cfg
)
52 const struct iris_monitor_config
*monitor_cfg
= screen
->monitor_cfg
;
55 /* return the number of metrics */
56 return monitor_cfg
->num_counters
;
59 const struct gen_perf_config
*perf_cfg
= monitor_cfg
->perf_cfg
;
60 const int group
= monitor_cfg
->counters
[index
].group
;
61 const int counter_index
= monitor_cfg
->counters
[index
].counter
;
62 struct gen_perf_query_counter
*counter
=
63 &perf_cfg
->queries
[group
].counters
[counter_index
];
65 info
->group_id
= group
;
66 info
->name
= counter
->name
;
67 info
->query_type
= PIPE_QUERY_DRIVER_SPECIFIC
+ index
;
69 if (counter
->type
== GEN_PERF_COUNTER_TYPE_THROUGHPUT
)
70 info
->result_type
= PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE
;
72 info
->result_type
= PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE
;
73 switch (counter
->data_type
) {
74 case GEN_PERF_COUNTER_DATA_TYPE_BOOL32
:
75 case GEN_PERF_COUNTER_DATA_TYPE_UINT32
:
76 info
->type
= PIPE_DRIVER_QUERY_TYPE_UINT
;
77 info
->max_value
.u32
= 0;
79 case GEN_PERF_COUNTER_DATA_TYPE_UINT64
:
80 info
->type
= PIPE_DRIVER_QUERY_TYPE_UINT64
;
81 info
->max_value
.u64
= 0;
83 case GEN_PERF_COUNTER_DATA_TYPE_FLOAT
:
84 case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE
:
85 info
->type
= PIPE_DRIVER_QUERY_TYPE_FLOAT
;
86 info
->max_value
.u64
= -1;
93 /* indicates that this is an OA query, not a pipeline statistics query */
94 info
->flags
= PIPE_DRIVER_QUERY_FLAG_BATCH
;
/* Function-pointer types used to adapt iris entry points to the
 * gen_perf vtbl, which traffics in untyped (void *) handles. */
typedef void (*bo_unreference_t)(void *);
typedef void *(*bo_map_t)(void *, void *, unsigned flags);
typedef void (*bo_unmap_t)(void *);
typedef void (*emit_mi_report_t)(void *, void *, uint32_t, uint32_t);
typedef void (*emit_mi_flush_t)(void *);
typedef void (*capture_frequency_stat_register_t)(void *, void *,
                                                  uint32_t);
typedef void (*store_register_mem64_t)(void *ctx, void *bo,
                                       uint32_t reg, uint32_t offset);
typedef bool (*batch_references_t)(void *batch, void *bo);
typedef void (*bo_wait_rendering_t)(void *bo);
typedef int (*bo_busy_t)(void *bo);
112 iris_oa_bo_alloc(void *bufmgr
, const char *name
, uint64_t size
)
114 return iris_bo_alloc(bufmgr
, name
, size
, IRIS_MEMZONE_OTHER
);
118 iris_monitor_emit_mi_flush(struct iris_context
*ice
)
120 const int flags
= PIPE_CONTROL_RENDER_TARGET_FLUSH
|
121 PIPE_CONTROL_INSTRUCTION_INVALIDATE
|
122 PIPE_CONTROL_CONST_CACHE_INVALIDATE
|
123 PIPE_CONTROL_DATA_CACHE_FLUSH
|
124 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
125 PIPE_CONTROL_VF_CACHE_INVALIDATE
|
126 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE
|
127 PIPE_CONTROL_CS_STALL
;
128 iris_emit_pipe_control_flush(&ice
->batches
[IRIS_BATCH_RENDER
],
129 "OA metrics", flags
);
133 iris_monitor_emit_mi_report_perf_count(void *c
,
135 uint32_t offset_in_bytes
,
138 struct iris_context
*ice
= c
;
139 struct iris_batch
*batch
= &ice
->batches
[IRIS_BATCH_RENDER
];
140 ice
->vtbl
.emit_mi_report_perf_count(batch
, bo
, offset_in_bytes
, report_id
);
144 iris_monitor_batchbuffer_flush(void *c
, const char *file
, int line
)
146 struct iris_context
*ice
= c
;
147 _iris_batch_flush(&ice
->batches
[IRIS_BATCH_RENDER
], __FILE__
, __LINE__
);
151 iris_monitor_capture_frequency_stat_register(void *ctx
,
155 struct iris_context
*ice
= ctx
;
156 struct iris_batch
*batch
= &ice
->batches
[IRIS_BATCH_RENDER
];
157 ice
->vtbl
.store_register_mem32(batch
, GEN9_RPSTAT0
, bo
, bo_offset
, false);
161 iris_monitor_store_register_mem64(void *ctx
, void *bo
,
162 uint32_t reg
, uint32_t offset
)
164 struct iris_context
*ice
= ctx
;
165 struct iris_batch
*batch
= &ice
->batches
[IRIS_BATCH_RENDER
];
166 ice
->vtbl
.store_register_mem64(batch
, reg
, bo
, offset
, false);
171 iris_monitor_init_metrics(struct iris_screen
*screen
)
173 struct iris_monitor_config
*monitor_cfg
=
174 rzalloc(screen
, struct iris_monitor_config
);
175 struct gen_perf_config
*perf_cfg
= NULL
;
176 if (unlikely(!monitor_cfg
))
177 goto allocation_error
;
178 perf_cfg
= gen_perf_new(monitor_cfg
);
179 if (unlikely(!perf_cfg
))
180 goto allocation_error
;
182 monitor_cfg
->perf_cfg
= perf_cfg
;
184 perf_cfg
->vtbl
.bo_alloc
= iris_oa_bo_alloc
;
185 perf_cfg
->vtbl
.bo_unreference
= (bo_unreference_t
)iris_bo_unreference
;
186 perf_cfg
->vtbl
.bo_map
= (bo_map_t
)iris_bo_map
;
187 perf_cfg
->vtbl
.bo_unmap
= (bo_unmap_t
)iris_bo_unmap
;
188 perf_cfg
->vtbl
.emit_mi_flush
= (emit_mi_flush_t
)iris_monitor_emit_mi_flush
;
190 perf_cfg
->vtbl
.emit_mi_report_perf_count
=
191 (emit_mi_report_t
)iris_monitor_emit_mi_report_perf_count
;
192 perf_cfg
->vtbl
.batchbuffer_flush
= iris_monitor_batchbuffer_flush
;
193 perf_cfg
->vtbl
.capture_frequency_stat_register
=
194 (capture_frequency_stat_register_t
) iris_monitor_capture_frequency_stat_register
;
195 perf_cfg
->vtbl
.store_register_mem64
=
196 (store_register_mem64_t
) iris_monitor_store_register_mem64
;
197 perf_cfg
->vtbl
.batch_references
= (batch_references_t
)iris_batch_references
;
198 perf_cfg
->vtbl
.bo_wait_rendering
=
199 (bo_wait_rendering_t
)iris_bo_wait_rendering
;
200 perf_cfg
->vtbl
.bo_busy
= (bo_busy_t
)iris_bo_busy
;
202 gen_perf_init_metrics(perf_cfg
, &screen
->devinfo
, screen
->fd
);
203 screen
->monitor_cfg
= monitor_cfg
;
205 /* a gallium "group" is equivalent to a gen "query"
206 * a gallium "query" is equivalent to a gen "query_counter"
208 * Each gen_query supports a specific number of query_counters. To
209 * allocate the array of iris_monitor_counter, we need an upper bound
210 * (ignoring duplicate query_counters).
212 int gen_query_counters_count
= 0;
213 for (int gen_query_id
= 0;
214 gen_query_id
< perf_cfg
->n_queries
;
216 gen_query_counters_count
+= perf_cfg
->queries
[gen_query_id
].n_counters
;
219 monitor_cfg
->counters
= rzalloc_size(monitor_cfg
,
220 sizeof(struct iris_monitor_counter
) *
221 gen_query_counters_count
);
222 if (unlikely(!monitor_cfg
->counters
))
223 goto allocation_error
;
225 int iris_monitor_id
= 0;
226 for (int group
= 0; group
< perf_cfg
->n_queries
; ++group
) {
227 for (int counter
= 0;
228 counter
< perf_cfg
->queries
[group
].n_counters
;
230 /* Check previously identified metrics to filter out duplicates. The
231 * user is not helped by having the same metric available in several
232 * groups. (n^2 algorithm).
234 bool duplicate
= false;
235 for (int existing_group
= 0;
236 existing_group
< group
&& !duplicate
;
238 for (int existing_counter
= 0;
239 existing_counter
< perf_cfg
->queries
[existing_group
].n_counters
&& !duplicate
;
240 ++existing_counter
) {
241 const char *current_name
=
242 perf_cfg
->queries
[group
].counters
[counter
].name
;
243 const char *existing_name
=
244 perf_cfg
->queries
[existing_group
].counters
[existing_counter
].name
;
245 if (strcmp(current_name
, existing_name
) == 0) {
252 monitor_cfg
->counters
[iris_monitor_id
].group
= group
;
253 monitor_cfg
->counters
[iris_monitor_id
].counter
= counter
;
257 monitor_cfg
->num_counters
= iris_monitor_id
;
258 return monitor_cfg
->num_counters
;
262 free(monitor_cfg
->counters
);
269 iris_get_monitor_group_info(struct pipe_screen
*pscreen
,
270 unsigned group_index
,
271 struct pipe_driver_query_group_info
*info
)
273 struct iris_screen
*screen
= (struct iris_screen
*)pscreen
;
274 if (!screen
->monitor_cfg
) {
275 if (!iris_monitor_init_metrics(screen
))
279 const struct iris_monitor_config
*monitor_cfg
= screen
->monitor_cfg
;
280 const struct gen_perf_config
*perf_cfg
= monitor_cfg
->perf_cfg
;
283 /* return the count that can be queried */
284 return perf_cfg
->n_queries
;
287 if (group_index
>= perf_cfg
->n_queries
) {
292 struct gen_perf_query_info
*query
= &perf_cfg
->queries
[group_index
];
294 info
->name
= query
->name
;
295 info
->max_active_queries
= query
->n_counters
;
296 info
->num_queries
= query
->n_counters
;
302 iris_init_monitor_ctx(struct iris_context
*ice
)
304 struct iris_screen
*screen
= (struct iris_screen
*) ice
->ctx
.screen
;
305 struct iris_monitor_config
*monitor_cfg
= screen
->monitor_cfg
;
307 ice
->perf_ctx
= gen_perf_new_context(ice
);
308 if (unlikely(!ice
->perf_ctx
))
311 struct gen_perf_context
*perf_ctx
= ice
->perf_ctx
;
312 struct gen_perf_config
*perf_cfg
= monitor_cfg
->perf_cfg
;
313 gen_perf_init_context(perf_ctx
,
318 ice
->batches
[IRIS_BATCH_RENDER
].hw_ctx_id
,
322 /* entry point for GenPerfMonitorsAMD */
323 struct iris_monitor_object
*
324 iris_create_monitor_object(struct iris_context
*ice
,
325 unsigned num_queries
,
326 unsigned *query_types
)
328 struct iris_screen
*screen
= (struct iris_screen
*) ice
->ctx
.screen
;
329 struct iris_monitor_config
*monitor_cfg
= screen
->monitor_cfg
;
330 struct gen_perf_config
*perf_cfg
= monitor_cfg
->perf_cfg
;
331 struct gen_perf_query_object
*query_obj
= NULL
;
333 /* initialize perf context if this has not already been done. This
334 * function is the first entry point that carries the gl context.
336 if (ice
->perf_ctx
== NULL
) {
337 iris_init_monitor_ctx(ice
);
339 struct gen_perf_context
*perf_ctx
= ice
->perf_ctx
;
341 assert(num_queries
> 0);
342 int query_index
= query_types
[0] - PIPE_QUERY_DRIVER_SPECIFIC
;
343 assert(query_index
<= monitor_cfg
->num_counters
);
344 const int group
= monitor_cfg
->counters
[query_index
].group
;
346 struct iris_monitor_object
*monitor
=
347 calloc(1, sizeof(struct iris_monitor_object
));
348 if (unlikely(!monitor
))
349 goto allocation_failure
;
351 monitor
->num_active_counters
= num_queries
;
352 monitor
->active_counters
= calloc(num_queries
, sizeof(int));
353 if (unlikely(!monitor
->active_counters
))
354 goto allocation_failure
;
356 for (int i
= 0; i
< num_queries
; ++i
) {
357 unsigned current_query
= query_types
[i
];
358 unsigned current_query_index
= current_query
- PIPE_QUERY_DRIVER_SPECIFIC
;
360 /* all queries must be in the same group */
361 assert(current_query_index
<= monitor_cfg
->num_counters
);
362 assert(monitor_cfg
->counters
[current_query_index
].group
== group
);
363 monitor
->active_counters
[i
] =
364 monitor_cfg
->counters
[current_query_index
].counter
;
367 /* create the gen_perf_query */
368 query_obj
= gen_perf_new_query(perf_ctx
, group
);
369 if (unlikely(!query_obj
))
370 goto allocation_failure
;
372 monitor
->query
= query_obj
;
373 monitor
->result_size
= perf_cfg
->queries
[group
].data_size
;
374 monitor
->result_buffer
= calloc(1, monitor
->result_size
);
375 if (unlikely(!monitor
->result_buffer
))
376 goto allocation_failure
;
382 free(monitor
->active_counters
);
383 free(monitor
->result_buffer
);
391 iris_destroy_monitor_object(struct pipe_context
*ctx
,
392 struct iris_monitor_object
*monitor
)
394 struct iris_context
*ice
= (struct iris_context
*)ctx
;
396 gen_perf_delete_query(ice
->perf_ctx
, monitor
->query
);
397 free(monitor
->result_buffer
);
398 monitor
->result_buffer
= NULL
;
399 free(monitor
->active_counters
);
400 monitor
->active_counters
= NULL
;
405 iris_begin_monitor(struct pipe_context
*ctx
,
406 struct iris_monitor_object
*monitor
)
408 struct iris_context
*ice
= (void *) ctx
;
409 struct gen_perf_context
*perf_ctx
= ice
->perf_ctx
;
411 return gen_perf_begin_query(perf_ctx
, monitor
->query
);
415 iris_end_monitor(struct pipe_context
*ctx
,
416 struct iris_monitor_object
*monitor
)
418 struct iris_context
*ice
= (void *) ctx
;
419 struct gen_perf_context
*perf_ctx
= ice
->perf_ctx
;
421 gen_perf_end_query(perf_ctx
, monitor
->query
);
426 iris_get_monitor_result(struct pipe_context
*ctx
,
427 struct iris_monitor_object
*monitor
,
429 union pipe_numeric_type_union
*result
)
431 struct iris_context
*ice
= (void *) ctx
;
432 struct gen_perf_context
*perf_ctx
= ice
->perf_ctx
;
433 struct iris_batch
*batch
= &ice
->batches
[IRIS_BATCH_RENDER
];
436 gen_perf_is_query_ready(perf_ctx
, monitor
->query
, batch
);
438 if (!monitor_ready
) {
441 gen_perf_wait_query(perf_ctx
, monitor
->query
, batch
);
444 assert(gen_perf_is_query_ready(perf_ctx
, monitor
->query
, batch
));
446 unsigned bytes_written
;
447 gen_perf_get_query_data(perf_ctx
, monitor
->query
,
448 monitor
->result_size
,
449 (unsigned*) monitor
->result_buffer
,
451 if (bytes_written
!= monitor
->result_size
)
454 /* copy metrics into the batch result */
455 for (int i
= 0; i
< monitor
->num_active_counters
; ++i
) {
456 int current_counter
= monitor
->active_counters
[i
];
457 const struct gen_perf_query_info
*info
=
458 gen_perf_query_info(monitor
->query
);
459 const struct gen_perf_query_counter
*counter
=
460 &info
->counters
[current_counter
];
461 assert(gen_perf_query_counter_get_size(counter
));
462 switch (counter
->data_type
) {
463 case GEN_PERF_COUNTER_DATA_TYPE_UINT64
:
464 result
[i
].u64
= *(uint64_t*)(monitor
->result_buffer
+ counter
->offset
);
466 case GEN_PERF_COUNTER_DATA_TYPE_FLOAT
:
467 result
[i
].f
= *(float*)(monitor
->result_buffer
+ counter
->offset
);
469 case GEN_PERF_COUNTER_DATA_TYPE_UINT32
:
470 case GEN_PERF_COUNTER_DATA_TYPE_BOOL32
:
471 result
[i
].u64
= *(uint32_t*)(monitor
->result_buffer
+ counter
->offset
);
473 case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE
: {
474 double v
= *(double*)(monitor
->result_buffer
+ counter
->offset
);
479 unreachable("unexpected counter data type");