2 * Copyright (C) 2013 Christoph Bumiller
3 * Copyright (C) 2015 Samuel Pitoiset
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
25 * Performance monitoring counters interface to gallium.
29 #include "st_context.h"
30 #include "st_cb_bitmap.h"
31 #include "st_cb_perfmon.h"
33 #include "util/bitset.h"
35 #include "pipe/p_context.h"
36 #include "pipe/p_screen.h"
37 #include "util/u_memory.h"
/*
 * init_perf_monitor: set up the gallium queries that back perf monitor `m`.
 *
 * What the visible statements do:
 *  - flush the bitmap cache with st_flush_bitmap_cache();
 *  - walk every group of ctx->PerfMonitor, compare m->ActiveGroups[gid]
 *    against the group's MaxActiveCounters and add the group's active count
 *    to num_active_counters and max_batch_counters;
 *  - allocate stm->active_counters, and the scratch array `batch` when
 *    max_batch_counters is non-zero;
 *  - visit each bit set in m->ActiveCounters[gid]: a counter flagged
 *    PIPE_DRIVER_QUERY_FLAG_BATCH gets a batch_index and has its query_type
 *    appended to `batch`; an assignment of pipe->create_query() to
 *    cntr->query follows;
 *  - when num_batch_counters is non-zero, call pipe->create_batch_query()
 *    and allocate stm->batch_result.
 *
 * NOTE(review): this listing is not compilable as shown.  The number fused
 * onto the front of each statement skips values (for example 62 to 67 and
 * 101 to 103), and the return type, the declarations of gid/cid/tmp, the
 * closing braces, the return statements, the condition in front of the
 * max_batch_counters update and the error path are not visible.  Restore
 * the block from the upstream file before relying on it.
 */
40 init_perf_monitor(struct gl_context
*ctx
, struct gl_perf_monitor_object
*m
)
42 struct st_context
*st
= st_context(ctx
);
43 struct st_perf_monitor_object
*stm
= st_perf_monitor_object(m
);
44 struct pipe_context
*pipe
= st
->pipe
;
45 unsigned *batch
= NULL
;
46 unsigned num_active_counters
= 0;
47 unsigned max_batch_counters
= 0;
48 unsigned num_batch_counters
= 0;
51 st_flush_bitmap_cache(st
);
53 /* Determine the number of active counters. */
54 for (gid
= 0; gid
< ctx
->PerfMonitor
.NumGroups
; gid
++) {
55 const struct gl_perf_monitor_group
*g
= &ctx
->PerfMonitor
.Groups
[gid
];
56 const struct st_perf_monitor_group
*stg
= &st
->perfmon
[gid
];
58 if (m
->ActiveGroups
[gid
] > g
->MaxActiveCounters
) {
59 /* Maximum number of counters reached. Cannot start the session. */
60 if (ST_DEBUG
& DEBUG_MESA
) {
61 debug_printf("Maximum number of counters reached. "
62 "Cannot start the session!\n");
67 num_active_counters
+= m
->ActiveGroups
[gid
];
69 max_batch_counters
+= m
->ActiveGroups
[gid
];
72 if (!num_active_counters
)
/* One st_perf_counter_object slot per active counter. */
75 stm
->active_counters
= CALLOC(num_active_counters
,
76 sizeof(*stm
->active_counters
));
77 if (!stm
->active_counters
)
80 if (max_batch_counters
) {
/* Scratch array that collects the query_type of each batch counter. */
81 batch
= CALLOC(max_batch_counters
, sizeof(*batch
));
86 /* Create a query for each active counter. */
87 for (gid
= 0; gid
< ctx
->PerfMonitor
.NumGroups
; gid
++) {
88 const struct gl_perf_monitor_group
*g
= &ctx
->PerfMonitor
.Groups
[gid
];
89 const struct st_perf_monitor_group
*stg
= &st
->perfmon
[gid
];
92 BITSET_FOREACH_SET(cid
, tmp
, m
->ActiveCounters
[gid
], g
->NumCounters
) {
93 const struct st_perf_monitor_counter
*stc
= &stg
->counters
[cid
];
94 struct st_perf_counter_object
*cntr
=
95 &stm
->active_counters
[stm
->num_active_counters
];
99 if (stc
->flags
& PIPE_DRIVER_QUERY_FLAG_BATCH
) {
100 cntr
->batch_index
= num_batch_counters
;
101 batch
[num_batch_counters
++] = stc
->query_type
;
103 cntr
->query
= pipe
->create_query(pipe
, stc
->query_type
, 0);
/* Slot filled in; move on to the next entry of active_counters. */
107 ++stm
->num_active_counters
;
111 /* Create the batch query. */
112 if (num_batch_counters
) {
113 stm
->batch_query
= pipe
->create_batch_query(pipe
, num_batch_counters
,
115 stm
->batch_result
= CALLOC(num_batch_counters
, sizeof(stm
->batch_result
->batch
[0]));
116 if (!stm
->batch_query
|| !stm
->batch_result
)
129 reset_perf_monitor(struct st_perf_monitor_object
*stm
,
130 struct pipe_context
*pipe
)
134 for (i
= 0; i
< stm
->num_active_counters
; ++i
) {
135 struct pipe_query
*query
= stm
->active_counters
[i
].query
;
137 pipe
->destroy_query(pipe
, query
);
139 FREE(stm
->active_counters
);
140 stm
->active_counters
= NULL
;
141 stm
->num_active_counters
= 0;
143 if (stm
->batch_query
) {
144 pipe
->destroy_query(pipe
, stm
->batch_query
);
145 stm
->batch_query
= NULL
;
147 FREE(stm
->batch_result
);
148 stm
->batch_result
= NULL
;
/*
 * st_NewPerfMonitor: allocate a state-tracker perf monitor object with
 * ST_CALLOC_STRUCT().
 *
 * NOTE(review): only the signature and the allocation are visible in this
 * listing.  The braces and the statements that hand a
 * gl_perf_monitor_object pointer back to the caller are missing, so the
 * return value and the allocation-failure behaviour cannot be confirmed
 * from here.
 */
151 static struct gl_perf_monitor_object
*
152 st_NewPerfMonitor(struct gl_context
*ctx
)
154 struct st_perf_monitor_object
*stq
= ST_CALLOC_STRUCT(st_perf_monitor_object
);
161 st_DeletePerfMonitor(struct gl_context
*ctx
, struct gl_perf_monitor_object
*m
)
163 struct st_perf_monitor_object
*stm
= st_perf_monitor_object(m
);
164 struct pipe_context
*pipe
= st_context(ctx
)->pipe
;
166 reset_perf_monitor(stm
, pipe
);
/*
 * st_BeginPerfMonitor: start a monitoring session for monitor `m`.
 *
 * What the visible statements do:
 *  - when stm->num_active_counters is 0, call init_perf_monitor() to
 *    create the queries first;
 *  - call pipe->begin_query() on every non-NULL per-counter query;
 *  - call pipe->begin_query() on stm->batch_query when it is set;
 *  - on the failure path, release everything with reset_perf_monitor().
 *
 * NOTE(review): the return type, the declaration of `i`, the statements
 * taken when a check fails, the success return and the label in front of
 * the failure path are not visible in this listing (the fused numbering
 * skips 180 to 184, 187 to 191 and 191 to 197).
 */
171 st_BeginPerfMonitor(struct gl_context
*ctx
, struct gl_perf_monitor_object
*m
)
173 struct st_perf_monitor_object
*stm
= st_perf_monitor_object(m
);
174 struct pipe_context
*pipe
= st_context(ctx
)->pipe
;
177 if (!stm
->num_active_counters
) {
178 /* Create a query for each active counter before starting
179 * a new monitoring session. */
180 if (!init_perf_monitor(ctx
, m
))
184 /* Start the query for each active counter. */
185 for (i
= 0; i
< stm
->num_active_counters
; ++i
) {
186 struct pipe_query
*query
= stm
->active_counters
[i
].query
;
187 if (query
&& !pipe
->begin_query(pipe
, query
))
191 if (stm
->batch_query
&& !pipe
->begin_query(pipe
, stm
->batch_query
))
197 /* Failed to start the monitoring session. */
198 reset_perf_monitor(stm
, pipe
);
203 st_EndPerfMonitor(struct gl_context
*ctx
, struct gl_perf_monitor_object
*m
)
205 struct st_perf_monitor_object
*stm
= st_perf_monitor_object(m
);
206 struct pipe_context
*pipe
= st_context(ctx
)->pipe
;
209 /* Stop the query for each active counter. */
210 for (i
= 0; i
< stm
->num_active_counters
; ++i
) {
211 struct pipe_query
*query
= stm
->active_counters
[i
].query
;
213 pipe
->end_query(pipe
, query
);
216 if (stm
->batch_query
)
217 pipe
->end_query(pipe
, stm
->batch_query
);
/*
 * st_ResetPerfMonitor: restart monitor `m`.
 *
 * The visible statements call, in this order, st_EndPerfMonitor(),
 * reset_perf_monitor() and st_BeginPerfMonitor() on the monitor.
 *
 * NOTE(review): the fused numbering skips 224 to 227 and 229 to 232, so
 * statements in front of the End and Begin calls (possibly conditions
 * guarding them) are missing from this listing, as are the return type and
 * braces.  Do not assume the three calls are unconditional.
 */
221 st_ResetPerfMonitor(struct gl_context
*ctx
, struct gl_perf_monitor_object
*m
)
223 struct st_perf_monitor_object
*stm
= st_perf_monitor_object(m
);
224 struct pipe_context
*pipe
= st_context(ctx
)->pipe
;
227 st_EndPerfMonitor(ctx
, m
);
229 reset_perf_monitor(stm
, pipe
);
232 st_BeginPerfMonitor(ctx
, m
);
/*
 * st_IsPerfMonitorResultAvailable: poll whether the results of monitor `m`
 * can be read.
 *
 * What the visible statements do:
 *  - test stm->num_active_counters for zero;
 *  - call pipe->get_query_result() with FALSE as the third argument on
 *    every non-NULL per-counter query, writing into a local result;
 *  - do the same for stm->batch_query, writing into stm->batch_result.
 *
 * NOTE(review): the return type, the declaration of `i` and every return
 * statement are missing from this listing (the fused numbering skips 243
 * to 246, 252 to 257 and stops at 258), so the value reported in each case
 * cannot be confirmed from here.
 */
236 st_IsPerfMonitorResultAvailable(struct gl_context
*ctx
,
237 struct gl_perf_monitor_object
*m
)
239 struct st_perf_monitor_object
*stm
= st_perf_monitor_object(m
);
240 struct pipe_context
*pipe
= st_context(ctx
)->pipe
;
243 if (!stm
->num_active_counters
)
246 /* The result of a monitoring session is only available if the query of
247 * each active counter is idle. */
248 for (i
= 0; i
< stm
->num_active_counters
; ++i
) {
249 struct pipe_query
*query
= stm
->active_counters
[i
].query
;
250 union pipe_query_result result
;
251 if (query
&& !pipe
->get_query_result(pipe
, query
, FALSE
, &result
)) {
252 /* The query is busy. */
257 if (stm
->batch_query
&&
258 !pipe
->get_query_result(pipe
, stm
->batch_query
, FALSE
, stm
->batch_result
))
/*
 * st_GetPerfMonitorResult: copy the results of monitor `m` into the
 * caller's `data` array.
 *
 * What the visible statements do:
 *  - when stm->batch_query is set, fetch its result with
 *    pipe->get_query_result() (TRUE as the third argument) and keep the
 *    outcome in have_batch_query;
 *  - for each active counter, look up the counter's GL type, then obtain
 *    its value either from pipe->get_query_result() on cntr->query or from
 *    stm->batch_result->batch[cntr->batch_index];
 *  - append gid, cid and the value to `data`, advancing `offset` by the
 *    value's size measured in GLuint units;
 *  - store offset * sizeof(GLuint) through bytesWritten.
 *
 * NOTE(review): this listing is not compilable as shown.  The trailing
 * parameters, the declarations of i/offset/gid/cid/type, the assignment of
 * cid, the switch header and its break statements are missing.  In
 * addition, the comment that starts at "Copy data to the supplied array"
 * has no visible terminator, so as listed it runs on to the end of the
 * "Read query results" comment and hides the batch-query statements in
 * between.
 */
265 st_GetPerfMonitorResult(struct gl_context
*ctx
,
266 struct gl_perf_monitor_object
*m
,
271 struct st_perf_monitor_object
*stm
= st_perf_monitor_object(m
);
272 struct pipe_context
*pipe
= st_context(ctx
)->pipe
;
275 /* Copy data to the supplied array (data).
277 * The output data format is: <group ID, counter ID, value> for each
278 * active counter. The API allows counters to appear in any order.
281 bool have_batch_query
= false;
283 if (stm
->batch_query
)
284 have_batch_query
= pipe
->get_query_result(pipe
, stm
->batch_query
, TRUE
,
287 /* Read query results for each active counter. */
288 for (i
= 0; i
< stm
->num_active_counters
; ++i
) {
289 struct st_perf_counter_object
*cntr
= &stm
->active_counters
[i
];
290 union pipe_query_result result
= { 0 };
295 gid
= cntr
->group_id
;
296 type
= ctx
->PerfMonitor
.Groups
[gid
].Counters
[cid
].Type
;
299 if (!pipe
->get_query_result(pipe
, cntr
->query
, TRUE
, &result
))
302 if (!have_batch_query
)
304 result
.batch
[0] = stm
->batch_result
->batch
[cntr
->batch_index
];
/* Emit the group ID and counter ID ahead of the value. */
307 data
[offset
++] = gid
;
308 data
[offset
++] = cid
;
310 case GL_UNSIGNED_INT64_AMD
:
311 memcpy(&data
[offset
], &result
.u64
, sizeof(uint64_t));
312 offset
+= sizeof(uint64_t) / sizeof(GLuint
);
314 case GL_UNSIGNED_INT
:
315 memcpy(&data
[offset
], &result
.u32
, sizeof(uint32_t));
316 offset
+= sizeof(uint32_t) / sizeof(GLuint
);
319 case GL_PERCENTAGE_AMD
:
320 memcpy(&data
[offset
], &result
.f
, sizeof(GLfloat
));
321 offset
+= sizeof(GLfloat
) / sizeof(GLuint
);
/* offset counts GLuint-sized units; report the total in bytes. */
327 *bytesWritten
= offset
* sizeof(GLuint
);
332 st_have_perfmon(struct st_context
*st
)
334 struct pipe_screen
*screen
= st
->pipe
->screen
;
336 if (!screen
->get_driver_query_info
|| !screen
->get_driver_query_group_info
)
339 return screen
->get_driver_query_group_info(screen
, 0, NULL
) != 0;
/*
 * st_InitPerfMonitorGroups: build the perf-monitor group and counter
 * tables from the driver's query information.
 *
 * What the visible statements do:
 *  - ask the screen for the number of driver queries and of query groups
 *    (get_driver_query_info / get_driver_query_group_info called with a
 *    NULL info pointer);
 *  - allocate num_groups entries for both the gl_perf_monitor_group and
 *    the st_perf_monitor_group arrays;
 *  - for each driver group, fetch its info, copy name and
 *    max_active_queries, and allocate the two per-group counter arrays;
 *  - scan all driver queries, fetching each one's info and comparing its
 *    group_id with the current gid;
 *  - in the PIPE_DRIVER_QUERY_TYPE_* cases, set the counter's Maximum and
 *    GL Type, then record query_type and flags and set stg->has_batch for
 *    counters flagged PIPE_DRIVER_QUERY_FLAG_BATCH;
 *  - increment perfmon->NumGroups and publish the arrays through
 *    perfmon->Groups and st->perfmon;
 *  - on the cleanup path, free the per-group counter arrays.
 *
 * NOTE(review): this listing is not compilable as shown.  The return type,
 * the declarations of gid/cid, the allocation-failure checks, the
 * statements taken when a query or group is skipped, the switch header and
 * break statements, the per-counter name/NumCounters updates and the
 * cleanup labels are not visible (the fused numbering skips, for example,
 * 358 to 362, 397 to 402 and 439 to 444).
 */
343 st_InitPerfMonitorGroups(struct gl_context
*ctx
)
345 struct st_context
*st
= st_context(ctx
);
346 struct gl_perf_monitor_state
*perfmon
= &st
->ctx
->PerfMonitor
;
347 struct pipe_screen
*screen
= st
->pipe
->screen
;
348 struct gl_perf_monitor_group
*groups
= NULL
;
349 struct st_perf_monitor_group
*stgroups
= NULL
;
350 int num_counters
, num_groups
;
353 /* Get the number of available queries. */
354 num_counters
= screen
->get_driver_query_info(screen
, 0, NULL
);
356 /* Get the number of available groups. */
357 num_groups
= screen
->get_driver_query_group_info(screen
, 0, NULL
);
358 groups
= CALLOC(num_groups
, sizeof(*groups
));
362 stgroups
= CALLOC(num_groups
, sizeof(*stgroups
));
364 goto fail_only_groups
;
366 for (gid
= 0; gid
< num_groups
; gid
++) {
367 struct gl_perf_monitor_group
*g
= &groups
[perfmon
->NumGroups
];
368 struct st_perf_monitor_group
*stg
= &stgroups
[perfmon
->NumGroups
];
369 struct pipe_driver_query_group_info group_info
;
370 struct gl_perf_monitor_counter
*counters
= NULL
;
371 struct st_perf_monitor_counter
*stcounters
= NULL
;
373 if (!screen
->get_driver_query_group_info(screen
, gid
, &group_info
))
376 g
->Name
= group_info
.name
;
377 g
->MaxActiveCounters
= group_info
.max_active_queries
;
379 if (group_info
.num_queries
)
380 counters
= CALLOC(group_info
.num_queries
, sizeof(*counters
));
383 g
->Counters
= counters
;
385 stcounters
= CALLOC(group_info
.num_queries
, sizeof(*stcounters
));
388 stg
->counters
= stcounters
;
390 for (cid
= 0; cid
< num_counters
; cid
++) {
391 struct gl_perf_monitor_counter
*c
= &counters
[g
->NumCounters
];
392 struct st_perf_monitor_counter
*stc
= &stcounters
[g
->NumCounters
];
393 struct pipe_driver_query_info info
;
395 if (!screen
->get_driver_query_info(screen
, cid
, &info
))
397 if (info
.group_id
!= gid
)
402 case PIPE_DRIVER_QUERY_TYPE_UINT64
:
403 case PIPE_DRIVER_QUERY_TYPE_BYTES
:
404 case PIPE_DRIVER_QUERY_TYPE_MICROSECONDS
:
405 case PIPE_DRIVER_QUERY_TYPE_HZ
:
/* A reported maximum of 0 is replaced by -1. */
407 c
->Maximum
.u64
= info
.max_value
.u64
? info
.max_value
.u64
: -1;
408 c
->Type
= GL_UNSIGNED_INT64_AMD
;
410 case PIPE_DRIVER_QUERY_TYPE_UINT
:
412 c
->Maximum
.u32
= info
.max_value
.u32
? info
.max_value
.u32
: -1;
413 c
->Type
= GL_UNSIGNED_INT
;
415 case PIPE_DRIVER_QUERY_TYPE_FLOAT
:
417 c
->Maximum
.f
= info
.max_value
.f
? info
.max_value
.f
: -1;
420 case PIPE_DRIVER_QUERY_TYPE_PERCENTAGE
:
422 c
->Maximum
.f
= 100.0f
;
423 c
->Type
= GL_PERCENTAGE_AMD
;
426 unreachable("Invalid driver query type!");
429 stc
->query_type
= info
.query_type
;
430 stc
->flags
= info
.flags
;
431 if (stc
->flags
& PIPE_DRIVER_QUERY_FLAG_BATCH
)
432 stg
->has_batch
= true;
436 perfmon
->NumGroups
++;
438 perfmon
->Groups
= groups
;
439 st
->perfmon
= stgroups
;
/* Cleanup: free the per-group counter arrays. */
444 for (gid
= 0; gid
< num_groups
; gid
++) {
445 FREE(stgroups
[gid
].counters
);
446 FREE((void *)groups
[gid
].Counters
);
454 st_destroy_perfmon(struct st_context
*st
)
456 struct gl_perf_monitor_state
*perfmon
= &st
->ctx
->PerfMonitor
;
459 for (gid
= 0; gid
< perfmon
->NumGroups
; gid
++) {
460 FREE(st
->perfmon
[gid
].counters
);
461 FREE((void *)perfmon
->Groups
[gid
].Counters
);
464 FREE((void *)perfmon
->Groups
);
467 void st_init_perfmon_functions(struct dd_function_table
*functions
)
469 functions
->InitPerfMonitorGroups
= st_InitPerfMonitorGroups
;
470 functions
->NewPerfMonitor
= st_NewPerfMonitor
;
471 functions
->DeletePerfMonitor
= st_DeletePerfMonitor
;
472 functions
->BeginPerfMonitor
= st_BeginPerfMonitor
;
473 functions
->EndPerfMonitor
= st_EndPerfMonitor
;
474 functions
->ResetPerfMonitor
= st_ResetPerfMonitor
;
475 functions
->IsPerfMonitorResultAvailable
= st_IsPerfMonitorResultAvailable
;
476 functions
->GetPerfMonitorResult
= st_GetPerfMonitorResult
;