2 * Copyright © 2013 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * \file brw_performance_monitor.c
27 * Implementation of the GL_AMD_performance_monitor extension.
32 #include "main/bitset.h"
33 #include "main/hash.h"
34 #include "main/macros.h"
35 #include "main/mtypes.h"
36 #include "main/performance_monitor.h"
38 #include "glsl/ralloc.h"
40 #include "brw_context.h"
41 #include "brw_defines.h"
42 #include "intel_batchbuffer.h"
44 #define FILE_DEBUG_FLAG DEBUG_PERFMON
/**
 * i965 representation of a performance monitor object.
 */
struct brw_perf_monitor_object
{
   /** The base class (core Mesa's monitor state). */
   struct gl_perf_monitor_object base;

   /**
    * BO containing starting and ending snapshots for any active pipeline
    * statistics counters.
    */
   drm_intel_bo *pipeline_stats_bo;

   /**
    * Storage for final pipeline statistics counter results.
    * Allocated lazily by gather_statistics_results(); NULL until then.
    */
   uint64_t *pipeline_stats_results;
};
/** Downcasting convenience macro: core Mesa monitor -> i965 monitor. */
static inline struct brw_perf_monitor_object *
brw_perf_monitor(struct gl_perf_monitor_object *m)
{
   /* brw_perf_monitor_object embeds the base as its first member, so the
    * pointer cast is safe. */
   struct brw_perf_monitor_object *monitor =
      (struct brw_perf_monitor_object *) m;
   return monitor;
}
73 #define SECOND_SNAPSHOT_OFFSET_IN_BYTES 2048
75 /******************************************************************************/
/**
 * Templates for gl_perf_monitor_counter / gl_perf_monitor_group entries.
 *
 * Fix: the ".Name = name," initializer (and surrounding braces) was missing
 * from each template, so counters and groups would have been created with
 * no name recorded.
 */
#define COUNTER(name)           \
   {                            \
      .Name = name,             \
      .Type = GL_UNSIGNED_INT,  \
      .Minimum = { .u32 = 0 },  \
      .Maximum = { .u32 = ~0 }, \
   }

#define COUNTER64(name)              \
   {                                 \
      .Name = name,                  \
      .Type = GL_UNSIGNED_INT64_AMD, \
      .Minimum = { .u64 = 0 },       \
      .Maximum = { .u64 = ~0 },      \
   }

#define GROUP(name, max_active, counter_list)  \
   {                                           \
      .Name = name,                            \
      .MaxActiveCounters = max_active,         \
      .Counters = counter_list,                \
      .NumCounters = ARRAY_SIZE(counter_list), \
   }
/** Performance Monitor Group IDs (indices into the Groups arrays below). */
enum brw_counter_groups {
   PIPELINE_STATS_COUNTERS, /* Pipeline Statistics Register Counters */
};
/* Gen5 (Ironlake) exposes no counter groups. */
const static struct gl_perf_monitor_group gen5_groups[] = {
   /* Our pipeline statistics counter handling requires hardware contexts. */
};
/** Pipeline statistics counters exposed on Sandybridge (Gen6). */
const static struct gl_perf_monitor_counter gen6_statistics_counters[] = {
   COUNTER64("IA_VERTICES_COUNT"),
   COUNTER64("IA_PRIMITIVES_COUNT"),
   COUNTER64("VS_INVOCATION_COUNT"),
   COUNTER64("GS_INVOCATION_COUNT"),
   COUNTER64("GS_PRIMITIVES_COUNT"),
   COUNTER64("CL_INVOCATION_COUNT"),
   COUNTER64("CL_PRIMITIVES_COUNT"),
   COUNTER64("PS_INVOCATION_COUNT"),
   COUNTER64("PS_DEPTH_COUNT"),
   COUNTER64("SO_NUM_PRIMS_WRITTEN"),
   COUNTER64("SO_PRIM_STORAGE_NEEDED"),
};
133 /** MMIO register addresses for each pipeline statistics counter. */
134 const static int gen6_statistics_register_addresses
[] = {
144 GEN6_SO_NUM_PRIMS_WRITTEN
,
145 GEN6_SO_PRIM_STORAGE_NEEDED
,
/** Counter groups exposed on Sandybridge (Gen6). */
const static struct gl_perf_monitor_group gen6_groups[] = {
   GROUP("Pipeline Statistics Registers", INT_MAX, gen6_statistics_counters),
};
/**
 * Ivybridge/Baytrail/Haswell:
 *
 * Gen7 adds tessellation (HS/DS) invocation counts and per-stream
 * transform feedback statistics.
 */
const static struct gl_perf_monitor_counter gen7_statistics_counters[] = {
   COUNTER64("IA_VERTICES_COUNT"),
   COUNTER64("IA_PRIMITIVES_COUNT"),
   COUNTER64("VS_INVOCATION_COUNT"),
   COUNTER64("HS_INVOCATION_COUNT"),
   COUNTER64("DS_INVOCATION_COUNT"),
   COUNTER64("GS_INVOCATION_COUNT"),
   COUNTER64("GS_PRIMITIVES_COUNT"),
   COUNTER64("CL_INVOCATION_COUNT"),
   COUNTER64("CL_PRIMITIVES_COUNT"),
   COUNTER64("PS_INVOCATION_COUNT"),
   COUNTER64("PS_DEPTH_COUNT"),
   COUNTER64("SO_NUM_PRIMS_WRITTEN (Stream 0)"),
   COUNTER64("SO_NUM_PRIMS_WRITTEN (Stream 1)"),
   COUNTER64("SO_NUM_PRIMS_WRITTEN (Stream 2)"),
   COUNTER64("SO_NUM_PRIMS_WRITTEN (Stream 3)"),
   COUNTER64("SO_PRIM_STORAGE_NEEDED (Stream 0)"),
   COUNTER64("SO_PRIM_STORAGE_NEEDED (Stream 1)"),
   COUNTER64("SO_PRIM_STORAGE_NEEDED (Stream 2)"),
   COUNTER64("SO_PRIM_STORAGE_NEEDED (Stream 3)"),
};
179 /** MMIO register addresses for each pipeline statistics counter. */
180 const static int gen7_statistics_register_addresses
[] = {
192 GEN7_SO_NUM_PRIMS_WRITTEN(0),
193 GEN7_SO_NUM_PRIMS_WRITTEN(1),
194 GEN7_SO_NUM_PRIMS_WRITTEN(2),
195 GEN7_SO_NUM_PRIMS_WRITTEN(3),
196 GEN7_SO_PRIM_STORAGE_NEEDED(0),
197 GEN7_SO_PRIM_STORAGE_NEEDED(1),
198 GEN7_SO_PRIM_STORAGE_NEEDED(2),
199 GEN7_SO_PRIM_STORAGE_NEEDED(3),
/** Counter groups exposed on Ivybridge/Baytrail/Haswell (Gen7). */
const static struct gl_perf_monitor_group gen7_groups[] = {
   GROUP("Pipeline Statistics Registers", INT_MAX, gen7_statistics_counters),
};
207 /******************************************************************************/
209 static GLboolean
brw_is_perf_monitor_result_available(struct gl_context
*, struct gl_perf_monitor_object
*);
212 dump_perf_monitor_callback(GLuint name
, void *monitor_void
, void *brw_void
)
214 struct gl_context
*ctx
= brw_void
;
215 struct gl_perf_monitor_object
*m
= monitor_void
;
216 struct brw_perf_monitor_object
*monitor
= monitor_void
;
218 DBG("%4d %-7s %-6s %-11s %-9s\n",
220 m
->Active
? "Active" : "",
221 m
->Ended
? "Ended" : "",
222 brw_is_perf_monitor_result_available(ctx
, m
) ? "Available" : "",
223 monitor
->pipeline_stats_bo
? "Stats BO" : "");
/**
 * Walk the context's monitor hash table, dumping each monitor's state.
 * Debug aid; output goes through DBG (DEBUG_PERFMON).
 */
void
brw_dump_perf_monitors(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* Pass brw as user data; the callback treats it as a gl_context pointer
    * (valid because gl_context is brw_context's first member). */
   _mesa_HashWalk(ctx->PerfMonitor.Monitors, dump_perf_monitor_callback, brw);
}
234 /******************************************************************************/
/**
 * Does this monitor have any active pipeline statistics counters?
 *
 * Only Gen6+ supports them (our handling requires hardware contexts;
 * see the gen5_groups comment above).
 */
static bool
monitor_needs_statistics_registers(struct brw_context *brw,
                                   struct gl_perf_monitor_object *m)
{
   return brw->gen >= 6 && m->ActiveGroups[PIPELINE_STATS_COUNTERS];
}
244 * Take a snapshot of any monitored pipeline statistics counters.
247 snapshot_statistics_registers(struct brw_context
*brw
,
248 struct brw_perf_monitor_object
*monitor
,
249 uint32_t offset_in_bytes
)
251 struct gl_context
*ctx
= &brw
->ctx
;
252 const int offset
= offset_in_bytes
/ sizeof(uint64_t);
253 const int group
= PIPELINE_STATS_COUNTERS
;
254 const int num_counters
= ctx
->PerfMonitor
.Groups
[group
].NumCounters
;
256 intel_batchbuffer_emit_mi_flush(brw
);
258 for (int i
= 0; i
< num_counters
; i
++) {
259 if (BITSET_TEST(monitor
->base
.ActiveCounters
[group
], i
)) {
260 assert(ctx
->PerfMonitor
.Groups
[group
].Counters
[i
].Type
==
261 GL_UNSIGNED_INT64_AMD
);
263 brw_store_register_mem64(brw
, monitor
->pipeline_stats_bo
,
264 brw
->perfmon
.statistics_registers
[i
],
/**
 * Gather results from pipeline_stats_bo, storing the final values.
 *
 * This allows us to free pipeline_stats_bo (which is 4K) in favor of a much
 * smaller array of final results.  Each final value is the difference
 * between the ending snapshot (at SECOND_SNAPSHOT_OFFSET_IN_BYTES) and the
 * starting snapshot (at offset 0).
 */
static void
gather_statistics_results(struct brw_context *brw,
                          struct brw_perf_monitor_object *monitor)
{
   struct gl_context *ctx = &brw->ctx;
   const int num_counters =
      ctx->PerfMonitor.Groups[PIPELINE_STATS_COUNTERS].NumCounters;

   /* NOTE(review): calloc result is not checked; an allocation failure
    * here would crash when the results array is dereferenced below. */
   monitor->pipeline_stats_results = calloc(num_counters, sizeof(uint64_t));

   /* Map for reading (false = no write access needed). */
   drm_intel_bo_map(monitor->pipeline_stats_bo, false);
   uint64_t *start = monitor->pipeline_stats_bo->virtual;
   uint64_t *end = start + (SECOND_SNAPSHOT_OFFSET_IN_BYTES / sizeof(uint64_t));

   for (int i = 0; i < num_counters; i++) {
      monitor->pipeline_stats_results[i] = end[i] - start[i];
   }
   drm_intel_bo_unmap(monitor->pipeline_stats_bo);

   /* The BO is no longer needed once the final results are computed. */
   drm_intel_bo_unreference(monitor->pipeline_stats_bo);
   monitor->pipeline_stats_bo = NULL;
}
298 /******************************************************************************/
/**
 * Initialize a monitor to sane starting state; throw away old buffers.
 *
 * Releases the snapshot BO (if any) and frees any gathered results, so a
 * subsequent Begin starts from a clean slate.
 */
static void
reinitialize_perf_monitor(struct brw_context *brw,
                          struct brw_perf_monitor_object *monitor)
{
   if (monitor->pipeline_stats_bo) {
      drm_intel_bo_unreference(monitor->pipeline_stats_bo);
      monitor->pipeline_stats_bo = NULL;
   }

   /* free(NULL) is a no-op, so no guard is needed. */
   free(monitor->pipeline_stats_results);
   monitor->pipeline_stats_results = NULL;
}
317 * Driver hook for glBeginPerformanceMonitorAMD().
320 brw_begin_perf_monitor(struct gl_context
*ctx
,
321 struct gl_perf_monitor_object
*m
)
323 struct brw_context
*brw
= brw_context(ctx
);
324 struct brw_perf_monitor_object
*monitor
= brw_perf_monitor(m
);
326 DBG("Begin(%d)\n", m
->Name
);
328 reinitialize_perf_monitor(brw
, monitor
);
330 if (monitor_needs_statistics_registers(brw
, m
)) {
331 monitor
->pipeline_stats_bo
=
332 drm_intel_bo_alloc(brw
->bufmgr
, "perf. monitor stats bo", 4096, 64);
334 /* Take starting snapshots. */
335 snapshot_statistics_registers(brw
, monitor
, 0);
/**
 * Driver hook for glEndPerformanceMonitorAMD().
 *
 * Records the ending counter snapshots in the second half of the BO;
 * results are computed lazily (see gather_statistics_results).
 */
static void
brw_end_perf_monitor(struct gl_context *ctx,
                     struct gl_perf_monitor_object *m)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);

   DBG("End(%d)\n", m->Name);

   if (monitor_needs_statistics_registers(brw, m)) {
      /* Take ending snapshots. */
      snapshot_statistics_registers(brw, monitor,
                                    SECOND_SNAPSHOT_OFFSET_IN_BYTES);
   }
}
361 * Reset a performance monitor, throwing away any results.
364 brw_reset_perf_monitor(struct gl_context
*ctx
,
365 struct gl_perf_monitor_object
*m
)
367 struct brw_context
*brw
= brw_context(ctx
);
368 struct brw_perf_monitor_object
*monitor
= brw_perf_monitor(m
);
370 reinitialize_perf_monitor(brw
, monitor
);
373 brw_begin_perf_monitor(ctx
, m
);
/**
 * Is a performance monitor result available?
 *
 * Statistics results are available once the BO is gone (already gathered)
 * or the GPU is done with it: not referenced by the current batch and not
 * busy.
 */
static GLboolean
brw_is_perf_monitor_result_available(struct gl_context *ctx,
                                     struct gl_perf_monitor_object *m)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);

   bool stats_available = true;

   if (monitor_needs_statistics_registers(brw, m)) {
      stats_available = !monitor->pipeline_stats_bo ||
         (!drm_intel_bo_references(brw->batch.bo, monitor->pipeline_stats_bo) &&
          !drm_intel_bo_busy(monitor->pipeline_stats_bo));
   }

   return stats_available;
}
399 * Get the performance monitor result.
402 brw_get_perf_monitor_result(struct gl_context
*ctx
,
403 struct gl_perf_monitor_object
*m
,
406 GLint
*bytes_written
)
408 struct brw_context
*brw
= brw_context(ctx
);
409 struct brw_perf_monitor_object
*monitor
= brw_perf_monitor(m
);
411 DBG("GetResult(%d)\n", m
->Name
);
412 brw_dump_perf_monitors(brw
);
414 /* This hook should only be called when results are available. */
417 /* Copy data to the supplied array (data).
419 * The output data format is: <group ID, counter ID, value> for each
420 * active counter. The API allows counters to appear in any order.
424 if (monitor_needs_statistics_registers(brw
, m
)) {
425 const int num_counters
=
426 ctx
->PerfMonitor
.Groups
[PIPELINE_STATS_COUNTERS
].NumCounters
;
428 if (!monitor
->pipeline_stats_results
)
429 gather_statistics_results(brw
, monitor
);
431 for (int i
= 0; i
< num_counters
; i
++) {
432 if (BITSET_TEST(m
->ActiveCounters
[PIPELINE_STATS_COUNTERS
], i
)) {
433 data
[offset
++] = PIPELINE_STATS_COUNTERS
;
435 *((uint64_t *) (&data
[offset
])) = monitor
->pipeline_stats_results
[i
];
442 *bytes_written
= offset
* sizeof(uint32_t);
446 * Create a new performance monitor object.
448 static struct gl_perf_monitor_object
*
449 brw_new_perf_monitor(struct gl_context
*ctx
)
451 return calloc(1, sizeof(struct brw_perf_monitor_object
));
455 * Delete a performance monitor object.
458 brw_delete_perf_monitor(struct gl_context
*ctx
, struct gl_perf_monitor_object
*m
)
460 struct brw_perf_monitor_object
*monitor
= brw_perf_monitor(m
);
461 DBG("Delete(%d)\n", m
->Name
);
462 reinitialize_perf_monitor(brw_context(ctx
), monitor
);
466 /******************************************************************************/
469 brw_init_performance_monitors(struct brw_context
*brw
)
471 struct gl_context
*ctx
= &brw
->ctx
;
473 ctx
->Driver
.NewPerfMonitor
= brw_new_perf_monitor
;
474 ctx
->Driver
.DeletePerfMonitor
= brw_delete_perf_monitor
;
475 ctx
->Driver
.BeginPerfMonitor
= brw_begin_perf_monitor
;
476 ctx
->Driver
.EndPerfMonitor
= brw_end_perf_monitor
;
477 ctx
->Driver
.ResetPerfMonitor
= brw_reset_perf_monitor
;
478 ctx
->Driver
.IsPerfMonitorResultAvailable
= brw_is_perf_monitor_result_available
;
479 ctx
->Driver
.GetPerfMonitorResult
= brw_get_perf_monitor_result
;
482 ctx
->PerfMonitor
.Groups
= gen5_groups
;
483 ctx
->PerfMonitor
.NumGroups
= ARRAY_SIZE(gen5_groups
);
484 } else if (brw
->gen
== 6) {
485 ctx
->PerfMonitor
.Groups
= gen6_groups
;
486 ctx
->PerfMonitor
.NumGroups
= ARRAY_SIZE(gen6_groups
);
487 brw
->perfmon
.statistics_registers
= gen6_statistics_register_addresses
;
488 } else if (brw
->gen
== 7) {
489 ctx
->PerfMonitor
.Groups
= gen7_groups
;
490 ctx
->PerfMonitor
.NumGroups
= ARRAY_SIZE(gen7_groups
);
491 brw
->perfmon
.statistics_registers
= gen7_statistics_register_addresses
;