725e8bb4e1d08f368362ac2fc1fd9c3710435dda
[mesa.git] / src / mesa / drivers / dri / i965 / brw_performance_monitor.c
1 /*
2 * Copyright © 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file brw_performance_monitor.c
26 *
27 * Implementation of the GL_AMD_performance_monitor extension.
28 */
29
#include <limits.h>
#include <string.h>

#include "main/bitset.h"
#include "main/hash.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/performance_monitor.h"

#include "glsl/ralloc.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "intel_batchbuffer.h"
43
44 #define FILE_DEBUG_FLAG DEBUG_PERFMON
45
/**
 * i965 representation of a performance monitor object.
 *
 * Wraps the core Mesa gl_perf_monitor_object with the driver-side storage
 * needed to snapshot and accumulate pipeline statistics register values.
 */
struct brw_perf_monitor_object
{
   /** The base class.  Must be first so the downcast in brw_perf_monitor()
    * (a simple pointer cast) is valid.
    */
   struct gl_perf_monitor_object base;

   /**
    * BO containing starting and ending snapshots for any active pipeline
    * statistics counters.
    *
    * The starting snapshot is written at offset 0, the ending snapshot at
    * SECOND_SNAPSHOT_OFFSET_IN_BYTES.  Freed (and set to NULL) once results
    * have been gathered into pipeline_stats_results.
    */
   drm_intel_bo *pipeline_stats_bo;

   /**
    * Storage for final pipeline statistics counter results.
    *
    * One uint64_t per counter in the PIPELINE_STATS_COUNTERS group; NULL
    * until gather_statistics_results() runs.
    */
   uint64_t *pipeline_stats_results;
};
65
/**
 * Downcast a core Mesa monitor object to the i965 subclass.
 *
 * Valid because gl_perf_monitor_object is the first member of
 * struct brw_perf_monitor_object.
 */
static inline struct brw_perf_monitor_object *
brw_perf_monitor(struct gl_perf_monitor_object *m)
{
   struct brw_perf_monitor_object *monitor =
      (struct brw_perf_monitor_object *) m;
   return monitor;
}
72
73 #define SECOND_SNAPSHOT_OFFSET_IN_BYTES 2048
74
75 /******************************************************************************/
76
/** Designated initializer for a 32-bit unsigned counter description.
 * The full [0, UINT32_MAX] range is advertised via ~0.
 */
#define COUNTER(name)           \
   {                            \
      .Name = name,             \
      .Type = GL_UNSIGNED_INT,  \
      .Minimum = { .u32 = 0 },  \
      .Maximum = { .u32 = ~0 }, \
   }

/** Designated initializer for a 64-bit unsigned counter description.
 * The full [0, UINT64_MAX] range is advertised via ~0.
 */
#define COUNTER64(name)              \
   {                                 \
      .Name = name,                  \
      .Type = GL_UNSIGNED_INT64_AMD, \
      .Minimum = { .u64 = 0 },       \
      .Maximum = { .u64 = ~0 },      \
   }

/** Designated initializer for a counter group.
 * counter_list must be an actual array (not a pointer) so that
 * ARRAY_SIZE yields the element count.
 */
#define GROUP(name, max_active, counter_list)  \
   {                                           \
      .Name = name,                            \
      .MaxActiveCounters = max_active,         \
      .Counters = counter_list,                \
      .NumCounters = ARRAY_SIZE(counter_list), \
   }
100
/** Performance Monitor Group IDs
 *
 * Used both as the index into ctx->PerfMonitor.Groups and as the group ID
 * reported to the application in GetPerfMonitorCounterDataAMD results.
 */
enum brw_counter_groups {
   PIPELINE_STATS_COUNTERS, /* Pipeline Statistics Register Counters */
};
105
/**
 * Ironlake:
 * @{
 */
const static struct gl_perf_monitor_group gen5_groups[] = {
   /* Our pipeline statistics counter handling requires hardware contexts. */
   /* Intentionally empty: Gen5 therefore exposes no counter groups. */
};
/** @} */
114
/**
 * Sandybridge:
 * @{
 */
/* NOTE: the order of these entries must exactly match
 * gen6_statistics_register_addresses below — the counter index is used to
 * look up the MMIO register to snapshot.
 */
const static struct gl_perf_monitor_counter gen6_statistics_counters[] = {
   COUNTER64("IA_VERTICES_COUNT"),
   COUNTER64("IA_PRIMITIVES_COUNT"),
   COUNTER64("VS_INVOCATION_COUNT"),
   COUNTER64("GS_INVOCATION_COUNT"),
   COUNTER64("GS_PRIMITIVES_COUNT"),
   COUNTER64("CL_INVOCATION_COUNT"),
   COUNTER64("CL_PRIMITIVES_COUNT"),
   COUNTER64("PS_INVOCATION_COUNT"),
   COUNTER64("PS_DEPTH_COUNT"),
   COUNTER64("SO_NUM_PRIMS_WRITTEN"),
   COUNTER64("SO_PRIM_STORAGE_NEEDED"),
};
132
/** MMIO register addresses for each pipeline statistics counter.
 *
 * Indexed by counter ID; must stay in the same order as
 * gen6_statistics_counters above.
 */
const static int gen6_statistics_register_addresses[] = {
   IA_VERTICES_COUNT,
   IA_PRIMITIVES_COUNT,
   VS_INVOCATION_COUNT,
   GS_INVOCATION_COUNT,
   GS_PRIMITIVES_COUNT,
   CL_INVOCATION_COUNT,
   CL_PRIMITIVES_COUNT,
   PS_INVOCATION_COUNT,
   PS_DEPTH_COUNT,
   GEN6_SO_NUM_PRIMS_WRITTEN,
   GEN6_SO_PRIM_STORAGE_NEEDED,
};
147
/* The single counter group exposed on Sandybridge.  INT_MAX: any number of
 * statistics counters may be active simultaneously.
 */
const static struct gl_perf_monitor_group gen6_groups[] = {
   GROUP("Pipeline Statistics Registers", INT_MAX, gen6_statistics_counters),
};
/** @} */
152
/**
 * Ivybridge/Baytrail/Haswell:
 * @{
 */
/* NOTE: the order of these entries must exactly match
 * gen7_statistics_register_addresses below — the counter index is used to
 * look up the MMIO register to snapshot.  Gen7 adds HS/DS invocations and
 * per-stream transform feedback counters relative to Gen6.
 */
const static struct gl_perf_monitor_counter gen7_statistics_counters[] = {
   COUNTER64("IA_VERTICES_COUNT"),
   COUNTER64("IA_PRIMITIVES_COUNT"),
   COUNTER64("VS_INVOCATION_COUNT"),
   COUNTER64("HS_INVOCATION_COUNT"),
   COUNTER64("DS_INVOCATION_COUNT"),
   COUNTER64("GS_INVOCATION_COUNT"),
   COUNTER64("GS_PRIMITIVES_COUNT"),
   COUNTER64("CL_INVOCATION_COUNT"),
   COUNTER64("CL_PRIMITIVES_COUNT"),
   COUNTER64("PS_INVOCATION_COUNT"),
   COUNTER64("PS_DEPTH_COUNT"),
   COUNTER64("SO_NUM_PRIMS_WRITTEN (Stream 0)"),
   COUNTER64("SO_NUM_PRIMS_WRITTEN (Stream 1)"),
   COUNTER64("SO_NUM_PRIMS_WRITTEN (Stream 2)"),
   COUNTER64("SO_NUM_PRIMS_WRITTEN (Stream 3)"),
   COUNTER64("SO_PRIM_STORAGE_NEEDED (Stream 0)"),
   COUNTER64("SO_PRIM_STORAGE_NEEDED (Stream 1)"),
   COUNTER64("SO_PRIM_STORAGE_NEEDED (Stream 2)"),
   COUNTER64("SO_PRIM_STORAGE_NEEDED (Stream 3)"),
};
178
/** MMIO register addresses for each pipeline statistics counter.
 *
 * Indexed by counter ID; must stay in the same order as
 * gen7_statistics_counters above.
 */
const static int gen7_statistics_register_addresses[] = {
   IA_VERTICES_COUNT,
   IA_PRIMITIVES_COUNT,
   VS_INVOCATION_COUNT,
   HS_INVOCATION_COUNT,
   DS_INVOCATION_COUNT,
   GS_INVOCATION_COUNT,
   GS_PRIMITIVES_COUNT,
   CL_INVOCATION_COUNT,
   CL_PRIMITIVES_COUNT,
   PS_INVOCATION_COUNT,
   PS_DEPTH_COUNT,
   GEN7_SO_NUM_PRIMS_WRITTEN(0),
   GEN7_SO_NUM_PRIMS_WRITTEN(1),
   GEN7_SO_NUM_PRIMS_WRITTEN(2),
   GEN7_SO_NUM_PRIMS_WRITTEN(3),
   GEN7_SO_PRIM_STORAGE_NEEDED(0),
   GEN7_SO_PRIM_STORAGE_NEEDED(1),
   GEN7_SO_PRIM_STORAGE_NEEDED(2),
   GEN7_SO_PRIM_STORAGE_NEEDED(3),
};
201
/* The single counter group exposed on Gen7.  INT_MAX: any number of
 * statistics counters may be active simultaneously.
 */
const static struct gl_perf_monitor_group gen7_groups[] = {
   GROUP("Pipeline Statistics Registers", INT_MAX, gen7_statistics_counters),
};
/** @} */
206
207 /******************************************************************************/
208
209 static GLboolean brw_is_perf_monitor_result_available(struct gl_context *, struct gl_perf_monitor_object *);
210
211 static void
212 dump_perf_monitor_callback(GLuint name, void *monitor_void, void *brw_void)
213 {
214 struct gl_context *ctx = brw_void;
215 struct gl_perf_monitor_object *m = monitor_void;
216 struct brw_perf_monitor_object *monitor = monitor_void;
217
218 DBG("%4d %-7s %-6s %-11s %-9s\n",
219 name,
220 m->Active ? "Active" : "",
221 m->Ended ? "Ended" : "",
222 brw_is_perf_monitor_result_available(ctx, m) ? "Available" : "",
223 monitor->pipeline_stats_bo ? "Stats BO" : "");
224 }
225
226 void
227 brw_dump_perf_monitors(struct brw_context *brw)
228 {
229 struct gl_context *ctx = &brw->ctx;
230 DBG("Monitors:\n");
231 _mesa_HashWalk(ctx->PerfMonitor.Monitors, dump_perf_monitor_callback, brw);
232 }
233
234 /******************************************************************************/
235
236 static bool
237 monitor_needs_statistics_registers(struct brw_context *brw,
238 struct gl_perf_monitor_object *m)
239 {
240 return brw->gen >= 6 && m->ActiveGroups[PIPELINE_STATS_COUNTERS];
241 }
242
/**
 * Take a snapshot of any monitored pipeline statistics counters.
 *
 * Writes the current value of each active counter's MMIO register into
 * monitor->pipeline_stats_bo at the given byte offset (0 for the starting
 * snapshot, SECOND_SNAPSHOT_OFFSET_IN_BYTES for the ending one).
 */
static void
snapshot_statistics_registers(struct brw_context *brw,
                              struct brw_perf_monitor_object *monitor,
                              uint32_t offset_in_bytes)
{
   struct gl_context *ctx = &brw->ctx;
   /* Convert the byte offset to an index in 64-bit units; the per-counter
    * destination passed to brw_store_register_mem64 is offset + i.
    */
   const int offset = offset_in_bytes / sizeof(uint64_t);
   const int group = PIPELINE_STATS_COUNTERS;
   const int num_counters = ctx->PerfMonitor.Groups[group].NumCounters;

   /* Flush so outstanding rendering is reflected in the counters before we
    * read them.
    */
   intel_batchbuffer_emit_mi_flush(brw);

   for (int i = 0; i < num_counters; i++) {
      if (BITSET_TEST(monitor->base.ActiveCounters[group], i)) {
         /* All pipeline statistics counters are declared 64-bit
          * (COUNTER64 in the gen6/gen7 tables).
          */
         assert(ctx->PerfMonitor.Groups[group].Counters[i].Type ==
                GL_UNSIGNED_INT64_AMD);

         /* statistics_registers[i] is the MMIO address matching counter i
          * (the two tables are kept in the same order).
          */
         brw_store_register_mem64(brw, monitor->pipeline_stats_bo,
                                  brw->perfmon.statistics_registers[i],
                                  offset + i);
      }
   }
}
269
270 /**
271 * Gather results from pipeline_stats_bo, storing the final values.
272 *
273 * This allows us to free pipeline_stats_bo (which is 4K) in favor of a much
274 * smaller array of final results.
275 */
276 static void
277 gather_statistics_results(struct brw_context *brw,
278 struct brw_perf_monitor_object *monitor)
279 {
280 struct gl_context *ctx = &brw->ctx;
281 const int num_counters =
282 ctx->PerfMonitor.Groups[PIPELINE_STATS_COUNTERS].NumCounters;
283
284 monitor->pipeline_stats_results = calloc(num_counters, sizeof(uint64_t));
285
286 drm_intel_bo_map(monitor->pipeline_stats_bo, false);
287 uint64_t *start = monitor->pipeline_stats_bo->virtual;
288 uint64_t *end = start + (SECOND_SNAPSHOT_OFFSET_IN_BYTES / sizeof(uint64_t));
289
290 for (int i = 0; i < num_counters; i++) {
291 monitor->pipeline_stats_results[i] = end[i] - start[i];
292 }
293 drm_intel_bo_unmap(monitor->pipeline_stats_bo);
294 drm_intel_bo_unreference(monitor->pipeline_stats_bo);
295 monitor->pipeline_stats_bo = NULL;
296 }
297
298 /******************************************************************************/
299
300 /**
301 * Initialize a monitor to sane starting state; throw away old buffers.
302 */
303 static void
304 reinitialize_perf_monitor(struct brw_context *brw,
305 struct brw_perf_monitor_object *monitor)
306 {
307 if (monitor->pipeline_stats_bo) {
308 drm_intel_bo_unreference(monitor->pipeline_stats_bo);
309 monitor->pipeline_stats_bo = NULL;
310 }
311
312 free(monitor->pipeline_stats_results);
313 monitor->pipeline_stats_results = NULL;
314 }
315
316 /**
317 * Driver hook for glBeginPerformanceMonitorAMD().
318 */
319 static GLboolean
320 brw_begin_perf_monitor(struct gl_context *ctx,
321 struct gl_perf_monitor_object *m)
322 {
323 struct brw_context *brw = brw_context(ctx);
324 struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);
325
326 DBG("Begin(%d)\n", m->Name);
327
328 reinitialize_perf_monitor(brw, monitor);
329
330 if (monitor_needs_statistics_registers(brw, m)) {
331 monitor->pipeline_stats_bo =
332 drm_intel_bo_alloc(brw->bufmgr, "perf. monitor stats bo", 4096, 64);
333
334 /* Take starting snapshots. */
335 snapshot_statistics_registers(brw, monitor, 0);
336 }
337
338 return true;
339 }
340
341 /**
342 * Driver hook for glEndPerformanceMonitorAMD().
343 */
344 static void
345 brw_end_perf_monitor(struct gl_context *ctx,
346 struct gl_perf_monitor_object *m)
347 {
348 struct brw_context *brw = brw_context(ctx);
349 struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);
350
351 DBG("End(%d)\n", m->Name);
352
353 if (monitor_needs_statistics_registers(brw, m)) {
354 /* Take ending snapshots. */
355 snapshot_statistics_registers(brw, monitor,
356 SECOND_SNAPSHOT_OFFSET_IN_BYTES);
357 }
358 }
359
360 /**
361 * Reset a performance monitor, throwing away any results.
362 */
363 static void
364 brw_reset_perf_monitor(struct gl_context *ctx,
365 struct gl_perf_monitor_object *m)
366 {
367 struct brw_context *brw = brw_context(ctx);
368 struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);
369
370 reinitialize_perf_monitor(brw, monitor);
371
372 if (m->Active) {
373 brw_begin_perf_monitor(ctx, m);
374 }
375 }
376
377 /**
378 * Is a performance monitor result available?
379 */
380 static GLboolean
381 brw_is_perf_monitor_result_available(struct gl_context *ctx,
382 struct gl_perf_monitor_object *m)
383 {
384 struct brw_context *brw = brw_context(ctx);
385 struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);
386
387 bool stats_available = true;
388
389 if (monitor_needs_statistics_registers(brw, m)) {
390 stats_available = !monitor->pipeline_stats_bo ||
391 (!drm_intel_bo_references(brw->batch.bo, monitor->pipeline_stats_bo) &&
392 !drm_intel_bo_busy(monitor->pipeline_stats_bo));
393 }
394
395 return stats_available;
396 }
397
398 /**
399 * Get the performance monitor result.
400 */
401 static void
402 brw_get_perf_monitor_result(struct gl_context *ctx,
403 struct gl_perf_monitor_object *m,
404 GLsizei data_size,
405 GLuint *data,
406 GLint *bytes_written)
407 {
408 struct brw_context *brw = brw_context(ctx);
409 struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);
410
411 DBG("GetResult(%d)\n", m->Name);
412 brw_dump_perf_monitors(brw);
413
414 /* This hook should only be called when results are available. */
415 assert(m->Ended);
416
417 /* Copy data to the supplied array (data).
418 *
419 * The output data format is: <group ID, counter ID, value> for each
420 * active counter. The API allows counters to appear in any order.
421 */
422 GLsizei offset = 0;
423
424 if (monitor_needs_statistics_registers(brw, m)) {
425 const int num_counters =
426 ctx->PerfMonitor.Groups[PIPELINE_STATS_COUNTERS].NumCounters;
427
428 if (!monitor->pipeline_stats_results)
429 gather_statistics_results(brw, monitor);
430
431 for (int i = 0; i < num_counters; i++) {
432 if (BITSET_TEST(m->ActiveCounters[PIPELINE_STATS_COUNTERS], i)) {
433 data[offset++] = PIPELINE_STATS_COUNTERS;
434 data[offset++] = i;
435 *((uint64_t *) (&data[offset])) = monitor->pipeline_stats_results[i];
436 offset += 2;
437 }
438 }
439 }
440
441 if (bytes_written)
442 *bytes_written = offset * sizeof(uint32_t);
443 }
444
445 /**
446 * Create a new performance monitor object.
447 */
448 static struct gl_perf_monitor_object *
449 brw_new_perf_monitor(struct gl_context *ctx)
450 {
451 return calloc(1, sizeof(struct brw_perf_monitor_object));
452 }
453
454 /**
455 * Delete a performance monitor object.
456 */
457 static void
458 brw_delete_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
459 {
460 struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);
461 DBG("Delete(%d)\n", m->Name);
462 reinitialize_perf_monitor(brw_context(ctx), monitor);
463 free(monitor);
464 }
465
466 /******************************************************************************/
467
468 void
469 brw_init_performance_monitors(struct brw_context *brw)
470 {
471 struct gl_context *ctx = &brw->ctx;
472
473 ctx->Driver.NewPerfMonitor = brw_new_perf_monitor;
474 ctx->Driver.DeletePerfMonitor = brw_delete_perf_monitor;
475 ctx->Driver.BeginPerfMonitor = brw_begin_perf_monitor;
476 ctx->Driver.EndPerfMonitor = brw_end_perf_monitor;
477 ctx->Driver.ResetPerfMonitor = brw_reset_perf_monitor;
478 ctx->Driver.IsPerfMonitorResultAvailable = brw_is_perf_monitor_result_available;
479 ctx->Driver.GetPerfMonitorResult = brw_get_perf_monitor_result;
480
481 if (brw->gen == 5) {
482 ctx->PerfMonitor.Groups = gen5_groups;
483 ctx->PerfMonitor.NumGroups = ARRAY_SIZE(gen5_groups);
484 } else if (brw->gen == 6) {
485 ctx->PerfMonitor.Groups = gen6_groups;
486 ctx->PerfMonitor.NumGroups = ARRAY_SIZE(gen6_groups);
487 brw->perfmon.statistics_registers = gen6_statistics_register_addresses;
488 } else if (brw->gen == 7) {
489 ctx->PerfMonitor.Groups = gen7_groups;
490 ctx->PerfMonitor.NumGroups = ARRAY_SIZE(gen7_groups);
491 brw->perfmon.statistics_registers = gen7_statistics_register_addresses;
492 }
493 }