i965: Add a function to emit the MI_REPORT_PERF_COUNT packet.
author Kenneth Graunke <kenneth@whitecape.org>
Sun, 3 Nov 2013 01:48:18 +0000 (18:48 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Thu, 21 Nov 2013 23:01:14 +0000 (15:01 -0800)
MI_REPORT_PERF_COUNT writes a snapshot of the Observability Architecture
counters to a buffer.  Exactly how it works varies between generations:
Ironlake requires two packets, Sandybridge has to use GGTT, and Ivybridge
and later use PPGTT.

v2: Assert that we didn't use more space than we reserved (suggested
    by Eric Anholt).

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
src/mesa/drivers/dri/i965/brw_performance_monitor.c

index 1e04938ad76299ecc20d0126790283ffaab2ac84..e5d214e968861feb74d1647249bdfdc0fefe2e27 100644 (file)
@@ -595,6 +595,82 @@ monitor_needs_oa(struct brw_context *brw,
    return m->ActiveGroups[OA_COUNTERS];
 }
 
+/**
+ * Batch space, in DWords, reserved for one MI_REPORT_PERF_COUNT snapshot,
+ * including the required PIPE_CONTROL flushes.
+ *
+ * Sandybridge is the worst case scenario: intel_batchbuffer_emit_mi_flush
+ * expands to three PIPE_CONTROLs which are 4 DWords each.  We have to flush
+ * before and after MI_REPORT_PERF_COUNT, so multiply by two.  Finally, add
+ * 3 DWords for the report itself (Ironlake emits 6; TODO confirm it fits).
+ */
+#define MI_REPORT_PERF_COUNT_BATCH_DWORDS (2 * (3 * 4) + 3)
+
+/**
+ * Emit an MI_REPORT_PERF_COUNT command (two on Gen5) between two MI flushes.
+ * Writes OA counters to bo at offset_in_bytes, which must be a multiple of 64.
+ * NOTE(review): the closing assert treats batch.used as bytes; confirm units.
+ */
+static void
+emit_mi_report_perf_count(struct brw_context *brw,
+                          drm_intel_bo *bo,
+                          uint32_t offset_in_bytes,
+                          uint32_t report_id)
+{
+   assert(offset_in_bytes % 64 == 0);
+
+   /* Make sure the commands to take a snapshot fit in a single batch. */
+   intel_batchbuffer_require_space(brw, MI_REPORT_PERF_COUNT_BATCH_DWORDS * 4,
+                                   RENDER_RING);
+   int batch_used = brw->batch.used;
+
+   /* Reports apparently don't always get written unless we flush first. */
+   intel_batchbuffer_emit_mi_flush(brw);
+
+   if (brw->gen == 5) {
+      /* Ironlake needs two MI_REPORT_PERF_COUNT commands to write all counters;
+       * set 1 is written 64 bytes past set 0, and its report ID is ignored.
+       */
+      BEGIN_BATCH(6);
+      OUT_BATCH(GEN5_MI_REPORT_PERF_COUNT | GEN5_MI_COUNTER_SET_0);
+      OUT_RELOC(bo,
+                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                offset_in_bytes);
+      OUT_BATCH(report_id);
+
+      OUT_BATCH(GEN5_MI_REPORT_PERF_COUNT | GEN5_MI_COUNTER_SET_1);
+      OUT_RELOC(bo,
+                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                offset_in_bytes + 64);
+      OUT_BATCH(report_id);
+      ADVANCE_BATCH();
+   } else if (brw->gen == 6) {
+      BEGIN_BATCH(3);
+      OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT);
+      OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                offset_in_bytes | MI_COUNTER_ADDRESS_GTT);
+      OUT_BATCH(report_id);
+      ADVANCE_BATCH();
+   } else if (brw->gen == 7) {
+      BEGIN_BATCH(3);
+      OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT);
+      OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                offset_in_bytes);
+      OUT_BATCH(report_id);
+      ADVANCE_BATCH();
+   } else {
+      assert(!"Unsupported generation for performance counters.");
+   }
+
+   /* Reports apparently don't always get written unless we flush after. */
+   intel_batchbuffer_emit_mi_flush(brw);
+
+   (void) batch_used;
+   assert(brw->batch.used - batch_used <= MI_REPORT_PERF_COUNT_BATCH_DWORDS * 4);
+}
+
+/******************************************************************************/
+
 /**
  * Initialize a monitor to sane starting state; throw away old buffers.
  */