i965: perf: snapshot RPSTAT register
author Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Wed, 7 Mar 2018 10:46:58 +0000 (10:46 +0000)
committer Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Mon, 23 Apr 2018 17:30:10 +0000 (18:30 +0100)
This register contains the current and previous frequency of the GT. It is
one of the values GPA would like to have as part of their queries.

v2: Don't use this register on baytrail/cherryview (Ken)
    Use GET_FIELD() macro (Ken)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_performance_query.c
src/mesa/drivers/dri/i965/brw_performance_query.h

index 8bf6f68b67c88ece887c29dbfce3759508eb1bcc..855f1c7d7446c953a61001542d80e3abee389b87 100644 (file)
@@ -1656,6 +1656,18 @@ enum brw_pixel_shader_coverage_mask_mode {
 #define CS_DEBUG_MODE2                     0x20d8 /* Gen9+ */
 # define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4)
 
+#define GEN7_RPSTAT1                       0xA01C /* Gen7-8, except BYT/CHV */
+#define  GEN7_RPSTAT1_CURR_GT_FREQ_SHIFT   7
+#define  GEN7_RPSTAT1_CURR_GT_FREQ_MASK    INTEL_MASK(13, 7) /* 50 MHz units */
+#define  GEN7_RPSTAT1_PREV_GT_FREQ_SHIFT   0
+#define  GEN7_RPSTAT1_PREV_GT_FREQ_MASK    INTEL_MASK(6, 0)
+
+#define GEN9_RPSTAT0                       0xA01C /* Gen9+, same offset as GEN7_RPSTAT1 */
+#define  GEN9_RPSTAT0_CURR_GT_FREQ_SHIFT   23
+#define  GEN9_RPSTAT0_CURR_GT_FREQ_MASK    INTEL_MASK(31, 23) /* 50/3 MHz units */
+#define  GEN9_RPSTAT0_PREV_GT_FREQ_SHIFT   0
+#define  GEN9_RPSTAT0_PREV_GT_FREQ_MASK    INTEL_MASK(8, 0)
+
 #define SLICE_COMMON_ECO_CHICKEN1          0x731c /* Gen9+ */
 # define GLK_SCEC_BARRIER_MODE_GPGPU       (0 << 7)
 # define GLK_SCEC_BARRIER_MODE_3D_HULL     (1 << 7)
index 44cac85c6e6b70fd76fab274108a2193606d96ad..32cf96a333d693d77fb2f73d341b371131665df5 100644 (file)
@@ -216,6 +216,8 @@ brw_perf_query(struct gl_perf_query_object *o)
 
 #define MI_RPC_BO_SIZE              4096
 #define MI_RPC_BO_END_OFFSET_BYTES  (MI_RPC_BO_SIZE / 2)
+#define MI_FREQ_START_OFFSET_BYTES  (3072) /* RPSTAT snapshot at query begin */
+#define MI_FREQ_END_OFFSET_BYTES    (3076) /* RPSTAT snapshot at query end */
 
 /******************************************************************************/
 
@@ -946,6 +948,21 @@ close_perf(struct brw_context *brw)
    }
 }
 
+/**
+ * Snapshot the GT frequency status register into \p bo.
+ *
+ * Calls brw_store_register_mem32() with the RPSTAT register matching the
+ * device generation and the destination \p bo_offset (in bytes):
+ *  - Gen9 and later use GEN9_RPSTAT0.
+ *  - Gen7 and Gen8 use GEN7_RPSTAT1, except on Baytrail and Cherryview.
+ *
+ * For every other device (Baytrail, Cherryview, anything before Gen7) this
+ * function does nothing, leaving the contents of \p bo untouched.
+ */
+static void
+capture_frequency_stat_register(struct brw_context *brw,
+                                struct brw_bo *bo,
+                                uint32_t bo_offset)
+{
+   const struct gen_device_info *info = &brw->screen->devinfo;
+
+   if (info->gen >= 9) {
+      brw_store_register_mem32(brw, bo, GEN9_RPSTAT0, bo_offset);
+      return;
+   }
+
+   /* Only Gen7/Gen8 remain below; the register is not used on BYT/CHV. */
+   const bool byt_or_chv = info->is_baytrail || info->is_cherryview;
+
+   if (info->gen >= 7 && !byt_or_chv)
+      brw_store_register_mem32(brw, bo, GEN7_RPSTAT1, bo_offset);
+}
+
 /**
  * Driver hook for glBeginPerfQueryINTEL().
  */
@@ -1138,6 +1155,8 @@ brw_begin_perf_query(struct gl_context *ctx,
       /* Take a starting OA counter snapshot. */
       brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, 0,
                                           obj->oa.begin_report_id);
+      capture_frequency_stat_register(brw, obj->oa.bo, MI_FREQ_START_OFFSET_BYTES);
+
       ++brw->perfquery.n_active_oa_queries;
 
       /* No already-buffered samples can possibly be associated with this query
@@ -1221,6 +1240,7 @@ brw_end_perf_query(struct gl_context *ctx,
        */
       if (!obj->oa.results_accumulated) {
          /* Take an ending OA counter snapshot. */
+         capture_frequency_stat_register(brw, obj->oa.bo, MI_FREQ_END_OFFSET_BYTES);
          brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo,
                                              MI_RPC_BO_END_OFFSET_BYTES,
                                              obj->oa.begin_report_id + 1);
@@ -1321,6 +1341,35 @@ brw_is_perf_query_ready(struct gl_context *ctx,
    return false;
 }
 
+/**
+ * Decode the GT frequency snapshots of a query into obj->oa.gt_frequency[].
+ *
+ * The two raw 32-bit register values are read from the mapped OA buffer
+ * (obj->oa.map) at MI_FREQ_START_OFFSET_BYTES and MI_FREQ_END_OFFSET_BYTES.
+ * On return gt_frequency[0] holds the frequency at the beginning of the
+ * query and gt_frequency[1] the frequency at its end, both in Hz.
+ *
+ * On Baytrail and Cherryview no snapshot is taken, so both entries are set
+ * to 0.
+ */
+static void
+read_gt_frequency(struct brw_context *brw,
+                  struct brw_perf_query_object *obj)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
+   /* capture_frequency_stat_register() does not store the register on
+    * Baytrail/Cherryview, so the dwords at the snapshot offsets were never
+    * written by the GPU. Report 0 rather than decoding stale buffer contents
+    * as if they were a frequency.
+    */
+   if (devinfo->is_baytrail || devinfo->is_cherryview) {
+      obj->oa.gt_frequency[0] = 0;
+      obj->oa.gt_frequency[1] = 0;
+      return;
+   }
+
+   const uint32_t start =
+      *((uint32_t *)(obj->oa.map + MI_FREQ_START_OFFSET_BYTES));
+   const uint32_t end =
+      *((uint32_t *)(obj->oa.map + MI_FREQ_END_OFFSET_BYTES));
+
+   switch (devinfo->gen) {
+   case 7:
+   case 8:
+      /* The field counts in steps of 50 MHz. */
+      obj->oa.gt_frequency[0] = GET_FIELD(start, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
+      obj->oa.gt_frequency[1] = GET_FIELD(end, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
+      break;
+   case 9:
+   case 10:
+   case 11:
+      /* The field counts in steps of 50/3 MHz; the integer division
+       * truncates the result to whole MHz.
+       */
+      obj->oa.gt_frequency[0] = GET_FIELD(start, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
+      obj->oa.gt_frequency[1] = GET_FIELD(end, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
+      break;
+   default:
+      unreachable("unexpected gen");
+   }
+
+   /* Put the numbers into Hz. */
+   obj->oa.gt_frequency[0] *= 1000000ULL;
+   obj->oa.gt_frequency[1] *= 1000000ULL;
+}
+
 static int
 get_oa_counter_data(struct brw_context *brw,
                     struct brw_perf_query_object *obj,
@@ -1332,6 +1381,7 @@ get_oa_counter_data(struct brw_context *brw,
    int written = 0;
 
    if (!obj->oa.results_accumulated) {
+      read_gt_frequency(brw, obj);
       accumulate_oa_reports(brw, obj);
       assert(obj->oa.results_accumulated);
 
index f62786f7f1ce74ef035666f91e5b8ab89d34e5de..f8732738b4ed9b449f05f038ed0b6f244a312fa7 100644 (file)
@@ -113,6 +113,11 @@ struct brw_perf_query_object
           * Number of reports accumulated to produce the results.
           */
          uint32_t reports_accumulated;
+
+         /**
+          * Frequency of the GT at begin and end of the query.
+          */
+         uint64_t gt_frequency[2];
       } oa;
 
       struct {