nvc0: rework the driver metrics infrastructure
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Wed, 9 Mar 2016 22:24:39 +0000 (23:24 +0100)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 10 Mar 2016 17:20:29 +0000 (18:20 +0100)
This follows the same design as MP perf counters.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Acked-by: Ilia Mirkin <imirkin@alum.mit.edu>
src/gallium/drivers/nouveau/nvc0/nvc0_query.c
src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h

index f9f2bbe633fb77f969dd272a57fb57b44a1c51d6..68364328dfa9aa35d067f143eff6aff0dfbd4772 100644 (file)
@@ -230,16 +230,10 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
    } else
    if (id == NVC0_HW_METRIC_QUERY_GROUP) {
       if (screen->compute) {
-          if (screen->base.class_3d == NVE4_3D_CLASS) {
+          if (screen->base.class_3d <= NVE4_3D_CLASS) {
             info->name = "Performance metrics";
             info->max_active_queries = 1;
-            info->num_queries = NVE4_HW_METRIC_QUERY_COUNT;
-            return 1;
-         } else
-         if (screen->base.class_3d < NVE4_3D_CLASS) {
-            info->name = "Performance metrics";
-            info->max_active_queries = 1;
-            info->num_queries = NVC0_HW_METRIC_QUERY_COUNT;
+            info->num_queries = nvc0_hw_metric_get_num_queries(screen);
             return 1;
          }
       }
index c1085511a85dc0f8176bdb1088d445fbb6429ce3..a01ab3fba4ce1fd924c9db80244e4ab66ae340e1 100644 (file)
 #include "nvc0/nvc0_query_hw_metric.h"
 #include "nvc0/nvc0_query_hw_sm.h"
 
-/* === PERFORMANCE MONITORING METRICS for NVC0:NVE4 === */
-static const char *nvc0_hw_metric_names[] =
-{
-   "metric-achieved_occupancy",
-   "metric-branch_efficiency",
-   "metric-inst_issued",
-   "metric-inst_per_wrap",
-   "metric-inst_replay_overhead",
-   "metric-issued_ipc",
-   "metric-issue_slots",
-   "metric-issue_slot_utilization",
-   "metric-ipc",
+/*
+ * Name table for the driver metrics: maps each NVC0_HW_METRIC_QUERY_* type
+ * to the name string reported for it. The table is only read by the lookup
+ * helper in this file, so keep it static (no exported symbol) and const
+ * (read-only data).
+ */
+#define _Q(t,n) { NVC0_HW_METRIC_QUERY_##t, n }
+static const struct {
+   unsigned type;       /* NVC0_HW_METRIC_QUERY_* enum value */
+   const char *name;    /* name string returned for that type */
+} nvc0_hw_metric_queries[] = {
+   _Q(ACHIEVED_OCCUPANCY,           "metric-achieved_occupancy"               ),
+   _Q(BRANCH_EFFICIENCY,            "metric-branch_efficiency"                ),
+   _Q(INST_ISSUED,                  "metric-inst_issued"                      ),
+   _Q(INST_PER_WRAP,                "metric-inst_per_wrap"                    ),
+   _Q(INST_REPLAY_OVERHEAD,         "metric-inst_replay_overhead"             ),
+   _Q(ISSUED_IPC,                   "metric-issued_ipc"                       ),
+   _Q(ISSUE_SLOTS,                  "metric-issue_slots"                      ),
+   _Q(ISSUE_SLOT_UTILIZATION,       "metric-issue_slot_utilization"           ),
+   _Q(IPC,                          "metric-ipc"                              ),
+   _Q(SHARED_REPLAY_OVERHEAD,       "metric-shared_replay_overhead"           ),
 };
 
+#undef _Q
+
+/*
+ * Return the name string registered for a metric type.
+ *
+ * query_type is a NVC0_HW_METRIC_QUERY_* enum value (not the offset
+ * pipe query id). If no table entry matches, the assert fires; with
+ * asserts compiled out NULL is returned instead.
+ */
+static inline const char *
+nvc0_hw_metric_query_get_name(unsigned query_type)
+{
+   const unsigned count = ARRAY_SIZE(nvc0_hw_metric_queries);
+   unsigned idx = 0;
+
+   while (idx < count) {
+      if (query_type == nvc0_hw_metric_queries[idx].type)
+         return nvc0_hw_metric_queries[idx].name;
+      idx++;
+   }
+
+   /* unknown metric type */
+   assert(0);
+   return NULL;
+}
+
 struct nvc0_hw_metric_query_cfg {
+   unsigned type;
    uint32_t queries[8];
    uint32_t num_queries;
 };
 
 #define _SM(n) NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_ ##n)
-#define _M(n, c) [NVC0_HW_METRIC_QUERY_##n] = c
 
 /* ==== Compute capability 2.0 (GF100/GF110) ==== */
 static const struct nvc0_hw_metric_query_cfg
 sm20_achieved_occupancy =
 {
+   .type        = NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY,
    .queries[0]  = _SM(ACTIVE_WARPS),
    .queries[1]  = _SM(ACTIVE_CYCLES),
    .num_queries = 2,
@@ -58,6 +77,7 @@ sm20_achieved_occupancy =
 static const struct nvc0_hw_metric_query_cfg
 sm20_branch_efficiency =
 {
+   .type        = NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY,
    .queries[0]  = _SM(BRANCH),
    .queries[1]  = _SM(DIVERGENT_BRANCH),
    .num_queries = 2,
@@ -66,6 +86,7 @@ sm20_branch_efficiency =
 static const struct nvc0_hw_metric_query_cfg
 sm20_inst_per_wrap =
 {
+   .type        = NVC0_HW_METRIC_QUERY_INST_PER_WRAP,
    .queries[0]  = _SM(INST_EXECUTED),
    .queries[1]  = _SM(WARPS_LAUNCHED),
    .num_queries = 2,
@@ -74,6 +95,7 @@ sm20_inst_per_wrap =
 static const struct nvc0_hw_metric_query_cfg
 sm20_inst_replay_overhead =
 {
+   .type        = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
    .queries[0]  = _SM(INST_ISSUED),
    .queries[1]  = _SM(INST_EXECUTED),
    .num_queries = 2,
@@ -82,6 +104,16 @@ sm20_inst_replay_overhead =
 static const struct nvc0_hw_metric_query_cfg
 sm20_issued_ipc =
 {
+   .type        = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
+   .queries[0]  = _SM(INST_ISSUED),
+   .queries[1]  = _SM(ACTIVE_CYCLES),
+   .num_queries = 2,
+};
+
+static const struct nvc0_hw_metric_query_cfg
+sm20_issue_slot_utilization =
+{
+   .type        = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
    .queries[0]  = _SM(INST_ISSUED),
    .queries[1]  = _SM(ACTIVE_CYCLES),
    .num_queries = 2,
@@ -90,6 +122,7 @@ sm20_issued_ipc =
 static const struct nvc0_hw_metric_query_cfg
 sm20_ipc =
 {
+   .type        = NVC0_HW_METRIC_QUERY_IPC,
    .queries[0]  = _SM(INST_EXECUTED),
    .queries[1]  = _SM(ACTIVE_CYCLES),
    .num_queries = 2,
@@ -97,21 +130,20 @@ sm20_ipc =
 
 static const struct nvc0_hw_metric_query_cfg *sm20_hw_metric_queries[] =
 {
-   _M(ACHIEVED_OCCUPANCY,     &sm20_achieved_occupancy),
-   _M(BRANCH_EFFICIENCY,      &sm20_branch_efficiency),
-   _M(INST_ISSUED,            NULL),
-   _M(INST_PER_WRAP,          &sm20_inst_per_wrap),
-   _M(INST_REPLAY_OVERHEAD,   &sm20_inst_replay_overhead),
-   _M(ISSUED_IPC,             &sm20_issued_ipc),
-   _M(ISSUE_SLOTS,            NULL),
-   _M(ISSUE_SLOT_UTILIZATION, &sm20_issued_ipc),
-   _M(IPC,                    &sm20_ipc),
+   &sm20_achieved_occupancy,
+   &sm20_branch_efficiency,
+   &sm20_inst_per_wrap,
+   &sm20_inst_replay_overhead,
+   &sm20_issued_ipc,
+   &sm20_issue_slot_utilization,
+   &sm20_ipc,
 };
 
 /* ==== Compute capability 2.1 (GF108+ except GF110) ==== */
 static const struct nvc0_hw_metric_query_cfg
 sm21_inst_issued =
 {
+   .type        = NVC0_HW_METRIC_QUERY_INST_ISSUED,
    .queries[0]  = _SM(INST_ISSUED1_0),
    .queries[1]  = _SM(INST_ISSUED1_1),
    .queries[2]  = _SM(INST_ISSUED2_0),
@@ -122,6 +154,7 @@ sm21_inst_issued =
 static const struct nvc0_hw_metric_query_cfg
 sm21_inst_replay_overhead =
 {
+   .type        = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
    .queries[0]  = _SM(INST_ISSUED1_0),
    .queries[1]  = _SM(INST_ISSUED1_1),
    .queries[2]  = _SM(INST_ISSUED2_0),
@@ -133,6 +166,7 @@ sm21_inst_replay_overhead =
 static const struct nvc0_hw_metric_query_cfg
 sm21_issued_ipc =
 {
+   .type        = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
    .queries[0]  = _SM(INST_ISSUED1_0),
    .queries[1]  = _SM(INST_ISSUED1_1),
    .queries[2]  = _SM(INST_ISSUED2_0),
@@ -141,44 +175,36 @@ sm21_issued_ipc =
    .num_queries = 5,
 };
 
-static const struct nvc0_hw_metric_query_cfg *sm21_hw_metric_queries[] =
+static const struct nvc0_hw_metric_query_cfg
+sm21_issue_slot_utilization =
 {
-   _M(ACHIEVED_OCCUPANCY,     &sm20_achieved_occupancy),
-   _M(BRANCH_EFFICIENCY,      &sm20_branch_efficiency),
-   _M(INST_ISSUED,            &sm21_inst_issued),
-   _M(INST_PER_WRAP,          &sm20_inst_per_wrap),
-   _M(INST_REPLAY_OVERHEAD,   &sm21_inst_replay_overhead),
-   _M(ISSUED_IPC,             &sm21_issued_ipc),
-   _M(ISSUE_SLOTS,            &sm21_inst_issued),
-   _M(ISSUE_SLOT_UTILIZATION, &sm21_issued_ipc),
-   _M(IPC,                    &sm20_ipc),
+   .type        = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
+   .queries[0]  = _SM(INST_ISSUED1_0),
+   .queries[1]  = _SM(INST_ISSUED1_1),
+   .queries[2]  = _SM(INST_ISSUED2_0),
+   .queries[3]  = _SM(INST_ISSUED2_1),
+   .queries[4]  = _SM(ACTIVE_CYCLES),
+   .num_queries = 5,
 };
 
-#undef _SM
-#undef _M
-
-/* === PERFORMANCE MONITORING METRICS for NVE4+ === */
-static const char *nve4_hw_metric_names[] =
-{
-   "metric-achieved_occupancy",
-   "metric-branch_efficiency",
-   "metric-inst_issued",
-   "metric-inst_per_wrap",
-   "metric-inst_replay_overhead",
-   "metric-issued_ipc",
-   "metric-issue_slots",
-   "metric-issue_slot_utilization",
-   "metric-ipc",
-   "metric-shared_replay_overhead",
+/*
+ * issue_slots needs a cfg of its own. Metrics are now looked up by .type,
+ * so listing sm21_inst_issued a second time (as the old index-based table
+ * did for the ISSUE_SLOTS slot) would advertise inst_issued twice and leave
+ * issue_slots without any entry. It uses the four issue counters that the
+ * old table borrowed from sm21_inst_issued.
+ * NOTE(review): sm21_hw_metric_calc_result is not visible here; confirm its
+ * ISSUE_SLOTS case reads res64[0..3] in this order.
+ */
+static const struct nvc0_hw_metric_query_cfg
+sm21_issue_slots =
+{
+   .type        = NVC0_HW_METRIC_QUERY_ISSUE_SLOTS,
+   .queries[0]  = _SM(INST_ISSUED1_0),
+   .queries[1]  = _SM(INST_ISSUED1_1),
+   .queries[2]  = _SM(INST_ISSUED2_0),
+   .queries[3]  = _SM(INST_ISSUED2_1),
+   .num_queries = 4,
+};
+
+/* Metric cfgs for compute capability 2.1: one entry per metric type. */
+static const struct nvc0_hw_metric_query_cfg *sm21_hw_metric_queries[] =
+{
+   &sm20_achieved_occupancy,
+   &sm20_branch_efficiency,
+   &sm21_inst_issued,
+   &sm20_inst_per_wrap,
+   &sm21_inst_replay_overhead,
+   &sm21_issued_ipc,
+   &sm21_issue_slots,
+   &sm21_issue_slot_utilization,
+   &sm20_ipc,
 };
 
-#define _SM(n) NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_ ##n)
-#define _M(n, c) [NVE4_HW_METRIC_QUERY_##n] = c
-
 /* ==== Compute capability 3.0 (GK104/GK106/GK107) ==== */
 static const struct nvc0_hw_metric_query_cfg
 sm30_achieved_occupancy =
 {
+   .type        = NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY,
    .queries[0]  = _SM(ACTIVE_WARPS),
    .queries[1]  = _SM(ACTIVE_CYCLES),
    .num_queries = 2,
@@ -187,6 +213,7 @@ sm30_achieved_occupancy =
 static const struct nvc0_hw_metric_query_cfg
 sm30_branch_efficiency =
 {
+   .type        = NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY,
    .queries[0]  = _SM(BRANCH),
    .queries[1]  = _SM(DIVERGENT_BRANCH),
    .num_queries = 2,
@@ -195,6 +222,7 @@ sm30_branch_efficiency =
 static const struct nvc0_hw_metric_query_cfg
 sm30_inst_issued =
 {
+   .type        = NVC0_HW_METRIC_QUERY_INST_ISSUED,
    .queries[0]  = _SM(INST_ISSUED1),
    .queries[1]  = _SM(INST_ISSUED2),
    .num_queries = 2,
@@ -203,6 +231,7 @@ sm30_inst_issued =
 static const struct nvc0_hw_metric_query_cfg
 sm30_inst_per_wrap =
 {
+   .type        = NVC0_HW_METRIC_QUERY_INST_PER_WRAP,
    .queries[0]  = _SM(INST_EXECUTED),
    .queries[1]  = _SM(WARPS_LAUNCHED),
    .num_queries = 2,
@@ -211,6 +240,7 @@ sm30_inst_per_wrap =
 static const struct nvc0_hw_metric_query_cfg
 sm30_inst_replay_overhead =
 {
+   .type        = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
    .queries[0]  = _SM(INST_ISSUED1),
    .queries[1]  = _SM(INST_ISSUED2),
    .queries[2]  = _SM(INST_EXECUTED),
@@ -220,6 +250,17 @@ sm30_inst_replay_overhead =
 static const struct nvc0_hw_metric_query_cfg
 sm30_issued_ipc =
 {
+   .type        = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
+   .queries[0]  = _SM(INST_ISSUED1),
+   .queries[1]  = _SM(INST_ISSUED2),
+   .queries[2]  = _SM(ACTIVE_CYCLES),
+   .num_queries = 3,
+};
+
+static const struct nvc0_hw_metric_query_cfg
+sm30_issue_slot_utilization =
+{
+   .type        = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
    .queries[0]  = _SM(INST_ISSUED1),
    .queries[1]  = _SM(INST_ISSUED2),
    .queries[2]  = _SM(ACTIVE_CYCLES),
@@ -229,6 +270,7 @@ sm30_issued_ipc =
 static const struct nvc0_hw_metric_query_cfg
 sm30_ipc =
 {
+   .type        = NVC0_HW_METRIC_QUERY_IPC,
    .queries[0]  = _SM(INST_EXECUTED),
    .queries[1]  = _SM(ACTIVE_CYCLES),
    .num_queries = 2,
@@ -237,6 +279,7 @@ sm30_ipc =
 static const struct nvc0_hw_metric_query_cfg
 sm30_shared_replay_overhead =
 {
+   .type        = NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD,
    .queries[0]  = _SM(SHARED_LD_REPLAY),
    .queries[1]  = _SM(SHARED_ST_REPLAY),
    .queries[2]  = _SM(INST_EXECUTED),
@@ -245,44 +288,71 @@ sm30_shared_replay_overhead =
 
+/*
+ * issue_slots needs a cfg of its own. Metrics are now looked up by .type,
+ * so listing sm30_inst_issued a second time would advertise inst_issued
+ * twice and make the ISSUE_SLOTS case of sm30_hw_metric_calc_result
+ * unreachable. That case computes inst_issued1 + inst_issued2, so the two
+ * counters are the same as for inst_issued.
+ */
+static const struct nvc0_hw_metric_query_cfg
+sm30_issue_slots =
+{
+   .type        = NVC0_HW_METRIC_QUERY_ISSUE_SLOTS,
+   .queries[0]  = _SM(INST_ISSUED1),
+   .queries[1]  = _SM(INST_ISSUED2),
+   .num_queries = 2,
+};
+
+/* Metric cfgs for compute capability 3.0: one entry per metric type. */
 static const struct nvc0_hw_metric_query_cfg *sm30_hw_metric_queries[] =
 {
-   _M(ACHIEVED_OCCUPANCY,              &sm30_achieved_occupancy),
-   _M(BRANCH_EFFICIENCY,               &sm30_branch_efficiency),
-   _M(INST_ISSUED,                     &sm30_inst_issued),
-   _M(INST_PER_WRAP,                   &sm30_inst_per_wrap),
-   _M(INST_REPLAY_OVERHEAD,            &sm30_inst_replay_overhead),
-   _M(ISSUED_IPC,                      &sm30_issued_ipc),
-   _M(ISSUE_SLOTS,                     &sm30_inst_issued),
-   _M(ISSUE_SLOT_UTILIZATION,          &sm30_issued_ipc),
-   _M(IPC,                             &sm30_ipc),
-   _M(SHARED_REPLAY_OVERHEAD,          &sm30_shared_replay_overhead),
+   &sm30_achieved_occupancy,
+   &sm30_branch_efficiency,
+   &sm30_inst_issued,
+   &sm30_inst_per_wrap,
+   &sm30_inst_replay_overhead,
+   &sm30_issued_ipc,
+   &sm30_issue_slots,
+   &sm30_issue_slot_utilization,
+   &sm30_ipc,
+   &sm30_shared_replay_overhead,
 };
 
 #undef _SM
-#undef _M
 
 static inline const struct nvc0_hw_metric_query_cfg **
 nvc0_hw_metric_get_queries(struct nvc0_screen *screen)
 {
    struct nouveau_device *dev = screen->base.device;
 
-   if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
-      return sm20_hw_metric_queries;
-   return sm21_hw_metric_queries;
+   switch (screen->base.class_3d) {
+   case NVE4_3D_CLASS:
+      return sm30_hw_metric_queries;
+   default:
+      if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
+         return sm20_hw_metric_queries;
+      return sm21_hw_metric_queries;
+   }
+   assert(0);
+   return NULL;
+}
+
+/*
+ * Return the number of entries in the metric cfg table selected for this
+ * screen: the sm30 table for NVE4_3D_CLASS, otherwise the sm20 table on
+ * chipsets 0xc0/0xc8 and the sm21 table on everything else.
+ */
+unsigned
+nvc0_hw_metric_get_num_queries(struct nvc0_screen *screen)
+{
+   struct nouveau_device *device = screen->base.device;
+
+   if (screen->base.class_3d == NVE4_3D_CLASS)
+      return ARRAY_SIZE(sm30_hw_metric_queries);
+
+   if (device->chipset == 0xc0 || device->chipset == 0xc8)
+      return ARRAY_SIZE(sm20_hw_metric_queries);
+
+   return ARRAY_SIZE(sm21_hw_metric_queries);
 }
 
 static const struct nvc0_hw_metric_query_cfg *
-nvc0_hw_metric_query_get_cfg(struct nvc0_context *nvc0,
-                             struct nvc0_hw_query *hq)
+nvc0_hw_metric_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
 {
    const struct nvc0_hw_metric_query_cfg **queries;
    struct nvc0_screen *screen = nvc0->screen;
    struct nvc0_query *q = &hq->base;
+   unsigned num_queries;
+   unsigned i;
 
-   if (screen->base.class_3d >= NVE4_3D_CLASS)
-      return sm30_hw_metric_queries[q->type - NVE4_HW_METRIC_QUERY(0)];
-
+   num_queries = nvc0_hw_metric_get_num_queries(screen);
    queries = nvc0_hw_metric_get_queries(screen);
-   return queries[q->type - NVC0_HW_METRIC_QUERY(0)];
+
+   for (i = 0; i < num_queries; i++) {
+      if (NVC0_HW_METRIC_QUERY(queries[i]->type) == q->type)
+         return queries[i];
+   }
+   assert(0);
+   return NULL;
 }
 
 static void
@@ -419,47 +489,47 @@ sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
 static uint64_t
 sm30_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
 {
-   switch (hq->base.type - NVE4_HW_METRIC_QUERY(0)) {
-   case NVE4_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
+   switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
+   case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
       /* (active_warps / active_cycles) / max. number of warps on a MP */
       if (res64[1])
          return (res64[0] / (double)res64[1]) / 64;
       break;
-   case NVE4_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
+   case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
       return sm20_hw_metric_calc_result(hq, res64);
-   case NVE4_HW_METRIC_QUERY_INST_ISSUED:
+   case NVC0_HW_METRIC_QUERY_INST_ISSUED:
       /* inst_issued1 + inst_issued2 * 2 */
       return res64[0] + res64[1] * 2;
-   case NVE4_HW_METRIC_QUERY_INST_PER_WRAP:
+   case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
       return sm20_hw_metric_calc_result(hq, res64);
-   case NVE4_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
+   case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
       /* (metric-inst_issued - inst_executed) / inst_executed */
       if (res64[2])
          return (((res64[0] + res64[1] * 2) - res64[2]) / (double)res64[2]);
       break;
-   case NVE4_HW_METRIC_QUERY_ISSUED_IPC:
+   case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
       /* metric-inst_issued / active_cycles */
       if (res64[2])
          return (res64[0] + res64[1] * 2) / (double)res64[2];
       break;
-   case NVE4_HW_METRIC_QUERY_ISSUE_SLOTS:
+   case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:
       /* inst_issued1 + inst_issued2 */
       return res64[0] + res64[1];
-   case NVE4_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
+   case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
       /* ((metric-issue_slots / 2) / active_cycles) * 100 */
       if (res64[2])
          return (((res64[0] + res64[1]) / 2) / (double)res64[2]) * 100;
       break;
-   case NVE4_HW_METRIC_QUERY_IPC:
+   case NVC0_HW_METRIC_QUERY_IPC:
       return sm20_hw_metric_calc_result(hq, res64);
-   case NVE4_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD:
+   case NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD:
       /* (shared_load_replay + shared_store_replay) / inst_executed */
       if (res64[2])
          return (res64[0] + res64[1]) / (double)res64[2];
       break;
    default:
       debug_printf("invalid metric type: %d\n",
-                   hq->base.type - NVE4_HW_METRIC_QUERY(0));
+                   hq->base.type - NVC0_HW_METRIC_QUERY(0));
       break;
    }
    return 0;
@@ -487,13 +557,16 @@ nvc0_hw_metric_get_query_result(struct nvc0_context *nvc0,
       res64[i] = *(uint64_t *)&results[i];
    }
 
-   if (screen->base.class_3d >= NVE4_3D_CLASS) {
+   switch (screen->base.class_3d) {
+   case NVE4_3D_CLASS:
       value = sm30_hw_metric_calc_result(hq, res64);
-   } else {
+      break;
+   default:
       if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
          value = sm20_hw_metric_calc_result(hq, res64);
       else
          value = sm21_hw_metric_calc_result(hq, res64);
+      break;
    }
 
    *(uint64_t *)result = value;
@@ -515,8 +588,7 @@ nvc0_hw_metric_create_query(struct nvc0_context *nvc0, unsigned type)
    struct nvc0_hw_query *hq;
    unsigned i;
 
-   if ((type < NVE4_HW_METRIC_QUERY(0) || type > NVE4_HW_METRIC_QUERY_LAST) &&
-       (type < NVC0_HW_METRIC_QUERY(0) || type > NVC0_HW_METRIC_QUERY_LAST))
+   if (type < NVC0_HW_METRIC_QUERY(0) || type > NVC0_HW_METRIC_QUERY_LAST)
       return NULL;
 
    hmq = CALLOC_STRUCT(nvc0_hw_metric_query);
@@ -541,46 +613,15 @@ nvc0_hw_metric_create_query(struct nvc0_context *nvc0, unsigned type)
    return hq;
 }
 
-static int
-nvc0_hw_metric_get_next_query_id(const struct nvc0_hw_metric_query_cfg **queries,
-                                 unsigned id)
-{
-   unsigned i, next = 0;
-
-   for (i = 0; i < NVC0_HW_METRIC_QUERY_COUNT; i++) {
-      if (!queries[i]) {
-         next++;
-      } else
-      if (i >= id && queries[id + next]) {
-         break;
-      }
-   }
-   return id + next;
-}
-
 int
 nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
                                      struct pipe_driver_query_info *info)
 {
-   uint16_t class_3d = screen->base.class_3d;
    int count = 0;
 
    if (screen->base.drm->version >= 0x01000101) {
-      if (screen->compute) {
-         if (screen->base.class_3d == NVE4_3D_CLASS) {
-            count += NVE4_HW_METRIC_QUERY_COUNT;
-         } else
-         if (class_3d < NVE4_3D_CLASS) {
-            const struct nvc0_hw_metric_query_cfg **queries =
-               nvc0_hw_metric_get_queries(screen);
-            unsigned i;
-
-            for (i = 0; i < NVC0_HW_METRIC_QUERY_COUNT; i++) {
-               if (queries[i])
-                  count++;
-            }
-         }
-      }
+      if (screen->compute)
+         count = nvc0_hw_metric_get_num_queries(screen);
    }
 
    if (!info)
@@ -588,19 +629,12 @@ nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
 
    if (id < count) {
       if (screen->compute) {
-         if (screen->base.class_3d == NVE4_3D_CLASS) {
-            info->name = nve4_hw_metric_names[id];
-            info->query_type = NVE4_HW_METRIC_QUERY(id);
-            info->group_id = NVC0_HW_METRIC_QUERY_GROUP;
-            return 1;
-         } else
-         if (class_3d < NVE4_3D_CLASS) {
-             const struct nvc0_hw_metric_query_cfg **queries =
+         if (screen->base.class_3d <= NVE4_3D_CLASS) {
+            const struct nvc0_hw_metric_query_cfg **queries =
                nvc0_hw_metric_get_queries(screen);
 
-            id = nvc0_hw_metric_get_next_query_id(queries, id);
-            info->name = nvc0_hw_metric_names[id];
-            info->query_type = NVC0_HW_METRIC_QUERY(id);
+            info->name = nvc0_hw_metric_query_get_name(queries[id]->type);
+            info->query_type = NVC0_HW_METRIC_QUERY(queries[id]->type);
             info->group_id = NVC0_HW_METRIC_QUERY_GROUP;
             return 1;
          }
index 06cb355db40401ff6affa76b350c713e23fb6403..3203a8ca2b934ea3395e93d164d956c3b30186d8 100644 (file)
@@ -18,24 +18,7 @@ nvc0_hw_metric_query(struct nvc0_hw_query *hq)
 /*
  * Driver metrics queries:
  */
-#define NVE4_HW_METRIC_QUERY(i)   (PIPE_QUERY_DRIVER_SPECIFIC + 3072 + (i))
-#define NVE4_HW_METRIC_QUERY_LAST  NVE4_HW_METRIC_QUERY(NVE4_HW_METRIC_QUERY_COUNT - 1)
-enum nve4_hw_metric_queries
-{
-    NVE4_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY = 0,
-    NVE4_HW_METRIC_QUERY_BRANCH_EFFICIENCY,
-    NVE4_HW_METRIC_QUERY_INST_ISSUED,
-    NVE4_HW_METRIC_QUERY_INST_PER_WRAP,
-    NVE4_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
-    NVE4_HW_METRIC_QUERY_ISSUED_IPC,
-    NVE4_HW_METRIC_QUERY_ISSUE_SLOTS,
-    NVE4_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
-    NVE4_HW_METRIC_QUERY_IPC,
-    NVE4_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD,
-    NVE4_HW_METRIC_QUERY_COUNT
-};
-
-#define NVC0_HW_METRIC_QUERY(i)   (PIPE_QUERY_DRIVER_SPECIFIC + 3072 + (i))
+#define NVC0_HW_METRIC_QUERY(i)   (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i))
 #define NVC0_HW_METRIC_QUERY_LAST  NVC0_HW_METRIC_QUERY(NVC0_HW_METRIC_QUERY_COUNT - 1)
 enum nvc0_hw_metric_queries
 {
@@ -48,6 +31,7 @@ enum nvc0_hw_metric_queries
     NVC0_HW_METRIC_QUERY_ISSUE_SLOTS,
     NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
     NVC0_HW_METRIC_QUERY_IPC,
+    NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD,
     NVC0_HW_METRIC_QUERY_COUNT
 };
 
@@ -56,4 +40,7 @@ nvc0_hw_metric_create_query(struct nvc0_context *, unsigned);
 int
 nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *, unsigned,
                                      struct pipe_driver_query_info *);
+unsigned
+nvc0_hw_metric_get_num_queries(struct nvc0_screen *);
+
 #endif