nvc0: add new warp_execution_efficiency metric on SM30+
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>
Wed, 2 Nov 2016 22:57:25 +0000 (23:57 +0100)
committerSamuel Pitoiset <samuel.pitoiset@gmail.com>
Wed, 2 Nov 2016 22:35:42 +0000 (23:35 +0100)
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h

index f390ebdb2da0fdcf78f1b36090f519d0469502e0..e5034f79b5abba129569642a238a0f6cf6143776 100644 (file)
@@ -83,6 +83,12 @@ static const struct nvc0_hw_metric_cfg {
       UINT64,
       "Average number of replays due to shared memory conflicts for each "
       "instruction executed"),
+
+   _Q(WARP_EXECUTION_EFFICIENCY,
+      "metric-warp_execution_efficiency",
+      PERCENTAGE,
+      "Ratio of the average active threads per warp to the maximum number of "
+      "threads per warp supported on a multiprocessor"),
 };
 
 #undef _Q
@@ -314,6 +320,15 @@ sm30_shared_replay_overhead =
    .num_queries = 3,
 };
 
+static const struct nvc0_hw_metric_query_cfg
+sm30_warp_execution_efficiency =
+{
+   .type        = NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY,
+   .queries[0]  = _SM(INST_EXECUTED),    /* denominator; presumably read back as res64[0] */
+   .queries[1]  = _SM(TH_INST_EXECUTED), /* numerator; presumably read back as res64[1] */
+   .num_queries = 2,
+};
+
 static const struct nvc0_hw_metric_query_cfg *sm30_hw_metric_queries[] =
 {
    &sm20_achieved_occupancy,
@@ -326,6 +341,7 @@ static const struct nvc0_hw_metric_query_cfg *sm30_hw_metric_queries[] =
    &sm30_issue_slots,
    &sm30_issue_slot_utilization,
    &sm30_shared_replay_overhead,
+   &sm30_warp_execution_efficiency,
 };
 
 /* ==== Compute capability 3.5 (GK110) ==== */
@@ -340,6 +356,7 @@ static const struct nvc0_hw_metric_query_cfg *sm35_hw_metric_queries[] =
    &sm30_inst_issued,
    &sm30_issue_slot_utilization,
    &sm30_shared_replay_overhead,
+   &sm30_warp_execution_efficiency,
 };
 
 #undef _SM
@@ -573,6 +590,12 @@ sm30_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
       if (res64[2])
          return (res64[0] + res64[1]) / (double)res64[2];
       break;
+   case NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY:
+      /* thread_inst_executed / (inst_executed * max. number of threads per
+       * warp) * 100 */
+      if (res64[0])
+         return (res64[1] / ((double)res64[0] * 32)) * 100;
+      break;
    default:
       debug_printf("invalid metric type: %d\n",
                    hq->base.type - NVC0_HW_METRIC_QUERY(0));
index 3203a8ca2b934ea3395e93d164d956c3b30186d8..c9a54c9493dda60bc8798c9ea65c1cc9ef390b2a 100644 (file)
@@ -32,6 +32,7 @@ enum nvc0_hw_metric_queries
     NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
     NVC0_HW_METRIC_QUERY_IPC,
     NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD,
+    NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY,
     NVC0_HW_METRIC_QUERY_COUNT
 };