From 524703da5899443ca83cb7aaa587c50d5d74a996 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 2 Nov 2016 23:57:25 +0100 Subject: [PATCH] nvc0: add new warp_execution_efficiency metric on SM30+ Signed-off-by: Samuel Pitoiset Reviewed-by: Ilia Mirkin --- .../nouveau/nvc0/nvc0_query_hw_metric.c | 23 +++++++++++++++++++ .../nouveau/nvc0/nvc0_query_hw_metric.h | 1 + 2 files changed, 24 insertions(+) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c index f390ebdb2da..e5034f79b5a 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c @@ -83,6 +83,12 @@ static const struct nvc0_hw_metric_cfg { UINT64, "Average number of replays due to shared memory conflicts for each " "instruction executed"), + + _Q(WARP_EXECUTION_EFFICIENCY, + "metric-warp_execution_efficiency", + PERCENTAGE, + "Ratio of the average active threads per warp to the maximum number of " + "threads per warp supported on a multiprocessor"), }; #undef _Q @@ -314,6 +320,15 @@ sm30_shared_replay_overhead = .num_queries = 3, }; +static const struct nvc0_hw_metric_query_cfg +sm30_warp_execution_efficiency = +{ + .type = NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY, + .queries[0] = _SM(INST_EXECUTED), + .queries[1] = _SM(TH_INST_EXECUTED), + .num_queries = 2, +}; + static const struct nvc0_hw_metric_query_cfg *sm30_hw_metric_queries[] = { &sm20_achieved_occupancy, @@ -326,6 +341,7 @@ static const struct nvc0_hw_metric_query_cfg *sm30_hw_metric_queries[] = &sm30_issue_slots, &sm30_issue_slot_utilization, &sm30_shared_replay_overhead, + &sm30_warp_execution_efficiency, }; /* ==== Compute capability 3.5 (GK110) ==== */ @@ -340,6 +356,7 @@ static const struct nvc0_hw_metric_query_cfg *sm35_hw_metric_queries[] = &sm30_inst_issued, &sm30_issue_slot_utilization, &sm30_shared_replay_overhead, + &sm30_warp_execution_efficiency, }; #undef _SM @@ -573,6 +590,12 @@ 
sm30_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8]) if (res64[2]) return (res64[0] + res64[1]) / (double)res64[2]; break; + case NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY: + /* thread_inst_executed / (inst_executed * max. number of threads per + * warp) * 100 */ + if (res64[0]) + return (res64[1] / ((double)res64[0] * 32)) * 100; + break; default: debug_printf("invalid metric type: %d\n", hq->base.type - NVC0_HW_METRIC_QUERY(0)); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h index 3203a8ca2b9..c9a54c9493d 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h @@ -32,6 +32,7 @@ enum nvc0_hw_metric_queries NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION, NVC0_HW_METRIC_QUERY_IPC, NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD, + NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY, NVC0_HW_METRIC_QUERY_COUNT }; -- 2.30.2