+ &sm20_achieved_occupancy,
+ &sm20_branch_efficiency,
+ &sm21_inst_issued,
+ &sm20_inst_per_wrap,
+ &sm21_inst_replay_overhead,
+ &sm20_ipc,
+ &sm21_issued_ipc,
+ &sm21_issue_slots,
+ &sm21_issue_slot_utilization,
+};
+
+/* ==== Compute capability 3.0 (GK104/GK106/GK107) ==== */
+static const struct nvc0_hw_metric_query_cfg
+sm30_inst_issued = /* SM 3.0 config for the INST_ISSUED metric */
+{
+ .type = NVC0_HW_METRIC_QUERY_INST_ISSUED,
+ .queries[0] = _SM(INST_ISSUED1), /* NOTE(review): how ISSUED1/ISSUED2 are combined is decided by the consumer of .type, not shown here */
+ .queries[1] = _SM(INST_ISSUED2),
+ .num_queries = 2, /* number of queries[] slots filled in above */
+};
+
+static const struct nvc0_hw_metric_query_cfg
+sm30_inst_replay_overhead = /* SM 3.0 config for the INST_REPLAY_OVERHEAD metric */
+{
+ .type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
+ .queries[0] = _SM(INST_ISSUED1), /* two issue counters are read for this metric */
+ .queries[1] = _SM(INST_ISSUED2),
+ .queries[2] = _SM(INST_EXECUTED), /* presumably the baseline the issue counts are compared against - TODO confirm in the metric computation code */
+ .num_queries = 3, /* number of queries[] slots filled in above */
+};
+
+static const struct nvc0_hw_metric_query_cfg
+sm30_issued_ipc = /* SM 3.0 config for the ISSUED_IPC metric */
+{
+ .type = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
+ .queries[0] = _SM(INST_ISSUED1), /* two issue counters are read for this metric */
+ .queries[1] = _SM(INST_ISSUED2),
+ .queries[2] = _SM(ACTIVE_CYCLES), /* presumably the per-cycle denominator - TODO confirm in the metric computation code */
+ .num_queries = 3, /* number of queries[] slots filled in above */
+};
+
+static const struct nvc0_hw_metric_query_cfg
+sm30_issue_slots = /* SM 3.0 config for the ISSUE_SLOTS metric */
+{
+ .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOTS, /* the only field that differs from sm30_inst_issued */
+ .queries[0] = _SM(INST_ISSUED1), /* same counter list as sm30_inst_issued */
+ .queries[1] = _SM(INST_ISSUED2),
+ .num_queries = 2, /* number of queries[] slots filled in above */
+};
+
+static const struct nvc0_hw_metric_query_cfg
+sm30_issue_slot_utilization = /* SM 3.0 config for the ISSUE_SLOT_UTILIZATION metric */
+{
+ .type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION, /* the only field that differs from sm30_issued_ipc */
+ .queries[0] = _SM(INST_ISSUED1), /* same counter list as sm30_issued_ipc */
+ .queries[1] = _SM(INST_ISSUED2),
+ .queries[2] = _SM(ACTIVE_CYCLES), /* presumably used to normalize per cycle - TODO confirm in the metric computation code */
+ .num_queries = 3, /* number of queries[] slots filled in above */
+};
+
+static const struct nvc0_hw_metric_query_cfg
+sm30_shared_replay_overhead = /* SM 3.0 config for the SHARED_REPLAY_OVERHEAD metric */
+{
+ .type = NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD,
+ .queries[0] = _SM(SHARED_LD_REPLAY), /* replay counters: one for loads, one for stores (per the counter names) */
+ .queries[1] = _SM(SHARED_ST_REPLAY),
+ .queries[2] = _SM(INST_EXECUTED), /* presumably the baseline the replay counts are compared against - TODO confirm */
+ .num_queries = 3, /* number of queries[] slots filled in above */
+};
+
+static const struct nvc0_hw_metric_query_cfg
+sm30_warp_execution_efficiency = /* SM 3.0 config for the WARP_EXECUTION_EFFICIENCY metric */
+{
+ .type = NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY,
+ .queries[0] = _SM(INST_EXECUTED), /* NOTE(review): looks like a warp-level count paired with a thread-level count below - confirm */
+ .queries[1] = _SM(TH_INST_EXECUTED),
+ .num_queries = 2, /* number of queries[] slots filled in above */
+};
+
+static const struct nvc0_hw_metric_query_cfg *sm30_hw_metric_queries[] =
+{ /* metric configs listed for compute capability 3.0; sm20_* entries are declared outside this excerpt */
+ &sm20_achieved_occupancy,
+ &sm20_branch_efficiency,
+ &sm30_inst_issued, /* sm30_* entries are the SM 3.0-specific configs defined just above this table */
+ &sm20_inst_per_wrap,
+ &sm30_inst_replay_overhead,
+ &sm20_ipc,
+ &sm30_issued_ipc,
+ &sm30_issue_slots,
+ &sm30_issue_slot_utilization,
+ &sm30_shared_replay_overhead,
+ &sm30_warp_execution_efficiency,
+}; /* array length is implied by the initializer list (11 entries) */
+
+/* ==== Compute capability 3.5 (GK110/GK208) ==== */
+static const struct nvc0_hw_metric_query_cfg
+sm35_warp_nonpred_execution_efficiency = /* SM 3.5 config for the WARP_NONPRED_EXECUTION_EFFICIENCY metric */
+{
+ .type = NVC0_HW_METRIC_QUERY_WARP_NONPRED_EXECUTION_EFFICIENCY,
+ .queries[0] = _SM(INST_EXECUTED),
+ .queries[1] = _SM(NOT_PRED_OFF_INST_EXECUTED), /* presumably counts only instructions not predicated off - TODO confirm counter semantics */
+ .num_queries = 2, /* number of queries[] slots filled in above */
+};
+
+static const struct nvc0_hw_metric_query_cfg *sm35_hw_metric_queries[] =
+{ /* metric configs listed for compute capability 3.5; mostly reuses the sm20_* and sm30_* configs */
+ &sm20_achieved_occupancy, /* NOTE(review): no sm20_branch_efficiency entry follows, unlike sm30_hw_metric_queries - confirm the omission is intentional */
+ &sm30_inst_issued,
+ &sm20_inst_per_wrap,
+ &sm30_inst_replay_overhead,
+ &sm20_ipc,
+ &sm30_issued_ipc,
+ &sm30_issue_slots,
+ &sm30_issue_slot_utilization,
+ &sm30_shared_replay_overhead,
+ &sm30_warp_execution_efficiency,
+ &sm35_warp_nonpred_execution_efficiency, /* the one config introduced for SM 3.5 in this section */
+}; /* array length is implied by the initializer list (11 entries) */
+
+/* ==== Compute capability 5.0 (GM107/GM108) ==== */
+static const struct nvc0_hw_metric_query_cfg *sm50_hw_metric_queries[] =
+{
+ &sm20_achieved_occupancy,
+ &sm20_branch_efficiency,
+ &sm30_inst_issued,
+ &sm20_inst_per_wrap,
+ &sm30_inst_replay_overhead,
+ &sm20_ipc,
+ &sm30_issued_ipc,
+ &sm30_issue_slots,
+ &sm30_issue_slot_utilization,
+ &sm30_warp_execution_efficiency,
+ &sm35_warp_nonpred_execution_efficiency,