nvc0: sort performance counter queries by name
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Fri, 28 Aug 2015 17:09:33 +0000 (19:09 +0200)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Sat, 29 Aug 2015 08:24:50 +0000 (10:24 +0200)
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
src/gallium/drivers/nouveau/nvc0/nvc0_query.c
src/gallium/drivers/nouveau/nvc0/nvc0_screen.h

index 6672061773370c3b14d0086e6700b93ae8bcdd1f..a2a4a5cb3c23b0f2f1f0aab035eff0b580fff1f7 100644 (file)
@@ -776,6 +776,33 @@ static const uint64_t nve4_read_mp_pm_counters_code[] =
 static const char *nve4_pm_query_names[] =
 {
    /* MP counters */
+   "active_cycles",
+   "active_warps",
+   "atom_count",
+   "branch",
+   "divergent_branch",
+   "gld_request",
+   "global_ld_mem_divergence_replays",
+   "global_store_transaction",
+   "global_st_mem_divergence_replays",
+   "gred_count",
+   "gst_request",
+   "inst_executed",
+   "inst_issued",
+   "inst_issued1",
+   "inst_issued2",
+   "l1_global_load_hit",
+   "l1_global_load_miss",
+   "l1_local_load_hit",
+   "l1_local_load_miss",
+   "l1_local_store_hit",
+   "l1_local_store_miss",
+   "l1_shared_load_transactions",
+   "l1_shared_store_transactions",
+   "local_load",
+   "local_load_transactions",
+   "local_store",
+   "local_store_transactions",
    "prof_trigger_00",
    "prof_trigger_01",
    "prof_trigger_02",
@@ -784,41 +811,14 @@ static const char *nve4_pm_query_names[] =
    "prof_trigger_05",
    "prof_trigger_06",
    "prof_trigger_07",
-   "warps_launched",
-   "threads_launched",
-   "sm_cta_launched",
-   "inst_issued1",
-   "inst_issued2",
-   "inst_executed",
-   "local_load",
-   "local_store",
    "shared_load",
-   "shared_store",
-   "l1_local_load_hit",
-   "l1_local_load_miss",
-   "l1_local_store_hit",
-   "l1_local_store_miss",
-   "gld_request",
-   "gst_request",
-   "l1_global_load_hit",
-   "l1_global_load_miss",
-   "uncached_global_load_transaction",
-   "global_store_transaction",
-   "branch",
-   "divergent_branch",
-   "active_warps",
-   "active_cycles",
-   "inst_issued",
-   "atom_count",
-   "gred_count",
    "shared_load_replay",
+   "shared_store",
    "shared_store_replay",
-   "local_load_transactions",
-   "local_store_transactions",
-   "l1_shared_load_transactions",
-   "l1_shared_store_transactions",
-   "global_ld_mem_divergence_replays",
-   "global_st_mem_divergence_replays",
+   "sm_cta_launched",
+   "threads_launched",
+   "uncached_global_load_transaction",
+   "warps_launched",
    /* metrics, i.e. functions of the MP counters */
    "metric-ipc",                   /* inst_executed, clock */
    "metric-ipac",                  /* inst_executed, active_cycles */
@@ -883,6 +883,33 @@ struct nvc0_mp_pm_query_cfg
  */
 static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] =
 {
+   _Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1),
+   _Q1B(ACTIVE_WARPS,  0x003f, B6, WARP, 0x31483104, 2, 1),
+   _Q1A(ATOM_COUNT, 0x0001, B6, BRANCH, 0x00000000, 1, 1),
+   _Q1A(BRANCH,           0x0001, B6, BRANCH, 0x0000000c, 1, 1),
+   _Q1A(DIVERGENT_BRANCH, 0x0001, B6, BRANCH, 0x00000010, 1, 1),
+   _Q1A(GLD_REQUEST, 0x0001, B6, LDST, 0x00000010, 1, 1),
+   _Q1B(GLD_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000010, 1, 1),
+   _Q1B(GST_TRANSACTIONS,          0x0001, B6, MEM, 0x00000004, 1, 1),
+   _Q1B(GST_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000014, 1, 1),
+   _Q1A(GRED_COUNT, 0x0001, B6, BRANCH, 0x00000008, 1, 1),
+   _Q1A(GST_REQUEST, 0x0001, B6, LDST, 0x00000014, 1, 1),
+   _Q1A(INST_EXECUTED, 0x0003, B6, EXEC,  0x00000398, 1, 1),
+   _Q1A(INST_ISSUED,   0x0003, B6, ISSUE, 0x00000104, 1, 1),
+   _Q1A(INST_ISSUED1,  0x0001, B6, ISSUE, 0x00000004, 1, 1),
+   _Q1A(INST_ISSUED2,  0x0001, B6, ISSUE, 0x00000008, 1, 1),
+   _Q1B(L1_GLD_HIT,  0x0001, B6, L1, 0x00000010, 1, 1),
+   _Q1B(L1_GLD_MISS, 0x0001, B6, L1, 0x00000014, 1, 1),
+   _Q1B(L1_LOCAL_LD_HIT,   0x0001, B6, L1, 0x00000000, 1, 1),
+   _Q1B(L1_LOCAL_LD_MISS,  0x0001, B6, L1, 0x00000004, 1, 1),
+   _Q1B(L1_LOCAL_ST_HIT,  0x0001, B6, L1, 0x00000008, 1, 1),
+   _Q1B(L1_LOCAL_ST_MISS, 0x0001, B6, L1, 0x0000000c, 1, 1),
+   _Q1B(L1_SHARED_LD_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000008, 1, 1),
+   _Q1B(L1_SHARED_ST_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x0000000c, 1, 1),
+   _Q1A(LOCAL_LD,    0x0001, B6, LDST, 0x00000008, 1, 1),
+   _Q1B(LOCAL_LD_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000000, 1, 1),
+   _Q1A(LOCAL_ST,    0x0001, B6, LDST, 0x0000000c, 1, 1),
+   _Q1B(LOCAL_ST_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000004, 1, 1),
    _Q1A(PROF_TRIGGER_0, 0x0001, B6, USER, 0x00000000, 1, 1),
    _Q1A(PROF_TRIGGER_1, 0x0001, B6, USER, 0x00000004, 1, 1),
    _Q1A(PROF_TRIGGER_2, 0x0001, B6, USER, 0x00000008, 1, 1),
@@ -891,41 +918,14 @@ static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] =
    _Q1A(PROF_TRIGGER_5, 0x0001, B6, USER, 0x00000014, 1, 1),
    _Q1A(PROF_TRIGGER_6, 0x0001, B6, USER, 0x00000018, 1, 1),
    _Q1A(PROF_TRIGGER_7, 0x0001, B6, USER, 0x0000001c, 1, 1),
-   _Q1A(WARPS_LAUNCHED,    0x0001, B6, LAUNCH, 0x00000004, 1, 1),
-   _Q1A(THREADS_LAUNCHED,  0x003f, B6, LAUNCH, 0x398a4188, 1, 1),
-   _Q1B(SM_CTA_LAUNCHED,      0x0001, B6, WARP, 0x0000001c, 1, 1),
-   _Q1A(INST_ISSUED1,  0x0001, B6, ISSUE, 0x00000004, 1, 1),
-   _Q1A(INST_ISSUED2,  0x0001, B6, ISSUE, 0x00000008, 1, 1),
-   _Q1A(INST_ISSUED,   0x0003, B6, ISSUE, 0x00000104, 1, 1),
-   _Q1A(INST_EXECUTED, 0x0003, B6, EXEC,  0x00000398, 1, 1),
    _Q1A(SHARED_LD,   0x0001, B6, LDST, 0x00000000, 1, 1),
-   _Q1A(SHARED_ST,   0x0001, B6, LDST, 0x00000004, 1, 1),
-   _Q1A(LOCAL_LD,    0x0001, B6, LDST, 0x00000008, 1, 1),
-   _Q1A(LOCAL_ST,    0x0001, B6, LDST, 0x0000000c, 1, 1),
-   _Q1A(GLD_REQUEST, 0x0001, B6, LDST, 0x00000010, 1, 1),
-   _Q1A(GST_REQUEST, 0x0001, B6, LDST, 0x00000014, 1, 1),
-   _Q1B(L1_LOCAL_LD_HIT,   0x0001, B6, L1, 0x00000000, 1, 1),
-   _Q1B(L1_LOCAL_LD_MISS,  0x0001, B6, L1, 0x00000004, 1, 1),
-   _Q1B(L1_LOCAL_ST_HIT,  0x0001, B6, L1, 0x00000008, 1, 1),
-   _Q1B(L1_LOCAL_ST_MISS, 0x0001, B6, L1, 0x0000000c, 1, 1),
-   _Q1B(L1_GLD_HIT,  0x0001, B6, L1, 0x00000010, 1, 1),
-   _Q1B(L1_GLD_MISS, 0x0001, B6, L1, 0x00000014, 1, 1),
-   _Q1B(UNCACHED_GLD_TRANSACTIONS, 0x0001, B6, MEM, 0x00000000, 1, 1),
-   _Q1B(GST_TRANSACTIONS,          0x0001, B6, MEM, 0x00000004, 1, 1),
-   _Q1A(BRANCH,           0x0001, B6, BRANCH, 0x0000000c, 1, 1),
-   _Q1A(DIVERGENT_BRANCH, 0x0001, B6, BRANCH, 0x00000010, 1, 1),
-   _Q1B(ACTIVE_WARPS,  0x003f, B6, WARP, 0x31483104, 2, 1),
-   _Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1),
-   _Q1A(ATOM_COUNT, 0x0001, B6, BRANCH, 0x00000000, 1, 1),
-   _Q1A(GRED_COUNT, 0x0001, B6, BRANCH, 0x00000008, 1, 1),
    _Q1B(SHARED_LD_REPLAY, 0x0001, B6, REPLAY, 0x00000008, 1, 1),
+   _Q1A(SHARED_ST,   0x0001, B6, LDST, 0x00000004, 1, 1),
    _Q1B(SHARED_ST_REPLAY, 0x0001, B6, REPLAY, 0x0000000c, 1, 1),
-   _Q1B(LOCAL_LD_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000000, 1, 1),
-   _Q1B(LOCAL_ST_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000004, 1, 1),
-   _Q1B(L1_SHARED_LD_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000008, 1, 1),
-   _Q1B(L1_SHARED_ST_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x0000000c, 1, 1),
-   _Q1B(GLD_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000010, 1, 1),
-   _Q1B(GST_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000014, 1, 1),
+   _Q1B(SM_CTA_LAUNCHED,      0x0001, B6, WARP, 0x0000001c, 1, 1),
+   _Q1A(THREADS_LAUNCHED,  0x003f, B6, LAUNCH, 0x398a4188, 1, 1),
+   _Q1B(UNCACHED_GLD_TRANSACTIONS, 0x0001, B6, MEM, 0x00000000, 1, 1),
+   _Q1A(WARPS_LAUNCHED,    0x0001, B6, LAUNCH, 0x00000004, 1, 1),
    _M2AB(IPC, 0x3, B6, EXEC, 0x398, 0xffff, LOGOP, WARP, 0x0, DIV_SUM_M0, 10, 1),
    _M2AB(IPAC, 0x3, B6, EXEC, 0x398, 0x1, B6, WARP, 0x0, AVG_DIV_MM, 10, 1),
    _M2A(IPEC, 0x3, B6, EXEC, 0x398, 0xe, LOGOP, EXEC, 0x398, AVG_DIV_MM, 10, 1),
@@ -993,29 +993,21 @@ static const uint64_t nvc0_read_mp_pm_counters_code[] =
 static const char *nvc0_pm_query_names[] =
 {
    /* MP counters */
-   "inst_executed",
-   "branch",
-   "divergent_branch",
-   "active_warps",
    "active_cycles",
-   "warps_launched",
-   "threads_launched",
-   "shared_load",
-   "shared_store",
-   "local_load",
-   "local_store",
-   "gred_count",
+   "active_warps",
    "atom_count",
+   "branch",
+   "divergent_branch",
    "gld_request",
+   "gred_count",
    "gst_request",
+   "inst_executed",
    "inst_issued1_0",
    "inst_issued1_1",
    "inst_issued2_0",
    "inst_issued2_1",
-   "thread_inst_executed_0",
-   "thread_inst_executed_1",
-   "thread_inst_executed_2",
-   "thread_inst_executed_3",
+   "local_load",
+   "local_store",
    "prof_trigger_00",
    "prof_trigger_01",
    "prof_trigger_02",
@@ -1024,35 +1016,35 @@ static const char *nvc0_pm_query_names[] =
    "prof_trigger_05",
    "prof_trigger_06",
    "prof_trigger_07",
+   "shared_load",
+   "shared_store",
+   "threads_launched",
+   "thread_inst_executed_0",
+   "thread_inst_executed_1",
+   "thread_inst_executed_2",
+   "thread_inst_executed_3",
+   "warps_launched",
 };
 
 #define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_PM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }
 
 static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] =
 {
-   _Q(INST_EXECUTED,       0xaaaa, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 0x00, 0x00),
-   _Q(BRANCH,              0xaaaa, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00),
-   _Q(DIVERGENT_BRANCH,    0xaaaa, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 0x00, 0x00),
-   _Q(ACTIVE_WARPS,        0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
    _Q(ACTIVE_CYCLES,       0xaaaa, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
-   _Q(WARPS_LAUNCHED,      0xaaaa, LOGOP, 0x26, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
-   _Q(THREADS_LAUNCHED,    0xaaaa, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
-   _Q(SHARED_LD,           0xaaaa, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
-   _Q(SHARED_ST,           0xaaaa, LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
-   _Q(LOCAL_LD,            0xaaaa, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
-   _Q(LOCAL_ST,            0xaaaa, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
-   _Q(GRED_COUNT,          0xaaaa, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
+   _Q(ACTIVE_WARPS,        0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
    _Q(ATOM_COUNT,          0xaaaa, LOGOP, 0x63, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
+   _Q(BRANCH,              0xaaaa, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00),
+   _Q(DIVERGENT_BRANCH,    0xaaaa, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 0x00, 0x00),
    _Q(GLD_REQUEST,         0xaaaa, LOGOP, 0x64, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
+   _Q(GRED_COUNT,          0xaaaa, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
    _Q(GST_REQUEST,         0xaaaa, LOGOP, 0x64, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
+   _Q(INST_EXECUTED,       0xaaaa, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 0x00, 0x00),
    _Q(INST_ISSUED1_0,      0xaaaa, LOGOP, 0x7e, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
    _Q(INST_ISSUED1_1,      0xaaaa, LOGOP, 0x7e, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
    _Q(INST_ISSUED2_0,      0xaaaa, LOGOP, 0x7e, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
    _Q(INST_ISSUED2_1,      0xaaaa, LOGOP, 0x7e, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
-   _Q(TH_INST_EXECUTED_0,  0xaaaa, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
-   _Q(TH_INST_EXECUTED_1,  0xaaaa, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
-   _Q(TH_INST_EXECUTED_2,  0xaaaa, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
-   _Q(TH_INST_EXECUTED_3,  0xaaaa, LOGOP, 0xa6, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
+   _Q(LOCAL_LD,            0xaaaa, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
+   _Q(LOCAL_ST,            0xaaaa, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
    _Q(PROF_TRIGGER_0,      0xaaaa, LOGOP, 0x01, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
    _Q(PROF_TRIGGER_1,      0xaaaa, LOGOP, 0x01, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
    _Q(PROF_TRIGGER_2,      0xaaaa, LOGOP, 0x01, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
@@ -1061,6 +1053,14 @@ static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] =
    _Q(PROF_TRIGGER_5,      0xaaaa, LOGOP, 0x01, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
    _Q(PROF_TRIGGER_6,      0xaaaa, LOGOP, 0x01, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
    _Q(PROF_TRIGGER_7,      0xaaaa, LOGOP, 0x01, 1, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00),
+   _Q(SHARED_LD,           0xaaaa, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
+   _Q(SHARED_ST,           0xaaaa, LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
+   _Q(THREADS_LAUNCHED,    0xaaaa, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
+   _Q(TH_INST_EXECUTED_0,  0xaaaa, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
+   _Q(TH_INST_EXECUTED_1,  0xaaaa, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
+   _Q(TH_INST_EXECUTED_2,  0xaaaa, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
+   _Q(TH_INST_EXECUTED_3,  0xaaaa, LOGOP, 0xa6, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
+   _Q(WARPS_LAUNCHED,      0xaaaa, LOGOP, 0x26, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
 };
 
 #undef _Q
index d689863df5c94990811d4ee5f8f92db07a95c45a..531314f420004119fe6be2538bc9b9735378a1bd 100644 (file)
@@ -124,7 +124,34 @@ nvc0_screen(struct pipe_screen *screen)
 #define NVE4_PM_QUERY_LAST   NVE4_PM_QUERY(NVE4_PM_QUERY_COUNT - 1)
 enum nve4_pm_queries
 {
-    NVE4_PM_QUERY_PROF_TRIGGER_0 = 0,
+    NVE4_PM_QUERY_ACTIVE_CYCLES = 0,
+    NVE4_PM_QUERY_ACTIVE_WARPS,
+    NVE4_PM_QUERY_ATOM_COUNT,
+    NVE4_PM_QUERY_BRANCH,
+    NVE4_PM_QUERY_DIVERGENT_BRANCH,
+    NVE4_PM_QUERY_GLD_REQUEST,
+    NVE4_PM_QUERY_GLD_MEM_DIV_REPLAY,
+    NVE4_PM_QUERY_GST_TRANSACTIONS,
+    NVE4_PM_QUERY_GST_MEM_DIV_REPLAY,
+    NVE4_PM_QUERY_GRED_COUNT,
+    NVE4_PM_QUERY_GST_REQUEST,
+    NVE4_PM_QUERY_INST_EXECUTED,
+    NVE4_PM_QUERY_INST_ISSUED,
+    NVE4_PM_QUERY_INST_ISSUED1,
+    NVE4_PM_QUERY_INST_ISSUED2,
+    NVE4_PM_QUERY_L1_GLD_HIT,
+    NVE4_PM_QUERY_L1_GLD_MISS,
+    NVE4_PM_QUERY_L1_LOCAL_LD_HIT,
+    NVE4_PM_QUERY_L1_LOCAL_LD_MISS,
+    NVE4_PM_QUERY_L1_LOCAL_ST_HIT,
+    NVE4_PM_QUERY_L1_LOCAL_ST_MISS,
+    NVE4_PM_QUERY_L1_SHARED_LD_TRANSACTIONS,
+    NVE4_PM_QUERY_L1_SHARED_ST_TRANSACTIONS,
+    NVE4_PM_QUERY_LOCAL_LD,
+    NVE4_PM_QUERY_LOCAL_LD_TRANSACTIONS,
+    NVE4_PM_QUERY_LOCAL_ST,
+    NVE4_PM_QUERY_LOCAL_ST_TRANSACTIONS,
+    NVE4_PM_QUERY_PROF_TRIGGER_0,
     NVE4_PM_QUERY_PROF_TRIGGER_1,
     NVE4_PM_QUERY_PROF_TRIGGER_2,
     NVE4_PM_QUERY_PROF_TRIGGER_3,
@@ -132,41 +159,14 @@ enum nve4_pm_queries
     NVE4_PM_QUERY_PROF_TRIGGER_5,
     NVE4_PM_QUERY_PROF_TRIGGER_6,
     NVE4_PM_QUERY_PROF_TRIGGER_7,
-    NVE4_PM_QUERY_WARPS_LAUNCHED,
-    NVE4_PM_QUERY_THREADS_LAUNCHED,
-    NVE4_PM_QUERY_SM_CTA_LAUNCHED,
-    NVE4_PM_QUERY_INST_ISSUED1,
-    NVE4_PM_QUERY_INST_ISSUED2,
-    NVE4_PM_QUERY_INST_EXECUTED,
-    NVE4_PM_QUERY_LOCAL_LD,
-    NVE4_PM_QUERY_LOCAL_ST,
     NVE4_PM_QUERY_SHARED_LD,
-    NVE4_PM_QUERY_SHARED_ST,
-    NVE4_PM_QUERY_L1_LOCAL_LD_HIT,
-    NVE4_PM_QUERY_L1_LOCAL_LD_MISS,
-    NVE4_PM_QUERY_L1_LOCAL_ST_HIT,
-    NVE4_PM_QUERY_L1_LOCAL_ST_MISS,
-    NVE4_PM_QUERY_GLD_REQUEST,
-    NVE4_PM_QUERY_GST_REQUEST,
-    NVE4_PM_QUERY_L1_GLD_HIT,
-    NVE4_PM_QUERY_L1_GLD_MISS,
-    NVE4_PM_QUERY_UNCACHED_GLD_TRANSACTIONS,
-    NVE4_PM_QUERY_GST_TRANSACTIONS,
-    NVE4_PM_QUERY_BRANCH,
-    NVE4_PM_QUERY_DIVERGENT_BRANCH,
-    NVE4_PM_QUERY_ACTIVE_WARPS,
-    NVE4_PM_QUERY_ACTIVE_CYCLES,
-    NVE4_PM_QUERY_INST_ISSUED,
-    NVE4_PM_QUERY_ATOM_COUNT,
-    NVE4_PM_QUERY_GRED_COUNT,
     NVE4_PM_QUERY_SHARED_LD_REPLAY,
+    NVE4_PM_QUERY_SHARED_ST,
     NVE4_PM_QUERY_SHARED_ST_REPLAY,
-    NVE4_PM_QUERY_LOCAL_LD_TRANSACTIONS,
-    NVE4_PM_QUERY_LOCAL_ST_TRANSACTIONS,
-    NVE4_PM_QUERY_L1_SHARED_LD_TRANSACTIONS,
-    NVE4_PM_QUERY_L1_SHARED_ST_TRANSACTIONS,
-    NVE4_PM_QUERY_GLD_MEM_DIV_REPLAY,
-    NVE4_PM_QUERY_GST_MEM_DIV_REPLAY,
+    NVE4_PM_QUERY_SM_CTA_LAUNCHED,
+    NVE4_PM_QUERY_THREADS_LAUNCHED,
+    NVE4_PM_QUERY_UNCACHED_GLD_TRANSACTIONS,
+    NVE4_PM_QUERY_WARPS_LAUNCHED,
     NVE4_PM_QUERY_METRIC_IPC,
     NVE4_PM_QUERY_METRIC_IPAC,
     NVE4_PM_QUERY_METRIC_IPEC,
@@ -180,29 +180,21 @@ enum nve4_pm_queries
 #define NVC0_PM_QUERY_LAST   NVC0_PM_QUERY(NVC0_PM_QUERY_COUNT - 1)
 enum nvc0_pm_queries
 {
-    NVC0_PM_QUERY_INST_EXECUTED = 0,
-    NVC0_PM_QUERY_BRANCH,
-    NVC0_PM_QUERY_DIVERGENT_BRANCH,
+    NVC0_PM_QUERY_ACTIVE_CYCLES = 0,
     NVC0_PM_QUERY_ACTIVE_WARPS,
-    NVC0_PM_QUERY_ACTIVE_CYCLES,
-    NVC0_PM_QUERY_WARPS_LAUNCHED,
-    NVC0_PM_QUERY_THREADS_LAUNCHED,
-    NVC0_PM_QUERY_SHARED_LD,
-    NVC0_PM_QUERY_SHARED_ST,
-    NVC0_PM_QUERY_LOCAL_LD,
-    NVC0_PM_QUERY_LOCAL_ST,
-    NVC0_PM_QUERY_GRED_COUNT,
     NVC0_PM_QUERY_ATOM_COUNT,
+    NVC0_PM_QUERY_BRANCH,
+    NVC0_PM_QUERY_DIVERGENT_BRANCH,
     NVC0_PM_QUERY_GLD_REQUEST,
+    NVC0_PM_QUERY_GRED_COUNT,
     NVC0_PM_QUERY_GST_REQUEST,
+    NVC0_PM_QUERY_INST_EXECUTED,
     NVC0_PM_QUERY_INST_ISSUED1_0,
     NVC0_PM_QUERY_INST_ISSUED1_1,
     NVC0_PM_QUERY_INST_ISSUED2_0,
     NVC0_PM_QUERY_INST_ISSUED2_1,
-    NVC0_PM_QUERY_TH_INST_EXECUTED_0,
-    NVC0_PM_QUERY_TH_INST_EXECUTED_1,
-    NVC0_PM_QUERY_TH_INST_EXECUTED_2,
-    NVC0_PM_QUERY_TH_INST_EXECUTED_3,
+    NVC0_PM_QUERY_LOCAL_LD,
+    NVC0_PM_QUERY_LOCAL_ST,
     NVC0_PM_QUERY_PROF_TRIGGER_0,
     NVC0_PM_QUERY_PROF_TRIGGER_1,
     NVC0_PM_QUERY_PROF_TRIGGER_2,
@@ -211,6 +203,14 @@ enum nvc0_pm_queries
     NVC0_PM_QUERY_PROF_TRIGGER_5,
     NVC0_PM_QUERY_PROF_TRIGGER_6,
     NVC0_PM_QUERY_PROF_TRIGGER_7,
+    NVC0_PM_QUERY_SHARED_LD,
+    NVC0_PM_QUERY_SHARED_ST,
+    NVC0_PM_QUERY_THREADS_LAUNCHED,
+    NVC0_PM_QUERY_TH_INST_EXECUTED_0,
+    NVC0_PM_QUERY_TH_INST_EXECUTED_1,
+    NVC0_PM_QUERY_TH_INST_EXECUTED_2,
+    NVC0_PM_QUERY_TH_INST_EXECUTED_3,
+    NVC0_PM_QUERY_WARPS_LAUNCHED,
     NVC0_PM_QUERY_COUNT
 };