From: Samuel Pitoiset Date: Sat, 29 Aug 2015 08:58:49 +0000 (+0200) Subject: nvc0: change prefix of MP performance counters to HW_SM X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c8a61ea4fbcb09215a95dc569dba335b766e5d4d;p=mesa.git nvc0: change prefix of MP performance counters to HW_SM According to NVIDIA, local performance counters (MP) are prefixed with SM, while global performance counters (PCOUNTER) are called PM. Signed-off-by: Samuel Pitoiset --- diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c index a2a4a5cb3c2..b13df6a9485 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c @@ -56,10 +56,10 @@ struct nvc0_query { #define NVC0_QUERY_ALLOC_SPACE 256 -static boolean nvc0_mp_pm_query_begin(struct nvc0_context *, +static boolean nvc0_hw_sm_query_begin(struct nvc0_context *, struct nvc0_query *); -static void nvc0_mp_pm_query_end(struct nvc0_context *, struct nvc0_query *); -static boolean nvc0_mp_pm_query_result(struct nvc0_context *, +static void nvc0_hw_sm_query_end(struct nvc0_context *, struct nvc0_query *); +static boolean nvc0_hw_sm_query_result(struct nvc0_context *, struct nvc0_query *, void *, boolean); static inline struct nvc0_query * @@ -159,7 +159,7 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type, unsigned index) } else #endif if (nvc0->screen->base.device->drm_version >= 0x01000101) { - if (type >= NVE4_PM_QUERY(0) && type <= NVE4_PM_QUERY_LAST) { + if (type >= NVE4_HW_SM_QUERY(0) && type <= NVE4_HW_SM_QUERY_LAST) { /* for each MP: * [00] = WS0.C0 * [04] = WS0.C1 @@ -189,7 +189,7 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type, unsigned index) space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t); break; } else - if (type >= NVC0_PM_QUERY(0) && type <= NVC0_PM_QUERY_LAST) { + if (type >= NVC0_HW_SM_QUERY(0) && type <= NVC0_HW_SM_QUERY_LAST) { /* for each MP: * [00] = MP.C0 * [04] = MP.C1 @@ -327,9 +327,9 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq) q->u.value = 0; } else #endif - if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) || - (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) { - ret = nvc0_mp_pm_query_begin(nvc0, q); + if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) || + (q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) { + ret = nvc0_hw_sm_query_begin(nvc0, q); } break; } @@ -412,9 +412,9 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq) return; } else #endif - if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) || - (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) { - nvc0_mp_pm_query_end(nvc0, q); + if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) || + (q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) { + nvc0_hw_sm_query_end(nvc0, q); } break; } @@ -453,9 +453,9 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq, return true; } else #endif - if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) || - (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) { - return nvc0_mp_pm_query_result(nvc0, q, result, wait); + if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) || + (q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) { + return nvc0_hw_sm_query_result(nvc0, q, result, wait); } if (q->state != NVC0_QUERY_STATE_READY) @@ -692,7 +692,7 @@ static const char *nvc0_drv_stat_names[] = * We could add a kernel interface for it, but reading the counters like this * has the advantage of being async (if get_result isn't called immediately). */ -static const uint64_t nve4_read_mp_pm_counters_code[] = +static const uint64_t nve4_read_hw_sm_counters_code[] = { /* sched 0x20 0x20 0x20 0x20 0x20 0x20 0x20 * mov b32 $r8 $tidx @@ -852,7 +852,7 @@ struct nvc0_mp_counter_cfg #define NVC0_COUNTER_OP2_AVG_DIV_MM 5 /* avg(ctr0 / ctr1) */ #define NVC0_COUNTER_OP2_AVG_DIV_M0 6 /* avg(ctr0) / ctr1 of MP[0]) */ -struct nvc0_mp_pm_query_cfg +struct nvc0_hw_sm_query_cfg { struct nvc0_mp_counter_cfg ctr[4]; uint8_t num_counters; @@ -860,17 +860,17 @@ struct nvc0_mp_pm_query_cfg uint8_t norm[2]; /* normalization num,denom */ }; -#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } } -#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } } -#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \ +#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } } +#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } } +#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \ { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \ { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g1, s1 }, \ {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } } -#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \ +#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \ { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g0, s0 }, \ { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \ {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } } -#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \ +#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \ { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \ { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \ {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } } @@ -881,7 +881,7 @@ struct nvc0_mp_pm_query_cfg * metric-ipXc: we simply multiply by 4 to account for the 4 warp schedulers; * this is inaccurate ! */ -static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] = +static const struct nvc0_hw_sm_query_cfg nve4_hw_sm_queries[] = { _Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1), _Q1B(ACTIVE_WARPS, 0x003f, B6, WARP, 0x31483104, 2, 1), @@ -940,7 +940,7 @@ static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] = #undef _M2B /* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */ -static const uint64_t nvc0_read_mp_pm_counters_code[] = +static const uint64_t nvc0_read_hw_sm_counters_code[] = { /* mov b32 $r8 $tidx * mov b32 $r9 $physid @@ -1026,9 +1026,9 @@ static const char *nvc0_pm_query_names[] = "warps_launched", }; -#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_PM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } } +#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_HW_SM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } } -static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] = +static const struct nvc0_hw_sm_query_cfg nvc0_hw_sm_queries[] = { _Q(ACTIVE_CYCLES, 0xaaaa, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), _Q(ACTIVE_WARPS, 0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65), @@ -1065,34 +1065,34 @@ static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] = #undef _Q -static const struct nvc0_mp_pm_query_cfg * -nvc0_mp_pm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q) +static const struct nvc0_hw_sm_query_cfg * +nvc0_hw_sm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q) { struct nvc0_screen *screen = nvc0->screen; if (screen->base.class_3d >= NVE4_3D_CLASS) - return &nve4_mp_pm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC]; - return &nvc0_mp_pm_queries[q->type - NVC0_PM_QUERY(0)]; + return &nve4_hw_sm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC]; + return &nvc0_hw_sm_queries[q->type - NVC0_HW_SM_QUERY(0)]; } boolean -nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q) +nvc0_hw_sm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q) { struct nvc0_screen *screen = nvc0->screen; struct nouveau_pushbuf *push = nvc0->base.pushbuf; const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS; - const struct nvc0_mp_pm_query_cfg *cfg; + const struct nvc0_hw_sm_query_cfg *cfg; unsigned i, c; unsigned num_ab[2] = { 0, 0 }; - cfg = nvc0_mp_pm_query_get_cfg(nvc0, q); + cfg = nvc0_hw_sm_query_get_cfg(nvc0, q); /* check if we have enough free counter slots */ for (i = 0; i < cfg->num_counters; ++i) num_ab[cfg->ctr[i].sig_dom]++; - if (screen->pm.num_mp_pm_active[0] + num_ab[0] > 4 || - screen->pm.num_mp_pm_active[1] + num_ab[1] > 4) { + if (screen->pm.num_hw_sm_active[0] + num_ab[0] > 4 || + screen->pm.num_hw_sm_active[1] + num_ab[1] > 4) { NOUVEAU_ERR("Not enough free MP counter slots !\n"); return false; } @@ -1113,14 +1113,14 @@ nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q) for (i = 0; i < cfg->num_counters; ++i) { const unsigned d = cfg->ctr[i].sig_dom; - if (!screen->pm.num_mp_pm_active[d]) { + if (!screen->pm.num_hw_sm_active[d]) { uint32_t m = (1 << 22) | (1 << (7 + (8 * !d))); - if (screen->pm.num_mp_pm_active[!d]) + if (screen->pm.num_hw_sm_active[!d]) m |= 1 << (7 + (8 * d)); BEGIN_NVC0(push, SUBC_SW(0x0600), 1); PUSH_DATA (push, m); } - screen->pm.num_mp_pm_active[d]++; + screen->pm.num_hw_sm_active[d]++; for (c = d * 4; c < (d * 4 + 4); ++c) { if (!screen->pm.mp_counter[c]) { @@ -1163,7 +1163,7 @@ nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q) } static void -nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q) +nvc0_hw_sm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q) { struct nvc0_screen *screen = nvc0->screen; struct pipe_context *pipe = &nvc0->base.pipe; @@ -1174,9 +1174,9 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q) const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 }; const uint grid[3] = { screen->mp_count, 1, 1 }; unsigned c; - const struct nvc0_mp_pm_query_cfg *cfg; + const struct nvc0_hw_sm_query_cfg *cfg; - cfg = nvc0_mp_pm_query_get_cfg(nvc0, q); + cfg = nvc0_hw_sm_query_get_cfg(nvc0, q); if (unlikely(!screen->pm.prog)) { struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program); @@ -1185,11 +1185,11 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q) prog->num_gprs = 14; prog->parm_size = 12; if (is_nve4) { - prog->code = (uint32_t *)nve4_read_mp_pm_counters_code; - prog->code_size = sizeof(nve4_read_mp_pm_counters_code); + prog->code = (uint32_t *)nve4_read_hw_sm_counters_code; + prog->code_size = sizeof(nve4_read_hw_sm_counters_code); } else { - prog->code = (uint32_t *)nvc0_read_mp_pm_counters_code; - prog->code_size = sizeof(nvc0_read_mp_pm_counters_code); + prog->code = (uint32_t *)nvc0_read_hw_sm_counters_code; + prog->code_size = sizeof(nvc0_read_hw_sm_counters_code); } screen->pm.prog = prog; } @@ -1207,7 +1207,7 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q) /* release counters for this query */ for (c = 0; c < 8; ++c) { if (nvc0_query(screen->pm.mp_counter[c]) == q) { - screen->pm.num_mp_pm_active[c / 4]--; + screen->pm.num_hw_sm_active[c / 4]--; screen->pm.mp_counter[c] = NULL; } } @@ -1234,7 +1234,7 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q) q = nvc0_query(screen->pm.mp_counter[c]); if (!q) continue; - cfg = nvc0_mp_pm_query_get_cfg(nvc0, q); + cfg = nvc0_hw_sm_query_get_cfg(nvc0, q); for (i = 0; i < cfg->num_counters; ++i) { if (mask & (1 << q->ctr[i])) break; @@ -1250,10 +1250,10 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q) } static inline bool -nvc0_mp_pm_query_read_data(uint32_t count[32][4], +nvc0_hw_sm_query_read_data(uint32_t count[32][4], struct nvc0_context *nvc0, bool wait, struct nvc0_query *q, - const struct nvc0_mp_pm_query_cfg *cfg, + const struct nvc0_hw_sm_query_cfg *cfg, unsigned mp_count) { unsigned p, c; @@ -1275,10 +1275,10 @@ nvc0_mp_pm_query_read_data(uint32_t count[32][4], } static inline bool -nve4_mp_pm_query_read_data(uint32_t count[32][4], +nve4_hw_sm_query_read_data(uint32_t count[32][4], struct nvc0_context *nvc0, bool wait, struct nvc0_query *q, - const struct nvc0_mp_pm_query_cfg *cfg, + const struct nvc0_hw_sm_query_cfg *cfg, unsigned mp_count) { unsigned p, c, d; @@ -1317,22 +1317,22 @@ nve4_mp_pm_query_read_data(uint32_t count[32][4], * NOTE: Interpretation of IPC requires knowledge of MP count. */ static boolean -nvc0_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q, +nvc0_hw_sm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q, void *result, boolean wait) { uint32_t count[32][4]; uint64_t value = 0; unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32); unsigned p, c; - const struct nvc0_mp_pm_query_cfg *cfg; + const struct nvc0_hw_sm_query_cfg *cfg; bool ret; - cfg = nvc0_mp_pm_query_get_cfg(nvc0, q); + cfg = nvc0_hw_sm_query_get_cfg(nvc0, q); if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) - ret = nve4_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count); + ret = nve4_hw_sm_query_read_data(count, nvc0, wait, q, cfg, mp_count); else - ret = nvc0_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count); + ret = nvc0_hw_sm_query_read_data(count, nvc0, wait, q, cfg, mp_count); if (!ret) return false; @@ -1410,11 +1410,11 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen, if (screen->base.device->drm_version >= 0x01000101) { if (screen->compute) { if (screen->base.class_3d == NVE4_3D_CLASS) { - count += NVE4_PM_QUERY_COUNT; + count += NVE4_HW_SM_QUERY_COUNT; } else if (screen->base.class_3d < NVE4_3D_CLASS) { /* NVC0_COMPUTE is not always enabled */ - count += NVC0_PM_QUERY_COUNT; + count += NVC0_HW_SM_QUERY_COUNT; } } } @@ -1444,15 +1444,15 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen, if (screen->compute) { if (screen->base.class_3d == NVE4_3D_CLASS) { info->name = nve4_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT]; - info->query_type = NVE4_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT); + info->query_type = NVE4_HW_SM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT); info->max_value.u64 = - (id < NVE4_PM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100; + (id < NVE4_HW_SM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100; info->group_id = NVC0_QUERY_MP_COUNTER_GROUP; return 1; } else if (screen->base.class_3d < NVE4_3D_CLASS) { info->name = nvc0_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT]; - info->query_type = NVC0_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT); + info->query_type = NVC0_HW_SM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT); info->group_id = NVC0_QUERY_MP_COUNTER_GROUP; return 1; } @@ -1494,7 +1494,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU; if (screen->base.class_3d == NVE4_3D_CLASS) { - info->num_queries = NVE4_PM_QUERY_COUNT; + info->num_queries = NVE4_HW_SM_QUERY_COUNT; /* On NVE4+, each multiprocessor have 8 hardware counters separated * in two distinct domains, but we allow only one active query @@ -1504,7 +1504,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, return 1; } else if (screen->base.class_3d < NVE4_3D_CLASS) { - info->num_queries = NVC0_PM_QUERY_COUNT; + info->num_queries = NVC0_HW_SM_QUERY_COUNT; /* On NVC0:NVE4, each multiprocessor have 8 hardware counters * in a single domain. */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h index 531314f4200..f57a316f01e 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h @@ -95,7 +95,7 @@ struct nvc0_screen { struct { struct nvc0_program *prog; /* compute state object to read MP counters */ struct pipe_query *mp_counter[8]; /* counter to query allocation */ - uint8_t num_mp_pm_active[2]; + uint8_t num_hw_sm_active[2]; bool mp_counters_enabled; } pm; @@ -120,98 +120,98 @@ nvc0_screen(struct pipe_screen *screen) /* Performance counter queries: */ -#define NVE4_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i)) -#define NVE4_PM_QUERY_LAST NVE4_PM_QUERY(NVE4_PM_QUERY_COUNT - 1) +#define NVE4_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i)) +#define NVE4_HW_SM_QUERY_LAST NVE4_HW_SM_QUERY(NVE4_HW_SM_QUERY_COUNT - 1) enum nve4_pm_queries { - NVE4_PM_QUERY_ACTIVE_CYCLES = 0, - NVE4_PM_QUERY_ACTIVE_WARPS, - NVE4_PM_QUERY_ATOM_COUNT, - NVE4_PM_QUERY_BRANCH, - NVE4_PM_QUERY_DIVERGENT_BRANCH, - NVE4_PM_QUERY_GLD_REQUEST, - NVE4_PM_QUERY_GLD_MEM_DIV_REPLAY, - NVE4_PM_QUERY_GST_TRANSACTIONS, - NVE4_PM_QUERY_GST_MEM_DIV_REPLAY, - NVE4_PM_QUERY_GRED_COUNT, - NVE4_PM_QUERY_GST_REQUEST, - NVE4_PM_QUERY_INST_EXECUTED, - NVE4_PM_QUERY_INST_ISSUED, - NVE4_PM_QUERY_INST_ISSUED1, - NVE4_PM_QUERY_INST_ISSUED2, - NVE4_PM_QUERY_L1_GLD_HIT, - NVE4_PM_QUERY_L1_GLD_MISS, - NVE4_PM_QUERY_L1_LOCAL_LD_HIT, - NVE4_PM_QUERY_L1_LOCAL_LD_MISS, - NVE4_PM_QUERY_L1_LOCAL_ST_HIT, - NVE4_PM_QUERY_L1_LOCAL_ST_MISS, - NVE4_PM_QUERY_L1_SHARED_LD_TRANSACTIONS, - NVE4_PM_QUERY_L1_SHARED_ST_TRANSACTIONS, - NVE4_PM_QUERY_LOCAL_LD, - NVE4_PM_QUERY_LOCAL_LD_TRANSACTIONS, - NVE4_PM_QUERY_LOCAL_ST, - NVE4_PM_QUERY_LOCAL_ST_TRANSACTIONS, - NVE4_PM_QUERY_PROF_TRIGGER_0, - NVE4_PM_QUERY_PROF_TRIGGER_1, - NVE4_PM_QUERY_PROF_TRIGGER_2, - NVE4_PM_QUERY_PROF_TRIGGER_3, - NVE4_PM_QUERY_PROF_TRIGGER_4, - NVE4_PM_QUERY_PROF_TRIGGER_5, - NVE4_PM_QUERY_PROF_TRIGGER_6, - NVE4_PM_QUERY_PROF_TRIGGER_7, - NVE4_PM_QUERY_SHARED_LD, - NVE4_PM_QUERY_SHARED_LD_REPLAY, - NVE4_PM_QUERY_SHARED_ST, - NVE4_PM_QUERY_SHARED_ST_REPLAY, - NVE4_PM_QUERY_SM_CTA_LAUNCHED, - NVE4_PM_QUERY_THREADS_LAUNCHED, - NVE4_PM_QUERY_UNCACHED_GLD_TRANSACTIONS, - NVE4_PM_QUERY_WARPS_LAUNCHED, - NVE4_PM_QUERY_METRIC_IPC, - NVE4_PM_QUERY_METRIC_IPAC, - NVE4_PM_QUERY_METRIC_IPEC, - NVE4_PM_QUERY_METRIC_MP_OCCUPANCY, - NVE4_PM_QUERY_METRIC_MP_EFFICIENCY, - NVE4_PM_QUERY_METRIC_INST_REPLAY_OHEAD, - NVE4_PM_QUERY_COUNT + NVE4_HW_SM_QUERY_ACTIVE_CYCLES = 0, + NVE4_HW_SM_QUERY_ACTIVE_WARPS, + NVE4_HW_SM_QUERY_ATOM_COUNT, + NVE4_HW_SM_QUERY_BRANCH, + NVE4_HW_SM_QUERY_DIVERGENT_BRANCH, + NVE4_HW_SM_QUERY_GLD_REQUEST, + NVE4_HW_SM_QUERY_GLD_MEM_DIV_REPLAY, + NVE4_HW_SM_QUERY_GST_TRANSACTIONS, + NVE4_HW_SM_QUERY_GST_MEM_DIV_REPLAY, + NVE4_HW_SM_QUERY_GRED_COUNT, + NVE4_HW_SM_QUERY_GST_REQUEST, + NVE4_HW_SM_QUERY_INST_EXECUTED, + NVE4_HW_SM_QUERY_INST_ISSUED, + NVE4_HW_SM_QUERY_INST_ISSUED1, + NVE4_HW_SM_QUERY_INST_ISSUED2, + NVE4_HW_SM_QUERY_L1_GLD_HIT, + NVE4_HW_SM_QUERY_L1_GLD_MISS, + NVE4_HW_SM_QUERY_L1_LOCAL_LD_HIT, + NVE4_HW_SM_QUERY_L1_LOCAL_LD_MISS, + NVE4_HW_SM_QUERY_L1_LOCAL_ST_HIT, + NVE4_HW_SM_QUERY_L1_LOCAL_ST_MISS, + NVE4_HW_SM_QUERY_L1_SHARED_LD_TRANSACTIONS, + NVE4_HW_SM_QUERY_L1_SHARED_ST_TRANSACTIONS, + NVE4_HW_SM_QUERY_LOCAL_LD, + NVE4_HW_SM_QUERY_LOCAL_LD_TRANSACTIONS, + NVE4_HW_SM_QUERY_LOCAL_ST, + NVE4_HW_SM_QUERY_LOCAL_ST_TRANSACTIONS, + NVE4_HW_SM_QUERY_PROF_TRIGGER_0, + NVE4_HW_SM_QUERY_PROF_TRIGGER_1, + NVE4_HW_SM_QUERY_PROF_TRIGGER_2, + NVE4_HW_SM_QUERY_PROF_TRIGGER_3, + NVE4_HW_SM_QUERY_PROF_TRIGGER_4, + NVE4_HW_SM_QUERY_PROF_TRIGGER_5, + NVE4_HW_SM_QUERY_PROF_TRIGGER_6, + NVE4_HW_SM_QUERY_PROF_TRIGGER_7, + NVE4_HW_SM_QUERY_SHARED_LD, + NVE4_HW_SM_QUERY_SHARED_LD_REPLAY, + NVE4_HW_SM_QUERY_SHARED_ST, + NVE4_HW_SM_QUERY_SHARED_ST_REPLAY, + NVE4_HW_SM_QUERY_SM_CTA_LAUNCHED, + NVE4_HW_SM_QUERY_THREADS_LAUNCHED, + NVE4_HW_SM_QUERY_UNCACHED_GLD_TRANSACTIONS, + NVE4_HW_SM_QUERY_WARPS_LAUNCHED, + NVE4_HW_SM_QUERY_METRIC_IPC, + NVE4_HW_SM_QUERY_METRIC_IPAC, + NVE4_HW_SM_QUERY_METRIC_IPEC, + NVE4_HW_SM_QUERY_METRIC_MP_OCCUPANCY, + NVE4_HW_SM_QUERY_METRIC_MP_EFFICIENCY, + NVE4_HW_SM_QUERY_METRIC_INST_REPLAY_OHEAD, + NVE4_HW_SM_QUERY_COUNT }; -#define NVC0_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i)) -#define NVC0_PM_QUERY_LAST NVC0_PM_QUERY(NVC0_PM_QUERY_COUNT - 1) +#define NVC0_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i)) +#define NVC0_HW_SM_QUERY_LAST NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_COUNT - 1) enum nvc0_pm_queries { - NVC0_PM_QUERY_ACTIVE_CYCLES = 0, - NVC0_PM_QUERY_ACTIVE_WARPS, - NVC0_PM_QUERY_ATOM_COUNT, - NVC0_PM_QUERY_BRANCH, - NVC0_PM_QUERY_DIVERGENT_BRANCH, - NVC0_PM_QUERY_GLD_REQUEST, - NVC0_PM_QUERY_GRED_COUNT, - NVC0_PM_QUERY_GST_REQUEST, - NVC0_PM_QUERY_INST_EXECUTED, - NVC0_PM_QUERY_INST_ISSUED1_0, - NVC0_PM_QUERY_INST_ISSUED1_1, - NVC0_PM_QUERY_INST_ISSUED2_0, - NVC0_PM_QUERY_INST_ISSUED2_1, - NVC0_PM_QUERY_LOCAL_LD, - NVC0_PM_QUERY_LOCAL_ST, - NVC0_PM_QUERY_PROF_TRIGGER_0, - NVC0_PM_QUERY_PROF_TRIGGER_1, - NVC0_PM_QUERY_PROF_TRIGGER_2, - NVC0_PM_QUERY_PROF_TRIGGER_3, - NVC0_PM_QUERY_PROF_TRIGGER_4, - NVC0_PM_QUERY_PROF_TRIGGER_5, - NVC0_PM_QUERY_PROF_TRIGGER_6, - NVC0_PM_QUERY_PROF_TRIGGER_7, - NVC0_PM_QUERY_SHARED_LD, - NVC0_PM_QUERY_SHARED_ST, - NVC0_PM_QUERY_THREADS_LAUNCHED, - NVC0_PM_QUERY_TH_INST_EXECUTED_0, - NVC0_PM_QUERY_TH_INST_EXECUTED_1, - NVC0_PM_QUERY_TH_INST_EXECUTED_2, - NVC0_PM_QUERY_TH_INST_EXECUTED_3, - NVC0_PM_QUERY_WARPS_LAUNCHED, - NVC0_PM_QUERY_COUNT + NVC0_HW_SM_QUERY_ACTIVE_CYCLES = 0, + NVC0_HW_SM_QUERY_ACTIVE_WARPS, + NVC0_HW_SM_QUERY_ATOM_COUNT, + NVC0_HW_SM_QUERY_BRANCH, + NVC0_HW_SM_QUERY_DIVERGENT_BRANCH, + NVC0_HW_SM_QUERY_GLD_REQUEST, + NVC0_HW_SM_QUERY_GRED_COUNT, + NVC0_HW_SM_QUERY_GST_REQUEST, + NVC0_HW_SM_QUERY_INST_EXECUTED, + NVC0_HW_SM_QUERY_INST_ISSUED1_0, + NVC0_HW_SM_QUERY_INST_ISSUED1_1, + NVC0_HW_SM_QUERY_INST_ISSUED2_0, + NVC0_HW_SM_QUERY_INST_ISSUED2_1, + NVC0_HW_SM_QUERY_LOCAL_LD, + NVC0_HW_SM_QUERY_LOCAL_ST, + NVC0_HW_SM_QUERY_PROF_TRIGGER_0, + NVC0_HW_SM_QUERY_PROF_TRIGGER_1, + NVC0_HW_SM_QUERY_PROF_TRIGGER_2, + NVC0_HW_SM_QUERY_PROF_TRIGGER_3, + NVC0_HW_SM_QUERY_PROF_TRIGGER_4, + NVC0_HW_SM_QUERY_PROF_TRIGGER_5, + NVC0_HW_SM_QUERY_PROF_TRIGGER_6, + NVC0_HW_SM_QUERY_PROF_TRIGGER_7, + NVC0_HW_SM_QUERY_SHARED_LD, + NVC0_HW_SM_QUERY_SHARED_ST, + NVC0_HW_SM_QUERY_THREADS_LAUNCHED, + NVC0_HW_SM_QUERY_TH_INST_EXECUTED_0, + NVC0_HW_SM_QUERY_TH_INST_EXECUTED_1, + NVC0_HW_SM_QUERY_TH_INST_EXECUTED_2, + NVC0_HW_SM_QUERY_TH_INST_EXECUTED_3, + NVC0_HW_SM_QUERY_WARPS_LAUNCHED, + NVC0_HW_SM_QUERY_COUNT }; /* Driver statistics queries: