#define NVC0_QUERY_ALLOC_SPACE 256
-static boolean nvc0_mp_pm_query_begin(struct nvc0_context *,
+static boolean nvc0_hw_sm_query_begin(struct nvc0_context *,
struct nvc0_query *);
-static void nvc0_mp_pm_query_end(struct nvc0_context *, struct nvc0_query *);
-static boolean nvc0_mp_pm_query_result(struct nvc0_context *,
+static void nvc0_hw_sm_query_end(struct nvc0_context *, struct nvc0_query *);
+static boolean nvc0_hw_sm_query_result(struct nvc0_context *,
struct nvc0_query *, void *, boolean);
static inline struct nvc0_query *
} else
#endif
if (nvc0->screen->base.device->drm_version >= 0x01000101) {
- if (type >= NVE4_PM_QUERY(0) && type <= NVE4_PM_QUERY_LAST) {
+ if (type >= NVE4_HW_SM_QUERY(0) && type <= NVE4_HW_SM_QUERY_LAST) {
/* for each MP:
* [00] = WS0.C0
* [04] = WS0.C1
space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t);
break;
} else
- if (type >= NVC0_PM_QUERY(0) && type <= NVC0_PM_QUERY_LAST) {
+ if (type >= NVC0_HW_SM_QUERY(0) && type <= NVC0_HW_SM_QUERY_LAST) {
/* for each MP:
* [00] = MP.C0
* [04] = MP.C1
q->u.value = 0;
} else
#endif
- if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
- (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
- ret = nvc0_mp_pm_query_begin(nvc0, q);
+ if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
+ (q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
+ ret = nvc0_hw_sm_query_begin(nvc0, q);
}
break;
}
return;
} else
#endif
- if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
- (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
- nvc0_mp_pm_query_end(nvc0, q);
+ if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
+ (q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
+ nvc0_hw_sm_query_end(nvc0, q);
}
break;
}
return true;
} else
#endif
- if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
- (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
- return nvc0_mp_pm_query_result(nvc0, q, result, wait);
+ if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
+ (q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
+ return nvc0_hw_sm_query_result(nvc0, q, result, wait);
}
if (q->state != NVC0_QUERY_STATE_READY)
* We could add a kernel interface for it, but reading the counters like this
* has the advantage of being async (if get_result isn't called immediately).
*/
-static const uint64_t nve4_read_mp_pm_counters_code[] =
+static const uint64_t nve4_read_hw_sm_counters_code[] =
{
/* sched 0x20 0x20 0x20 0x20 0x20 0x20 0x20
* mov b32 $r8 $tidx
#define NVC0_COUNTER_OP2_AVG_DIV_MM 5 /* avg(ctr0 / ctr1) */
#define NVC0_COUNTER_OP2_AVG_DIV_M0 6 /* avg(ctr0) / ctr1 of MP[0]) */
-struct nvc0_mp_pm_query_cfg
+struct nvc0_hw_sm_query_cfg
{
struct nvc0_mp_counter_cfg ctr[4];
uint8_t num_counters;
uint8_t norm[2]; /* normalization num,denom */
};
-#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
-#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
-#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
+#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
+#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
+#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
{ f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
{ f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g1, s1 }, \
{}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
-#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
+#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
{ f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g0, s0 }, \
{ f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
{}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
-#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
+#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
{ f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
{ f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
{}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
* metric-ipXc: we simply multiply by 4 to account for the 4 warp schedulers;
* this is inaccurate !
*/
-static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] =
+static const struct nvc0_hw_sm_query_cfg nve4_hw_sm_queries[] =
{
_Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1),
_Q1B(ACTIVE_WARPS, 0x003f, B6, WARP, 0x31483104, 2, 1),
#undef _M2B
/* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */
-static const uint64_t nvc0_read_mp_pm_counters_code[] =
+static const uint64_t nvc0_read_hw_sm_counters_code[] =
{
/* mov b32 $r8 $tidx
* mov b32 $r9 $physid
"warps_launched",
};
-#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_PM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }
+#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_HW_SM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }
-static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] =
+static const struct nvc0_hw_sm_query_cfg nvc0_hw_sm_queries[] =
{
_Q(ACTIVE_CYCLES, 0xaaaa, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
_Q(ACTIVE_WARPS, 0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
#undef _Q
-static const struct nvc0_mp_pm_query_cfg *
-nvc0_mp_pm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q)
+static const struct nvc0_hw_sm_query_cfg *
+nvc0_hw_sm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q)
{
struct nvc0_screen *screen = nvc0->screen;
if (screen->base.class_3d >= NVE4_3D_CLASS)
- return &nve4_mp_pm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];
- return &nvc0_mp_pm_queries[q->type - NVC0_PM_QUERY(0)];
+ return &nve4_hw_sm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];
+ return &nvc0_hw_sm_queries[q->type - NVC0_HW_SM_QUERY(0)];
}
boolean
-nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
+nvc0_hw_sm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
{
struct nvc0_screen *screen = nvc0->screen;
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
- const struct nvc0_mp_pm_query_cfg *cfg;
+ const struct nvc0_hw_sm_query_cfg *cfg;
unsigned i, c;
unsigned num_ab[2] = { 0, 0 };
- cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
+ cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
/* check if we have enough free counter slots */
for (i = 0; i < cfg->num_counters; ++i)
num_ab[cfg->ctr[i].sig_dom]++;
- if (screen->pm.num_mp_pm_active[0] + num_ab[0] > 4 ||
- screen->pm.num_mp_pm_active[1] + num_ab[1] > 4) {
+ if (screen->pm.num_hw_sm_active[0] + num_ab[0] > 4 ||
+ screen->pm.num_hw_sm_active[1] + num_ab[1] > 4) {
NOUVEAU_ERR("Not enough free MP counter slots !\n");
return false;
}
for (i = 0; i < cfg->num_counters; ++i) {
const unsigned d = cfg->ctr[i].sig_dom;
- if (!screen->pm.num_mp_pm_active[d]) {
+ if (!screen->pm.num_hw_sm_active[d]) {
uint32_t m = (1 << 22) | (1 << (7 + (8 * !d)));
- if (screen->pm.num_mp_pm_active[!d])
+ if (screen->pm.num_hw_sm_active[!d])
m |= 1 << (7 + (8 * d));
BEGIN_NVC0(push, SUBC_SW(0x0600), 1);
PUSH_DATA (push, m);
}
- screen->pm.num_mp_pm_active[d]++;
+ screen->pm.num_hw_sm_active[d]++;
for (c = d * 4; c < (d * 4 + 4); ++c) {
if (!screen->pm.mp_counter[c]) {
}
static void
-nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
+nvc0_hw_sm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
{
struct nvc0_screen *screen = nvc0->screen;
struct pipe_context *pipe = &nvc0->base.pipe;
const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
const uint grid[3] = { screen->mp_count, 1, 1 };
unsigned c;
- const struct nvc0_mp_pm_query_cfg *cfg;
+ const struct nvc0_hw_sm_query_cfg *cfg;
- cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
+ cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
if (unlikely(!screen->pm.prog)) {
struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
prog->num_gprs = 14;
prog->parm_size = 12;
if (is_nve4) {
- prog->code = (uint32_t *)nve4_read_mp_pm_counters_code;
- prog->code_size = sizeof(nve4_read_mp_pm_counters_code);
+ prog->code = (uint32_t *)nve4_read_hw_sm_counters_code;
+ prog->code_size = sizeof(nve4_read_hw_sm_counters_code);
} else {
- prog->code = (uint32_t *)nvc0_read_mp_pm_counters_code;
- prog->code_size = sizeof(nvc0_read_mp_pm_counters_code);
+ prog->code = (uint32_t *)nvc0_read_hw_sm_counters_code;
+ prog->code_size = sizeof(nvc0_read_hw_sm_counters_code);
}
screen->pm.prog = prog;
}
/* release counters for this query */
for (c = 0; c < 8; ++c) {
if (nvc0_query(screen->pm.mp_counter[c]) == q) {
- screen->pm.num_mp_pm_active[c / 4]--;
+ screen->pm.num_hw_sm_active[c / 4]--;
screen->pm.mp_counter[c] = NULL;
}
}
q = nvc0_query(screen->pm.mp_counter[c]);
if (!q)
continue;
- cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
+ cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
for (i = 0; i < cfg->num_counters; ++i) {
if (mask & (1 << q->ctr[i]))
break;
}
static inline bool
-nvc0_mp_pm_query_read_data(uint32_t count[32][4],
+nvc0_hw_sm_query_read_data(uint32_t count[32][4],
struct nvc0_context *nvc0, bool wait,
struct nvc0_query *q,
- const struct nvc0_mp_pm_query_cfg *cfg,
+ const struct nvc0_hw_sm_query_cfg *cfg,
unsigned mp_count)
{
unsigned p, c;
}
static inline bool
-nve4_mp_pm_query_read_data(uint32_t count[32][4],
+nve4_hw_sm_query_read_data(uint32_t count[32][4],
struct nvc0_context *nvc0, bool wait,
struct nvc0_query *q,
- const struct nvc0_mp_pm_query_cfg *cfg,
+ const struct nvc0_hw_sm_query_cfg *cfg,
unsigned mp_count)
{
unsigned p, c, d;
* NOTE: Interpretation of IPC requires knowledge of MP count.
*/
static boolean
-nvc0_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
+nvc0_hw_sm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
void *result, boolean wait)
{
uint32_t count[32][4];
uint64_t value = 0;
unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32);
unsigned p, c;
- const struct nvc0_mp_pm_query_cfg *cfg;
+ const struct nvc0_hw_sm_query_cfg *cfg;
bool ret;
- cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
+ cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
- ret = nve4_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
+ ret = nve4_hw_sm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
else
- ret = nvc0_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
+ ret = nvc0_hw_sm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
if (!ret)
return false;
if (screen->base.device->drm_version >= 0x01000101) {
if (screen->compute) {
if (screen->base.class_3d == NVE4_3D_CLASS) {
- count += NVE4_PM_QUERY_COUNT;
+ count += NVE4_HW_SM_QUERY_COUNT;
} else
if (screen->base.class_3d < NVE4_3D_CLASS) {
/* NVC0_COMPUTE is not always enabled */
- count += NVC0_PM_QUERY_COUNT;
+ count += NVC0_HW_SM_QUERY_COUNT;
}
}
}
if (screen->compute) {
if (screen->base.class_3d == NVE4_3D_CLASS) {
info->name = nve4_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
- info->query_type = NVE4_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
+ info->query_type = NVE4_HW_SM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
info->max_value.u64 =
- (id < NVE4_PM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100;
+ (id < NVE4_HW_SM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100;
info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
return 1;
} else
if (screen->base.class_3d < NVE4_3D_CLASS) {
info->name = nvc0_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
- info->query_type = NVC0_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
+ info->query_type = NVC0_HW_SM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
return 1;
}
info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
if (screen->base.class_3d == NVE4_3D_CLASS) {
- info->num_queries = NVE4_PM_QUERY_COUNT;
+ info->num_queries = NVE4_HW_SM_QUERY_COUNT;
/* On NVE4+, each multiprocessor have 8 hardware counters separated
* in two distinct domains, but we allow only one active query
return 1;
} else
if (screen->base.class_3d < NVE4_3D_CLASS) {
- info->num_queries = NVC0_PM_QUERY_COUNT;
+ info->num_queries = NVC0_HW_SM_QUERY_COUNT;
/* On NVC0:NVE4, each multiprocessor have 8 hardware counters
* in a single domain. */
struct {
struct nvc0_program *prog; /* compute state object to read MP counters */
struct pipe_query *mp_counter[8]; /* counter to query allocation */
- uint8_t num_mp_pm_active[2];
+ uint8_t num_hw_sm_active[2];
bool mp_counters_enabled;
} pm;
/* Performance counter queries:
*/
-#define NVE4_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i))
-#define NVE4_PM_QUERY_LAST NVE4_PM_QUERY(NVE4_PM_QUERY_COUNT - 1)
+#define NVE4_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i))
+#define NVE4_HW_SM_QUERY_LAST NVE4_HW_SM_QUERY(NVE4_HW_SM_QUERY_COUNT - 1)
enum nve4_pm_queries
{
- NVE4_PM_QUERY_ACTIVE_CYCLES = 0,
- NVE4_PM_QUERY_ACTIVE_WARPS,
- NVE4_PM_QUERY_ATOM_COUNT,
- NVE4_PM_QUERY_BRANCH,
- NVE4_PM_QUERY_DIVERGENT_BRANCH,
- NVE4_PM_QUERY_GLD_REQUEST,
- NVE4_PM_QUERY_GLD_MEM_DIV_REPLAY,
- NVE4_PM_QUERY_GST_TRANSACTIONS,
- NVE4_PM_QUERY_GST_MEM_DIV_REPLAY,
- NVE4_PM_QUERY_GRED_COUNT,
- NVE4_PM_QUERY_GST_REQUEST,
- NVE4_PM_QUERY_INST_EXECUTED,
- NVE4_PM_QUERY_INST_ISSUED,
- NVE4_PM_QUERY_INST_ISSUED1,
- NVE4_PM_QUERY_INST_ISSUED2,
- NVE4_PM_QUERY_L1_GLD_HIT,
- NVE4_PM_QUERY_L1_GLD_MISS,
- NVE4_PM_QUERY_L1_LOCAL_LD_HIT,
- NVE4_PM_QUERY_L1_LOCAL_LD_MISS,
- NVE4_PM_QUERY_L1_LOCAL_ST_HIT,
- NVE4_PM_QUERY_L1_LOCAL_ST_MISS,
- NVE4_PM_QUERY_L1_SHARED_LD_TRANSACTIONS,
- NVE4_PM_QUERY_L1_SHARED_ST_TRANSACTIONS,
- NVE4_PM_QUERY_LOCAL_LD,
- NVE4_PM_QUERY_LOCAL_LD_TRANSACTIONS,
- NVE4_PM_QUERY_LOCAL_ST,
- NVE4_PM_QUERY_LOCAL_ST_TRANSACTIONS,
- NVE4_PM_QUERY_PROF_TRIGGER_0,
- NVE4_PM_QUERY_PROF_TRIGGER_1,
- NVE4_PM_QUERY_PROF_TRIGGER_2,
- NVE4_PM_QUERY_PROF_TRIGGER_3,
- NVE4_PM_QUERY_PROF_TRIGGER_4,
- NVE4_PM_QUERY_PROF_TRIGGER_5,
- NVE4_PM_QUERY_PROF_TRIGGER_6,
- NVE4_PM_QUERY_PROF_TRIGGER_7,
- NVE4_PM_QUERY_SHARED_LD,
- NVE4_PM_QUERY_SHARED_LD_REPLAY,
- NVE4_PM_QUERY_SHARED_ST,
- NVE4_PM_QUERY_SHARED_ST_REPLAY,
- NVE4_PM_QUERY_SM_CTA_LAUNCHED,
- NVE4_PM_QUERY_THREADS_LAUNCHED,
- NVE4_PM_QUERY_UNCACHED_GLD_TRANSACTIONS,
- NVE4_PM_QUERY_WARPS_LAUNCHED,
- NVE4_PM_QUERY_METRIC_IPC,
- NVE4_PM_QUERY_METRIC_IPAC,
- NVE4_PM_QUERY_METRIC_IPEC,
- NVE4_PM_QUERY_METRIC_MP_OCCUPANCY,
- NVE4_PM_QUERY_METRIC_MP_EFFICIENCY,
- NVE4_PM_QUERY_METRIC_INST_REPLAY_OHEAD,
- NVE4_PM_QUERY_COUNT
+ NVE4_HW_SM_QUERY_ACTIVE_CYCLES = 0,
+ NVE4_HW_SM_QUERY_ACTIVE_WARPS,
+ NVE4_HW_SM_QUERY_ATOM_COUNT,
+ NVE4_HW_SM_QUERY_BRANCH,
+ NVE4_HW_SM_QUERY_DIVERGENT_BRANCH,
+ NVE4_HW_SM_QUERY_GLD_REQUEST,
+ NVE4_HW_SM_QUERY_GLD_MEM_DIV_REPLAY,
+ NVE4_HW_SM_QUERY_GST_TRANSACTIONS,
+ NVE4_HW_SM_QUERY_GST_MEM_DIV_REPLAY,
+ NVE4_HW_SM_QUERY_GRED_COUNT,
+ NVE4_HW_SM_QUERY_GST_REQUEST,
+ NVE4_HW_SM_QUERY_INST_EXECUTED,
+ NVE4_HW_SM_QUERY_INST_ISSUED,
+ NVE4_HW_SM_QUERY_INST_ISSUED1,
+ NVE4_HW_SM_QUERY_INST_ISSUED2,
+ NVE4_HW_SM_QUERY_L1_GLD_HIT,
+ NVE4_HW_SM_QUERY_L1_GLD_MISS,
+ NVE4_HW_SM_QUERY_L1_LOCAL_LD_HIT,
+ NVE4_HW_SM_QUERY_L1_LOCAL_LD_MISS,
+ NVE4_HW_SM_QUERY_L1_LOCAL_ST_HIT,
+ NVE4_HW_SM_QUERY_L1_LOCAL_ST_MISS,
+ NVE4_HW_SM_QUERY_L1_SHARED_LD_TRANSACTIONS,
+ NVE4_HW_SM_QUERY_L1_SHARED_ST_TRANSACTIONS,
+ NVE4_HW_SM_QUERY_LOCAL_LD,
+ NVE4_HW_SM_QUERY_LOCAL_LD_TRANSACTIONS,
+ NVE4_HW_SM_QUERY_LOCAL_ST,
+ NVE4_HW_SM_QUERY_LOCAL_ST_TRANSACTIONS,
+ NVE4_HW_SM_QUERY_PROF_TRIGGER_0,
+ NVE4_HW_SM_QUERY_PROF_TRIGGER_1,
+ NVE4_HW_SM_QUERY_PROF_TRIGGER_2,
+ NVE4_HW_SM_QUERY_PROF_TRIGGER_3,
+ NVE4_HW_SM_QUERY_PROF_TRIGGER_4,
+ NVE4_HW_SM_QUERY_PROF_TRIGGER_5,
+ NVE4_HW_SM_QUERY_PROF_TRIGGER_6,
+ NVE4_HW_SM_QUERY_PROF_TRIGGER_7,
+ NVE4_HW_SM_QUERY_SHARED_LD,
+ NVE4_HW_SM_QUERY_SHARED_LD_REPLAY,
+ NVE4_HW_SM_QUERY_SHARED_ST,
+ NVE4_HW_SM_QUERY_SHARED_ST_REPLAY,
+ NVE4_HW_SM_QUERY_SM_CTA_LAUNCHED,
+ NVE4_HW_SM_QUERY_THREADS_LAUNCHED,
+ NVE4_HW_SM_QUERY_UNCACHED_GLD_TRANSACTIONS,
+ NVE4_HW_SM_QUERY_WARPS_LAUNCHED,
+ NVE4_HW_SM_QUERY_METRIC_IPC,
+ NVE4_HW_SM_QUERY_METRIC_IPAC,
+ NVE4_HW_SM_QUERY_METRIC_IPEC,
+ NVE4_HW_SM_QUERY_METRIC_MP_OCCUPANCY,
+ NVE4_HW_SM_QUERY_METRIC_MP_EFFICIENCY,
+ NVE4_HW_SM_QUERY_METRIC_INST_REPLAY_OHEAD,
+ NVE4_HW_SM_QUERY_COUNT
};
-#define NVC0_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i))
-#define NVC0_PM_QUERY_LAST NVC0_PM_QUERY(NVC0_PM_QUERY_COUNT - 1)
+#define NVC0_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i))
+#define NVC0_HW_SM_QUERY_LAST NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_COUNT - 1)
enum nvc0_pm_queries
{
- NVC0_PM_QUERY_ACTIVE_CYCLES = 0,
- NVC0_PM_QUERY_ACTIVE_WARPS,
- NVC0_PM_QUERY_ATOM_COUNT,
- NVC0_PM_QUERY_BRANCH,
- NVC0_PM_QUERY_DIVERGENT_BRANCH,
- NVC0_PM_QUERY_GLD_REQUEST,
- NVC0_PM_QUERY_GRED_COUNT,
- NVC0_PM_QUERY_GST_REQUEST,
- NVC0_PM_QUERY_INST_EXECUTED,
- NVC0_PM_QUERY_INST_ISSUED1_0,
- NVC0_PM_QUERY_INST_ISSUED1_1,
- NVC0_PM_QUERY_INST_ISSUED2_0,
- NVC0_PM_QUERY_INST_ISSUED2_1,
- NVC0_PM_QUERY_LOCAL_LD,
- NVC0_PM_QUERY_LOCAL_ST,
- NVC0_PM_QUERY_PROF_TRIGGER_0,
- NVC0_PM_QUERY_PROF_TRIGGER_1,
- NVC0_PM_QUERY_PROF_TRIGGER_2,
- NVC0_PM_QUERY_PROF_TRIGGER_3,
- NVC0_PM_QUERY_PROF_TRIGGER_4,
- NVC0_PM_QUERY_PROF_TRIGGER_5,
- NVC0_PM_QUERY_PROF_TRIGGER_6,
- NVC0_PM_QUERY_PROF_TRIGGER_7,
- NVC0_PM_QUERY_SHARED_LD,
- NVC0_PM_QUERY_SHARED_ST,
- NVC0_PM_QUERY_THREADS_LAUNCHED,
- NVC0_PM_QUERY_TH_INST_EXECUTED_0,
- NVC0_PM_QUERY_TH_INST_EXECUTED_1,
- NVC0_PM_QUERY_TH_INST_EXECUTED_2,
- NVC0_PM_QUERY_TH_INST_EXECUTED_3,
- NVC0_PM_QUERY_WARPS_LAUNCHED,
- NVC0_PM_QUERY_COUNT
+ NVC0_HW_SM_QUERY_ACTIVE_CYCLES = 0,
+ NVC0_HW_SM_QUERY_ACTIVE_WARPS,
+ NVC0_HW_SM_QUERY_ATOM_COUNT,
+ NVC0_HW_SM_QUERY_BRANCH,
+ NVC0_HW_SM_QUERY_DIVERGENT_BRANCH,
+ NVC0_HW_SM_QUERY_GLD_REQUEST,
+ NVC0_HW_SM_QUERY_GRED_COUNT,
+ NVC0_HW_SM_QUERY_GST_REQUEST,
+ NVC0_HW_SM_QUERY_INST_EXECUTED,
+ NVC0_HW_SM_QUERY_INST_ISSUED1_0,
+ NVC0_HW_SM_QUERY_INST_ISSUED1_1,
+ NVC0_HW_SM_QUERY_INST_ISSUED2_0,
+ NVC0_HW_SM_QUERY_INST_ISSUED2_1,
+ NVC0_HW_SM_QUERY_LOCAL_LD,
+ NVC0_HW_SM_QUERY_LOCAL_ST,
+ NVC0_HW_SM_QUERY_PROF_TRIGGER_0,
+ NVC0_HW_SM_QUERY_PROF_TRIGGER_1,
+ NVC0_HW_SM_QUERY_PROF_TRIGGER_2,
+ NVC0_HW_SM_QUERY_PROF_TRIGGER_3,
+ NVC0_HW_SM_QUERY_PROF_TRIGGER_4,
+ NVC0_HW_SM_QUERY_PROF_TRIGGER_5,
+ NVC0_HW_SM_QUERY_PROF_TRIGGER_6,
+ NVC0_HW_SM_QUERY_PROF_TRIGGER_7,
+ NVC0_HW_SM_QUERY_SHARED_LD,
+ NVC0_HW_SM_QUERY_SHARED_ST,
+ NVC0_HW_SM_QUERY_THREADS_LAUNCHED,
+ NVC0_HW_SM_QUERY_TH_INST_EXECUTED_0,
+ NVC0_HW_SM_QUERY_TH_INST_EXECUTED_1,
+ NVC0_HW_SM_QUERY_TH_INST_EXECUTED_2,
+ NVC0_HW_SM_QUERY_TH_INST_EXECUTED_3,
+ NVC0_HW_SM_QUERY_WARPS_LAUNCHED,
+ NVC0_HW_SM_QUERY_COUNT
};
/* Driver statistics queries: