uint32_t src_sel; /* signal selection for up to 4 sources */
};
-#define NVC0_COUNTER_OPn_SUM 0
-#define NVC0_COUNTER_OPn_OR 1
-#define NVC0_COUNTER_OPn_AND 2
-#define NVC0_COUNTER_OP2_REL_SUM_MM 3 /* (sum(ctr0) - sum(ctr1)) / sum(ctr0) */
-#define NVC0_COUNTER_OP2_DIV_SUM_M0 4 /* sum(ctr0) / ctr1 of MP[0]) */
-#define NVC0_COUNTER_OP2_AVG_DIV_MM 5 /* avg(ctr0 / ctr1) */
-#define NVC0_COUNTER_OP2_AVG_DIV_M0 6 /* avg(ctr0) / ctr1 of MP[0]) */
-
struct nvc0_hw_sm_query_cfg
{
struct nvc0_hw_sm_counter_cfg ctr[8];
uint8_t num_counters;
- uint8_t op;
uint8_t norm[2]; /* normalization num,denom */
};
-#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, 0, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
-#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, 0, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
+#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, 0, s }, {}, {}, {} }, 1, { nu, dn } }
+#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, 0, s }, {}, {}, {} }, 1, { nu, dn } }
/* NOTES:
* active_warps: bit 0 alternates btw 0 and 1 for odd nr of warps
{
.ctr[0] = _C(0xaaaa, LOGOP, 0x11, 0x000000ff, 0x00000000),
.num_counters = 1,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
.ctr[4] = _C(0xaaaa, LOGOP, 0x24, 0x000000ff, 0x00000050),
.ctr[5] = _C(0xaaaa, LOGOP, 0x24, 0x000000ff, 0x00000060),
.num_counters = 6,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
{
.ctr[0] = _C(0xaaaa, LOGOP, 0x63, 0x000000ff, 0x00000030),
.num_counters = 1,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
.ctr[0] = _C(0xaaaa, LOGOP, 0x1a, 0x000000ff, 0x00000000),
.ctr[1] = _C(0xaaaa, LOGOP, 0x1a, 0x000000ff, 0x00000010),
.num_counters = 2,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
.ctr[0] = _C(0xaaaa, LOGOP, 0x19, 0x000000ff, 0x00000020),
.ctr[1] = _C(0xaaaa, LOGOP, 0x19, 0x000000ff, 0x00000030),
.num_counters = 2,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
{
.ctr[0] = _C(0xaaaa, LOGOP, 0x64, 0x000000ff, 0x00000030),
.num_counters = 1,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
{
.ctr[0] = _C(0xaaaa, LOGOP, 0x63, 0x000000ff, 0x00000040),
.num_counters = 1,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
{
.ctr[0] = _C(0xaaaa, LOGOP, 0x64, 0x000000ff, 0x00000060),
.num_counters = 1,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
.ctr[0] = _C(0xaaaa, LOGOP, 0x2d, 0x0000ffff, 0x00001000),
.ctr[1] = _C(0xaaaa, LOGOP, 0x2d, 0x0000ffff, 0x00001010),
.num_counters = 2,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
.ctr[0] = _C(0xaaaa, LOGOP, 0x27, 0x0000ffff, 0x00007060),
.ctr[1] = _C(0xaaaa, LOGOP, 0x27, 0x0000ffff, 0x00007070),
.num_counters = 2,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
{
.ctr[0] = _C(0xaaaa, LOGOP, 0x64, 0x000000ff, 0x00000020),
.num_counters = 1,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
{
.ctr[0] = _C(0xaaaa, LOGOP, 0x64, 0x000000ff, 0x00000050),
.num_counters = 1,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
{
.ctr[0] = _C(0xaaaa, LOGOP, 0x01, 0x000000ff, 0x00000000),
.num_counters = 1,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
{
.ctr[0] = _C(0xaaaa, LOGOP, 0x01, 0x000000ff, 0x00000010),
.num_counters = 1,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
{
.ctr[0] = _C(0xaaaa, LOGOP, 0x01, 0x000000ff, 0x00000020),
.num_counters = 1,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
{
.ctr[0] = _C(0xaaaa, LOGOP, 0x01, 0x000000ff, 0x00000030),
.num_counters = 1,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
{
.ctr[0] = _C(0xaaaa, LOGOP, 0x01, 0x000000ff, 0x00000040),
.num_counters = 1,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
{
.ctr[0] = _C(0xaaaa, LOGOP, 0x01, 0x000000ff, 0x00000050),
.num_counters = 1,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
{
.ctr[0] = _C(0xaaaa, LOGOP, 0x01, 0x000000ff, 0x00000060),
.num_counters = 1,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
{
.ctr[0] = _C(0xaaaa, LOGOP, 0x01, 0x000000ff, 0x00000070),
.num_counters = 1,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
{
.ctr[0] = _C(0xaaaa, LOGOP, 0x64, 0x000000ff, 0x00000010),
.num_counters = 1,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
{
.ctr[0] = _C(0xaaaa, LOGOP, 0x64, 0x000000ff, 0x00000040),
.num_counters = 1,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
.ctr[4] = _C(0xaaaa, LOGOP, 0x26, 0x000000ff, 0x00000050),
.ctr[5] = _C(0xaaaa, LOGOP, 0x26, 0x000000ff, 0x00000060),
.num_counters = 6,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
.ctr[4] = _C(0xaaaa, LOGOP, 0x2f, 0x000000ff, 0x00000040),
.ctr[5] = _C(0xaaaa, LOGOP, 0x2f, 0x000000ff, 0x00000050),
.num_counters = 6,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
.ctr[4] = _C(0xaaaa, LOGOP, 0x30, 0x000000ff, 0x00000040),
.ctr[5] = _C(0xaaaa, LOGOP, 0x30, 0x000000ff, 0x00000050),
.num_counters = 6,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
{
.ctr[0] = _C(0xaaaa, LOGOP, 0x26, 0x000000ff, 0x00000000),
.num_counters = 1,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
.ctr[1] = _C(0xaaaa, LOGOP, 0x2d, 0x000000ff, 0x00000010),
.ctr[2] = _C(0xaaaa, LOGOP, 0x2d, 0x000000ff, 0x00000020),
.num_counters = 3,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
{
.ctr[0] = _C(0xaaaa, LOGOP, 0x7e, 0x000000ff, 0x00000010),
.num_counters = 1,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
{
.ctr[0] = _C(0xaaaa, LOGOP, 0x7e, 0x000000ff, 0x00000040),
.num_counters = 1,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
{
.ctr[0] = _C(0xaaaa, LOGOP, 0x7e, 0x000000ff, 0x00000020),
.num_counters = 1,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
{
.ctr[0] = _C(0xaaaa, LOGOP, 0x7e, 0x000000ff, 0x00000050),
.num_counters = 1,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
.ctr[4] = _C(0xaaaa, LOGOP, 0xa3, 0x000000ff, 0x00000040),
.ctr[5] = _C(0xaaaa, LOGOP, 0xa3, 0x000000ff, 0x00000050),
.num_counters = 6,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
.ctr[4] = _C(0xaaaa, LOGOP, 0xa5, 0x000000ff, 0x00000040),
.ctr[5] = _C(0xaaaa, LOGOP, 0xa5, 0x000000ff, 0x00000050),
.num_counters = 6,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
.ctr[4] = _C(0xaaaa, LOGOP, 0xa4, 0x000000ff, 0x00000040),
.ctr[5] = _C(0xaaaa, LOGOP, 0xa4, 0x000000ff, 0x00000050),
.num_counters = 6,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
.ctr[4] = _C(0xaaaa, LOGOP, 0xa6, 0x000000ff, 0x00000040),
.ctr[5] = _C(0xaaaa, LOGOP, 0xa6, 0x000000ff, 0x00000050),
.num_counters = 6,
- .op = NVC0_COUNTER_OPn_SUM,
.norm = { 1, 1 },
};
if (!ret)
return false;
- if (cfg->op == NVC0_COUNTER_OPn_SUM) {
- for (c = 0; c < cfg->num_counters; ++c)
- for (p = 0; p < mp_count; ++p)
- value += count[p][c];
- value = (value * cfg->norm[0]) / cfg->norm[1];
- } else
- if (cfg->op == NVC0_COUNTER_OPn_OR) {
- uint32_t v = 0;
- for (c = 0; c < cfg->num_counters; ++c)
- for (p = 0; p < mp_count; ++p)
- v |= count[p][c];
- value = ((uint64_t)v * cfg->norm[0]) / cfg->norm[1];
- } else
- if (cfg->op == NVC0_COUNTER_OPn_AND) {
- uint32_t v = ~0;
- for (c = 0; c < cfg->num_counters; ++c)
- for (p = 0; p < mp_count; ++p)
- v &= count[p][c];
- value = ((uint64_t)v * cfg->norm[0]) / cfg->norm[1];
- } else
- if (cfg->op == NVC0_COUNTER_OP2_REL_SUM_MM) {
- uint64_t v[2] = { 0, 0 };
- for (p = 0; p < mp_count; ++p) {
- v[0] += count[p][0];
- v[1] += count[p][1];
- }
- if (v[0])
- value = ((v[0] - v[1]) * cfg->norm[0]) / (v[0] * cfg->norm[1]);
- } else
- if (cfg->op == NVC0_COUNTER_OP2_DIV_SUM_M0) {
+ for (c = 0; c < cfg->num_counters; ++c)
for (p = 0; p < mp_count; ++p)
- value += count[p][0];
- if (count[0][1])
- value = (value * cfg->norm[0]) / (count[0][1] * cfg->norm[1]);
- else
- value = 0;
- } else
- if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_MM) {
- unsigned mp_used = 0;
- for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
- if (count[p][1])
- value += (count[p][0] * cfg->norm[0]) / count[p][1];
- if (mp_used)
- value /= (uint64_t)mp_used * cfg->norm[1];
- } else
- if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_M0) {
- unsigned mp_used = 0;
- for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
- value += count[p][0];
- if (count[0][1] && mp_used) {
- value *= cfg->norm[0];
- value /= (uint64_t)count[0][1] * mp_used * cfg->norm[1];
- } else {
- value = 0;
- }
- }
+ value += count[p][c];
+ value = (value * cfg->norm[0]) / cfg->norm[1];
*(uint64_t *)result = value;
return true;