{
/* All secondary selector dwords follow as one block after the primary
* selector dwords for the counters that have secondary selectors.
+ *
+ * Example:
+ * PERFCOUNTER0_SELECT
+ * PERFCOUNTER1_SELECT
+ * PERFCOUNTER0_SELECT1
+ * PERFCOUNTER1_SELECT1
+ * PERFCOUNTER2_SELECT
+ * PERFCOUNTER3_SELECT
*/
SI_PC_MULTI_BLOCK = 0,
- /* Each secondary selector dword follows immediately afters the
+ /* Each secondary selector dword follows immediately after the
* corresponding primary.
+ *
+ * Example:
+ * PERFCOUNTER0_SELECT
+ * PERFCOUNTER0_SELECT1
+ * PERFCOUNTER1_SELECT
+ * PERFCOUNTER1_SELECT1
+ * PERFCOUNTER2_SELECT
+ * PERFCOUNTER3_SELECT
*/
SI_PC_MULTI_ALTERNATE = 1,
/* All secondary selector dwords follow as one block after all primary
* selector dwords.
+ *
+ * Example:
+ * PERFCOUNTER0_SELECT
+ * PERFCOUNTER1_SELECT
+ * PERFCOUNTER2_SELECT
+ * PERFCOUNTER3_SELECT
+ * PERFCOUNTER0_SELECT1
+ * PERFCOUNTER1_SELECT1
*/
SI_PC_MULTI_TAIL = 2,
.layout = SI_PC_FAKE,
};
+static struct si_pc_block_base gfx10_CHA = { /* GFX10 description of the "CHA" perf-counter block. */
+ .name = "CHA",
+ .num_counters = 4,
+ /* Register layout: counter 0's SELECT and LO registers, then the secondary-selector scheme. */
+ .select0 = R_037780_CHA_PERFCOUNTER0_SELECT,
+ .counter0_lo = R_035800_CHA_PERFCOUNTER0_LO,
+ .num_multi = 1, /* presumably the number of counters that have a SELECT1 register -- TODO confirm */
+ .layout = SI_PC_MULTI_ALTERNATE, /* each secondary selector directly follows its primary */
+};
+
+static struct si_pc_block_base gfx10_CHCG = { /* GFX10 description of the "CHCG" perf-counter block. */
+ .name = "CHCG",
+ .num_counters = 4,
+ /* Register layout: counter 0's SELECT and LO registers, then the secondary-selector scheme. */
+ .select0 = R_036F18_CHCG_PERFCOUNTER0_SELECT,
+ .counter0_lo = R_034F20_CHCG_PERFCOUNTER0_LO,
+ .num_multi = 1, /* presumably the number of counters that have a SELECT1 register -- TODO confirm */
+ .layout = SI_PC_MULTI_ALTERNATE, /* each secondary selector directly follows its primary */
+};
+
+static struct si_pc_block_base gfx10_CHC = { /* GFX10 description of the "CHC" perf-counter block. */
+ .name = "CHC",
+ .num_counters = 4,
+ /* Register layout: counter 0's SELECT and LO registers, then the secondary-selector scheme. */
+ .select0 = R_036F00_CHC_PERFCOUNTER0_SELECT,
+ .counter0_lo = R_034F00_CHC_PERFCOUNTER0_LO,
+ .num_multi = 1, /* presumably the number of counters that have a SELECT1 register -- TODO confirm */
+ .layout = SI_PC_MULTI_ALTERNATE, /* each secondary selector directly follows its primary */
+};
+
+static struct si_pc_block_base gfx10_GCR = { /* GFX10 description of the "GCR" perf-counter block. */
+ .name = "GCR",
+ .num_counters = 2,
+ /* Register layout: counter 0's SELECT and LO registers, then the secondary-selector scheme. */
+ .select0 = R_037580_GCR_PERFCOUNTER0_SELECT,
+ .counter0_lo = R_035480_GCR_PERFCOUNTER0_LO,
+ .num_multi = 1, /* presumably the number of counters that have a SELECT1 register -- TODO confirm */
+ .layout = SI_PC_MULTI_ALTERNATE, /* each secondary selector directly follows its primary */
+};
+
+static struct si_pc_block_base gfx10_GE = { /* GFX10 description of the "GE" perf-counter block. */
+ .name = "GE",
+ .num_counters = 12,
+ /* Register layout: counter 0's SELECT and LO registers, then the secondary-selector scheme. */
+ .select0 = R_036200_GE_PERFCOUNTER0_SELECT,
+ .counter0_lo = R_034200_GE_PERFCOUNTER0_LO,
+ .num_multi = 4, /* presumably the number of counters that have a SELECT1 register -- TODO confirm */
+ .layout = SI_PC_MULTI_ALTERNATE, /* each secondary selector directly follows its primary */
+};
+
+static struct si_pc_block_base gfx10_GL1A = { /* GFX10 description of the "GL1A" perf-counter block. */
+ .name = "GL1A",
+ .num_counters = 4,
+ .flags = SI_PC_BLOCK_SE | SI_PC_BLOCK_SHADER_WINDOWED,
+ /* Register layout: counter 0's SELECT and LO registers, then the secondary-selector scheme. */
+ .select0 = R_037700_GL1A_PERFCOUNTER0_SELECT,
+ .counter0_lo = R_035700_GL1A_PERFCOUNTER0_LO,
+ .num_multi = 1, /* presumably the number of counters that have a SELECT1 register -- TODO confirm */
+ .layout = SI_PC_MULTI_ALTERNATE, /* each secondary selector directly follows its primary */
+};
+
+static struct si_pc_block_base gfx10_GL1C = { /* GFX10 description of the "GL1C" perf-counter block. */
+ .name = "GL1C",
+ .num_counters = 4,
+ .flags = SI_PC_BLOCK_SE | SI_PC_BLOCK_SHADER_WINDOWED,
+ /* Register layout: counter 0's SELECT and LO registers, then the secondary-selector scheme. */
+ .select0 = R_036E80_GL1C_PERFCOUNTER0_SELECT,
+ .counter0_lo = R_034E80_GL1C_PERFCOUNTER0_LO,
+ .num_multi = 1, /* presumably the number of counters that have a SELECT1 register -- TODO confirm */
+ .layout = SI_PC_MULTI_ALTERNATE, /* each secondary selector directly follows its primary */
+};
+
+static struct si_pc_block_base gfx10_GL2A = { /* GFX10 description of the "GL2A" perf-counter block. */
+ .name = "GL2A",
+ .num_counters = 4,
+ /* Register layout: counter 0's SELECT and LO registers, then the secondary-selector scheme. */
+ .select0 = R_036E40_GL2A_PERFCOUNTER0_SELECT,
+ .counter0_lo = R_034E40_GL2A_PERFCOUNTER0_LO,
+ .num_multi = 2, /* presumably the number of counters that have a SELECT1 register -- TODO confirm */
+ .layout = SI_PC_MULTI_ALTERNATE, /* each secondary selector directly follows its primary */
+};
+
+static struct si_pc_block_base gfx10_GL2C = { /* GFX10 description of the "GL2C" perf-counter block. */
+ .name = "GL2C",
+ .num_counters = 4,
+ /* Register layout: counter 0's SELECT and LO registers, then the secondary-selector scheme. */
+ .select0 = R_036E00_GL2C_PERFCOUNTER0_SELECT,
+ .counter0_lo = R_034E00_GL2C_PERFCOUNTER0_LO,
+ .num_multi = 2, /* presumably the number of counters that have a SELECT1 register -- TODO confirm */
+ .layout = SI_PC_MULTI_ALTERNATE, /* each secondary selector directly follows its primary */
+};
+
+static unsigned gfx10_PA_PH_select[] = { /* Explicit PA_PH selector register list, referenced by gfx10_PA_PH.select. */
+ R_037600_PA_PH_PERFCOUNTER0_SELECT,
+ R_037604_PA_PH_PERFCOUNTER0_SELECT1, /* counters 0-3: each primary SELECT is listed with its SELECT1 */
+ R_037608_PA_PH_PERFCOUNTER1_SELECT,
+ R_037640_PA_PH_PERFCOUNTER1_SELECT1, /* going by the R_<offset> names, SELECT1 of counters 1-3 is not adjacent to its primary */
+ R_03760C_PA_PH_PERFCOUNTER2_SELECT,
+ R_037644_PA_PH_PERFCOUNTER2_SELECT1,
+ R_037610_PA_PH_PERFCOUNTER3_SELECT,
+ R_037648_PA_PH_PERFCOUNTER3_SELECT1,
+ R_037614_PA_PH_PERFCOUNTER4_SELECT, /* counters 4-7: only a primary SELECT is listed */
+ R_037618_PA_PH_PERFCOUNTER5_SELECT,
+ R_03761C_PA_PH_PERFCOUNTER6_SELECT,
+ R_037620_PA_PH_PERFCOUNTER7_SELECT,
+};
+static struct si_pc_block_base gfx10_PA_PH = { /* GFX10 description of the "PA_PH" perf-counter block. */
+ .name = "PA_PH",
+ .num_counters = 8,
+ .flags = SI_PC_BLOCK_SE,
+ /* Unlike the blocks that set .select0, this one supplies an explicit table of selector registers. */
+ .select = gfx10_PA_PH_select,
+ .counter0_lo = R_035600_PA_PH_PERFCOUNTER0_LO,
+ .num_multi = 4, /* the table lists a SELECT1 register for counters 0-3 */
+ .layout = SI_PC_MULTI_CUSTOM, /* presumably tells the code to use .select instead of computing offsets -- TODO confirm */
+};
+
+static struct si_pc_block_base gfx10_PA_SU = { /* GFX10 description of the "PA_SU" perf-counter block. */
+ .name = "PA_SU",
+ .num_counters = 4,
+ .flags = SI_PC_BLOCK_SE,
+ /* Register layout: counter 0's SELECT and LO registers, then the secondary-selector scheme. */
+ .select0 = R_036400_PA_SU_PERFCOUNTER0_SELECT,
+ .counter0_lo = R_034400_PA_SU_PERFCOUNTER0_LO,
+ .num_multi = 4, /* presumably the number of counters that have a SELECT1 register -- TODO confirm */
+ .layout = SI_PC_MULTI_ALTERNATE, /* each secondary selector directly follows its primary */
+};
+
+static struct si_pc_block_base gfx10_RLC = { /* GFX10 description of the "RLC" perf-counter block. */
+ .name = "RLC",
+ .num_counters = 2,
+ /* Register layout: counter 0's SELECT and LO registers, then the secondary-selector scheme. */
+ .select0 = R_037304_RLC_PERFCOUNTER0_SELECT,
+ .counter0_lo = R_035200_RLC_PERFCOUNTER0_LO,
+ .num_multi = 0, /* presumably means no counter has a SELECT1 register -- TODO confirm */
+ .layout = SI_PC_MULTI_ALTERNATE,
+};
+
+static struct si_pc_block_base gfx10_RMI = { /* GFX10 description of the "RMI" perf-counter block. */
+ .name = "RMI",
+ /* Actually 4, but the 2nd counter is missing the secondary selector while
+ * the 3rd counter has it, which complicates the register layout. */
+ .num_counters = 2,
+ .flags = SI_PC_BLOCK_SE | SI_PC_BLOCK_INSTANCE_GROUPS,
+ /* Register layout: counter 0's SELECT and LO registers, then the secondary-selector scheme. */
+ .select0 = R_037400_RMI_PERFCOUNTER0_SELECT,
+ .counter0_lo = R_035300_RMI_PERFCOUNTER0_LO,
+ .num_multi = 1, /* presumably the number of counters that have a SELECT1 register -- TODO confirm */
+ .layout = SI_PC_MULTI_ALTERNATE, /* each secondary selector directly follows its primary */
+};
+
+static struct si_pc_block_base gfx10_UTCL1 = { /* GFX10 description of the "UTCL1" perf-counter block. */
+ .name = "UTCL1",
+ .num_counters = 2,
+ .flags = SI_PC_BLOCK_SE | SI_PC_BLOCK_SHADER_WINDOWED,
+ /* Register layout: counter 0's SELECT and LO registers, then the secondary-selector scheme. */
+ .select0 = R_03758C_UTCL1_PERFCOUNTER0_SELECT,
+ .counter0_lo = R_035470_UTCL1_PERFCOUNTER0_LO,
+ .num_multi = 0, /* presumably means no counter has a SELECT1 register -- TODO confirm */
+ .layout = SI_PC_MULTI_ALTERNATE,
+};
+
/* Both the number of instances and selectors varies between chips of the same
* class. We only differentiate by class here and simply expose the maximum
* number over all chips in a class.
{&cik_CPC, 35},
};
+static struct si_pc_block_gfxdescr groups_gfx10[] = { /* Perf-counter block table selected for GFX10. */
+ {&cik_CB, 461}, /* each entry: block description, then (presumably) its selector count -- TODO confirm field order */
+ {&gfx10_CHA, 45},
+ {&gfx10_CHCG, 35},
+ {&gfx10_CHC, 35},
+ {&cik_CPC, 47}, /* cik_* entries reuse block descriptions declared for older chips */
+ {&cik_CPF, 40},
+ {&cik_CPG, 82},
+ {&cik_DB, 370},
+ {&gfx10_GCR, 94},
+ {&cik_GDS, 123},
+ {&gfx10_GE, 315},
+ {&gfx10_GL1A, 36},
+ {&gfx10_GL1C, 64},
+ {&gfx10_GL2A, 91},
+ {&gfx10_GL2C, 235},
+ {&cik_GRBM, 47},
+ {&cik_GRBMSE, 19},
+ {&gfx10_PA_PH, 960},
+ {&cik_PA_SC, 552},
+ {&gfx10_PA_SU, 266},
+ {&gfx10_RLC, 7},
+ {&gfx10_RMI, 258},
+ {&cik_SPI, 329},
+ {&cik_SQ, 509},
+ {&cik_SX, 225},
+ {&cik_TA, 226},
+ {&cik_TCP, 77},
+ {&cik_TD, 61},
+ {&gfx10_UTCL1, 15},
+};
+
static bool si_pc_block_has_per_se_groups(const struct si_perfcounters *pc,
const struct si_pc_block *block)
{
value |= S_030800_SE_BROADCAST_WRITES(1);
}
+ if (sctx->chip_class >= GFX10) {
+ /* TODO: Expose counters from each shader array separately if needed. */
+ value |= S_030800_SA_BROADCAST_WRITES(1);
+ }
+
if (instance >= 0) {
value |= S_030800_INSTANCE_INDEX(instance);
} else {
blocks = groups_gfx9;
num_blocks = ARRAY_SIZE(groups_gfx9);
break;
+ case GFX10:
+ blocks = groups_gfx10;
+ num_blocks = ARRAY_SIZE(groups_gfx10);
+ break;
case GFX6:
default:
return; /* not implemented */
}
- if (screen->info.max_sh_per_se != 1) {
- /* This should not happen on non-GFX6 chips. */
- fprintf(stderr,
- "si_init_perfcounters: max_sh_per_se = %d not "
- "supported (inaccurate performance counters)\n",
- screen->info.max_sh_per_se);
- }
-
screen->perfcounters = pc = CALLOC_STRUCT(si_perfcounters);
if (!pc)
return;
block->b = &blocks[i];
block->num_instances = MAX2(1, block->b->instances);
- if (!strcmp(block->b->b->name, "CB") || !strcmp(block->b->b->name, "DB"))
+ if (!strcmp(block->b->b->name, "CB") ||
+ !strcmp(block->b->b->name, "DB") ||
+ !strcmp(block->b->b->name, "RMI"))
block->num_instances = screen->info.max_se;
else if (!strcmp(block->b->b->name, "TCC"))
block->num_instances = screen->info.num_tcc_blocks;