From: Nicolai Hähnle Date: Fri, 11 Dec 2015 21:03:56 +0000 (-0500) Subject: radeonsi: add placeholder MC and SRBM performance counter groups X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=156e81f305b19f62b4d6ea98bf44a951e89dc947;p=mesa.git radeonsi: add placeholder MC and SRBM performance counter groups Yet another change motivated by AMD GPUPerfStudio compatibility. These groups are not directly accessible from userspace, and AMD GPUPerfStudio does not actually query them - it just requires them to be there. Hence, adding a placeholder for now. Reviewed-by: Edward O'Callaghan Acked-by: Marek Olšák --- diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index f944a52b8dc..24855e4e6f2 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -56,6 +56,8 @@ enum si_pc_reg_layout { /* Registers are laid out in decreasing rather than increasing order. */ SI_PC_REG_REVERSE = 4, + + SI_PC_FAKE = 8, }; struct si_pc_block_base { @@ -325,6 +327,20 @@ static struct si_pc_block_base cik_WD = { .counter0_lo = R_034200_WD_PERFCOUNTER0_LO, }; +static struct si_pc_block_base cik_MC = { + .name = "MC", + .num_counters = 4, + + .layout = SI_PC_FAKE, +}; + +static struct si_pc_block_base cik_SRBM = { + .name = "SRBM", + .num_counters = 2, + + .layout = SI_PC_FAKE, +}; + /* Both the number of instances and selectors varies between chips of the same * class. We only differentiate by class here and simply expose the maximum * number over all chips in a class. @@ -352,6 +368,8 @@ static struct si_pc_block groups_CIK[] = { { &cik_GDS, 121 }, { &cik_VGT, 140 }, { &cik_IA, 22 }, + { &cik_MC, 22 }, + { &cik_SRBM, 19 }, { &cik_WD, 22 }, { &cik_CPG, 46 }, { &cik_CPC, 22 }, @@ -377,6 +395,8 @@ static struct si_pc_block groups_VI[] = { { &cik_GDS, 121 }, { &cik_VGT, 147 }, { &cik_IA, 24 }, + { &cik_MC, 22 }, + { &cik_SRBM, 27 }, { &cik_WD, 37 }, { &cik_CPG, 48 }, { &cik_CPC, 24 }, @@ -391,7 +411,9 @@ static void si_pc_get_size(struct r600_perfcounter_block *group, struct si_pc_block_base *regs = sigroup->b; unsigned layout_multi = regs->layout & SI_PC_MULTI_MASK; - if (layout_multi == SI_PC_MULTI_BLOCK) { + if (regs->layout & SI_PC_FAKE) { + *num_select_dw = 0; + } else if (layout_multi == SI_PC_MULTI_BLOCK) { if (count < regs->num_multi) *num_select_dw = 2 * (count + 2) + regs->num_prelude; else @@ -454,6 +476,9 @@ static void si_pc_emit_select(struct r600_common_context *ctx, assert(count <= regs->num_counters); + if (regs->layout & SI_PC_FAKE) + return; + if (layout_multi == SI_PC_MULTI_BLOCK) { assert(!(regs->layout & SI_PC_REG_REVERSE)); @@ -613,22 +638,35 @@ static void si_pc_emit_read(struct r600_common_context *ctx, unsigned reg = regs->counter0_lo; unsigned reg_delta = 8; - if (regs->layout & SI_PC_REG_REVERSE) - reg_delta = -reg_delta; - - for (idx = 0; idx < count; ++idx) { - if (regs->counters) - reg = regs->counters[idx]; + if (!(regs->layout & SI_PC_FAKE)) { + if (regs->layout & SI_PC_REG_REVERSE) + reg_delta = -reg_delta; - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | - COPY_DATA_DST_SEL(COPY_DATA_MEM)); - radeon_emit(cs, reg >> 2); - radeon_emit(cs, 0); /* unused */ - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - va += 4; - reg += reg_delta; + for (idx = 0; idx < count; ++idx) { + if (regs->counters) + reg = regs->counters[idx]; + + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | + COPY_DATA_DST_SEL(COPY_DATA_MEM)); + radeon_emit(cs, reg >> 2); + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + va += 4; + reg += reg_delta; + } + } else { + for (idx = 0; idx < count; ++idx) { + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | + COPY_DATA_DST_SEL(COPY_DATA_MEM)); + radeon_emit(cs, 0); /* immediate */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + va += 4; + } } }