ac: correct PKT3_COPY_DATA definitions
[mesa.git] / src / gallium / drivers / radeonsi / si_perfcounter.c
index fd28d1176e3cffc0d21a2956bbfdd144dc7324b8..de71572c8aa08eb864dcee7e208496391d2d6395 100644 (file)
@@ -1,5 +1,6 @@
 /*
  * Copyright 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * Authors:
- *  Nicolai Hähnle <nicolai.haehnle@amd.com>
- *
  */
 
-#include "radeon/r600_cs.h"
-#include "radeon/r600_query.h"
+#include "si_build_pm4.h"
+#include "si_query.h"
 #include "util/u_memory.h"
 
-#include "si_pipe.h"
-#include "sid.h"
 
 enum si_pc_reg_layout {
        /* All secondary selector dwords follow as one block after the primary
@@ -60,22 +55,22 @@ enum si_pc_reg_layout {
 };
 
 struct si_pc_block_base {
-       char name[8];
+       const char *name;
        unsigned num_counters;
        unsigned flags;
 
        unsigned select_or;
        unsigned select0;
        unsigned counter0_lo;
-       unsigned select[4];
-       unsigned counters[4];
+       unsigned *select;
+       unsigned *counters;
        unsigned num_multi;
        unsigned num_prelude;
        unsigned layout;
 };
 
 struct si_pc_block {
-       const struct si_pc_block_base *b;
+       struct si_pc_block_base *b;
        unsigned selectors;
        unsigned instances;
 };
@@ -98,10 +93,10 @@ static const unsigned si_pc_shader_type_bits[] = {
        S_036780_CS_EN(1),
 };
 
-static const struct si_pc_block_base cik_CB = {
+static struct si_pc_block_base cik_CB = {
        .name = "CB",
        .num_counters = 4,
-       .flags = R600_PC_BLOCK_SE | R600_PC_BLOCK_INSTANCE_GROUPS,
+       .flags = SI_PC_BLOCK_SE | SI_PC_BLOCK_INSTANCE_GROUPS,
 
        .select0 = R_037000_CB_PERFCOUNTER_FILTER,
        .counter0_lo = R_035018_CB_PERFCOUNTER0_LO,
@@ -110,19 +105,22 @@ static const struct si_pc_block_base cik_CB = {
        .layout = SI_PC_MULTI_ALTERNATE,
 };
 
-static const struct si_pc_block_base cik_CPC = {
+static unsigned cik_CPC_select[] = {
+       R_036024_CPC_PERFCOUNTER0_SELECT,
+       R_036010_CPC_PERFCOUNTER0_SELECT1,
+       R_03600C_CPC_PERFCOUNTER1_SELECT,
+};
+static struct si_pc_block_base cik_CPC = {
        .name = "CPC",
        .num_counters = 2,
 
-       .select = { R_036024_CPC_PERFCOUNTER0_SELECT,
-                   R_036010_CPC_PERFCOUNTER0_SELECT1,
-                   R_03600C_CPC_PERFCOUNTER1_SELECT },
+       .select = cik_CPC_select,
        .counter0_lo = R_034018_CPC_PERFCOUNTER0_LO,
        .num_multi = 1,
        .layout = SI_PC_MULTI_CUSTOM | SI_PC_REG_REVERSE,
 };
 
-static const struct si_pc_block_base cik_CPF = {
+static struct si_pc_block_base cik_CPF = {
        .name = "CPF",
        .num_counters = 2,
 
@@ -132,7 +130,7 @@ static const struct si_pc_block_base cik_CPF = {
        .layout = SI_PC_MULTI_ALTERNATE | SI_PC_REG_REVERSE,
 };
 
-static const struct si_pc_block_base cik_CPG = {
+static struct si_pc_block_base cik_CPG = {
        .name = "CPG",
        .num_counters = 2,
 
@@ -142,10 +140,10 @@ static const struct si_pc_block_base cik_CPG = {
        .layout = SI_PC_MULTI_ALTERNATE | SI_PC_REG_REVERSE,
 };
 
-static const struct si_pc_block_base cik_DB = {
+static struct si_pc_block_base cik_DB = {
        .name = "DB",
        .num_counters = 4,
-       .flags = R600_PC_BLOCK_SE | R600_PC_BLOCK_INSTANCE_GROUPS,
+       .flags = SI_PC_BLOCK_SE | SI_PC_BLOCK_INSTANCE_GROUPS,
 
        .select0 = R_037100_DB_PERFCOUNTER0_SELECT,
        .counter0_lo = R_035100_DB_PERFCOUNTER0_LO,
@@ -153,7 +151,7 @@ static const struct si_pc_block_base cik_DB = {
        .layout = SI_PC_MULTI_ALTERNATE,
 };
 
-static const struct si_pc_block_base cik_GDS = {
+static struct si_pc_block_base cik_GDS = {
        .name = "GDS",
        .num_counters = 4,
 
@@ -163,16 +161,19 @@ static const struct si_pc_block_base cik_GDS = {
        .layout = SI_PC_MULTI_TAIL,
 };
 
-static const struct si_pc_block_base cik_GRBM = {
+static unsigned cik_GRBM_counters[] = {
+       R_034100_GRBM_PERFCOUNTER0_LO,
+       R_03410C_GRBM_PERFCOUNTER1_LO,
+};
+static struct si_pc_block_base cik_GRBM = {
        .name = "GRBM",
        .num_counters = 2,
 
        .select0 = R_036100_GRBM_PERFCOUNTER0_SELECT,
-       .counters = { R_034100_GRBM_PERFCOUNTER0_LO,
-                     R_03410C_GRBM_PERFCOUNTER1_LO },
+       .counters = cik_GRBM_counters,
 };
 
-static const struct si_pc_block_base cik_GRBMSE = {
+static struct si_pc_block_base cik_GRBMSE = {
        .name = "GRBMSE",
        .num_counters = 4,
 
@@ -180,7 +181,7 @@ static const struct si_pc_block_base cik_GRBMSE = {
        .counter0_lo = R_034114_GRBM_SE0_PERFCOUNTER_LO,
 };
 
-static const struct si_pc_block_base cik_IA = {
+static struct si_pc_block_base cik_IA = {
        .name = "IA",
        .num_counters = 4,
 
@@ -190,10 +191,10 @@ static const struct si_pc_block_base cik_IA = {
        .layout = SI_PC_MULTI_TAIL,
 };
 
-static const struct si_pc_block_base cik_PA_SC = {
+static struct si_pc_block_base cik_PA_SC = {
        .name = "PA_SC",
        .num_counters = 8,
-       .flags = R600_PC_BLOCK_SE,
+       .flags = SI_PC_BLOCK_SE,
 
        .select0 = R_036500_PA_SC_PERFCOUNTER0_SELECT,
        .counter0_lo = R_034500_PA_SC_PERFCOUNTER0_LO,
@@ -202,10 +203,10 @@ static const struct si_pc_block_base cik_PA_SC = {
 };
 
 /* According to docs, PA_SU counters are only 48 bits wide. */
-static const struct si_pc_block_base cik_PA_SU = {
+static struct si_pc_block_base cik_PA_SU = {
        .name = "PA_SU",
        .num_counters = 4,
-       .flags = R600_PC_BLOCK_SE,
+       .flags = SI_PC_BLOCK_SE,
 
        .select0 = R_036400_PA_SU_PERFCOUNTER0_SELECT,
        .counter0_lo = R_034400_PA_SU_PERFCOUNTER0_LO,
@@ -213,10 +214,10 @@ static const struct si_pc_block_base cik_PA_SU = {
        .layout = SI_PC_MULTI_ALTERNATE,
 };
 
-static const struct si_pc_block_base cik_SPI = {
+static struct si_pc_block_base cik_SPI = {
        .name = "SPI",
        .num_counters = 6,
-       .flags = R600_PC_BLOCK_SE,
+       .flags = SI_PC_BLOCK_SE,
 
        .select0 = R_036600_SPI_PERFCOUNTER0_SELECT,
        .counter0_lo = R_034604_SPI_PERFCOUNTER0_LO,
@@ -224,10 +225,10 @@ static const struct si_pc_block_base cik_SPI = {
        .layout = SI_PC_MULTI_BLOCK,
 };
 
-static const struct si_pc_block_base cik_SQ = {
+static struct si_pc_block_base cik_SQ = {
        .name = "SQ",
        .num_counters = 16,
-       .flags = R600_PC_BLOCK_SE | R600_PC_BLOCK_SHADER,
+       .flags = SI_PC_BLOCK_SE | SI_PC_BLOCK_SHADER,
 
        .select0 = R_036700_SQ_PERFCOUNTER0_SELECT,
        .select_or = S_036700_SQC_BANK_MASK(15) |
@@ -236,10 +237,10 @@ static const struct si_pc_block_base cik_SQ = {
        .counter0_lo = R_034700_SQ_PERFCOUNTER0_LO,
 };
 
-static const struct si_pc_block_base cik_SX = {
+static struct si_pc_block_base cik_SX = {
        .name = "SX",
        .num_counters = 4,
-       .flags = R600_PC_BLOCK_SE,
+       .flags = SI_PC_BLOCK_SE,
 
        .select0 = R_036900_SX_PERFCOUNTER0_SELECT,
        .counter0_lo = R_034900_SX_PERFCOUNTER0_LO,
@@ -247,10 +248,10 @@ static const struct si_pc_block_base cik_SX = {
        .layout = SI_PC_MULTI_TAIL,
 };
 
-static const struct si_pc_block_base cik_TA = {
+static struct si_pc_block_base cik_TA = {
        .name = "TA",
        .num_counters = 2,
-       .flags = R600_PC_BLOCK_SE | R600_PC_BLOCK_INSTANCE_GROUPS | R600_PC_BLOCK_SHADER_WINDOWED,
+       .flags = SI_PC_BLOCK_SE | SI_PC_BLOCK_INSTANCE_GROUPS | SI_PC_BLOCK_SHADER_WINDOWED,
 
        .select0 = R_036B00_TA_PERFCOUNTER0_SELECT,
        .counter0_lo = R_034B00_TA_PERFCOUNTER0_LO,
@@ -258,10 +259,10 @@ static const struct si_pc_block_base cik_TA = {
        .layout = SI_PC_MULTI_ALTERNATE,
 };
 
-static const struct si_pc_block_base cik_TD = {
+static struct si_pc_block_base cik_TD = {
        .name = "TD",
        .num_counters = 2,
-       .flags = R600_PC_BLOCK_SE | R600_PC_BLOCK_INSTANCE_GROUPS | R600_PC_BLOCK_SHADER_WINDOWED,
+       .flags = SI_PC_BLOCK_SE | SI_PC_BLOCK_INSTANCE_GROUPS | SI_PC_BLOCK_SHADER_WINDOWED,
 
        .select0 = R_036C00_TD_PERFCOUNTER0_SELECT,
        .counter0_lo = R_034C00_TD_PERFCOUNTER0_LO,
@@ -269,10 +270,10 @@ static const struct si_pc_block_base cik_TD = {
        .layout = SI_PC_MULTI_ALTERNATE,
 };
 
-static const struct si_pc_block_base cik_TCA = {
+static struct si_pc_block_base cik_TCA = {
        .name = "TCA",
        .num_counters = 4,
-       .flags = R600_PC_BLOCK_INSTANCE_GROUPS,
+       .flags = SI_PC_BLOCK_INSTANCE_GROUPS,
 
        .select0 = R_036E40_TCA_PERFCOUNTER0_SELECT,
        .counter0_lo = R_034E40_TCA_PERFCOUNTER0_LO,
@@ -280,10 +281,10 @@ static const struct si_pc_block_base cik_TCA = {
        .layout = SI_PC_MULTI_ALTERNATE,
 };
 
-static const struct si_pc_block_base cik_TCC = {
+static struct si_pc_block_base cik_TCC = {
        .name = "TCC",
        .num_counters = 4,
-       .flags = R600_PC_BLOCK_INSTANCE_GROUPS,
+       .flags = SI_PC_BLOCK_INSTANCE_GROUPS,
 
        .select0 = R_036E00_TCC_PERFCOUNTER0_SELECT,
        .counter0_lo = R_034E00_TCC_PERFCOUNTER0_LO,
@@ -291,10 +292,10 @@ static const struct si_pc_block_base cik_TCC = {
        .layout = SI_PC_MULTI_ALTERNATE,
 };
 
-static const struct si_pc_block_base cik_TCP = {
+static struct si_pc_block_base cik_TCP = {
        .name = "TCP",
        .num_counters = 4,
-       .flags = R600_PC_BLOCK_SE | R600_PC_BLOCK_INSTANCE_GROUPS | R600_PC_BLOCK_SHADER_WINDOWED,
+       .flags = SI_PC_BLOCK_SE | SI_PC_BLOCK_INSTANCE_GROUPS | SI_PC_BLOCK_SHADER_WINDOWED,
 
        .select0 = R_036D00_TCP_PERFCOUNTER0_SELECT,
        .counter0_lo = R_034D00_TCP_PERFCOUNTER0_LO,
@@ -302,10 +303,10 @@ static const struct si_pc_block_base cik_TCP = {
        .layout = SI_PC_MULTI_ALTERNATE,
 };
 
-static const struct si_pc_block_base cik_VGT = {
+static struct si_pc_block_base cik_VGT = {
        .name = "VGT",
        .num_counters = 4,
-       .flags = R600_PC_BLOCK_SE,
+       .flags = SI_PC_BLOCK_SE,
 
        .select0 = R_036230_VGT_PERFCOUNTER0_SELECT,
        .counter0_lo = R_034240_VGT_PERFCOUNTER0_LO,
@@ -313,7 +314,7 @@ static const struct si_pc_block_base cik_VGT = {
        .layout = SI_PC_MULTI_TAIL,
 };
 
-static const struct si_pc_block_base cik_WD = {
+static struct si_pc_block_base cik_WD = {
        .name = "WD",
        .num_counters = 4,
 
@@ -321,14 +322,14 @@ static const struct si_pc_block_base cik_WD = {
        .counter0_lo = R_034200_WD_PERFCOUNTER0_LO,
 };
 
-static const struct si_pc_block_base cik_MC = {
+static struct si_pc_block_base cik_MC = {
        .name = "MC",
        .num_counters = 4,
 
        .layout = SI_PC_FAKE,
 };
 
-static const struct si_pc_block_base cik_SRBM = {
+static struct si_pc_block_base cik_SRBM = {
        .name = "SRBM",
        .num_counters = 2,
 
@@ -343,10 +344,10 @@ static const struct si_pc_block_base cik_SRBM = {
  * blindly once it believes it has identified the hardware, so the order of
  * blocks here matters.
  */
-static const struct si_pc_block groups_CIK[] = {
-       { &cik_CB, 226, 4 },
+static struct si_pc_block groups_CIK[] = {
+       { &cik_CB, 226},
        { &cik_CPF, 17 },
-       { &cik_DB, 257, 4 },
+       { &cik_DB, 257},
        { &cik_GRBM, 34 },
        { &cik_GRBMSE, 15 },
        { &cik_PA_SU, 153 },
@@ -356,7 +357,7 @@ static const struct si_pc_block groups_CIK[] = {
        { &cik_SX, 32 },
        { &cik_TA, 111, 11 },
        { &cik_TCA, 39, 2 },
-       { &cik_TCC, 160, 16 },
+       { &cik_TCC, 160},
        { &cik_TD, 55, 11 },
        { &cik_TCP, 154, 11 },
        { &cik_GDS, 121 },
@@ -370,20 +371,20 @@ static const struct si_pc_block groups_CIK[] = {
 
 };
 
-static const struct si_pc_block groups_VI[] = {
-       { &cik_CB, 396, 4 },
+static struct si_pc_block groups_VI[] = {
+       { &cik_CB, 405},
        { &cik_CPF, 19 },
-       { &cik_DB, 257, 4 },
+       { &cik_DB, 257},
        { &cik_GRBM, 34 },
        { &cik_GRBMSE, 15 },
-       { &cik_PA_SU, 153 },
+       { &cik_PA_SU, 154 },
        { &cik_PA_SC, 397 },
        { &cik_SPI, 197 },
        { &cik_SQ, 273 },
        { &cik_SX, 34 },
        { &cik_TA, 119, 16 },
        { &cik_TCA, 35, 2 },
-       { &cik_TCC, 192, 16 },
+       { &cik_TCC, 192},
        { &cik_TD, 55, 16 },
        { &cik_TCP, 180, 16 },
        { &cik_GDS, 121 },
@@ -397,39 +398,34 @@ static const struct si_pc_block groups_VI[] = {
 
 };
 
-static void si_pc_get_size(struct r600_perfcounter_block *group,
-                       unsigned count, unsigned *selectors,
-                       unsigned *num_select_dw, unsigned *num_read_dw)
-{
-       const struct si_pc_block *sigroup = (const struct si_pc_block *)group->data;
-       const struct si_pc_block_base *regs = sigroup->b;
-       unsigned layout_multi = regs->layout & SI_PC_MULTI_MASK;
-
-       if (regs->layout & SI_PC_FAKE) {
-               *num_select_dw = 0;
-       } else if (layout_multi == SI_PC_MULTI_BLOCK) {
-               if (count < regs->num_multi)
-                       *num_select_dw = 2 * (count + 2) + regs->num_prelude;
-               else
-                       *num_select_dw = 2 + count + regs->num_multi + regs->num_prelude;
-       } else if (layout_multi == SI_PC_MULTI_TAIL) {
-               *num_select_dw = 4 + count + MIN2(count, regs->num_multi) + regs->num_prelude;
-       } else if (layout_multi == SI_PC_MULTI_CUSTOM) {
-               assert(regs->num_prelude == 0);
-               *num_select_dw = 3 * (count + MIN2(count, regs->num_multi));
-       } else {
-               assert(layout_multi == SI_PC_MULTI_ALTERNATE);
-
-               *num_select_dw = 2 + count + MIN2(count, regs->num_multi) + regs->num_prelude;
-       }
-
-       *num_read_dw = 6 * count;
-}
+static struct si_pc_block groups_gfx9[] = {
+       { &cik_CB, 438},
+       { &cik_CPF, 32 },
+       { &cik_DB, 328},
+       { &cik_GRBM, 38 },
+       { &cik_GRBMSE, 16 },
+       { &cik_PA_SU, 292 },
+       { &cik_PA_SC, 491 },
+       { &cik_SPI, 196 },
+       { &cik_SQ, 374 },
+       { &cik_SX, 208 },
+       { &cik_TA, 119, 16 },
+       { &cik_TCA, 35, 2 },
+       { &cik_TCC, 256},
+       { &cik_TD, 57, 16 },
+       { &cik_TCP, 85, 16 },
+       { &cik_GDS, 121 },
+       { &cik_VGT, 148 },
+       { &cik_IA, 32 },
+       { &cik_WD, 58 },
+       { &cik_CPG, 59 },
+       { &cik_CPC, 35 },
+};
 
-static void si_pc_emit_instance(struct r600_common_context *ctx,
+static void si_pc_emit_instance(struct si_context *sctx,
                                int se, int instance)
 {
-       struct radeon_winsys_cs *cs = ctx->gfx.cs;
+       struct radeon_cmdbuf *cs = sctx->gfx_cs;
        unsigned value = S_030800_SH_BROADCAST_WRITES(1);
 
        if (se >= 0) {
@@ -447,23 +443,23 @@ static void si_pc_emit_instance(struct r600_common_context *ctx,
        radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, value);
 }
 
-static void si_pc_emit_shaders(struct r600_common_context *ctx,
+static void si_pc_emit_shaders(struct si_context *sctx,
                               unsigned shaders)
 {
-       struct radeon_winsys_cs *cs = ctx->gfx.cs;
+       struct radeon_cmdbuf *cs = sctx->gfx_cs;
 
        radeon_set_uconfig_reg_seq(cs, R_036780_SQ_PERFCOUNTER_CTRL, 2);
        radeon_emit(cs, shaders & 0x7f);
        radeon_emit(cs, 0xffffffff);
 }
 
-static void si_pc_emit_select(struct r600_common_context *ctx,
-                       struct r600_perfcounter_block *group,
+static void si_pc_emit_select(struct si_context *sctx,
+                       struct si_perfcounter_block *group,
                        unsigned count, unsigned *selectors)
 {
-       const struct si_pc_block *sigroup = (const struct si_pc_block *)group->data;
-       const struct si_pc_block_base *regs = sigroup->b;
-       struct radeon_winsys_cs *cs = ctx->gfx.cs;
+       struct si_pc_block *sigroup = (struct si_pc_block *)group->data;
+       struct si_pc_block_base *regs = sigroup->b;
+       struct radeon_cmdbuf *cs = sctx->gfx_cs;
        unsigned idx;
        unsigned layout_multi = regs->layout & SI_PC_MULTI_MASK;
        unsigned dw;
@@ -515,7 +511,7 @@ static void si_pc_emit_select(struct r600_common_context *ctx,
                for (idx = 0; idx < select1_count; ++idx)
                        radeon_emit(cs, 0);
        } else if (layout_multi == SI_PC_MULTI_CUSTOM) {
-               const unsigned *reg = regs->select;
+               unsigned *reg = regs->select;
                for (idx = 0; idx < count; ++idx) {
                        radeon_set_uconfig_reg(cs, *reg++, selectors[idx] | regs->select_or);
                        if (idx < regs->num_multi)
@@ -553,17 +549,17 @@ static void si_pc_emit_select(struct r600_common_context *ctx,
        }
 }
 
-static void si_pc_emit_start(struct r600_common_context *ctx,
+static void si_pc_emit_start(struct si_context *sctx,
                             struct r600_resource *buffer, uint64_t va)
 {
-       struct radeon_winsys_cs *cs = ctx->gfx.cs;
+       struct radeon_cmdbuf *cs = sctx->gfx_cs;
 
-       radeon_add_to_buffer_list(ctx, &ctx->gfx, buffer,
+       radeon_add_to_buffer_list(sctx, sctx->gfx_cs, buffer,
                                  RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
 
        radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
        radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
-                       COPY_DATA_DST_SEL(COPY_DATA_MEM));
+                       COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM));
        radeon_emit(cs, 1); /* immediate */
        radeon_emit(cs, 0); /* unused */
        radeon_emit(cs, va);
@@ -572,39 +568,40 @@ static void si_pc_emit_start(struct r600_common_context *ctx,
        radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
                               S_036020_PERFMON_STATE(V_036020_DISABLE_AND_RESET));
        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
-       radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_START) | EVENT_INDEX(0));
+       radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0));
        radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
                               S_036020_PERFMON_STATE(V_036020_START_COUNTING));
 }
 
 /* Note: The buffer was already added in si_pc_emit_start, so we don't have to
  * do it again in here. */
-static void si_pc_emit_stop(struct r600_common_context *ctx,
+static void si_pc_emit_stop(struct si_context *sctx,
                            struct r600_resource *buffer, uint64_t va)
 {
-       struct radeon_winsys_cs *cs = ctx->gfx.cs;
+       struct radeon_cmdbuf *cs = sctx->gfx_cs;
 
-       r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0, 1,
-                                buffer, va, 1, 0);
-       r600_gfx_wait_fence(ctx, va, 0, 0xffffffff);
+       si_gfx_write_event_eop(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
+                              EOP_DATA_SEL_VALUE_32BIT,
+                              buffer, va, 0, SI_NOT_QUERY);
+       si_gfx_wait_fence(sctx, va, 0, 0xffffffff);
 
        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
-       radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
+       radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
-       radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_STOP) | EVENT_INDEX(0));
+       radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0));
        radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
                               S_036020_PERFMON_STATE(V_036020_STOP_COUNTING) |
                               S_036020_PERFMON_SAMPLE_ENABLE(1));
 }
 
-static void si_pc_emit_read(struct r600_common_context *ctx,
-                           struct r600_perfcounter_block *group,
+static void si_pc_emit_read(struct si_context *sctx,
+                           struct si_perfcounter_block *group,
                            unsigned count, unsigned *selectors,
                            struct r600_resource *buffer, uint64_t va)
 {
-       const struct si_pc_block *sigroup = (const struct si_pc_block *)group->data;
-       const struct si_pc_block_base *regs = sigroup->b;
-       struct radeon_winsys_cs *cs = ctx->gfx.cs;
+       struct si_pc_block *sigroup = (struct si_pc_block *)group->data;
+       struct si_pc_block_base *regs = sigroup->b;
+       struct radeon_cmdbuf *cs = sctx->gfx_cs;
        unsigned idx;
        unsigned reg = regs->counter0_lo;
        unsigned reg_delta = 8;
@@ -619,7 +616,7 @@ static void si_pc_emit_read(struct r600_common_context *ctx,
 
                        radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
                        radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) |
-                                       COPY_DATA_DST_SEL(COPY_DATA_MEM) |
+                                       COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM) |
                                        COPY_DATA_COUNT_SEL); /* 64 bits */
                        radeon_emit(cs, reg >> 2);
                        radeon_emit(cs, 0); /* unused */
@@ -632,7 +629,7 @@ static void si_pc_emit_read(struct r600_common_context *ctx,
                for (idx = 0; idx < count; ++idx) {
                        radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
                        radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
-                                       COPY_DATA_DST_SEL(COPY_DATA_MEM) |
+                                       COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM) |
                                        COPY_DATA_COUNT_SEL);
                        radeon_emit(cs, 0); /* immediate */
                        radeon_emit(cs, 0);
@@ -643,20 +640,20 @@ static void si_pc_emit_read(struct r600_common_context *ctx,
        }
 }
 
-static void si_pc_cleanup(struct r600_common_screen *rscreen)
+static void si_pc_cleanup(struct si_screen *sscreen)
 {
-       r600_perfcounters_do_destroy(rscreen->perfcounters);
-       rscreen->perfcounters = NULL;
+       si_perfcounters_do_destroy(sscreen->perfcounters);
+       sscreen->perfcounters = NULL;
 }
 
 void si_init_perfcounters(struct si_screen *screen)
 {
-       struct r600_perfcounters *pc;
-       const struct si_pc_block *blocks;
+       struct si_perfcounters *pc;
+       struct si_pc_block *blocks;
        unsigned num_blocks;
        unsigned i;
 
-       switch (screen->b.chip_class) {
+       switch (screen->info.chip_class) {
        case CIK:
                blocks = groups_CIK;
                num_blocks = ARRAY_SIZE(groups_CIK);
@@ -665,33 +662,33 @@ void si_init_perfcounters(struct si_screen *screen)
                blocks = groups_VI;
                num_blocks = ARRAY_SIZE(groups_VI);
                break;
-       case SI:
        case GFX9:
+               blocks = groups_gfx9;
+               num_blocks = ARRAY_SIZE(groups_gfx9);
+               break;
+       case SI:
        default:
                return; /* not implemented */
        }
 
-       if (screen->b.info.max_sh_per_se != 1) {
+       if (screen->info.max_sh_per_se != 1) {
                /* This should not happen on non-SI chips. */
                fprintf(stderr, "si_init_perfcounters: max_sh_per_se = %d not "
                        "supported (inaccurate performance counters)\n",
-                       screen->b.info.max_sh_per_se);
+                       screen->info.max_sh_per_se);
        }
 
-       pc = CALLOC_STRUCT(r600_perfcounters);
+       pc = CALLOC_STRUCT(si_perfcounters);
        if (!pc)
                return;
 
-       pc->num_start_cs_dwords = 14;
-       pc->num_stop_cs_dwords = 14 + r600_gfx_write_fence_dwords(&screen->b);
+       pc->num_stop_cs_dwords = 14 + si_gfx_write_fence_dwords(screen);
        pc->num_instance_cs_dwords = 3;
-       pc->num_shaders_cs_dwords = 4;
 
        pc->num_shader_types = ARRAY_SIZE(si_pc_shader_type_bits);
        pc->shader_type_suffixes = si_pc_shader_type_suffixes;
        pc->shader_type_bits = si_pc_shader_type_bits;
 
-       pc->get_size = si_pc_get_size;
        pc->emit_instance = si_pc_emit_instance;
        pc->emit_shaders = si_pc_emit_shaders;
        pc->emit_select = si_pc_emit_select;
@@ -700,19 +697,22 @@ void si_init_perfcounters(struct si_screen *screen)
        pc->emit_read = si_pc_emit_read;
        pc->cleanup = si_pc_cleanup;
 
-       if (!r600_perfcounters_init(pc, num_blocks))
+       if (!si_perfcounters_init(pc, num_blocks))
                goto error;
 
        for (i = 0; i < num_blocks; ++i) {
-               const struct si_pc_block *block = &blocks[i];
+               struct si_pc_block *block = &blocks[i];
                unsigned instances = block->instances;
 
-               if (!strcmp(block->b->name, "IA")) {
-                       if (screen->b.info.max_se > 2)
-                               instances = 2;
-               }
+               if (!strcmp(block->b->name, "CB") ||
+                   !strcmp(block->b->name, "DB"))
+                       instances = screen->info.max_se;
+               else if (!strcmp(block->b->name, "TCC"))
+                       instances = screen->info.num_tcc_blocks;
+               else if (!strcmp(block->b->name, "IA"))
+                       instances = MAX2(1, screen->info.max_se / 2);
 
-               r600_perfcounters_add_block(&screen->b, pc,
+               si_perfcounters_add_block(screen, pc,
                                            block->b->name,
                                            block->b->flags,
                                            block->b->num_counters,
@@ -721,9 +721,9 @@ void si_init_perfcounters(struct si_screen *screen)
                                            block);
        }
 
-       screen->b.perfcounters = pc;
+       screen->perfcounters = pc;
        return;
 
 error:
-       r600_perfcounters_do_destroy(pc);
+       si_perfcounters_do_destroy(pc);
 }