freedreno: a2xx: add perfcntrs
author Jonathan Marek <jonathan@marek.ca>
Mon, 28 Jan 2019 15:09:39 +0000 (10:09 -0500)
committer Rob Clark <robdclark@gmail.com>
Mon, 28 Jan 2019 23:21:16 +0000 (18:21 -0500)
Based on a5xx perfcntrs implementation.

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
src/gallium/drivers/freedreno/Makefile.sources
src/gallium/drivers/freedreno/a2xx/fd2_context.c
src/gallium/drivers/freedreno/a2xx/fd2_emit.c
src/gallium/drivers/freedreno/a2xx/fd2_perfcntr.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a2xx/fd2_query.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a2xx/fd2_query.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a2xx/fd2_screen.c
src/gallium/drivers/freedreno/meson.build

index d2eddb989d732092707f2371b6f6bd9974727365..e11c42517c92595e58dd3c69eae04135876fb22d 100644 (file)
@@ -50,8 +50,11 @@ a2xx_SOURCES := \
        a2xx/fd2_emit.h \
        a2xx/fd2_gmem.c \
        a2xx/fd2_gmem.h \
+       a2xx/fd2_perfcntr.c \
        a2xx/fd2_program.c \
        a2xx/fd2_program.h \
+       a2xx/fd2_query.c \
+       a2xx/fd2_query.h \
        a2xx/fd2_rasterizer.c \
        a2xx/fd2_rasterizer.h \
        a2xx/fd2_resource.c \
index 6d9dce2ffbec787aa1b62cfb36c554c9543ccdb5..9353e37420f0052c679db2be933b10e94ee71824 100644 (file)
@@ -31,6 +31,7 @@
 #include "fd2_emit.h"
 #include "fd2_gmem.h"
 #include "fd2_program.h"
+#include "fd2_query.h"
 #include "fd2_rasterizer.h"
 #include "fd2_texture.h"
 #include "fd2_zsa.h"
@@ -120,5 +121,7 @@ fd2_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
        /* construct vertex state used for solid ops (clear, and gmem<->mem) */
        fd2_ctx->solid_vertexbuf = create_solid_vertexbuf(pctx);
 
+       fd2_query_context_init(pctx);
+
        return pctx;
 }
index 805a4cf032a6b9e2f6559ed8342fba4dbe5b6a31..76fd7dee9d707dd1a1712e29079c8e0425631db2 100644 (file)
@@ -424,6 +424,15 @@ fd2_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
                OUT_RING(ring, 0x0000003b);
        }
 
+       /* enable perfcntrs */
+       OUT_PKT0(ring, REG_A2XX_CP_PERFMON_CNTL, 1);
+       OUT_RING(ring, COND(fd_mesa_debug & FD_DBG_PERFC, 1));
+
+       /* note: perfcntrs don't work without the PM_OVERRIDE bit */
+       OUT_PKT0(ring, REG_A2XX_RBBM_PM_OVERRIDE1, 2);
+       OUT_RING(ring, 0xffffffff);
+       OUT_RING(ring, 0x00000fff);
+
        OUT_PKT0(ring, REG_A2XX_TP0_CHICKEN, 1);
        OUT_RING(ring, 0x00000002);
 
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_perfcntr.c b/src/gallium/drivers/freedreno/a2xx/fd2_perfcntr.c
new file mode 100644 (file)
index 0000000..35f084e
--- /dev/null
@@ -0,0 +1,813 @@
+/*
+ * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Jonathan Marek <jonathan@marek.ca>
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "freedreno_perfcntr.h"
+#include "freedreno_util.h"
+#include "a2xx.xml.h"
+
+#define REG(_x) REG_A2XX_ ## _x  /* token-pastes the REG_A2XX_ prefix onto _x, so callers pass the short register name */
+
+#define COUNTER(_sel, _lo, _hi) {  /* brace initializer for one counter: each argument is a short register name expanded through REG() */ \
+       .select_reg = REG(_sel),       \
+       .counter_reg_lo = REG(_lo),    \
+       .counter_reg_hi = REG(_hi),    \
+}
+
+#define COUNTABLE(_selector, _query_type, _result_type) {  /* brace initializer for one countable: .name is the selector identifier stringified, .selector is its value; the two type arguments are pasted onto the PIPE_DRIVER_QUERY_* prefixes */          \
+       .name        = #_selector,                                       \
+       .selector    = _selector,                                        \
+       .query_type  = PIPE_DRIVER_QUERY_TYPE_ ## _query_type,           \
+       .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_ ## _result_type,   \
+}
+
+#define GROUP(_name, _counters, _countables) {  /* brace initializer tying a counter array to a countable array under _name; both counts come from ARRAY_SIZE (defined outside this view), so presumably real arrays, not pointers, must be passed - confirm */ \
+       .name           = _name,                     \
+       .num_counters   = ARRAY_SIZE(_counters),     \
+       .counters       = _counters,                 \
+       .num_countables = ARRAY_SIZE(_countables),   \
+       .countables     = _countables, \
+}
+
+static const struct fd_perfcntr_countable pa_su_countables[] = { /* countable table for pa_su (PERF_PAPC_* selectors); every entry is UINT64 / AVERAGE. Presumably paired with the PA_SU counters by a GROUP() entry outside this view - confirm */
+       COUNTABLE(PERF_PAPC_PASX_REQ, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_PASX_FIRST_VECTOR, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_PASX_SECOND_VECTOR, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_PASX_FIRST_DEAD, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_PASX_SECOND_DEAD, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_PASX_VTX_KILL_DISCARD, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_PASX_VTX_NAN_DISCARD, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_PA_INPUT_PRIM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_PA_INPUT_NULL_PRIM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_PA_INPUT_EVENT_FLAG, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_PA_INPUT_FIRST_PRIM_SLOT, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_PA_INPUT_END_OF_PACKET, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPR_CULL_PRIM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPR_VV_CULL_PRIM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPR_VTX_KILL_CULL_PRIM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPR_VTX_NAN_CULL_PRIM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPR_CULL_TO_NULL_PRIM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPR_VV_CLIP_PRIM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPR_POINT_CLIP_CANDIDATE, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_1, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_2, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_3, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_4, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_5, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_6, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_NEAR, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_FAR, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_LEFT, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_RIGHT, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_TOP, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_BOTTOM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLSM_NULL_PRIM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLSM_TOTALLY_VISIBLE_PRIM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLSM_CLIP_PRIM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLSM_CULL_TO_NULL_PRIM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_1, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_2, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_3, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_4, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_5, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_6_7, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLSM_NON_TRIVIAL_CULL, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_INPUT_PRIM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_INPUT_CLIP_PRIM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_INPUT_NULL_PRIM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_ZERO_AREA_CULL_PRIM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_BACK_FACE_CULL_PRIM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_FRONT_FACE_CULL_PRIM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_POLYMODE_FACE_CULL, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_POLYMODE_BACK_CULL, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_POLYMODE_FRONT_CULL, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_POLYMODE_INVALID_FILL, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_OUTPUT_PRIM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_OUTPUT_CLIP_PRIM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_OUTPUT_NULL_PRIM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_OUTPUT_EVENT_FLAG, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_OUTPUT_FIRST_PRIM_SLOT, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_OUTPUT_END_OF_PACKET, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_OUTPUT_POLYMODE_FACE, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_OUTPUT_POLYMODE_BACK, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_OUTPUT_POLYMODE_FRONT, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_OUT_CLIP_POLYMODE_FACE, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_OUT_CLIP_POLYMODE_BACK, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_OUT_CLIP_POLYMODE_FRONT, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_PASX_REQ_IDLE, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_PASX_REQ_BUSY, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_PASX_REQ_STALLED, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_PASX_REC_IDLE, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_PASX_REC_BUSY, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_PASX_REC_STARVED_SX, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_PASX_REC_STALLED, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_PASX_REC_STALLED_POS_MEM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_PASX_REC_STALLED_CCGSM_IN, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CCGSM_IDLE, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CCGSM_BUSY, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CCGSM_STALLED, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPRIM_IDLE, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPRIM_BUSY, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPRIM_STALLED, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLPRIM_STARVED_CCGSM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLIPSM_IDLE, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLIPSM_BUSY, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLIPSM_WAIT_CLIP_VERT_ENGH, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLIPSM_WAIT_HIGH_PRI_SEQ, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLIPSM_WAIT_CLIPGA, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLIPSM_WAIT_AVAIL_VTE_CLIP, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLIPSM_WAIT_CLIP_OUTSM, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLIPGA_IDLE, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLIPGA_BUSY, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLIPGA_STARVED_VTE_CLIP, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLIPGA_STALLED, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLIP_IDLE, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_CLIP_BUSY, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_IDLE, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_BUSY, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_STARVED_CLIP, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_STALLED_SC, UINT64, AVERAGE),
+       COUNTABLE(PERF_PAPC_SU_FACENESS_CULL, UINT64, AVERAGE),
+}; /* end of pa_su_countables */
+
+static const struct fd_perfcntr_countable pa_sc_countables[] = { /* countable table for pa_sc (SC_* selectors); every entry is UINT64 / AVERAGE */
+       COUNTABLE(SC_SR_WINDOW_VALID, UINT64, AVERAGE),
+       COUNTABLE(SC_CW_WINDOW_VALID, UINT64, AVERAGE),
+       COUNTABLE(SC_QM_WINDOW_VALID, UINT64, AVERAGE),
+       COUNTABLE(SC_FW_WINDOW_VALID, UINT64, AVERAGE),
+       COUNTABLE(SC_EZ_WINDOW_VALID, UINT64, AVERAGE),
+       COUNTABLE(SC_IT_WINDOW_VALID, UINT64, AVERAGE),
+       COUNTABLE(SC_STARVED_BY_PA, UINT64, AVERAGE),
+       COUNTABLE(SC_STALLED_BY_RB_TILE, UINT64, AVERAGE),
+       COUNTABLE(SC_STALLED_BY_RB_SAMP, UINT64, AVERAGE),
+       COUNTABLE(SC_STARVED_BY_RB_EZ, UINT64, AVERAGE),
+       COUNTABLE(SC_STALLED_BY_SAMPLE_FF, UINT64, AVERAGE),
+       COUNTABLE(SC_STALLED_BY_SQ, UINT64, AVERAGE),
+       COUNTABLE(SC_STALLED_BY_SP, UINT64, AVERAGE),
+       COUNTABLE(SC_TOTAL_NO_PRIMS, UINT64, AVERAGE),
+       COUNTABLE(SC_NON_EMPTY_PRIMS, UINT64, AVERAGE),
+       COUNTABLE(SC_NO_TILES_PASSING_QM, UINT64, AVERAGE),
+       COUNTABLE(SC_NO_PIXELS_PRE_EZ, UINT64, AVERAGE),
+       COUNTABLE(SC_NO_PIXELS_POST_EZ, UINT64, AVERAGE),
+}; /* end of pa_sc_countables */
+
+static const struct fd_perfcntr_countable vgt_countables[] = { /* countable table for vgt; mixes VGT_*, RBIU_*, BIN_PRIM_*, SPARE* and TE_* selectors; every entry is UINT64 / AVERAGE */
+       COUNTABLE(VGT_SQ_EVENT_WINDOW_ACTIVE, UINT64, AVERAGE),
+       COUNTABLE(VGT_SQ_SEND, UINT64, AVERAGE),
+       COUNTABLE(VGT_SQ_STALLED, UINT64, AVERAGE),
+       COUNTABLE(VGT_SQ_STARVED_BUSY, UINT64, AVERAGE),
+       COUNTABLE(VGT_SQ_STARVED_IDLE, UINT64, AVERAGE),
+       COUNTABLE(VGT_SQ_STATIC, UINT64, AVERAGE),
+       COUNTABLE(VGT_PA_EVENT_WINDOW_ACTIVE, UINT64, AVERAGE),
+       COUNTABLE(VGT_PA_CLIP_V_SEND, UINT64, AVERAGE),
+       COUNTABLE(VGT_PA_CLIP_V_STALLED, UINT64, AVERAGE),
+       COUNTABLE(VGT_PA_CLIP_V_STARVED_BUSY, UINT64, AVERAGE),
+       COUNTABLE(VGT_PA_CLIP_V_STARVED_IDLE, UINT64, AVERAGE),
+       COUNTABLE(VGT_PA_CLIP_V_STATIC, UINT64, AVERAGE),
+       COUNTABLE(VGT_PA_CLIP_P_SEND, UINT64, AVERAGE),
+       COUNTABLE(VGT_PA_CLIP_P_STALLED, UINT64, AVERAGE),
+       COUNTABLE(VGT_PA_CLIP_P_STARVED_BUSY, UINT64, AVERAGE),
+       COUNTABLE(VGT_PA_CLIP_P_STARVED_IDLE, UINT64, AVERAGE),
+       COUNTABLE(VGT_PA_CLIP_P_STATIC, UINT64, AVERAGE),
+       COUNTABLE(VGT_PA_CLIP_S_SEND, UINT64, AVERAGE),
+       COUNTABLE(VGT_PA_CLIP_S_STALLED, UINT64, AVERAGE),
+       COUNTABLE(VGT_PA_CLIP_S_STARVED_BUSY, UINT64, AVERAGE),
+       COUNTABLE(VGT_PA_CLIP_S_STARVED_IDLE, UINT64, AVERAGE),
+       COUNTABLE(VGT_PA_CLIP_S_STATIC, UINT64, AVERAGE),
+       COUNTABLE(RBIU_FIFOS_EVENT_WINDOW_ACTIVE, UINT64, AVERAGE),
+       COUNTABLE(RBIU_IMMED_DATA_FIFO_STARVED, UINT64, AVERAGE),
+       COUNTABLE(RBIU_IMMED_DATA_FIFO_STALLED, UINT64, AVERAGE),
+       COUNTABLE(RBIU_DMA_REQUEST_FIFO_STARVED, UINT64, AVERAGE),
+       COUNTABLE(RBIU_DMA_REQUEST_FIFO_STALLED, UINT64, AVERAGE),
+       COUNTABLE(RBIU_DRAW_INITIATOR_FIFO_STARVED, UINT64, AVERAGE),
+       COUNTABLE(RBIU_DRAW_INITIATOR_FIFO_STALLED, UINT64, AVERAGE),
+       COUNTABLE(BIN_PRIM_NEAR_CULL, UINT64, AVERAGE),
+       COUNTABLE(BIN_PRIM_ZERO_CULL, UINT64, AVERAGE),
+       COUNTABLE(BIN_PRIM_FAR_CULL, UINT64, AVERAGE),
+       COUNTABLE(BIN_PRIM_BIN_CULL, UINT64, AVERAGE),
+       COUNTABLE(BIN_PRIM_FACE_CULL, UINT64, AVERAGE),
+       COUNTABLE(SPARE34, UINT64, AVERAGE), /* NOTE(review): SPARE34..SPARE39 are listed like any other selector; presumably unassigned hardware slots - confirm they are worth exposing */
+       COUNTABLE(SPARE35, UINT64, AVERAGE),
+       COUNTABLE(SPARE36, UINT64, AVERAGE),
+       COUNTABLE(SPARE37, UINT64, AVERAGE),
+       COUNTABLE(SPARE38, UINT64, AVERAGE),
+       COUNTABLE(SPARE39, UINT64, AVERAGE),
+       COUNTABLE(TE_SU_IN_VALID, UINT64, AVERAGE),
+       COUNTABLE(TE_SU_IN_READ, UINT64, AVERAGE),
+       COUNTABLE(TE_SU_IN_PRIM, UINT64, AVERAGE),
+       COUNTABLE(TE_SU_IN_EOP, UINT64, AVERAGE),
+       COUNTABLE(TE_SU_IN_NULL_PRIM, UINT64, AVERAGE),
+       COUNTABLE(TE_WK_IN_VALID, UINT64, AVERAGE),
+       COUNTABLE(TE_WK_IN_READ, UINT64, AVERAGE),
+       COUNTABLE(TE_OUT_PRIM_VALID, UINT64, AVERAGE),
+       COUNTABLE(TE_OUT_PRIM_READ, UINT64, AVERAGE),
+}; /* end of vgt_countables */
+
+static const struct fd_perfcntr_countable tcr_countables[] = { /* countable table for tcr; only three selectors are listed here; every entry is UINT64 / AVERAGE */
+       COUNTABLE(DGMMPD_IPMUX0_STALL, UINT64, AVERAGE),
+       COUNTABLE(DGMMPD_IPMUX_ALL_STALL, UINT64, AVERAGE),
+       COUNTABLE(OPMUX0_L2_WRITES, UINT64, AVERAGE),
+}; /* end of tcr_countables */
+
+static const struct fd_perfcntr_countable tp0_countables[] = { /* countable table for tp0 (quad-type, per-pixel-count and TP_MIPMAP_LOD* selectors); every entry is UINT64 / AVERAGE */
+       COUNTABLE(POINT_QUADS, UINT64, AVERAGE),
+       COUNTABLE(BILIN_QUADS, UINT64, AVERAGE),
+       COUNTABLE(ANISO_QUADS, UINT64, AVERAGE),
+       COUNTABLE(MIP_QUADS, UINT64, AVERAGE),
+       COUNTABLE(VOL_QUADS, UINT64, AVERAGE),
+       COUNTABLE(MIP_VOL_QUADS, UINT64, AVERAGE),
+       COUNTABLE(MIP_ANISO_QUADS, UINT64, AVERAGE),
+       COUNTABLE(VOL_ANISO_QUADS, UINT64, AVERAGE),
+       COUNTABLE(ANISO_2_1_QUADS, UINT64, AVERAGE),
+       COUNTABLE(ANISO_4_1_QUADS, UINT64, AVERAGE),
+       COUNTABLE(ANISO_6_1_QUADS, UINT64, AVERAGE),
+       COUNTABLE(ANISO_8_1_QUADS, UINT64, AVERAGE),
+       COUNTABLE(ANISO_10_1_QUADS, UINT64, AVERAGE),
+       COUNTABLE(ANISO_12_1_QUADS, UINT64, AVERAGE),
+       COUNTABLE(ANISO_14_1_QUADS, UINT64, AVERAGE),
+       COUNTABLE(ANISO_16_1_QUADS, UINT64, AVERAGE),
+       COUNTABLE(MIP_VOL_ANISO_QUADS, UINT64, AVERAGE),
+       COUNTABLE(ALIGN_2_QUADS, UINT64, AVERAGE),
+       COUNTABLE(ALIGN_4_QUADS, UINT64, AVERAGE),
+       COUNTABLE(PIX_0_QUAD, UINT64, AVERAGE),
+       COUNTABLE(PIX_1_QUAD, UINT64, AVERAGE),
+       COUNTABLE(PIX_2_QUAD, UINT64, AVERAGE),
+       COUNTABLE(PIX_3_QUAD, UINT64, AVERAGE),
+       COUNTABLE(PIX_4_QUAD, UINT64, AVERAGE),
+       COUNTABLE(TP_MIPMAP_LOD0, UINT64, AVERAGE),
+       COUNTABLE(TP_MIPMAP_LOD1, UINT64, AVERAGE),
+       COUNTABLE(TP_MIPMAP_LOD2, UINT64, AVERAGE),
+       COUNTABLE(TP_MIPMAP_LOD3, UINT64, AVERAGE),
+       COUNTABLE(TP_MIPMAP_LOD4, UINT64, AVERAGE),
+       COUNTABLE(TP_MIPMAP_LOD5, UINT64, AVERAGE),
+       COUNTABLE(TP_MIPMAP_LOD6, UINT64, AVERAGE),
+       COUNTABLE(TP_MIPMAP_LOD7, UINT64, AVERAGE),
+       COUNTABLE(TP_MIPMAP_LOD8, UINT64, AVERAGE),
+       COUNTABLE(TP_MIPMAP_LOD9, UINT64, AVERAGE),
+       COUNTABLE(TP_MIPMAP_LOD10, UINT64, AVERAGE),
+       COUNTABLE(TP_MIPMAP_LOD11, UINT64, AVERAGE),
+       COUNTABLE(TP_MIPMAP_LOD12, UINT64, AVERAGE),
+       COUNTABLE(TP_MIPMAP_LOD13, UINT64, AVERAGE),
+       COUNTABLE(TP_MIPMAP_LOD14, UINT64, AVERAGE),
+}; /* end of tp0_countables */
+
+static const struct fd_perfcntr_countable tcm_countables[] = { /* countable table for tcm (read-latency FIFO fill levels and read starve/stall selectors); every entry is UINT64 / AVERAGE */
+       COUNTABLE(QUAD0_RD_LAT_FIFO_EMPTY, UINT64, AVERAGE),
+       COUNTABLE(QUAD0_RD_LAT_FIFO_4TH_FULL, UINT64, AVERAGE),
+       COUNTABLE(QUAD0_RD_LAT_FIFO_HALF_FULL, UINT64, AVERAGE),
+       COUNTABLE(QUAD0_RD_LAT_FIFO_FULL, UINT64, AVERAGE),
+       COUNTABLE(QUAD0_RD_LAT_FIFO_LT_4TH_FULL, UINT64, AVERAGE),
+       COUNTABLE(READ_STARVED_QUAD0, UINT64, AVERAGE),
+       COUNTABLE(READ_STARVED, UINT64, AVERAGE),
+       COUNTABLE(READ_STALLED_QUAD0, UINT64, AVERAGE),
+       COUNTABLE(READ_STALLED, UINT64, AVERAGE),
+       COUNTABLE(VALID_READ_QUAD0, UINT64, AVERAGE),
+       COUNTABLE(TC_TP_STARVED_QUAD0, UINT64, AVERAGE),
+       COUNTABLE(TC_TP_STARVED, UINT64, AVERAGE),
+}; /* end of tcm_countables */
+
+static const struct fd_perfcntr_countable tcf_countables[] = { /* countable table for tcf (phase, TPC_*, OPCODE_*, FIFO, tag/set and fetch selectors); every entry is UINT64 / AVERAGE */
+       COUNTABLE(VALID_CYCLES, UINT64, AVERAGE),
+       COUNTABLE(SINGLE_PHASES, UINT64, AVERAGE),
+       COUNTABLE(ANISO_PHASES, UINT64, AVERAGE),
+       COUNTABLE(MIP_PHASES, UINT64, AVERAGE),
+       COUNTABLE(VOL_PHASES, UINT64, AVERAGE),
+       COUNTABLE(MIP_VOL_PHASES, UINT64, AVERAGE),
+       COUNTABLE(MIP_ANISO_PHASES, UINT64, AVERAGE),
+       COUNTABLE(VOL_ANISO_PHASES, UINT64, AVERAGE),
+       COUNTABLE(ANISO_2_1_PHASES, UINT64, AVERAGE),
+       COUNTABLE(ANISO_4_1_PHASES, UINT64, AVERAGE),
+       COUNTABLE(ANISO_6_1_PHASES, UINT64, AVERAGE),
+       COUNTABLE(ANISO_8_1_PHASES, UINT64, AVERAGE),
+       COUNTABLE(ANISO_10_1_PHASES, UINT64, AVERAGE),
+       COUNTABLE(ANISO_12_1_PHASES, UINT64, AVERAGE),
+       COUNTABLE(ANISO_14_1_PHASES, UINT64, AVERAGE),
+       COUNTABLE(ANISO_16_1_PHASES, UINT64, AVERAGE),
+       COUNTABLE(MIP_VOL_ANISO_PHASES, UINT64, AVERAGE),
+       COUNTABLE(ALIGN_2_PHASES, UINT64, AVERAGE),
+       COUNTABLE(ALIGN_4_PHASES, UINT64, AVERAGE),
+       COUNTABLE(TPC_BUSY, UINT64, AVERAGE),
+       COUNTABLE(TPC_STALLED, UINT64, AVERAGE),
+       COUNTABLE(TPC_STARVED, UINT64, AVERAGE),
+       COUNTABLE(TPC_WORKING, UINT64, AVERAGE),
+       COUNTABLE(TPC_WALKER_BUSY, UINT64, AVERAGE),
+       COUNTABLE(TPC_WALKER_STALLED, UINT64, AVERAGE),
+       COUNTABLE(TPC_WALKER_WORKING, UINT64, AVERAGE),
+       COUNTABLE(TPC_ALIGNER_BUSY, UINT64, AVERAGE),
+       COUNTABLE(TPC_ALIGNER_STALLED, UINT64, AVERAGE),
+       COUNTABLE(TPC_ALIGNER_STALLED_BY_BLEND, UINT64, AVERAGE),
+       COUNTABLE(TPC_ALIGNER_STALLED_BY_CACHE, UINT64, AVERAGE),
+       COUNTABLE(TPC_ALIGNER_WORKING, UINT64, AVERAGE),
+       COUNTABLE(TPC_BLEND_BUSY, UINT64, AVERAGE),
+       COUNTABLE(TPC_BLEND_SYNC, UINT64, AVERAGE),
+       COUNTABLE(TPC_BLEND_STARVED, UINT64, AVERAGE),
+       COUNTABLE(TPC_BLEND_WORKING, UINT64, AVERAGE),
+       COUNTABLE(OPCODE_0x00, UINT64, AVERAGE),
+       COUNTABLE(OPCODE_0x01, UINT64, AVERAGE),
+       COUNTABLE(OPCODE_0x04, UINT64, AVERAGE),
+       COUNTABLE(OPCODE_0x10, UINT64, AVERAGE),
+       COUNTABLE(OPCODE_0x11, UINT64, AVERAGE),
+       COUNTABLE(OPCODE_0x12, UINT64, AVERAGE),
+       COUNTABLE(OPCODE_0x13, UINT64, AVERAGE),
+       COUNTABLE(OPCODE_0x18, UINT64, AVERAGE),
+       COUNTABLE(OPCODE_0x19, UINT64, AVERAGE),
+       COUNTABLE(OPCODE_0x1A, UINT64, AVERAGE),
+       COUNTABLE(OPCODE_OTHER, UINT64, AVERAGE),
+       COUNTABLE(IN_FIFO_0_EMPTY, UINT64, AVERAGE),
+       COUNTABLE(IN_FIFO_0_LT_HALF_FULL, UINT64, AVERAGE),
+       COUNTABLE(IN_FIFO_0_HALF_FULL, UINT64, AVERAGE),
+       COUNTABLE(IN_FIFO_0_FULL, UINT64, AVERAGE),
+       COUNTABLE(IN_FIFO_TPC_EMPTY, UINT64, AVERAGE),
+       COUNTABLE(IN_FIFO_TPC_LT_HALF_FULL, UINT64, AVERAGE),
+       COUNTABLE(IN_FIFO_TPC_HALF_FULL, UINT64, AVERAGE),
+       COUNTABLE(IN_FIFO_TPC_FULL, UINT64, AVERAGE),
+       COUNTABLE(TPC_TC_XFC, UINT64, AVERAGE),
+       COUNTABLE(TPC_TC_STATE, UINT64, AVERAGE),
+       COUNTABLE(TC_STALL, UINT64, AVERAGE),
+       COUNTABLE(QUAD0_TAPS, UINT64, AVERAGE),
+       COUNTABLE(QUADS, UINT64, AVERAGE),
+       COUNTABLE(TCA_SYNC_STALL, UINT64, AVERAGE),
+       COUNTABLE(TAG_STALL, UINT64, AVERAGE),
+       COUNTABLE(TCB_SYNC_STALL, UINT64, AVERAGE),
+       COUNTABLE(TCA_VALID, UINT64, AVERAGE),
+       COUNTABLE(PROBES_VALID, UINT64, AVERAGE),
+       COUNTABLE(MISS_STALL, UINT64, AVERAGE),
+       COUNTABLE(FETCH_FIFO_STALL, UINT64, AVERAGE),
+       COUNTABLE(TCO_STALL, UINT64, AVERAGE),
+       COUNTABLE(ANY_STALL, UINT64, AVERAGE),
+       COUNTABLE(TAG_MISSES, UINT64, AVERAGE),
+       COUNTABLE(TAG_HITS, UINT64, AVERAGE),
+       COUNTABLE(SUB_TAG_MISSES, UINT64, AVERAGE),
+       COUNTABLE(SET0_INVALIDATES, UINT64, AVERAGE),
+       COUNTABLE(SET1_INVALIDATES, UINT64, AVERAGE),
+       COUNTABLE(SET2_INVALIDATES, UINT64, AVERAGE),
+       COUNTABLE(SET3_INVALIDATES, UINT64, AVERAGE),
+       COUNTABLE(SET0_TAG_MISSES, UINT64, AVERAGE),
+       COUNTABLE(SET1_TAG_MISSES, UINT64, AVERAGE),
+       COUNTABLE(SET2_TAG_MISSES, UINT64, AVERAGE),
+       COUNTABLE(SET3_TAG_MISSES, UINT64, AVERAGE),
+       COUNTABLE(SET0_TAG_HITS, UINT64, AVERAGE),
+       COUNTABLE(SET1_TAG_HITS, UINT64, AVERAGE),
+       COUNTABLE(SET2_TAG_HITS, UINT64, AVERAGE),
+       COUNTABLE(SET3_TAG_HITS, UINT64, AVERAGE),
+       COUNTABLE(SET0_SUB_TAG_MISSES, UINT64, AVERAGE),
+       COUNTABLE(SET1_SUB_TAG_MISSES, UINT64, AVERAGE),
+       COUNTABLE(SET2_SUB_TAG_MISSES, UINT64, AVERAGE),
+       COUNTABLE(SET3_SUB_TAG_MISSES, UINT64, AVERAGE),
+       COUNTABLE(SET0_EVICT1, UINT64, AVERAGE),
+       COUNTABLE(SET0_EVICT2, UINT64, AVERAGE),
+       COUNTABLE(SET0_EVICT3, UINT64, AVERAGE),
+       COUNTABLE(SET0_EVICT4, UINT64, AVERAGE),
+       COUNTABLE(SET0_EVICT5, UINT64, AVERAGE),
+       COUNTABLE(SET0_EVICT6, UINT64, AVERAGE),
+       COUNTABLE(SET0_EVICT7, UINT64, AVERAGE),
+       COUNTABLE(SET0_EVICT8, UINT64, AVERAGE),
+       COUNTABLE(SET1_EVICT1, UINT64, AVERAGE),
+       COUNTABLE(SET1_EVICT2, UINT64, AVERAGE),
+       COUNTABLE(SET1_EVICT3, UINT64, AVERAGE),
+       COUNTABLE(SET1_EVICT4, UINT64, AVERAGE),
+       COUNTABLE(SET1_EVICT5, UINT64, AVERAGE),
+       COUNTABLE(SET1_EVICT6, UINT64, AVERAGE),
+       COUNTABLE(SET1_EVICT7, UINT64, AVERAGE),
+       COUNTABLE(SET1_EVICT8, UINT64, AVERAGE),
+       COUNTABLE(SET2_EVICT1, UINT64, AVERAGE),
+       COUNTABLE(SET2_EVICT2, UINT64, AVERAGE),
+       COUNTABLE(SET2_EVICT3, UINT64, AVERAGE),
+       COUNTABLE(SET2_EVICT4, UINT64, AVERAGE),
+       COUNTABLE(SET2_EVICT5, UINT64, AVERAGE),
+       COUNTABLE(SET2_EVICT6, UINT64, AVERAGE),
+       COUNTABLE(SET2_EVICT7, UINT64, AVERAGE),
+       COUNTABLE(SET2_EVICT8, UINT64, AVERAGE),
+       COUNTABLE(SET3_EVICT1, UINT64, AVERAGE),
+       COUNTABLE(SET3_EVICT2, UINT64, AVERAGE),
+       COUNTABLE(SET3_EVICT3, UINT64, AVERAGE),
+       COUNTABLE(SET3_EVICT4, UINT64, AVERAGE),
+       COUNTABLE(SET3_EVICT5, UINT64, AVERAGE),
+       COUNTABLE(SET3_EVICT6, UINT64, AVERAGE),
+       COUNTABLE(SET3_EVICT7, UINT64, AVERAGE),
+       COUNTABLE(SET3_EVICT8, UINT64, AVERAGE),
+       COUNTABLE(FF_EMPTY, UINT64, AVERAGE),
+       COUNTABLE(FF_LT_HALF_FULL, UINT64, AVERAGE),
+       COUNTABLE(FF_HALF_FULL, UINT64, AVERAGE),
+       COUNTABLE(FF_FULL, UINT64, AVERAGE),
+       COUNTABLE(FF_XFC, UINT64, AVERAGE),
+       COUNTABLE(FF_STALLED, UINT64, AVERAGE),
+       COUNTABLE(FG_MASKS, UINT64, AVERAGE),
+       COUNTABLE(FG_LEFT_MASKS, UINT64, AVERAGE),
+       COUNTABLE(FG_LEFT_MASK_STALLED, UINT64, AVERAGE),
+       COUNTABLE(FG_LEFT_NOT_DONE_STALL, UINT64, AVERAGE),
+       COUNTABLE(FG_LEFT_FG_STALL, UINT64, AVERAGE),
+       COUNTABLE(FG_LEFT_SECTORS, UINT64, AVERAGE),
+       COUNTABLE(FG0_REQUESTS, UINT64, AVERAGE),
+       COUNTABLE(FG0_STALLED, UINT64, AVERAGE),
+       COUNTABLE(MEM_REQ512, UINT64, AVERAGE),
+       COUNTABLE(MEM_REQ_SENT, UINT64, AVERAGE),
+       COUNTABLE(MEM_LOCAL_READ_REQ, UINT64, AVERAGE),
+       COUNTABLE(TC0_MH_STALLED, UINT64, AVERAGE),
+}; /* end of tcf_countables */
+
+static const struct fd_perfcntr_countable sq_countables[] = {
+       COUNTABLE(SQ_PIXEL_VECTORS_SUB, UINT64, AVERAGE),
+       COUNTABLE(SQ_VERTEX_VECTORS_SUB, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD0, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU1_ACTIVE_VTX_SIMD0, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD0, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU1_ACTIVE_PIX_SIMD0, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD1, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU1_ACTIVE_VTX_SIMD1, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD1, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU1_ACTIVE_PIX_SIMD1, UINT64, AVERAGE),
+       COUNTABLE(SQ_EXPORT_CYCLES, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU_CST_WRITTEN, UINT64, AVERAGE),
+       COUNTABLE(SQ_TEX_CST_WRITTEN, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU_CST_STALL, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU_TEX_STALL, UINT64, AVERAGE),
+       COUNTABLE(SQ_INST_WRITTEN, UINT64, AVERAGE),
+       COUNTABLE(SQ_BOOLEAN_WRITTEN, UINT64, AVERAGE),
+       COUNTABLE(SQ_LOOPS_WRITTEN, UINT64, AVERAGE),
+       COUNTABLE(SQ_PIXEL_SWAP_IN, UINT64, AVERAGE),
+       COUNTABLE(SQ_PIXEL_SWAP_OUT, UINT64, AVERAGE),
+       COUNTABLE(SQ_VERTEX_SWAP_IN, UINT64, AVERAGE),
+       COUNTABLE(SQ_VERTEX_SWAP_OUT, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU_VTX_INST_ISSUED, UINT64, AVERAGE),
+       COUNTABLE(SQ_TEX_VTX_INST_ISSUED, UINT64, AVERAGE),
+       COUNTABLE(SQ_VC_VTX_INST_ISSUED, UINT64, AVERAGE),
+       COUNTABLE(SQ_CF_VTX_INST_ISSUED, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU_PIX_INST_ISSUED, UINT64, AVERAGE),
+       COUNTABLE(SQ_TEX_PIX_INST_ISSUED, UINT64, AVERAGE),
+       COUNTABLE(SQ_VC_PIX_INST_ISSUED, UINT64, AVERAGE),
+       COUNTABLE(SQ_CF_PIX_INST_ISSUED, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD0, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD0, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD1, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD1, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU_NOPS, UINT64, AVERAGE),
+       COUNTABLE(SQ_PRED_SKIP, UINT64, AVERAGE),
+       COUNTABLE(SQ_SYNC_ALU_STALL_SIMD0_VTX, UINT64, AVERAGE),
+       COUNTABLE(SQ_SYNC_ALU_STALL_SIMD1_VTX, UINT64, AVERAGE),
+       COUNTABLE(SQ_SYNC_TEX_STALL_VTX, UINT64, AVERAGE),
+       COUNTABLE(SQ_SYNC_VC_STALL_VTX, UINT64, AVERAGE),
+       COUNTABLE(SQ_CONSTANTS_USED_SIMD0, UINT64, AVERAGE),
+       COUNTABLE(SQ_CONSTANTS_SENT_SP_SIMD0, UINT64, AVERAGE),
+       COUNTABLE(SQ_GPR_STALL_VTX, UINT64, AVERAGE),
+       COUNTABLE(SQ_GPR_STALL_PIX, UINT64, AVERAGE),
+       COUNTABLE(SQ_VTX_RS_STALL, UINT64, AVERAGE),
+       COUNTABLE(SQ_PIX_RS_STALL, UINT64, AVERAGE),
+       COUNTABLE(SQ_SX_PC_FULL, UINT64, AVERAGE),
+       COUNTABLE(SQ_SX_EXP_BUFF_FULL, UINT64, AVERAGE),
+       COUNTABLE(SQ_SX_POS_BUFF_FULL, UINT64, AVERAGE),
+       COUNTABLE(SQ_INTERP_QUADS, UINT64, AVERAGE),
+       COUNTABLE(SQ_INTERP_ACTIVE, UINT64, AVERAGE),
+       COUNTABLE(SQ_IN_PIXEL_STALL, UINT64, AVERAGE),
+       COUNTABLE(SQ_IN_VTX_STALL, UINT64, AVERAGE),
+       COUNTABLE(SQ_VTX_CNT, UINT64, AVERAGE),
+       COUNTABLE(SQ_VTX_VECTOR2, UINT64, AVERAGE),
+       COUNTABLE(SQ_VTX_VECTOR3, UINT64, AVERAGE),
+       COUNTABLE(SQ_VTX_VECTOR4, UINT64, AVERAGE),
+       COUNTABLE(SQ_PIXEL_VECTOR1, UINT64, AVERAGE),
+       COUNTABLE(SQ_PIXEL_VECTOR23, UINT64, AVERAGE),
+       COUNTABLE(SQ_PIXEL_VECTOR4, UINT64, AVERAGE),
+       COUNTABLE(SQ_CONSTANTS_USED_SIMD1, UINT64, AVERAGE),
+       COUNTABLE(SQ_CONSTANTS_SENT_SP_SIMD1, UINT64, AVERAGE),
+       COUNTABLE(SQ_SX_MEM_EXP_FULL, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD2, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU1_ACTIVE_VTX_SIMD2, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD2, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU1_ACTIVE_PIX_SIMD2, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD3, UINT64, AVERAGE),
+       COUNTABLE(SQ_PERFCOUNT_VTX_QUAL_TP_DONE, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD3, UINT64, AVERAGE),
+       COUNTABLE(SQ_PERFCOUNT_PIX_QUAL_TP_DONE, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD2, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD2, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD3, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD3, UINT64, AVERAGE),
+       COUNTABLE(SQ_SYNC_ALU_STALL_SIMD2_VTX, UINT64, AVERAGE),
+       COUNTABLE(SQ_PERFCOUNT_VTX_POP_THREAD, UINT64, AVERAGE),
+       COUNTABLE(SQ_SYNC_ALU_STALL_SIMD0_PIX, UINT64, AVERAGE),
+       COUNTABLE(SQ_SYNC_ALU_STALL_SIMD1_PIX, UINT64, AVERAGE),
+       COUNTABLE(SQ_SYNC_ALU_STALL_SIMD2_PIX, UINT64, AVERAGE),
+       COUNTABLE(SQ_PERFCOUNT_PIX_POP_THREAD, UINT64, AVERAGE),
+       COUNTABLE(SQ_SYNC_TEX_STALL_PIX, UINT64, AVERAGE),
+       COUNTABLE(SQ_SYNC_VC_STALL_PIX, UINT64, AVERAGE),
+       COUNTABLE(SQ_CONSTANTS_USED_SIMD2, UINT64, AVERAGE),
+       COUNTABLE(SQ_CONSTANTS_SENT_SP_SIMD2, UINT64, AVERAGE),
+       COUNTABLE(SQ_PERFCOUNT_VTX_DEALLOC_ACK, UINT64, AVERAGE),
+       COUNTABLE(SQ_PERFCOUNT_PIX_DEALLOC_ACK, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD0, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD0, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD1, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD1, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD2, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD2, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD3, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD3, UINT64, AVERAGE),
+       COUNTABLE(VC_PERF_STATIC, UINT64, AVERAGE),
+       COUNTABLE(VC_PERF_STALLED, UINT64, AVERAGE),
+       COUNTABLE(VC_PERF_STARVED, UINT64, AVERAGE),
+       COUNTABLE(VC_PERF_SEND, UINT64, AVERAGE),
+       COUNTABLE(VC_PERF_ACTUAL_STARVED, UINT64, AVERAGE),
+       COUNTABLE(PIXEL_THREAD_0_ACTIVE, UINT64, AVERAGE),
+       COUNTABLE(VERTEX_THREAD_0_ACTIVE, UINT64, AVERAGE),
+       COUNTABLE(PIXEL_THREAD_0_NUMBER, UINT64, AVERAGE),
+       COUNTABLE(VERTEX_THREAD_0_NUMBER, UINT64, AVERAGE),
+       COUNTABLE(VERTEX_EVENT_NUMBER, UINT64, AVERAGE),
+       COUNTABLE(PIXEL_EVENT_NUMBER, UINT64, AVERAGE),
+       COUNTABLE(PTRBUFF_EF_PUSH, UINT64, AVERAGE),
+       COUNTABLE(PTRBUFF_EF_POP_EVENT, UINT64, AVERAGE),
+       COUNTABLE(PTRBUFF_EF_POP_NEW_VTX, UINT64, AVERAGE),
+       COUNTABLE(PTRBUFF_EF_POP_DEALLOC, UINT64, AVERAGE),
+       COUNTABLE(PTRBUFF_EF_POP_PVECTOR, UINT64, AVERAGE),
+       COUNTABLE(PTRBUFF_EF_POP_PVECTOR_X, UINT64, AVERAGE),
+       COUNTABLE(PTRBUFF_EF_POP_PVECTOR_VNZ, UINT64, AVERAGE),
+       COUNTABLE(PTRBUFF_PB_DEALLOC, UINT64, AVERAGE),
+       COUNTABLE(PTRBUFF_PI_STATE_PPB_POP, UINT64, AVERAGE),
+       COUNTABLE(PTRBUFF_PI_RTR, UINT64, AVERAGE),
+       COUNTABLE(PTRBUFF_PI_READ_EN, UINT64, AVERAGE),
+       COUNTABLE(PTRBUFF_PI_BUFF_SWAP, UINT64, AVERAGE),
+       COUNTABLE(PTRBUFF_SQ_FREE_BUFF, UINT64, AVERAGE),
+       COUNTABLE(PTRBUFF_SQ_DEC, UINT64, AVERAGE),
+       COUNTABLE(PTRBUFF_SC_VALID_CNTL_EVENT, UINT64, AVERAGE),
+       COUNTABLE(PTRBUFF_SC_VALID_IJ_XFER, UINT64, AVERAGE),
+       COUNTABLE(PTRBUFF_SC_NEW_VECTOR_1_Q, UINT64, AVERAGE),
+       COUNTABLE(PTRBUFF_QUAL_NEW_VECTOR, UINT64, AVERAGE),
+       COUNTABLE(PTRBUFF_QUAL_EVENT, UINT64, AVERAGE),
+       COUNTABLE(PTRBUFF_END_BUFFER, UINT64, AVERAGE),
+       COUNTABLE(PTRBUFF_FILL_QUAD, UINT64, AVERAGE),
+       COUNTABLE(VERTS_WRITTEN_SPI, UINT64, AVERAGE),
+       COUNTABLE(TP_FETCH_INSTR_EXEC, UINT64, AVERAGE),
+       COUNTABLE(TP_FETCH_INSTR_REQ, UINT64, AVERAGE),
+       COUNTABLE(TP_DATA_RETURN, UINT64, AVERAGE),
+       COUNTABLE(SPI_WRITE_CYCLES_SP, UINT64, AVERAGE),
+       COUNTABLE(SPI_WRITES_SP, UINT64, AVERAGE),
+       COUNTABLE(SP_ALU_INSTR_EXEC, UINT64, AVERAGE),
+       COUNTABLE(SP_CONST_ADDR_TO_SQ, UINT64, AVERAGE),
+       COUNTABLE(SP_PRED_KILLS_TO_SQ, UINT64, AVERAGE),
+       COUNTABLE(SP_EXPORT_CYCLES_TO_SX, UINT64, AVERAGE),
+       COUNTABLE(SP_EXPORTS_TO_SX, UINT64, AVERAGE),
+       COUNTABLE(SQ_CYCLES_ELAPSED, UINT64, AVERAGE),
+       COUNTABLE(SQ_TCFS_OPT_ALLOC_EXEC, UINT64, AVERAGE),
+       COUNTABLE(SQ_TCFS_NO_OPT_ALLOC, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU0_NO_OPT_ALLOC, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU1_NO_OPT_ALLOC, UINT64, AVERAGE),
+       COUNTABLE(SQ_TCFS_ARB_XFC_CNT, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU0_ARB_XFC_CNT, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU1_ARB_XFC_CNT, UINT64, AVERAGE),
+       COUNTABLE(SQ_TCFS_CFS_UPDATE_CNT, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU0_CFS_UPDATE_CNT, UINT64, AVERAGE),
+       COUNTABLE(SQ_ALU1_CFS_UPDATE_CNT, UINT64, AVERAGE),
+       COUNTABLE(SQ_VTX_PUSH_THREAD_CNT, UINT64, AVERAGE),
+       COUNTABLE(SQ_VTX_POP_THREAD_CNT, UINT64, AVERAGE),
+       COUNTABLE(SQ_PIX_PUSH_THREAD_CNT, UINT64, AVERAGE),
+       COUNTABLE(SQ_PIX_POP_THREAD_CNT, UINT64, AVERAGE),
+       COUNTABLE(SQ_PIX_TOTAL, UINT64, AVERAGE),
+       COUNTABLE(SQ_PIX_KILLED, UINT64, AVERAGE),
+};
+
+/* Events that can be selected on the SX performance counter(s).  Entry
+ * order is significant: fd2_query.c looks a countable up by its index
+ * within its group's table.
+ */
+static const struct fd_perfcntr_countable sx_countables[] = {
+       COUNTABLE(SX_EXPORT_VECTORS, UINT64, AVERAGE),
+       COUNTABLE(SX_DUMMY_QUADS, UINT64, AVERAGE),
+       COUNTABLE(SX_ALPHA_FAIL, UINT64, AVERAGE),
+       COUNTABLE(SX_RB_QUAD_BUSY, UINT64, AVERAGE),
+       COUNTABLE(SX_RB_COLOR_BUSY, UINT64, AVERAGE),
+       COUNTABLE(SX_RB_QUAD_STALL, UINT64, AVERAGE),
+       COUNTABLE(SX_RB_COLOR_STALL, UINT64, AVERAGE),
+};
+
+/* Events that can be selected on the RB performance counter(s).  Entry
+ * order is significant: fd2_query.c looks a countable up by its index
+ * within its group's table.
+ */
+static const struct fd_perfcntr_countable rb_countables[] = {
+       COUNTABLE(RBPERF_CNTX_BUSY, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_CNTX_BUSY_MAX, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_SX_QUAD_STARVED, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_SX_QUAD_STARVED_MAX, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_GA_GC_CH0_SYS_REQ, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_GA_GC_CH0_SYS_REQ_MAX, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_GA_GC_CH1_SYS_REQ, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_GA_GC_CH1_SYS_REQ_MAX, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_MH_STARVED, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_MH_STARVED_MAX, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_AZ_BC_COLOR_BUSY, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_AZ_BC_COLOR_BUSY_MAX, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_AZ_BC_Z_BUSY, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_AZ_BC_Z_BUSY_MAX, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_RB_SC_TILE_RTR_N, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_RB_SC_TILE_RTR_N_MAX, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_RB_SC_SAMP_RTR_N, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_RB_SC_SAMP_RTR_N_MAX, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_RB_SX_QUAD_RTR_N, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_RB_SX_QUAD_RTR_N_MAX, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_RB_SX_COLOR_RTR_N, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_RB_SX_COLOR_RTR_N_MAX, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_RB_SC_SAMP_LZ_BUSY, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_RB_SC_SAMP_LZ_BUSY_MAX, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_ZXP_STALL, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_ZXP_STALL_MAX, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_EVENT_PENDING, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_EVENT_PENDING_MAX, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_RB_MH_VALID, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_RB_MH_VALID_MAX, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_SX_RB_QUAD_SEND, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_SX_RB_COLOR_SEND, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_SC_RB_TILE_SEND, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_SC_RB_SAMPLE_SEND, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_SX_RB_MEM_EXPORT, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_SX_RB_QUAD_EVENT, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_SC_RB_TILE_EVENT_FILTERED, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_SC_RB_TILE_EVENT_ALL, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_RB_SC_EZ_SEND, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_RB_SX_INDEX_SEND, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_GMEM_INTFO_RD, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_GMEM_INTF1_RD, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_GMEM_INTFO_WR, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_GMEM_INTF1_WR, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_RB_CP_CONTEXT_DONE, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_RB_CP_CACHE_FLUSH, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_ZPASS_DONE, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_ZCMD_VALID, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_CCMD_VALID, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_ACCUM_GRANT, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_ACCUM_C0_GRANT, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_ACCUM_C1_GRANT, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_ACCUM_FULL_BE_WR, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_ACCUM_REQUEST_NO_GRANT, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_ACCUM_TIMEOUT_PULSE, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_ACCUM_LIN_TIMEOUT_PULSE, UINT64, AVERAGE),
+       COUNTABLE(RBPERF_ACCUM_CAM_HIT_FLUSHING, UINT64, AVERAGE),
+};
+
+/* PA_SU hardware counters (four).  Each entry names a counter's select
+ * register followed by the registers holding the low and high parts of
+ * its value.
+ */
+static const struct fd_perfcntr_counter pa_su_counters[] = {
+       COUNTER(PA_SU_PERFCOUNTER0_SELECT, PA_SU_PERFCOUNTER0_LOW, PA_SU_PERFCOUNTER0_HI),
+       COUNTER(PA_SU_PERFCOUNTER1_SELECT, PA_SU_PERFCOUNTER1_LOW, PA_SU_PERFCOUNTER1_HI),
+       COUNTER(PA_SU_PERFCOUNTER2_SELECT, PA_SU_PERFCOUNTER2_LOW, PA_SU_PERFCOUNTER2_HI),
+       COUNTER(PA_SU_PERFCOUNTER3_SELECT, PA_SU_PERFCOUNTER3_LOW, PA_SU_PERFCOUNTER3_HI),
+};
+
+/* PA_SC hardware counter (only one is listed): select register, then the
+ * registers holding the low and high parts of its value.
+ */
+static const struct fd_perfcntr_counter pa_sc_counters[] = {
+       COUNTER(PA_SC_PERFCOUNTER0_SELECT, PA_SC_PERFCOUNTER0_LOW, PA_SC_PERFCOUNTER0_HI),
+};
+
+/* VGT hardware counters (four).  Each entry names a counter's select
+ * register followed by the registers holding the low and high parts of
+ * its value.
+ */
+static const struct fd_perfcntr_counter vgt_counters[] = {
+       COUNTER(VGT_PERFCOUNTER0_SELECT, VGT_PERFCOUNTER0_LOW, VGT_PERFCOUNTER0_HI),
+       COUNTER(VGT_PERFCOUNTER1_SELECT, VGT_PERFCOUNTER1_LOW, VGT_PERFCOUNTER1_HI),
+       COUNTER(VGT_PERFCOUNTER2_SELECT, VGT_PERFCOUNTER2_LOW, VGT_PERFCOUNTER2_HI),
+       COUNTER(VGT_PERFCOUNTER3_SELECT, VGT_PERFCOUNTER3_LOW, VGT_PERFCOUNTER3_HI),
+};
+
+/* TCR hardware counters (two): select register, then the registers
+ * holding the low and high parts of the counter value.
+ */
+static const struct fd_perfcntr_counter tcr_counters[] = {
+       COUNTER(TCR_PERFCOUNTER0_SELECT, TCR_PERFCOUNTER0_LOW, TCR_PERFCOUNTER0_HI),
+       COUNTER(TCR_PERFCOUNTER1_SELECT, TCR_PERFCOUNTER1_LOW, TCR_PERFCOUNTER1_HI),
+};
+
+/* TP0 hardware counters (two): select register, then the registers
+ * holding the low and high parts of the counter value.
+ */
+static const struct fd_perfcntr_counter tp0_counters[] = {
+       COUNTER(TP0_PERFCOUNTER0_SELECT, TP0_PERFCOUNTER0_LOW, TP0_PERFCOUNTER0_HI),
+       COUNTER(TP0_PERFCOUNTER1_SELECT, TP0_PERFCOUNTER1_LOW, TP0_PERFCOUNTER1_HI),
+};
+
+/* TCM hardware counters (two): select register, then the registers
+ * holding the low and high parts of the counter value.
+ */
+static const struct fd_perfcntr_counter tcm_counters[] = {
+       COUNTER(TCM_PERFCOUNTER0_SELECT, TCM_PERFCOUNTER0_LOW, TCM_PERFCOUNTER0_HI),
+       COUNTER(TCM_PERFCOUNTER1_SELECT, TCM_PERFCOUNTER1_LOW, TCM_PERFCOUNTER1_HI),
+};
+
+/* TCF hardware counters (twelve, PERFCOUNTER0..11).  Each entry names a
+ * counter's select register followed by the registers holding the low
+ * and high parts of its value.
+ */
+static const struct fd_perfcntr_counter tcf_counters[] = {
+       COUNTER(TCF_PERFCOUNTER0_SELECT, TCF_PERFCOUNTER0_LOW, TCF_PERFCOUNTER0_HI),
+       COUNTER(TCF_PERFCOUNTER1_SELECT, TCF_PERFCOUNTER1_LOW, TCF_PERFCOUNTER1_HI),
+       COUNTER(TCF_PERFCOUNTER2_SELECT, TCF_PERFCOUNTER2_LOW, TCF_PERFCOUNTER2_HI),
+       COUNTER(TCF_PERFCOUNTER3_SELECT, TCF_PERFCOUNTER3_LOW, TCF_PERFCOUNTER3_HI),
+       COUNTER(TCF_PERFCOUNTER4_SELECT, TCF_PERFCOUNTER4_LOW, TCF_PERFCOUNTER4_HI),
+       COUNTER(TCF_PERFCOUNTER5_SELECT, TCF_PERFCOUNTER5_LOW, TCF_PERFCOUNTER5_HI),
+       COUNTER(TCF_PERFCOUNTER6_SELECT, TCF_PERFCOUNTER6_LOW, TCF_PERFCOUNTER6_HI),
+       COUNTER(TCF_PERFCOUNTER7_SELECT, TCF_PERFCOUNTER7_LOW, TCF_PERFCOUNTER7_HI),
+       COUNTER(TCF_PERFCOUNTER8_SELECT, TCF_PERFCOUNTER8_LOW, TCF_PERFCOUNTER8_HI),
+       COUNTER(TCF_PERFCOUNTER9_SELECT, TCF_PERFCOUNTER9_LOW, TCF_PERFCOUNTER9_HI),
+       COUNTER(TCF_PERFCOUNTER10_SELECT, TCF_PERFCOUNTER10_LOW, TCF_PERFCOUNTER10_HI),
+       COUNTER(TCF_PERFCOUNTER11_SELECT, TCF_PERFCOUNTER11_LOW, TCF_PERFCOUNTER11_HI),
+};
+
+/* SQ hardware counters (four).  Each entry names a counter's select
+ * register followed by the registers holding the low and high parts of
+ * its value.
+ */
+static const struct fd_perfcntr_counter sq_counters[] = {
+       COUNTER(SQ_PERFCOUNTER0_SELECT, SQ_PERFCOUNTER0_LOW, SQ_PERFCOUNTER0_HI),
+       COUNTER(SQ_PERFCOUNTER1_SELECT, SQ_PERFCOUNTER1_LOW, SQ_PERFCOUNTER1_HI),
+       COUNTER(SQ_PERFCOUNTER2_SELECT, SQ_PERFCOUNTER2_LOW, SQ_PERFCOUNTER2_HI),
+       COUNTER(SQ_PERFCOUNTER3_SELECT, SQ_PERFCOUNTER3_LOW, SQ_PERFCOUNTER3_HI),
+};
+
+/* Events that can be selected on the RBBM performance counter.  Entry
+ * order is significant: fd2_query.c looks a countable up by its index
+ * within its group's table.
+ */
+static const struct fd_perfcntr_countable rbbm_countables[] = {
+       COUNTABLE(RBBM1_COUNT, UINT64, AVERAGE),
+       COUNTABLE(RBBM1_NRT_BUSY, UINT64, AVERAGE),
+       COUNTABLE(RBBM1_RB_BUSY, UINT64, AVERAGE),
+       COUNTABLE(RBBM1_SQ_CNTX0_BUSY, UINT64, AVERAGE),
+       COUNTABLE(RBBM1_SQ_CNTX17_BUSY, UINT64, AVERAGE),
+       COUNTABLE(RBBM1_VGT_BUSY, UINT64, AVERAGE),
+       COUNTABLE(RBBM1_VGT_NODMA_BUSY, UINT64, AVERAGE),
+       COUNTABLE(RBBM1_PA_BUSY, UINT64, AVERAGE),
+       COUNTABLE(RBBM1_SC_CNTX_BUSY, UINT64, AVERAGE),
+       COUNTABLE(RBBM1_TPC_BUSY, UINT64, AVERAGE),
+       COUNTABLE(RBBM1_TC_BUSY, UINT64, AVERAGE),
+       COUNTABLE(RBBM1_SX_BUSY, UINT64, AVERAGE),
+       COUNTABLE(RBBM1_CP_COHER_BUSY, UINT64, AVERAGE),
+       COUNTABLE(RBBM1_CP_NRT_BUSY, UINT64, AVERAGE),
+       COUNTABLE(RBBM1_GFX_IDLE_STALL, UINT64, AVERAGE),
+       COUNTABLE(RBBM1_INTERRUPT, UINT64, AVERAGE),
+};
+
+/* Events that can be selected on the CP performance counter.  Entry
+ * order is significant: fd2_query.c looks a countable up by its index
+ * within its group's table.
+ */
+static const struct fd_perfcntr_countable cp_countables[] = {
+       COUNTABLE(ALWAYS_COUNT, UINT64, AVERAGE),
+       COUNTABLE(TRANS_FIFO_FULL, UINT64, AVERAGE),
+       COUNTABLE(TRANS_FIFO_AF, UINT64, AVERAGE),
+       COUNTABLE(RCIU_PFPTRANS_WAIT, UINT64, AVERAGE),
+       COUNTABLE(RCIU_NRTTRANS_WAIT, UINT64, AVERAGE),
+       COUNTABLE(CSF_NRT_READ_WAIT, UINT64, AVERAGE),
+       COUNTABLE(CSF_I1_FIFO_FULL, UINT64, AVERAGE),
+       COUNTABLE(CSF_I2_FIFO_FULL, UINT64, AVERAGE),
+       COUNTABLE(CSF_ST_FIFO_FULL, UINT64, AVERAGE),
+       COUNTABLE(CSF_RING_ROQ_FULL, UINT64, AVERAGE),
+       COUNTABLE(CSF_I1_ROQ_FULL, UINT64, AVERAGE),
+       COUNTABLE(CSF_I2_ROQ_FULL, UINT64, AVERAGE),
+       COUNTABLE(CSF_ST_ROQ_FULL, UINT64, AVERAGE),
+       COUNTABLE(MIU_TAG_MEM_FULL, UINT64, AVERAGE),
+       COUNTABLE(MIU_WRITECLEAN, UINT64, AVERAGE),
+       COUNTABLE(MIU_NRT_WRITE_STALLED, UINT64, AVERAGE),
+       COUNTABLE(MIU_NRT_READ_STALLED, UINT64, AVERAGE),
+       COUNTABLE(ME_WRITE_CONFIRM_FIFO_FULL, UINT64, AVERAGE),
+       COUNTABLE(ME_VS_DEALLOC_FIFO_FULL, UINT64, AVERAGE),
+       COUNTABLE(ME_PS_DEALLOC_FIFO_FULL, UINT64, AVERAGE),
+       COUNTABLE(ME_REGS_VS_EVENT_FIFO_FULL, UINT64, AVERAGE),
+       COUNTABLE(ME_REGS_PS_EVENT_FIFO_FULL, UINT64, AVERAGE),
+       COUNTABLE(ME_REGS_CF_EVENT_FIFO_FULL, UINT64, AVERAGE),
+       COUNTABLE(ME_MICRO_RB_STARVED, UINT64, AVERAGE),
+       COUNTABLE(ME_MICRO_I1_STARVED, UINT64, AVERAGE),
+       COUNTABLE(ME_MICRO_I2_STARVED, UINT64, AVERAGE),
+       COUNTABLE(ME_MICRO_ST_STARVED, UINT64, AVERAGE),
+       COUNTABLE(RCIU_RBBM_DWORD_SENT, UINT64, AVERAGE),
+       COUNTABLE(ME_BUSY_CLOCKS, UINT64, AVERAGE),
+       COUNTABLE(ME_WAIT_CONTEXT_AVAIL, UINT64, AVERAGE),
+       COUNTABLE(PFP_TYPE0_PACKET, UINT64, AVERAGE),
+       COUNTABLE(PFP_TYPE3_PACKET, UINT64, AVERAGE),
+       COUNTABLE(CSF_RB_WPTR_NEQ_RPTR, UINT64, AVERAGE),
+       COUNTABLE(CSF_I1_SIZE_NEQ_ZERO, UINT64, AVERAGE),
+       COUNTABLE(CSF_I2_SIZE_NEQ_ZERO, UINT64, AVERAGE),
+       COUNTABLE(CSF_RBI1I2_FETCHING, UINT64, AVERAGE),
+};
+
+/* SX hardware counter (only one is listed): select register, then the
+ * registers holding the low and high parts of its value.
+ */
+static const struct fd_perfcntr_counter sx_counters[] = {
+       COUNTER(SX_PERFCOUNTER0_SELECT, SX_PERFCOUNTER0_LOW, SX_PERFCOUNTER0_HI),
+};
+
+/* MH counters are compiled out: we don't have the enums for the MH
+ * perfcntrs, so there is no mh_countables[] table to pair these
+ * registers with.  Presumably kept so the group can be enabled once the
+ * enums exist.
+ */
+#if 0
+static const struct fd_perfcntr_counter mh_counters[] = {
+       COUNTER(MH_PERFCOUNTER0_SELECT, MH_PERFCOUNTER0_LOW, MH_PERFCOUNTER0_HI),
+       COUNTER(MH_PERFCOUNTER1_SELECT, MH_PERFCOUNTER1_LOW, MH_PERFCOUNTER1_HI),
+};
+#endif
+
+/* RBBM hardware counter.  Only PERFCOUNTER1 is listed; note that its
+ * value registers use the _LO/_HI suffixes rather than _LOW/_HI.
+ */
+static const struct fd_perfcntr_counter rbbm_counters[] = {
+       COUNTER(RBBM_PERFCOUNTER1_SELECT, RBBM_PERFCOUNTER1_LO, RBBM_PERFCOUNTER1_HI),
+};
+
+/* CP hardware counter: a single, unnumbered counter with its select
+ * register and the registers holding the low and high parts of its value.
+ */
+static const struct fd_perfcntr_counter cp_counters[] = {
+       COUNTER(CP_PERFCOUNTER_SELECT, CP_PERFCOUNTER_LO, CP_PERFCOUNTER_HI),
+};
+
+/* RB hardware counter (only one is listed): select register, then the
+ * registers holding the low and high parts of its value.
+ */
+static const struct fd_perfcntr_counter rb_counters[] = {
+       COUNTER(RB_PERFCOUNTER0_SELECT, RB_PERFCOUNTER0_LOW, RB_PERFCOUNTER0_HI),
+};
+
+/* Top-level table of a2xx perfcntr groups, pairing each block's hardware
+ * counters with the countables that can be selected on them.  Exported,
+ * together with its length below, for fd2_screen_init(), which installs
+ * it on the screen when FD_DBG_PERFC is set.
+ *
+ * The position of a group in this table is the group-id that
+ * fd2_query.c uses to index screen->perfcntr_groups[], so do not
+ * reorder entries casually.  The MH group is left out because it has no
+ * countables table.
+ */
+const struct fd_perfcntr_group a2xx_perfcntr_groups[] = {
+       GROUP("PA_SU", pa_su_counters, pa_su_countables),
+       GROUP("PA_SC", pa_sc_counters, pa_sc_countables),
+       GROUP("VGT", vgt_counters, vgt_countables),
+       GROUP("TCR", tcr_counters, tcr_countables),
+       GROUP("TP0", tp0_counters, tp0_countables),
+       GROUP("TCM", tcm_counters, tcm_countables),
+       GROUP("TCF", tcf_counters, tcf_countables),
+       GROUP("SQ", sq_counters, sq_countables),
+       GROUP("SX", sx_counters, sx_countables),
+//     GROUP("MH", mh_counters, mh_countables),
+       GROUP("RBBM", rbbm_counters, rbbm_countables),
+       GROUP("CP", cp_counters, cp_countables),
+       GROUP("RB", rb_counters, rb_countables),
+};
+
+/* number of entries in a2xx_perfcntr_groups[] */
+const unsigned a2xx_num_perfcntr_groups = ARRAY_SIZE(a2xx_perfcntr_groups);
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_query.c b/src/gallium/drivers/freedreno/a2xx/fd2_query.c
new file mode 100644 (file)
index 0000000..9e5bb45
--- /dev/null
@@ -0,0 +1,244 @@
+/*
+ * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Jonathan Marek <jonathan@marek.ca>
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+/* NOTE: perfcntrs are 48 bits wide, but we (apparently) only have a
+ * 32-bit accumulate, so we work with the low 32 bits only.  We accumulate
+ * the start and stop values separately; this differs from a5xx, but it
+ * works when accumulate is the only operation available (no add/negate).
+ */
+
+#include "freedreno_query_acc.h"
+#include "freedreno_resource.h"
+
+#include "fd2_context.h"
+#include "fd2_query.h"
+
+/* One slot of the query's sample buffer: a pair of 32-bit values per
+ * sampled counter.  The CP accumulates the counter's low register into
+ * 'start' on resume and into 'stop' on pause; the reported result is
+ * stop - start.
+ */
+struct PACKED fd2_query_sample {
+       uint32_t start;   /* accumulated by perfcntr_resume() */
+       uint32_t stop;    /* accumulated by perfcntr_pause() */
+};
+
+/* Expands to the argument list passed to OUT_RELOCW() after 'ring' (the
+ * buffer object, the byte offset, and two trailing zero arguments),
+ * addressing one field of element 'idx' in the query's array of
+ * fd2_query_sample.
+ *
+ * This is an argument list rather than an expression, so it cannot be
+ * parenthesized as a whole; 'idx' is parenthesized so that passing an
+ * expression such as 'i + 1' still yields the right offset.
+ */
+#define query_sample_idx(aq, idx, field)        \
+       fd_resource((aq)->prsc)->bo,                \
+       ((idx) * sizeof(struct fd2_query_sample)) + \
+       offsetof(struct fd2_query_sample, field),   \
+       0, 0
+
+/* Same, for a single fd2_query_sample (ie. element 0): */
+#define query_sample(aq, field)                 \
+       query_sample_idx(aq, 0, field)
+
+/*
+ * Performance Counter (batch) queries:
+ *
+ * Only one of these is active at a time, per the design of the gallium
+ * batch_query API.  One perfcntr query tracks N query_types, each of
+ * which has a 'fd_batch_query_entry' that maps it back to the
+ * associated group and countable.
+ */
+
+/* Identifies what one requested query_type samples.  Both ids are stored
+ * as uint8_t, so they are limited to 0..255.
+ */
+struct fd_batch_query_entry {
+       uint8_t gid;        /* group-id: index into screen->perfcntr_groups[] */
+       uint8_t cid;        /* countable-id: index into that group's countables[] */
+};
+
+/* Per-query state hung off fd_acc_query::query_data: the owning screen
+ * (for its perfcntr group tables) plus one entry per requested
+ * query_type, in the order they were requested.  Allocated as a single
+ * variable-length block by fd2_create_batch_query().
+ */
+struct fd_batch_query_data {
+       struct fd_screen *screen;
+       unsigned num_query_entries;   /* number of valid query_entries[] */
+       struct fd_batch_query_entry query_entries[];
+};
+
+/* Resume hook of the perfcntr sample provider.
+ *
+ * After a wait-for-idle, programs a hardware counter's select register
+ * for every entry of the batch query, then has the CP accumulate each of
+ * those counters' low register into the 'start' field of the matching
+ * sample slot.  Entries are handed the counters of their group in
+ * request order (first entry of a group gets counter 0, and so on).
+ */
+static void
+perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch)
+{
+       struct fd_batch_query_data *data = aq->query_data;
+       struct fd_screen *screen = data->screen;
+       struct fd_ringbuffer *ring = batch->draw;
+       const unsigned num_entries = data->num_query_entries;
+
+       /* next unused counter of each group: */
+       unsigned next_counter[screen->num_perfcntr_groups];
+
+       fd_wfi(batch, ring);
+
+       /* pass 1: point a hw counter at each requested countable: */
+       memset(next_counter, 0, sizeof(next_counter));
+       for (unsigned n = 0; n < num_entries; n++) {
+               const struct fd_batch_query_entry *e = &data->query_entries[n];
+               const struct fd_perfcntr_group *g = &screen->perfcntr_groups[e->gid];
+               unsigned counter_idx = next_counter[e->gid]++;
+
+               debug_assert(counter_idx < g->num_counters);
+
+               OUT_PKT0(ring, g->counters[counter_idx].select_reg, 1);
+               OUT_RING(ring, g->countables[e->cid].selector);
+       }
+
+       /* pass 2: replay the same counter assignment and snapshot the
+        * start values.
+        * NOTE(review): the flag is named CP_MEM_TO_REG_* but is used with
+        * a CP_REG_TO_MEM packet; confirm it is the intended bit.
+        */
+       memset(next_counter, 0, sizeof(next_counter));
+       for (unsigned n = 0; n < num_entries; n++) {
+               const struct fd_batch_query_entry *e = &data->query_entries[n];
+               const struct fd_perfcntr_group *g = &screen->perfcntr_groups[e->gid];
+               unsigned counter_idx = next_counter[e->gid]++;
+
+               OUT_PKT3(ring, CP_REG_TO_MEM, 2);
+               OUT_RING(ring, g->counters[counter_idx].counter_reg_lo |
+                               CP_MEM_TO_REG_0_ACCUMULATE);
+               OUT_RELOCW(ring, query_sample_idx(aq, n, start));
+       }
+}
+
+/* Pause hook of the perfcntr sample provider.
+ *
+ * After a wait-for-idle, has the CP accumulate the low register of each
+ * counter in use into the 'stop' field of the matching sample slot.
+ * Counters are matched to entries by handing out each group's counters
+ * in request order.
+ */
+static void
+perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch)
+{
+       struct fd_batch_query_data *data = aq->query_data;
+       struct fd_screen *screen = data->screen;
+       struct fd_ringbuffer *ring = batch->draw;
+       const unsigned num_entries = data->num_query_entries;
+
+       /* next unused counter of each group: */
+       unsigned next_counter[screen->num_perfcntr_groups];
+       memset(next_counter, 0, sizeof(next_counter));
+
+       fd_wfi(batch, ring);
+
+       /* TODO do we need to bother to turn anything off? */
+
+       /* snapshot the end values.
+        * NOTE(review): the flag is named CP_MEM_TO_REG_* but is used with
+        * a CP_REG_TO_MEM packet; confirm it is the intended bit.
+        */
+       for (unsigned n = 0; n < num_entries; n++) {
+               const struct fd_batch_query_entry *e = &data->query_entries[n];
+               const struct fd_perfcntr_group *g = &screen->perfcntr_groups[e->gid];
+               unsigned counter_idx = next_counter[e->gid]++;
+
+               OUT_PKT3(ring, CP_REG_TO_MEM, 2);
+               OUT_RING(ring, g->counters[counter_idx].counter_reg_lo |
+                               CP_MEM_TO_REG_0_ACCUMULATE);
+               OUT_RELOCW(ring, query_sample_idx(aq, n, stop));
+       }
+}
+
+/* Result hook of the perfcntr sample provider.
+ *
+ * For each entry of the batch query, reports stop - start from the
+ * matching sample slot in 'buf'.  The subtraction is done on the 32-bit
+ * fields, so it wraps modulo 2^32 before being widened into the 64-bit
+ * result.
+ */
+static void
+perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf,
+               union pipe_query_result *result)
+{
+       const struct fd_batch_query_data *data = aq->query_data;
+       const struct fd2_query_sample *sample = buf;
+       const unsigned count = data->num_query_entries;
+
+       for (unsigned n = 0; n < count; n++, sample++) {
+               const uint32_t delta = sample->stop - sample->start;
+
+               result->batch[n].u64 = delta;
+       }
+}
+
+/* Sample provider that plugs the a2xx perfcntr batch query into the
+ * generic accumulated-query code; it is marked active for the draw and
+ * clear stages.
+ */
+static const struct fd_acc_sample_provider perfcntr = {
+       .query_type = FD_QUERY_FIRST_PERFCNTR,
+       .active     = FD_STAGE_DRAW | FD_STAGE_CLEAR,
+       .resume     = perfcntr_resume,
+       .pause      = perfcntr_pause,
+       .result     = perfcntr_accumulate_result,
+};
+
+/* pipe_context::create_batch_query hook.
+ *
+ * Builds a single accumulated query that samples one perfcntr countable
+ * per entry of query_types[].  Returns NULL if an allocation fails, if
+ * any query_type is not a valid perfcntr query, or if more countables of
+ * one group are requested than that group has counters.
+ *
+ * On success the per-query data is handed to the query via
+ * aq->query_data.
+ */
+static struct pipe_query *
+fd2_create_batch_query(struct pipe_context *pctx,
+               unsigned num_queries, unsigned *query_types)
+{
+       struct fd_context *ctx = fd_context(pctx);
+       struct fd_screen *screen = ctx->screen;
+       struct fd_query *q;
+       struct fd_acc_query *aq;
+       struct fd_batch_query_data *data;
+
+       data = CALLOC_VARIANT_LENGTH_STRUCT(fd_batch_query_data,
+                       num_queries * sizeof(data->query_entries[0]));
+       /* plain return rather than 'goto error': the VLA below is not in
+        * scope yet, and jumping into the scope of a VLA is not allowed.
+        */
+       if (!data)
+               return NULL;
+
+       data->screen = screen;
+       data->num_query_entries = num_queries;
+
+       /* validate the requested query_types and ensure we don't try
+        * to request more query_types of a given group than we have
+        * counters:
+        */
+       unsigned counters_per_group[screen->num_perfcntr_groups];
+       memset(counters_per_group, 0, sizeof(counters_per_group));
+
+       for (unsigned i = 0; i < num_queries; i++) {
+               unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
+
+               /* verify valid query_type, ie. is it actually a perfcntr? */
+               if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
+                               (idx >= screen->num_perfcntr_queries)) {
+                       debug_printf("invalid batch query query_type: %u\n", query_types[i]);
+                       goto error;
+               }
+
+               struct fd_batch_query_entry *entry = &data->query_entries[i];
+               struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
+
+               entry->gid = pq->group_id;
+
+               /* the perfcntr_queries[] table flattens all the countables
+                * for each group in series, ie:
+                *
+                *   (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
+                *
+                * So to find the countable index just step back through the
+                * table and count the earlier entries with the same group-id
+                * (entry->cid starts at zero because data was calloc'd).
+                */
+               while (pq > screen->perfcntr_queries) {
+                       pq--;
+                       if (pq->group_id == entry->gid)
+                               entry->cid++;
+               }
+
+               if (counters_per_group[entry->gid] >=
+                               screen->perfcntr_groups[entry->gid].num_counters) {
+                       debug_printf("too many counters for group %u\n", entry->gid);
+                       goto error;
+               }
+
+               counters_per_group[entry->gid]++;
+       }
+
+       q = fd_acc_create_query2(ctx, 0, &perfcntr);
+       /* don't dereference (or leak 'data' on) a failed query creation: */
+       if (!q)
+               goto error;
+
+       aq = fd_acc_query(q);
+
+       /* sample buffer size is based on # of queries: */
+       aq->size = num_queries * sizeof(struct fd2_query_sample);
+       aq->query_data = data;
+
+       return (struct pipe_query *)q;
+
+error:
+       free(data);
+       return NULL;
+}
+
+/* Install the a2xx query entry points on a context: the generic
+ * accumulated-query implementation for the freedreno-internal hooks,
+ * plus the a2xx perfcntr implementation for gallium batch queries.
+ */
+void
+fd2_query_context_init(struct pipe_context *pctx)
+{
+       struct fd_context *fd_ctx = fd_context(pctx);
+
+       /* gallium-facing hook: */
+       pctx->create_batch_query = fd2_create_batch_query;
+
+       /* freedreno-internal hooks: */
+       fd_ctx->query_set_stage = fd_acc_query_set_stage;
+       fd_ctx->create_query = fd_acc_create_query;
+}
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_query.h b/src/gallium/drivers/freedreno/a2xx/fd2_query.h
new file mode 100644 (file)
index 0000000..4d9f3e3
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2019 Jonathan Marek <jonathan@marek.ca>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Jonathan Marek <jonathan@marek.ca>
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD2_QUERY_H_
+#define FD2_QUERY_H_
+
+#include "pipe/p_context.h"
+
+/* Hooks up the a2xx query functions (including perfcntr batch queries)
+ * on pctx; called from fd2_context_create().
+ */
+void fd2_query_context_init(struct pipe_context *pctx);
+
+#endif /* FD2_QUERY_H_ */
index 3b44147cb3d6b7fe102218f68794c72af655892a..f02532381236b6d90994fcd1cadae01f508ee1db 100644 (file)
@@ -105,6 +105,9 @@ fd2_screen_is_format_supported(struct pipe_screen *pscreen,
        return retval == usage;
 }
 
+/* a2xx perfcntr group table and its length, defined in fd2_perfcntr.c: */
+extern const struct fd_perfcntr_group a2xx_perfcntr_groups[];
+extern const unsigned a2xx_num_perfcntr_groups;
+
 void
 fd2_screen_init(struct pipe_screen *pscreen)
 {
@@ -114,4 +117,9 @@ fd2_screen_init(struct pipe_screen *pscreen)
        pscreen->context_create = fd2_context_create;
        pscreen->is_format_supported = fd2_screen_is_format_supported;
        screen->setup_slices = fd2_setup_slices;
+
+       if (fd_mesa_debug & FD_DBG_PERFC) {
+               screen->perfcntr_groups = a2xx_perfcntr_groups;
+               screen->num_perfcntr_groups = a2xx_num_perfcntr_groups;
+       }
 }
index 1e3a3037014bf59ceaa787bb7d1df4a31490ff48..90c0b62aeb6d6f83332469f169673d947d2f2b16 100644 (file)
@@ -68,8 +68,11 @@ files_libfreedreno = files(
   'a2xx/fd2_emit.h',
   'a2xx/fd2_gmem.c',
   'a2xx/fd2_gmem.h',
+  'a2xx/fd2_perfcntr.c',
   'a2xx/fd2_program.c',
   'a2xx/fd2_program.h',
+  'a2xx/fd2_query.c',
+  'a2xx/fd2_query.h',
   'a2xx/fd2_rasterizer.c',
   'a2xx/fd2_rasterizer.h',
   'a2xx/fd2_resource.c',