From 7d458c0c69ad80e3d01c0ba79e57ed3981764823 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Mon, 28 Jan 2019 10:09:39 -0500 Subject: [PATCH] freedreno: a2xx: add perfcntrs Based on a5xx perfcntrs implementation. Signed-off-by: Jonathan Marek --- .../drivers/freedreno/Makefile.sources | 3 + .../drivers/freedreno/a2xx/fd2_context.c | 3 + src/gallium/drivers/freedreno/a2xx/fd2_emit.c | 9 + .../drivers/freedreno/a2xx/fd2_perfcntr.c | 813 ++++++++++++++++++ .../drivers/freedreno/a2xx/fd2_query.c | 244 ++++++ .../drivers/freedreno/a2xx/fd2_query.h | 35 + .../drivers/freedreno/a2xx/fd2_screen.c | 8 + src/gallium/drivers/freedreno/meson.build | 3 + 8 files changed, 1118 insertions(+) create mode 100644 src/gallium/drivers/freedreno/a2xx/fd2_perfcntr.c create mode 100644 src/gallium/drivers/freedreno/a2xx/fd2_query.c create mode 100644 src/gallium/drivers/freedreno/a2xx/fd2_query.h diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources index d2eddb989d7..e11c42517c9 100644 --- a/src/gallium/drivers/freedreno/Makefile.sources +++ b/src/gallium/drivers/freedreno/Makefile.sources @@ -50,8 +50,11 @@ a2xx_SOURCES := \ a2xx/fd2_emit.h \ a2xx/fd2_gmem.c \ a2xx/fd2_gmem.h \ + a2xx/fd2_perfcntr.c \ a2xx/fd2_program.c \ a2xx/fd2_program.h \ + a2xx/fd2_query.c \ + a2xx/fd2_query.h \ a2xx/fd2_rasterizer.c \ a2xx/fd2_rasterizer.h \ a2xx/fd2_resource.c \ diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_context.c b/src/gallium/drivers/freedreno/a2xx/fd2_context.c index 6d9dce2ffbe..9353e37420f 100644 --- a/src/gallium/drivers/freedreno/a2xx/fd2_context.c +++ b/src/gallium/drivers/freedreno/a2xx/fd2_context.c @@ -31,6 +31,7 @@ #include "fd2_emit.h" #include "fd2_gmem.h" #include "fd2_program.h" +#include "fd2_query.h" #include "fd2_rasterizer.h" #include "fd2_texture.h" #include "fd2_zsa.h" @@ -120,5 +121,7 @@ fd2_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) /* construct vertex state used for solid 
ops (clear, and gmem<->mem) */ fd2_ctx->solid_vertexbuf = create_solid_vertexbuf(pctx); + fd2_query_context_init(pctx); + return pctx; } diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c index 805a4cf032a..76fd7dee9d7 100644 --- a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c +++ b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c @@ -424,6 +424,15 @@ fd2_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring) OUT_RING(ring, 0x0000003b); } + /* enable perfcntrs */ + OUT_PKT0(ring, REG_A2XX_CP_PERFMON_CNTL, 1); + OUT_RING(ring, COND(fd_mesa_debug & FD_DBG_PERFC, 1)); + + /* note: perfcntrs don't work without the PM_OVERRIDE bit */ + OUT_PKT0(ring, REG_A2XX_RBBM_PM_OVERRIDE1, 2); + OUT_RING(ring, 0xffffffff); + OUT_RING(ring, 0x00000fff); + OUT_PKT0(ring, REG_A2XX_TP0_CHICKEN, 1); OUT_RING(ring, 0x00000002); diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_perfcntr.c b/src/gallium/drivers/freedreno/a2xx/fd2_perfcntr.c new file mode 100644 index 00000000000..35f084e1fa9 --- /dev/null +++ b/src/gallium/drivers/freedreno/a2xx/fd2_perfcntr.c @@ -0,0 +1,813 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Jonathan Marek + * Rob Clark + */ + +#include "freedreno_perfcntr.h" +#include "freedreno_util.h" +#include "a2xx.xml.h" +/* REG(x): token-paste the REG_A2XX_ prefix onto a register name. */ +#define REG(_x) REG_A2XX_ ## _x +/* COUNTER(sel, lo, hi): initializer filling select_reg, counter_reg_lo and counter_reg_hi, each argument expanded through REG(). */ +#define COUNTER(_sel, _lo, _hi) { \ + .select_reg = REG(_sel), \ + .counter_reg_lo = REG(_lo), \ + .counter_reg_hi = REG(_hi), \ +} +/* COUNTABLE(selector, query_type, result_type): initializer whose .name is the stringified selector identifier; the type arguments are pasted onto PIPE_DRIVER_QUERY_TYPE_ and PIPE_DRIVER_QUERY_RESULT_TYPE_. */ +#define COUNTABLE(_selector, _query_type, _result_type) { \ + .name = #_selector, \ + .selector = _selector, \ + .query_type = PIPE_DRIVER_QUERY_TYPE_ ## _query_type, \ + .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_ ## _result_type, \ +} +/* GROUP(name, counters, countables): initializer tying a name to a counter array and a countable array; both lengths come from ARRAY_SIZE, so the arguments must be real arrays, not pointers. */ +#define GROUP(_name, _counters, _countables) { \ + .name = _name, \ + .num_counters = ARRAY_SIZE(_counters), \ + .counters = _counters, \ + .num_countables = ARRAY_SIZE(_countables), \ + .countables = _countables, \ +} +/* pa_su countables: PERF_PAPC_* selectors, all declared UINT64 / AVERAGE. NOTE(review): selector identifiers are presumably enum values from a2xx.xml.h - confirm spelling there before editing. */ +static const struct fd_perfcntr_countable pa_su_countables[] = { + COUNTABLE(PERF_PAPC_PASX_REQ, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_FIRST_VECTOR, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_SECOND_VECTOR, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_FIRST_DEAD, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_SECOND_DEAD, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_VTX_KILL_DISCARD, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_VTX_NAN_DISCARD, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PA_INPUT_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PA_INPUT_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PA_INPUT_EVENT_FLAG, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PA_INPUT_FIRST_PRIM_SLOT, UINT64, AVERAGE), + 
COUNTABLE(PERF_PAPC_PA_INPUT_END_OF_PACKET, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_VV_CULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_VTX_KILL_CULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_VTX_NAN_CULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CULL_TO_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_VV_CLIP_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_POINT_CLIP_CANDIDATE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_1, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_2, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_3, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_4, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_5, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_6, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_NEAR, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_FAR, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_LEFT, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_RIGHT, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_TOP, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_BOTTOM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_TOTALLY_VISIBLE_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_CLIP_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_CULL_TO_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_1, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_2, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_3, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_4, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_5, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_6_7, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_NON_TRIVIAL_CULL, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_INPUT_PRIM, UINT64, AVERAGE), + 
COUNTABLE(PERF_PAPC_SU_INPUT_CLIP_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_INPUT_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_ZERO_AREA_CULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_BACK_FACE_CULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_FRONT_FACE_CULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_POLYMODE_FACE_CULL, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_POLYMODE_BACK_CULL, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_POLYMODE_FRONT_CULL, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_POLYMODE_INVALID_FILL, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_CLIP_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_EVENT_FLAG, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_FIRST_PRIM_SLOT, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_END_OF_PACKET, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_POLYMODE_FACE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_POLYMODE_BACK, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_POLYMODE_FRONT, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUT_CLIP_POLYMODE_FACE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUT_CLIP_POLYMODE_BACK, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUT_CLIP_POLYMODE_FRONT, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REQ_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REQ_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REQ_STALLED, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REC_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REC_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REC_STARVED_SX, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REC_STALLED, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REC_STALLED_POS_MEM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REC_STALLED_CCGSM_IN, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CCGSM_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CCGSM_BUSY, UINT64, AVERAGE), + 
COUNTABLE(PERF_PAPC_CCGSM_STALLED, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPRIM_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPRIM_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPRIM_STALLED, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPRIM_STARVED_CCGSM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPSM_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPSM_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPSM_WAIT_CLIP_VERT_ENGH, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPSM_WAIT_HIGH_PRI_SEQ, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPSM_WAIT_CLIPGA, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPSM_WAIT_AVAIL_VTE_CLIP, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPSM_WAIT_CLIP_OUTSM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPGA_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPGA_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPGA_STARVED_VTE_CLIP, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPGA_STALLED, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIP_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIP_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_STARVED_CLIP, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_STALLED_SC, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_FACENESS_CULL, UINT64, AVERAGE), +}; +/* pa_sc countables: SC_* selectors, all declared UINT64 / AVERAGE; each entry's name string is the selector identifier itself. */ +static const struct fd_perfcntr_countable pa_sc_countables[] = { + COUNTABLE(SC_SR_WINDOW_VALID, UINT64, AVERAGE), + COUNTABLE(SC_CW_WINDOW_VALID, UINT64, AVERAGE), + COUNTABLE(SC_QM_WINDOW_VALID, UINT64, AVERAGE), + COUNTABLE(SC_FW_WINDOW_VALID, UINT64, AVERAGE), + COUNTABLE(SC_EZ_WINDOW_VALID, UINT64, AVERAGE), + COUNTABLE(SC_IT_WINDOW_VALID, UINT64, AVERAGE), + COUNTABLE(SC_STARVED_BY_PA, UINT64, AVERAGE), + COUNTABLE(SC_STALLED_BY_RB_TILE, UINT64, AVERAGE), + COUNTABLE(SC_STALLED_BY_RB_SAMP, UINT64, AVERAGE), + COUNTABLE(SC_STARVED_BY_RB_EZ, UINT64, AVERAGE), + COUNTABLE(SC_STALLED_BY_SAMPLE_FF, UINT64, AVERAGE), + COUNTABLE(SC_STALLED_BY_SQ, UINT64, AVERAGE), + 
COUNTABLE(SC_STALLED_BY_SP, UINT64, AVERAGE), + COUNTABLE(SC_TOTAL_NO_PRIMS, UINT64, AVERAGE), + COUNTABLE(SC_NON_EMPTY_PRIMS, UINT64, AVERAGE), + COUNTABLE(SC_NO_TILES_PASSING_QM, UINT64, AVERAGE), + COUNTABLE(SC_NO_PIXELS_PRE_EZ, UINT64, AVERAGE), + COUNTABLE(SC_NO_PIXELS_POST_EZ, UINT64, AVERAGE), +}; +/* vgt countables: all declared UINT64 / AVERAGE. The list mixes VGT_*, RBIU_*, BIN_PRIM_*, SPARE* and TE_* selector names. */ +static const struct fd_perfcntr_countable vgt_countables[] = { + COUNTABLE(VGT_SQ_EVENT_WINDOW_ACTIVE, UINT64, AVERAGE), + COUNTABLE(VGT_SQ_SEND, UINT64, AVERAGE), + COUNTABLE(VGT_SQ_STALLED, UINT64, AVERAGE), + COUNTABLE(VGT_SQ_STARVED_BUSY, UINT64, AVERAGE), + COUNTABLE(VGT_SQ_STARVED_IDLE, UINT64, AVERAGE), + COUNTABLE(VGT_SQ_STATIC, UINT64, AVERAGE), + COUNTABLE(VGT_PA_EVENT_WINDOW_ACTIVE, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_V_SEND, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_V_STALLED, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_V_STARVED_BUSY, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_V_STARVED_IDLE, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_V_STATIC, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_P_SEND, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_P_STALLED, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_P_STARVED_BUSY, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_P_STARVED_IDLE, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_P_STATIC, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_S_SEND, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_S_STALLED, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_S_STARVED_BUSY, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_S_STARVED_IDLE, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_S_STATIC, UINT64, AVERAGE), + COUNTABLE(RBIU_FIFOS_EVENT_WINDOW_ACTIVE, UINT64, AVERAGE), + COUNTABLE(RBIU_IMMED_DATA_FIFO_STARVED, UINT64, AVERAGE), + COUNTABLE(RBIU_IMMED_DATA_FIFO_STALLED, UINT64, AVERAGE), + COUNTABLE(RBIU_DMA_REQUEST_FIFO_STARVED, UINT64, AVERAGE), + COUNTABLE(RBIU_DMA_REQUEST_FIFO_STALLED, UINT64, AVERAGE), + COUNTABLE(RBIU_DRAW_INITIATOR_FIFO_STARVED, UINT64, AVERAGE), + COUNTABLE(RBIU_DRAW_INITIATOR_FIFO_STALLED, UINT64, AVERAGE), + 
COUNTABLE(BIN_PRIM_NEAR_CULL, UINT64, AVERAGE), + COUNTABLE(BIN_PRIM_ZERO_CULL, UINT64, AVERAGE), + COUNTABLE(BIN_PRIM_FAR_CULL, UINT64, AVERAGE), + COUNTABLE(BIN_PRIM_BIN_CULL, UINT64, AVERAGE), + COUNTABLE(BIN_PRIM_FACE_CULL, UINT64, AVERAGE), + COUNTABLE(SPARE34, UINT64, AVERAGE), + COUNTABLE(SPARE35, UINT64, AVERAGE), + COUNTABLE(SPARE36, UINT64, AVERAGE), + COUNTABLE(SPARE37, UINT64, AVERAGE), + COUNTABLE(SPARE38, UINT64, AVERAGE), + COUNTABLE(SPARE39, UINT64, AVERAGE), + COUNTABLE(TE_SU_IN_VALID, UINT64, AVERAGE), + COUNTABLE(TE_SU_IN_READ, UINT64, AVERAGE), + COUNTABLE(TE_SU_IN_PRIM, UINT64, AVERAGE), + COUNTABLE(TE_SU_IN_EOP, UINT64, AVERAGE), + COUNTABLE(TE_SU_IN_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(TE_WK_IN_VALID, UINT64, AVERAGE), + COUNTABLE(TE_WK_IN_READ, UINT64, AVERAGE), + COUNTABLE(TE_OUT_PRIM_VALID, UINT64, AVERAGE), + COUNTABLE(TE_OUT_PRIM_READ, UINT64, AVERAGE), +}; +/* tcr countables: three selectors, all declared UINT64 / AVERAGE. */ +static const struct fd_perfcntr_countable tcr_countables[] = { + COUNTABLE(DGMMPD_IPMUX0_STALL, UINT64, AVERAGE), + COUNTABLE(DGMMPD_IPMUX_ALL_STALL, UINT64, AVERAGE), + COUNTABLE(OPMUX0_L2_WRITES, UINT64, AVERAGE), +}; +/* tp0 countables: *_QUADS, PIX_n_QUAD and TP_MIPMAP_LODn selectors, all declared UINT64 / AVERAGE. */ +static const struct fd_perfcntr_countable tp0_countables[] = { + COUNTABLE(POINT_QUADS, UINT64, AVERAGE), + COUNTABLE(BILIN_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_QUADS, UINT64, AVERAGE), + COUNTABLE(MIP_QUADS, UINT64, AVERAGE), + COUNTABLE(VOL_QUADS, UINT64, AVERAGE), + COUNTABLE(MIP_VOL_QUADS, UINT64, AVERAGE), + COUNTABLE(MIP_ANISO_QUADS, UINT64, AVERAGE), + COUNTABLE(VOL_ANISO_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_2_1_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_4_1_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_6_1_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_8_1_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_10_1_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_12_1_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_14_1_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_16_1_QUADS, UINT64, AVERAGE), + COUNTABLE(MIP_VOL_ANISO_QUADS, UINT64, AVERAGE), + COUNTABLE(ALIGN_2_QUADS, 
UINT64, AVERAGE), + COUNTABLE(ALIGN_4_QUADS, UINT64, AVERAGE), + COUNTABLE(PIX_0_QUAD, UINT64, AVERAGE), + COUNTABLE(PIX_1_QUAD, UINT64, AVERAGE), + COUNTABLE(PIX_2_QUAD, UINT64, AVERAGE), + COUNTABLE(PIX_3_QUAD, UINT64, AVERAGE), + COUNTABLE(PIX_4_QUAD, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD0, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD1, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD2, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD3, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD4, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD5, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD6, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD7, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD8, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD9, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD10, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD11, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD12, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD13, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD14, UINT64, AVERAGE), +}; +/* tcm countables: read-latency FIFO and starve/stall selectors, all declared UINT64 / AVERAGE. */ +static const struct fd_perfcntr_countable tcm_countables[] = { + COUNTABLE(QUAD0_RD_LAT_FIFO_EMPTY, UINT64, AVERAGE), + COUNTABLE(QUAD0_RD_LAT_FIFO_4TH_FULL, UINT64, AVERAGE), + COUNTABLE(QUAD0_RD_LAT_FIFO_HALF_FULL, UINT64, AVERAGE), + COUNTABLE(QUAD0_RD_LAT_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(QUAD0_RD_LAT_FIFO_LT_4TH_FULL, UINT64, AVERAGE), + COUNTABLE(READ_STARVED_QUAD0, UINT64, AVERAGE), + COUNTABLE(READ_STARVED, UINT64, AVERAGE), + COUNTABLE(READ_STALLED_QUAD0, UINT64, AVERAGE), + COUNTABLE(READ_STALLED, UINT64, AVERAGE), + COUNTABLE(VALID_READ_QUAD0, UINT64, AVERAGE), + COUNTABLE(TC_TP_STARVED_QUAD0, UINT64, AVERAGE), + COUNTABLE(TC_TP_STARVED, UINT64, AVERAGE), +}; +/* tcf countables: all declared UINT64 / AVERAGE. This is a long list (phases, TPC_*, OPCODE_*, FIFO, tag/set and FG/MEM selectors). */ +static const struct fd_perfcntr_countable tcf_countables[] = { + COUNTABLE(VALID_CYCLES, UINT64, AVERAGE), + COUNTABLE(SINGLE_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_PHASES, UINT64, AVERAGE), + COUNTABLE(MIP_PHASES, UINT64, AVERAGE), + COUNTABLE(VOL_PHASES, UINT64, AVERAGE), + COUNTABLE(MIP_VOL_PHASES, UINT64, AVERAGE), + 
COUNTABLE(MIP_ANISO_PHASES, UINT64, AVERAGE), + COUNTABLE(VOL_ANISO_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_2_1_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_4_1_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_6_1_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_8_1_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_10_1_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_12_1_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_14_1_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_16_1_PHASES, UINT64, AVERAGE), + COUNTABLE(MIP_VOL_ANISO_PHASES, UINT64, AVERAGE), + COUNTABLE(ALIGN_2_PHASES, UINT64, AVERAGE), + COUNTABLE(ALIGN_4_PHASES, UINT64, AVERAGE), + COUNTABLE(TPC_BUSY, UINT64, AVERAGE), + COUNTABLE(TPC_STALLED, UINT64, AVERAGE), + COUNTABLE(TPC_STARVED, UINT64, AVERAGE), + COUNTABLE(TPC_WORKING, UINT64, AVERAGE), + COUNTABLE(TPC_WALKER_BUSY, UINT64, AVERAGE), + COUNTABLE(TPC_WALKER_STALLED, UINT64, AVERAGE), + COUNTABLE(TPC_WALKER_WORKING, UINT64, AVERAGE), + COUNTABLE(TPC_ALIGNER_BUSY, UINT64, AVERAGE), + COUNTABLE(TPC_ALIGNER_STALLED, UINT64, AVERAGE), + COUNTABLE(TPC_ALIGNER_STALLED_BY_BLEND, UINT64, AVERAGE), + COUNTABLE(TPC_ALIGNER_STALLED_BY_CACHE, UINT64, AVERAGE), + COUNTABLE(TPC_ALIGNER_WORKING, UINT64, AVERAGE), + COUNTABLE(TPC_BLEND_BUSY, UINT64, AVERAGE), + COUNTABLE(TPC_BLEND_SYNC, UINT64, AVERAGE), + COUNTABLE(TPC_BLEND_STARVED, UINT64, AVERAGE), + COUNTABLE(TPC_BLEND_WORKING, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x00, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x01, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x04, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x10, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x11, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x12, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x13, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x18, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x19, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x1A, UINT64, AVERAGE), + COUNTABLE(OPCODE_OTHER, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_0_EMPTY, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_0_LT_HALF_FULL, UINT64, AVERAGE), + 
COUNTABLE(IN_FIFO_0_HALF_FULL, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_0_FULL, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_TPC_EMPTY, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_TPC_LT_HALF_FULL, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_TPC_HALF_FULL, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_TPC_FULL, UINT64, AVERAGE), + COUNTABLE(TPC_TC_XFC, UINT64, AVERAGE), + COUNTABLE(TPC_TC_STATE, UINT64, AVERAGE), + COUNTABLE(TC_STALL, UINT64, AVERAGE), + COUNTABLE(QUAD0_TAPS, UINT64, AVERAGE), + COUNTABLE(QUADS, UINT64, AVERAGE), + COUNTABLE(TCA_SYNC_STALL, UINT64, AVERAGE), + COUNTABLE(TAG_STALL, UINT64, AVERAGE), + COUNTABLE(TCB_SYNC_STALL, UINT64, AVERAGE), + COUNTABLE(TCA_VALID, UINT64, AVERAGE), + COUNTABLE(PROBES_VALID, UINT64, AVERAGE), + COUNTABLE(MISS_STALL, UINT64, AVERAGE), + COUNTABLE(FETCH_FIFO_STALL, UINT64, AVERAGE), + COUNTABLE(TCO_STALL, UINT64, AVERAGE), + COUNTABLE(ANY_STALL, UINT64, AVERAGE), + COUNTABLE(TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(TAG_HITS, UINT64, AVERAGE), + COUNTABLE(SUB_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET0_INVALIDATES, UINT64, AVERAGE), + COUNTABLE(SET1_INVALIDATES, UINT64, AVERAGE), + COUNTABLE(SET2_INVALIDATES, UINT64, AVERAGE), + COUNTABLE(SET3_INVALIDATES, UINT64, AVERAGE), + COUNTABLE(SET0_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET1_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET2_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET3_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET0_TAG_HITS, UINT64, AVERAGE), + COUNTABLE(SET1_TAG_HITS, UINT64, AVERAGE), + COUNTABLE(SET2_TAG_HITS, UINT64, AVERAGE), + COUNTABLE(SET3_TAG_HITS, UINT64, AVERAGE), + COUNTABLE(SET0_SUB_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET1_SUB_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET2_SUB_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET3_SUB_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT1, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT2, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT3, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT4, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT5, UINT64, 
AVERAGE), + COUNTABLE(SET0_EVICT6, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT7, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT8, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT1, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT2, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT3, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT4, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT5, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT6, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT7, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT8, UINT64, AVERAGE), + COUNTABLE(SET2_EVICT1, UINT64, AVERAGE), + COUNTABLE(SET2_EVICT2, UINT64, AVERAGE), + COUNTABLE(SET2_EVICT3, UINT64, AVERAGE), + COUNTABLE(SET2_EVICT4, UINT64, AVERAGE), + COUNTABLE(SET2_EVICT5, UINT64, AVERAGE), + COUNTABLE(SET2_EVICT6, UINT64, AVERAGE), + COUNTABLE(SET2_EVICT7, UINT64, AVERAGE), + COUNTABLE(SET2_EVICT8, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT1, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT2, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT3, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT4, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT5, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT6, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT7, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT8, UINT64, AVERAGE), + COUNTABLE(FF_EMPTY, UINT64, AVERAGE), + COUNTABLE(FF_LT_HALF_FULL, UINT64, AVERAGE), + COUNTABLE(FF_HALF_FULL, UINT64, AVERAGE), + COUNTABLE(FF_FULL, UINT64, AVERAGE), + COUNTABLE(FF_XFC, UINT64, AVERAGE), + COUNTABLE(FF_STALLED, UINT64, AVERAGE), + COUNTABLE(FG_MASKS, UINT64, AVERAGE), + COUNTABLE(FG_LEFT_MASKS, UINT64, AVERAGE), + COUNTABLE(FG_LEFT_MASK_STALLED, UINT64, AVERAGE), + COUNTABLE(FG_LEFT_NOT_DONE_STALL, UINT64, AVERAGE), + COUNTABLE(FG_LEFT_FG_STALL, UINT64, AVERAGE), + COUNTABLE(FG_LEFT_SECTORS, UINT64, AVERAGE), + COUNTABLE(FG0_REQUESTS, UINT64, AVERAGE), + COUNTABLE(FG0_STALLED, UINT64, AVERAGE), + COUNTABLE(MEM_REQ512, UINT64, AVERAGE), + COUNTABLE(MEM_REQ_SENT, UINT64, AVERAGE), + COUNTABLE(MEM_LOCAL_READ_REQ, UINT64, AVERAGE), + COUNTABLE(TC0_MH_STALLED, UINT64, AVERAGE), +}; + +static const 
struct fd_perfcntr_countable sq_countables[] = { /* sq countables: all entries below are declared UINT64 / AVERAGE; names are the stringified selector identifiers */ + COUNTABLE(SQ_PIXEL_VECTORS_SUB, UINT64, AVERAGE), + COUNTABLE(SQ_VERTEX_VECTORS_SUB, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_ACTIVE_VTX_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_ACTIVE_PIX_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_ACTIVE_VTX_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_ACTIVE_PIX_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_EXPORT_CYCLES, UINT64, AVERAGE), + COUNTABLE(SQ_ALU_CST_WRITTEN, UINT64, AVERAGE), + COUNTABLE(SQ_TEX_CST_WRITTEN, UINT64, AVERAGE), + COUNTABLE(SQ_ALU_CST_STALL, UINT64, AVERAGE), + COUNTABLE(SQ_ALU_TEX_STALL, UINT64, AVERAGE), + COUNTABLE(SQ_INST_WRITTEN, UINT64, AVERAGE), + COUNTABLE(SQ_BOOLEAN_WRITTEN, UINT64, AVERAGE), + COUNTABLE(SQ_LOOPS_WRITTEN, UINT64, AVERAGE), + COUNTABLE(SQ_PIXEL_SWAP_IN, UINT64, AVERAGE), + COUNTABLE(SQ_PIXEL_SWAP_OUT, UINT64, AVERAGE), + COUNTABLE(SQ_VERTEX_SWAP_IN, UINT64, AVERAGE), + COUNTABLE(SQ_VERTEX_SWAP_OUT, UINT64, AVERAGE), + COUNTABLE(SQ_ALU_VTX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_TEX_VTX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_VC_VTX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_CF_VTX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_ALU_PIX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_TEX_PIX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_VC_PIX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_CF_PIX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_ALU_NOPS, UINT64, AVERAGE), + COUNTABLE(SQ_PRED_SKIP, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_ALU_STALL_SIMD0_VTX, UINT64, AVERAGE), + 
COUNTABLE(SQ_SYNC_ALU_STALL_SIMD1_VTX, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_TEX_STALL_VTX, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_VC_STALL_VTX, UINT64, AVERAGE), + COUNTABLE(SQ_CONSTANTS_USED_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_CONSTANTS_SENT_SP_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_GPR_STALL_VTX, UINT64, AVERAGE), + COUNTABLE(SQ_GPR_STALL_PIX, UINT64, AVERAGE), + COUNTABLE(SQ_VTX_RS_STALL, UINT64, AVERAGE), + COUNTABLE(SQ_PIX_RS_STALL, UINT64, AVERAGE), + COUNTABLE(SQ_SX_PC_FULL, UINT64, AVERAGE), + COUNTABLE(SQ_SX_EXP_BUFF_FULL, UINT64, AVERAGE), + COUNTABLE(SQ_SX_POS_BUFF_FULL, UINT64, AVERAGE), + COUNTABLE(SQ_INTERP_QUADS, UINT64, AVERAGE), + COUNTABLE(SQ_INTERP_ACTIVE, UINT64, AVERAGE), + COUNTABLE(SQ_IN_PIXEL_STALL, UINT64, AVERAGE), + COUNTABLE(SQ_IN_VTX_STALL, UINT64, AVERAGE), + COUNTABLE(SQ_VTX_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_VTX_VECTOR2, UINT64, AVERAGE), + COUNTABLE(SQ_VTX_VECTOR3, UINT64, AVERAGE), + COUNTABLE(SQ_VTX_VECTOR4, UINT64, AVERAGE), + COUNTABLE(SQ_PIXEL_VECTOR1, UINT64, AVERAGE), + COUNTABLE(SQ_PIXEL_VECTOR23, UINT64, AVERAGE), + COUNTABLE(SQ_PIXEL_VECTOR4, UINT64, AVERAGE), + COUNTABLE(SQ_CONSTANTS_USED_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_CONSTANTS_SENT_SP_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_SX_MEM_EXP_FULL, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_ACTIVE_VTX_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_ACTIVE_PIX_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD3, UINT64, AVERAGE), + COUNTABLE(SQ_PERFCOUNT_VTX_QUAL_TP_DONE, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD3, UINT64, AVERAGE), + COUNTABLE(SQ_PERFCOUNT_PIX_QUAL_TP_DONE, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD3, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD3, UINT64, AVERAGE), + 
COUNTABLE(SQ_SYNC_ALU_STALL_SIMD2_VTX, UINT64, AVERAGE), + COUNTABLE(SQ_PERFCOUNT_VTX_POP_THREAD, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_ALU_STALL_SIMD0_PIX, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_ALU_STALL_SIMD1_PIX, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_ALU_STALL_SIMD2_PIX, UINT64, AVERAGE), + COUNTABLE(SQ_PERFCOUNT_PIX_POP_THREAD, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_TEX_STALL_PIX, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_VC_STALL_PIX, UINT64, AVERAGE), + COUNTABLE(SQ_CONSTANTS_USED_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_CONSTANTS_SENT_SP_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_PERFCOUNT_VTX_DEALLOC_ACK, UINT64, AVERAGE), + COUNTABLE(SQ_PERFCOUNT_PIX_DEALLOC_ACK, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD3, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD3, UINT64, AVERAGE), + COUNTABLE(VC_PERF_STATIC, UINT64, AVERAGE), + COUNTABLE(VC_PERF_STALLED, UINT64, AVERAGE), + COUNTABLE(VC_PERF_STARVED, UINT64, AVERAGE), + COUNTABLE(VC_PERF_SEND, UINT64, AVERAGE), + COUNTABLE(VC_PERF_ACTUAL_STARVED, UINT64, AVERAGE), + COUNTABLE(PIXEL_THREAD_0_ACTIVE, UINT64, AVERAGE), + COUNTABLE(VERTEX_THREAD_0_ACTIVE, UINT64, AVERAGE), + COUNTABLE(PIXEL_THREAD_0_NUMBER, UINT64, AVERAGE), + COUNTABLE(VERTEX_THREAD_0_NUMBER, UINT64, AVERAGE), + COUNTABLE(VERTEX_EVENT_NUMBER, UINT64, AVERAGE), + COUNTABLE(PIXEL_EVENT_NUMBER, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_EF_PUSH, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_EF_POP_EVENT, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_EF_POP_NEW_VTX, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_EF_POP_DEALLOC, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_EF_POP_PVECTOR, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_EF_POP_PVECTOR_X, UINT64, AVERAGE), 
+ COUNTABLE(PTRBUFF_EF_POP_PVECTOR_VNZ, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_PB_DEALLOC, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_PI_STATE_PPB_POP, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_PI_RTR, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_PI_READ_EN, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_PI_BUFF_SWAP, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_SQ_FREE_BUFF, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_SQ_DEC, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_SC_VALID_CNTL_EVENT, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_SC_VALID_IJ_XFER, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_SC_NEW_VECTOR_1_Q, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_QUAL_NEW_VECTOR, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_QUAL_EVENT, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_END_BUFFER, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_FILL_QUAD, UINT64, AVERAGE), + COUNTABLE(VERTS_WRITTEN_SPI, UINT64, AVERAGE), + COUNTABLE(TP_FETCH_INSTR_EXEC, UINT64, AVERAGE), + COUNTABLE(TP_FETCH_INSTR_REQ, UINT64, AVERAGE), + COUNTABLE(TP_DATA_RETURN, UINT64, AVERAGE), + COUNTABLE(SPI_WRITE_CYCLES_SP, UINT64, AVERAGE), + COUNTABLE(SPI_WRITES_SP, UINT64, AVERAGE), + COUNTABLE(SP_ALU_INSTR_EXEC, UINT64, AVERAGE), + COUNTABLE(SP_CONST_ADDR_TO_SQ, UINT64, AVERAGE), + COUNTABLE(SP_PRED_KILLS_TO_SQ, UINT64, AVERAGE), + COUNTABLE(SP_EXPORT_CYCLES_TO_SX, UINT64, AVERAGE), + COUNTABLE(SP_EXPORTS_TO_SX, UINT64, AVERAGE), + COUNTABLE(SQ_CYCLES_ELAPSED, UINT64, AVERAGE), + COUNTABLE(SQ_TCFS_OPT_ALLOC_EXEC, UINT64, AVERAGE), + COUNTABLE(SQ_TCFS_NO_OPT_ALLOC, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_NO_OPT_ALLOC, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_NO_OPT_ALLOC, UINT64, AVERAGE), + COUNTABLE(SQ_TCFS_ARB_XFC_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ARB_XFC_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_ARB_XFC_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_TCFS_CFS_UPDATE_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_CFS_UPDATE_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_CFS_UPDATE_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_VTX_PUSH_THREAD_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_VTX_POP_THREAD_CNT, 
UINT64, AVERAGE), + COUNTABLE(SQ_PIX_PUSH_THREAD_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_PIX_POP_THREAD_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_PIX_TOTAL, UINT64, AVERAGE), + COUNTABLE(SQ_PIX_KILLED, UINT64, AVERAGE), +}; +/* sx countables: SX_* selectors, all declared UINT64 / AVERAGE. */ +static const struct fd_perfcntr_countable sx_countables[] = { + COUNTABLE(SX_EXPORT_VECTORS, UINT64, AVERAGE), + COUNTABLE(SX_DUMMY_QUADS, UINT64, AVERAGE), + COUNTABLE(SX_ALPHA_FAIL, UINT64, AVERAGE), + COUNTABLE(SX_RB_QUAD_BUSY, UINT64, AVERAGE), + COUNTABLE(SX_RB_COLOR_BUSY, UINT64, AVERAGE), + COUNTABLE(SX_RB_QUAD_STALL, UINT64, AVERAGE), + COUNTABLE(SX_RB_COLOR_STALL, UINT64, AVERAGE), +}; +/* rb countables: RBPERF_* selectors, all declared UINT64 / AVERAGE. Many selectors come in X / X_MAX pairs. */ +static const struct fd_perfcntr_countable rb_countables[] = { + COUNTABLE(RBPERF_CNTX_BUSY, UINT64, AVERAGE), + COUNTABLE(RBPERF_CNTX_BUSY_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_SX_QUAD_STARVED, UINT64, AVERAGE), + COUNTABLE(RBPERF_SX_QUAD_STARVED_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_GA_GC_CH0_SYS_REQ, UINT64, AVERAGE), + COUNTABLE(RBPERF_GA_GC_CH0_SYS_REQ_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_GA_GC_CH1_SYS_REQ, UINT64, AVERAGE), + COUNTABLE(RBPERF_GA_GC_CH1_SYS_REQ_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_MH_STARVED, UINT64, AVERAGE), + COUNTABLE(RBPERF_MH_STARVED_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_AZ_BC_COLOR_BUSY, UINT64, AVERAGE), + COUNTABLE(RBPERF_AZ_BC_COLOR_BUSY_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_AZ_BC_Z_BUSY, UINT64, AVERAGE), + COUNTABLE(RBPERF_AZ_BC_Z_BUSY_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SC_TILE_RTR_N, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SC_TILE_RTR_N_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SC_SAMP_RTR_N, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SC_SAMP_RTR_N_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SX_QUAD_RTR_N, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SX_QUAD_RTR_N_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SX_COLOR_RTR_N, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SX_COLOR_RTR_N_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SC_SAMP_LZ_BUSY, UINT64, AVERAGE), + 
COUNTABLE(RBPERF_RB_SC_SAMP_LZ_BUSY_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_ZXP_STALL, UINT64, AVERAGE), + COUNTABLE(RBPERF_ZXP_STALL_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_EVENT_PENDING, UINT64, AVERAGE), + COUNTABLE(RBPERF_EVENT_PENDING_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_MH_VALID, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_MH_VALID_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_SX_RB_QUAD_SEND, UINT64, AVERAGE), + COUNTABLE(RBPERF_SX_RB_COLOR_SEND, UINT64, AVERAGE), + COUNTABLE(RBPERF_SC_RB_TILE_SEND, UINT64, AVERAGE), + COUNTABLE(RBPERF_SC_RB_SAMPLE_SEND, UINT64, AVERAGE), + COUNTABLE(RBPERF_SX_RB_MEM_EXPORT, UINT64, AVERAGE), + COUNTABLE(RBPERF_SX_RB_QUAD_EVENT, UINT64, AVERAGE), + COUNTABLE(RBPERF_SC_RB_TILE_EVENT_FILTERED, UINT64, AVERAGE), + COUNTABLE(RBPERF_SC_RB_TILE_EVENT_ALL, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SC_EZ_SEND, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SX_INDEX_SEND, UINT64, AVERAGE), + COUNTABLE(RBPERF_GMEM_INTFO_RD, UINT64, AVERAGE), + COUNTABLE(RBPERF_GMEM_INTF1_RD, UINT64, AVERAGE), + COUNTABLE(RBPERF_GMEM_INTFO_WR, UINT64, AVERAGE), + COUNTABLE(RBPERF_GMEM_INTF1_WR, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_CP_CONTEXT_DONE, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_CP_CACHE_FLUSH, UINT64, AVERAGE), + COUNTABLE(RBPERF_ZPASS_DONE, UINT64, AVERAGE), + COUNTABLE(RBPERF_ZCMD_VALID, UINT64, AVERAGE), + COUNTABLE(RBPERF_CCMD_VALID, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_GRANT, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_C0_GRANT, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_C1_GRANT, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_FULL_BE_WR, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_REQUEST_NO_GRANT, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_TIMEOUT_PULSE, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_LIN_TIMEOUT_PULSE, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_CAM_HIT_FLUSHING, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_counter pa_su_counters[] = { + COUNTER(PA_SU_PERFCOUNTER0_SELECT, PA_SU_PERFCOUNTER0_LOW, 
PA_SU_PERFCOUNTER0_HI), + COUNTER(PA_SU_PERFCOUNTER1_SELECT, PA_SU_PERFCOUNTER1_LOW, PA_SU_PERFCOUNTER1_HI), + COUNTER(PA_SU_PERFCOUNTER2_SELECT, PA_SU_PERFCOUNTER2_LOW, PA_SU_PERFCOUNTER2_HI), + COUNTER(PA_SU_PERFCOUNTER3_SELECT, PA_SU_PERFCOUNTER3_LOW, PA_SU_PERFCOUNTER3_HI), +}; + +static const struct fd_perfcntr_counter pa_sc_counters[] = { + COUNTER(PA_SC_PERFCOUNTER0_SELECT, PA_SC_PERFCOUNTER0_LOW, PA_SC_PERFCOUNTER0_HI), +}; + +static const struct fd_perfcntr_counter vgt_counters[] = { + COUNTER(VGT_PERFCOUNTER0_SELECT, VGT_PERFCOUNTER0_LOW, VGT_PERFCOUNTER0_HI), + COUNTER(VGT_PERFCOUNTER1_SELECT, VGT_PERFCOUNTER1_LOW, VGT_PERFCOUNTER1_HI), + COUNTER(VGT_PERFCOUNTER2_SELECT, VGT_PERFCOUNTER2_LOW, VGT_PERFCOUNTER2_HI), + COUNTER(VGT_PERFCOUNTER3_SELECT, VGT_PERFCOUNTER3_LOW, VGT_PERFCOUNTER3_HI), +}; + +static const struct fd_perfcntr_counter tcr_counters[] = { + COUNTER(TCR_PERFCOUNTER0_SELECT, TCR_PERFCOUNTER0_LOW, TCR_PERFCOUNTER0_HI), + COUNTER(TCR_PERFCOUNTER1_SELECT, TCR_PERFCOUNTER1_LOW, TCR_PERFCOUNTER1_HI), +}; + +static const struct fd_perfcntr_counter tp0_counters[] = { + COUNTER(TP0_PERFCOUNTER0_SELECT, TP0_PERFCOUNTER0_LOW, TP0_PERFCOUNTER0_HI), + COUNTER(TP0_PERFCOUNTER1_SELECT, TP0_PERFCOUNTER1_LOW, TP0_PERFCOUNTER1_HI), +}; + +static const struct fd_perfcntr_counter tcm_counters[] = { + COUNTER(TCM_PERFCOUNTER0_SELECT, TCM_PERFCOUNTER0_LOW, TCM_PERFCOUNTER0_HI), + COUNTER(TCM_PERFCOUNTER1_SELECT, TCM_PERFCOUNTER1_LOW, TCM_PERFCOUNTER1_HI), +}; + +static const struct fd_perfcntr_counter tcf_counters[] = { + COUNTER(TCF_PERFCOUNTER0_SELECT, TCF_PERFCOUNTER0_LOW, TCF_PERFCOUNTER0_HI), + COUNTER(TCF_PERFCOUNTER1_SELECT, TCF_PERFCOUNTER1_LOW, TCF_PERFCOUNTER1_HI), + COUNTER(TCF_PERFCOUNTER2_SELECT, TCF_PERFCOUNTER2_LOW, TCF_PERFCOUNTER2_HI), + COUNTER(TCF_PERFCOUNTER3_SELECT, TCF_PERFCOUNTER3_LOW, TCF_PERFCOUNTER3_HI), + COUNTER(TCF_PERFCOUNTER4_SELECT, TCF_PERFCOUNTER4_LOW, TCF_PERFCOUNTER4_HI), + COUNTER(TCF_PERFCOUNTER5_SELECT, 
TCF_PERFCOUNTER5_LOW, TCF_PERFCOUNTER5_HI), + COUNTER(TCF_PERFCOUNTER6_SELECT, TCF_PERFCOUNTER6_LOW, TCF_PERFCOUNTER6_HI), + COUNTER(TCF_PERFCOUNTER7_SELECT, TCF_PERFCOUNTER7_LOW, TCF_PERFCOUNTER7_HI), + COUNTER(TCF_PERFCOUNTER8_SELECT, TCF_PERFCOUNTER8_LOW, TCF_PERFCOUNTER8_HI), + COUNTER(TCF_PERFCOUNTER9_SELECT, TCF_PERFCOUNTER9_LOW, TCF_PERFCOUNTER9_HI), + COUNTER(TCF_PERFCOUNTER10_SELECT, TCF_PERFCOUNTER10_LOW, TCF_PERFCOUNTER10_HI), + COUNTER(TCF_PERFCOUNTER11_SELECT, TCF_PERFCOUNTER11_LOW, TCF_PERFCOUNTER11_HI), +}; + +static const struct fd_perfcntr_counter sq_counters[] = { + COUNTER(SQ_PERFCOUNTER0_SELECT, SQ_PERFCOUNTER0_LOW, SQ_PERFCOUNTER0_HI), + COUNTER(SQ_PERFCOUNTER1_SELECT, SQ_PERFCOUNTER1_LOW, SQ_PERFCOUNTER1_HI), + COUNTER(SQ_PERFCOUNTER2_SELECT, SQ_PERFCOUNTER2_LOW, SQ_PERFCOUNTER2_HI), + COUNTER(SQ_PERFCOUNTER3_SELECT, SQ_PERFCOUNTER3_LOW, SQ_PERFCOUNTER3_HI), +}; + +static const struct fd_perfcntr_countable rbbm_countables[] = { + COUNTABLE(RBBM1_COUNT, UINT64, AVERAGE), + COUNTABLE(RBBM1_NRT_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_RB_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_SQ_CNTX0_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_SQ_CNTX17_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_VGT_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_VGT_NODMA_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_PA_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_SC_CNTX_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_TPC_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_TC_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_SX_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_CP_COHER_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_CP_NRT_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_GFX_IDLE_STALL, UINT64, AVERAGE), + COUNTABLE(RBBM1_INTERRUPT, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_countable cp_countables[] = { + COUNTABLE(ALWAYS_COUNT, UINT64, AVERAGE), + COUNTABLE(TRANS_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(TRANS_FIFO_AF, UINT64, AVERAGE), + COUNTABLE(RCIU_PFPTRANS_WAIT, UINT64, AVERAGE), + 
COUNTABLE(RCIU_NRTTRANS_WAIT, UINT64, AVERAGE), + COUNTABLE(CSF_NRT_READ_WAIT, UINT64, AVERAGE), + COUNTABLE(CSF_I1_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(CSF_I2_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(CSF_ST_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(CSF_RING_ROQ_FULL, UINT64, AVERAGE), + COUNTABLE(CSF_I1_ROQ_FULL, UINT64, AVERAGE), + COUNTABLE(CSF_I2_ROQ_FULL, UINT64, AVERAGE), + COUNTABLE(CSF_ST_ROQ_FULL, UINT64, AVERAGE), + COUNTABLE(MIU_TAG_MEM_FULL, UINT64, AVERAGE), + COUNTABLE(MIU_WRITECLEAN, UINT64, AVERAGE), + COUNTABLE(MIU_NRT_WRITE_STALLED, UINT64, AVERAGE), + COUNTABLE(MIU_NRT_READ_STALLED, UINT64, AVERAGE), + COUNTABLE(ME_WRITE_CONFIRM_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(ME_VS_DEALLOC_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(ME_PS_DEALLOC_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(ME_REGS_VS_EVENT_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(ME_REGS_PS_EVENT_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(ME_REGS_CF_EVENT_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(ME_MICRO_RB_STARVED, UINT64, AVERAGE), + COUNTABLE(ME_MICRO_I1_STARVED, UINT64, AVERAGE), + COUNTABLE(ME_MICRO_I2_STARVED, UINT64, AVERAGE), + COUNTABLE(ME_MICRO_ST_STARVED, UINT64, AVERAGE), + COUNTABLE(RCIU_RBBM_DWORD_SENT, UINT64, AVERAGE), + COUNTABLE(ME_BUSY_CLOCKS, UINT64, AVERAGE), + COUNTABLE(ME_WAIT_CONTEXT_AVAIL, UINT64, AVERAGE), + COUNTABLE(PFP_TYPE0_PACKET, UINT64, AVERAGE), + COUNTABLE(PFP_TYPE3_PACKET, UINT64, AVERAGE), + COUNTABLE(CSF_RB_WPTR_NEQ_RPTR, UINT64, AVERAGE), + COUNTABLE(CSF_I1_SIZE_NEQ_ZERO, UINT64, AVERAGE), + COUNTABLE(CSF_I2_SIZE_NEQ_ZERO, UINT64, AVERAGE), + COUNTABLE(CSF_RBI1I2_FETCHING, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_counter sx_counters[] = { + COUNTER(SX_PERFCOUNTER0_SELECT, SX_PERFCOUNTER0_LOW, SX_PERFCOUNTER0_HI), +}; + +// We don't have the enums for MH perfcntrs +#if 0 +static const struct fd_perfcntr_counter mh_counters[] = { + COUNTER(MH_PERFCOUNTER0_SELECT, MH_PERFCOUNTER0_LOW, MH_PERFCOUNTER0_HI), + COUNTER(MH_PERFCOUNTER1_SELECT, 
MH_PERFCOUNTER1_LOW, MH_PERFCOUNTER1_HI), +}; +#endif + +static const struct fd_perfcntr_counter rbbm_counters[] = { + COUNTER(RBBM_PERFCOUNTER1_SELECT, RBBM_PERFCOUNTER1_LO, RBBM_PERFCOUNTER1_HI), +}; + +static const struct fd_perfcntr_counter cp_counters[] = { + COUNTER(CP_PERFCOUNTER_SELECT, CP_PERFCOUNTER_LO, CP_PERFCOUNTER_HI), +}; + +static const struct fd_perfcntr_counter rb_counters[] = { + COUNTER(RB_PERFCOUNTER0_SELECT, RB_PERFCOUNTER0_LOW, RB_PERFCOUNTER0_HI), +}; + +const struct fd_perfcntr_group a2xx_perfcntr_groups[] = { + GROUP("PA_SU", pa_su_counters, pa_su_countables), + GROUP("PA_SC", pa_sc_counters, pa_sc_countables), + GROUP("VGT", vgt_counters, vgt_countables), + GROUP("TCR", tcr_counters, tcr_countables), + GROUP("TP0", tp0_counters, tp0_countables), + GROUP("TCM", tcm_counters, tcm_countables), + GROUP("TCF", tcf_counters, tcf_countables), + GROUP("SQ", sq_counters, sq_countables), + GROUP("SX", sx_counters, sx_countables), +// GROUP("MH", mh_counters, mh_countables), + GROUP("RBBM", rbbm_counters, rbbm_countables), + GROUP("CP", cp_counters, cp_countables), + GROUP("RB", rb_counters, rb_countables), +}; + +const unsigned a2xx_num_perfcntr_groups = ARRAY_SIZE(a2xx_perfcntr_groups); diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_query.c b/src/gallium/drivers/freedreno/a2xx/fd2_query.c new file mode 100644 index 00000000000..9e5bb450cd4 --- /dev/null +++ b/src/gallium/drivers/freedreno/a2xx/fd2_query.c @@ -0,0 +1,244 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above 
copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Jonathan Marek
+ *    Rob Clark
+ */
+
+/* NOTE: perfcntrs are 48-bits but we only have 32-bit accumulate (?)
+ * so we work with 32-bits only. we accumulate start/stop separately,
+ * which differs from a5xx but works with only accumulate (no add/neg)
+ */
+
+#include "freedreno_query_acc.h"
+#include "freedreno_resource.h"
+
+#include "fd2_context.h"
+#include "fd2_query.h"
+
+/* One slot of the query's sample buffer.  'start' receives the counter value
+ * written when the query is resumed and 'stop' the value written when it is
+ * paused; the reported result is stop - start (32-bit arithmetic).
+ */
+struct PACKED fd2_query_sample {
+	uint32_t start;
+	uint32_t stop;
+};
+
+/* Address of a single field of element 'idx' in an array of
+ * fd2_query_sample.  Expands to four comma-separated arguments (the sample
+ * buffer's bo, the byte offset of the field, and two zeros), so it can only
+ * be used as the tail of an argument list, as in
+ * OUT_RELOCW(ring, query_sample_idx(aq, i, start)).
+ *
+ * 'idx' is parenthesized so that an expression such as 'i + 1' is scaled as
+ * a whole; 'field' must stay a bare member name because it feeds offsetof().
+ */
+#define query_sample_idx(aq, idx, field)          \
+	fd_resource((aq)->prsc)->bo,                  \
+	((idx) * sizeof(struct fd2_query_sample)) +   \
+	offsetof(struct fd2_query_sample, field),     \
+	0, 0
+
+/* Same as query_sample_idx() for the first (index 0) fd2_query_sample: */
+#define query_sample(aq, field)                   \
+	query_sample_idx(aq, 0, field)
+
+/*
+ * Performance Counter (batch) queries:
+ *
+ * Only one of these is active at a time, per the design of the gallium
+ * batch_query API.  One perfcntr query tracks N query_types,
+ * each of which has a 'fd_batch_query_entry' that maps it back to
+ * the associated group and counter.
+ */
+
+/* Maps one requested query_type back to its perfcntr group and countable. */
+struct fd_batch_query_entry {
+	uint8_t gid;        /* group-id */
+	uint8_t cid;        /* countable-id within the group */
+};
+
+/* Per-query private data, stored in fd_acc_query::query_data.  Allocated
+ * with room for num_query_entries trailing entries.
+ */
+struct fd_batch_query_data {
+	struct fd_screen *screen;
+	unsigned num_query_entries;
+	struct fd_batch_query_entry query_entries[];
+};
+
+/* Resume hook of the perfcntr sample provider.
+ *
+ * Emits into batch->draw: a wait-for-idle, then for every query entry a
+ * write of the countable's selector to the select register of the next
+ * unused counter of the entry's group, then for every entry a CP_REG_TO_MEM
+ * that stores the counter's low register into sample[i].start.
+ *
+ * Counters are handed out in entry order per group (first entry of a group
+ * gets counter 0, the second counter 1, ...).  perfcntr_pause() walks the
+ * entries the same way, so both address the same counter for entry i.
+ */
+static void
+perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch)
+{
+	struct fd_batch_query_data *data = aq->query_data;
+	struct fd_screen *screen = data->screen;
+	struct fd_ringbuffer *ring = batch->draw;
+
+	/* next free counter index for each group */
+	unsigned counters_per_group[screen->num_perfcntr_groups];
+	memset(counters_per_group, 0, sizeof(counters_per_group));
+
+	fd_wfi(batch, ring);
+
+	/* configure performance counters for the requested queries: */
+	for (unsigned i = 0; i < data->num_query_entries; i++) {
+		struct fd_batch_query_entry *entry = &data->query_entries[i];
+		const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
+		unsigned counter_idx = counters_per_group[entry->gid]++;
+
+		debug_assert(counter_idx < g->num_counters);
+
+		OUT_PKT0(ring, g->counters[counter_idx].select_reg, 1);
+		OUT_RING(ring, g->countables[entry->cid].selector);
+	}
+
+	/* restart the per-group numbering so the second pass visits the
+	 * counters in the same order as the first:
+	 */
+	memset(counters_per_group, 0, sizeof(counters_per_group));
+
+	/* and snapshot the start values */
+	for (unsigned i = 0; i < data->num_query_entries; i++) {
+		struct fd_batch_query_entry *entry = &data->query_entries[i];
+		const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
+		unsigned counter_idx = counters_per_group[entry->gid]++;
+		const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
+
+		/* NOTE(review): the flag is spelled CP_MEM_TO_REG_0_ACCUMULATE
+		 * although the packet is CP_REG_TO_MEM - confirm against the
+		 * packet definition that this is the intended bit.
+		 */
+		OUT_PKT3(ring, CP_REG_TO_MEM, 2);
+		OUT_RING(ring, counter->counter_reg_lo | CP_MEM_TO_REG_0_ACCUMULATE);
+		OUT_RELOCW(ring, query_sample_idx(aq, i, start));
+	}
+}
+
+/* Pause hook of the perfcntr sample provider.
+ *
+ * Emits a wait-for-idle into batch->draw and then, for every query entry, a
+ * CP_REG_TO_MEM that stores the counter's low register into sample[i].stop.
+ * Counter indices are derived exactly as in perfcntr_resume().
+ */
+static void
+perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch)
+{
+	struct fd_batch_query_data *data = aq->query_data;
+	struct fd_screen *screen = data->screen;
+	struct fd_ringbuffer *ring = batch->draw;
+
+	/* next free counter index for each group */
+	unsigned counters_per_group[screen->num_perfcntr_groups];
+	memset(counters_per_group, 0, sizeof(counters_per_group));
+
+	fd_wfi(batch, ring);
+
+	/* TODO do we need to bother to turn anything off? */
+
+	/* snapshot the end values: */
+	for (unsigned i = 0; i < data->num_query_entries; i++) {
+		struct fd_batch_query_entry *entry = &data->query_entries[i];
+		const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
+		unsigned counter_idx = counters_per_group[entry->gid]++;
+		const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
+
+		OUT_PKT3(ring, CP_REG_TO_MEM, 2);
+		OUT_RING(ring, counter->counter_reg_lo | CP_MEM_TO_REG_0_ACCUMULATE);
+		OUT_RELOCW(ring, query_sample_idx(aq, i, stop));
+	}
+}
+
+/* Result hook: 'buf' is read as an array of fd2_query_sample, one per query
+ * entry, and result->batch[i] is set to stop - start.  Both fields are
+ * uint32_t, so the difference is taken in 32 bits before being stored in
+ * the 64-bit result.
+ */
+static void
+perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf,
+		union pipe_query_result *result)
+{
+	struct fd_batch_query_data *data = aq->query_data;
+	struct fd2_query_sample *sp = buf;
+
+	for (unsigned i = 0; i < data->num_query_entries; i++)
+		result->batch[i].u64 = sp[i].stop - sp[i].start;
+}
+
+/* Sample provider handed to fd_acc_create_query2() for batch queries. */
+static const struct fd_acc_sample_provider perfcntr = {
+		.query_type = FD_QUERY_FIRST_PERFCNTR,
+		.active = FD_STAGE_DRAW | FD_STAGE_CLEAR,
+		.resume = perfcntr_resume,
+		.pause = perfcntr_pause,
+		.result = perfcntr_accumulate_result,
+};
+
+/* pipe_context::create_batch_query implementation.
+ *
+ * Validates each of the 'num_queries' entries of 'query_types', resolves it
+ * to a (group, countable) pair and creates an accumulating query whose
+ * sample buffer holds one fd2_query_sample per entry.
+ *
+ * Returns the new query, or NULL if an allocation fails, a query_type is not
+ * a known perfcntr query, or more countables of one group are requested than
+ * that group has counters.
+ */
+static struct pipe_query *
+fd2_create_batch_query(struct pipe_context *pctx,
+		unsigned num_queries, unsigned *query_types)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	struct fd_screen *screen = ctx->screen;
+	struct fd_query *q;
+	struct fd_acc_query *aq;
+	struct fd_batch_query_data *data;
+
+	data = CALLOC_VARIANT_LENGTH_STRUCT(fd_batch_query_data,
+			num_queries * sizeof(data->query_entries[0]));
+	if (!data)
+		return NULL;
+
+	data->screen = screen;
+	data->num_query_entries = num_queries;
+
+	/* validate the requested query_types and ensure we don't try
+	 * to request more query_types of a given group than we have
+	 * counters:
+	 */
+	unsigned counters_per_group[screen->num_perfcntr_groups];
+	memset(counters_per_group, 0, sizeof(counters_per_group));
+
+	for (unsigned i = 0; i < num_queries; i++) {
+		/* may wrap if query_types[i] is too small; rejected just below */
+		unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
+
+		/* verify valid query_type, ie. is it actually a perfcntr? */
+		if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
+				(idx >= screen->num_perfcntr_queries)) {
+			debug_printf("invalid batch query query_type: %u\n", query_types[i]);
+			goto error;
+		}
+
+		struct fd_batch_query_entry *entry = &data->query_entries[i];
+		struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
+
+		entry->gid = pq->group_id;
+
+		/* the perfcntr_queries[] table flattens all the countables
+		 * for each group in series, ie:
+		 *
+		 *   (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
+		 *
+		 * So to find the countable index just step back through the
+		 * table to find the first entry with the same group-id.
+		 *
+		 * (entry->cid counts up from the zero it was allocated with.)
+		 */
+		while (pq > screen->perfcntr_queries) {
+			pq--;
+			if (pq->group_id == entry->gid)
+				entry->cid++;
+		}
+
+		if (counters_per_group[entry->gid] >=
+				screen->perfcntr_groups[entry->gid].num_counters) {
+			debug_printf("too many counters for group %u\n", entry->gid);
+			goto error;
+		}
+
+		counters_per_group[entry->gid]++;
+	}
+
+	q = fd_acc_create_query2(ctx, 0, &perfcntr);
+	if (!q)
+		goto error;
+	aq = fd_acc_query(q);
+
+	/* sample buffer size is based on # of queries: */
+	aq->size = num_queries * sizeof(struct fd2_query_sample);
+	aq->query_data = data;
+
+	return (struct pipe_query *)q;
+
+error:
+	free(data);
+	return NULL;
+}
+
+/* Installs the a2xx query entry points on the context: the generic
+ * accumulating-query helpers for create_query/query_set_stage, and
+ * fd2_create_batch_query for perfcntr batch queries.
+ */
+void
+fd2_query_context_init(struct pipe_context *pctx)
+{
+	struct fd_context *ctx = fd_context(pctx);
+
+	ctx->create_query = fd_acc_create_query;
+	ctx->query_set_stage = fd_acc_query_set_stage;
+
+	pctx->create_batch_query = fd2_create_batch_query;
+}
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_query.h b/src/gallium/drivers/freedreno/a2xx/fd2_query.h
new file mode 100644
index 00000000000..4d9f3e39f81
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_query.h
@@ -0,0 +1,35 @@
+/*
+ *
Copyright (C) 2019 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Jonathan Marek + * Rob Clark + */ + +#ifndef FD2_QUERY_H_ +#define FD2_QUERY_H_ + +#include "pipe/p_context.h" + +void fd2_query_context_init(struct pipe_context *pctx); + +#endif /* FD2_QUERY_H_ */ diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_screen.c b/src/gallium/drivers/freedreno/a2xx/fd2_screen.c index 3b44147cb3d..f0253238123 100644 --- a/src/gallium/drivers/freedreno/a2xx/fd2_screen.c +++ b/src/gallium/drivers/freedreno/a2xx/fd2_screen.c @@ -105,6 +105,9 @@ fd2_screen_is_format_supported(struct pipe_screen *pscreen, return retval == usage; } +extern const struct fd_perfcntr_group a2xx_perfcntr_groups[]; +extern const unsigned a2xx_num_perfcntr_groups; + void fd2_screen_init(struct pipe_screen *pscreen) { @@ -114,4 +117,9 @@ fd2_screen_init(struct pipe_screen *pscreen) pscreen->context_create = fd2_context_create; pscreen->is_format_supported = fd2_screen_is_format_supported; screen->setup_slices = fd2_setup_slices; + + if (fd_mesa_debug & FD_DBG_PERFC) { + screen->perfcntr_groups = a2xx_perfcntr_groups; + screen->num_perfcntr_groups = a2xx_num_perfcntr_groups; + } } diff --git a/src/gallium/drivers/freedreno/meson.build b/src/gallium/drivers/freedreno/meson.build index 1e3a3037014..90c0b62aeb6 100644 --- a/src/gallium/drivers/freedreno/meson.build +++ b/src/gallium/drivers/freedreno/meson.build @@ -68,8 +68,11 @@ files_libfreedreno = files( 'a2xx/fd2_emit.h', 'a2xx/fd2_gmem.c', 'a2xx/fd2_gmem.h', + 'a2xx/fd2_perfcntr.c', 'a2xx/fd2_program.c', 'a2xx/fd2_program.h', + 'a2xx/fd2_query.c', + 'a2xx/fd2_query.h', 'a2xx/fd2_rasterizer.c', 'a2xx/fd2_rasterizer.h', 'a2xx/fd2_resource.c', -- 2.30.2