From ebbb6b8eaa06c0eac93fee689223c6a98d3f98bc Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Thu, 7 Mar 2019 14:22:24 -0500
Subject: [PATCH] freedreno/a6xx: perfcntrs

Signed-off-by: Rob Clark
---
 .../drivers/freedreno/Makefile.sources        |   1 +
 .../drivers/freedreno/a6xx/fd6_perfcntr.c     | 805 ++++++++++++++++++
 .../drivers/freedreno/a6xx/fd6_query.c        | 205 ++++-
 .../drivers/freedreno/a6xx/fd6_screen.c       |   8 +
 src/gallium/drivers/freedreno/meson.build     |   1 +
 5 files changed, 1019 insertions(+), 1 deletion(-)
 create mode 100644 src/gallium/drivers/freedreno/a6xx/fd6_perfcntr.c

diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources
index 034a8dc109b..d316bf56804 100644
--- a/src/gallium/drivers/freedreno/Makefile.sources
+++ b/src/gallium/drivers/freedreno/Makefile.sources
@@ -183,6 +183,7 @@ a6xx_SOURCES := \
 	a6xx/fd6_gmem.h \
 	a6xx/fd6_image.c \
 	a6xx/fd6_image.h \
+	a6xx/fd6_perfcntr.c \
 	a6xx/fd6_program.c \
 	a6xx/fd6_program.h \
 	a6xx/fd6_query.c \
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_perfcntr.c b/src/gallium/drivers/freedreno/a6xx/fd6_perfcntr.c
new file mode 100644
index 00000000000..b8b13c8df02
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_perfcntr.c
@@ -0,0 +1,805 @@
+/*
+ * Copyright (C) 2019 Rob Clark
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ * + * Authors: + * Rob Clark + */ + +#ifndef FD6_PERFCNTR_H_ +#define FD6_PERFCNTR_H_ + +#include "freedreno_perfcntr.h" +#include "fd6_format.h" + +#define REG(_x) REG_A6XX_ ## _x + +#define COUNTER(_sel, _lo, _hi) { \ + .select_reg = REG(_sel), \ + .counter_reg_lo = REG(_lo), \ + .counter_reg_hi = REG(_hi), \ +} + +#define COUNTER2(_sel, _lo, _hi, _en, _clr) { \ + .select_reg = REG(_sel), \ + .counter_reg_lo = REG(_lo), \ + .counter_reg_hi = REG(_hi), \ + .enable = REG(_en), \ + .clear = REG(_clr), \ +} + +#define COUNTABLE(_selector, _query_type, _result_type) { \ + .name = #_selector, \ + .selector = _selector, \ + .query_type = PIPE_DRIVER_QUERY_TYPE_ ## _query_type, \ + .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_ ## _result_type, \ +} + +#define GROUP(_name, _counters, _countables) { \ + .name = _name, \ + .num_counters = ARRAY_SIZE(_counters), \ + .counters = _counters, \ + .num_countables = ARRAY_SIZE(_countables), \ + .countables = _countables, \ +} + +static const struct fd_perfcntr_counter cp_counters[] = { +//RESERVED: for kernel +// COUNTER(CP_PERFCTR_CP_SEL_0, RBBM_PERFCTR_CP_0_LO, RBBM_PERFCTR_CP_0_HI), + COUNTER(CP_PERFCTR_CP_SEL_1, RBBM_PERFCTR_CP_1_LO, RBBM_PERFCTR_CP_1_HI), + COUNTER(CP_PERFCTR_CP_SEL_2, RBBM_PERFCTR_CP_2_LO, RBBM_PERFCTR_CP_2_HI), + COUNTER(CP_PERFCTR_CP_SEL_3, RBBM_PERFCTR_CP_3_LO, RBBM_PERFCTR_CP_3_HI), + COUNTER(CP_PERFCTR_CP_SEL_4, RBBM_PERFCTR_CP_4_LO, RBBM_PERFCTR_CP_4_HI), + COUNTER(CP_PERFCTR_CP_SEL_5, RBBM_PERFCTR_CP_5_LO, RBBM_PERFCTR_CP_5_HI), + COUNTER(CP_PERFCTR_CP_SEL_6, RBBM_PERFCTR_CP_6_LO, RBBM_PERFCTR_CP_6_HI), + COUNTER(CP_PERFCTR_CP_SEL_7, RBBM_PERFCTR_CP_7_LO, RBBM_PERFCTR_CP_7_HI), + COUNTER(CP_PERFCTR_CP_SEL_8, RBBM_PERFCTR_CP_8_LO, RBBM_PERFCTR_CP_8_HI), + COUNTER(CP_PERFCTR_CP_SEL_9, RBBM_PERFCTR_CP_9_LO, RBBM_PERFCTR_CP_9_HI), + COUNTER(CP_PERFCTR_CP_SEL_10, RBBM_PERFCTR_CP_10_LO, RBBM_PERFCTR_CP_10_HI), + COUNTER(CP_PERFCTR_CP_SEL_11, RBBM_PERFCTR_CP_11_LO, RBBM_PERFCTR_CP_11_HI), + COUNTER(CP_PERFCTR_CP_SEL_12, RBBM_PERFCTR_CP_12_LO, RBBM_PERFCTR_CP_12_HI), + COUNTER(CP_PERFCTR_CP_SEL_13, RBBM_PERFCTR_CP_13_LO, RBBM_PERFCTR_CP_13_HI), +}; + +static const struct fd_perfcntr_countable cp_countables[] = { + COUNTABLE(PERF_CP_ALWAYS_COUNT, UINT64, AVERAGE), + COUNTABLE(PERF_CP_BUSY_GFX_CORE_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_CP_BUSY_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_CP_NUM_PREEMPTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_CP_PREEMPTION_REACTION_DELAY, UINT64, AVERAGE), + COUNTABLE(PERF_CP_PREEMPTION_SWITCH_OUT_TIME, UINT64, AVERAGE), + COUNTABLE(PERF_CP_PREEMPTION_SWITCH_IN_TIME, UINT64, AVERAGE), + COUNTABLE(PERF_CP_DEAD_DRAWS_IN_BIN_RENDER, UINT64, AVERAGE), + COUNTABLE(PERF_CP_PREDICATED_DRAWS_KILLED, UINT64, AVERAGE), + COUNTABLE(PERF_CP_MODE_SWITCH, UINT64, AVERAGE), + COUNTABLE(PERF_CP_ZPASS_DONE, UINT64, AVERAGE), + COUNTABLE(PERF_CP_CONTEXT_DONE, UINT64, AVERAGE), + COUNTABLE(PERF_CP_CACHE_FLUSH, UINT64, AVERAGE), + COUNTABLE(PERF_CP_LONG_PREEMPTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_CP_SQE_I_CACHE_STARVE, UINT64, AVERAGE), + COUNTABLE(PERF_CP_SQE_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_CP_SQE_PM4_STARVE_RB_IB, UINT64, AVERAGE), + COUNTABLE(PERF_CP_SQE_PM4_STARVE_SDS, UINT64, AVERAGE), + COUNTABLE(PERF_CP_SQE_MRB_STARVE, UINT64, AVERAGE), + COUNTABLE(PERF_CP_SQE_RRB_STARVE, UINT64, AVERAGE), + COUNTABLE(PERF_CP_SQE_VSD_STARVE, UINT64, AVERAGE), + COUNTABLE(PERF_CP_VSD_DECODE_STARVE, UINT64, AVERAGE), + COUNTABLE(PERF_CP_SQE_PIPE_OUT_STALL, UINT64, AVERAGE), + COUNTABLE(PERF_CP_SQE_SYNC_STALL, UINT64, 
AVERAGE), + COUNTABLE(PERF_CP_SQE_PM4_WFI_STALL, UINT64, AVERAGE), + COUNTABLE(PERF_CP_SQE_SYS_WFI_STALL, UINT64, AVERAGE), + COUNTABLE(PERF_CP_SQE_T4_EXEC, UINT64, AVERAGE), + COUNTABLE(PERF_CP_SQE_LOAD_STATE_EXEC, UINT64, AVERAGE), + COUNTABLE(PERF_CP_SQE_SAVE_SDS_STATE, UINT64, AVERAGE), + COUNTABLE(PERF_CP_SQE_DRAW_EXEC, UINT64, AVERAGE), + COUNTABLE(PERF_CP_SQE_CTXT_REG_BUNCH_EXEC, UINT64, AVERAGE), + COUNTABLE(PERF_CP_SQE_EXEC_PROFILED, UINT64, AVERAGE), + COUNTABLE(PERF_CP_MEMORY_POOL_EMPTY, UINT64, AVERAGE), + COUNTABLE(PERF_CP_MEMORY_POOL_SYNC_STALL, UINT64, AVERAGE), + COUNTABLE(PERF_CP_MEMORY_POOL_ABOVE_THRESH, UINT64, AVERAGE), + COUNTABLE(PERF_CP_AHB_WR_STALL_PRE_DRAWS, UINT64, AVERAGE), + COUNTABLE(PERF_CP_AHB_STALL_SQE_GMU, UINT64, AVERAGE), + COUNTABLE(PERF_CP_AHB_STALL_SQE_WR_OTHER, UINT64, AVERAGE), + COUNTABLE(PERF_CP_AHB_STALL_SQE_RD_OTHER, UINT64, AVERAGE), + COUNTABLE(PERF_CP_CLUSTER0_EMPTY, UINT64, AVERAGE), + COUNTABLE(PERF_CP_CLUSTER1_EMPTY, UINT64, AVERAGE), + COUNTABLE(PERF_CP_CLUSTER2_EMPTY, UINT64, AVERAGE), + COUNTABLE(PERF_CP_CLUSTER3_EMPTY, UINT64, AVERAGE), + COUNTABLE(PERF_CP_CLUSTER4_EMPTY, UINT64, AVERAGE), + COUNTABLE(PERF_CP_CLUSTER5_EMPTY, UINT64, AVERAGE), + COUNTABLE(PERF_CP_PM4_DATA, UINT64, AVERAGE), + COUNTABLE(PERF_CP_PM4_HEADERS, UINT64, AVERAGE), + COUNTABLE(PERF_CP_VBIF_READ_BEATS, UINT64, AVERAGE), + COUNTABLE(PERF_CP_VBIF_WRITE_BEATS, UINT64, AVERAGE), + COUNTABLE(PERF_CP_SQE_INSTR_COUNTER, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_counter ccu_counters[] = { + COUNTER(RB_PERFCTR_CCU_SEL_0, RBBM_PERFCTR_CCU_0_LO, RBBM_PERFCTR_CCU_0_HI), + COUNTER(RB_PERFCTR_CCU_SEL_1, RBBM_PERFCTR_CCU_1_LO, RBBM_PERFCTR_CCU_1_HI), + COUNTER(RB_PERFCTR_CCU_SEL_2, RBBM_PERFCTR_CCU_2_LO, RBBM_PERFCTR_CCU_2_HI), + COUNTER(RB_PERFCTR_CCU_SEL_3, RBBM_PERFCTR_CCU_3_LO, RBBM_PERFCTR_CCU_3_HI), + COUNTER(RB_PERFCTR_CCU_SEL_4, RBBM_PERFCTR_CCU_4_LO, RBBM_PERFCTR_CCU_4_HI), +}; + +static const struct fd_perfcntr_countable ccu_countables[] = { + COUNTABLE(PERF_CCU_BUSY_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_STALL_CYCLES_RB_DEPTH_RETURN, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_STALL_CYCLES_RB_COLOR_RETURN, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_STARVE_CYCLES_FLAG_RETURN, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_DEPTH_BLOCKS, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_COLOR_BLOCKS, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_DEPTH_BLOCK_HIT, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_COLOR_BLOCK_HIT, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_PARTIAL_BLOCK_READ, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_GMEM_READ, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_GMEM_WRITE, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_DEPTH_READ_FLAG0_COUNT, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_DEPTH_READ_FLAG1_COUNT, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_DEPTH_READ_FLAG2_COUNT, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_DEPTH_READ_FLAG3_COUNT, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_DEPTH_READ_FLAG4_COUNT, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_DEPTH_READ_FLAG5_COUNT, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_DEPTH_READ_FLAG6_COUNT, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_DEPTH_READ_FLAG8_COUNT, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_COLOR_READ_FLAG0_COUNT, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_COLOR_READ_FLAG1_COUNT, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_COLOR_READ_FLAG2_COUNT, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_COLOR_READ_FLAG3_COUNT, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_COLOR_READ_FLAG4_COUNT, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_COLOR_READ_FLAG5_COUNT, UINT64, AVERAGE), + 
COUNTABLE(PERF_CCU_COLOR_READ_FLAG6_COUNT, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_COLOR_READ_FLAG8_COUNT, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_2D_RD_REQ, UINT64, AVERAGE), + COUNTABLE(PERF_CCU_2D_WR_REQ, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_counter tse_counters[] = { + COUNTER(GRAS_PERFCTR_TSE_SEL_0, RBBM_PERFCTR_TSE_0_LO, RBBM_PERFCTR_TSE_0_HI), + COUNTER(GRAS_PERFCTR_TSE_SEL_1, RBBM_PERFCTR_TSE_1_LO, RBBM_PERFCTR_TSE_1_HI), + COUNTER(GRAS_PERFCTR_TSE_SEL_2, RBBM_PERFCTR_TSE_2_LO, RBBM_PERFCTR_TSE_2_HI), + COUNTER(GRAS_PERFCTR_TSE_SEL_3, RBBM_PERFCTR_TSE_3_LO, RBBM_PERFCTR_TSE_3_HI), +}; + +static const struct fd_perfcntr_countable tse_countables[] = { + COUNTABLE(PERF_TSE_BUSY_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_TSE_CLIPPING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_TSE_STALL_CYCLES_RAS, UINT64, AVERAGE), + COUNTABLE(PERF_TSE_STALL_CYCLES_LRZ_BARYPLANE, UINT64, AVERAGE), + COUNTABLE(PERF_TSE_STALL_CYCLES_LRZ_ZPLANE, UINT64, AVERAGE), + COUNTABLE(PERF_TSE_STARVE_CYCLES_PC, UINT64, AVERAGE), + COUNTABLE(PERF_TSE_INPUT_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_TSE_INPUT_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_TSE_TRIVAL_REJ_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_TSE_CLIPPED_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_TSE_ZERO_AREA_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_TSE_FACENESS_CULLED_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_TSE_ZERO_PIXEL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_TSE_OUTPUT_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_TSE_OUTPUT_VISIBLE_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_TSE_CINVOCATION, UINT64, AVERAGE), + COUNTABLE(PERF_TSE_CPRIMITIVES, UINT64, AVERAGE), + COUNTABLE(PERF_TSE_2D_INPUT_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_TSE_2D_ALIVE_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_TSE_CLIP_PLANES, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_counter ras_counters[] = { + COUNTER(GRAS_PERFCTR_RAS_SEL_0, RBBM_PERFCTR_RAS_0_LO, RBBM_PERFCTR_RAS_0_HI), + COUNTER(GRAS_PERFCTR_RAS_SEL_1, RBBM_PERFCTR_RAS_1_LO, RBBM_PERFCTR_RAS_1_HI), + COUNTER(GRAS_PERFCTR_RAS_SEL_2, RBBM_PERFCTR_RAS_2_LO, RBBM_PERFCTR_RAS_2_HI), + COUNTER(GRAS_PERFCTR_RAS_SEL_3, RBBM_PERFCTR_RAS_3_LO, RBBM_PERFCTR_RAS_3_HI), +}; + +static const struct fd_perfcntr_countable ras_countables[] = { + COUNTABLE(PERF_RAS_BUSY_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_RAS_SUPERTILE_ACTIVE_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_RAS_STALL_CYCLES_LRZ, UINT64, AVERAGE), + COUNTABLE(PERF_RAS_STARVE_CYCLES_TSE, UINT64, AVERAGE), + COUNTABLE(PERF_RAS_SUPER_TILES, UINT64, AVERAGE), + COUNTABLE(PERF_RAS_8X4_TILES, UINT64, AVERAGE), + COUNTABLE(PERF_RAS_MASKGEN_ACTIVE, UINT64, AVERAGE), + COUNTABLE(PERF_RAS_FULLY_COVERED_SUPER_TILES, UINT64, AVERAGE), + COUNTABLE(PERF_RAS_FULLY_COVERED_8X4_TILES, UINT64, AVERAGE), + COUNTABLE(PERF_RAS_PRIM_KILLED_INVISILBE, UINT64, AVERAGE), + COUNTABLE(PERF_RAS_SUPERTILE_GEN_ACTIVE_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_RAS_LRZ_INTF_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_RAS_BLOCKS, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_counter lrz_counters[] = { + COUNTER(GRAS_PERFCTR_LRZ_SEL_0, RBBM_PERFCTR_LRZ_0_LO, RBBM_PERFCTR_LRZ_0_HI), + COUNTER(GRAS_PERFCTR_LRZ_SEL_1, RBBM_PERFCTR_LRZ_1_LO, RBBM_PERFCTR_LRZ_1_HI), + COUNTER(GRAS_PERFCTR_LRZ_SEL_2, RBBM_PERFCTR_LRZ_2_LO, RBBM_PERFCTR_LRZ_2_HI), + COUNTER(GRAS_PERFCTR_LRZ_SEL_3, RBBM_PERFCTR_LRZ_3_LO, RBBM_PERFCTR_LRZ_3_HI), +}; + +static const struct fd_perfcntr_countable lrz_countables[] = { + COUNTABLE(PERF_LRZ_BUSY_CYCLES, UINT64, AVERAGE), + 
COUNTABLE(PERF_LRZ_STARVE_CYCLES_RAS, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_STALL_CYCLES_RB, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_STALL_CYCLES_VSC, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_STALL_CYCLES_VPC, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_STALL_CYCLES_FLAG_PREFETCH, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_STALL_CYCLES_UCHE, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_LRZ_READ, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_LRZ_WRITE, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_READ_LATENCY, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_MERGE_CACHE_UPDATING, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_PRIM_KILLED_BY_MASKGEN, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_PRIM_KILLED_BY_LRZ, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_VISIBLE_PRIM_AFTER_LRZ, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_FULL_8X8_TILES, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_PARTIAL_8X8_TILES, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_TILE_KILLED, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_TOTAL_PIXEL, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_VISIBLE_PIXEL_AFTER_LRZ, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_FULLY_COVERED_TILES, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_PARTIAL_COVERED_TILES, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_FEEDBACK_ACCEPT, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_FEEDBACK_DISCARD, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_FEEDBACK_STALL, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_STALL_CYCLES_RB_ZPLANE, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_STALL_CYCLES_RB_BPLANE, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_STALL_CYCLES_VC, UINT64, AVERAGE), + COUNTABLE(PERF_LRZ_RAS_MASK_TRANS, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_counter hlsq_counters[] = { + COUNTER(HLSQ_PERFCTR_HLSQ_SEL_0, RBBM_PERFCTR_HLSQ_0_LO, RBBM_PERFCTR_HLSQ_0_HI), + COUNTER(HLSQ_PERFCTR_HLSQ_SEL_1, RBBM_PERFCTR_HLSQ_1_LO, RBBM_PERFCTR_HLSQ_1_HI), + COUNTER(HLSQ_PERFCTR_HLSQ_SEL_2, RBBM_PERFCTR_HLSQ_2_LO, RBBM_PERFCTR_HLSQ_2_HI), + COUNTER(HLSQ_PERFCTR_HLSQ_SEL_3, RBBM_PERFCTR_HLSQ_3_LO, RBBM_PERFCTR_HLSQ_3_HI), + COUNTER(HLSQ_PERFCTR_HLSQ_SEL_4, RBBM_PERFCTR_HLSQ_4_LO, RBBM_PERFCTR_HLSQ_4_HI), + COUNTER(HLSQ_PERFCTR_HLSQ_SEL_5, RBBM_PERFCTR_HLSQ_5_LO, RBBM_PERFCTR_HLSQ_5_HI), +// TODO did we loose some HLSQ counters or are they just missing from xml +// COUNTER(HLSQ_PERFCTR_HLSQ_SEL_6, RBBM_PERFCTR_HLSQ_6_LO, RBBM_PERFCTR_HLSQ_6_HI), +// COUNTER(HLSQ_PERFCTR_HLSQ_SEL_7, RBBM_PERFCTR_HLSQ_7_LO, RBBM_PERFCTR_HLSQ_7_HI), +}; + +static const struct fd_perfcntr_countable hlsq_countables[] = { + COUNTABLE(PERF_HLSQ_BUSY_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_HLSQ_STALL_CYCLES_UCHE, UINT64, AVERAGE), + COUNTABLE(PERF_HLSQ_STALL_CYCLES_SP_STATE, UINT64, AVERAGE), + COUNTABLE(PERF_HLSQ_STALL_CYCLES_SP_FS_STAGE, UINT64, AVERAGE), + COUNTABLE(PERF_HLSQ_UCHE_LATENCY_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_HLSQ_UCHE_LATENCY_COUNT, UINT64, AVERAGE), + COUNTABLE(PERF_HLSQ_FS_STAGE_1X_WAVES, UINT64, AVERAGE), + COUNTABLE(PERF_HLSQ_FS_STAGE_2X_WAVES, UINT64, AVERAGE), + COUNTABLE(PERF_HLSQ_QUADS, UINT64, AVERAGE), + COUNTABLE(PERF_HLSQ_CS_INVOCATIONS, UINT64, AVERAGE), + COUNTABLE(PERF_HLSQ_COMPUTE_DRAWCALLS, UINT64, AVERAGE), + COUNTABLE(PERF_HLSQ_FS_DATA_WAIT_PROGRAMMING, UINT64, AVERAGE), + COUNTABLE(PERF_HLSQ_DUAL_FS_PROG_ACTIVE, UINT64, AVERAGE), + COUNTABLE(PERF_HLSQ_DUAL_VS_PROG_ACTIVE, UINT64, AVERAGE), + COUNTABLE(PERF_HLSQ_FS_BATCH_COUNT_ZERO, UINT64, AVERAGE), + COUNTABLE(PERF_HLSQ_VS_BATCH_COUNT_ZERO, UINT64, AVERAGE), + COUNTABLE(PERF_HLSQ_WAVE_PENDING_NO_QUAD, UINT64, AVERAGE), + COUNTABLE(PERF_HLSQ_WAVE_PENDING_NO_PRIM_BASE, UINT64, AVERAGE), + 
COUNTABLE(PERF_HLSQ_STALL_CYCLES_VPC, UINT64, AVERAGE), + COUNTABLE(PERF_HLSQ_PIXELS, UINT64, AVERAGE), + COUNTABLE(PERF_HLSQ_DRAW_MODE_SWITCH_VSFS_SYNC, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_counter pc_counters[] = { + COUNTER(PC_PERFCTR_PC_SEL_0, RBBM_PERFCTR_PC_0_LO, RBBM_PERFCTR_PC_0_HI), + COUNTER(PC_PERFCTR_PC_SEL_1, RBBM_PERFCTR_PC_1_LO, RBBM_PERFCTR_PC_1_HI), + COUNTER(PC_PERFCTR_PC_SEL_2, RBBM_PERFCTR_PC_2_LO, RBBM_PERFCTR_PC_2_HI), + COUNTER(PC_PERFCTR_PC_SEL_3, RBBM_PERFCTR_PC_3_LO, RBBM_PERFCTR_PC_3_HI), + COUNTER(PC_PERFCTR_PC_SEL_4, RBBM_PERFCTR_PC_4_LO, RBBM_PERFCTR_PC_4_HI), + COUNTER(PC_PERFCTR_PC_SEL_5, RBBM_PERFCTR_PC_5_LO, RBBM_PERFCTR_PC_5_HI), + COUNTER(PC_PERFCTR_PC_SEL_6, RBBM_PERFCTR_PC_6_LO, RBBM_PERFCTR_PC_6_HI), + COUNTER(PC_PERFCTR_PC_SEL_7, RBBM_PERFCTR_PC_7_LO, RBBM_PERFCTR_PC_7_HI), +}; + +static const struct fd_perfcntr_countable pc_countables[] = { + COUNTABLE(PERF_PC_BUSY_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_PC_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_PC_STALL_CYCLES_VFD, UINT64, AVERAGE), + COUNTABLE(PERF_PC_STALL_CYCLES_TSE, UINT64, AVERAGE), + COUNTABLE(PERF_PC_STALL_CYCLES_VPC, UINT64, AVERAGE), + COUNTABLE(PERF_PC_STALL_CYCLES_UCHE, UINT64, AVERAGE), + COUNTABLE(PERF_PC_STALL_CYCLES_TESS, UINT64, AVERAGE), + COUNTABLE(PERF_PC_STALL_CYCLES_TSE_ONLY, UINT64, AVERAGE), + COUNTABLE(PERF_PC_STALL_CYCLES_VPC_ONLY, UINT64, AVERAGE), + COUNTABLE(PERF_PC_PASS1_TF_STALL_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_INDEX, UINT64, AVERAGE), + COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_TESS_FACTOR, UINT64, AVERAGE), + COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_VIZ_STREAM, UINT64, AVERAGE), + COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_POSITION, UINT64, AVERAGE), + COUNTABLE(PERF_PC_STARVE_CYCLES_DI, UINT64, AVERAGE), + COUNTABLE(PERF_PC_VIS_STREAMS_LOADED, UINT64, AVERAGE), + COUNTABLE(PERF_PC_INSTANCES, UINT64, AVERAGE), + COUNTABLE(PERF_PC_VPC_PRIMITIVES, UINT64, AVERAGE), + COUNTABLE(PERF_PC_DEAD_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PC_LIVE_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PC_VERTEX_HITS, UINT64, AVERAGE), + COUNTABLE(PERF_PC_IA_VERTICES, UINT64, AVERAGE), + COUNTABLE(PERF_PC_IA_PRIMITIVES, UINT64, AVERAGE), + COUNTABLE(PERF_PC_GS_PRIMITIVES, UINT64, AVERAGE), + COUNTABLE(PERF_PC_HS_INVOCATIONS, UINT64, AVERAGE), + COUNTABLE(PERF_PC_DS_INVOCATIONS, UINT64, AVERAGE), + COUNTABLE(PERF_PC_VS_INVOCATIONS, UINT64, AVERAGE), + COUNTABLE(PERF_PC_GS_INVOCATIONS, UINT64, AVERAGE), + COUNTABLE(PERF_PC_DS_PRIMITIVES, UINT64, AVERAGE), + COUNTABLE(PERF_PC_VPC_POS_DATA_TRANSACTION, UINT64, AVERAGE), + COUNTABLE(PERF_PC_3D_DRAWCALLS, UINT64, AVERAGE), + COUNTABLE(PERF_PC_2D_DRAWCALLS, UINT64, AVERAGE), + COUNTABLE(PERF_PC_NON_DRAWCALL_GLOBAL_EVENTS, UINT64, AVERAGE), + COUNTABLE(PERF_TESS_BUSY_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_TESS_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_TESS_STALL_CYCLES_PC, UINT64, AVERAGE), + COUNTABLE(PERF_TESS_STARVE_CYCLES_PC, UINT64, AVERAGE), + COUNTABLE(PERF_PC_TSE_TRANSACTION, UINT64, AVERAGE), + COUNTABLE(PERF_PC_TSE_VERTEX, UINT64, AVERAGE), + COUNTABLE(PERF_PC_TESS_PC_UV_TRANS, UINT64, AVERAGE), + COUNTABLE(PERF_PC_TESS_PC_UV_PATCHES, UINT64, AVERAGE), + COUNTABLE(PERF_PC_TESS_FACTOR_TRANS, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_counter rb_counters[] = { + COUNTER(RB_PERFCTR_RB_SEL_0, RBBM_PERFCTR_RB_0_LO, RBBM_PERFCTR_RB_0_HI), + COUNTER(RB_PERFCTR_RB_SEL_1, RBBM_PERFCTR_RB_1_LO, RBBM_PERFCTR_RB_1_HI), + COUNTER(RB_PERFCTR_RB_SEL_2, RBBM_PERFCTR_RB_2_LO, 
RBBM_PERFCTR_RB_2_HI), + COUNTER(RB_PERFCTR_RB_SEL_3, RBBM_PERFCTR_RB_3_LO, RBBM_PERFCTR_RB_3_HI), + COUNTER(RB_PERFCTR_RB_SEL_4, RBBM_PERFCTR_RB_4_LO, RBBM_PERFCTR_RB_4_HI), + COUNTER(RB_PERFCTR_RB_SEL_5, RBBM_PERFCTR_RB_5_LO, RBBM_PERFCTR_RB_5_HI), + COUNTER(RB_PERFCTR_RB_SEL_6, RBBM_PERFCTR_RB_6_LO, RBBM_PERFCTR_RB_6_HI), + COUNTER(RB_PERFCTR_RB_SEL_7, RBBM_PERFCTR_RB_7_LO, RBBM_PERFCTR_RB_7_HI), +}; + +static const struct fd_perfcntr_countable rb_countables[] = { + COUNTABLE(PERF_RB_BUSY_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_RB_STALL_CYCLES_HLSQ, UINT64, AVERAGE), + COUNTABLE(PERF_RB_STALL_CYCLES_FIFO0_FULL, UINT64, AVERAGE), + COUNTABLE(PERF_RB_STALL_CYCLES_FIFO1_FULL, UINT64, AVERAGE), + COUNTABLE(PERF_RB_STALL_CYCLES_FIFO2_FULL, UINT64, AVERAGE), + COUNTABLE(PERF_RB_STARVE_CYCLES_SP, UINT64, AVERAGE), + COUNTABLE(PERF_RB_STARVE_CYCLES_LRZ_TILE, UINT64, AVERAGE), + COUNTABLE(PERF_RB_STARVE_CYCLES_CCU, UINT64, AVERAGE), + COUNTABLE(PERF_RB_STARVE_CYCLES_Z_PLANE, UINT64, AVERAGE), + COUNTABLE(PERF_RB_STARVE_CYCLES_BARY_PLANE, UINT64, AVERAGE), + COUNTABLE(PERF_RB_Z_WORKLOAD, UINT64, AVERAGE), + COUNTABLE(PERF_RB_HLSQ_ACTIVE, UINT64, AVERAGE), + COUNTABLE(PERF_RB_Z_READ, UINT64, AVERAGE), + COUNTABLE(PERF_RB_Z_WRITE, UINT64, AVERAGE), + COUNTABLE(PERF_RB_C_READ, UINT64, AVERAGE), + COUNTABLE(PERF_RB_C_WRITE, UINT64, AVERAGE), + COUNTABLE(PERF_RB_TOTAL_PASS, UINT64, AVERAGE), + COUNTABLE(PERF_RB_Z_PASS, UINT64, AVERAGE), + COUNTABLE(PERF_RB_Z_FAIL, UINT64, AVERAGE), + COUNTABLE(PERF_RB_S_FAIL, UINT64, AVERAGE), + COUNTABLE(PERF_RB_BLENDED_FXP_COMPONENTS, UINT64, AVERAGE), + COUNTABLE(PERF_RB_BLENDED_FP16_COMPONENTS, UINT64, AVERAGE), + COUNTABLE(PERF_RB_PS_INVOCATIONS, UINT64, AVERAGE), + COUNTABLE(PERF_RB_2D_ALIVE_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_RB_2D_STALL_CYCLES_A2D, UINT64, AVERAGE), + COUNTABLE(PERF_RB_2D_STARVE_CYCLES_SRC, UINT64, AVERAGE), + COUNTABLE(PERF_RB_2D_STARVE_CYCLES_SP, UINT64, AVERAGE), + COUNTABLE(PERF_RB_2D_STARVE_CYCLES_DST, UINT64, AVERAGE), + COUNTABLE(PERF_RB_2D_VALID_PIXELS, UINT64, AVERAGE), + COUNTABLE(PERF_RB_3D_PIXELS, UINT64, AVERAGE), + COUNTABLE(PERF_RB_BLENDER_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_RB_ZPROC_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_RB_CPROC_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_RB_SAMPLER_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_RB_STALL_CYCLES_CCU_COLOR_READ, UINT64, AVERAGE), + COUNTABLE(PERF_RB_STALL_CYCLES_CCU_COLOR_WRITE, UINT64, AVERAGE), + COUNTABLE(PERF_RB_STALL_CYCLES_CCU_DEPTH_READ, UINT64, AVERAGE), + COUNTABLE(PERF_RB_STALL_CYCLES_CCU_DEPTH_WRITE, UINT64, AVERAGE), + COUNTABLE(PERF_RB_STALL_CYCLES_VPC, UINT64, AVERAGE), + COUNTABLE(PERF_RB_2D_INPUT_TRANS, UINT64, AVERAGE), + COUNTABLE(PERF_RB_2D_OUTPUT_RB_DST_TRANS, UINT64, AVERAGE), + COUNTABLE(PERF_RB_2D_OUTPUT_RB_SRC_TRANS, UINT64, AVERAGE), + COUNTABLE(PERF_RB_BLENDED_FP32_COMPONENTS, UINT64, AVERAGE), + COUNTABLE(PERF_RB_COLOR_PIX_TILES, UINT64, AVERAGE), + COUNTABLE(PERF_RB_STALL_CYCLES_CCU, UINT64, AVERAGE), + COUNTABLE(PERF_RB_EARLY_Z_ARB3_GRANT, UINT64, AVERAGE), + COUNTABLE(PERF_RB_LATE_Z_ARB3_GRANT, UINT64, AVERAGE), + COUNTABLE(PERF_RB_EARLY_Z_SKIP_GRANT, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_counter rbbm_counters[] = { +//RESERVED: for kernel +// COUNTER(RBBM_PERFCTR_RBBM_SEL_0, RBBM_PERFCTR_RBBM_0_LO, RBBM_PERFCTR_RBBM_0_HI), + COUNTER(RBBM_PERFCTR_RBBM_SEL_1, RBBM_PERFCTR_RBBM_1_LO, RBBM_PERFCTR_RBBM_1_HI), + COUNTER(RBBM_PERFCTR_RBBM_SEL_2, RBBM_PERFCTR_RBBM_2_LO, 
RBBM_PERFCTR_RBBM_2_HI), + COUNTER(RBBM_PERFCTR_RBBM_SEL_3, RBBM_PERFCTR_RBBM_3_LO, RBBM_PERFCTR_RBBM_3_HI), +}; + +static const struct fd_perfcntr_countable rbbm_countables[] = { + COUNTABLE(PERF_RBBM_ALWAYS_COUNT, UINT64, AVERAGE), + COUNTABLE(PERF_RBBM_ALWAYS_ON, UINT64, AVERAGE), + COUNTABLE(PERF_RBBM_TSE_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_RBBM_RAS_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_RBBM_PC_DCALL_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_RBBM_PC_VSD_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_RBBM_STATUS_MASKED, UINT64, AVERAGE), + COUNTABLE(PERF_RBBM_COM_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_RBBM_DCOM_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_RBBM_VBIF_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_RBBM_VSC_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_RBBM_TESS_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_RBBM_UCHE_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_RBBM_HLSQ_BUSY, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_counter sp_counters[] = { +//RESERVED: for kernel +// COUNTER(SP_PERFCTR_SP_SEL_0, RBBM_PERFCTR_SP_0_LO, RBBM_PERFCTR_SP_0_HI), + COUNTER(SP_PERFCTR_SP_SEL_1, RBBM_PERFCTR_SP_1_LO, RBBM_PERFCTR_SP_1_HI), + COUNTER(SP_PERFCTR_SP_SEL_2, RBBM_PERFCTR_SP_2_LO, RBBM_PERFCTR_SP_2_HI), + COUNTER(SP_PERFCTR_SP_SEL_3, RBBM_PERFCTR_SP_3_LO, RBBM_PERFCTR_SP_3_HI), + COUNTER(SP_PERFCTR_SP_SEL_4, RBBM_PERFCTR_SP_4_LO, RBBM_PERFCTR_SP_4_HI), + COUNTER(SP_PERFCTR_SP_SEL_5, RBBM_PERFCTR_SP_5_LO, RBBM_PERFCTR_SP_5_HI), + COUNTER(SP_PERFCTR_SP_SEL_6, RBBM_PERFCTR_SP_6_LO, RBBM_PERFCTR_SP_6_HI), + COUNTER(SP_PERFCTR_SP_SEL_7, RBBM_PERFCTR_SP_7_LO, RBBM_PERFCTR_SP_7_HI), + COUNTER(SP_PERFCTR_SP_SEL_8, RBBM_PERFCTR_SP_8_LO, RBBM_PERFCTR_SP_8_HI), + COUNTER(SP_PERFCTR_SP_SEL_9, RBBM_PERFCTR_SP_9_LO, RBBM_PERFCTR_SP_9_HI), + COUNTER(SP_PERFCTR_SP_SEL_10, RBBM_PERFCTR_SP_10_LO, RBBM_PERFCTR_SP_10_HI), + COUNTER(SP_PERFCTR_SP_SEL_11, RBBM_PERFCTR_SP_11_LO, RBBM_PERFCTR_SP_11_HI), + COUNTER(SP_PERFCTR_SP_SEL_12, RBBM_PERFCTR_SP_12_LO, RBBM_PERFCTR_SP_12_HI), + COUNTER(SP_PERFCTR_SP_SEL_13, RBBM_PERFCTR_SP_13_LO, RBBM_PERFCTR_SP_13_HI), + COUNTER(SP_PERFCTR_SP_SEL_14, RBBM_PERFCTR_SP_14_LO, RBBM_PERFCTR_SP_14_HI), + COUNTER(SP_PERFCTR_SP_SEL_15, RBBM_PERFCTR_SP_15_LO, RBBM_PERFCTR_SP_15_HI), + COUNTER(SP_PERFCTR_SP_SEL_16, RBBM_PERFCTR_SP_16_LO, RBBM_PERFCTR_SP_16_HI), + COUNTER(SP_PERFCTR_SP_SEL_17, RBBM_PERFCTR_SP_17_LO, RBBM_PERFCTR_SP_17_HI), + COUNTER(SP_PERFCTR_SP_SEL_18, RBBM_PERFCTR_SP_18_LO, RBBM_PERFCTR_SP_18_HI), + COUNTER(SP_PERFCTR_SP_SEL_19, RBBM_PERFCTR_SP_19_LO, RBBM_PERFCTR_SP_19_HI), + COUNTER(SP_PERFCTR_SP_SEL_20, RBBM_PERFCTR_SP_20_LO, RBBM_PERFCTR_SP_20_HI), + COUNTER(SP_PERFCTR_SP_SEL_21, RBBM_PERFCTR_SP_21_LO, RBBM_PERFCTR_SP_21_HI), + COUNTER(SP_PERFCTR_SP_SEL_22, RBBM_PERFCTR_SP_22_LO, RBBM_PERFCTR_SP_22_HI), + COUNTER(SP_PERFCTR_SP_SEL_23, RBBM_PERFCTR_SP_23_LO, RBBM_PERFCTR_SP_23_HI), +}; + +static const struct fd_perfcntr_countable sp_countables[] = { + COUNTABLE(PERF_SP_BUSY_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_ALU_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_EFU_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_STALL_CYCLES_VPC, UINT64, AVERAGE), + COUNTABLE(PERF_SP_STALL_CYCLES_TP, UINT64, AVERAGE), + COUNTABLE(PERF_SP_STALL_CYCLES_UCHE, UINT64, AVERAGE), + COUNTABLE(PERF_SP_STALL_CYCLES_RB, UINT64, AVERAGE), + COUNTABLE(PERF_SP_NON_EXECUTION_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_WAVE_CONTEXTS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_WAVE_CONTEXT_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_FS_STAGE_WAVE_CYCLES, UINT64, AVERAGE), + 
COUNTABLE(PERF_SP_FS_STAGE_WAVE_SAMPLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_VS_STAGE_WAVE_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_VS_STAGE_WAVE_SAMPLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_FS_STAGE_DURATION_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_VS_STAGE_DURATION_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_WAVE_CTRL_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_WAVE_LOAD_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_WAVE_EMIT_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_WAVE_NOP_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_WAVE_WAIT_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_WAVE_FETCH_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_WAVE_IDLE_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_WAVE_END_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_WAVE_LONG_SYNC_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_WAVE_SHORT_SYNC_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_WAVE_JOIN_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_LM_LOAD_INSTRUCTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_LM_STORE_INSTRUCTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_LM_ATOMICS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_GM_LOAD_INSTRUCTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_GM_STORE_INSTRUCTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_GM_ATOMICS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_VS_STAGE_TEX_INSTRUCTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_VS_STAGE_EFU_INSTRUCTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_VS_STAGE_HALF_ALU_INSTRUCTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_FS_STAGE_TEX_INSTRUCTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_FS_STAGE_CFLOW_INSTRUCTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_FS_STAGE_EFU_INSTRUCTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_FS_STAGE_BARY_INSTRUCTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_VS_INSTRUCTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_FS_INSTRUCTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_ADDR_LOCK_COUNT, UINT64, AVERAGE), + COUNTABLE(PERF_SP_UCHE_READ_TRANS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_UCHE_WRITE_TRANS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_EXPORT_VPC_TRANS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_EXPORT_RB_TRANS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_PIXELS_KILLED, UINT64, AVERAGE), + COUNTABLE(PERF_SP_ICL1_REQUESTS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_ICL1_MISSES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_HS_INSTRUCTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_DS_INSTRUCTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_GS_INSTRUCTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_CS_INSTRUCTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_GPR_READ, UINT64, AVERAGE), + COUNTABLE(PERF_SP_GPR_WRITE, UINT64, AVERAGE), + COUNTABLE(PERF_SP_FS_STAGE_HALF_EFU_INSTRUCTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_VS_STAGE_HALF_EFU_INSTRUCTIONS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_LM_BANK_CONFLICTS, UINT64, AVERAGE), + COUNTABLE(PERF_SP_TEX_CONTROL_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_LOAD_CONTROL_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_FLOW_CONTROL_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_LM_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_DISPATCHER_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_SEQUENCER_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_LOW_EFFICIENCY_STARVED_BY_TP, UINT64, AVERAGE), + COUNTABLE(PERF_SP_STARVE_CYCLES_HLSQ, UINT64, AVERAGE), + 
COUNTABLE(PERF_SP_NON_EXECUTION_LS_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_WORKING_EU, UINT64, AVERAGE), + COUNTABLE(PERF_SP_ANY_EU_WORKING, UINT64, AVERAGE), + COUNTABLE(PERF_SP_WORKING_EU_FS_STAGE, UINT64, AVERAGE), + COUNTABLE(PERF_SP_ANY_EU_WORKING_FS_STAGE, UINT64, AVERAGE), + COUNTABLE(PERF_SP_WORKING_EU_VS_STAGE, UINT64, AVERAGE), + COUNTABLE(PERF_SP_ANY_EU_WORKING_VS_STAGE, UINT64, AVERAGE), + COUNTABLE(PERF_SP_WORKING_EU_CS_STAGE, UINT64, AVERAGE), + COUNTABLE(PERF_SP_ANY_EU_WORKING_CS_STAGE, UINT64, AVERAGE), + COUNTABLE(PERF_SP_GPR_READ_PREFETCH, UINT64, AVERAGE), + COUNTABLE(PERF_SP_GPR_READ_CONFLICT, UINT64, AVERAGE), + COUNTABLE(PERF_SP_GPR_WRITE_CONFLICT, UINT64, AVERAGE), + COUNTABLE(PERF_SP_GM_LOAD_LATENCY_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_GM_LOAD_LATENCY_SAMPLES, UINT64, AVERAGE), + COUNTABLE(PERF_SP_EXECUTABLE_WAVES, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_counter tp_counters[] = { + COUNTER(TPL1_PERFCTR_TP_SEL_0, RBBM_PERFCTR_TP_0_LO, RBBM_PERFCTR_TP_0_HI), + COUNTER(TPL1_PERFCTR_TP_SEL_1, RBBM_PERFCTR_TP_1_LO, RBBM_PERFCTR_TP_1_HI), + COUNTER(TPL1_PERFCTR_TP_SEL_2, RBBM_PERFCTR_TP_2_LO, RBBM_PERFCTR_TP_2_HI), + COUNTER(TPL1_PERFCTR_TP_SEL_3, RBBM_PERFCTR_TP_3_LO, RBBM_PERFCTR_TP_3_HI), + COUNTER(TPL1_PERFCTR_TP_SEL_4, RBBM_PERFCTR_TP_4_LO, RBBM_PERFCTR_TP_4_HI), + COUNTER(TPL1_PERFCTR_TP_SEL_5, RBBM_PERFCTR_TP_5_LO, RBBM_PERFCTR_TP_5_HI), + COUNTER(TPL1_PERFCTR_TP_SEL_6, RBBM_PERFCTR_TP_6_LO, RBBM_PERFCTR_TP_6_HI), + COUNTER(TPL1_PERFCTR_TP_SEL_7, RBBM_PERFCTR_TP_7_LO, RBBM_PERFCTR_TP_7_HI), + COUNTER(TPL1_PERFCTR_TP_SEL_8, RBBM_PERFCTR_TP_8_LO, RBBM_PERFCTR_TP_8_HI), + COUNTER(TPL1_PERFCTR_TP_SEL_9, RBBM_PERFCTR_TP_9_LO, RBBM_PERFCTR_TP_9_HI), + COUNTER(TPL1_PERFCTR_TP_SEL_10, RBBM_PERFCTR_TP_10_LO, RBBM_PERFCTR_TP_10_HI), + COUNTER(TPL1_PERFCTR_TP_SEL_11, RBBM_PERFCTR_TP_11_LO, RBBM_PERFCTR_TP_11_HI), +}; + +static const struct fd_perfcntr_countable tp_countables[] = { + COUNTABLE(PERF_TP_BUSY_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_TP_STALL_CYCLES_UCHE, UINT64, AVERAGE), + COUNTABLE(PERF_TP_LATENCY_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_TP_LATENCY_TRANS, UINT64, AVERAGE), + COUNTABLE(PERF_TP_FLAG_CACHE_REQUEST_SAMPLES, UINT64, AVERAGE), + COUNTABLE(PERF_TP_FLAG_CACHE_REQUEST_LATENCY, UINT64, AVERAGE), + COUNTABLE(PERF_TP_L1_CACHELINE_REQUESTS, UINT64, AVERAGE), + COUNTABLE(PERF_TP_L1_CACHELINE_MISSES, UINT64, AVERAGE), + COUNTABLE(PERF_TP_SP_TP_TRANS, UINT64, AVERAGE), + COUNTABLE(PERF_TP_TP_SP_TRANS, UINT64, AVERAGE), + COUNTABLE(PERF_TP_OUTPUT_PIXELS, UINT64, AVERAGE), + COUNTABLE(PERF_TP_FILTER_WORKLOAD_16BIT, UINT64, AVERAGE), + COUNTABLE(PERF_TP_FILTER_WORKLOAD_32BIT, UINT64, AVERAGE), + COUNTABLE(PERF_TP_QUADS_RECEIVED, UINT64, AVERAGE), + COUNTABLE(PERF_TP_QUADS_OFFSET, UINT64, AVERAGE), + COUNTABLE(PERF_TP_QUADS_SHADOW, UINT64, AVERAGE), + COUNTABLE(PERF_TP_QUADS_ARRAY, UINT64, AVERAGE), + COUNTABLE(PERF_TP_QUADS_GRADIENT, UINT64, AVERAGE), + COUNTABLE(PERF_TP_QUADS_1D, UINT64, AVERAGE), + COUNTABLE(PERF_TP_QUADS_2D, UINT64, AVERAGE), + COUNTABLE(PERF_TP_QUADS_BUFFER, UINT64, AVERAGE), + COUNTABLE(PERF_TP_QUADS_3D, UINT64, AVERAGE), + COUNTABLE(PERF_TP_QUADS_CUBE, UINT64, AVERAGE), + COUNTABLE(PERF_TP_DIVERGENT_QUADS_RECEIVED, UINT64, AVERAGE), + COUNTABLE(PERF_TP_PRT_NON_RESIDENT_EVENTS, UINT64, AVERAGE), + COUNTABLE(PERF_TP_OUTPUT_PIXELS_POINT, UINT64, AVERAGE), + COUNTABLE(PERF_TP_OUTPUT_PIXELS_BILINEAR, UINT64, AVERAGE), + COUNTABLE(PERF_TP_OUTPUT_PIXELS_MIP, UINT64, AVERAGE), + 
COUNTABLE(PERF_TP_OUTPUT_PIXELS_ANISO, UINT64, AVERAGE), + COUNTABLE(PERF_TP_OUTPUT_PIXELS_ZERO_LOD, UINT64, AVERAGE), + COUNTABLE(PERF_TP_FLAG_CACHE_REQUESTS, UINT64, AVERAGE), + COUNTABLE(PERF_TP_FLAG_CACHE_MISSES, UINT64, AVERAGE), + COUNTABLE(PERF_TP_L1_5_L2_REQUESTS, UINT64, AVERAGE), + COUNTABLE(PERF_TP_2D_OUTPUT_PIXELS, UINT64, AVERAGE), + COUNTABLE(PERF_TP_2D_OUTPUT_PIXELS_POINT, UINT64, AVERAGE), + COUNTABLE(PERF_TP_2D_OUTPUT_PIXELS_BILINEAR, UINT64, AVERAGE), + COUNTABLE(PERF_TP_2D_FILTER_WORKLOAD_16BIT, UINT64, AVERAGE), + COUNTABLE(PERF_TP_2D_FILTER_WORKLOAD_32BIT, UINT64, AVERAGE), + COUNTABLE(PERF_TP_TPA2TPC_TRANS, UINT64, AVERAGE), + COUNTABLE(PERF_TP_L1_MISSES_ASTC_1TILE, UINT64, AVERAGE), + COUNTABLE(PERF_TP_L1_MISSES_ASTC_2TILE, UINT64, AVERAGE), + COUNTABLE(PERF_TP_L1_MISSES_ASTC_4TILE, UINT64, AVERAGE), + COUNTABLE(PERF_TP_L1_5_L2_COMPRESS_REQS, UINT64, AVERAGE), + COUNTABLE(PERF_TP_L1_5_L2_COMPRESS_MISS, UINT64, AVERAGE), + COUNTABLE(PERF_TP_L1_BANK_CONFLICT, UINT64, AVERAGE), + COUNTABLE(PERF_TP_L1_5_MISS_LATENCY_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_TP_L1_5_MISS_LATENCY_TRANS, UINT64, AVERAGE), + COUNTABLE(PERF_TP_QUADS_CONSTANT_MULTIPLIED, UINT64, AVERAGE), + COUNTABLE(PERF_TP_FRONTEND_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_TP_L1_TAG_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_TP_L1_DATA_WRITE_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_TP_PRE_L1_DECOM_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_TP_BACKEND_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_TP_FLAG_CACHE_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_TP_L1_5_CACHE_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_TP_STARVE_CYCLES_SP, UINT64, AVERAGE), + COUNTABLE(PERF_TP_STARVE_CYCLES_UCHE, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_counter uche_counters[] = { + COUNTER(UCHE_PERFCTR_UCHE_SEL_0, RBBM_PERFCTR_UCHE_0_LO, RBBM_PERFCTR_UCHE_0_HI), + COUNTER(UCHE_PERFCTR_UCHE_SEL_1, RBBM_PERFCTR_UCHE_1_LO, RBBM_PERFCTR_UCHE_1_HI), + COUNTER(UCHE_PERFCTR_UCHE_SEL_2, RBBM_PERFCTR_UCHE_2_LO, RBBM_PERFCTR_UCHE_2_HI), + COUNTER(UCHE_PERFCTR_UCHE_SEL_3, RBBM_PERFCTR_UCHE_3_LO, RBBM_PERFCTR_UCHE_3_HI), + COUNTER(UCHE_PERFCTR_UCHE_SEL_4, RBBM_PERFCTR_UCHE_4_LO, RBBM_PERFCTR_UCHE_4_HI), + COUNTER(UCHE_PERFCTR_UCHE_SEL_5, RBBM_PERFCTR_UCHE_5_LO, RBBM_PERFCTR_UCHE_5_HI), + COUNTER(UCHE_PERFCTR_UCHE_SEL_6, RBBM_PERFCTR_UCHE_6_LO, RBBM_PERFCTR_UCHE_6_HI), + COUNTER(UCHE_PERFCTR_UCHE_SEL_7, RBBM_PERFCTR_UCHE_7_LO, RBBM_PERFCTR_UCHE_7_HI), + COUNTER(UCHE_PERFCTR_UCHE_SEL_8, RBBM_PERFCTR_UCHE_8_LO, RBBM_PERFCTR_UCHE_8_HI), + COUNTER(UCHE_PERFCTR_UCHE_SEL_9, RBBM_PERFCTR_UCHE_9_LO, RBBM_PERFCTR_UCHE_9_HI), + COUNTER(UCHE_PERFCTR_UCHE_SEL_10, RBBM_PERFCTR_UCHE_10_LO, RBBM_PERFCTR_UCHE_10_HI), + COUNTER(UCHE_PERFCTR_UCHE_SEL_11, RBBM_PERFCTR_UCHE_11_LO, RBBM_PERFCTR_UCHE_11_HI), +}; + +static const struct fd_perfcntr_countable uche_countables[] = { + COUNTABLE(PERF_UCHE_BUSY_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_STALL_CYCLES_ARBITER, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_VBIF_LATENCY_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_VBIF_LATENCY_SAMPLES, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_TP, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_VFD, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_HLSQ, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_LRZ, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_SP, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_READ_REQUESTS_TP, UINT64, AVERAGE), + 
COUNTABLE(PERF_UCHE_READ_REQUESTS_VFD, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_READ_REQUESTS_HLSQ, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_READ_REQUESTS_LRZ, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_READ_REQUESTS_SP, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_WRITE_REQUESTS_LRZ, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_WRITE_REQUESTS_SP, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_WRITE_REQUESTS_VPC, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_WRITE_REQUESTS_VSC, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_EVICTS, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_BANK_REQ0, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_BANK_REQ1, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_BANK_REQ2, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_BANK_REQ3, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_BANK_REQ4, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_BANK_REQ5, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_BANK_REQ6, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_BANK_REQ7, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_CH0, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_CH1, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_GMEM_READ_BEATS, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_TPH_REF_FULL, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_TPH_VICTIM_FULL, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_TPH_EXT_FULL, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_VBIF_STALL_WRITE_DATA, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_DCMP_LATENCY_SAMPLES, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_DCMP_LATENCY_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_PC, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_READ_REQUESTS_PC, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_RAM_READ_REQ, UINT64, AVERAGE), + COUNTABLE(PERF_UCHE_RAM_WRITE_REQ, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_counter vfd_counters[] = { + COUNTER(VFD_PERFCTR_VFD_SEL_0, RBBM_PERFCTR_VFD_0_LO, RBBM_PERFCTR_VFD_0_HI), + COUNTER(VFD_PERFCTR_VFD_SEL_1, RBBM_PERFCTR_VFD_1_LO, RBBM_PERFCTR_VFD_1_HI), + COUNTER(VFD_PERFCTR_VFD_SEL_2, RBBM_PERFCTR_VFD_2_LO, RBBM_PERFCTR_VFD_2_HI), + COUNTER(VFD_PERFCTR_VFD_SEL_3, RBBM_PERFCTR_VFD_3_LO, RBBM_PERFCTR_VFD_3_HI), + COUNTER(VFD_PERFCTR_VFD_SEL_4, RBBM_PERFCTR_VFD_4_LO, RBBM_PERFCTR_VFD_4_HI), + COUNTER(VFD_PERFCTR_VFD_SEL_5, RBBM_PERFCTR_VFD_5_LO, RBBM_PERFCTR_VFD_5_HI), + COUNTER(VFD_PERFCTR_VFD_SEL_6, RBBM_PERFCTR_VFD_6_LO, RBBM_PERFCTR_VFD_6_HI), + COUNTER(VFD_PERFCTR_VFD_SEL_7, RBBM_PERFCTR_VFD_7_LO, RBBM_PERFCTR_VFD_7_HI), +}; + +static const struct fd_perfcntr_countable vfd_countables[] = { + COUNTABLE(PERF_VFD_BUSY_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_VFD_STALL_CYCLES_UCHE, UINT64, AVERAGE), + COUNTABLE(PERF_VFD_STALL_CYCLES_VPC_ALLOC, UINT64, AVERAGE), + COUNTABLE(PERF_VFD_STALL_CYCLES_SP_INFO, UINT64, AVERAGE), + COUNTABLE(PERF_VFD_STALL_CYCLES_SP_ATTR, UINT64, AVERAGE), + COUNTABLE(PERF_VFD_STARVE_CYCLES_UCHE, UINT64, AVERAGE), + COUNTABLE(PERF_VFD_RBUFFER_FULL, UINT64, AVERAGE), + COUNTABLE(PERF_VFD_ATTR_INFO_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(PERF_VFD_DECODED_ATTRIBUTE_BYTES, UINT64, AVERAGE), + COUNTABLE(PERF_VFD_NUM_ATTRIBUTES, UINT64, AVERAGE), + COUNTABLE(PERF_VFD_UPPER_SHADER_FIBERS, UINT64, AVERAGE), + COUNTABLE(PERF_VFD_LOWER_SHADER_FIBERS, UINT64, AVERAGE), + COUNTABLE(PERF_VFD_MODE_0_FIBERS, UINT64, AVERAGE), + COUNTABLE(PERF_VFD_MODE_1_FIBERS, UINT64, AVERAGE), + COUNTABLE(PERF_VFD_MODE_2_FIBERS, UINT64, AVERAGE), + COUNTABLE(PERF_VFD_MODE_3_FIBERS, UINT64, AVERAGE), + COUNTABLE(PERF_VFD_MODE_4_FIBERS, UINT64, AVERAGE), + COUNTABLE(PERF_VFD_TOTAL_VERTICES, UINT64, AVERAGE), + COUNTABLE(PERF_VFDP_STALL_CYCLES_VFD, UINT64, AVERAGE), + 
COUNTABLE(PERF_VFDP_STALL_CYCLES_VFD_INDEX, UINT64, AVERAGE), + COUNTABLE(PERF_VFDP_STALL_CYCLES_VFD_PROG, UINT64, AVERAGE), + COUNTABLE(PERF_VFDP_STARVE_CYCLES_PC, UINT64, AVERAGE), + COUNTABLE(PERF_VFDP_VS_STAGE_WAVES, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_counter vpc_counters[] = { + COUNTER(VPC_PERFCTR_VPC_SEL_0, RBBM_PERFCTR_VPC_0_LO, RBBM_PERFCTR_VPC_0_HI), + COUNTER(VPC_PERFCTR_VPC_SEL_1, RBBM_PERFCTR_VPC_1_LO, RBBM_PERFCTR_VPC_1_HI), + COUNTER(VPC_PERFCTR_VPC_SEL_2, RBBM_PERFCTR_VPC_2_LO, RBBM_PERFCTR_VPC_2_HI), + COUNTER(VPC_PERFCTR_VPC_SEL_3, RBBM_PERFCTR_VPC_3_LO, RBBM_PERFCTR_VPC_3_HI), + COUNTER(VPC_PERFCTR_VPC_SEL_4, RBBM_PERFCTR_VPC_4_LO, RBBM_PERFCTR_VPC_4_HI), + COUNTER(VPC_PERFCTR_VPC_SEL_5, RBBM_PERFCTR_VPC_5_LO, RBBM_PERFCTR_VPC_5_HI), +}; + +static const struct fd_perfcntr_countable vpc_countables[] = { + COUNTABLE(PERF_VPC_BUSY_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_STALL_CYCLES_UCHE, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_STALL_CYCLES_VFD_WACK, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_STALL_CYCLES_HLSQ_PRIM_ALLOC, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_STALL_CYCLES_PC, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_STALL_CYCLES_SP_LM, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_STARVE_CYCLES_SP, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_STARVE_CYCLES_LRZ, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_PC_PRIMITIVES, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_SP_COMPONENTS, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_STALL_CYCLES_VPCRAM_POS, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_LRZ_ASSIGN_PRIMITIVES, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_RB_VISIBLE_PRIMITIVES, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_LM_TRANSACTION, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_STREAMOUT_TRANSACTION, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_VS_BUSY_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_PS_BUSY_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_VS_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_PS_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_STARVE_CYCLES_RB, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_NUM_VPCRAM_READ_POS, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_WIT_FULL_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_VPCRAM_FULL_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_LM_FULL_WAIT_FOR_INTP_END, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_NUM_VPCRAM_WRITE, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_NUM_VPCRAM_READ_SO, UINT64, AVERAGE), + COUNTABLE(PERF_VPC_NUM_ATTR_REQ_LM, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_counter vsc_counters[] = { + COUNTER(VSC_PERFCTR_VSC_SEL_0, RBBM_PERFCTR_VSC_0_LO, RBBM_PERFCTR_VSC_0_HI), + COUNTER(VSC_PERFCTR_VSC_SEL_1, RBBM_PERFCTR_VSC_1_LO, RBBM_PERFCTR_VSC_1_HI), +}; + +static const struct fd_perfcntr_countable vsc_countables[] = { + COUNTABLE(PERF_VSC_BUSY_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_VSC_WORKING_CYCLES, UINT64, AVERAGE), + COUNTABLE(PERF_VSC_STALL_CYCLES_UCHE, UINT64, AVERAGE), + COUNTABLE(PERF_VSC_EOT_NUM, UINT64, AVERAGE), + COUNTABLE(PERF_VSC_INPUT_TILES, UINT64, AVERAGE), +}; + +const struct fd_perfcntr_group a6xx_perfcntr_groups[] = { + GROUP("CP", cp_counters, cp_countables), + GROUP("CCU", ccu_counters, ccu_countables), + GROUP("TSE", tse_counters, tse_countables), + GROUP("RAS", ras_counters, ras_countables), + GROUP("LRZ", lrz_counters, lrz_countables), + GROUP("HLSQ", hlsq_counters, hlsq_countables), + GROUP("PC", pc_counters, pc_countables), + GROUP("RB", rb_counters, rb_countables), + GROUP("RBBM", rbbm_counters, rbbm_countables), + GROUP("SP", sp_counters, 
sp_countables),
+	GROUP("TP", tp_counters, tp_countables),
+	GROUP("UCHE", uche_counters, uche_countables),
+	GROUP("VFD", vfd_counters, vfd_countables),
+	GROUP("VPC", vpc_counters, vpc_countables),
+	GROUP("VSC", vsc_counters, vsc_countables),
+//	GROUP("VBIF", vbif_counters, vbif_countables),
+};
+
+const unsigned a6xx_num_perfcntr_groups = ARRAY_SIZE(a6xx_perfcntr_groups);
+
+#endif /* FD6_PERFCNTR_H_ */
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_query.c b/src/gallium/drivers/freedreno/a6xx/fd6_query.c
index c85854d5dce..cdd2778757e 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_query.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_query.c
@@ -41,11 +41,17 @@ struct PACKED fd6_query_sample {
 	uint64_t stop;
 };
 
-#define query_sample(aq, field) \
+/* offset of a single field of an array of fd6_query_sample: */
+#define query_sample_idx(aq, idx, field) \
 	fd_resource((aq)->prsc)->bo, \
+	(idx * sizeof(struct fd6_query_sample)) + \
 	offsetof(struct fd6_query_sample, field), \
 	0, 0
 
+/* offset of a single field of fd6_query_sample: */
+#define query_sample(aq, field) \
+	query_sample_idx(aq, 0, field)
+
 /*
  * Occlusion Query:
  *
@@ -246,6 +252,201 @@ static const struct fd_acc_sample_provider timestamp = {
 		.result = timestamp_accumulate_result,
 };
 
+/*
+ * Performance Counter (batch) queries:
+ *
+ * Only one of these is active at a time, per design of the gallium
+ * batch_query API. One perfcntr query tracks N query_types, each of
+ * which has a 'fd_batch_query_entry' that maps it back to the
+ * associated group and counter.
+ */
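+/*
+ * For example (purely illustrative): a batch query created with three
+ * query_types, two countables from the SP group and one from the TP
+ * group, ends up with three query_entries, and consumes two SP counters
+ * and one TP counter each time the query is resumed/paused below.
+ */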
+
+struct fd_batch_query_entry {
+	uint8_t gid;	/* group-id */
+	uint8_t cid;	/* countable-id within the group */
+};
+
+struct fd_batch_query_data {
+	struct fd_screen *screen;
+	unsigned num_query_entries;
+	struct fd_batch_query_entry query_entries[];
+};
+
+static void
+perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch)
+{
+	struct fd_batch_query_data *data = aq->query_data;
+	struct fd_screen *screen = data->screen;
+	struct fd_ringbuffer *ring = batch->draw;
+
+	unsigned counters_per_group[screen->num_perfcntr_groups];
+	memset(counters_per_group, 0, sizeof(counters_per_group));
+
+	fd_wfi(batch, ring);
+
+	/* configure performance counters for the requested queries: */
+	for (unsigned i = 0; i < data->num_query_entries; i++) {
+		struct fd_batch_query_entry *entry = &data->query_entries[i];
+		const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
+		unsigned counter_idx = counters_per_group[entry->gid]++;
+
+		debug_assert(counter_idx < g->num_counters);
+
+		OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);
+		OUT_RING(ring, g->countables[entry->cid].selector);
+	}
+
+	memset(counters_per_group, 0, sizeof(counters_per_group));
+
+	/* and snapshot the start values */
+	for (unsigned i = 0; i < data->num_query_entries; i++) {
+		struct fd_batch_query_entry *entry = &data->query_entries[i];
+		const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
+		unsigned counter_idx = counters_per_group[entry->gid]++;
+		const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
+
+		OUT_PKT7(ring, CP_REG_TO_MEM, 3);
+		OUT_RING(ring, CP_REG_TO_MEM_0_64B |
+			CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
+		OUT_RELOCW(ring, query_sample_idx(aq, i, start));
+	}
+}
+
+static void
+perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch)
+{
+	struct fd_batch_query_data *data = aq->query_data;
+	struct fd_screen *screen = data->screen;
+	struct fd_ringbuffer *ring = batch->draw;
+
+	unsigned counters_per_group[screen->num_perfcntr_groups];
+	memset(counters_per_group, 0, sizeof(counters_per_group));
+
+	fd_wfi(batch, ring);
+
+	/* TODO do we need to bother to turn anything off? */
+
+	/* snapshot the end values: */
+	for (unsigned i = 0; i < data->num_query_entries; i++) {
+		struct fd_batch_query_entry *entry = &data->query_entries[i];
+		const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
+		unsigned counter_idx = counters_per_group[entry->gid]++;
+		const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
+
+		OUT_PKT7(ring, CP_REG_TO_MEM, 3);
+		OUT_RING(ring, CP_REG_TO_MEM_0_64B |
+			CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
+		OUT_RELOCW(ring, query_sample_idx(aq, i, stop));
+	}
+
+	/* and compute the result: */
+	for (unsigned i = 0; i < data->num_query_entries; i++) {
+		/* result += stop - start: */
+		OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
+		OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
+			CP_MEM_TO_MEM_0_NEG_C);
+		OUT_RELOCW(ring, query_sample_idx(aq, i, result));	/* dst */
+		OUT_RELOC(ring, query_sample_idx(aq, i, result));	/* srcA */
+		OUT_RELOC(ring, query_sample_idx(aq, i, stop));		/* srcB */
+		OUT_RELOC(ring, query_sample_idx(aq, i, start));	/* srcC */
+	}
+}
+
+static void
+perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf,
+		union pipe_query_result *result)
+{
+	struct fd_batch_query_data *data = aq->query_data;
+	struct fd6_query_sample *sp = buf;
+
+	for (unsigned i = 0; i < data->num_query_entries; i++) {
+		result->batch[i].u64 = sp[i].result;
+	}
+}
+
+static const struct fd_acc_sample_provider perfcntr = {
+		.query_type = FD_QUERY_FIRST_PERFCNTR,
+		.active = FD_STAGE_DRAW | FD_STAGE_CLEAR,
+		.resume = perfcntr_resume,
+		.pause = perfcntr_pause,
+		.result = perfcntr_accumulate_result,
+};
+
+static struct pipe_query *
+fd6_create_batch_query(struct pipe_context *pctx,
+		unsigned num_queries, unsigned *query_types)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	struct fd_screen *screen = ctx->screen;
+	struct fd_query *q;
+	struct fd_acc_query *aq;
+	struct fd_batch_query_data *data;
+
+	data = CALLOC_VARIANT_LENGTH_STRUCT(fd_batch_query_data,
+			num_queries * sizeof(data->query_entries[0]));
+
+	data->screen = screen;
+	data->num_query_entries = num_queries;
+
+	/* validate the requested query_types and ensure we don't try
+	 * to request more query_types of a given group than we have
+	 * counters:
+	 */
+	unsigned counters_per_group[screen->num_perfcntr_groups];
+	memset(counters_per_group, 0, sizeof(counters_per_group));
+
+	for (unsigned i = 0; i < num_queries; i++) {
+		unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
+
+		/* verify valid query_type, ie. is it actually a perfcntr? */
+		if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
+				(idx >= screen->num_perfcntr_queries)) {
+			debug_printf("invalid batch query query_type: %u\n", query_types[i]);
+			goto error;
+		}
+
+		struct fd_batch_query_entry *entry = &data->query_entries[i];
+		struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
+
+		entry->gid = pq->group_id;
+
+		/* the perfcntr_queries[] table flattens all the countables
+		 * for each group in series, ie:
+		 *
+		 *   (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
+		 *
+		 * So to find the countable index just step back through the
+		 * table to find the first entry with the same group-id.
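+		 *
+		 * E.g. (illustrative): with a flattened table (G0,C0), (G0,C1),
+		 * (G1,C0), (G1,C1) and pq pointing at the last entry, the walk
+		 * below counts one earlier G1 entry, giving cid = 1.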
+		 */
+		while (pq > screen->perfcntr_queries) {
+			pq--;
+			if (pq->group_id == entry->gid)
+				entry->cid++;
+		}
+
+		if (counters_per_group[entry->gid] >=
+				screen->perfcntr_groups[entry->gid].num_counters) {
+			debug_printf("too many counters for group %u\n", entry->gid);
+			goto error;
+		}
+
+		counters_per_group[entry->gid]++;
+	}
+
+	q = fd_acc_create_query2(ctx, 0, &perfcntr);
+	aq = fd_acc_query(q);
+
+	/* sample buffer size is based on # of queries: */
+	aq->size = num_queries * sizeof(struct fd6_query_sample);
+	aq->query_data = data;
+
+	return (struct pipe_query *)q;
+
+error:
+	free(data);
+	return NULL;
+}
+
 void
 fd6_query_context_init(struct pipe_context *pctx)
 {
@@ -254,6 +455,8 @@ fd6_query_context_init(struct pipe_context *pctx)
 	ctx->create_query = fd_acc_create_query;
 	ctx->query_set_stage = fd_acc_query_set_stage;
 
+	pctx->create_batch_query = fd6_create_batch_query;
+
 	fd_acc_query_register_provider(pctx, &occlusion_counter);
 	fd_acc_query_register_provider(pctx, &occlusion_predicate);
 	fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative);
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_screen.c b/src/gallium/drivers/freedreno/a6xx/fd6_screen.c
index 33e740f32fe..e04984a3036 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_screen.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_screen.c
@@ -126,6 +126,9 @@ fd6_screen_is_format_supported(struct pipe_screen *pscreen,
 	return retval == usage;
 }
 
+extern const struct fd_perfcntr_group a6xx_perfcntr_groups[];
+extern const unsigned a6xx_num_perfcntr_groups;
+
 void
 fd6_screen_init(struct pipe_screen *pscreen)
 {
@@ -146,4 +149,9 @@ fd6_screen_init(struct pipe_screen *pscreen)
 
 	screen->supported_modifiers = supported_modifiers;
 	screen->num_supported_modifiers = ARRAY_SIZE(supported_modifiers);
+
+	if (fd_mesa_debug & FD_DBG_PERFC) {
+		screen->perfcntr_groups = a6xx_perfcntr_groups;
+		screen->num_perfcntr_groups = a6xx_num_perfcntr_groups;
+	}
 }
diff --git a/src/gallium/drivers/freedreno/meson.build b/src/gallium/drivers/freedreno/meson.build
index fb196daea5f..1f3d5dc8337 100644
--- a/src/gallium/drivers/freedreno/meson.build
+++ b/src/gallium/drivers/freedreno/meson.build
@@ -193,6 +193,7 @@ files_libfreedreno = files(
   'a6xx/fd6_gmem.h',
   'a6xx/fd6_image.c',
   'a6xx/fd6_image.h',
+  'a6xx/fd6_perfcntr.c',
   'a6xx/fd6_program.c',
   'a6xx/fd6_program.h',
   'a6xx/fd6_query.c',
-- 
2.30.2