From 988f4b31f3011327b389da2b5e0bd34e222bae86 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Nicolai=20H=C3=A4hnle?= Date: Fri, 11 Dec 2015 15:58:11 -0500 Subject: [PATCH] radeonsi: re-order the SQ_xx performance counter blocks MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This is yet another change motivated by appeasing AMD GPUPerfStudio's hardcoding of performance counter group numbers. Reviewed-by: Edward O'Callaghan Acked-by: Marek Olšák --- src/gallium/drivers/radeon/r600_perfcounter.c | 38 +++++++++---------- src/gallium/drivers/radeon/r600_query.h | 22 ++--------- src/gallium/drivers/radeonsi/si_perfcounter.c | 21 ++++++++++ 3 files changed, 42 insertions(+), 39 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_perfcounter.c b/src/gallium/drivers/radeon/r600_perfcounter.c index fad7bdec40a..f3529a1fe0f 100644 --- a/src/gallium/drivers/radeon/r600_perfcounter.c +++ b/src/gallium/drivers/radeon/r600_perfcounter.c @@ -33,10 +33,6 @@ /* Max counters per HW block */ #define R600_QUERY_MAX_COUNTERS 16 -static const char * const r600_pc_shader_suffix[] = { - "", "_PS", "_VS", "_GS", "_ES", "_HS", "_LS", "_CS" -}; - static struct r600_perfcounter_block * lookup_counter(struct r600_perfcounters *pc, unsigned index, unsigned *base_gid, unsigned *sub_index) @@ -92,6 +88,8 @@ struct r600_pc_counter { unsigned stride; }; +#define R600_PC_SHADERS_WINDOWING (1 << 31) + struct r600_query_pc { struct r600_query_hw b; @@ -246,32 +244,29 @@ static struct r600_pc_group *get_group_state(struct r600_common_screen *screen, if (block->flags & R600_PC_BLOCK_SHADER) { unsigned sub_gids = block->num_instances; unsigned shader_id; - unsigned shader_mask; - unsigned query_shader_mask; + unsigned shaders; + unsigned query_shaders; if (block->flags & R600_PC_BLOCK_SE_GROUPS) sub_gids = sub_gids * screen->info.max_se; shader_id = sub_gid / sub_gids; sub_gid = sub_gid % sub_gids; - if (shader_id == 0) - shader_mask = R600_PC_SHADER_ALL; - else - shader_mask = 1 << (shader_id - 1); + shaders = screen->perfcounters->shader_type_bits[shader_id]; - query_shader_mask = query->shaders & R600_PC_SHADER_ALL; - if (query_shader_mask && query_shader_mask != shader_mask) { + query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING; + if (query_shaders && query_shaders != shaders) { fprintf(stderr, "r600_perfcounter: incompatible shader groups\n"); FREE(group); return NULL; } - query->shaders |= shader_mask; + query->shaders = shaders; } - if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED) { + if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED && !query->shaders) { // A non-zero value in query->shaders ensures that the shader // masking is reset unless the user explicitly requests one. - query->shaders |= R600_PC_SHADER_WINDOWING; + query->shaders = R600_PC_SHADERS_WINDOWING; } if (block->flags & R600_PC_BLOCK_SE_GROUPS) { @@ -379,8 +374,8 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx, } if (query->shaders) { - if ((query->shaders & R600_PC_SHADER_ALL) == 0) - query->shaders |= R600_PC_SHADER_ALL; + if (query->shaders == R600_PC_SHADERS_WINDOWING) + query->shaders = 0xffffffff; query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords; } @@ -438,7 +433,7 @@ static boolean r600_init_block_names(struct r600_common_screen *screen, if (block->flags & R600_PC_BLOCK_SE_GROUPS) groups_se = screen->info.max_se; if (block->flags & R600_PC_BLOCK_SHADER) - groups_shader = ARRAY_SIZE(r600_pc_shader_suffix); + groups_shader = screen->perfcounters->num_shader_types; namelen = strlen(block->basename); block->group_name_stride = namelen + 1; @@ -462,14 +457,15 @@ static boolean r600_init_block_names(struct r600_common_screen *screen, groupname = block->group_names; for (i = 0; i < groups_shader; ++i) { - unsigned shaderlen = strlen(r600_pc_shader_suffix[i]); + const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i]; + unsigned shaderlen = strlen(shader_suffix); for (j = 0; j < groups_se; ++j) { for (k = 0; k < groups_instance; ++k) { strcpy(groupname, block->basename); p = groupname + namelen; if (block->flags & R600_PC_BLOCK_SHADER) { - strcpy(p, r600_pc_shader_suffix[i]); + strcpy(p, shader_suffix); p += shaderlen; } @@ -626,7 +622,7 @@ void r600_perfcounters_add_block(struct r600_common_screen *rscreen, if (block->flags & R600_PC_BLOCK_SE_GROUPS) block->num_groups *= rscreen->info.max_se; if (block->flags & R600_PC_BLOCK_SHADER) - block->num_groups *= ARRAY_SIZE(r600_pc_shader_suffix); + block->num_groups *= pc->num_shader_types; ++pc->num_blocks; pc->num_groups += block->num_groups; diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h index dbc950fed43..8b2c4e3fe93 100644 --- a/src/gallium/drivers/radeon/r600_query.h +++ b/src/gallium/drivers/radeon/r600_query.h @@ -166,24 +166,6 @@ enum { R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4), }; -/* Shader enable bits. Chosen to coincide with SQ_PERFCOUNTER_CTRL values */ -enum { - R600_PC_SHADER_PS = (1 << 0), - R600_PC_SHADER_VS = (1 << 1), - R600_PC_SHADER_GS = (1 << 2), - R600_PC_SHADER_ES = (1 << 3), - R600_PC_SHADER_HS = (1 << 4), - R600_PC_SHADER_LS = (1 << 5), - R600_PC_SHADER_CS = (1 << 6), - - R600_PC_SHADER_ALL = R600_PC_SHADER_PS | R600_PC_SHADER_VS | - R600_PC_SHADER_GS | R600_PC_SHADER_ES | - R600_PC_SHADER_HS | R600_PC_SHADER_LS | - R600_PC_SHADER_CS, - - R600_PC_SHADER_WINDOWING = (1 << 31), -}; - /* Describes a hardware block with performance counters. Multiple instances of * each block, possibly per-SE, may exist on the chip. Depending on the block * and on the user's configuration, we either @@ -220,6 +202,10 @@ struct r600_perfcounters { unsigned num_instance_cs_dwords; unsigned num_shaders_cs_dwords; + unsigned num_shader_types; + const char * const *shader_type_suffixes; + const unsigned *shader_type_bits; + void (*get_size)(struct r600_perfcounter_block *, unsigned count, unsigned *selectors, unsigned *num_select_dw, unsigned *num_read_dw); diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index b6588667dc3..f944a52b8dc 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -79,6 +79,23 @@ struct si_pc_block { unsigned instances; }; +/* The order is chosen to be compatible with GPUPerfStudio's hardcoding of + * performance counter group IDs. + */ +static const char * const si_pc_shader_type_suffixes[] = { + "", "_ES", "_GS", "_VS", "_PS", "_LS", "_HS", "_CS" +}; + +static const unsigned si_pc_shader_type_bits[] = { + 0x7f, + S_036780_ES_EN(1), + S_036780_GS_EN(1), + S_036780_VS_EN(1), + S_036780_PS_EN(1), + S_036780_LS_EN(1), + S_036780_HS_EN(1), + S_036780_CS_EN(1), +}; static struct si_pc_block_base cik_CB = { .name = "CB", @@ -662,6 +679,10 @@ void si_init_perfcounters(struct si_screen *screen) pc->num_stop_cs_dwords += 6; } + pc->num_shader_types = ARRAY_SIZE(si_pc_shader_type_bits); + pc->shader_type_suffixes = si_pc_shader_type_suffixes; + pc->shader_type_bits = si_pc_shader_type_bits; + pc->get_size = si_pc_get_size; pc->emit_instance = si_pc_emit_instance; pc->emit_shaders = si_pc_emit_shaders; -- 2.30.2