radeonsi: re-order the SQ_xx performance counter blocks
author Nicolai Hähnle <nicolai.haehnle@amd.com>
Fri, 11 Dec 2015 20:58:11 +0000 (15:58 -0500)
committer Nicolai Hähnle <nicolai.haehnle@amd.com>
Fri, 5 Feb 2016 14:25:30 +0000 (09:25 -0500)
This is yet another change motivated by appeasing AMD GPUPerfStudio's
hardcoding of performance counter group numbers.

Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
Acked-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/radeon/r600_perfcounter.c
src/gallium/drivers/radeon/r600_query.h
src/gallium/drivers/radeonsi/si_perfcounter.c

index fad7bdec40ac61d1934a66164d17151b37583467..f3529a1fe0ff0c2979ca23a2f6fadf180420ff7a 100644 (file)
 /* Max counters per HW block */
 #define R600_QUERY_MAX_COUNTERS 16
 
-static const char * const r600_pc_shader_suffix[] = {
-       "", "_PS", "_VS", "_GS", "_ES", "_HS", "_LS", "_CS"
-};
-
 static struct r600_perfcounter_block *
 lookup_counter(struct r600_perfcounters *pc, unsigned index,
               unsigned *base_gid, unsigned *sub_index)
@@ -92,6 +88,8 @@ struct r600_pc_counter {
        unsigned stride;
 };
 
+#define R600_PC_SHADERS_WINDOWING (1 << 31)
+
 struct r600_query_pc {
        struct r600_query_hw b;
 
@@ -246,32 +244,29 @@ static struct r600_pc_group *get_group_state(struct r600_common_screen *screen,
        if (block->flags & R600_PC_BLOCK_SHADER) {
                unsigned sub_gids = block->num_instances;
                unsigned shader_id;
-               unsigned shader_mask;
-               unsigned query_shader_mask;
+               unsigned shaders;
+               unsigned query_shaders;
 
                if (block->flags & R600_PC_BLOCK_SE_GROUPS)
                        sub_gids = sub_gids * screen->info.max_se;
                shader_id = sub_gid / sub_gids;
                sub_gid = sub_gid % sub_gids;
 
-               if (shader_id == 0)
-                       shader_mask = R600_PC_SHADER_ALL;
-               else
-                       shader_mask = 1 << (shader_id - 1);
+               shaders = screen->perfcounters->shader_type_bits[shader_id];
 
-               query_shader_mask = query->shaders & R600_PC_SHADER_ALL;
-               if (query_shader_mask && query_shader_mask != shader_mask) {
+               query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING;
+               if (query_shaders && query_shaders != shaders) {
                        fprintf(stderr, "r600_perfcounter: incompatible shader groups\n");
                        FREE(group);
                        return NULL;
                }
-               query->shaders |= shader_mask;
+               query->shaders = shaders;
        }
 
-       if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED) {
+       if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
                // A non-zero value in query->shaders ensures that the shader
                // masking is reset unless the user explicitly requests one.
-               query->shaders |= R600_PC_SHADER_WINDOWING;
+               query->shaders = R600_PC_SHADERS_WINDOWING;
        }
 
        if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
@@ -379,8 +374,8 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
        }
 
        if (query->shaders) {
-               if ((query->shaders & R600_PC_SHADER_ALL) == 0)
-                       query->shaders |= R600_PC_SHADER_ALL;
+               if (query->shaders == R600_PC_SHADERS_WINDOWING)
+                       query->shaders = 0xffffffff;
                query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords;
        }
 
@@ -438,7 +433,7 @@ static boolean r600_init_block_names(struct r600_common_screen *screen,
        if (block->flags & R600_PC_BLOCK_SE_GROUPS)
                groups_se = screen->info.max_se;
        if (block->flags & R600_PC_BLOCK_SHADER)
-               groups_shader = ARRAY_SIZE(r600_pc_shader_suffix);
+               groups_shader = screen->perfcounters->num_shader_types;
 
        namelen = strlen(block->basename);
        block->group_name_stride = namelen + 1;
@@ -462,14 +457,15 @@ static boolean r600_init_block_names(struct r600_common_screen *screen,
 
        groupname = block->group_names;
        for (i = 0; i < groups_shader; ++i) {
-               unsigned shaderlen = strlen(r600_pc_shader_suffix[i]);
+               const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i];
+               unsigned shaderlen = strlen(shader_suffix);
                for (j = 0; j < groups_se; ++j) {
                        for (k = 0; k < groups_instance; ++k) {
                                strcpy(groupname, block->basename);
                                p = groupname + namelen;
 
                                if (block->flags & R600_PC_BLOCK_SHADER) {
-                                       strcpy(p, r600_pc_shader_suffix[i]);
+                                       strcpy(p, shader_suffix);
                                        p += shaderlen;
                                }
 
@@ -626,7 +622,7 @@ void r600_perfcounters_add_block(struct r600_common_screen *rscreen,
        if (block->flags & R600_PC_BLOCK_SE_GROUPS)
                block->num_groups *= rscreen->info.max_se;
        if (block->flags & R600_PC_BLOCK_SHADER)
-               block->num_groups *= ARRAY_SIZE(r600_pc_shader_suffix);
+               block->num_groups *= pc->num_shader_types;
 
        ++pc->num_blocks;
        pc->num_groups += block->num_groups;
index dbc950fed43e670df0fd34e6ec037ea15fd3d1c9..8b2c4e3fe932d058f7bce611812334438b2fbc8b 100644 (file)
@@ -166,24 +166,6 @@ enum {
        R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4),
 };
 
-/* Shader enable bits. Chosen to coincide with SQ_PERFCOUNTER_CTRL values */
-enum {
-       R600_PC_SHADER_PS = (1 << 0),
-       R600_PC_SHADER_VS = (1 << 1),
-       R600_PC_SHADER_GS = (1 << 2),
-       R600_PC_SHADER_ES = (1 << 3),
-       R600_PC_SHADER_HS = (1 << 4),
-       R600_PC_SHADER_LS = (1 << 5),
-       R600_PC_SHADER_CS = (1 << 6),
-
-       R600_PC_SHADER_ALL = R600_PC_SHADER_PS | R600_PC_SHADER_VS |
-                            R600_PC_SHADER_GS | R600_PC_SHADER_ES |
-                            R600_PC_SHADER_HS | R600_PC_SHADER_LS |
-                            R600_PC_SHADER_CS,
-
-       R600_PC_SHADER_WINDOWING = (1 << 31),
-};
-
 /* Describes a hardware block with performance counters. Multiple instances of
  * each block, possibly per-SE, may exist on the chip. Depending on the block
  * and on the user's configuration, we either
@@ -220,6 +202,10 @@ struct r600_perfcounters {
        unsigned num_instance_cs_dwords;
        unsigned num_shaders_cs_dwords;
 
+       unsigned num_shader_types;
+       const char * const *shader_type_suffixes;
+       const unsigned *shader_type_bits;
+
        void (*get_size)(struct r600_perfcounter_block *,
                         unsigned count, unsigned *selectors,
                         unsigned *num_select_dw, unsigned *num_read_dw);
index b6588667dc30454c876a135bae71fdd0b6a3e1c8..f944a52b8dc2570b63491ebef1e8f9f632df7630 100644 (file)
@@ -79,6 +79,23 @@ struct si_pc_block {
        unsigned instances;
 };
 
+/* The order is chosen to be compatible with GPUPerfStudio's hardcoding of
+ * performance counter group IDs.
+ */
+static const char * const si_pc_shader_type_suffixes[] = {
+       "", "_ES", "_GS", "_VS", "_PS", "_LS", "_HS", "_CS"
+};
+
+static const unsigned si_pc_shader_type_bits[] = {
+       0x7f,
+       S_036780_ES_EN(1),
+       S_036780_GS_EN(1),
+       S_036780_VS_EN(1),
+       S_036780_PS_EN(1),
+       S_036780_LS_EN(1),
+       S_036780_HS_EN(1),
+       S_036780_CS_EN(1),
+};
 
 static struct si_pc_block_base cik_CB = {
        .name = "CB",
@@ -662,6 +679,10 @@ void si_init_perfcounters(struct si_screen *screen)
                pc->num_stop_cs_dwords += 6;
        }
 
+       pc->num_shader_types = ARRAY_SIZE(si_pc_shader_type_bits);
+       pc->shader_type_suffixes = si_pc_shader_type_suffixes;
+       pc->shader_type_bits = si_pc_shader_type_bits;
+
        pc->get_size = si_pc_get_size;
        pc->emit_instance = si_pc_emit_instance;
        pc->emit_shaders = si_pc_emit_shaders;