v3d: Add support for glSampleMask / glSampleCoverage.
[mesa.git] / src / gallium / drivers / radeon / r600_perfcounter.c
index a710c042b27d6d659e52be2120d7e8fcd76a271a..57c3246898a6345e5ab250757456ef16bbadab21 100644 (file)
@@ -1,5 +1,6 @@
 /*
  * Copyright 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * Authors:
- *  Nicolai Hähnle <nicolai.haehnle@amd.com>
- *
  */
 
 #include "util/u_memory.h"
-#include "r600_query.h"
-#include "r600_pipe_common.h"
-#include "r600d_common.h"
+#include "radeonsi/si_query.h"
+#include "radeonsi/si_pipe.h"
+#include "amd/common/sid.h"
 
 /* Max counters per HW block */
-#define R600_QUERY_MAX_COUNTERS 16
+#define SI_QUERY_MAX_COUNTERS 16
 
-static const char * const r600_pc_shader_suffix[] = {
-       "", "_PS", "_VS", "_GS", "_ES", "_HS", "_LS", "_CS"
-};
-
-static struct r600_perfcounter_block *
-lookup_counter(struct r600_perfcounters *pc, unsigned index,
+static struct si_perfcounter_block *
+lookup_counter(struct si_perfcounters *pc, unsigned index,
               unsigned *base_gid, unsigned *sub_index)
 {
-       struct r600_perfcounter_block *block = pc->blocks;
+       struct si_perfcounter_block *block = pc->blocks;
        unsigned bid;
 
        *base_gid = 0;
@@ -60,11 +53,11 @@ lookup_counter(struct r600_perfcounters *pc, unsigned index,
        return NULL;
 }
 
-static struct r600_perfcounter_block *
-lookup_group(struct r600_perfcounters *pc, unsigned *index)
+static struct si_perfcounter_block *
+lookup_group(struct si_perfcounters *pc, unsigned *index)
 {
        unsigned bid;
-       struct r600_perfcounter_block *block = pc->blocks;
+       struct si_perfcounter_block *block = pc->blocks;
 
        for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
                if (*index < block->num_groups)
@@ -75,163 +68,171 @@ lookup_group(struct r600_perfcounters *pc, unsigned *index)
        return NULL;
 }
 
-struct r600_pc_group {
-       struct r600_pc_group *next;
-       struct r600_perfcounter_block *block;
+struct si_pc_group {
+       struct si_pc_group *next;
+       struct si_perfcounter_block *block;
        unsigned sub_gid; /* only used during init */
        unsigned result_base; /* only used during init */
        int se;
        int instance;
        unsigned num_counters;
-       unsigned selectors[R600_QUERY_MAX_COUNTERS];
+       unsigned selectors[SI_QUERY_MAX_COUNTERS];
 };
 
-struct r600_pc_counter {
+struct si_pc_counter {
        unsigned base;
-       unsigned dwords;
-       unsigned stride;
+       unsigned qwords;
+       unsigned stride; /* in uint64s */
 };
 
-struct r600_query_pc {
-       struct r600_query_hw b;
+#define SI_PC_SHADERS_WINDOWING (1 << 31)
+
+struct si_query_pc {
+       struct si_query_hw b;
 
        unsigned shaders;
        unsigned num_counters;
-       struct r600_pc_counter *counters;
-       struct r600_pc_group *groups;
+       struct si_pc_counter *counters;
+       struct si_pc_group *groups;
 };
 
-static void r600_pc_query_destroy(struct r600_common_context *ctx,
-                                 struct r600_query *rquery)
+static void si_pc_query_destroy(struct si_screen *sscreen,
+                               struct si_query *rquery)
 {
-       struct r600_query_pc *query = (struct r600_query_pc *)rquery;
+       struct si_query_pc *query = (struct si_query_pc *)rquery;
 
        while (query->groups) {
-               struct r600_pc_group *group = query->groups;
+               struct si_pc_group *group = query->groups;
                query->groups = group->next;
                FREE(group);
        }
 
        FREE(query->counters);
 
-       r600_query_hw_destroy(ctx, rquery);
+       si_query_hw_destroy(sscreen, rquery);
 }
 
-static void r600_pc_query_emit_start(struct r600_common_context *ctx,
-                                    struct r600_query_hw *hwquery,
-                                    struct r600_resource *buffer, uint64_t va)
+static bool si_pc_query_prepare_buffer(struct si_screen *screen,
+                                      struct si_query_hw *hwquery,
+                                      struct r600_resource *buffer)
 {
-       struct r600_perfcounters *pc = ctx->screen->perfcounters;
-       struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
-       struct r600_pc_group *group;
+       /* no-op */
+       return true;
+}
+
+static void si_pc_query_emit_start(struct si_context *sctx,
+                                  struct si_query_hw *hwquery,
+                                  struct r600_resource *buffer, uint64_t va)
+{
+       struct si_perfcounters *pc = sctx->screen->perfcounters;
+       struct si_query_pc *query = (struct si_query_pc *)hwquery;
+       struct si_pc_group *group;
        int current_se = -1;
        int current_instance = -1;
 
        if (query->shaders)
-               pc->emit_shaders(ctx, query->shaders);
+               pc->emit_shaders(sctx, query->shaders);
 
        for (group = query->groups; group; group = group->next) {
-               struct r600_perfcounter_block *block = group->block;
+               struct si_perfcounter_block *block = group->block;
 
                if (group->se != current_se || group->instance != current_instance) {
                        current_se = group->se;
                        current_instance = group->instance;
-                       pc->emit_instance(ctx, group->se, group->instance);
+                       pc->emit_instance(sctx, group->se, group->instance);
                }
 
-               pc->emit_select(ctx, block, group->num_counters, group->selectors);
+               pc->emit_select(sctx, block, group->num_counters, group->selectors);
        }
 
        if (current_se != -1 || current_instance != -1)
-               pc->emit_instance(ctx, -1, -1);
+               pc->emit_instance(sctx, -1, -1);
 
-       pc->emit_start(ctx, buffer, va);
+       pc->emit_start(sctx, buffer, va);
 }
 
-static void r600_pc_query_emit_stop(struct r600_common_context *ctx,
-                                   struct r600_query_hw *hwquery,
-                                   struct r600_resource *buffer, uint64_t va)
+static void si_pc_query_emit_stop(struct si_context *sctx,
+                                 struct si_query_hw *hwquery,
+                                 struct r600_resource *buffer, uint64_t va)
 {
-       struct r600_perfcounters *pc = ctx->screen->perfcounters;
-       struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
-       struct r600_pc_group *group;
+       struct si_perfcounters *pc = sctx->screen->perfcounters;
+       struct si_query_pc *query = (struct si_query_pc *)hwquery;
+       struct si_pc_group *group;
 
-       pc->emit_stop(ctx, buffer, va);
+       pc->emit_stop(sctx, buffer, va);
 
        for (group = query->groups; group; group = group->next) {
-               struct r600_perfcounter_block *block = group->block;
+               struct si_perfcounter_block *block = group->block;
                unsigned se = group->se >= 0 ? group->se : 0;
                unsigned se_end = se + 1;
 
-               if ((block->flags & R600_PC_BLOCK_SE) && (group->se < 0))
-                       se_end = ctx->screen->info.max_se;
+               if ((block->flags & SI_PC_BLOCK_SE) && (group->se < 0))
+                       se_end = sctx->screen->info.max_se;
 
                do {
                        unsigned instance = group->instance >= 0 ? group->instance : 0;
 
                        do {
-                               pc->emit_instance(ctx, se, instance);
-                               pc->emit_read(ctx, block,
+                               pc->emit_instance(sctx, se, instance);
+                               pc->emit_read(sctx, block,
                                              group->num_counters, group->selectors,
                                              buffer, va);
-                               va += 4 * group->num_counters;
+                               va += sizeof(uint64_t) * group->num_counters;
                        } while (group->instance < 0 && ++instance < block->num_instances);
                } while (++se < se_end);
        }
 
-       pc->emit_instance(ctx, -1, -1);
+       pc->emit_instance(sctx, -1, -1);
 }
 
-static void r600_pc_query_clear_result(struct r600_query_hw *hwquery,
-                                      union pipe_query_result *result)
+static void si_pc_query_clear_result(struct si_query_hw *hwquery,
+                                    union pipe_query_result *result)
 {
-       struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
+       struct si_query_pc *query = (struct si_query_pc *)hwquery;
 
        memset(result, 0, sizeof(result->batch[0]) * query->num_counters);
 }
 
-static void r600_pc_query_add_result(struct r600_common_context *ctx,
-                                    struct r600_query_hw *hwquery,
-                                    void *buffer,
-                                    union pipe_query_result *result)
+static void si_pc_query_add_result(struct si_screen *sscreen,
+                                  struct si_query_hw *hwquery,
+                                  void *buffer,
+                                  union pipe_query_result *result)
 {
-       struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
-       uint32_t *results = buffer;
+       struct si_query_pc *query = (struct si_query_pc *)hwquery;
+       uint64_t *results = buffer;
        unsigned i, j;
 
        for (i = 0; i < query->num_counters; ++i) {
-               struct r600_pc_counter *counter = &query->counters[i];
-
-               if (counter->base == ~0)
-                       continue;
+               struct si_pc_counter *counter = &query->counters[i];
 
-               for (j = 0; j < counter->dwords; ++j) {
+               for (j = 0; j < counter->qwords; ++j) {
                        uint32_t value = results[counter->base + j * counter->stride];
-                       result->batch[i].u32 += value;
+                       result->batch[i].u64 += value;
                }
        }
 }
 
-static struct r600_query_ops batch_query_ops = {
-       .destroy = r600_pc_query_destroy,
-       .begin = r600_query_hw_begin,
-       .end = r600_query_hw_end,
-       .get_result = r600_query_hw_get_result
+static struct si_query_ops batch_query_ops = {
+       .destroy = si_pc_query_destroy,
+       .begin = si_query_hw_begin,
+       .end = si_query_hw_end,
+       .get_result = si_query_hw_get_result
 };
 
-static struct r600_query_hw_ops batch_query_hw_ops = {
-       .emit_start = r600_pc_query_emit_start,
-       .emit_stop = r600_pc_query_emit_stop,
-       .clear_result = r600_pc_query_clear_result,
-       .add_result = r600_pc_query_add_result,
+static struct si_query_hw_ops batch_query_hw_ops = {
+       .prepare_buffer = si_pc_query_prepare_buffer,
+       .emit_start = si_pc_query_emit_start,
+       .emit_stop = si_pc_query_emit_stop,
+       .clear_result = si_pc_query_clear_result,
+       .add_result = si_pc_query_add_result,
 };
 
-static struct r600_pc_group *get_group_state(struct r600_common_screen *screen,
-                                            struct r600_query_pc *query,
-                                            struct r600_perfcounter_block *block,
+static struct si_pc_group *get_group_state(struct si_screen *screen,
+                                            struct si_query_pc *query,
+                                            struct si_perfcounter_block *block,
                                             unsigned sub_gid)
 {
-       struct r600_pc_group *group = query->groups;
+       struct si_pc_group *group = query->groups;
 
        while (group) {
                if (group->block == block && group->sub_gid == sub_gid)
@@ -239,52 +240,49 @@ static struct r600_pc_group *get_group_state(struct r600_common_screen *screen,
                group = group->next;
        }
 
-       group = CALLOC_STRUCT(r600_pc_group);
+       group = CALLOC_STRUCT(si_pc_group);
        if (!group)
                return NULL;
 
        group->block = block;
        group->sub_gid = sub_gid;
 
-       if (block->flags & R600_PC_BLOCK_SHADER) {
+       if (block->flags & SI_PC_BLOCK_SHADER) {
                unsigned sub_gids = block->num_instances;
                unsigned shader_id;
-               unsigned shader_mask;
-               unsigned query_shader_mask;
+               unsigned shaders;
+               unsigned query_shaders;
 
-               if (block->flags & R600_PC_BLOCK_SE_GROUPS)
+               if (block->flags & SI_PC_BLOCK_SE_GROUPS)
                        sub_gids = sub_gids * screen->info.max_se;
                shader_id = sub_gid / sub_gids;
                sub_gid = sub_gid % sub_gids;
 
-               if (shader_id == 0)
-                       shader_mask = R600_PC_SHADER_ALL;
-               else
-                       shader_mask = 1 << (shader_id - 1);
+               shaders = screen->perfcounters->shader_type_bits[shader_id];
 
-               query_shader_mask = query->shaders & R600_PC_SHADER_ALL;
-               if (query_shader_mask && query_shader_mask != shader_mask) {
-                       fprintf(stderr, "r600_perfcounter: incompatible shader groups\n");
+               query_shaders = query->shaders & ~SI_PC_SHADERS_WINDOWING;
+               if (query_shaders && query_shaders != shaders) {
+                       fprintf(stderr, "si_perfcounter: incompatible shader groups\n");
                        FREE(group);
                        return NULL;
                }
-               query->shaders |= shader_mask;
+               query->shaders = shaders;
        }
 
-       if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED) {
+       if (block->flags & SI_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
                // A non-zero value in query->shaders ensures that the shader
                // masking is reset unless the user explicitly requests one.
-               query->shaders |= R600_PC_SHADER_WINDOWING;
+               query->shaders = SI_PC_SHADERS_WINDOWING;
        }
 
-       if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
+       if (block->flags & SI_PC_BLOCK_SE_GROUPS) {
                group->se = sub_gid / block->num_instances;
                sub_gid = sub_gid % block->num_instances;
        } else {
                group->se = -1;
        }
 
-       if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
+       if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) {
                group->instance = sub_gid;
        } else {
                group->instance = -1;
@@ -296,29 +294,28 @@ static struct r600_pc_group *get_group_state(struct r600_common_screen *screen,
        return group;
 }
 
-struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
-                                          unsigned num_queries,
-                                          unsigned *query_types)
+struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
+                                        unsigned num_queries,
+                                        unsigned *query_types)
 {
-       struct r600_common_context *rctx = (struct r600_common_context *)ctx;
-       struct r600_common_screen *screen = rctx->screen;
-       struct r600_perfcounters *pc = screen->perfcounters;
-       struct r600_perfcounter_block *block;
-       struct r600_pc_group *group;
-       struct r600_query_pc *query;
+       struct si_screen *screen =
+               (struct si_screen *)ctx->screen;
+       struct si_perfcounters *pc = screen->perfcounters;
+       struct si_perfcounter_block *block;
+       struct si_pc_group *group;
+       struct si_query_pc *query;
        unsigned base_gid, sub_gid, sub_index;
        unsigned i, j;
 
        if (!pc)
                return NULL;
 
-       query = CALLOC_STRUCT(r600_query_pc);
+       query = CALLOC_STRUCT(si_query_pc);
        if (!query)
                return NULL;
 
        query->b.b.ops = &batch_query_ops;
        query->b.ops = &batch_query_hw_ops;
-       query->b.flags = R600_QUERY_HW_FLAG_TIMER;
 
        query->num_counters = num_queries;
 
@@ -326,10 +323,10 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
        for (i = 0; i < num_queries; ++i) {
                unsigned sub_gid;
 
-               if (query_types[i] < R600_QUERY_FIRST_PERFCOUNTER)
+               if (query_types[i] < SI_QUERY_FIRST_PERFCOUNTER)
                        goto error;
 
-               block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
+               block = lookup_counter(pc, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER,
                                       &base_gid, &sub_index);
                if (!block)
                        goto error;
@@ -352,48 +349,41 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
        }
 
        /* Compute result bases and CS size per group */
-       query->b.num_cs_dw_begin = pc->num_start_cs_dwords;
        query->b.num_cs_dw_end = pc->num_stop_cs_dwords;
-
-       query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
        query->b.num_cs_dw_end += pc->num_instance_cs_dwords;
 
        i = 0;
        for (group = query->groups; group; group = group->next) {
-               struct r600_perfcounter_block *block = group->block;
-               unsigned select_dw, read_dw;
+               struct si_perfcounter_block *block = group->block;
+               unsigned read_dw;
                unsigned instances = 1;
 
-               if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
-                       instances = rctx->screen->info.max_se;
+               if ((block->flags & SI_PC_BLOCK_SE) && group->se < 0)
+                       instances = screen->info.max_se;
                if (group->instance < 0)
                        instances *= block->num_instances;
 
                group->result_base = i;
-               query->b.result_size += 4 * instances * group->num_counters;
+               query->b.result_size += sizeof(uint64_t) * instances * group->num_counters;
                i += instances * group->num_counters;
 
-               pc->get_size(block, group->num_counters, group->selectors,
-                            &select_dw, &read_dw);
-               query->b.num_cs_dw_begin += select_dw;
+               read_dw = 6 * group->num_counters;
                query->b.num_cs_dw_end += instances * read_dw;
-               query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
                query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords;
        }
 
        if (query->shaders) {
-               if ((query->shaders & R600_PC_SHADER_ALL) == 0)
-                       query->shaders |= R600_PC_SHADER_ALL;
-               query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords;
+               if (query->shaders == SI_PC_SHADERS_WINDOWING)
+                       query->shaders = 0xffffffff;
        }
 
        /* Map user-supplied query array to result indices */
        query->counters = CALLOC(num_queries, sizeof(*query->counters));
        for (i = 0; i < num_queries; ++i) {
-               struct r600_pc_counter *counter = &query->counters[i];
-               struct r600_perfcounter_block *block;
+               struct si_pc_counter *counter = &query->counters[i];
+               struct si_perfcounter_block *block;
 
-               block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
+               block = lookup_counter(pc, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER,
                                       &base_gid, &sub_index);
 
                sub_gid = sub_index / block->num_selectors;
@@ -410,29 +400,113 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
                counter->base = group->result_base + j;
                counter->stride = group->num_counters;
 
-               counter->dwords = 1;
-               if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
-                       counter->dwords = screen->info.max_se;
+               counter->qwords = 1;
+               if ((block->flags & SI_PC_BLOCK_SE) && group->se < 0)
+                       counter->qwords = screen->info.max_se;
                if (group->instance < 0)
-                       counter->dwords *= block->num_instances;
+                       counter->qwords *= block->num_instances;
        }
 
-       if (!r600_query_hw_init(rctx, &query->b))
+       if (!si_query_hw_init(screen, &query->b))
                goto error;
 
        return (struct pipe_query *)query;
 
 error:
-       r600_pc_query_destroy(rctx, &query->b.b);
+       si_pc_query_destroy(screen, &query->b.b);
        return NULL;
 }
 
-int r600_get_perfcounter_info(struct r600_common_screen *screen,
-                             unsigned index,
-                             struct pipe_driver_query_info *info)
+static bool si_init_block_names(struct si_screen *screen,
+                               struct si_perfcounter_block *block)
 {
-       struct r600_perfcounters *pc = screen->perfcounters;
-       struct r600_perfcounter_block *block;
+       unsigned i, j, k;
+       unsigned groups_shader = 1, groups_se = 1, groups_instance = 1;
+       unsigned namelen;
+       char *groupname;
+       char *p;
+
+       if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS)
+               groups_instance = block->num_instances;
+       if (block->flags & SI_PC_BLOCK_SE_GROUPS)
+               groups_se = screen->info.max_se;
+       if (block->flags & SI_PC_BLOCK_SHADER)
+               groups_shader = screen->perfcounters->num_shader_types;
+
+       namelen = strlen(block->basename);
+       block->group_name_stride = namelen + 1;
+       if (block->flags & SI_PC_BLOCK_SHADER)
+               block->group_name_stride += 3;
+       if (block->flags & SI_PC_BLOCK_SE_GROUPS) {
+               assert(groups_se <= 10);
+               block->group_name_stride += 1;
+
+               if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS)
+                       block->group_name_stride += 1;
+       }
+       if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) {
+               assert(groups_instance <= 100);
+               block->group_name_stride += 2;
+       }
+
+       block->group_names = MALLOC(block->num_groups * block->group_name_stride);
+       if (!block->group_names)
+               return false;
+
+       groupname = block->group_names;
+       for (i = 0; i < groups_shader; ++i) {
+               const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i];
+               unsigned shaderlen = strlen(shader_suffix);
+               for (j = 0; j < groups_se; ++j) {
+                       for (k = 0; k < groups_instance; ++k) {
+                               strcpy(groupname, block->basename);
+                               p = groupname + namelen;
+
+                               if (block->flags & SI_PC_BLOCK_SHADER) {
+                                       strcpy(p, shader_suffix);
+                                       p += shaderlen;
+                               }
+
+                               if (block->flags & SI_PC_BLOCK_SE_GROUPS) {
+                                       p += sprintf(p, "%d", j);
+                                       if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS)
+                                               *p++ = '_';
+                               }
+
+                               if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS)
+                                       p += sprintf(p, "%d", k);
+
+                               groupname += block->group_name_stride;
+                       }
+               }
+       }
+
+       assert(block->num_selectors <= 1000);
+       block->selector_name_stride = block->group_name_stride + 4;
+       block->selector_names = MALLOC(block->num_groups * block->num_selectors *
+                                      block->selector_name_stride);
+       if (!block->selector_names)
+               return false;
+
+       groupname = block->group_names;
+       p = block->selector_names;
+       for (i = 0; i < block->num_groups; ++i) {
+               for (j = 0; j < block->num_selectors; ++j) {
+                       sprintf(p, "%s_%03d", groupname, j);
+                       p += block->selector_name_stride;
+               }
+               groupname += block->group_name_stride;
+       }
+
+       return true;
+}
+
+int si_get_perfcounter_info(struct si_screen *screen,
+                           unsigned index,
+                           struct pipe_driver_query_info *info)
+{
+       struct si_perfcounters *pc = screen->perfcounters;
+       struct si_perfcounter_block *block;
        unsigned base_gid, sub;
 
        if (!pc)
@@ -453,22 +527,28 @@ int r600_get_perfcounter_info(struct r600_common_screen *screen,
        if (!block)
                return 0;
 
+       if (!block->selector_names) {
+               if (!si_init_block_names(screen, block))
+                       return 0;
+       }
        info->name = block->selector_names + sub * block->selector_name_stride;
-       info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index;
+       info->query_type = SI_QUERY_FIRST_PERFCOUNTER + index;
        info->max_value.u64 = 0;
-       info->type = PIPE_DRIVER_QUERY_TYPE_UINT;
-       info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
+       info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
+       info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
        info->group_id = base_gid + sub / block->num_selectors;
        info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
+       if (sub > 0 && sub + 1 < block->num_selectors * block->num_groups)
+               info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST;
        return 1;
 }
 
-int r600_get_perfcounter_group_info(struct r600_common_screen *screen,
-                                   unsigned index,
-                                   struct pipe_driver_query_group_info *info)
+int si_get_perfcounter_group_info(struct si_screen *screen,
+                                 unsigned index,
+                                 struct pipe_driver_query_group_info *info)
 {
-       struct r600_perfcounters *pc = screen->perfcounters;
-       struct r600_perfcounter_block *block;
+       struct si_perfcounters *pc = screen->perfcounters;
+       struct si_perfcounter_block *block;
 
        if (!pc)
                return 0;
@@ -479,45 +559,45 @@ int r600_get_perfcounter_group_info(struct r600_common_screen *screen,
        block = lookup_group(pc, &index);
        if (!block)
                return 0;
+
+       if (!block->group_names) {
+               if (!si_init_block_names(screen, block))
+                       return 0;
+       }
        info->name = block->group_names + index * block->group_name_stride;
        info->num_queries = block->num_selectors;
        info->max_active_queries = block->num_counters;
        return 1;
 }
 
-void r600_perfcounters_destroy(struct r600_common_screen *rscreen)
+void si_perfcounters_destroy(struct si_screen *sscreen)
 {
-       if (rscreen->perfcounters)
-               rscreen->perfcounters->cleanup(rscreen);
+       if (sscreen->perfcounters)
+               sscreen->perfcounters->cleanup(sscreen);
 }
 
-boolean r600_perfcounters_init(struct r600_perfcounters *pc,
-                              unsigned num_blocks)
+bool si_perfcounters_init(struct si_perfcounters *pc,
+                           unsigned num_blocks)
 {
-       pc->blocks = CALLOC(num_blocks, sizeof(struct r600_perfcounter_block));
+       pc->blocks = CALLOC(num_blocks, sizeof(struct si_perfcounter_block));
        if (!pc->blocks)
-               return FALSE;
+               return false;
 
-       pc->separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", FALSE);
-       pc->separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", FALSE);
+       pc->separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", false);
+       pc->separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false);
 
-       return TRUE;
+       return true;
 }
 
-boolean r600_perfcounters_add_block(struct r600_common_screen *rscreen,
-                                   struct r600_perfcounters *pc,
-                                   const char *name, unsigned flags,
-                                   unsigned counters, unsigned selectors,
-                                   unsigned instances, void *data)
+void si_perfcounters_add_block(struct si_screen *sscreen,
+                              struct si_perfcounters *pc,
+                              const char *name, unsigned flags,
+                              unsigned counters, unsigned selectors,
+                              unsigned instances, void *data)
 {
-       struct r600_perfcounter_block *block = &pc->blocks[pc->num_blocks];
-       unsigned i, j, k;
-       unsigned groups_shader = 1, groups_se = 1, groups_instance = 1;
-       unsigned namelen;
-       char *groupname;
-       char *p;
+       struct si_perfcounter_block *block = &pc->blocks[pc->num_blocks];
 
-       assert(counters <= R600_QUERY_MAX_COUNTERS);
+       assert(counters <= SI_QUERY_MAX_COUNTERS);
 
        block->basename = name;
        block->flags = flags;
@@ -526,104 +606,27 @@ boolean r600_perfcounters_add_block(struct r600_common_screen *rscreen,
        block->num_instances = MAX2(instances, 1);
        block->data = data;
 
-       if (pc->separate_se && (block->flags & R600_PC_BLOCK_SE))
-               block->flags |= R600_PC_BLOCK_SE_GROUPS;
+       if (pc->separate_se && (block->flags & SI_PC_BLOCK_SE))
+               block->flags |= SI_PC_BLOCK_SE_GROUPS;
        if (pc->separate_instance && block->num_instances > 1)
-               block->flags |= R600_PC_BLOCK_INSTANCE_GROUPS;
+               block->flags |= SI_PC_BLOCK_INSTANCE_GROUPS;
 
-       if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
-               groups_instance = block->num_instances;
-               block->num_groups = groups_instance;
+       if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) {
+               block->num_groups = block->num_instances;
        } else {
                block->num_groups = 1;
        }
 
-       if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
-               groups_se = rscreen->info.max_se;
-               block->num_groups *= groups_se;
-       }
-
-       if (block->flags & R600_PC_BLOCK_SHADER) {
-               groups_shader = ARRAY_SIZE(r600_pc_shader_suffix);
-               block->num_groups *= groups_shader;
-       }
-
-       namelen = strlen(name);
-       block->group_name_stride = namelen + 1;
-       if (block->flags & R600_PC_BLOCK_SHADER)
-               block->group_name_stride += 3;
-       if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
-               assert(groups_se <= 10);
-               block->group_name_stride += 1;
-
-               if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
-                       block->group_name_stride += 1;
-       }
-       if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
-               assert(groups_instance <= 100);
-               block->group_name_stride += 2;
-       }
-
-       block->group_names = MALLOC(block->num_groups * block->group_name_stride);
-       if (!block->group_names)
-               goto error;
-
-       groupname = block->group_names;
-       for (i = 0; i < groups_shader; ++i) {
-               unsigned shaderlen = strlen(r600_pc_shader_suffix[i]);
-               for (j = 0; j < groups_se; ++j) {
-                       for (k = 0; k < groups_instance; ++k) {
-                               strcpy(groupname, name);
-                               p = groupname + namelen;
-
-                               if (block->flags & R600_PC_BLOCK_SHADER) {
-                                       strcpy(p, r600_pc_shader_suffix[i]);
-                                       p += shaderlen;
-                               }
-
-                               if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
-                                       p += sprintf(p, "%d", j);
-                                       if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
-                                               *p++ = '_';
-                               }
-
-                               if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
-                                       p += sprintf(p, "%d", k);
-
-                               groupname += block->group_name_stride;
-                       }
-               }
-       }
-
-       assert(selectors <= 1000);
-       block->selector_name_stride = block->group_name_stride + 4;
-       block->selector_names = MALLOC(block->num_groups * selectors *
-                                      block->selector_name_stride);
-       if (!block->selector_names)
-               goto error_groupnames;
-
-       groupname = block->group_names;
-       p = block->selector_names;
-       for (i = 0; i < block->num_groups; ++i) {
-               for (j = 0; j < selectors; ++j) {
-                       sprintf(p, "%s_%03d", groupname, j);
-                       p += block->selector_name_stride;
-               }
-               groupname += block->group_name_stride;
-       }
+       if (block->flags & SI_PC_BLOCK_SE_GROUPS)
+               block->num_groups *= sscreen->info.max_se;
+       if (block->flags & SI_PC_BLOCK_SHADER)
+               block->num_groups *= pc->num_shader_types;
 
        ++pc->num_blocks;
        pc->num_groups += block->num_groups;
-
-       return TRUE;
-
-error_groupnames:
-       FREE(block->group_names);
-error:
-       return FALSE;
 }
 
-void r600_perfcounters_do_destroy(struct r600_perfcounters *pc)
+void si_perfcounters_do_destroy(struct si_perfcounters *pc)
 {
        unsigned i;