winsys/radeon: use the common job queue for multithreaded command submission v2
[mesa.git] / src / gallium / drivers / radeon / r600_perfcounter.c
index a835aee993bd84640000709efe9f3f63ca39845a..af9a692d15061295627a4fe67259176dc3142e30 100644 (file)
 /* Max counters per HW block */
 #define R600_QUERY_MAX_COUNTERS 16
 
-static const char * const r600_pc_shader_suffix[] = {
-       "", "_PS", "_VS", "_GS", "_ES", "_HS", "_LS", "_CS"
-};
-
 static struct r600_perfcounter_block *
 lookup_counter(struct r600_perfcounters *pc, unsigned index,
               unsigned *base_gid, unsigned *sub_index)
@@ -88,10 +84,12 @@ struct r600_pc_group {
 
 struct r600_pc_counter {
        unsigned base;
-       unsigned dwords;
-       unsigned stride;
+       unsigned qwords; /* number of uint64 result slots summed into this counter */
+       unsigned stride; /* distance between those slots, in uint64s */
 };
 
+#define R600_PC_SHADERS_WINDOWING (1u << 31) /* unsigned literal: 1 << 31 overflows a signed int (UB) */
+
 struct r600_query_pc {
        struct r600_query_hw b;
 
@@ -174,7 +172,7 @@ static void r600_pc_query_emit_stop(struct r600_common_context *ctx,
                                pc->emit_read(ctx, block,
                                              group->num_counters, group->selectors,
                                              buffer, va);
-                               va += 4 * group->num_counters;
+                               va += sizeof(uint64_t) * group->num_counters;
                        } while (group->instance < 0 && ++instance < block->num_instances);
                } while (++se < se_end);
        }
@@ -196,18 +194,15 @@ static void r600_pc_query_add_result(struct r600_common_context *ctx,
                                     union pipe_query_result *result)
 {
        struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
-       uint32_t *results = buffer;
+       uint64_t *results = buffer;
        unsigned i, j;
 
        for (i = 0; i < query->num_counters; ++i) {
                struct r600_pc_counter *counter = &query->counters[i];
 
-               if (counter->base == ~0)
-                       continue;
-
-               for (j = 0; j < counter->dwords; ++j) {
+               for (j = 0; j < counter->qwords; ++j) {
                        uint32_t value = results[counter->base + j * counter->stride];
-                       result->batch[i].u32 += value;
+                       result->batch[i].u64 += value;
                }
        }
 }
@@ -249,32 +244,29 @@ static struct r600_pc_group *get_group_state(struct r600_common_screen *screen,
        if (block->flags & R600_PC_BLOCK_SHADER) {
                unsigned sub_gids = block->num_instances;
                unsigned shader_id;
-               unsigned shader_mask;
-               unsigned query_shader_mask;
+               unsigned shaders;
+               unsigned query_shaders;
 
                if (block->flags & R600_PC_BLOCK_SE_GROUPS)
                        sub_gids = sub_gids * screen->info.max_se;
                shader_id = sub_gid / sub_gids;
                sub_gid = sub_gid % sub_gids;
 
-               if (shader_id == 0)
-                       shader_mask = R600_PC_SHADER_ALL;
-               else
-                       shader_mask = 1 << (shader_id - 1);
+               shaders = screen->perfcounters->shader_type_bits[shader_id];
 
-               query_shader_mask = query->shaders & R600_PC_SHADER_ALL;
-               if (query_shader_mask && query_shader_mask != shader_mask) {
+               query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING;
+               if (query_shaders && query_shaders != shaders) {
                        fprintf(stderr, "r600_perfcounter: incompatible shader groups\n");
                        FREE(group);
                        return NULL;
                }
-               query->shaders |= shader_mask;
+               query->shaders = shaders;
        }
 
-       if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED) {
+       if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
                // A non-zero value in query->shaders ensures that the shader
                // masking is reset unless the user explicitly requests one.
-               query->shaders |= R600_PC_SHADER_WINDOWING;
+               query->shaders = R600_PC_SHADERS_WINDOWING;
        }
 
        if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
@@ -318,7 +310,6 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
 
        query->b.b.ops = &batch_query_ops;
        query->b.ops = &batch_query_hw_ops;
-       query->b.flags = R600_QUERY_HW_FLAG_TIMER;
 
        query->num_counters = num_queries;
 
@@ -370,7 +361,7 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
                        instances *= block->num_instances;
 
                group->result_base = i;
-               query->b.result_size += 4 * instances * group->num_counters;
+               query->b.result_size += sizeof(uint64_t) * instances * group->num_counters;
                i += instances * group->num_counters;
 
                pc->get_size(block, group->num_counters, group->selectors,
@@ -382,8 +373,8 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
        }
 
        if (query->shaders) {
-               if ((query->shaders & R600_PC_SHADER_ALL) == 0)
-                       query->shaders |= R600_PC_SHADER_ALL;
+               if (query->shaders == R600_PC_SHADERS_WINDOWING)
+                       query->shaders = 0xffffffff;
                query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords;
        }
 
@@ -410,11 +401,11 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
                counter->base = group->result_base + j;
                counter->stride = group->num_counters;
 
-               counter->dwords = 1;
+               counter->qwords = 1;
                if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
-                       counter->dwords = screen->info.max_se;
+                       counter->qwords = screen->info.max_se;
                if (group->instance < 0)
-                       counter->dwords *= block->num_instances;
+                       counter->qwords *= block->num_instances;
        }
 
        if (!r600_query_hw_init(rctx, &query->b))
@@ -441,7 +432,7 @@ static boolean r600_init_block_names(struct r600_common_screen *screen,
        if (block->flags & R600_PC_BLOCK_SE_GROUPS)
                groups_se = screen->info.max_se;
        if (block->flags & R600_PC_BLOCK_SHADER)
-               groups_shader = ARRAY_SIZE(r600_pc_shader_suffix);
+               groups_shader = screen->perfcounters->num_shader_types;
 
        namelen = strlen(block->basename);
        block->group_name_stride = namelen + 1;
@@ -465,14 +456,15 @@ static boolean r600_init_block_names(struct r600_common_screen *screen,
 
        groupname = block->group_names;
        for (i = 0; i < groups_shader; ++i) {
-               unsigned shaderlen = strlen(r600_pc_shader_suffix[i]);
+               const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i];
+               unsigned shaderlen = strlen(shader_suffix);
                for (j = 0; j < groups_se; ++j) {
                        for (k = 0; k < groups_instance; ++k) {
                                strcpy(groupname, block->basename);
                                p = groupname + namelen;
 
                                if (block->flags & R600_PC_BLOCK_SHADER) {
-                                       strcpy(p, r600_pc_shader_suffix[i]);
+                                       strcpy(p, shader_suffix);
                                        p += shaderlen;
                                }
 
@@ -543,7 +535,7 @@ int r600_get_perfcounter_info(struct r600_common_screen *screen,
        info->name = block->selector_names + sub * block->selector_name_stride;
        info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index;
        info->max_value.u64 = 0;
-       info->type = PIPE_DRIVER_QUERY_TYPE_UINT;
+       info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
        info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
        info->group_id = base_gid + sub / block->num_selectors;
        info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
@@ -629,7 +621,7 @@ void r600_perfcounters_add_block(struct r600_common_screen *rscreen,
        if (block->flags & R600_PC_BLOCK_SE_GROUPS)
                block->num_groups *= rscreen->info.max_se;
        if (block->flags & R600_PC_BLOCK_SHADER)
-               block->num_groups *= ARRAY_SIZE(r600_pc_shader_suffix);
+               block->num_groups *= pc->num_shader_types;
 
        ++pc->num_blocks;
        pc->num_groups += block->num_groups;