X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_perfcounter.c;h=9d90ca6e53b3f484ce446425f8e7cb728e40f388;hb=255de06c5990797832678d7af01876a1afca5b50;hp=2da14f8868fcfe132823f3ff8530ab2238b3b851;hpb=4666f36c04826c902836d77022cb0120457e09c5;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index 2da14f8868f..9d90ca6e53b 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -126,7 +126,7 @@ static const unsigned si_pc_shader_type_bits[] = { /* Max counters per HW block */ #define SI_QUERY_MAX_COUNTERS 16 -#define SI_PC_SHADERS_WINDOWING (1 << 31) +#define SI_PC_SHADERS_WINDOWING (1u << 31) struct si_query_group { struct si_query_group *next; @@ -671,16 +671,9 @@ static void si_pc_emit_start(struct si_context *sctx, { struct radeon_cmdbuf *cs = sctx->gfx_cs; - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, buffer, - RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); - - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | - COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM)); - radeon_emit(cs, 1); /* immediate */ - radeon_emit(cs, 0); /* unused */ - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); + si_cp_copy_data(sctx, sctx->gfx_cs, + COPY_DATA_DST_MEM, buffer, va - buffer->gpu_address, + COPY_DATA_IMM, NULL, 1); radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, S_036020_PERFMON_STATE(V_036020_DISABLE_AND_RESET)); @@ -697,7 +690,7 @@ static void si_pc_emit_stop(struct si_context *sctx, { struct radeon_cmdbuf *cs = sctx->gfx_cs; - si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0, + si_cp_release_mem(sctx, cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, EOP_DATA_SEL_VALUE_32BIT, buffer, va, 0, SI_NOT_QUERY); @@ -732,7 +725,7 @@ static void si_pc_emit_read(struct si_context *sctx, radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | - COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM) | + COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_COUNT_SEL); /* 64 bits */ radeon_emit(cs, reg >> 2); radeon_emit(cs, 0); /* unused */ @@ -745,7 +738,7 @@ static void si_pc_emit_read(struct si_context *sctx, for (idx = 0; idx < count; ++idx) { radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | - COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM) | + COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_COUNT_SEL); radeon_emit(cs, 0); /* immediate */ radeon_emit(cs, 0); @@ -756,7 +749,7 @@ static void si_pc_emit_read(struct si_context *sctx, } } -static void si_pc_query_destroy(struct si_screen *sscreen, +static void si_pc_query_destroy(struct si_context *sctx, struct si_query *squery) { struct si_query_pc *query = (struct si_query_pc *)squery; @@ -769,7 +762,7 @@ static void si_pc_query_destroy(struct si_screen *sscreen, FREE(query->counters); - si_query_buffer_destroy(sscreen, &query->buffer); + si_query_buffer_destroy(sctx->screen, &query->buffer); FREE(query); } @@ -848,7 +841,7 @@ static bool si_pc_query_begin(struct si_context *ctx, struct si_query *squery) si_query_buffer_reset(ctx, &query->buffer); - LIST_ADDTAIL(&query->b.active_list, &ctx->active_queries); + list_addtail(&query->b.active_list, &ctx->active_queries); ctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend; si_pc_query_resume(ctx, squery); @@ -1107,7 +1100,7 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx, return (struct pipe_query *)query; error: - si_pc_query_destroy(screen, &query->b); + si_pc_query_destroy((struct si_context *)ctx, &query->b); return NULL; } @@ -1291,11 +1284,11 @@ void si_init_perfcounters(struct si_screen *screen) unsigned i; switch (screen->info.chip_class) { - case CIK: + case GFX7: blocks = groups_CIK; num_blocks = ARRAY_SIZE(groups_CIK); break; - case VI: + case GFX8: blocks = groups_VI; num_blocks = ARRAY_SIZE(groups_VI); break; @@ -1303,13 +1296,13 @@ void si_init_perfcounters(struct si_screen *screen) blocks = groups_gfx9; num_blocks = ARRAY_SIZE(groups_gfx9); break; - case SI: + case GFX6: default: return; /* not implemented */ } if (screen->info.max_sh_per_se != 1) { - /* This should not happen on non-SI chips. */ + /* This should not happen on non-GFX6 chips. */ fprintf(stderr, "si_init_perfcounters: max_sh_per_se = %d not " "supported (inaccurate performance counters)\n", screen->info.max_sh_per_se); @@ -1333,7 +1326,7 @@ void si_init_perfcounters(struct si_screen *screen) for (i = 0; i < num_blocks; ++i) { struct si_pc_block *block = &pc->blocks[i]; block->b = &blocks[i]; - block->num_instances = block->b->instances; + block->num_instances = MAX2(1, block->b->instances); if (!strcmp(block->b->b->name, "CB") || !strcmp(block->b->b->name, "DB"))