X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_perfcounter.c;h=d073b5821fbfb3414390e4724985377cf9ea2048;hb=50fde5b20801a82bd95f14301707c32cf5df38a2;hp=949724ca7205f7b1075cbc6dbd3ca92e0422f089;hpb=501ff90a954f5a3b9fee1449ec96fbc9bd620f55;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index 949724ca720..d073b5821fb 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -126,7 +126,7 @@ static const unsigned si_pc_shader_type_bits[] = { /* Max counters per HW block */ #define SI_QUERY_MAX_COUNTERS 16 -#define SI_PC_SHADERS_WINDOWING (1 << 31) +#define SI_PC_SHADERS_WINDOWING (1u << 31) struct si_query_group { struct si_query_group *next; @@ -671,16 +671,9 @@ static void si_pc_emit_start(struct si_context *sctx, { struct radeon_cmdbuf *cs = sctx->gfx_cs; - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, buffer, - RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); - - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | - COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM)); - radeon_emit(cs, 1); /* immediate */ - radeon_emit(cs, 0); /* unused */ - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); + si_cp_copy_data(sctx, sctx->gfx_cs, + COPY_DATA_DST_MEM, buffer, va - buffer->gpu_address, + COPY_DATA_IMM, NULL, 1); radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, S_036020_PERFMON_STATE(V_036020_DISABLE_AND_RESET)); @@ -697,7 +690,7 @@ static void si_pc_emit_stop(struct si_context *sctx, { struct radeon_cmdbuf *cs = sctx->gfx_cs; - si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0, + si_cp_release_mem(sctx, cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, EOP_DATA_SEL_VALUE_32BIT, buffer, va, 0, SI_NOT_QUERY); @@ -732,7 +725,7 @@ static void si_pc_emit_read(struct si_context *sctx, radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | - COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM) | + COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_COUNT_SEL); /* 64 bits */ radeon_emit(cs, reg >> 2); radeon_emit(cs, 0); /* unused */ @@ -745,7 +738,7 @@ static void si_pc_emit_read(struct si_context *sctx, for (idx = 0; idx < count; ++idx) { radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | - COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM) | + COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_COUNT_SEL); radeon_emit(cs, 0); /* immediate */ radeon_emit(cs, 0); @@ -756,10 +749,10 @@ static void si_pc_emit_read(struct si_context *sctx, } } -static void si_pc_query_destroy(struct si_screen *sscreen, - struct si_query *rquery) +static void si_pc_query_destroy(struct si_context *sctx, + struct si_query *squery) { - struct si_query_pc *query = (struct si_query_pc *)rquery; + struct si_query_pc *query = (struct si_query_pc *)squery; while (query->groups) { struct si_query_group *group = query->groups; @@ -769,16 +762,16 @@ static void si_pc_query_destroy(struct si_screen *sscreen, FREE(query->counters); - si_query_buffer_destroy(sscreen, &query->buffer); + si_query_buffer_destroy(sctx->screen, &query->buffer); FREE(query); } -static void si_pc_query_resume(struct si_context *sctx, struct si_query *rquery) +static void si_pc_query_resume(struct si_context *sctx, struct si_query *squery) /* struct si_query_hw *hwquery, struct si_resource *buffer, uint64_t va)*/ { - struct si_query_pc *query = (struct si_query_pc *)rquery; + struct si_query_pc *query = (struct si_query_pc *)squery; int current_se = -1; int current_instance = -1; @@ -808,9 +801,9 @@ static void si_pc_query_resume(struct si_context *sctx, struct si_query *rquery) si_pc_emit_start(sctx, query->buffer.buf, va); } -static void si_pc_query_suspend(struct si_context *sctx, struct si_query *rquery) +static void si_pc_query_suspend(struct si_context *sctx, struct si_query *squery) { - struct si_query_pc *query = (struct si_query_pc *)rquery; + struct si_query_pc *query = (struct si_query_pc *)squery; if (!query->buffer.buf) return; @@ -842,28 +835,28 @@ static void si_pc_query_suspend(struct si_context *sctx, struct si_query *rquery si_pc_emit_instance(sctx, -1, -1); } -static bool si_pc_query_begin(struct si_context *ctx, struct si_query *rquery) +static bool si_pc_query_begin(struct si_context *ctx, struct si_query *squery) { - struct si_query_pc *query = (struct si_query_pc *)rquery; + struct si_query_pc *query = (struct si_query_pc *)squery; si_query_buffer_reset(ctx, &query->buffer); LIST_ADDTAIL(&query->b.active_list, &ctx->active_queries); ctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend; - si_pc_query_resume(ctx, rquery); + si_pc_query_resume(ctx, squery); return true; } -static bool si_pc_query_end(struct si_context *ctx, struct si_query *rquery) +static bool si_pc_query_end(struct si_context *ctx, struct si_query *squery) { - struct si_query_pc *query = (struct si_query_pc *)rquery; + struct si_query_pc *query = (struct si_query_pc *)squery; - si_pc_query_suspend(ctx, rquery); + si_pc_query_suspend(ctx, squery); - LIST_DEL(&rquery->active_list); - ctx->num_cs_dw_queries_suspend -= rquery->num_cs_dw_suspend; + LIST_DEL(&squery->active_list); + ctx->num_cs_dw_queries_suspend -= squery->num_cs_dw_suspend; return query->buffer.buf != NULL; } @@ -885,10 +878,10 @@ static void si_pc_query_add_result(struct si_query_pc *query, } } -static bool si_pc_query_get_result(struct si_context *sctx, struct si_query *rquery, +static bool si_pc_query_get_result(struct si_context *sctx, struct si_query *squery, bool wait, union pipe_query_result *result) { - struct si_query_pc *query = (struct si_query_pc *)rquery; + struct si_query_pc *query = (struct si_query_pc *)squery; memset(result, 0, sizeof(result->batch[0]) * query->num_counters); @@ -898,7 +891,7 @@ static bool si_pc_query_get_result(struct si_context *sctx, struct si_query *rqu unsigned results_base = 0; void *map; - if (rquery->b.flushed) + if (squery->b.flushed) map = sctx->ws->buffer_map(qbuf->buf->buf, NULL, usage); else map = si_buffer_map_sync_with_rings(sctx, qbuf->buf, usage); @@ -1107,7 +1100,7 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx, return (struct pipe_query *)query; error: - si_pc_query_destroy(screen, &query->b); + si_pc_query_destroy((struct si_context *)ctx, &query->b); return NULL; } @@ -1291,11 +1284,11 @@ void si_init_perfcounters(struct si_screen *screen) unsigned i; switch (screen->info.chip_class) { - case CIK: + case GFX7: blocks = groups_CIK; num_blocks = ARRAY_SIZE(groups_CIK); break; - case VI: + case GFX8: blocks = groups_VI; num_blocks = ARRAY_SIZE(groups_VI); break; @@ -1303,13 +1296,13 @@ void si_init_perfcounters(struct si_screen *screen) blocks = groups_gfx9; num_blocks = ARRAY_SIZE(groups_gfx9); break; - case SI: + case GFX6: default: return; /* not implemented */ } if (screen->info.max_sh_per_se != 1) { - /* This should not happen on non-SI chips. */ + /* This should not happen on non-GFX6 chips. */ fprintf(stderr, "si_init_perfcounters: max_sh_per_se = %d not " "supported (inaccurate performance counters)\n", screen->info.max_sh_per_se); @@ -1333,7 +1326,7 @@ void si_init_perfcounters(struct si_screen *screen) for (i = 0; i < num_blocks; ++i) { struct si_pc_block *block = &pc->blocks[i]; block->b = &blocks[i]; - block->num_instances = block->b->instances; + block->num_instances = MAX2(1, block->b->instances); if (!strcmp(block->b->b->name, "CB") || !strcmp(block->b->b->name, "DB"))