X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fvc4%2Fvc4_query.c;h=6e4681e93ccbd30cb7ff4a719cb574018688d57b;hb=ad1a4cb563f483a5767431adcc6a1a8f973326fd;hp=17400a37ca3884d3816c6f521ca8dafcf70b3ffc;hpb=12f88ba32a14ea79134f4e995a55149f078a2f27;p=mesa.git diff --git a/src/gallium/drivers/vc4/vc4_query.c b/src/gallium/drivers/vc4/vc4_query.c index 17400a37ca3..6e4681e93cc 100644 --- a/src/gallium/drivers/vc4/vc4_query.c +++ b/src/gallium/drivers/vc4/vc4_query.c @@ -22,8 +22,9 @@ */ /** - * Stub support for occlusion queries. + * Expose V3D HW perf counters. * + * We also have code to fake support for occlusion queries. * Since we expose support for GL 2.0, we have to expose occlusion queries, * but the spec allows you to expose 0 query counter bits, so we just return 0 * as the result of all our queries. @@ -32,48 +33,252 @@ struct vc4_query { - uint8_t pad; + unsigned num_queries; + struct vc4_hwperfmon *hwperfmon; }; +static const char *v3d_counter_names[] = { + "FEP-valid-primitives-no-rendered-pixels", + "FEP-valid-primitives-rendered-pixels", + "FEP-clipped-quads", + "FEP-valid-quads", + "TLB-quads-not-passing-stencil-test", + "TLB-quads-not-passing-z-and-stencil-test", + "TLB-quads-passing-z-and-stencil-test", + "TLB-quads-with-zero-coverage", + "TLB-quads-with-non-zero-coverage", + "TLB-quads-written-to-color-buffer", + "PTB-primitives-discarded-outside-viewport", + "PTB-primitives-need-clipping", + "PTB-primitives-discared-reversed", + "QPU-total-idle-clk-cycles", + "QPU-total-clk-cycles-vertex-coord-shading", + "QPU-total-clk-cycles-fragment-shading", + "QPU-total-clk-cycles-executing-valid-instr", + "QPU-total-clk-cycles-waiting-TMU", + "QPU-total-clk-cycles-waiting-scoreboard", + "QPU-total-clk-cycles-waiting-varyings", + "QPU-total-instr-cache-hit", + "QPU-total-instr-cache-miss", + "QPU-total-uniform-cache-hit", + "QPU-total-uniform-cache-miss", + "TMU-total-text-quads-processed", + "TMU-total-text-cache-miss", + "VPM-total-clk-cycles-VDW-stalled", + "VPM-total-clk-cycles-VCD-stalled", + "L2C-total-cache-hit", + "L2C-total-cache-miss", +}; + +int vc4_get_driver_query_group_info(struct pipe_screen *pscreen, + unsigned index, + struct pipe_driver_query_group_info *info) +{ + struct vc4_screen *screen = vc4_screen(pscreen); + + if (!screen->has_perfmon_ioctl) + return 0; + + if (!info) + return 1; + + if (index > 0) + return 0; + + info->name = "V3D counters"; + info->max_active_queries = DRM_VC4_MAX_PERF_COUNTERS; + info->num_queries = ARRAY_SIZE(v3d_counter_names); + return 1; +} + +int vc4_get_driver_query_info(struct pipe_screen *pscreen, unsigned index, + struct pipe_driver_query_info *info) +{ + struct vc4_screen *screen = vc4_screen(pscreen); + + if (!screen->has_perfmon_ioctl) + return 0; + + if (!info) + return ARRAY_SIZE(v3d_counter_names); + + if (index >= ARRAY_SIZE(v3d_counter_names)) + return 0; + + info->group_id = 0; + info->name = v3d_counter_names[index]; + info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index; + info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE; + info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; + info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH; + return 1; +} + static struct pipe_query * -vc4_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index) +vc4_create_batch_query(struct pipe_context *pctx, unsigned num_queries, + unsigned *query_types) { struct vc4_query *query = calloc(1, sizeof(*query)); + struct vc4_hwperfmon *hwperfmon; + unsigned i, nhwqueries = 0; + + if (!query) + return NULL; + + for (i = 0; i < num_queries; i++) { + if (query_types[i] >= PIPE_QUERY_DRIVER_SPECIFIC) + nhwqueries++; + } + + /* We can't mix HW and non-HW queries. */ + if (nhwqueries && nhwqueries != num_queries) + return NULL; + + if (!nhwqueries) + return (struct pipe_query *)query; + + hwperfmon = calloc(1, sizeof(*hwperfmon)); + if (!hwperfmon) + goto err_free_query; + + for (i = 0; i < num_queries; i++) + hwperfmon->events[i] = query_types[i] - + PIPE_QUERY_DRIVER_SPECIFIC; + + query->hwperfmon = hwperfmon; + query->num_queries = num_queries; /* Note that struct pipe_query isn't actually defined anywhere. */ return (struct pipe_query *)query; + +err_free_query: + free(query); + + return NULL; +} + +static struct pipe_query * +vc4_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index) +{ + return vc4_create_batch_query(ctx, 1, &query_type); } static void -vc4_destroy_query(struct pipe_context *ctx, struct pipe_query *query) +vc4_destroy_query(struct pipe_context *pctx, struct pipe_query *pquery) { + struct vc4_context *ctx = vc4_context(pctx); + struct vc4_query *query = (struct vc4_query *)pquery; + + if (query->hwperfmon && query->hwperfmon->id) { + if (query->hwperfmon->id) { + struct drm_vc4_perfmon_destroy req = { }; + + req.id = query->hwperfmon->id; + vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY, + &req); + } + + free(query->hwperfmon); + } + free(query); } static boolean -vc4_begin_query(struct pipe_context *ctx, struct pipe_query *query) +vc4_begin_query(struct pipe_context *pctx, struct pipe_query *pquery) { + struct vc4_query *query = (struct vc4_query *)pquery; + struct vc4_context *ctx = vc4_context(pctx); + struct drm_vc4_perfmon_create req = { }; + unsigned i; + int ret; + + if (!query->hwperfmon) + return true; + + /* Only one perfmon can be activated per context. */ + if (ctx->perfmon) + return false; + + /* Reset the counters by destroying the previously allocated perfmon */ + if (query->hwperfmon->id) { + struct drm_vc4_perfmon_destroy destroyreq = { }; + + destroyreq.id = query->hwperfmon->id; + vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY, &destroyreq); + } + + for (i = 0; i < query->num_queries; i++) + req.events[i] = query->hwperfmon->events[i]; + + req.ncounters = query->num_queries; + ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_CREATE, &req); + if (ret) + return false; + + query->hwperfmon->id = req.id; + + /* Make sure all pendings jobs are flushed before activating the + * perfmon. + */ + vc4_flush(pctx); + ctx->perfmon = query->hwperfmon; return true; } -static void -vc4_end_query(struct pipe_context *ctx, struct pipe_query *query) +static bool +vc4_end_query(struct pipe_context *pctx, struct pipe_query *pquery) { + struct vc4_query *query = (struct vc4_query *)pquery; + struct vc4_context *ctx = vc4_context(pctx); + + if (!query->hwperfmon) + return true; + + if (ctx->perfmon != query->hwperfmon) + return false; + + /* Make sure all pendings jobs are flushed before deactivating the + * perfmon. + */ + vc4_flush(pctx); + ctx->perfmon = NULL; + return true; } static boolean -vc4_get_query_result(struct pipe_context *ctx, struct pipe_query *query, +vc4_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery, boolean wait, union pipe_query_result *vresult) { - uint64_t *result = &vresult->u64; + struct vc4_context *ctx = vc4_context(pctx); + struct vc4_query *query = (struct vc4_query *)pquery; + struct drm_vc4_perfmon_get_values req; + unsigned i; + int ret; + + if (!query->hwperfmon) { + vresult->u64 = 0; + return true; + } - *result = 0; + if (!vc4_wait_seqno(ctx->screen, query->hwperfmon->last_seqno, + wait ? PIPE_TIMEOUT_INFINITE : 0, "perfmon")) + return false; + + req.id = query->hwperfmon->id; + req.values_ptr = (uintptr_t)query->hwperfmon->counters; + ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_GET_VALUES, &req); + if (ret) + return false; + + for (i = 0; i < query->num_queries; i++) + vresult->batch[i].u64 = query->hwperfmon->counters[i]; return true; } static void -vc4_set_active_query_state(struct pipe_context *pipe, boolean enable) +vc4_set_active_query_state(struct pipe_context *pctx, boolean enable) { } @@ -81,10 +286,10 @@ void vc4_query_init(struct pipe_context *pctx) { pctx->create_query = vc4_create_query; + pctx->create_batch_query = vc4_create_batch_query; pctx->destroy_query = vc4_destroy_query; pctx->begin_query = vc4_begin_query; pctx->end_query = vc4_end_query; pctx->get_query_result = vc4_get_query_result; - pctx->set_active_query_state = vc4_set_active_query_state; + pctx->set_active_query_state = vc4_set_active_query_state; } -