/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * Expose V3D HW perf counters.
 *
 * We also have code to fake support for occlusion queries.
 * Since we expose support for GL 2.0, we have to expose occlusion queries,
 * but the spec allows you to expose 0 query counter bits, so we just return 0
 * as the result of all our queries.
 */
32 #include "vc4_context.h"
/* Per-query state.  A query created from HW (driver-specific) query types
 * carries a perfmon describing the counters to sample; pure SW queries
 * (our faked occlusion queries) leave hwperfmon NULL.
 */
struct vc4_query
{
        /* Number of query types this (batch) query was created with; also
         * the number of events programmed in hwperfmon.
         */
        unsigned num_queries;
        struct vc4_hwperfmon *hwperfmon;
};
/* Names of the V3D HW performance counters.  The index into this table is
 * the HW event ID handed to the kernel perfmon interface (see
 * vc4_create_batch_query/vc4_get_driver_query_info), so the order must
 * match the HW event numbering and entries must never be reordered.
 */
static const char *v3d_counter_names[] = {
        "FEP-valid-primitives-no-rendered-pixels",
        "FEP-valid-primitives-rendered-pixels",
        "FEP-clipped-quads",
        "FEP-valid-quads",
        "TLB-quads-not-passing-stencil-test",
        "TLB-quads-not-passing-z-and-stencil-test",
        "TLB-quads-passing-z-and-stencil-test",
        "TLB-quads-with-zero-coverage",
        "TLB-quads-with-non-zero-coverage",
        "TLB-quads-written-to-color-buffer",
        "PTB-primitives-discarded-outside-viewport",
        "PTB-primitives-need-clipping",
        "PTB-primitives-discared-reversed",
        "QPU-total-idle-clk-cycles",
        "QPU-total-clk-cycles-vertex-coord-shading",
        "QPU-total-clk-cycles-fragment-shading",
        "QPU-total-clk-cycles-executing-valid-instr",
        "QPU-total-clk-cycles-waiting-TMU",
        "QPU-total-clk-cycles-waiting-scoreboard",
        "QPU-total-clk-cycles-waiting-varyings",
        "QPU-total-instr-cache-hit",
        "QPU-total-instr-cache-miss",
        "QPU-total-uniform-cache-hit",
        "QPU-total-uniform-cache-miss",
        "TMU-total-text-quads-processed",
        "TMU-total-text-cache-miss",
        "VPM-total-clk-cycles-VDW-stalled",
        "VPM-total-clk-cycles-VCD-stalled",
        "L2C-total-cache-hit",
        "L2C-total-cache-miss",
};
73 int vc4_get_driver_query_group_info(struct pipe_screen
*pscreen
,
75 struct pipe_driver_query_group_info
*info
)
77 struct vc4_screen
*screen
= vc4_screen(pscreen
);
79 if (!screen
->has_perfmon_ioctl
)
88 info
->name
= "V3D counters";
89 info
->max_active_queries
= DRM_VC4_MAX_PERF_COUNTERS
;
90 info
->num_queries
= ARRAY_SIZE(v3d_counter_names
);
94 int vc4_get_driver_query_info(struct pipe_screen
*pscreen
, unsigned index
,
95 struct pipe_driver_query_info
*info
)
97 struct vc4_screen
*screen
= vc4_screen(pscreen
);
99 if (!screen
->has_perfmon_ioctl
)
103 return ARRAY_SIZE(v3d_counter_names
);
105 if (index
>= ARRAY_SIZE(v3d_counter_names
))
109 info
->name
= v3d_counter_names
[index
];
110 info
->query_type
= PIPE_QUERY_DRIVER_SPECIFIC
+ index
;
111 info
->result_type
= PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE
;
112 info
->type
= PIPE_DRIVER_QUERY_TYPE_UINT64
;
113 info
->flags
= PIPE_DRIVER_QUERY_FLAG_BATCH
;
117 static struct pipe_query
*
118 vc4_create_batch_query(struct pipe_context
*pctx
, unsigned num_queries
,
119 unsigned *query_types
)
121 struct vc4_query
*query
= calloc(1, sizeof(*query
));
122 struct vc4_hwperfmon
*hwperfmon
;
123 unsigned i
, nhwqueries
= 0;
128 for (i
= 0; i
< num_queries
; i
++) {
129 if (query_types
[i
] >= PIPE_QUERY_DRIVER_SPECIFIC
)
133 /* We can't mix HW and non-HW queries. */
134 if (nhwqueries
&& nhwqueries
!= num_queries
)
138 return (struct pipe_query
*)query
;
140 hwperfmon
= calloc(1, sizeof(*hwperfmon
));
144 for (i
= 0; i
< num_queries
; i
++)
145 hwperfmon
->events
[i
] = query_types
[i
] -
146 PIPE_QUERY_DRIVER_SPECIFIC
;
148 query
->hwperfmon
= hwperfmon
;
149 query
->num_queries
= num_queries
;
151 /* Note that struct pipe_query isn't actually defined anywhere. */
152 return (struct pipe_query
*)query
;
/**
 * Creates a single-type query, implemented as a batch query of one entry.
 *
 * The index parameter (query group index) is unused: we expose only one
 * group.
 */
static struct pipe_query *
vc4_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
{
        return vc4_create_batch_query(ctx, 1, &query_type);
}
167 vc4_destroy_query(struct pipe_context
*pctx
, struct pipe_query
*pquery
)
169 struct vc4_context
*ctx
= vc4_context(pctx
);
170 struct vc4_query
*query
= (struct vc4_query
*)pquery
;
172 if (query
->hwperfmon
&& query
->hwperfmon
->id
) {
173 if (query
->hwperfmon
->id
) {
174 struct drm_vc4_perfmon_destroy req
= { };
176 req
.id
= query
->hwperfmon
->id
;
177 vc4_ioctl(ctx
->fd
, DRM_IOCTL_VC4_PERFMON_DESTROY
,
181 free(query
->hwperfmon
);
188 vc4_begin_query(struct pipe_context
*pctx
, struct pipe_query
*pquery
)
190 struct vc4_query
*query
= (struct vc4_query
*)pquery
;
191 struct vc4_context
*ctx
= vc4_context(pctx
);
192 struct drm_vc4_perfmon_create req
= { };
196 if (!query
->hwperfmon
)
199 /* Only one perfmon can be activated per context. */
203 /* Reset the counters by destroying the previously allocated perfmon */
204 if (query
->hwperfmon
->id
) {
205 struct drm_vc4_perfmon_destroy destroyreq
= { };
207 destroyreq
.id
= query
->hwperfmon
->id
;
208 vc4_ioctl(ctx
->fd
, DRM_IOCTL_VC4_PERFMON_DESTROY
, &destroyreq
);
211 for (i
= 0; i
< query
->num_queries
; i
++)
212 req
.events
[i
] = query
->hwperfmon
->events
[i
];
214 req
.ncounters
= query
->num_queries
;
215 ret
= vc4_ioctl(ctx
->fd
, DRM_IOCTL_VC4_PERFMON_CREATE
, &req
);
219 query
->hwperfmon
->id
= req
.id
;
221 /* Make sure all pendings jobs are flushed before activating the
225 ctx
->perfmon
= query
->hwperfmon
;
230 vc4_end_query(struct pipe_context
*pctx
, struct pipe_query
*pquery
)
232 struct vc4_query
*query
= (struct vc4_query
*)pquery
;
233 struct vc4_context
*ctx
= vc4_context(pctx
);
235 if (!query
->hwperfmon
)
238 if (ctx
->perfmon
!= query
->hwperfmon
)
241 /* Make sure all pendings jobs are flushed before deactivating the
250 vc4_get_query_result(struct pipe_context
*pctx
, struct pipe_query
*pquery
,
251 boolean wait
, union pipe_query_result
*vresult
)
253 struct vc4_context
*ctx
= vc4_context(pctx
);
254 struct vc4_query
*query
= (struct vc4_query
*)pquery
;
255 struct drm_vc4_perfmon_get_values req
;
259 if (!query
->hwperfmon
) {
264 if (!vc4_wait_seqno(ctx
->screen
, query
->hwperfmon
->last_seqno
,
265 wait
? PIPE_TIMEOUT_INFINITE
: 0, "perfmon"))
268 req
.id
= query
->hwperfmon
->id
;
269 req
.values_ptr
= (uintptr_t)query
->hwperfmon
->counters
;
270 ret
= vc4_ioctl(ctx
->fd
, DRM_IOCTL_VC4_PERFMON_GET_VALUES
, &req
);
274 for (i
= 0; i
< query
->num_queries
; i
++)
275 vresult
->batch
[i
].u64
= query
->hwperfmon
->counters
[i
];
281 vc4_set_active_query_state(struct pipe_context
*pctx
, boolean enable
)
286 vc4_query_init(struct pipe_context
*pctx
)
288 pctx
->create_query
= vc4_create_query
;
289 pctx
->create_batch_query
= vc4_create_batch_query
;
290 pctx
->destroy_query
= vc4_destroy_query
;
291 pctx
->begin_query
= vc4_begin_query
;
292 pctx
->end_query
= vc4_end_query
;
293 pctx
->get_query_result
= vc4_get_query_result
;
294 pctx
->set_active_query_state
= vc4_set_active_query_state
;