intel/perf: move registers to their own header
src/gallium/drivers/iris/iris_monitor.c
/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "iris_monitor.h"

#include <xf86drm.h>

#include "iris_screen.h"
#include "iris_context.h"

#include "perf/gen_perf.h"
#include "perf/gen_perf_regs.h"

struct iris_monitor_object {
   int num_active_counters;
   int *active_counters;

   size_t result_size;
   unsigned char *result_buffer;

   struct gen_perf_query_object *query;
};

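/* Screen-level half of the gallium driver-query interface (intended as the
 * pipe_screen get_driver_query_info hook).  Gallium's convention: when
 * called with info == NULL, return the number of available metrics;
 * otherwise fill *info for the metric at 'index' and return 1.
 */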
int
iris_get_monitor_info(struct pipe_screen *pscreen, unsigned index,
                      struct pipe_driver_query_info *info)
{
   const struct iris_screen *screen = (struct iris_screen *)pscreen;
   assert(screen->monitor_cfg);
   if (!screen->monitor_cfg)
      return 0;

   const struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;

   if (!info) {
      /* return the number of metrics */
      return monitor_cfg->num_counters;
   }

   const struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;
   const int group = monitor_cfg->counters[index].group;
   const int counter_index = monitor_cfg->counters[index].counter;
   struct gen_perf_query_counter *counter =
      &perf_cfg->queries[group].counters[counter_index];

   info->group_id = group;
   info->name = counter->name;
   info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;

   if (counter->type == GEN_PERF_COUNTER_TYPE_THROUGHPUT)
      info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
   else
      info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
   switch (counter->data_type) {
   case GEN_PERF_COUNTER_DATA_TYPE_BOOL32:
   case GEN_PERF_COUNTER_DATA_TYPE_UINT32:
      info->type = PIPE_DRIVER_QUERY_TYPE_UINT;
      info->max_value.u32 = 0;
      break;
   case GEN_PERF_COUNTER_DATA_TYPE_UINT64:
      info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
      info->max_value.u64 = 0;
      break;
   case GEN_PERF_COUNTER_DATA_TYPE_FLOAT:
   case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE:
      info->type = PIPE_DRIVER_QUERY_TYPE_FLOAT;
      info->max_value.u64 = -1;
      break;
   default:
      assert(false);
      break;
   }

   /* indicates that this is an OA query, not a pipeline statistics query */
   info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
   return 1;
}

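/* The gen_perf vtbl passes buffer objects, batches, and contexts around as
 * void pointers; the typedefs below exist only to cast the iris helpers
 * into those driver-agnostic slots.
 */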
typedef void (*bo_unreference_t)(void *);
typedef void *(*bo_map_t)(void *, void *, unsigned flags);
typedef void (*bo_unmap_t)(void *);
typedef void (*emit_mi_report_t)(void *, void *, uint32_t, uint32_t);
typedef void (*emit_mi_flush_t)(void *);
typedef void (*capture_frequency_stat_register_t)(void *, void *,
                                                  uint32_t);
typedef void (*store_register_mem64_t)(void *ctx, void *bo,
                                       uint32_t reg, uint32_t offset);
typedef bool (*batch_references_t)(void *batch, void *bo);
typedef void (*bo_wait_rendering_t)(void *bo);
typedef int (*bo_busy_t)(void *bo);

static void *
iris_oa_bo_alloc(void *bufmgr, const char *name, uint64_t size)
{
   return iris_bo_alloc(bufmgr, name, size, IRIS_MEMZONE_OTHER);
}

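/* gen_perf invokes this hook around its MI_REPORT_PERF_COUNT pairs,
 * presumably so outstanding rendering does not bleed into the sampled OA
 * counters; iris implements it as a full pipe-control flush/invalidate
 * with a CS stall.
 */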
static void
iris_monitor_emit_mi_flush(struct iris_context *ice)
{
   const int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH |
                     PIPE_CONTROL_INSTRUCTION_INVALIDATE |
                     PIPE_CONTROL_CONST_CACHE_INVALIDATE |
                     PIPE_CONTROL_DATA_CACHE_FLUSH |
                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                     PIPE_CONTROL_VF_CACHE_INVALIDATE |
                     PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                     PIPE_CONTROL_CS_STALL;
   iris_emit_pipe_control_flush(&ice->batches[IRIS_BATCH_RENDER],
                                "OA metrics", flags);
}

static void
iris_monitor_emit_mi_report_perf_count(void *c,
                                       void *bo,
                                       uint32_t offset_in_bytes,
                                       uint32_t report_id)
{
   struct iris_context *ice = c;
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   ice->vtbl.emit_mi_report_perf_count(batch, bo, offset_in_bytes, report_id);
}

static void
iris_monitor_batchbuffer_flush(void *c, const char *file, int line)
{
   struct iris_context *ice = c;
   /* Forward the caller's file/line rather than this wrapper's location. */
   _iris_batch_flush(&ice->batches[IRIS_BATCH_RENDER], file, line);
}

static void
iris_monitor_capture_frequency_stat_register(void *ctx,
                                             void *bo,
                                             uint32_t bo_offset)
{
   struct iris_context *ice = ctx;
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   ice->vtbl.store_register_mem32(batch, GEN9_RPSTAT0, bo, bo_offset, false);
}

static void
iris_monitor_store_register_mem64(void *ctx, void *bo,
                                  uint32_t reg, uint32_t offset)
{
   struct iris_context *ice = ctx;
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   ice->vtbl.store_register_mem64(batch, reg, bo, offset, false);
}


static bool
iris_monitor_init_metrics(struct iris_screen *screen)
{
   struct iris_monitor_config *monitor_cfg =
      rzalloc(screen, struct iris_monitor_config);
   struct gen_perf_config *perf_cfg = NULL;
   if (unlikely(!monitor_cfg))
      goto allocation_error;
   perf_cfg = gen_perf_new(monitor_cfg);
   if (unlikely(!perf_cfg))
      goto allocation_error;

   monitor_cfg->perf_cfg = perf_cfg;

   perf_cfg->vtbl.bo_alloc = iris_oa_bo_alloc;
   perf_cfg->vtbl.bo_unreference = (bo_unreference_t)iris_bo_unreference;
   perf_cfg->vtbl.bo_map = (bo_map_t)iris_bo_map;
   perf_cfg->vtbl.bo_unmap = (bo_unmap_t)iris_bo_unmap;
   perf_cfg->vtbl.emit_mi_flush = (emit_mi_flush_t)iris_monitor_emit_mi_flush;

   perf_cfg->vtbl.emit_mi_report_perf_count =
      (emit_mi_report_t)iris_monitor_emit_mi_report_perf_count;
   perf_cfg->vtbl.batchbuffer_flush = iris_monitor_batchbuffer_flush;
   perf_cfg->vtbl.capture_frequency_stat_register =
      (capture_frequency_stat_register_t)
      iris_monitor_capture_frequency_stat_register;
   perf_cfg->vtbl.store_register_mem64 =
      (store_register_mem64_t)iris_monitor_store_register_mem64;
   perf_cfg->vtbl.batch_references = (batch_references_t)iris_batch_references;
   perf_cfg->vtbl.bo_wait_rendering =
      (bo_wait_rendering_t)iris_bo_wait_rendering;
   perf_cfg->vtbl.bo_busy = (bo_busy_t)iris_bo_busy;

   gen_perf_init_metrics(perf_cfg, &screen->devinfo, screen->fd);
   screen->monitor_cfg = monitor_cfg;

   /* A gallium "group" is equivalent to a gen "query"; a gallium "query"
    * is equivalent to a gen "query_counter".
    *
    * Each gen_query supports a specific number of query_counters.  To
    * allocate the array of iris_monitor_counter, we need an upper bound
    * (ignoring duplicate query_counters).
    */
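   /* For example (counts purely illustrative), two gen queries exposing 60
    * and 45 counters would give an upper bound of 105 slots; filtering out
    * duplicate names below can only shrink the final count.
    */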
   int gen_query_counters_count = 0;
   for (int gen_query_id = 0;
        gen_query_id < perf_cfg->n_queries;
        ++gen_query_id) {
      gen_query_counters_count += perf_cfg->queries[gen_query_id].n_counters;
   }

   monitor_cfg->counters = rzalloc_size(monitor_cfg,
                                        sizeof(struct iris_monitor_counter) *
                                        gen_query_counters_count);
   if (unlikely(!monitor_cfg->counters))
      goto allocation_error;

   int iris_monitor_id = 0;
   for (int group = 0; group < perf_cfg->n_queries; ++group) {
      for (int counter = 0;
           counter < perf_cfg->queries[group].n_counters;
           ++counter) {
         /* Check previously identified metrics to filter out duplicates.
          * The user is not helped by having the same metric available in
          * several groups.  (n^2 algorithm.)
          */
         bool duplicate = false;
         for (int existing_group = 0;
              existing_group < group && !duplicate;
              ++existing_group) {
            for (int existing_counter = 0;
                 existing_counter < perf_cfg->queries[existing_group].n_counters && !duplicate;
                 ++existing_counter) {
               const char *current_name =
                  perf_cfg->queries[group].counters[counter].name;
               const char *existing_name =
                  perf_cfg->queries[existing_group].counters[existing_counter].name;
               if (strcmp(current_name, existing_name) == 0) {
                  duplicate = true;
               }
            }
         }
         if (duplicate)
            continue;
         monitor_cfg->counters[iris_monitor_id].group = group;
         monitor_cfg->counters[iris_monitor_id].counter = counter;
         ++iris_monitor_id;
      }
   }
   monitor_cfg->num_counters = iris_monitor_id;
   return monitor_cfg->num_counters;

allocation_error:
   /* monitor_cfg, the counters array, and perf_cfg (gen_perf_new takes a
    * ralloc parent) all live in the monitor_cfg ralloc context, so they
    * must be released with ralloc_free rather than free; one call frees
    * the children too, and ralloc_free(NULL) is a no-op.
    */
   ralloc_free(monitor_cfg);
   return false;
}

int
iris_get_monitor_group_info(struct pipe_screen *pscreen,
                            unsigned group_index,
                            struct pipe_driver_query_group_info *info)
{
   struct iris_screen *screen = (struct iris_screen *)pscreen;
   if (!screen->monitor_cfg) {
      if (!iris_monitor_init_metrics(screen))
         return 0;
   }

   const struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;
   const struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;

   if (!info) {
      /* return the count that can be queried */
      return perf_cfg->n_queries;
   }

   if (group_index >= perf_cfg->n_queries) {
      /* out of range */
      return 0;
   }

   struct gen_perf_query_info *query = &perf_cfg->queries[group_index];

   info->name = query->name;
   info->max_active_queries = query->n_counters;
   info->num_queries = query->n_counters;

   return 1;
}
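
/* A minimal enumeration sketch, assuming a pipe_screen 'pscreen' wired to
 * the two entry points above: call each with a NULL info pointer first to
 * size the loops, then once per index to fill in the details.
 *
 *    int n_groups = iris_get_monitor_group_info(pscreen, 0, NULL);
 *    for (int g = 0; g < n_groups; ++g) {
 *       struct pipe_driver_query_group_info ginfo;
 *       iris_get_monitor_group_info(pscreen, g, &ginfo);
 *    }
 *    int n_metrics = iris_get_monitor_info(pscreen, 0, NULL);
 *    for (int m = 0; m < n_metrics; ++m) {
 *       struct pipe_driver_query_info minfo;
 *       iris_get_monitor_info(pscreen, m, &minfo);
 *    }
 */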

static void
iris_init_monitor_ctx(struct iris_context *ice)
{
   struct iris_screen *screen = (struct iris_screen *) ice->ctx.screen;
   struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;

   ice->perf_ctx = gen_perf_new_context(ice);
   if (unlikely(!ice->perf_ctx))
      return;

   struct gen_perf_context *perf_ctx = ice->perf_ctx;
   struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;
   gen_perf_init_context(perf_ctx,
                         perf_cfg,
                         ice,
                         screen->bufmgr,
                         &screen->devinfo,
                         ice->batches[IRIS_BATCH_RENDER].hw_ctx_id,
                         screen->fd);
}

/* Entry point for GL_AMD_performance_monitor's GenPerfMonitorsAMD. */
struct iris_monitor_object *
iris_create_monitor_object(struct iris_context *ice,
                           unsigned num_queries,
                           unsigned *query_types)
{
   struct iris_screen *screen = (struct iris_screen *) ice->ctx.screen;
   struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;
   struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;
   struct gen_perf_query_object *query_obj = NULL;

   /* Initialize the perf context if this has not already been done.  This
    * function is the first entry point that carries the gallium context.
    */
   if (ice->perf_ctx == NULL) {
      iris_init_monitor_ctx(ice);
   }
   struct gen_perf_context *perf_ctx = ice->perf_ctx;

   assert(num_queries > 0);
   int query_index = query_types[0] - PIPE_QUERY_DRIVER_SPECIFIC;
   assert(query_index >= 0 && query_index < monitor_cfg->num_counters);
   const int group = monitor_cfg->counters[query_index].group;

   struct iris_monitor_object *monitor =
      calloc(1, sizeof(struct iris_monitor_object));
   if (unlikely(!monitor))
      goto allocation_failure;

   monitor->num_active_counters = num_queries;
   monitor->active_counters = calloc(num_queries, sizeof(int));
   if (unlikely(!monitor->active_counters))
      goto allocation_failure;

   for (unsigned i = 0; i < num_queries; ++i) {
      unsigned current_query = query_types[i];
      unsigned current_query_index = current_query - PIPE_QUERY_DRIVER_SPECIFIC;

      /* All queries must be in the same group. */
      assert(current_query_index < monitor_cfg->num_counters);
      assert(monitor_cfg->counters[current_query_index].group == group);
      monitor->active_counters[i] =
         monitor_cfg->counters[current_query_index].counter;
   }

   /* Create the gen_perf_query. */
   query_obj = gen_perf_new_query(perf_ctx, group);
   if (unlikely(!query_obj))
      goto allocation_failure;

   monitor->query = query_obj;
   monitor->result_size = perf_cfg->queries[group].data_size;
   monitor->result_buffer = calloc(1, monitor->result_size);
   if (unlikely(!monitor->result_buffer))
      goto allocation_failure;

   return monitor;

allocation_failure:
   if (monitor) {
      free(monitor->active_counters);
      free(monitor->result_buffer);
   }
   free(query_obj);
   free(monitor);
   return NULL;
}

void
iris_destroy_monitor_object(struct pipe_context *ctx,
                            struct iris_monitor_object *monitor)
{
   struct iris_context *ice = (struct iris_context *)ctx;

   gen_perf_delete_query(ice->perf_ctx, monitor->query);
   free(monitor->result_buffer);
   monitor->result_buffer = NULL;
   free(monitor->active_counters);
   monitor->active_counters = NULL;
   free(monitor);
}

bool
iris_begin_monitor(struct pipe_context *ctx,
                   struct iris_monitor_object *monitor)
{
   struct iris_context *ice = (void *) ctx;
   struct gen_perf_context *perf_ctx = ice->perf_ctx;

   return gen_perf_begin_query(perf_ctx, monitor->query);
}

bool
iris_end_monitor(struct pipe_context *ctx,
                 struct iris_monitor_object *monitor)
{
   struct iris_context *ice = (void *) ctx;
   struct gen_perf_context *perf_ctx = ice->perf_ctx;

   gen_perf_end_query(perf_ctx, monitor->query);
   return true;
}

bool
iris_get_monitor_result(struct pipe_context *ctx,
                        struct iris_monitor_object *monitor,
                        bool wait,
                        union pipe_numeric_type_union *result)
{
   struct iris_context *ice = (void *) ctx;
   struct gen_perf_context *perf_ctx = ice->perf_ctx;
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];

   bool monitor_ready =
      gen_perf_is_query_ready(perf_ctx, monitor->query, batch);

   if (!monitor_ready) {
      if (!wait)
         return false;
      gen_perf_wait_query(perf_ctx, monitor->query, batch);
   }

   assert(gen_perf_is_query_ready(perf_ctx, monitor->query, batch));

   unsigned bytes_written;
   gen_perf_get_query_data(perf_ctx, monitor->query,
                           monitor->result_size,
                           (unsigned *) monitor->result_buffer,
                           &bytes_written);
   if (bytes_written != monitor->result_size)
      return false;

   /* Copy metrics into the batch result. */
   for (int i = 0; i < monitor->num_active_counters; ++i) {
      int current_counter = monitor->active_counters[i];
      const struct gen_perf_query_info *info =
         gen_perf_query_info(monitor->query);
      const struct gen_perf_query_counter *counter =
         &info->counters[current_counter];
      assert(gen_perf_query_counter_get_size(counter));
      switch (counter->data_type) {
      case GEN_PERF_COUNTER_DATA_TYPE_UINT64:
         result[i].u64 = *(uint64_t *)(monitor->result_buffer + counter->offset);
         break;
      case GEN_PERF_COUNTER_DATA_TYPE_FLOAT:
         result[i].f = *(float *)(monitor->result_buffer + counter->offset);
         break;
      case GEN_PERF_COUNTER_DATA_TYPE_UINT32:
      case GEN_PERF_COUNTER_DATA_TYPE_BOOL32:
         result[i].u64 = *(uint32_t *)(monitor->result_buffer + counter->offset);
         break;
      case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE: {
         double v = *(double *)(monitor->result_buffer + counter->offset);
         result[i].f = v;
         break;
      }
      default:
         unreachable("unexpected counter data type");
      }
   }
   return true;
}
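
/* A rough usage sketch of the monitor lifecycle, assuming an iris-backed
 * pipe_context 'ctx' and a metric enumerated as driver-specific query 0
 * (local names are illustrative only):
 *
 *    unsigned type = PIPE_QUERY_DRIVER_SPECIFIC + 0;
 *    struct iris_monitor_object *mon =
 *       iris_create_monitor_object((struct iris_context *)ctx, 1, &type);
 *
 *    iris_begin_monitor(ctx, mon);
 *    ... draw calls to be measured ...
 *    iris_end_monitor(ctx, mon);
 *
 *    union pipe_numeric_type_union result[1];
 *    if (iris_get_monitor_result(ctx, mon, true, result))
 *       printf("counter 0 = %" PRIu64 "\n", result[0].u64);
 *
 *    iris_destroy_monitor_object(ctx, mon);
 */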