iris/perf: get monitor results
[mesa.git] / src / gallium / drivers / iris / iris_monitor.c
1 /*
2 * Copyright © 2019 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "iris_monitor.h"
24
25 #include <xf86drm.h>
26
27 #include "iris_screen.h"
28 #include "iris_context.h"
29
30 #include "perf/gen_perf.h"
31
/* State for one active performance-monitor object (AMD_performance_monitor
 * entry points below).  All active counters belong to a single gen perf
 * query ("group" in gallium terms); see iris_create_monitor_object.
 */
struct iris_monitor_object {
   int num_active_counters;
   int *active_counters;   /* per-entry: counter index within the group's query */

   size_t result_size;             /* bytes in result_buffer (query data_size) */
   unsigned char *result_buffer;   /* raw query data, decoded in iris_get_monitor_result */

   struct gen_perf_query_object *query;
};
41
42 int iris_get_monitor_info(struct pipe_screen *pscreen, unsigned index,
43 struct pipe_driver_query_info *info)
44 {
45 const struct iris_screen *screen = (struct iris_screen *)pscreen;
46 assert(screen->monitor_cfg);
47 if (!screen->monitor_cfg)
48 return 0;
49
50 const struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;
51 if (!info)
52 /* return the number of metrics */
53 return monitor_cfg->num_counters;
54 const struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;
55 const int group = monitor_cfg->counters[index].group;
56 const int counter_index = monitor_cfg->counters[index].counter;
57 info->group_id = group;
58 struct gen_perf_query_counter *counter =
59 &perf_cfg->queries[group].counters[counter_index];
60 info->name = counter->name;
61 info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;
62
63 if (counter->type == GEN_PERF_COUNTER_TYPE_THROUGHPUT)
64 info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
65 else
66 info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
67 switch (counter->data_type) {
68 case GEN_PERF_COUNTER_DATA_TYPE_BOOL32:
69 case GEN_PERF_COUNTER_DATA_TYPE_UINT32:
70 info->type = PIPE_DRIVER_QUERY_TYPE_UINT;
71 info->max_value.u32 = 0;
72 break;
73 case GEN_PERF_COUNTER_DATA_TYPE_UINT64:
74 info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
75 info->max_value.u64 = 0;
76 break;
77 case GEN_PERF_COUNTER_DATA_TYPE_FLOAT:
78 case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE:
79 info->type = PIPE_DRIVER_QUERY_TYPE_FLOAT;
80 info->max_value.u64 = -1;
81 break;
82 default:
83 assert(false);
84 break;
85 }
86
87 /* indicates that this is an OA query, not a pipeline statistics query */
88 info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
89 return 1;
90 }
91
/* Function-pointer typedefs matching the void*-based vtbl slots of
 * struct gen_perf_config.  The iris BO/batch helpers are cast to these
 * shapes in iris_monitor_init_metrics so the shared perf code can call
 * them without knowing the iris types.
 */
typedef void (*bo_unreference_t)(void *);
typedef void *(*bo_map_t)(void *, void *, unsigned flags);
typedef void (*bo_unmap_t)(void *);
typedef void (* emit_mi_report_t)(void *, void *, uint32_t, uint32_t);
typedef void (*emit_mi_flush_t)(void *);
typedef void (*capture_frequency_stat_register_t)(void *, void *,
                                                  uint32_t );
typedef void (*store_register_mem64_t)(void *ctx, void *bo,
                                       uint32_t reg, uint32_t offset);
typedef bool (*batch_references_t)(void *batch, void *bo);
typedef void (*bo_wait_rendering_t)(void *bo);
typedef int (*bo_busy_t)(void *bo);
104
105 static void *
106 iris_oa_bo_alloc(void *bufmgr,
107 const char *name,
108 uint64_t size)
109 {
110 return iris_bo_alloc(bufmgr, name, size, IRIS_MEMZONE_OTHER);
111 }
112
113 static void
114 iris_monitor_emit_mi_flush(struct iris_context *ice)
115 {
116 const int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH |
117 PIPE_CONTROL_INSTRUCTION_INVALIDATE |
118 PIPE_CONTROL_CONST_CACHE_INVALIDATE |
119 PIPE_CONTROL_DATA_CACHE_FLUSH |
120 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
121 PIPE_CONTROL_VF_CACHE_INVALIDATE |
122 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
123 PIPE_CONTROL_CS_STALL;
124 iris_emit_pipe_control_flush(&ice->batches[IRIS_BATCH_RENDER],
125 "OA metrics",
126 flags);
127 }
128
129 static void
130 iris_monitor_emit_mi_report_perf_count(void *c,
131 void *bo,
132 uint32_t offset_in_bytes,
133 uint32_t report_id)
134 {
135 struct iris_context *ice = c;
136 ice->vtbl.emit_mi_report_perf_count(&ice->batches[IRIS_BATCH_RENDER],
137 bo,
138 offset_in_bytes,
139 report_id);
140 }
141
142 static void
143 iris_monitor_batchbuffer_flush(void *c, const char *file, int line)
144 {
145 struct iris_context *ice = c;
146 _iris_batch_flush(&ice->batches[IRIS_BATCH_RENDER], __FILE__, __LINE__);
147 }
148
149 static void
150 iris_monitor_capture_frequency_stat_register(void *ctx,
151 void *bo,
152 uint32_t bo_offset)
153 {
154 struct iris_context *ice = ctx;
155 ice->vtbl.store_register_mem32(&ice->batches[IRIS_BATCH_RENDER],
156 GEN9_RPSTAT0, bo, bo_offset, false);
157 }
158
159 static void
160 iris_monitor_store_register_mem64(void *ctx, void *bo,
161 uint32_t reg, uint32_t offset)
162 {
163 struct iris_context *ice = ctx;
164 ice->vtbl.store_register_mem64(&ice->batches[IRIS_BATCH_RENDER], reg, bo,
165 offset, false);
166 }
167
168
169 static bool
170 iris_monitor_init_metrics(struct iris_screen *screen)
171 {
172 struct iris_monitor_config *monitor_cfg =
173 rzalloc(screen, struct iris_monitor_config);
174 struct gen_perf_config *perf_cfg = NULL;
175 if (unlikely(!monitor_cfg))
176 goto allocation_error;
177 perf_cfg = gen_perf_new(monitor_cfg);
178 if (unlikely(!perf_cfg))
179 goto allocation_error;
180
181 monitor_cfg->perf_cfg = perf_cfg;
182
183 perf_cfg->vtbl.bo_alloc = iris_oa_bo_alloc;
184 perf_cfg->vtbl.bo_unreference = (bo_unreference_t)iris_bo_unreference;
185 perf_cfg->vtbl.bo_map = (bo_map_t)iris_bo_map;
186 perf_cfg->vtbl.bo_unmap = (bo_unmap_t)iris_bo_unmap;
187 perf_cfg->vtbl.emit_mi_flush = (emit_mi_flush_t)iris_monitor_emit_mi_flush;
188
189 perf_cfg->vtbl.emit_mi_report_perf_count =
190 (emit_mi_report_t)iris_monitor_emit_mi_report_perf_count;
191 perf_cfg->vtbl.batchbuffer_flush = iris_monitor_batchbuffer_flush;
192 perf_cfg->vtbl.capture_frequency_stat_register =
193 (capture_frequency_stat_register_t) iris_monitor_capture_frequency_stat_register;
194 perf_cfg->vtbl.store_register_mem64 =
195 (store_register_mem64_t) iris_monitor_store_register_mem64;
196 perf_cfg->vtbl.batch_references = (batch_references_t)iris_batch_references;
197 perf_cfg->vtbl.bo_wait_rendering =
198 (bo_wait_rendering_t)iris_bo_wait_rendering;
199 perf_cfg->vtbl.bo_busy = (bo_busy_t)iris_bo_busy;
200
201 gen_perf_init_metrics(perf_cfg, &screen->devinfo, screen->fd);
202 screen->monitor_cfg = monitor_cfg;
203
204 /* a gallium "group" is equivalent to a gen "query"
205 * a gallium "query" is equivalent to a gen "query_counter"
206 *
207 * Each gen_query supports a specific number of query_counters. To
208 * allocate the array of iris_monitor_counter, we need an upper bound
209 * (ignoring duplicate query_counters).
210 */
211 int gen_query_counters_count = 0;
212 for (int gen_query_id = 0;
213 gen_query_id < perf_cfg->n_queries;
214 ++gen_query_id) {
215 gen_query_counters_count += perf_cfg->queries[gen_query_id].n_counters;
216 }
217
218 monitor_cfg->counters = rzalloc_size(monitor_cfg,
219 sizeof(struct iris_monitor_counter) *
220 gen_query_counters_count);
221 if (unlikely(!monitor_cfg->counters))
222 goto allocation_error;
223
224 int iris_monitor_id = 0;
225 for (int group = 0; group < perf_cfg->n_queries; ++group) {
226 for (int counter = 0;
227 counter < perf_cfg->queries[group].n_counters;
228 ++counter) {
229 /* Check previously identified metrics to filter out duplicates. The
230 * user is not helped by having the same metric available in several
231 * groups. (n^2 algorithm).
232 */
233 bool duplicate = false;
234 for (int existing_group = 0;
235 existing_group < group && !duplicate;
236 ++existing_group) {
237 for (int existing_counter = 0;
238 existing_counter < perf_cfg->queries[existing_group].n_counters && !duplicate;
239 ++existing_counter) {
240 const char *current_name = perf_cfg->queries[group].counters[counter].name;
241 const char *existing_name =
242 perf_cfg->queries[existing_group].counters[existing_counter].name;
243 if (strcmp(current_name, existing_name) == 0) {
244 duplicate = true;
245 }
246 }
247 }
248 if (duplicate)
249 continue;
250 monitor_cfg->counters[iris_monitor_id].group = group;
251 monitor_cfg->counters[iris_monitor_id].counter = counter;
252 ++iris_monitor_id;
253 }
254 }
255 monitor_cfg->num_counters = iris_monitor_id;
256 return monitor_cfg->num_counters;
257
258 allocation_error:
259 if (monitor_cfg)
260 free(monitor_cfg->counters);
261 free(perf_cfg);
262 free(monitor_cfg);
263 return false;
264 }
265
266 int iris_get_monitor_group_info(struct pipe_screen *pscreen,
267 unsigned group_index,
268 struct pipe_driver_query_group_info *info)
269 {
270 struct iris_screen *screen = (struct iris_screen *)pscreen;
271 if (!screen->monitor_cfg) {
272 if (!iris_monitor_init_metrics(screen))
273 return 0;
274 }
275
276 const struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;
277 const struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;
278 if (!info)
279 /* return the count that can be queried */
280 return perf_cfg->n_queries;
281
282 if (group_index >= perf_cfg->n_queries)
283 /* out of range */
284 return 0;
285
286 struct gen_perf_query_info *query = &perf_cfg->queries[group_index];
287 info->name = query->name;
288 info->max_active_queries = query->n_counters;
289 info->num_queries = query->n_counters;
290 return 1;
291 }
292
293 static void
294 iris_init_monitor_ctx(struct iris_context *ice)
295 {
296 struct iris_screen *screen = (struct iris_screen *) ice->ctx.screen;
297 struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;
298 ice->perf_ctx = gen_perf_new_context(ice);
299 if (unlikely(!ice->perf_ctx)) {
300 return;
301 }
302
303 struct gen_perf_context *perf_ctx = ice->perf_ctx;
304 struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;
305 gen_perf_init_context(perf_ctx,
306 perf_cfg,
307 ice,
308 screen->bufmgr,
309 &screen->devinfo,
310 ice->batches[IRIS_BATCH_RENDER].hw_ctx_id,
311 screen->fd);
312 }
313
314 /* entry point for GenPerfMonitorsAMD */
315 struct iris_monitor_object *
316 iris_create_monitor_object(struct iris_context *ice,
317 unsigned num_queries,
318 unsigned *query_types)
319 {
320 struct iris_screen *screen = (struct iris_screen *) ice->ctx.screen;
321 struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;
322 struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;
323 struct gen_perf_query_object *query_obj = NULL;
324
325 /* initialize perf context if this has not already been done. This
326 * function is the first entry point that carries the gl context.
327 */
328 if (ice->perf_ctx == NULL) {
329 iris_init_monitor_ctx(ice);
330 }
331 struct gen_perf_context *perf_ctx = ice->perf_ctx;
332
333 assert(num_queries > 0);
334 int query_index = query_types[0] - PIPE_QUERY_DRIVER_SPECIFIC;
335 assert(query_index <= monitor_cfg->num_counters);
336 const int group = monitor_cfg->counters[query_index].group;
337
338 struct iris_monitor_object *monitor =
339 calloc(1, sizeof(struct iris_monitor_object));
340 if (unlikely(!monitor))
341 goto allocation_failure;
342
343 monitor->num_active_counters = num_queries;
344 monitor->active_counters = calloc(num_queries, sizeof(int));
345 if (unlikely(!monitor->active_counters))
346 goto allocation_failure;
347
348 for (int i = 0; i < num_queries; ++i) {
349 unsigned current_query = query_types[i];
350 unsigned current_query_index = current_query - PIPE_QUERY_DRIVER_SPECIFIC;
351
352 /* all queries must be in the same group */
353 assert(current_query_index <= monitor_cfg->num_counters);
354 assert(monitor_cfg->counters[current_query_index].group == group);
355 monitor->active_counters[i] =
356 monitor_cfg->counters[current_query_index].counter;
357 }
358
359 /* create the gen_perf_query */
360 query_obj = gen_perf_new_query(perf_ctx, group);
361 if (unlikely(!query_obj))
362 goto allocation_failure;
363
364 monitor->query = query_obj;
365 monitor->result_size = perf_cfg->queries[group].data_size;
366 monitor->result_buffer = calloc(1, monitor->result_size);
367 if (unlikely(!monitor->result_buffer))
368 goto allocation_failure;
369
370 return monitor;
371
372 allocation_failure:
373 if (monitor) {
374 free(monitor->active_counters);
375 free(monitor->result_buffer);
376 }
377 free(query_obj);
378 free(monitor);
379 return NULL;
380 }
381
382 void iris_destroy_monitor_object(struct pipe_context *ctx,
383 struct iris_monitor_object *monitor)
384 {
385 struct iris_context *ice = (struct iris_context *)ctx;
386 gen_perf_delete_query(ice->perf_ctx, monitor->query);
387 free(monitor->result_buffer);
388 monitor->result_buffer = NULL;
389 free(monitor->active_counters);
390 monitor->active_counters = NULL;
391 free(monitor);
392 }
393
394 bool
395 iris_begin_monitor(struct pipe_context *ctx,
396 struct iris_monitor_object *monitor)
397 {
398 struct iris_context *ice = (void *) ctx;
399 struct gen_perf_context *perf_ctx = ice->perf_ctx;
400
401 return gen_perf_begin_query(perf_ctx, monitor->query);
402 }
403
404 bool
405 iris_end_monitor(struct pipe_context *ctx,
406 struct iris_monitor_object *monitor)
407 {
408 struct iris_context *ice = (void *) ctx;
409 struct gen_perf_context *perf_ctx = ice->perf_ctx;
410
411 gen_perf_end_query(perf_ctx, monitor->query);
412 return true;
413 }
414
415 bool
416 iris_get_monitor_result(struct pipe_context *ctx,
417 struct iris_monitor_object *monitor,
418 bool wait,
419 union pipe_numeric_type_union *result)
420 {
421 struct iris_context *ice = (void *) ctx;
422 struct gen_perf_context *perf_ctx = ice->perf_ctx;
423
424 bool monitor_ready = gen_perf_is_query_ready(perf_ctx, monitor->query,
425 &ice->batches[IRIS_BATCH_RENDER]);
426
427 if (!monitor_ready) {
428 if (!wait)
429 return false;
430 gen_perf_wait_query(perf_ctx, monitor->query,
431 &ice->batches[IRIS_BATCH_RENDER]);
432 }
433
434 assert (gen_perf_is_query_ready(perf_ctx, monitor->query,
435 &ice->batches[IRIS_BATCH_RENDER]));
436
437 unsigned bytes_written;
438 gen_perf_get_query_data(perf_ctx, monitor->query,
439 monitor->result_size,
440 (unsigned*) monitor->result_buffer,
441 &bytes_written);
442 if (bytes_written != monitor->result_size)
443 return false;
444
445 /* copy metrics into the batch result */
446 for (int i = 0; i < monitor->num_active_counters; ++i) {
447 int current_counter = monitor->active_counters[i];
448 const struct gen_perf_query_info *info =
449 gen_perf_query_info(monitor->query);
450 const struct gen_perf_query_counter *counter =
451 &info->counters[current_counter];
452 assert(gen_perf_query_counter_get_size(counter));
453 switch (counter->data_type) {
454 case GEN_PERF_COUNTER_DATA_TYPE_UINT64:
455 result[i].u64 = *(uint64_t*)(monitor->result_buffer + counter->offset);
456 break;
457 case GEN_PERF_COUNTER_DATA_TYPE_FLOAT:
458 result[i].f = *(float*)(monitor->result_buffer + counter->offset);
459 break;
460 case GEN_PERF_COUNTER_DATA_TYPE_UINT32:
461 case GEN_PERF_COUNTER_DATA_TYPE_BOOL32:
462 result[i].u64 = *(uint32_t*)(monitor->result_buffer + counter->offset);
463 break;
464 case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE: {
465 double v = *(double*)(monitor->result_buffer + counter->offset);
466 result[i].f = v;
467 }
468 default:
469 unreachable("unexpected counter data type");
470 }
471 }
472 return true;
473 }