/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "iris_monitor.h"

#include <xf86drm.h>

#include "iris_screen.h"
#include "iris_context.h"

#include "perf/gen_perf.h"

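/* State for one AMD_performance_monitor-style monitor built on top of a
 * gen_perf query.  active_counters holds, for each enabled counter, its
 * index within the group's counter list; raw query data is staged in
 * result_buffer before being unpacked for the caller.
 */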
struct iris_monitor_object {
   int num_active_counters;
   int *active_counters;

   size_t result_size;
   unsigned char *result_buffer;

   struct gen_perf_query_object *query;
};

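/* Gallium's driver-query introspection hook: when called with info == NULL,
 * return the total number of driver-specific queries (metrics); otherwise
 * fill *info for the metric at `index` and return 1.
 */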
int
iris_get_monitor_info(struct pipe_screen *pscreen, unsigned index,
                      struct pipe_driver_query_info *info)
{
   const struct iris_screen *screen = (struct iris_screen *)pscreen;
   assert(screen->monitor_cfg);
   if (!screen->monitor_cfg)
      return 0;

   const struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;

   if (!info) {
      /* return the number of metrics */
      return monitor_cfg->num_counters;
   }

   const struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;
   const int group = monitor_cfg->counters[index].group;
   const int counter_index = monitor_cfg->counters[index].counter;
   struct gen_perf_query_counter *counter =
      &perf_cfg->queries[group].counters[counter_index];

   info->group_id = group;
   info->name = counter->name;
   info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;

   if (counter->type == GEN_PERF_COUNTER_TYPE_THROUGHPUT)
      info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
   else
      info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;

   switch (counter->data_type) {
   case GEN_PERF_COUNTER_DATA_TYPE_BOOL32:
   case GEN_PERF_COUNTER_DATA_TYPE_UINT32:
      info->type = PIPE_DRIVER_QUERY_TYPE_UINT;
      info->max_value.u32 = 0;
      break;
   case GEN_PERF_COUNTER_DATA_TYPE_UINT64:
      info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
      info->max_value.u64 = 0;
      break;
   case GEN_PERF_COUNTER_DATA_TYPE_FLOAT:
   case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE:
      info->type = PIPE_DRIVER_QUERY_TYPE_FLOAT;
      info->max_value.u64 = -1;
      break;
   default:
      assert(false);
      break;
   }

   /* indicates that this is an OA query, not a pipeline statistics query */
   info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
   return 1;
}

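/* The gen_perf vtbl is expressed in terms of void pointers so it can be
 * shared between drivers.  These typedefs mirror the vtbl entry signatures
 * so the iris helpers below can be cast into place.
 */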
typedef void (*bo_unreference_t)(void *);
typedef void *(*bo_map_t)(void *, void *, unsigned flags);
typedef void (*bo_unmap_t)(void *);
typedef void (*emit_mi_report_t)(void *, void *, uint32_t, uint32_t);
typedef void (*emit_mi_flush_t)(void *);
typedef void (*capture_frequency_stat_register_t)(void *, void *, uint32_t);
typedef void (*store_register_mem64_t)(void *ctx, void *bo,
                                       uint32_t reg, uint32_t offset);
typedef bool (*batch_references_t)(void *batch, void *bo);
typedef void (*bo_wait_rendering_t)(void *bo);
typedef int (*bo_busy_t)(void *bo);

static void *
iris_oa_bo_alloc(void *bufmgr, const char *name, uint64_t size)
{
   return iris_bo_alloc(bufmgr, name, size, IRIS_MEMZONE_OTHER);
}

static void
iris_monitor_emit_mi_flush(struct iris_context *ice)
{
   const int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH |
                     PIPE_CONTROL_INSTRUCTION_INVALIDATE |
                     PIPE_CONTROL_CONST_CACHE_INVALIDATE |
                     PIPE_CONTROL_DATA_CACHE_FLUSH |
                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                     PIPE_CONTROL_VF_CACHE_INVALIDATE |
                     PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                     PIPE_CONTROL_CS_STALL;
   iris_emit_pipe_control_flush(&ice->batches[IRIS_BATCH_RENDER],
                                "OA metrics", flags);
}

static void
iris_monitor_emit_mi_report_perf_count(void *c,
                                       void *bo,
                                       uint32_t offset_in_bytes,
                                       uint32_t report_id)
{
   struct iris_context *ice = c;
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   ice->vtbl.emit_mi_report_perf_count(batch, bo, offset_in_bytes, report_id);
}

static void
iris_monitor_batchbuffer_flush(void *c, const char *file, int line)
{
   struct iris_context *ice = c;
   /* Forward the caller's file/line rather than this wrapper's, so flush
    * records point at the code that actually requested the flush.
    */
   _iris_batch_flush(&ice->batches[IRIS_BATCH_RENDER], file, line);
}

static void
iris_monitor_capture_frequency_stat_register(void *ctx,
                                             void *bo,
                                             uint32_t bo_offset)
{
   struct iris_context *ice = ctx;
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   ice->vtbl.store_register_mem32(batch, GEN9_RPSTAT0, bo, bo_offset, false);
}

static void
iris_monitor_store_register_mem64(void *ctx, void *bo,
                                  uint32_t reg, uint32_t offset)
{
   struct iris_context *ice = ctx;
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   ice->vtbl.store_register_mem64(batch, reg, bo, offset, false);
}

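/* Build the screen's monitor configuration: create the gen_perf config,
 * wire up the iris vtbl hooks, load the metric sets for this device, and
 * flatten the per-query counters into one deduplicated table.  Returns the
 * number of exposed counters, i.e. nonzero on success.
 */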
static bool
iris_monitor_init_metrics(struct iris_screen *screen)
{
   struct iris_monitor_config *monitor_cfg =
      rzalloc(screen, struct iris_monitor_config);
   struct gen_perf_config *perf_cfg = NULL;
   if (unlikely(!monitor_cfg))
      goto allocation_error;

   perf_cfg = gen_perf_new(monitor_cfg);
   if (unlikely(!perf_cfg))
      goto allocation_error;

   monitor_cfg->perf_cfg = perf_cfg;

   perf_cfg->vtbl.bo_alloc = iris_oa_bo_alloc;
   perf_cfg->vtbl.bo_unreference = (bo_unreference_t)iris_bo_unreference;
   perf_cfg->vtbl.bo_map = (bo_map_t)iris_bo_map;
   perf_cfg->vtbl.bo_unmap = (bo_unmap_t)iris_bo_unmap;
   perf_cfg->vtbl.emit_mi_flush = (emit_mi_flush_t)iris_monitor_emit_mi_flush;

   perf_cfg->vtbl.emit_mi_report_perf_count =
      (emit_mi_report_t)iris_monitor_emit_mi_report_perf_count;
   perf_cfg->vtbl.batchbuffer_flush = iris_monitor_batchbuffer_flush;
   perf_cfg->vtbl.capture_frequency_stat_register =
      (capture_frequency_stat_register_t)
      iris_monitor_capture_frequency_stat_register;
   perf_cfg->vtbl.store_register_mem64 =
      (store_register_mem64_t)iris_monitor_store_register_mem64;
   perf_cfg->vtbl.batch_references = (batch_references_t)iris_batch_references;
   perf_cfg->vtbl.bo_wait_rendering =
      (bo_wait_rendering_t)iris_bo_wait_rendering;
   perf_cfg->vtbl.bo_busy = (bo_busy_t)iris_bo_busy;

   gen_perf_init_metrics(perf_cfg, &screen->devinfo, screen->fd);
   screen->monitor_cfg = monitor_cfg;

   /* A gallium "group" is equivalent to a gen "query"; a gallium "query"
    * is equivalent to a gen "query_counter".
    *
    * Each gen_query supports a specific number of query_counters.  To
    * allocate the array of iris_monitor_counter, we need an upper bound
    * (ignoring duplicate query_counters).
    */
   int gen_query_counters_count = 0;
   for (int gen_query_id = 0;
        gen_query_id < perf_cfg->n_queries;
        ++gen_query_id) {
      gen_query_counters_count += perf_cfg->queries[gen_query_id].n_counters;
   }

   monitor_cfg->counters = rzalloc_size(monitor_cfg,
                                        sizeof(struct iris_monitor_counter) *
                                        gen_query_counters_count);
   if (unlikely(!monitor_cfg->counters))
      goto allocation_error;

   int iris_monitor_id = 0;
   for (int group = 0; group < perf_cfg->n_queries; ++group) {
      for (int counter = 0;
           counter < perf_cfg->queries[group].n_counters;
           ++counter) {
         /* Check previously identified metrics to filter out duplicates.
          * The user is not helped by having the same metric available in
          * several groups.  (n^2 algorithm.)
          */
         bool duplicate = false;
         for (int existing_group = 0;
              existing_group < group && !duplicate;
              ++existing_group) {
            for (int existing_counter = 0;
                 existing_counter <
                    perf_cfg->queries[existing_group].n_counters && !duplicate;
                 ++existing_counter) {
               const char *current_name =
                  perf_cfg->queries[group].counters[counter].name;
               const char *existing_name =
                  perf_cfg->queries[existing_group].counters[existing_counter].name;
               if (strcmp(current_name, existing_name) == 0)
                  duplicate = true;
            }
         }
         if (duplicate)
            continue;

         monitor_cfg->counters[iris_monitor_id].group = group;
         monitor_cfg->counters[iris_monitor_id].counter = counter;
         ++iris_monitor_id;
      }
   }
   monitor_cfg->num_counters = iris_monitor_id;
   return monitor_cfg->num_counters;

allocation_error:
   /* monitor_cfg, perf_cfg, and monitor_cfg->counters were all allocated
    * from the monitor_cfg ralloc context, so freeing that context releases
    * everything; plain free() on ralloc'd memory would be invalid.
    */
   ralloc_free(monitor_cfg);
   return false;
}

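/* Gallium group introspection.  Metrics are initialized lazily here, since
 * this hook can run before any other monitor entry point on the screen.
 * With info == NULL this returns the number of groups (gen queries).
 */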
int
iris_get_monitor_group_info(struct pipe_screen *pscreen,
                            unsigned group_index,
                            struct pipe_driver_query_group_info *info)
{
   struct iris_screen *screen = (struct iris_screen *)pscreen;
   if (!screen->monitor_cfg) {
      if (!iris_monitor_init_metrics(screen))
         return 0;
   }

   const struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;
   const struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;

   if (!info) {
      /* return the number of query groups */
      return perf_cfg->n_queries;
   }

   if (group_index >= perf_cfg->n_queries) {
      /* out of range */
      return 0;
   }

   const struct gen_perf_query_info *query = &perf_cfg->queries[group_index];

   info->name = query->name;
   info->max_active_queries = query->n_counters;
   info->num_queries = query->n_counters;

   return 1;
}

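/* Create and initialize this context's gen_perf state against the render
 * batch's hardware context, which is where the monitor's commands (perf
 * count reports, register snapshots) are emitted.
 */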
static void
iris_init_monitor_ctx(struct iris_context *ice)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;

   ice->perf_ctx = gen_perf_new_context(ice);
   if (unlikely(!ice->perf_ctx))
      return;

   struct gen_perf_context *perf_ctx = ice->perf_ctx;
   struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;
   gen_perf_init_context(perf_ctx,
                         perf_cfg,
                         ice,
                         screen->bufmgr,
                         &screen->devinfo,
                         ice->batches[IRIS_BATCH_RENDER].hw_ctx_id,
                         screen->fd);
}

/* entry point for GL_AMD_performance_monitor's GenPerfMonitorsAMD */
struct iris_monitor_object *
iris_create_monitor_object(struct iris_context *ice,
                           unsigned num_queries,
                           unsigned *query_types)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct iris_monitor_config *monitor_cfg = screen->monitor_cfg;
   struct gen_perf_config *perf_cfg = monitor_cfg->perf_cfg;
   struct gen_perf_query_object *query_obj = NULL;

   /* Initialize the perf context if this has not already been done.  This
    * function is the first entry point that carries the GL context.
    */
   if (ice->perf_ctx == NULL)
      iris_init_monitor_ctx(ice);
   struct gen_perf_context *perf_ctx = ice->perf_ctx;

   assert(num_queries > 0);
   int query_index = query_types[0] - PIPE_QUERY_DRIVER_SPECIFIC;
   assert(query_index < monitor_cfg->num_counters);
   const int group = monitor_cfg->counters[query_index].group;

   struct iris_monitor_object *monitor =
      calloc(1, sizeof(struct iris_monitor_object));
   if (unlikely(!monitor))
      goto allocation_failure;

   monitor->num_active_counters = num_queries;
   monitor->active_counters = calloc(num_queries, sizeof(int));
   if (unlikely(!monitor->active_counters))
      goto allocation_failure;

   for (unsigned i = 0; i < num_queries; ++i) {
      unsigned current_query = query_types[i];
      unsigned current_query_index = current_query - PIPE_QUERY_DRIVER_SPECIFIC;

      /* all queries must be in the same group */
      assert(current_query_index < monitor_cfg->num_counters);
      assert(monitor_cfg->counters[current_query_index].group == group);
      monitor->active_counters[i] =
         monitor_cfg->counters[current_query_index].counter;
   }

   /* create the gen_perf_query */
   query_obj = gen_perf_new_query(perf_ctx, group);
   if (unlikely(!query_obj))
      goto allocation_failure;

   monitor->query = query_obj;
   monitor->result_size = perf_cfg->queries[group].data_size;
   monitor->result_buffer = calloc(1, monitor->result_size);
   if (unlikely(!monitor->result_buffer))
      goto allocation_failure;

   return monitor;

allocation_failure:
   if (monitor) {
      free(monitor->active_counters);
      free(monitor->result_buffer);
   }
   free(query_obj);
   free(monitor);
   return NULL;
}

void
iris_destroy_monitor_object(struct pipe_context *ctx,
                            struct iris_monitor_object *monitor)
{
   struct iris_context *ice = (struct iris_context *)ctx;

   gen_perf_delete_query(ice->perf_ctx, monitor->query);
   free(monitor->result_buffer);
   monitor->result_buffer = NULL;
   free(monitor->active_counters);
   monitor->active_counters = NULL;
   free(monitor);
}

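/* Begin/end simply forward to the gen_perf core, which brackets the
 * monitored region with counter snapshots.
 */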
bool
iris_begin_monitor(struct pipe_context *ctx,
                   struct iris_monitor_object *monitor)
{
   struct iris_context *ice = (void *) ctx;
   struct gen_perf_context *perf_ctx = ice->perf_ctx;

   return gen_perf_begin_query(perf_ctx, monitor->query);
}

bool
iris_end_monitor(struct pipe_context *ctx,
                 struct iris_monitor_object *monitor)
{
   struct iris_context *ice = (void *) ctx;
   struct gen_perf_context *perf_ctx = ice->perf_ctx;

   gen_perf_end_query(perf_ctx, monitor->query);
   return true;
}

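/* Gather the results of a monitor.  If the raw data is not ready and `wait`
 * is false, return false immediately; otherwise block until the query is
 * ready, then unpack each active counter into the caller's result array.
 */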
bool
iris_get_monitor_result(struct pipe_context *ctx,
                        struct iris_monitor_object *monitor,
                        bool wait,
                        union pipe_numeric_type_union *result)
{
   struct iris_context *ice = (void *) ctx;
   struct gen_perf_context *perf_ctx = ice->perf_ctx;
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];

   bool monitor_ready =
      gen_perf_is_query_ready(perf_ctx, monitor->query, batch);

   if (!monitor_ready) {
      if (!wait)
         return false;
      gen_perf_wait_query(perf_ctx, monitor->query, batch);
   }

   assert(gen_perf_is_query_ready(perf_ctx, monitor->query, batch));

   unsigned bytes_written;
   gen_perf_get_query_data(perf_ctx, monitor->query,
                           monitor->result_size,
                           (unsigned *)monitor->result_buffer,
                           &bytes_written);
   if (bytes_written != monitor->result_size)
      return false;

   /* copy metrics into the caller's result array */
   const struct gen_perf_query_info *info =
      gen_perf_query_info(monitor->query);
   for (int i = 0; i < monitor->num_active_counters; ++i) {
      int current_counter = monitor->active_counters[i];
      const struct gen_perf_query_counter *counter =
         &info->counters[current_counter];
      assert(gen_perf_query_counter_get_size(counter));

      switch (counter->data_type) {
      case GEN_PERF_COUNTER_DATA_TYPE_UINT64:
         result[i].u64 = *(uint64_t *)(monitor->result_buffer + counter->offset);
         break;
      case GEN_PERF_COUNTER_DATA_TYPE_FLOAT:
         result[i].f = *(float *)(monitor->result_buffer + counter->offset);
         break;
      case GEN_PERF_COUNTER_DATA_TYPE_UINT32:
      case GEN_PERF_COUNTER_DATA_TYPE_BOOL32:
         result[i].u64 = *(uint32_t *)(monitor->result_buffer + counter->offset);
         break;
      case GEN_PERF_COUNTER_DATA_TYPE_DOUBLE: {
         double v = *(double *)(monitor->result_buffer + counter->offset);
         result[i].f = v;
         break;
      }
      default:
         unreachable("unexpected counter data type");
      }
   }
   return true;
}