iris: Move snapshots_landed to the front.
[mesa.git] / src / gallium / drivers / iris / iris_query.c
1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file iris_query.c
25 *
26 * Query object support. This allows measuring various simple statistics
27 * via counters on the GPU.
28 */
29
30 #include <stdio.h>
31 #include <errno.h>
32 #include "pipe/p_defines.h"
33 #include "pipe/p_state.h"
34 #include "pipe/p_context.h"
35 #include "pipe/p_screen.h"
36 #include "util/u_inlines.h"
37 #include "iris_context.h"
38 #include "iris_defines.h"
39 #include "iris_resource.h"
40 #include "iris_screen.h"
41
/* MMIO offsets of the hardware pipeline-statistics counter registers.
 * These are snapshotted into the query buffer via MI_STORE_REGISTER_MEM
 * (see write_value() below).
 */
#define IA_VERTICES_COUNT 0x2310
#define IA_PRIMITIVES_COUNT 0x2318
#define VS_INVOCATION_COUNT 0x2320
#define HS_INVOCATION_COUNT 0x2300
#define DS_INVOCATION_COUNT 0x2308
#define GS_INVOCATION_COUNT 0x2328
#define GS_PRIMITIVES_COUNT 0x2330
#define CL_INVOCATION_COUNT 0x2338
#define CL_PRIMITIVES_COUNT 0x2340
#define PS_INVOCATION_COUNT 0x2348
#define CS_INVOCATION_COUNT 0x2290
#define PS_DEPTH_COUNT 0x2350

/* Per-stream transform feedback counters; n is the stream index and each
 * register is 64 bits wide (hence the stride of 8 bytes).
 */
#define SO_PRIM_STORAGE_NEEDED(n) (0x5240 + (n) * 8)

#define SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8)

/* Command streamer general purpose register n (64-bit registers). */
#define CS_GPR(n) (0x2600 + (n) * 8)

/* MI_MATH command opcode (DWord 0; the length field is OR'd in by callers). */
#define MI_MATH (0x1a << 23)

/* MI_MATH ALU instruction opcodes (pre-shifted operand-count encodings). */
#define MI_ALU_LOAD 0x080
#define MI_ALU_LOADINV 0x480
#define MI_ALU_LOAD0 0x081
#define MI_ALU_LOAD1 0x481
#define MI_ALU_ADD 0x100
#define MI_ALU_SUB 0x101
#define MI_ALU_AND 0x102
#define MI_ALU_OR 0x103
#define MI_ALU_XOR 0x104
#define MI_ALU_STORE 0x180
#define MI_ALU_STOREINV 0x580

/* MI_MATH ALU operand encodings: general registers, ALU sources/outputs. */
#define MI_ALU_R0 0x00
#define MI_ALU_R1 0x01
#define MI_ALU_R2 0x02
#define MI_ALU_R3 0x03
#define MI_ALU_R4 0x04
#define MI_ALU_SRCA 0x20
#define MI_ALU_SRCB 0x21
#define MI_ALU_ACCU 0x31
#define MI_ALU_ZF 0x32
#define MI_ALU_CF 0x33

/* Helpers to pack an MI_MATH ALU instruction with 0, 1, or 2 operands. */
#define MI_ALU0(op) ((MI_ALU_##op << 20))
#define MI_ALU1(op, x) ((MI_ALU_##op << 20) | (MI_ALU_##x << 10))
#define MI_ALU2(op, x, y) \
   ((MI_ALU_##op << 20) | (MI_ALU_##x << 10) | (MI_ALU_##y))
/**
 * Driver-private query object, cast to/from struct pipe_query.
 */
struct iris_query {
   enum pipe_query_type type;
   int index;              /* sub-index: statistics counter or SO stream */

   bool ready;             /* has `result` been computed on the CPU? */

   uint64_t result;        /* final value, valid once `ready` is set */

   struct iris_bo *bo;     /* GPU buffer holding the snapshots */
   struct iris_query_snapshots *map;  /* persistent CPU mapping of bo */
};
102
/**
 * Layout of the query buffer (q->bo / q->map).
 *
 * snapshots_landed is deliberately the first field (offset 0); it is the
 * availability flag, written last by mark_available() after both counter
 * snapshots, and polled/loaded to decide whether results are ready.
 */
struct iris_query_snapshots {
   uint64_t snapshots_landed;
   uint64_t start;
   uint64_t end;
};
108
109 /**
110 * Is this type of query written by PIPE_CONTROL?
111 */
112 static bool
113 iris_is_query_pipelined(struct iris_query *q)
114 {
115 switch (q->type) {
116 case PIPE_QUERY_OCCLUSION_COUNTER:
117 case PIPE_QUERY_OCCLUSION_PREDICATE:
118 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
119 case PIPE_QUERY_TIMESTAMP:
120 case PIPE_QUERY_TIMESTAMP_DISJOINT:
121 case PIPE_QUERY_TIME_ELAPSED:
122 return true;
123
124 default:
125 return false;
126 }
127 }
128
/**
 * Set the snapshots_landed availability flag in the query buffer.
 *
 * For non-pipelined queries the snapshot writes are immediate, so a plain
 * MI_STORE_DATA_IMM suffices.  For pipelined queries the flag write must be
 * ordered after the (post-sync) result writes, so we use a PIPE_CONTROL
 * immediate write with flush-enable.
 */
static void
mark_available(struct iris_context *ice, struct iris_query *q)
{
   struct iris_batch *batch = &ice->render_batch;
   unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
   unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);

   if (!iris_is_query_pipelined(q)) {
      ice->vtbl.store_data_imm64(batch, q->bo, offset, true);
   } else {
      /* Order available *after* the query results. */
      flags |= PIPE_CONTROL_FLUSH_ENABLE;
      iris_emit_pipe_control_write(batch, flags, q->bo, offset, true);
   }
}
144
145 /**
146 * Write PS_DEPTH_COUNT to q->(dest) via a PIPE_CONTROL.
147 */
148 static void
149 iris_pipelined_write(struct iris_batch *batch,
150 struct iris_query *q,
151 enum pipe_control_flags flags,
152 unsigned offset)
153 {
154 const struct gen_device_info *devinfo = &batch->screen->devinfo;
155 const unsigned optional_cs_stall =
156 devinfo->gen == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;
157
158 iris_emit_pipe_control_write(batch, flags | optional_cs_stall,
159 q->bo, offset, 0ull);
160 }
161
/**
 * Record the current value of the counter backing q->type as a 64-bit
 * snapshot at \p offset within the query buffer (q->bo).
 */
static void
write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
{
   struct iris_batch *batch = &ice->render_batch;
   const struct gen_device_info *devinfo = &batch->screen->devinfo;

   if (!iris_is_query_pipelined(q)) {
      /* Non-pipelined snapshots read a register directly, so stall until
       * outstanding work that could affect the counter has finished.
       */
      iris_emit_pipe_control_flush(batch,
                                   PIPE_CONTROL_CS_STALL |
                                   PIPE_CONTROL_STALL_AT_SCOREBOARD);
   }

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      if (devinfo->gen >= 10) {
         /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
          * bit set prior to programming a PIPE_CONTROL with Write PS Depth
          * Count sync operation."
          */
         iris_emit_pipe_control_flush(batch, PIPE_CONTROL_DEPTH_STALL);
      }
      iris_pipelined_write(&ice->render_batch, q,
                           PIPE_CONTROL_WRITE_DEPTH_COUNT |
                           PIPE_CONTROL_DEPTH_STALL,
                           offset);
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      iris_pipelined_write(&ice->render_batch, q,
                           PIPE_CONTROL_WRITE_TIMESTAMP,
                           offset);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      /* Stream 0 uses the clipper invocation count; other streams use the
       * per-stream "primitive storage needed" SO counter.
       */
      ice->vtbl.store_register_mem64(batch,
                                     q->index == 0 ? CL_INVOCATION_COUNT :
                                     SO_PRIM_STORAGE_NEEDED(q->index),
                                     q->bo, offset, false);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      ice->vtbl.store_register_mem64(batch,
                                     SO_NUM_PRIMS_WRITTEN(q->index),
                                     q->bo, offset, false);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS: {
      /* Maps q->index (the pipe_query pipeline-statistics slot) to the
       * corresponding hardware counter register; must stay in sync with the
       * switch in iris_get_query_result().
       */
      static const uint32_t index_to_reg[] = {
         IA_VERTICES_COUNT,
         IA_PRIMITIVES_COUNT,
         VS_INVOCATION_COUNT,
         GS_INVOCATION_COUNT,
         GS_PRIMITIVES_COUNT,
         CL_INVOCATION_COUNT,
         CL_PRIMITIVES_COUNT,
         PS_INVOCATION_COUNT,
         HS_INVOCATION_COUNT,
         DS_INVOCATION_COUNT,
         CS_INVOCATION_COUNT,
      };
      const uint32_t reg = index_to_reg[q->index];

      ice->vtbl.store_register_mem64(batch, reg, q->bo, offset, false);
      break;
   }
   default:
      assert(false);
   }
}
231
232 uint64_t
233 iris_timebase_scale(const struct gen_device_info *devinfo,
234 uint64_t gpu_timestamp)
235 {
236 return (1000000000ull * gpu_timestamp) / devinfo->timestamp_frequency;
237 }
238
239 static uint64_t
240 iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)
241 {
242 if (time0 > time1) {
243 return (1ULL << TIMESTAMP_BITS) + time1 - time0;
244 } else {
245 return time1 - time0;
246 }
247 }
248
/**
 * Compute q->result on the CPU from the mapped snapshots and mark the
 * query ready.  Callers ensure the snapshots have landed first.
 */
static void
calculate_result_on_cpu(const struct gen_device_info *devinfo,
                        struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      /* Predicates only report whether any samples passed at all. */
      q->result = q->map->end != q->map->start;
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* The timestamp is the single starting snapshot. */
      q->result = iris_timebase_scale(devinfo, q->map->start);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      /* Wrap-safe raw delta first, then scale ticks to nanoseconds. */
      q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);
      q->result = iris_timebase_scale(devinfo, q->result);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   case PIPE_QUERY_PIPELINE_STATISTICS:
   default:
      /* Counter queries are simply the difference of the two snapshots. */
      q->result = q->map->end - q->map->start;
      break;
   }

   q->ready = true;
}
280
/*
 * GPR0 = (GPR0 == 0) ? 0 : 1;
 *
 * Done on the GPU with an MI_MATH ALU program: compute R0 + 0 to set the
 * zero flag, store its inverse back into R0 (all-ones if R0 was nonzero),
 * then AND with the constant 1 in R1 to clamp the result to 0 or 1.
 */
static void
gpr0_to_bool(struct iris_context *ice)
{
   struct iris_batch *batch = &ice->render_batch;

   /* R1 = 1, used as the mask for the final AND. */
   ice->vtbl.load_register_imm64(batch, CS_GPR(1), 1ull);

   static const uint32_t math[] = {
      MI_MATH | (9 - 2),               /* 9 DWords total; length bias of 2 */
      MI_ALU2(LOAD, SRCA, R0),         /* SRCA = R0 */
      MI_ALU1(LOAD0, SRCB),            /* SRCB = 0 */
      MI_ALU0(ADD),                    /* ACCU = R0 + 0, sets ZF if R0 == 0 */
      MI_ALU2(STOREINV, R0, ZF),       /* R0 = ~ZF (all-ones if R0 != 0) */
      MI_ALU2(LOAD, SRCA, R0),
      MI_ALU2(LOAD, SRCB, R1),
      MI_ALU0(AND),                    /* ACCU = R0 & 1 */
      MI_ALU2(STORE, R0, ACCU),        /* R0 = 0 or 1 */
   };
   iris_batch_emit(batch, math, sizeof(math));
}
304
/**
 * Calculate the result and store it to CS_GPR0.
 *
 * Loads the two snapshots into GPR1/GPR2, computes end - start with
 * MI_MATH, and for occlusion predicates reduces the difference to 0/1.
 */
static void
calculate_result_on_gpu(struct iris_context *ice, struct iris_query *q)
{
   struct iris_batch *batch = &ice->render_batch;

   ice->vtbl.load_register_mem64(batch, CS_GPR(1), q->bo,
                                 offsetof(struct iris_query_snapshots, start));
   ice->vtbl.load_register_mem64(batch, CS_GPR(2), q->bo,
                                 offsetof(struct iris_query_snapshots, end));

   static const uint32_t math[] = {
      MI_MATH | (5 - 2),           /* 5 DWords total; length bias of 2 */
      MI_ALU2(LOAD, SRCA, R2),     /* SRCA = end */
      MI_ALU2(LOAD, SRCB, R1),     /* SRCB = start */
      MI_ALU0(SUB),                /* ACCU = end - start */
      MI_ALU2(STORE, R0, ACCU),    /* R0 = result */
   };
   iris_batch_emit(batch, math, sizeof(math));

   /* Predicates want a boolean answer, not a sample count. */
   if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
       q->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE)
      gpr0_to_bool(ice);
}
331
332 static struct pipe_query *
333 iris_create_query(struct pipe_context *ctx,
334 unsigned query_type,
335 unsigned index)
336 {
337 struct iris_query *q = calloc(1, sizeof(struct iris_query));
338
339 q->type = query_type;
340 q->index = index;
341
342 return (struct pipe_query *) q;
343 }
344
345 static void
346 iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
347 {
348 struct iris_query *query = (void *) p_query;
349 iris_bo_unreference(query->bo);
350 free(query);
351 }
352
353
/**
 * Gallium ->begin_query() hook: allocate a fresh snapshot buffer, reset
 * CPU-side state, and record the starting snapshot.
 */
static boolean
iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_screen *screen = (void *) ctx->screen;
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   /* Queries may be re-begun; drop any buffer from a previous begin. */
   iris_bo_unreference(q->bo);
   q->bo = iris_bo_alloc(screen->bufmgr, "query object", 4096,
                         IRIS_MEMZONE_OTHER);
   if (!q->bo)
      return false;

   /* Keep a persistent CPU mapping so results can be read back later;
    * MAP_ASYNC avoids stalling on GPU work still using the buffer.
    */
   q->map = iris_bo_map(&ice->dbg, q->bo, MAP_READ | MAP_WRITE | MAP_ASYNC);
   if (!q->map)
      return false;

   /* Reset the cached result and the GPU-visible availability flag. */
   q->result = 0ull;
   q->ready = false;
   q->map->snapshots_landed = false;

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      /* Stream-0 prims-generated queries affect streamout programming. */
      ice->state.prims_generated_query_active = true;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT;
   }

   write_value(ice, q, offsetof(struct iris_query_snapshots, start));

   return true;
}
384
385 static bool
386 iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
387 {
388 struct iris_context *ice = (void *) ctx;
389 struct iris_query *q = (void *) query;
390
391 if (q->type == PIPE_QUERY_TIMESTAMP) {
392 iris_begin_query(ctx, query);
393 mark_available(ice, q);
394 return true;
395 }
396
397 if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
398 ice->state.prims_generated_query_active = true;
399 ice->state.dirty |= IRIS_DIRTY_STREAMOUT;
400 }
401
402 write_value(ice, q, offsetof(struct iris_query_snapshots, end));
403 mark_available(ice, q);
404
405 return true;
406 }
407
/**
 * Gallium ->get_query_result() hook: fetch the result on the CPU.
 *
 * Returns false if \p wait is not set and the snapshots have not landed
 * yet; otherwise flushes/waits as needed and fills in \p result.
 */
static boolean
iris_get_query_result(struct pipe_context *ctx,
                      struct pipe_query *query,
                      boolean wait,
                      union pipe_query_result *result)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_screen *screen = (void *) ctx->screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (!q->ready) {
      /* Commands producing the result may still be queued; submit them. */
      if (iris_batch_references(&ice->render_batch, q->bo))
         iris_batch_flush(&ice->render_batch);

      if (!q->map->snapshots_landed) {
         if (wait)
            iris_bo_wait_rendering(q->bo);
         else
            return false;
      }

      assert(q->map->snapshots_landed);
      calculate_result_on_cpu(devinfo, q);
   }

   assert(q->ready);

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS) {
      /* Route the single counter into the field matching q->index; this
       * ordering mirrors index_to_reg in write_value().
       */
      switch (q->index) {
      case 0:
         result->pipeline_statistics.ia_vertices = q->result;
         break;
      case 1:
         result->pipeline_statistics.ia_primitives = q->result;
         break;
      case 2:
         result->pipeline_statistics.vs_invocations = q->result;
         break;
      case 3:
         result->pipeline_statistics.gs_invocations = q->result;
         break;
      case 4:
         result->pipeline_statistics.gs_primitives = q->result;
         break;
      case 5:
         result->pipeline_statistics.c_invocations = q->result;
         break;
      case 6:
         result->pipeline_statistics.c_primitives = q->result;
         break;
      case 7:
         result->pipeline_statistics.ps_invocations = q->result;
         break;
      case 8:
         result->pipeline_statistics.hs_invocations = q->result;
         break;
      case 9:
         result->pipeline_statistics.ds_invocations = q->result;
         break;
      case 10:
         result->pipeline_statistics.cs_invocations = q->result;
         break;
      }
   } else {
      result->u64 = q->result;
   }

   return true;
}
478
/**
 * Gallium ->get_query_result_resource() hook: write the query result (or
 * its availability, when index == -1) into a buffer resource on the GPU.
 */
static void
iris_get_query_result_resource(struct pipe_context *ctx,
                               struct pipe_query *query,
                               boolean wait,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *p_res,
                               unsigned offset)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_batch *batch = &ice->render_batch;
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   unsigned snapshots_landed_offset =
      offsetof(struct iris_query_snapshots, snapshots_landed);

   if (index == -1) {
      /* They're asking for the availability of the result. If we still
       * have commands queued up which produce the result, submit them
       * now so that progress happens. Either way, copy the snapshots
       * landed field to the destination resource.
       */
      if (iris_batch_references(batch, q->bo))
         iris_batch_flush(batch);

      /* Copy 4 or 8 bytes of the flag depending on the requested type. */
      ice->vtbl.copy_mem_mem(batch, iris_resource_bo(p_res), offset,
                             q->bo, snapshots_landed_offset,
                             result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
      return;
   }

   if (!q->ready && q->map->snapshots_landed) {
      /* The final snapshots happen to have landed, so let's just compute
       * the result on the CPU now...
       */
      calculate_result_on_cpu(devinfo, q);
   }

   if (q->ready) {
      /* We happen to have the result on the CPU, so just copy it. */
      if (result_type <= PIPE_QUERY_TYPE_U32) {
         ice->vtbl.store_data_imm32(batch, iris_resource_bo(p_res), offset,
                                    q->result);
      } else {
         ice->vtbl.store_data_imm64(batch, iris_resource_bo(p_res), offset,
                                    q->result);
      }

      /* Make sure the result lands before they bind the QBO elsewhere
       * and use the result.
       */
      // XXX: Why? i965 doesn't do this.
      iris_emit_pipe_control_flush(batch, PIPE_CONTROL_CS_STALL);
      return;
   }

   /* Calculate the result to CS_GPR0 */
   calculate_result_on_gpu(ice, q);

   bool predicated = !wait && iris_is_query_pipelined(q);

   if (predicated) {
      /* Predicate the store below on the availability flag so the write
       * is skipped when the snapshots haven't landed yet.  NOTE(review):
       * see the PRM for the exact MI_PREDICATE load/compare semantics.
       */
      ice->vtbl.load_register_imm64(batch, MI_PREDICATE_SRC1, 0ull);
      ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, q->bo,
                                    snapshots_landed_offset);
      uint32_t predicate = MI_PREDICATE |
                           MI_PREDICATE_LOADOP_LOADINV |
                           MI_PREDICATE_COMBINEOP_SET |
                           MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
      iris_batch_emit(batch, &predicate, sizeof(uint32_t));
   }

   /* Copy the GPU-computed result (CS_GPR0) into the destination. */
   if (result_type <= PIPE_QUERY_TYPE_U32) {
      ice->vtbl.store_register_mem32(batch, CS_GPR(0),
                                     iris_resource_bo(p_res),
                                     offset, predicated);
   } else {
      ice->vtbl.store_register_mem64(batch, CS_GPR(0),
                                     iris_resource_bo(p_res),
                                     offset, predicated);
   }
}
561
562 static void
563 iris_set_active_query_state(struct pipe_context *pipe, boolean enable)
564 {
565 /* Do nothing, intentionally - only u_blitter uses this. */
566 }
567
568 void
569 iris_init_query_functions(struct pipe_context *ctx)
570 {
571 ctx->create_query = iris_create_query;
572 ctx->destroy_query = iris_destroy_query;
573 ctx->begin_query = iris_begin_query;
574 ctx->end_query = iris_end_query;
575 ctx->get_query_result = iris_get_query_result;
576 ctx->get_query_result_resource = iris_get_query_result_resource;
577 ctx->set_active_query_state = iris_set_active_query_state;
578 }