iris: Add _MI_ALU helpers that don't paste
[mesa.git] / src/gallium/drivers/iris/iris_query.c
1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file iris_query.c
25 *
26 * Query object support. This allows measuring various simple statistics
27 * via counters on the GPU.
28 */
29
30 #include <stdio.h>
31 #include <errno.h>
32 #include "pipe/p_defines.h"
33 #include "pipe/p_state.h"
34 #include "pipe/p_context.h"
35 #include "pipe/p_screen.h"
36 #include "util/u_inlines.h"
37 #include "iris_context.h"
38 #include "iris_defines.h"
39 #include "iris_resource.h"
40 #include "iris_screen.h"
41
42 #define IA_VERTICES_COUNT 0x2310
43 #define IA_PRIMITIVES_COUNT 0x2318
44 #define VS_INVOCATION_COUNT 0x2320
45 #define HS_INVOCATION_COUNT 0x2300
46 #define DS_INVOCATION_COUNT 0x2308
47 #define GS_INVOCATION_COUNT 0x2328
48 #define GS_PRIMITIVES_COUNT 0x2330
49 #define CL_INVOCATION_COUNT 0x2338
50 #define CL_PRIMITIVES_COUNT 0x2340
51 #define PS_INVOCATION_COUNT 0x2348
52 #define CS_INVOCATION_COUNT 0x2290
53 #define PS_DEPTH_COUNT 0x2350
54
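/* Per-stream stream-output statistics registers; (n) is the vertex
 * stream index, 0 through 3.
 */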
55 #define SO_PRIM_STORAGE_NEEDED(n) (0x5240 + (n) * 8)
56
57 #define SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8)
58
59 #define CS_GPR(n) (0x2600 + (n) * 8)
60
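/* MI_MATH runs a small ALU program on the command streamer's 64-bit
 * general purpose registers, which CS_GPR(n) above addresses in MMIO.
 */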
61 #define MI_MATH (0x1a << 23)
62
63 #define MI_ALU_LOAD 0x080
64 #define MI_ALU_LOADINV 0x480
65 #define MI_ALU_LOAD0 0x081
66 #define MI_ALU_LOAD1 0x481
67 #define MI_ALU_ADD 0x100
68 #define MI_ALU_SUB 0x101
69 #define MI_ALU_AND 0x102
70 #define MI_ALU_OR 0x103
71 #define MI_ALU_XOR 0x104
72 #define MI_ALU_STORE 0x180
73 #define MI_ALU_STOREINV 0x580
74
75 #define MI_ALU_R0 0x00
76 #define MI_ALU_R1 0x01
77 #define MI_ALU_R2 0x02
78 #define MI_ALU_R3 0x03
79 #define MI_ALU_R4 0x04
80 #define MI_ALU_SRCA 0x20
81 #define MI_ALU_SRCB 0x21
82 #define MI_ALU_ACCU 0x31
83 #define MI_ALU_ZF 0x32
84 #define MI_ALU_CF 0x33
85
86 #define _MI_ALU(op, x, y) (((op) << 20) | ((x) << 10) | (y))
87
88 #define _MI_ALU0(op) _MI_ALU(MI_ALU_##op, 0, 0)
89 #define _MI_ALU1(op, x) _MI_ALU(MI_ALU_##op, x, 0)
90 #define _MI_ALU2(op, x, y) _MI_ALU(MI_ALU_##op, x, y)
91
92 #define MI_ALU0(op) _MI_ALU0(op)
93 #define MI_ALU1(op, x) _MI_ALU1(op, MI_ALU_##x)
94 #define MI_ALU2(op, x, y) _MI_ALU2(op, MI_ALU_##x, MI_ALU_##y)
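/* The MI_ALU{0,1,2} wrappers above token-paste MI_ALU_ onto each operand,
 * so callers can write e.g. MI_ALU2(LOAD, SRCA, R0).  The _MI_ALU{0,1,2}
 * variants paste only the opcode, letting callers pass operands that are
 * already full MI_ALU_* values.
 *
 * For example, MI_ALU2(LOAD, SRCA, R0) expands to
 * (MI_ALU_LOAD << 20) | (MI_ALU_SRCA << 10) | MI_ALU_R0.
 */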
95
96 struct iris_query {
97 enum pipe_query_type type;
98 int index;
99
100 bool ready;
101
102 bool stalled;
103
104 uint64_t result;
105
106 struct iris_bo *bo;
107 struct iris_query_snapshots *map;
108
109 int batch_idx;
110 };
111
112 struct iris_query_snapshots {
113 /** iris_render_condition's saved MI_PREDICATE_DATA value. */
114 uint64_t predicate_data;
115
116 /** Have the start/end snapshots landed? */
117 uint64_t snapshots_landed;
118
119 /** Starting and ending counter snapshots */
120 uint64_t start;
121 uint64_t end;
122 };
123
124 struct iris_query_so_overflow {
125 uint64_t predicate_data;
126 uint64_t snapshots_landed;
127
128 struct {
129 uint64_t prim_storage_needed[2];
130 uint64_t num_prims[2];
131 } stream[4];
132 };
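/* Note: predicate_data and snapshots_landed sit at the same offsets in both
 * snapshot structs above, so mark_available() and the render-condition code
 * can use the iris_query_snapshots offsets for every query type.
 */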
133
134 /**
135 * Is this type of query written by PIPE_CONTROL?
136 */
137 static bool
138 iris_is_query_pipelined(struct iris_query *q)
139 {
140 switch (q->type) {
141 case PIPE_QUERY_OCCLUSION_COUNTER:
142 case PIPE_QUERY_OCCLUSION_PREDICATE:
143 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
144 case PIPE_QUERY_TIMESTAMP:
145 case PIPE_QUERY_TIMESTAMP_DISJOINT:
146 case PIPE_QUERY_TIME_ELAPSED:
147 return true;
148
149 default:
150 return false;
151 }
152 }
153
154 static void
155 mark_available(struct iris_context *ice, struct iris_query *q)
156 {
157 struct iris_batch *batch = &ice->batches[q->batch_idx];
158 unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
159 unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);
160
161 if (!iris_is_query_pipelined(q)) {
162 ice->vtbl.store_data_imm64(batch, q->bo, offset, true);
163 } else {
164 /* Order available *after* the query results. */
165 flags |= PIPE_CONTROL_FLUSH_ENABLE;
166 iris_emit_pipe_control_write(batch, flags, q->bo, offset, true);
167 }
168 }
169
170 /**
171  * Write a query snapshot (PS_DEPTH_COUNT or a timestamp) to q->bo at the given offset via a PIPE_CONTROL.
172 */
173 static void
174 iris_pipelined_write(struct iris_batch *batch,
175 struct iris_query *q,
176 enum pipe_control_flags flags,
177 unsigned offset)
178 {
179 const struct gen_device_info *devinfo = &batch->screen->devinfo;
180 const unsigned optional_cs_stall =
181 devinfo->gen == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;
182
183 iris_emit_pipe_control_write(batch, flags | optional_cs_stall,
184 q->bo, offset, 0ull);
185 }
186
187 static void
188 write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
189 {
190 struct iris_batch *batch = &ice->batches[q->batch_idx];
191 const struct gen_device_info *devinfo = &batch->screen->devinfo;
192
193 if (!iris_is_query_pipelined(q)) {
194 iris_emit_pipe_control_flush(batch,
195 PIPE_CONTROL_CS_STALL |
196 PIPE_CONTROL_STALL_AT_SCOREBOARD);
197 q->stalled = true;
198 }
199
200 switch (q->type) {
201 case PIPE_QUERY_OCCLUSION_COUNTER:
202 case PIPE_QUERY_OCCLUSION_PREDICATE:
203 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
204 if (devinfo->gen >= 10) {
205 /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
206 * bit set prior to programming a PIPE_CONTROL with Write PS Depth
207 * Count sync operation."
208 */
209 iris_emit_pipe_control_flush(batch, PIPE_CONTROL_DEPTH_STALL);
210 }
211 iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
212 PIPE_CONTROL_WRITE_DEPTH_COUNT |
213 PIPE_CONTROL_DEPTH_STALL,
214 offset);
215 break;
216 case PIPE_QUERY_TIME_ELAPSED:
217 case PIPE_QUERY_TIMESTAMP:
218 case PIPE_QUERY_TIMESTAMP_DISJOINT:
219 iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
220 PIPE_CONTROL_WRITE_TIMESTAMP,
221 offset);
222 break;
223 case PIPE_QUERY_PRIMITIVES_GENERATED:
224 ice->vtbl.store_register_mem64(batch,
225 q->index == 0 ? CL_INVOCATION_COUNT :
226 SO_PRIM_STORAGE_NEEDED(q->index),
227 q->bo, offset, false);
228 break;
229 case PIPE_QUERY_PRIMITIVES_EMITTED:
230 ice->vtbl.store_register_mem64(batch,
231 SO_NUM_PRIMS_WRITTEN(q->index),
232 q->bo, offset, false);
233 break;
234 case PIPE_QUERY_PIPELINE_STATISTICS: {
235 static const uint32_t index_to_reg[] = {
236 IA_VERTICES_COUNT,
237 IA_PRIMITIVES_COUNT,
238 VS_INVOCATION_COUNT,
239 GS_INVOCATION_COUNT,
240 GS_PRIMITIVES_COUNT,
241 CL_INVOCATION_COUNT,
242 CL_PRIMITIVES_COUNT,
243 PS_INVOCATION_COUNT,
244 HS_INVOCATION_COUNT,
245 DS_INVOCATION_COUNT,
246 CS_INVOCATION_COUNT,
247 };
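      /* The table is indexed by q->index, which follows the same ordering as
       * the pipeline_statistics switch in iris_get_query_result().
       */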
248 const uint32_t reg = index_to_reg[q->index];
249
250 ice->vtbl.store_register_mem64(batch, reg, q->bo, offset, false);
251 break;
252 }
253 default:
254 assert(false);
255 }
256 }
257
258 static void
259 write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end)
260 {
261 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
262 uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
263
264 iris_emit_pipe_control_flush(batch,
265 PIPE_CONTROL_CS_STALL |
266 PIPE_CONTROL_STALL_AT_SCOREBOARD);
267 for (uint32_t i = 0; i < count; i++) {
268 int s = q->index + i;
269 int g_idx = offsetof(struct iris_query_so_overflow,
270 stream[s].num_prims[end]);
271 int w_idx = offsetof(struct iris_query_so_overflow,
272 stream[s].prim_storage_needed[end]);
273 ice->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
274 q->bo, g_idx, false);
275 ice->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
276 q->bo, w_idx, false);
277 }
278 }
279
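/**
 * Convert GPU timestamp ticks to nanoseconds using the device's
 * timestamp frequency.
 */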
280 uint64_t
281 iris_timebase_scale(const struct gen_device_info *devinfo,
282 uint64_t gpu_timestamp)
283 {
284 return (1000000000ull * gpu_timestamp) / devinfo->timestamp_frequency;
285 }
286
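/**
 * Return time1 - time0, accounting for the raw GPU timestamp counter
 * wrapping around (it is only TIMESTAMP_BITS wide).
 */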
287 static uint64_t
288 iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)
289 {
290 if (time0 > time1) {
291 return (1ULL << TIMESTAMP_BITS) + time1 - time0;
292 } else {
293 return time1 - time0;
294 }
295 }
296
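/**
 * A stream overflowed if the primitives needing storage and the primitives
 * actually written diverged between the start and end snapshots.
 */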
297 static bool
298 stream_overflowed(struct iris_query_so_overflow *so, int s)
299 {
300 return (so->stream[s].prim_storage_needed[1] -
301 so->stream[s].prim_storage_needed[0]) !=
302 (so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
303 }
304
305 static void
306 calculate_result_on_cpu(const struct gen_device_info *devinfo,
307 struct iris_query *q)
308 {
309 switch (q->type) {
310 case PIPE_QUERY_OCCLUSION_PREDICATE:
311 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
312 q->result = q->map->end != q->map->start;
313 break;
314 case PIPE_QUERY_TIMESTAMP:
315 case PIPE_QUERY_TIMESTAMP_DISJOINT:
316 /* The timestamp is the single starting snapshot. */
317 q->result = iris_timebase_scale(devinfo, q->map->start);
318 q->result &= (1ull << TIMESTAMP_BITS) - 1;
319 break;
320 case PIPE_QUERY_TIME_ELAPSED:
321 q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);
322 q->result = iris_timebase_scale(devinfo, q->result);
323 q->result &= (1ull << TIMESTAMP_BITS) - 1;
324 break;
325 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
326 q->result = stream_overflowed((void *) q->map, q->index);
327 break;
328 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
329 q->result = false;
330 for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
331 q->result |= stream_overflowed((void *) q->map, i);
332 break;
333 case PIPE_QUERY_OCCLUSION_COUNTER:
334 case PIPE_QUERY_PRIMITIVES_GENERATED:
335 case PIPE_QUERY_PRIMITIVES_EMITTED:
336 case PIPE_QUERY_PIPELINE_STATISTICS:
337 default:
338 q->result = q->map->end - q->map->start;
339 break;
340 }
341
342 q->ready = true;
343 }
344
345 /*
346 * GPR0 = (GPR0 == 0) ? 0 : 1;
347 */
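/*
 * This works by adding GPR0 to zero, which sets the ALU zero flag iff GPR0
 * was 0; STOREINV writes the inverted flag back to GPR0, and ANDing with
 * GPR1 (loaded with 1 below) masks that down to a single 0/1 value.
 */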
348 static void
349 gpr0_to_bool(struct iris_context *ice)
350 {
351 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
352
353 ice->vtbl.load_register_imm64(batch, CS_GPR(1), 1ull);
354
355 static const uint32_t math[] = {
356 MI_MATH | (9 - 2),
357 MI_ALU2(LOAD, SRCA, R0),
358 MI_ALU1(LOAD0, SRCB),
359 MI_ALU0(ADD),
360 MI_ALU2(STOREINV, R0, ZF),
361 MI_ALU2(LOAD, SRCA, R0),
362 MI_ALU2(LOAD, SRCB, R1),
363 MI_ALU0(AND),
364 MI_ALU2(STORE, R0, ACCU),
365 };
366 iris_batch_emit(batch, math, sizeof(math));
367 }
368
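/*
 * Load the overflow snapshots for stream @idx into CS GPRs:
 * GPR1/GPR2 = prim_storage_needed at begin/end,
 * GPR3/GPR4 = num_prims at begin/end.
 */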
369 static void
370 load_overflow_data_to_cs_gprs(struct iris_context *ice,
371 struct iris_query *q,
372 int idx)
373 {
374 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
375
376 ice->vtbl.load_register_mem64(batch, CS_GPR(1), q->bo,
377 offsetof(struct iris_query_so_overflow,
378 stream[idx].prim_storage_needed[0]));
379 ice->vtbl.load_register_mem64(batch, CS_GPR(2), q->bo,
380 offsetof(struct iris_query_so_overflow,
381 stream[idx].prim_storage_needed[1]));
382
383 ice->vtbl.load_register_mem64(batch, CS_GPR(3), q->bo,
384 offsetof(struct iris_query_so_overflow,
385 stream[idx].num_prims[0]));
386 ice->vtbl.load_register_mem64(batch, CS_GPR(4), q->bo,
387 offsetof(struct iris_query_so_overflow,
388 stream[idx].num_prims[1]));
389 }
390
391 /*
392 * R3 = R4 - R3;
393 * R1 = R2 - R1;
394 * R1 = R3 - R1;
395 * R0 = R0 | R1;
396 */
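/*
 * With the GPR assignments from load_overflow_data_to_cs_gprs(), this ORs
 * a nonzero value into GPR0 whenever the storage-needed and prims-written
 * deltas differ for the stream (the MI_MATH analogue of stream_overflowed()).
 */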
397 static void
398 calc_overflow_for_stream(struct iris_context *ice)
399 {
400 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
401 static const uint32_t maths[] = {
402 MI_MATH | (17 - 2),
403 MI_ALU2(LOAD, SRCA, R4),
404 MI_ALU2(LOAD, SRCB, R3),
405 MI_ALU0(SUB),
406 MI_ALU2(STORE, R3, ACCU),
407 MI_ALU2(LOAD, SRCA, R2),
408 MI_ALU2(LOAD, SRCB, R1),
409 MI_ALU0(SUB),
410 MI_ALU2(STORE, R1, ACCU),
411 MI_ALU2(LOAD, SRCA, R3),
412 MI_ALU2(LOAD, SRCB, R1),
413 MI_ALU0(SUB),
414 MI_ALU2(STORE, R1, ACCU),
415 MI_ALU2(LOAD, SRCA, R1),
416 MI_ALU2(LOAD, SRCB, R0),
417 MI_ALU0(OR),
418 MI_ALU2(STORE, R0, ACCU),
419 };
420
421 iris_batch_emit(batch, maths, sizeof(maths));
422 }
423
424 static void
425 overflow_result_to_gpr0(struct iris_context *ice, struct iris_query *q)
426 {
427 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
428
429 ice->vtbl.load_register_imm64(batch, CS_GPR(0), 0ull);
430
431 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
432 load_overflow_data_to_cs_gprs(ice, q, q->index);
433 calc_overflow_for_stream(ice);
434 } else {
435 for (int i = 0; i < MAX_VERTEX_STREAMS; i++) {
436 load_overflow_data_to_cs_gprs(ice, q, i);
437 calc_overflow_for_stream(ice);
438 }
439 }
440
441 gpr0_to_bool(ice);
442 }
443
444 /**
445 * Calculate the result and store it to CS_GPR0.
446 */
447 static void
448 calculate_result_on_gpu(struct iris_context *ice, struct iris_query *q)
449 {
450 struct iris_batch *batch = &ice->batches[q->batch_idx];
451
452 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
453 q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
454 overflow_result_to_gpr0(ice, q);
455 return;
456 }
457
458 ice->vtbl.load_register_mem64(batch, CS_GPR(1), q->bo,
459 offsetof(struct iris_query_snapshots, start));
460 ice->vtbl.load_register_mem64(batch, CS_GPR(2), q->bo,
461 offsetof(struct iris_query_snapshots, end));
462
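   /* GPR0 = GPR2 - GPR1, i.e. the end snapshot minus the start snapshot. */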
463 static const uint32_t math[] = {
464 MI_MATH | (5 - 2),
465 MI_ALU2(LOAD, SRCA, R2),
466 MI_ALU2(LOAD, SRCB, R1),
467 MI_ALU0(SUB),
468 MI_ALU2(STORE, R0, ACCU),
469 };
470 iris_batch_emit(batch, math, sizeof(math));
471
472 if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
473 q->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE)
474 gpr0_to_bool(ice);
475 }
476
477 static struct pipe_query *
478 iris_create_query(struct pipe_context *ctx,
479 unsigned query_type,
480 unsigned index)
481 {
482 struct iris_query *q = calloc(1, sizeof(struct iris_query));
483
484 q->type = query_type;
485 q->index = index;
486
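   /* Statistics index 10 is CS_INVOCATION_COUNT (cs_invocations), which is
    * bumped by compute dispatches; iris emits those on the compute batch,
    * so the snapshots have to be taken there too.
    */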
487 if (q->type == PIPE_QUERY_PIPELINE_STATISTICS && q->index == 10)
488 q->batch_idx = IRIS_BATCH_COMPUTE;
489 else
490 q->batch_idx = IRIS_BATCH_RENDER;
491 return (struct pipe_query *) q;
492 }
493
494 static void
495 iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
496 {
497 struct iris_query *query = (void *) p_query;
498 iris_bo_unreference(query->bo);
499 free(query);
500 }
501
502
503 static boolean
504 iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
505 {
506 struct iris_screen *screen = (void *) ctx->screen;
507 struct iris_context *ice = (void *) ctx;
508 struct iris_query *q = (void *) query;
509
510 iris_bo_unreference(q->bo);
511 q->bo = iris_bo_alloc(screen->bufmgr, "query object", 4096,
512 IRIS_MEMZONE_OTHER);
513 if (!q->bo)
514 return false;
515
516 q->map = iris_bo_map(&ice->dbg, q->bo, MAP_READ | MAP_WRITE | MAP_ASYNC);
517 if (!q->map)
518 return false;
519
520 q->result = 0ull;
521 q->ready = false;
522 q->map->snapshots_landed = false;
523
524 if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
525 ice->state.prims_generated_query_active = true;
526 ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
527 }
528
529 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
530 q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
531 write_overflow_values(ice, q, false);
532 else
533 write_value(ice, q, offsetof(struct iris_query_snapshots, start));
534
535 return true;
536 }
537
538 static bool
539 iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
540 {
541 struct iris_context *ice = (void *) ctx;
542 struct iris_query *q = (void *) query;
543
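   /* TIMESTAMP queries have no interval; the single snapshot is written at
    * end_query time.  Reuse iris_begin_query() to allocate the BO and take
    * the snapshot, which calculate_result_on_cpu() reads from 'start'.
    */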
544 if (q->type == PIPE_QUERY_TIMESTAMP) {
545 iris_begin_query(ctx, query);
546 mark_available(ice, q);
547 return true;
548 }
549
550 if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
551 ice->state.prims_generated_query_active = false;
552 ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
553 }
554
555 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
556 q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
557 write_overflow_values(ice, q, true);
558 else
559 write_value(ice, q, offsetof(struct iris_query_snapshots, end));
560 mark_available(ice, q);
561
562 return true;
563 }
564
565 /**
566 * See if the snapshots have landed for a query, and if so, compute the
567 * result and mark it ready. Does not flush (unlike iris_get_query_result).
568 */
569 static void
570 iris_check_query_no_flush(struct iris_context *ice, struct iris_query *q)
571 {
572 struct iris_screen *screen = (void *) ice->ctx.screen;
573 const struct gen_device_info *devinfo = &screen->devinfo;
574
575 if (!q->ready && q->map->snapshots_landed) {
576 calculate_result_on_cpu(devinfo, q);
577 }
578 }
579
580 static boolean
581 iris_get_query_result(struct pipe_context *ctx,
582 struct pipe_query *query,
583 boolean wait,
584 union pipe_query_result *result)
585 {
586 struct iris_context *ice = (void *) ctx;
587 struct iris_query *q = (void *) query;
588 struct iris_screen *screen = (void *) ctx->screen;
589 const struct gen_device_info *devinfo = &screen->devinfo;
590
591 if (!q->ready) {
592 if (iris_batch_references(&ice->batches[q->batch_idx], q->bo))
593 iris_batch_flush(&ice->batches[q->batch_idx]);
594
595 if (!q->map->snapshots_landed) {
596 if (wait)
597 iris_bo_wait_rendering(q->bo);
598 else
599 return false;
600 }
601
602 assert(q->map->snapshots_landed);
603 calculate_result_on_cpu(devinfo, q);
604 }
605
606 assert(q->ready);
607
608 if (q->type == PIPE_QUERY_PIPELINE_STATISTICS) {
609 switch (q->index) {
610 case 0:
611 result->pipeline_statistics.ia_vertices = q->result;
612 break;
613 case 1:
614 result->pipeline_statistics.ia_primitives = q->result;
615 break;
616 case 2:
617 result->pipeline_statistics.vs_invocations = q->result;
618 break;
619 case 3:
620 result->pipeline_statistics.gs_invocations = q->result;
621 break;
622 case 4:
623 result->pipeline_statistics.gs_primitives = q->result;
624 break;
625 case 5:
626 result->pipeline_statistics.c_invocations = q->result;
627 break;
628 case 6:
629 result->pipeline_statistics.c_primitives = q->result;
630 break;
631 case 7:
632 result->pipeline_statistics.ps_invocations = q->result;
633 break;
634 case 8:
635 result->pipeline_statistics.hs_invocations = q->result;
636 break;
637 case 9:
638 result->pipeline_statistics.ds_invocations = q->result;
639 break;
640 case 10:
641 result->pipeline_statistics.cs_invocations = q->result;
642 break;
643 }
644 } else {
645 result->u64 = q->result;
646 }
647
648 return true;
649 }
650
651 static void
652 iris_get_query_result_resource(struct pipe_context *ctx,
653 struct pipe_query *query,
654 boolean wait,
655 enum pipe_query_value_type result_type,
656 int index,
657 struct pipe_resource *p_res,
658 unsigned offset)
659 {
660 struct iris_context *ice = (void *) ctx;
661 struct iris_query *q = (void *) query;
662 struct iris_batch *batch = &ice->batches[q->batch_idx];
663 const struct gen_device_info *devinfo = &batch->screen->devinfo;
664 struct iris_resource *res = (void *) p_res;
665 unsigned snapshots_landed_offset =
666 offsetof(struct iris_query_snapshots, snapshots_landed);
667
668 res->bind_history |= PIPE_BIND_QUERY_BUFFER;
669
670 if (index == -1) {
671 /* They're asking for the availability of the result. If we still
672 * have commands queued up which produce the result, submit them
673 * now so that progress happens. Either way, copy the snapshots
674 * landed field to the destination resource.
675 */
676 if (iris_batch_references(batch, q->bo))
677 iris_batch_flush(batch);
678
679 ice->vtbl.copy_mem_mem(batch, iris_resource_bo(p_res), offset,
680 q->bo, snapshots_landed_offset,
681 result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
682 return;
683 }
684
685 if (!q->ready && q->map->snapshots_landed) {
686 /* The final snapshots happen to have landed, so let's just compute
687 * the result on the CPU now...
688 */
689 calculate_result_on_cpu(devinfo, q);
690 }
691
692 if (q->ready) {
693 /* We happen to have the result on the CPU, so just copy it. */
694 if (result_type <= PIPE_QUERY_TYPE_U32) {
695 ice->vtbl.store_data_imm32(batch, iris_resource_bo(p_res), offset,
696 q->result);
697 } else {
698 ice->vtbl.store_data_imm64(batch, iris_resource_bo(p_res), offset,
699 q->result);
700 }
701
702          /* Make sure the result lands before they bind the QBO elsewhere
703 * and use the result.
704 */
705 // XXX: Why? i965 doesn't do this.
706 iris_emit_pipe_control_flush(batch, PIPE_CONTROL_CS_STALL);
707 return;
708 }
709
710    /* Calculate the result into CS_GPR0 */
711 calculate_result_on_gpu(ice, q);
712
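   /* If the caller doesn't want to wait and we haven't already stalled,
    * predicate the register write below on snapshots_landed, so GPR0 is
    * only copied to the destination once the result is actually valid.
    */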
713 bool predicated = !wait && !q->stalled;
714
715 if (predicated) {
716 ice->vtbl.load_register_imm64(batch, MI_PREDICATE_SRC1, 0ull);
717 ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, q->bo,
718 snapshots_landed_offset);
719 uint32_t predicate = MI_PREDICATE |
720 MI_PREDICATE_LOADOP_LOADINV |
721 MI_PREDICATE_COMBINEOP_SET |
722 MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
723 iris_batch_emit(batch, &predicate, sizeof(uint32_t));
724 }
725
726 if (result_type <= PIPE_QUERY_TYPE_U32) {
727 ice->vtbl.store_register_mem32(batch, CS_GPR(0),
728 iris_resource_bo(p_res),
729 offset, predicated);
730 } else {
731 ice->vtbl.store_register_mem64(batch, CS_GPR(0),
732 iris_resource_bo(p_res),
733 offset, predicated);
734 }
735 }
736
737 static void
738 iris_set_active_query_state(struct pipe_context *ctx, boolean enable)
739 {
740 struct iris_context *ice = (void *) ctx;
741
742 if (ice->state.statistics_counters_enabled == enable)
743 return;
744
745 // XXX: most packets aren't paying attention to this yet, because it'd
746 // have to be done dynamically at draw time, which is a pain
747 ice->state.statistics_counters_enabled = enable;
748 ice->state.dirty |= IRIS_DIRTY_CLIP |
749 IRIS_DIRTY_GS |
750 IRIS_DIRTY_RASTER |
751 IRIS_DIRTY_STREAMOUT |
752 IRIS_DIRTY_TCS |
753 IRIS_DIRTY_TES |
754 IRIS_DIRTY_VS |
755 IRIS_DIRTY_WM;
756 }
757
758 static void
759 set_predicate_enable(struct iris_context *ice, bool value)
760 {
761 if (value)
762 ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
763 else
764 ice->state.predicate = IRIS_PREDICATE_STATE_DONT_RENDER;
765 }
766
767 static void
768 set_predicate_for_result(struct iris_context *ice,
769 struct iris_query *q,
770 bool inverted)
771 {
772 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
773
774 /* The CPU doesn't have the query result yet; use hardware predication */
775 ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT;
776
777 /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
778 iris_emit_pipe_control_flush(batch, PIPE_CONTROL_FLUSH_ENABLE);
779 q->stalled = true;
780
781 switch (q->type) {
782 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
783 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
784 overflow_result_to_gpr0(ice, q);
785
786 ice->vtbl.load_register_reg64(batch, MI_PREDICATE_SRC0, CS_GPR(0));
787 ice->vtbl.load_register_imm64(batch, MI_PREDICATE_SRC1, 0ull);
788 break;
789 default:
790 /* PIPE_QUERY_OCCLUSION_* */
791 ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, q->bo,
792 offsetof(struct iris_query_snapshots, start));
793 ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC1, q->bo,
794 offsetof(struct iris_query_snapshots, end));
795 break;
796 }
797
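   /* In both cases, SRC0 == SRC1 corresponds to a "false" query result
    * (no samples passed, or no overflow), so LOADINV makes the predicate
    * pass when the result is true; the inverted case uses LOAD instead.
    */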
798 uint32_t mi_predicate = MI_PREDICATE |
799 MI_PREDICATE_COMBINEOP_SET |
800 MI_PREDICATE_COMPAREOP_SRCS_EQUAL |
801 (inverted ? MI_PREDICATE_LOADOP_LOAD
802 : MI_PREDICATE_LOADOP_LOADINV);
803 iris_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
804
805 /* We immediately set the predicate on the render batch, as all the
806 * counters come from 3D operations. However, we may need to predicate
807 * a compute dispatch, which executes in a different GEM context and has
808 * a different MI_PREDICATE_DATA register. So, we save the result to
809 * memory and reload it in iris_launch_grid.
810 */
811 unsigned offset = offsetof(struct iris_query_snapshots, predicate_data);
812 ice->vtbl.store_register_mem64(batch, MI_PREDICATE_DATA,
813 q->bo, offset, false);
814 ice->state.compute_predicate = q->bo;
815 }
816
817 static void
818 iris_render_condition(struct pipe_context *ctx,
819 struct pipe_query *query,
820 boolean condition,
821 enum pipe_render_cond_flag mode)
822 {
823 struct iris_context *ice = (void *) ctx;
824 struct iris_query *q = (void *) query;
825
826 if (!q) {
827 ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
828 return;
829 }
830
831 iris_check_query_no_flush(ice, q);
832
833 if (q->result || q->ready) {
834 set_predicate_enable(ice, (q->result != 0) ^ condition);
835 } else {
836 if (mode == PIPE_RENDER_COND_NO_WAIT ||
837 mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
838 perf_debug(&ice->dbg, "Conditional rendering demoted from "
839 "\"no wait\" to \"wait\".");
840 }
841 set_predicate_for_result(ice, q, condition);
842 }
843 }
844
845 void
846 iris_init_query_functions(struct pipe_context *ctx)
847 {
848 ctx->create_query = iris_create_query;
849 ctx->destroy_query = iris_destroy_query;
850 ctx->begin_query = iris_begin_query;
851 ctx->end_query = iris_end_query;
852 ctx->get_query_result = iris_get_query_result;
853 ctx->get_query_result_resource = iris_get_query_result_resource;
854 ctx->set_active_query_state = iris_set_active_query_state;
855 ctx->render_condition = iris_render_condition;
856 }