/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_query.c
 *
 * ============================= GENXML CODE =============================
 *              [This file is compiled once per generation.]
 * =======================================================================
 *
 * Query object support.  This allows measuring various simple statistics
 * via counters on the GPU.  We use GenX code for MI_MATH calculations.
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "iris_context.h"
#include "iris_defines.h"
#include "iris_fence.h"
#include "iris_monitor.h"
#include "iris_resource.h"
#include "iris_screen.h"

#include "iris_genx_macros.h"

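/* Each streamout stream has its own pair of 64-bit counter registers;
 * stream N's register is the stream-0 register address plus N * 8 bytes.
 */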
#define SO_PRIM_STORAGE_NEEDED(n) (GENX(SO_PRIM_STORAGE_NEEDED0_num) + (n) * 8)
#define SO_NUM_PRIMS_WRITTEN(n)   (GENX(SO_NUM_PRIMS_WRITTEN0_num) + (n) * 8)

struct iris_query {
   enum pipe_query_type type;
   int index;

   bool ready;
   bool stalled;

   uint64_t result;

   struct iris_state_ref query_state_ref;
   struct iris_query_snapshots *map;
   struct iris_syncpt *syncpt;

   int batch_idx;

   struct iris_monitor_object *monitor;

   /* Fence for PIPE_QUERY_GPU_FINISHED. */
   struct pipe_fence_handle *fence;
};

struct iris_query_snapshots {
   /** iris_render_condition's saved MI_PREDICATE_RESULT value. */
   uint64_t predicate_result;

   /** Have the start/end snapshots landed? */
   uint64_t snapshots_landed;

   /** Starting and ending counter snapshots */
   uint64_t start;
   uint64_t end;
};

struct iris_query_so_overflow {
   uint64_t predicate_result;
   uint64_t snapshots_landed;

   struct {
      uint64_t prim_storage_needed[2];
      uint64_t num_prims[2];
   } stream[4];
};

static struct gen_mi_value
query_mem64(struct iris_query *q, uint32_t offset)
{
   struct iris_address addr = {
      .bo = iris_resource_bo(q->query_state_ref.res),
      .offset = q->query_state_ref.offset + offset,
      .write = true
   };
   return gen_mi_mem64(addr);
}

/**
 * Is this type of query written by PIPE_CONTROL?
 */
static bool
iris_is_query_pipelined(struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_TIME_ELAPSED:
      return true;

   default:
      return false;
   }
}

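/**
 * Mark a query's result as available by writing its snapshots_landed flag.
 * For pipelined queries, PIPE_CONTROL_FLUSH_ENABLE orders the availability
 * write after the result snapshots themselves, so CPU polling of
 * snapshots_landed can never observe "available" before the data is valid.
 */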
static void
mark_available(struct iris_context *ice, struct iris_query *q)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
   unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   offset += q->query_state_ref.offset;

   if (!iris_is_query_pipelined(q)) {
      ice->vtbl.store_data_imm64(batch, bo, offset, true);
   } else {
      /* Order available *after* the query results. */
      flags |= PIPE_CONTROL_FLUSH_ENABLE;
      iris_emit_pipe_control_write(batch, "query: mark available",
                                   flags, bo, offset, true);
   }
}

/**
 * Write PS_DEPTH_COUNT to q->(dest) via a PIPE_CONTROL.
 */
static void
iris_pipelined_write(struct iris_batch *batch,
                     struct iris_query *q,
                     enum pipe_control_flags flags,
                     unsigned offset)
{
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   const unsigned optional_cs_stall =
      GEN_GEN == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   iris_emit_pipe_control_write(batch, "query: pipelined snapshot write",
                                flags | optional_cs_stall,
                                bo, offset, 0ull);
}

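/**
 * Snapshot the current value of this query's counter into its buffer at
 * the given offset.  Pipelined counters (occlusion, timestamps) are
 * written with PIPE_CONTROL; the statistics registers are read with
 * MI_STORE_REGISTER_MEM after a stall so the value is current.
 */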
static void
write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   if (!iris_is_query_pipelined(q)) {
      iris_emit_pipe_control_flush(batch,
                                   "query: non-pipelined snapshot write",
                                   PIPE_CONTROL_CS_STALL |
                                   PIPE_CONTROL_STALL_AT_SCOREBOARD);
      q->stalled = true;
   }

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      if (GEN_GEN >= 10) {
         /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
          *  bit set prior to programming a PIPE_CONTROL with Write PS Depth
          *  Count sync operation."
          */
         iris_emit_pipe_control_flush(batch,
                                      "workaround: depth stall before writing "
                                      "PS_DEPTH_COUNT",
                                      PIPE_CONTROL_DEPTH_STALL);
      }
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_DEPTH_COUNT |
                           PIPE_CONTROL_DEPTH_STALL,
                           offset);
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_TIMESTAMP,
                           offset);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      ice->vtbl.store_register_mem64(batch,
                                     q->index == 0 ?
                                     GENX(CL_INVOCATION_COUNT_num) :
                                     SO_PRIM_STORAGE_NEEDED(q->index),
                                     bo, offset, false);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      ice->vtbl.store_register_mem64(batch,
                                     SO_NUM_PRIMS_WRITTEN(q->index),
                                     bo, offset, false);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: {
      static const uint32_t index_to_reg[] = {
         GENX(IA_VERTICES_COUNT_num),
         GENX(IA_PRIMITIVES_COUNT_num),
         GENX(VS_INVOCATION_COUNT_num),
         GENX(GS_INVOCATION_COUNT_num),
         GENX(GS_PRIMITIVES_COUNT_num),
         GENX(CL_INVOCATION_COUNT_num),
         GENX(CL_PRIMITIVES_COUNT_num),
         GENX(PS_INVOCATION_COUNT_num),
         GENX(HS_INVOCATION_COUNT_num),
         GENX(DS_INVOCATION_COUNT_num),
         GENX(CS_INVOCATION_COUNT_num),
      };
      const uint32_t reg = index_to_reg[q->index];

      ice->vtbl.store_register_mem64(batch, reg, bo, offset, false);
      break;
   }
   default:
      assert(false);
   }
}

static void
write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   uint32_t offset = q->query_state_ref.offset;

   iris_emit_pipe_control_flush(batch,
                                "query: write SO overflow snapshots",
                                PIPE_CONTROL_CS_STALL |
                                PIPE_CONTROL_STALL_AT_SCOREBOARD);
   for (uint32_t i = 0; i < count; i++) {
      int s = q->index + i;
      int g_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].num_prims[end]);
      int w_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].prim_storage_needed[end]);
      ice->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
                                     bo, g_idx, false);
      ice->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
                                     bo, w_idx, false);
   }
}

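/* The raw timestamp counter is only TIMESTAMP_BITS wide, so it can wrap
 * between the two snapshots.  If end < start, the counter wrapped exactly
 * once and the true delta is (2^TIMESTAMP_BITS - time0) + time1.
 */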
static uint64_t
iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
   if (time0 > time1) {
      return (1ULL << TIMESTAMP_BITS) + time1 - time0;
   } else {
      return time1 - time0;
   }
}

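/* A stream overflowed if the primitives that needed storage don't match
 * the primitives actually written over the query interval -- i.e. some
 * primitives were dropped because the streamout buffers filled up.
 */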
static bool
stream_overflowed(struct iris_query_so_overflow *so, int s)
{
   return (so->stream[s].prim_storage_needed[1] -
           so->stream[s].prim_storage_needed[0]) !=
          (so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
}

static void
calculate_result_on_cpu(const struct gen_device_info *devinfo,
                        struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      q->result = q->map->end != q->map->start;
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* The timestamp is the single starting snapshot. */
      q->result = gen_device_info_timebase_scale(devinfo, q->map->start);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);
      q->result = gen_device_info_timebase_scale(devinfo, q->result);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      q->result = stream_overflowed((void *) q->map, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      q->result = false;
      for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
         q->result |= stream_overflowed((void *) q->map, i);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      q->result = q->map->end - q->map->start;

      /* WaDividePSInvocationCountBy4:HSW,BDW */
      if (GEN_GEN == 8 && q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
         q->result /= 4;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   default:
      q->result = q->map->end - q->map->start;
      break;
   }

   q->ready = true;
}

/**
 * Calculate the streamout overflow for stream \p idx:
 *
 * (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0])
 */
static struct gen_mi_value
calc_overflow_for_stream(struct gen_mi_builder *b,
                         struct iris_query *q,
                         int idx)
{
#define C(counter, i) query_mem64(q, \
   offsetof(struct iris_query_so_overflow, stream[idx].counter[i]))

   return gen_mi_isub(b, gen_mi_isub(b, C(num_prims, 1), C(num_prims, 0)),
                         gen_mi_isub(b, C(prim_storage_needed, 1),
                                        C(prim_storage_needed, 0)));
#undef C
}

/**
 * Calculate whether any stream has overflowed.
 */
static struct gen_mi_value
calc_overflow_any_stream(struct gen_mi_builder *b, struct iris_query *q)
{
   struct gen_mi_value stream_result[MAX_VERTEX_STREAMS];
   for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
      stream_result[i] = calc_overflow_for_stream(b, q, i);

   struct gen_mi_value result = stream_result[0];
   for (int i = 1; i < MAX_VERTEX_STREAMS; i++)
      result = gen_mi_ior(b, result, stream_result[i]);

   return result;
}

static bool
query_is_boolean(enum pipe_query_type type)
{
   switch (type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      return true;
   default:
      return false;
   }
}

/**
 * Calculate the result using MI_MATH.
 */
static struct gen_mi_value
calculate_result_on_gpu(const struct gen_device_info *devinfo,
                        struct gen_mi_builder *b,
                        struct iris_query *q)
{
   struct gen_mi_value result;
   struct gen_mi_value start_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, start));
   struct gen_mi_value end_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, end));

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(b, q);
      break;
   case PIPE_QUERY_TIMESTAMP: {
      /* TODO: This discards any fractional bits of the timebase scale.
       * We would need to do a bit of fixed point math on the CS ALU, or
       * launch an actual shader to calculate this with full precision.
       */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_iand(b, gen_mi_imm((1ull << 36) - 1),
                              gen_mi_imul_imm(b, start_val, scale));
      break;
   }
   case PIPE_QUERY_TIME_ELAPSED: {
      /* TODO: This discards fractional bits (see above). */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_imul_imm(b, gen_mi_isub(b, end_val, start_val), scale);
      break;
   }
   default:
      result = gen_mi_isub(b, end_val, start_val);
      break;
   }

   /* WaDividePSInvocationCountBy4:HSW,BDW */
   if (GEN_GEN == 8 &&
       q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      result = gen_mi_ushr32_imm(b, result, 2);

   if (query_is_boolean(q->type))
      result = gen_mi_iand(b, gen_mi_nz(b, result), gen_mi_imm(1));

   return result;
}

static struct pipe_query *
iris_create_query(struct pipe_context *ctx,
                  unsigned query_type,
                  unsigned index)
{
   struct iris_query *q = calloc(1, sizeof(struct iris_query));

   q->type = query_type;
   q->index = index;

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_CS_INVOCATIONS)
      q->batch_idx = IRIS_BATCH_COMPUTE;
   else
      q->batch_idx = IRIS_BATCH_RENDER;
   return (struct pipe_query *) q;
}

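/**
 * "Batch queries" are driver-specific: they wrap an iris_monitor_object,
 * which gathers performance-monitor counters instead of the standard
 * pipeline statistics.
 */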
static struct pipe_query *
iris_create_batch_query(struct pipe_context *ctx,
                        unsigned num_queries,
                        unsigned *query_types)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = calloc(1, sizeof(struct iris_query));
   if (unlikely(!q))
      return NULL;
   q->type = PIPE_QUERY_DRIVER_SPECIFIC;
   q->monitor = iris_create_monitor_object(ice, num_queries, query_types);
   if (unlikely(!q->monitor)) {
      free(q);
      return NULL;
   }

   return (struct pipe_query *) q;
}

static void
iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
{
   struct iris_query *query = (void *) p_query;
   struct iris_screen *screen = (void *) ctx->screen;
   if (query->monitor) {
      iris_destroy_monitor_object(ctx, query->monitor);
      query->monitor = NULL;
   } else {
      iris_syncpt_reference(screen, &query->syncpt, NULL);
      screen->base.fence_reference(ctx->screen, &query->fence, NULL);
   }
   free(query);
}

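/**
 * Begin a query: allocate fresh snapshot storage from the query buffer
 * uploader, clear the availability flag, and write the starting snapshot.
 */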
static bool
iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_begin_monitor(ctx, q->monitor);

   void *ptr = NULL;
   uint32_t size;

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      size = sizeof(struct iris_query_so_overflow);
   else
      size = sizeof(struct iris_query_snapshots);

   u_upload_alloc(ice->query_buffer_uploader, 0,
                  size, size, &q->query_state_ref.offset,
                  &q->query_state_ref.res, &ptr);

   if (!iris_resource_bo(q->query_state_ref.res))
      return false;

   q->map = ptr;
   if (!q->map)
      return false;

   q->result = 0ull;
   q->ready = false;
   WRITE_ONCE(q->map->snapshots_landed, false);

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = true;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, false);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, start));

   return true;
}

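/**
 * End a query: write the ending snapshot and mark the result available.
 * PIPE_QUERY_TIMESTAMP has no separate begin, so it takes its single
 * snapshot here.
 */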
static bool
iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_end_monitor(ctx, q->monitor);

   if (q->type == PIPE_QUERY_GPU_FINISHED) {
      ctx->flush(ctx, &q->fence, PIPE_FLUSH_DEFERRED);
      return true;
   }

   struct iris_batch *batch = &ice->batches[q->batch_idx];

   if (q->type == PIPE_QUERY_TIMESTAMP) {
      iris_begin_query(ctx, query);
      iris_batch_reference_signal_syncpt(batch, &q->syncpt);
      mark_available(ice, q);
      return true;
   }

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = false;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, true);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, end));

   iris_batch_reference_signal_syncpt(batch, &q->syncpt);
   mark_available(ice, q);

   return true;
}

/**
 * See if the snapshots have landed for a query, and if so, compute the
 * result and mark it ready.  Does not flush (unlike iris_get_query_result).
 */
static void
iris_check_query_no_flush(struct iris_context *ice, struct iris_query *q)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      calculate_result_on_cpu(devinfo, q);
   }
}

static bool
iris_get_query_result(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool wait,
                      union pipe_query_result *result)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_get_monitor_result(ctx, q->monitor, wait, result->batch);

   struct iris_screen *screen = (void *) ctx->screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (unlikely(screen->no_hw)) {
      result->u64 = 0;
      return true;
   }

   if (q->type == PIPE_QUERY_GPU_FINISHED) {
      struct pipe_screen *screen = ctx->screen;

      result->b = screen->fence_finish(screen, ctx, q->fence,
                                       wait ? PIPE_TIMEOUT_INFINITE : 0);
      return result->b;
   }

   if (!q->ready) {
      struct iris_batch *batch = &ice->batches[q->batch_idx];
      if (q->syncpt == iris_batch_get_signal_syncpt(batch))
         iris_batch_flush(batch);

      while (!READ_ONCE(q->map->snapshots_landed)) {
         if (wait)
            iris_wait_syncpt(ctx->screen, q->syncpt, INT64_MAX);
         else
            return false;
      }

      assert(READ_ONCE(q->map->snapshots_landed));
      calculate_result_on_cpu(devinfo, q);
   }

   result->u64 = q->result;

   return true;
}

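/**
 * Write a query's result into a buffer object (QBO).  If the result is
 * already known on the CPU, store it directly; otherwise compute it on
 * the GPU with MI_MATH, optionally predicating the store on the
 * snapshots having landed so an unfinished query writes nothing.
 */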
static void
iris_get_query_result_resource(struct pipe_context *ctx,
                               struct pipe_query *query,
                               bool wait,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *p_res,
                               unsigned offset)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   struct iris_resource *res = (void *) p_res;
   struct iris_bo *query_bo = iris_resource_bo(q->query_state_ref.res);
   struct iris_bo *dst_bo = iris_resource_bo(p_res);
   unsigned snapshots_landed_offset =
      offsetof(struct iris_query_snapshots, snapshots_landed);

   res->bind_history |= PIPE_BIND_QUERY_BUFFER;

   if (index == -1) {
      /* They're asking for the availability of the result.  If we still
       * have commands queued up which produce the result, submit them
       * now so that progress happens.  Either way, copy the snapshots
       * landed field to the destination resource.
       */
      if (q->syncpt == iris_batch_get_signal_syncpt(batch))
         iris_batch_flush(batch);

      ice->vtbl.copy_mem_mem(batch, dst_bo, offset,
                             query_bo, snapshots_landed_offset,
                             result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
      return;
   }

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      /* The final snapshots happen to have landed, so let's just compute
       * the result on the CPU now...
       */
      calculate_result_on_cpu(devinfo, q);
   }

   if (q->ready) {
      /* We happen to have the result on the CPU, so just copy it. */
      if (result_type <= PIPE_QUERY_TYPE_U32) {
         ice->vtbl.store_data_imm32(batch, dst_bo, offset, q->result);
      } else {
         ice->vtbl.store_data_imm64(batch, dst_bo, offset, q->result);
      }

      /* Make sure the result lands before they bind the QBO elsewhere
       * and use the result.
       */
      // XXX: Why?  i965 doesn't do this.
      iris_emit_pipe_control_flush(batch,
                                   "query: unknown QBO flushing hack",
                                   PIPE_CONTROL_CS_STALL);
      return;
   }

   bool predicated = !wait && !q->stalled;

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   struct gen_mi_value result = calculate_result_on_gpu(devinfo, &b, q);
   struct gen_mi_value dst =
      result_type <= PIPE_QUERY_TYPE_U32 ? gen_mi_mem32(rw_bo(dst_bo, offset))
                                         : gen_mi_mem64(rw_bo(dst_bo, offset));

   if (predicated) {
      gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT),
                       gen_mi_mem64(ro_bo(query_bo, snapshots_landed_offset)));
      gen_mi_store_if(&b, dst, result);
   } else {
      gen_mi_store(&b, dst, result);
   }
}

static void
iris_set_active_query_state(struct pipe_context *ctx, bool enable)
{
   struct iris_context *ice = (void *) ctx;

   if (ice->state.statistics_counters_enabled == enable)
      return;

   // XXX: most packets aren't paying attention to this yet, because it'd
   // have to be done dynamically at draw time, which is a pain
   ice->state.statistics_counters_enabled = enable;
   ice->state.dirty |= IRIS_DIRTY_CLIP |
                       IRIS_DIRTY_GS |
                       IRIS_DIRTY_RASTER |
                       IRIS_DIRTY_STREAMOUT |
                       IRIS_DIRTY_TCS |
                       IRIS_DIRTY_TES |
                       IRIS_DIRTY_VS |
                       IRIS_DIRTY_WM;
}

static void
set_predicate_enable(struct iris_context *ice, bool value)
{
   if (value)
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
   else
      ice->state.predicate = IRIS_PREDICATE_STATE_DONT_RENDER;
}

static void
set_predicate_for_result(struct iris_context *ice,
                         struct iris_query *q,
                         bool inverted)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   /* The CPU doesn't have the query result yet; use hardware predication */
   ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT;

   /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
   iris_emit_pipe_control_flush(batch,
                                "conditional rendering: set predicate",
                                PIPE_CONTROL_FLUSH_ENABLE);
   q->stalled = true;

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   struct gen_mi_value result;

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(&b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(&b, q);
      break;
   default: {
      /* PIPE_QUERY_OCCLUSION_* */
      struct gen_mi_value start =
         query_mem64(q, offsetof(struct iris_query_snapshots, start));
      struct gen_mi_value end =
         query_mem64(q, offsetof(struct iris_query_snapshots, end));
      result = gen_mi_isub(&b, end, start);
      break;
   }
   }

   result = inverted ? gen_mi_z(&b, result) : gen_mi_nz(&b, result);
   result = gen_mi_iand(&b, result, gen_mi_imm(1));

   /* We immediately set the predicate on the render batch, as all the
    * counters come from 3D operations.  However, we may need to predicate
    * a compute dispatch, which executes in a different GEM context and has
    * a different MI_PREDICATE_RESULT register.  So, we save the result to
    * memory and reload it in iris_launch_grid.
    */
   gen_mi_value_ref(&b, result);
   gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT), result);
   gen_mi_store(&b, query_mem64(q, offsetof(struct iris_query_snapshots,
                                            predicate_result)), result);
   ice->state.compute_predicate = bo;
}

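/**
 * The pipe_context::render_condition hook: make subsequent rendering
 * conditional on a query result.  If the result is already known on the
 * CPU we simply enable or disable rendering; otherwise we fall back to
 * hardware predication via MI_PREDICATE.
 */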
static void
iris_render_condition(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   /* The old condition isn't relevant; we'll update it if necessary */
   ice->state.compute_predicate = NULL;
   ice->condition.query = q;
   ice->condition.condition = condition;

   if (!q) {
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
      return;
   }

   iris_check_query_no_flush(ice, q);

   if (q->result || q->ready) {
      set_predicate_enable(ice, (q->result != 0) ^ condition);
   } else {
      if (mode == PIPE_RENDER_COND_NO_WAIT ||
          mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
         perf_debug(&ice->dbg, "Conditional rendering demoted from "
                    "\"no wait\" to \"wait\".");
      }
      set_predicate_for_result(ice, q, condition);
   }
}

static void
iris_resolve_conditional_render(struct iris_context *ice)
{
   struct pipe_context *ctx = (void *) ice;
   struct iris_query *q = ice->condition.query;
   struct pipe_query *query = (void *) q;
   union pipe_query_result result;

   if (ice->state.predicate != IRIS_PREDICATE_STATE_USE_BIT)
      return;

   assert(q);

   iris_get_query_result(ctx, query, true, &result);
   set_predicate_enable(ice, (q->result != 0) ^ ice->condition.condition);
}

void
genX(init_query)(struct iris_context *ice)
{
   struct pipe_context *ctx = &ice->ctx;

   ctx->create_query = iris_create_query;
   ctx->create_batch_query = iris_create_batch_query;
   ctx->destroy_query = iris_destroy_query;
   ctx->begin_query = iris_begin_query;
   ctx->end_query = iris_end_query;
   ctx->get_query_result = iris_get_query_result;
   ctx->get_query_result_resource = iris_get_query_result_resource;
   ctx->set_active_query_state = iris_set_active_query_state;
   ctx->render_condition = iris_render_condition;

   ice->vtbl.resolve_conditional_render = iris_resolve_conditional_render;
}