iris/perf: implement iris_create_monitor_object
mesa.git: src/gallium/drivers/iris/iris_query.c
/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_query.c
 *
 * ============================= GENXML CODE =============================
 *              [This file is compiled once per generation.]
 * =======================================================================
 *
 * Query object support. This allows measuring various simple statistics
 * via counters on the GPU. We use GenX code for MI_MATH calculations.
 */

#include <stdio.h>
#include <errno.h>
#include "perf/gen_perf.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "iris_context.h"
#include "iris_defines.h"
#include "iris_fence.h"
#include "iris_resource.h"
#include "iris_screen.h"

#include "iris_genx_macros.h"

#define SO_PRIM_STORAGE_NEEDED(n) (GENX(SO_PRIM_STORAGE_NEEDED0_num) + (n) * 8)
#define SO_NUM_PRIMS_WRITTEN(n) (GENX(SO_NUM_PRIMS_WRITTEN0_num) + (n) * 8)

struct iris_query {
   enum pipe_query_type type;
   int index;

   bool ready;

   bool stalled;

   uint64_t result;

   struct iris_state_ref query_state_ref;
   struct iris_query_snapshots *map;
   struct iris_syncpt *syncpt;

   int batch_idx;

   struct iris_monitor_object *monitor;
};

struct iris_query_snapshots {
   /** iris_render_condition's saved MI_PREDICATE_RESULT value. */
   uint64_t predicate_result;

   /** Have the start/end snapshots landed? */
   uint64_t snapshots_landed;

   /** Starting and ending counter snapshots */
   uint64_t start;
   uint64_t end;
};

struct iris_query_so_overflow {
   uint64_t predicate_result;
   uint64_t snapshots_landed;

   struct {
      uint64_t prim_storage_needed[2];
      uint64_t num_prims[2];
   } stream[4];
};

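/**
 * Return a gen_mi_value referencing the 64-bit field at @offset within
 * this query's snapshot buffer (query_state_ref), marked writable.
 */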
static struct gen_mi_value
query_mem64(struct iris_query *q, uint32_t offset)
{
   struct iris_address addr = {
      .bo = iris_resource_bo(q->query_state_ref.res),
      .offset = q->query_state_ref.offset + offset,
      .write = true
   };
   return gen_mi_mem64(addr);
}

/**
 * Is this type of query written by PIPE_CONTROL?
 */
static bool
iris_is_query_pipelined(struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_TIME_ELAPSED:
      return true;

   default:
      return false;
   }
}

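/**
 * Mark a query's result as available by setting snapshots_landed = true.
 *
 * Non-pipelined queries get a simple immediate write; pipelined queries
 * issue the write via PIPE_CONTROL so it lands only after the actual
 * counter snapshots do.
 */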
static void
mark_available(struct iris_context *ice, struct iris_query *q)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
   unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   offset += q->query_state_ref.offset;

   if (!iris_is_query_pipelined(q)) {
      ice->vtbl.store_data_imm64(batch, bo, offset, true);
   } else {
      /* Order available *after* the query results. */
      flags |= PIPE_CONTROL_FLUSH_ENABLE;
      iris_emit_pipe_control_write(batch, "query: mark available",
                                   flags, bo, offset, true);
   }
}

/**
 * Write a pipelined counter snapshot (PS_DEPTH_COUNT or TIMESTAMP,
 * depending on @flags) to the given offset in the query buffer via a
 * PIPE_CONTROL.
 */
static void
iris_pipelined_write(struct iris_batch *batch,
                     struct iris_query *q,
                     enum pipe_control_flags flags,
                     unsigned offset)
{
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   const unsigned optional_cs_stall =
      GEN_GEN == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   iris_emit_pipe_control_write(batch, "query: pipelined snapshot write",
                                flags | optional_cs_stall,
                                bo, offset, 0ull);
}

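/**
 * Write the counter snapshot for this query type to the given offset in
 * the query buffer (the "start" or "end" slot).  Non-pipelined counters
 * are preceded by a stall so that prior work is fully accounted for.
 */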
static void
write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   if (!iris_is_query_pipelined(q)) {
      iris_emit_pipe_control_flush(batch,
                                   "query: non-pipelined snapshot write",
                                   PIPE_CONTROL_CS_STALL |
                                   PIPE_CONTROL_STALL_AT_SCOREBOARD);
      q->stalled = true;
   }

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      if (GEN_GEN >= 10) {
         /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
          *  bit set prior to programming a PIPE_CONTROL with Write PS Depth
          *  Count sync operation."
          */
         iris_emit_pipe_control_flush(batch,
                                      "workaround: depth stall before writing "
                                      "PS_DEPTH_COUNT",
                                      PIPE_CONTROL_DEPTH_STALL);
      }
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_DEPTH_COUNT |
                           PIPE_CONTROL_DEPTH_STALL,
                           offset);
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_TIMESTAMP,
                           offset);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      ice->vtbl.store_register_mem64(batch,
                                     q->index == 0 ?
                                     GENX(CL_INVOCATION_COUNT_num) :
                                     SO_PRIM_STORAGE_NEEDED(q->index),
                                     bo, offset, false);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      ice->vtbl.store_register_mem64(batch,
                                     SO_NUM_PRIMS_WRITTEN(q->index),
                                     bo, offset, false);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: {
      static const uint32_t index_to_reg[] = {
         GENX(IA_VERTICES_COUNT_num),
         GENX(IA_PRIMITIVES_COUNT_num),
         GENX(VS_INVOCATION_COUNT_num),
         GENX(GS_INVOCATION_COUNT_num),
         GENX(GS_PRIMITIVES_COUNT_num),
         GENX(CL_INVOCATION_COUNT_num),
         GENX(CL_PRIMITIVES_COUNT_num),
         GENX(PS_INVOCATION_COUNT_num),
         GENX(HS_INVOCATION_COUNT_num),
         GENX(DS_INVOCATION_COUNT_num),
         GENX(CS_INVOCATION_COUNT_num),
      };
      const uint32_t reg = index_to_reg[q->index];

      ice->vtbl.store_register_mem64(batch, reg, bo, offset, false);
      break;
   }
   default:
      assert(false);
   }
}

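/**
 * Snapshot the SO_NUM_PRIMS_WRITTEN and SO_PRIM_STORAGE_NEEDED counters
 * for each stream covered by a stream-output overflow query.  @end
 * selects whether the values go in the start (false) or end (true) slots.
 */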
static void
write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   uint32_t offset = q->query_state_ref.offset;

   iris_emit_pipe_control_flush(batch,
                                "query: write SO overflow snapshots",
                                PIPE_CONTROL_CS_STALL |
                                PIPE_CONTROL_STALL_AT_SCOREBOARD);
   for (uint32_t i = 0; i < count; i++) {
      int s = q->index + i;
      int g_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].num_prims[end]);
      int w_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].prim_storage_needed[end]);
      ice->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
                                     bo, g_idx, false);
      ice->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
                                     bo, w_idx, false);
   }
}

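/**
 * Compute the difference between two raw timestamps, accounting for the
 * counter wrapping around at 2^TIMESTAMP_BITS.
 */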
static uint64_t
iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
   if (time0 > time1) {
      return (1ULL << TIMESTAMP_BITS) + time1 - time0;
   } else {
      return time1 - time0;
   }
}

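/**
 * A stream has overflowed if the primitives needing storage and the
 * primitives actually written diverge between the start/end snapshots.
 */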
static bool
stream_overflowed(struct iris_query_so_overflow *so, int s)
{
   return (so->stream[s].prim_storage_needed[1] -
           so->stream[s].prim_storage_needed[0]) !=
          (so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
}

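/**
 * Compute q->result from the CPU-visible snapshots and mark the query
 * ready.  Callers must ensure the snapshots have actually landed first.
 */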
static void
calculate_result_on_cpu(const struct gen_device_info *devinfo,
                        struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      q->result = q->map->end != q->map->start;
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* The timestamp is the single starting snapshot. */
      q->result = gen_device_info_timebase_scale(devinfo, q->map->start);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);
      q->result = gen_device_info_timebase_scale(devinfo, q->result);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      q->result = stream_overflowed((void *) q->map, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      q->result = false;
      for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
         q->result |= stream_overflowed((void *) q->map, i);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      q->result = q->map->end - q->map->start;

      /* WaDividePSInvocationCountBy4:HSW,BDW */
      if (GEN_GEN == 8 && q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
         q->result /= 4;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   default:
      q->result = q->map->end - q->map->start;
      break;
   }

   q->ready = true;
}

/**
 * Calculate the streamout overflow for stream \p idx:
 *
 * (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0])
 */
static struct gen_mi_value
calc_overflow_for_stream(struct gen_mi_builder *b,
                         struct iris_query *q,
                         int idx)
{
#define C(counter, i) query_mem64(q, \
   offsetof(struct iris_query_so_overflow, stream[idx].counter[i]))

   return gen_mi_isub(b, gen_mi_isub(b, C(num_prims, 1), C(num_prims, 0)),
                      gen_mi_isub(b, C(prim_storage_needed, 1),
                                  C(prim_storage_needed, 0)));
#undef C
}

/**
 * Calculate whether any stream has overflowed.
 */
static struct gen_mi_value
calc_overflow_any_stream(struct gen_mi_builder *b, struct iris_query *q)
{
   struct gen_mi_value stream_result[MAX_VERTEX_STREAMS];
   for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
      stream_result[i] = calc_overflow_for_stream(b, q, i);

   struct gen_mi_value result = stream_result[0];
   for (int i = 1; i < MAX_VERTEX_STREAMS; i++)
      result = gen_mi_ior(b, result, stream_result[i]);

   return result;
}

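/**
 * Predicate-style queries produce a boolean result rather than a counter.
 */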
static bool
query_is_boolean(enum pipe_query_type type)
{
   switch (type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      return true;
   default:
      return false;
   }
}

/**
 * Calculate the result using MI_MATH.
 */
static struct gen_mi_value
calculate_result_on_gpu(const struct gen_device_info *devinfo,
                        struct gen_mi_builder *b,
                        struct iris_query *q)
{
   struct gen_mi_value result;
   struct gen_mi_value start_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, start));
   struct gen_mi_value end_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, end));

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(b, q);
      break;
   case PIPE_QUERY_TIMESTAMP: {
      /* TODO: This discards any fractional bits of the timebase scale.
       * We would need to do a bit of fixed point math on the CS ALU, or
       * launch an actual shader to calculate this with full precision.
       */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_iand(b, gen_mi_imm((1ull << 36) - 1),
                           gen_mi_imul_imm(b, start_val, scale));
      break;
   }
   case PIPE_QUERY_TIME_ELAPSED: {
      /* TODO: This discards fractional bits (see above). */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_imul_imm(b, gen_mi_isub(b, end_val, start_val), scale);
      break;
   }
   default:
      result = gen_mi_isub(b, end_val, start_val);
      break;
   }

   /* WaDividePSInvocationCountBy4:HSW,BDW */
   if (GEN_GEN == 8 &&
       q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      result = gen_mi_ushr32_imm(b, result, 2);

   if (query_is_boolean(q->type))
      result = gen_mi_iand(b, gen_mi_nz(b, result), gen_mi_imm(1));

   return result;
}

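/**
 * Allocate a new query object.  Compute shader invocation statistics are
 * measured on the compute batch; all other queries use the render batch.
 */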
static struct pipe_query *
iris_create_query(struct pipe_context *ctx,
                  unsigned query_type,
                  unsigned index)
{
   struct iris_query *q = calloc(1, sizeof(struct iris_query));

   q->type = query_type;
   q->index = index;

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_CS_INVOCATIONS)
      q->batch_idx = IRIS_BATCH_COMPUTE;
   else
      q->batch_idx = IRIS_BATCH_RENDER;
   return (struct pipe_query *) q;
}

static void
iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
{
   struct iris_query *query = (void *) p_query;
   struct iris_screen *screen = (void *) ctx->screen;
   iris_syncpt_reference(screen, &query->syncpt, NULL);
   free(query);
}


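/**
 * Begin a query: allocate snapshot space from the query buffer uploader
 * and record the starting counter value(s).
 */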
static bool
iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   void *ptr = NULL;
   uint32_t size;

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      size = sizeof(struct iris_query_so_overflow);
   else
      size = sizeof(struct iris_query_snapshots);

   u_upload_alloc(ice->query_buffer_uploader, 0,
                  size, size, &q->query_state_ref.offset,
                  &q->query_state_ref.res, &ptr);

   if (!iris_resource_bo(q->query_state_ref.res))
      return false;

   q->map = ptr;
   if (!q->map)
      return false;

   q->result = 0ull;
   q->ready = false;
   WRITE_ONCE(q->map->snapshots_landed, false);

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = true;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, false);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, start));

   return true;
}

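/**
 * End a query: record the ending counter value(s) and mark the result as
 * available.  TIMESTAMP queries have no begin, so the snapshot space is
 * allocated and the single "start" value written here instead.
 */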
static bool
iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_batch *batch = &ice->batches[q->batch_idx];

   if (q->type == PIPE_QUERY_TIMESTAMP) {
      iris_begin_query(ctx, query);
      iris_batch_reference_signal_syncpt(batch, &q->syncpt);
      mark_available(ice, q);
      return true;
   }

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = false;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, true);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, end));

   iris_batch_reference_signal_syncpt(batch, &q->syncpt);
   mark_available(ice, q);

   return true;
}

/**
 * See if the snapshots have landed for a query, and if so, compute the
 * result and mark it ready.  Does not flush (unlike iris_get_query_result).
 */
static void
iris_check_query_no_flush(struct iris_context *ice, struct iris_query *q)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      calculate_result_on_cpu(devinfo, q);
   }
}

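/**
 * Fetch a query's result, flushing the batch that produces it if needed
 * and, when @wait is set, waiting on its syncpoint until the snapshots
 * have landed.
 */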
static bool
iris_get_query_result(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool wait,
                      union pipe_query_result *result)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_screen *screen = (void *) ctx->screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (unlikely(screen->no_hw)) {
      result->u64 = 0;
      return true;
   }

   if (!q->ready) {
      struct iris_batch *batch = &ice->batches[q->batch_idx];
      if (q->syncpt == iris_batch_get_signal_syncpt(batch))
         iris_batch_flush(batch);

      while (!READ_ONCE(q->map->snapshots_landed)) {
         if (wait)
            iris_wait_syncpt(ctx->screen, q->syncpt, INT64_MAX);
         else
            return false;
      }

      assert(READ_ONCE(q->map->snapshots_landed));
      calculate_result_on_cpu(devinfo, q);
   }

   assert(q->ready);

   result->u64 = q->result;

   return true;
}

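/**
 * Write a query's result (or its availability, for index == -1) into a
 * buffer object at the given offset.  If the result is already known on
 * the CPU, it is stored directly; otherwise it is computed on the GPU
 * using MI_MATH, optionally predicated on the snapshots having landed.
 */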
static void
iris_get_query_result_resource(struct pipe_context *ctx,
                               struct pipe_query *query,
                               bool wait,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *p_res,
                               unsigned offset)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   struct iris_resource *res = (void *) p_res;
   struct iris_bo *query_bo = iris_resource_bo(q->query_state_ref.res);
   struct iris_bo *dst_bo = iris_resource_bo(p_res);
   unsigned snapshots_landed_offset =
      offsetof(struct iris_query_snapshots, snapshots_landed);

   res->bind_history |= PIPE_BIND_QUERY_BUFFER;

   if (index == -1) {
      /* They're asking for the availability of the result.  If we still
       * have commands queued up which produce the result, submit them
       * now so that progress happens.  Either way, copy the snapshots
       * landed field to the destination resource.
       */
      if (q->syncpt == iris_batch_get_signal_syncpt(batch))
         iris_batch_flush(batch);

      ice->vtbl.copy_mem_mem(batch, dst_bo, offset,
                             query_bo, snapshots_landed_offset,
                             result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
      return;
   }

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      /* The final snapshots happen to have landed, so let's just compute
       * the result on the CPU now...
       */
      calculate_result_on_cpu(devinfo, q);
   }

   if (q->ready) {
      /* We happen to have the result on the CPU, so just copy it. */
      if (result_type <= PIPE_QUERY_TYPE_U32) {
         ice->vtbl.store_data_imm32(batch, dst_bo, offset, q->result);
      } else {
         ice->vtbl.store_data_imm64(batch, dst_bo, offset, q->result);
      }

      /* Make sure the result lands before they bind the QBO elsewhere
       * and use the result.
       */
      // XXX: Why? i965 doesn't do this.
      iris_emit_pipe_control_flush(batch,
                                   "query: unknown QBO flushing hack",
                                   PIPE_CONTROL_CS_STALL);
      return;
   }

   bool predicated = !wait && !q->stalled;

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   struct gen_mi_value result = calculate_result_on_gpu(devinfo, &b, q);
   struct gen_mi_value dst =
      result_type <= PIPE_QUERY_TYPE_U32 ? gen_mi_mem32(rw_bo(dst_bo, offset))
                                         : gen_mi_mem64(rw_bo(dst_bo, offset));

   if (predicated) {
      gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT),
                   gen_mi_mem64(ro_bo(query_bo, snapshots_landed_offset)));
      gen_mi_store_if(&b, dst, result);
   } else {
      gen_mi_store(&b, dst, result);
   }
}

static void
iris_set_active_query_state(struct pipe_context *ctx, bool enable)
{
   struct iris_context *ice = (void *) ctx;

   if (ice->state.statistics_counters_enabled == enable)
      return;

   // XXX: most packets aren't paying attention to this yet, because it'd
   //      have to be done dynamically at draw time, which is a pain
   ice->state.statistics_counters_enabled = enable;
   ice->state.dirty |= IRIS_DIRTY_CLIP |
                       IRIS_DIRTY_GS |
                       IRIS_DIRTY_RASTER |
                       IRIS_DIRTY_STREAMOUT |
                       IRIS_DIRTY_TCS |
                       IRIS_DIRTY_TES |
                       IRIS_DIRTY_VS |
                       IRIS_DIRTY_WM;
}

static void
set_predicate_enable(struct iris_context *ice, bool value)
{
   if (value)
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
   else
      ice->state.predicate = IRIS_PREDICATE_STATE_DONT_RENDER;
}

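/**
 * Compute the query result on the GPU and load it into MI_PREDICATE_RESULT
 * for conditional rendering, also saving it to memory so a later compute
 * dispatch can reload it (see iris_launch_grid).
 */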
static void
set_predicate_for_result(struct iris_context *ice,
                         struct iris_query *q,
                         bool inverted)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   /* The CPU doesn't have the query result yet; use hardware predication */
   ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT;

   /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
   iris_emit_pipe_control_flush(batch,
                                "conditional rendering: set predicate",
                                PIPE_CONTROL_FLUSH_ENABLE);
   q->stalled = true;

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   struct gen_mi_value result;

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(&b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(&b, q);
      break;
   default: {
      /* PIPE_QUERY_OCCLUSION_* */
      struct gen_mi_value start =
         query_mem64(q, offsetof(struct iris_query_snapshots, start));
      struct gen_mi_value end =
         query_mem64(q, offsetof(struct iris_query_snapshots, end));
      result = gen_mi_isub(&b, end, start);
      break;
   }
   }

   result = inverted ? gen_mi_z(&b, result) : gen_mi_nz(&b, result);
   result = gen_mi_iand(&b, result, gen_mi_imm(1));

   /* We immediately set the predicate on the render batch, as all the
    * counters come from 3D operations.  However, we may need to predicate
    * a compute dispatch, which executes in a different GEM context and has
    * a different MI_PREDICATE_RESULT register.  So, we save the result to
    * memory and reload it in iris_launch_grid.
    */
   gen_mi_value_ref(&b, result);
   gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT), result);
   gen_mi_store(&b, query_mem64(q, offsetof(struct iris_query_snapshots,
                                            predicate_result)), result);
   ice->state.compute_predicate = bo;
}

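/**
 * Set up conditional rendering based on a query.  If the result is
 * already known on the CPU, rendering is enabled or disabled directly;
 * otherwise the predicate is computed on the GPU, which may demote a
 * "no wait" request to a wait.
 */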
static void
iris_render_condition(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   /* The old condition isn't relevant; we'll update it if necessary */
   ice->state.compute_predicate = NULL;
   ice->condition.query = q;
   ice->condition.condition = condition;

   if (!q) {
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
      return;
   }

   iris_check_query_no_flush(ice, q);

   if (q->result || q->ready) {
      set_predicate_enable(ice, (q->result != 0) ^ condition);
   } else {
      if (mode == PIPE_RENDER_COND_NO_WAIT ||
          mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
         perf_debug(&ice->dbg, "Conditional rendering demoted from "
                    "\"no wait\" to \"wait\".");
      }
      set_predicate_for_result(ice, q, condition);
   }
}

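/**
 * Resolve the conditional-rendering predicate on the CPU: wait for the
 * query result and convert it into a plain render/don't-render decision
 * (used where hardware predication isn't an option).
 */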
static void
iris_resolve_conditional_render(struct iris_context *ice)
{
   struct pipe_context *ctx = (void *) ice;
   struct iris_query *q = ice->condition.query;
   struct pipe_query *query = (void *) q;
   union pipe_query_result result;

   if (ice->state.predicate != IRIS_PREDICATE_STATE_USE_BIT)
      return;

   assert(q);

   iris_get_query_result(ctx, query, true, &result);
   set_predicate_enable(ice, (q->result != 0) ^ ice->condition.condition);
}

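/**
 * Install the query-related entry points on the pipe_context and the
 * driver vtable.
 */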
void
genX(init_query)(struct iris_context *ice)
{
   struct pipe_context *ctx = &ice->ctx;

   ctx->create_query = iris_create_query;
   ctx->destroy_query = iris_destroy_query;
   ctx->begin_query = iris_begin_query;
   ctx->end_query = iris_end_query;
   ctx->get_query_result = iris_get_query_result;
   ctx->get_query_result_resource = iris_get_query_result_resource;
   ctx->set_active_query_state = iris_set_active_query_state;
   ctx->render_condition = iris_render_condition;

   ice->vtbl.resolve_conditional_render = iris_resolve_conditional_render;
}