/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_query.c
 *
 * ============================= GENXML CODE =============================
 *              [This file is compiled once per generation.]
 * =======================================================================
 *
 * Query object support. This allows measuring various simple statistics
 * via counters on the GPU. We use GenX code for MI_MATH calculations.
 */

#include <stdio.h>
#include <errno.h>
#include "perf/gen_perf.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "iris_context.h"
#include "iris_defines.h"
#include "iris_fence.h"
#include "iris_resource.h"
#include "iris_screen.h"

#include "iris_genx_macros.h"

#define SO_PRIM_STORAGE_NEEDED(n) (GENX(SO_PRIM_STORAGE_NEEDED0_num) + (n) * 8)
#define SO_NUM_PRIMS_WRITTEN(n)   (GENX(SO_NUM_PRIMS_WRITTEN0_num) + (n) * 8)

struct iris_query {
   enum pipe_query_type type;
   int index;

   bool ready;

   bool stalled;

   uint64_t result;

   struct iris_state_ref query_state_ref;
   struct iris_query_snapshots *map;
   struct iris_syncpt *syncpt;

   int batch_idx;
};

struct iris_query_snapshots {
   /** iris_render_condition's saved MI_PREDICATE_RESULT value. */
   uint64_t predicate_result;

   /** Have the start/end snapshots landed? */
   uint64_t snapshots_landed;

   /** Starting and ending counter snapshots */
   uint64_t start;
   uint64_t end;
};

struct iris_query_so_overflow {
   uint64_t predicate_result;
   uint64_t snapshots_landed;

   struct {
      uint64_t prim_storage_needed[2];
      uint64_t num_prims[2];
   } stream[4];
};

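/**
 * Return a gen_mi_value pointing at the 64-bit word at \p offset within
 * this query's snapshot buffer (addressed as writable).
 */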
static struct gen_mi_value
query_mem64(struct iris_query *q, uint32_t offset)
{
   struct iris_address addr = {
      .bo = iris_resource_bo(q->query_state_ref.res),
      .offset = q->query_state_ref.offset + offset,
      .write = true
   };
   return gen_mi_mem64(addr);
}

/**
 * Is this type of query written by PIPE_CONTROL?
 */
static bool
iris_is_query_pipelined(struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_TIME_ELAPSED:
      return true;

   default:
      return false;
   }
}

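/**
 * Set the query's "snapshots landed" field, either with a direct MI store
 * (non-pipelined queries) or a PIPE_CONTROL write that is ordered after
 * the query results (pipelined queries).
 */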
static void
mark_available(struct iris_context *ice, struct iris_query *q)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
   unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   offset += q->query_state_ref.offset;

   if (!iris_is_query_pipelined(q)) {
      ice->vtbl.store_data_imm64(batch, bo, offset, true);
   } else {
      /* Order available *after* the query results. */
      flags |= PIPE_CONTROL_FLUSH_ENABLE;
      iris_emit_pipe_control_write(batch, "query: mark available",
                                   flags, bo, offset, true);
   }
}

/**
 * Write a pipelined counter snapshot (PS_DEPTH_COUNT or the timestamp,
 * depending on \p flags) to the given buffer offset via a PIPE_CONTROL.
 */
static void
iris_pipelined_write(struct iris_batch *batch,
                     struct iris_query *q,
                     enum pipe_control_flags flags,
                     unsigned offset)
{
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   const unsigned optional_cs_stall =
      GEN_GEN == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   iris_emit_pipe_control_write(batch, "query: pipelined snapshot write",
                                flags | optional_cs_stall,
                                bo, offset, 0ull);
}

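/**
 * Record the current value of the counter underlying \p q at the given
 * offset in the snapshot buffer, either with a pipelined PIPE_CONTROL
 * write (occlusion and timestamp queries) or with a register-to-memory
 * store of the relevant statistics register.
 */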
static void
write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   if (!iris_is_query_pipelined(q)) {
      iris_emit_pipe_control_flush(batch,
                                   "query: non-pipelined snapshot write",
                                   PIPE_CONTROL_CS_STALL |
                                   PIPE_CONTROL_STALL_AT_SCOREBOARD);
      q->stalled = true;
   }

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      if (GEN_GEN >= 10) {
         /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
          *  bit set prior to programming a PIPE_CONTROL with Write PS Depth
          *  Count sync operation."
          */
         iris_emit_pipe_control_flush(batch,
                                      "workaround: depth stall before writing "
                                      "PS_DEPTH_COUNT",
                                      PIPE_CONTROL_DEPTH_STALL);
      }
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_DEPTH_COUNT |
                           PIPE_CONTROL_DEPTH_STALL,
                           offset);
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_TIMESTAMP,
                           offset);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      ice->vtbl.store_register_mem64(batch,
                                     q->index == 0 ?
                                     GENX(CL_INVOCATION_COUNT_num) :
                                     SO_PRIM_STORAGE_NEEDED(q->index),
                                     bo, offset, false);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      ice->vtbl.store_register_mem64(batch,
                                     SO_NUM_PRIMS_WRITTEN(q->index),
                                     bo, offset, false);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: {
      static const uint32_t index_to_reg[] = {
         GENX(IA_VERTICES_COUNT_num),
         GENX(IA_PRIMITIVES_COUNT_num),
         GENX(VS_INVOCATION_COUNT_num),
         GENX(GS_INVOCATION_COUNT_num),
         GENX(GS_PRIMITIVES_COUNT_num),
         GENX(CL_INVOCATION_COUNT_num),
         GENX(CL_PRIMITIVES_COUNT_num),
         GENX(PS_INVOCATION_COUNT_num),
         GENX(HS_INVOCATION_COUNT_num),
         GENX(DS_INVOCATION_COUNT_num),
         GENX(CS_INVOCATION_COUNT_num),
      };
      const uint32_t reg = index_to_reg[q->index];

      ice->vtbl.store_register_mem64(batch, reg, bo, offset, false);
      break;
   }
   default:
      assert(false);
   }
}

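/**
 * Snapshot the SO_NUM_PRIMS_WRITTEN and SO_PRIM_STORAGE_NEEDED registers
 * for each stream covered by an overflow query. \p end selects whether
 * this is the begin (false) or end (true) snapshot.
 */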
static void
write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   uint32_t offset = q->query_state_ref.offset;

   iris_emit_pipe_control_flush(batch,
                                "query: write SO overflow snapshots",
                                PIPE_CONTROL_CS_STALL |
                                PIPE_CONTROL_STALL_AT_SCOREBOARD);
   for (uint32_t i = 0; i < count; i++) {
      int s = q->index + i;
      int g_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].num_prims[end]);
      int w_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].prim_storage_needed[end]);
      ice->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
                                     bo, g_idx, false);
      ice->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
                                     bo, w_idx, false);
   }
}

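/**
 * Compute the difference between two raw GPU timestamps, accounting for
 * the counter wrapping around at 2^TIMESTAMP_BITS.
 */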
static uint64_t
iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
   if (time0 > time1) {
      return (1ULL << TIMESTAMP_BITS) + time1 - time0;
   } else {
      return time1 - time0;
   }
}

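/**
 * A stream has overflowed if the number of primitives that needed storage
 * differs from the number actually written to the streamout buffers.
 */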
static bool
stream_overflowed(struct iris_query_so_overflow *so, int s)
{
   return (so->stream[s].prim_storage_needed[1] -
           so->stream[s].prim_storage_needed[0]) !=
          (so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
}

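/**
 * Compute the final query result on the CPU from the start/end snapshots,
 * including timestamp scaling and any workaround adjustments, and mark
 * the query as ready.
 */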
static void
calculate_result_on_cpu(const struct gen_device_info *devinfo,
                        struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      q->result = q->map->end != q->map->start;
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* The timestamp is the single starting snapshot. */
      q->result = gen_device_info_timebase_scale(devinfo, q->map->start);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);
      q->result = gen_device_info_timebase_scale(devinfo, q->result);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      q->result = stream_overflowed((void *) q->map, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      q->result = false;
      for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
         q->result |= stream_overflowed((void *) q->map, i);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      q->result = q->map->end - q->map->start;

      /* WaDividePSInvocationCountBy4:HSW,BDW */
      if (GEN_GEN == 8 && q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
         q->result /= 4;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   default:
      q->result = q->map->end - q->map->start;
      break;
   }

   q->ready = true;
}

/**
 * Calculate the streamout overflow for stream \p idx:
 *
 * (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0])
 */
static struct gen_mi_value
calc_overflow_for_stream(struct gen_mi_builder *b,
                         struct iris_query *q,
                         int idx)
{
#define C(counter, i) query_mem64(q, \
   offsetof(struct iris_query_so_overflow, stream[idx].counter[i]))

   return gen_mi_isub(b, gen_mi_isub(b, C(num_prims, 1), C(num_prims, 0)),
                      gen_mi_isub(b, C(prim_storage_needed, 1),
                                     C(prim_storage_needed, 0)));
#undef C
}

/**
 * Calculate whether any stream has overflowed.
 */
static struct gen_mi_value
calc_overflow_any_stream(struct gen_mi_builder *b, struct iris_query *q)
{
   struct gen_mi_value stream_result[MAX_VERTEX_STREAMS];
   for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
      stream_result[i] = calc_overflow_for_stream(b, q, i);

   struct gen_mi_value result = stream_result[0];
   for (int i = 1; i < MAX_VERTEX_STREAMS; i++)
      result = gen_mi_ior(b, result, stream_result[i]);

   return result;
}

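/**
 * Queries whose result is a true/false value rather than a counter.
 */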
static bool
query_is_boolean(enum pipe_query_type type)
{
   switch (type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      return true;
   default:
      return false;
   }
}

/**
 * Calculate the result using MI_MATH.
 */
static struct gen_mi_value
calculate_result_on_gpu(const struct gen_device_info *devinfo,
                        struct gen_mi_builder *b,
                        struct iris_query *q)
{
   struct gen_mi_value result;
   struct gen_mi_value start_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, start));
   struct gen_mi_value end_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, end));

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(b, q);
      break;
   case PIPE_QUERY_TIMESTAMP: {
      /* TODO: This discards any fractional bits of the timebase scale.
       * We would need to do a bit of fixed point math on the CS ALU, or
       * launch an actual shader to calculate this with full precision.
       */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_iand(b, gen_mi_imm((1ull << 36) - 1),
                           gen_mi_imul_imm(b, start_val, scale));
      break;
   }
   case PIPE_QUERY_TIME_ELAPSED: {
      /* TODO: This discards fractional bits (see above). */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_imul_imm(b, gen_mi_isub(b, end_val, start_val), scale);
      break;
   }
   default:
      result = gen_mi_isub(b, end_val, start_val);
      break;
   }

   /* WaDividePSInvocationCountBy4:HSW,BDW */
   if (GEN_GEN == 8 &&
       q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      result = gen_mi_ushr32_imm(b, result, 2);

   if (query_is_boolean(q->type))
      result = gen_mi_iand(b, gen_mi_nz(b, result), gen_mi_imm(1));

   return result;
}

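/**
 * The pipe_context::create_query driver hook. Compute shader invocation
 * queries run on the compute batch; everything else uses the render batch.
 */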
static struct pipe_query *
iris_create_query(struct pipe_context *ctx,
                  unsigned query_type,
                  unsigned index)
{
   struct iris_query *q = calloc(1, sizeof(struct iris_query));

   q->type = query_type;
   q->index = index;

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_CS_INVOCATIONS)
      q->batch_idx = IRIS_BATCH_COMPUTE;
   else
      q->batch_idx = IRIS_BATCH_RENDER;
   return (struct pipe_query *) q;
}

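/**
 * The pipe_context::destroy_query driver hook. Frees the query object and
 * releases its sync point reference.
 */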
static void
iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
{
   struct iris_query *query = (void *) p_query;
   struct iris_screen *screen = (void *) ctx->screen;
   iris_syncpt_reference(screen, &query->syncpt, NULL);
   free(query);
}

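/**
 * The pipe_context::begin_query driver hook. Allocates a snapshot buffer
 * from the query uploader and records the starting counter values.
 */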
static bool
iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   void *ptr = NULL;
   uint32_t size;

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      size = sizeof(struct iris_query_so_overflow);
   else
      size = sizeof(struct iris_query_snapshots);

   u_upload_alloc(ice->query_buffer_uploader, 0,
                  size, size, &q->query_state_ref.offset,
                  &q->query_state_ref.res, &ptr);

   if (!iris_resource_bo(q->query_state_ref.res))
      return false;

   q->map = ptr;
   if (!q->map)
      return false;

   q->result = 0ull;
   q->ready = false;
   WRITE_ONCE(q->map->snapshots_landed, false);

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = true;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, false);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, start));

   return true;
}

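/**
 * The pipe_context::end_query driver hook. Records the ending counter
 * snapshots and marks the query available. Timestamp queries have no
 * begin, so the single (start) snapshot is written here by reusing
 * iris_begin_query.
 */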
static bool
iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_batch *batch = &ice->batches[q->batch_idx];

   if (q->type == PIPE_QUERY_TIMESTAMP) {
      iris_begin_query(ctx, query);
      iris_batch_reference_signal_syncpt(batch, &q->syncpt);
      mark_available(ice, q);
      return true;
   }

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = false;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, true);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, end));

   iris_batch_reference_signal_syncpt(batch, &q->syncpt);
   mark_available(ice, q);

   return true;
}

/**
 * See if the snapshots have landed for a query, and if so, compute the
 * result and mark it ready. Does not flush (unlike iris_get_query_result).
 */
static void
iris_check_query_no_flush(struct iris_context *ice, struct iris_query *q)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      calculate_result_on_cpu(devinfo, q);
   }
}

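/**
 * The pipe_context::get_query_result driver hook. Flushes the batch that
 * produces the snapshots if needed, then either waits for them to land or
 * returns false when \p wait is not set.
 */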
static bool
iris_get_query_result(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool wait,
                      union pipe_query_result *result)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_screen *screen = (void *) ctx->screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (unlikely(screen->no_hw)) {
      result->u64 = 0;
      return true;
   }

   if (!q->ready) {
      struct iris_batch *batch = &ice->batches[q->batch_idx];
      if (q->syncpt == iris_batch_get_signal_syncpt(batch))
         iris_batch_flush(batch);

      while (!READ_ONCE(q->map->snapshots_landed)) {
         if (wait)
            iris_wait_syncpt(ctx->screen, q->syncpt, INT64_MAX);
         else
            return false;
      }

      assert(READ_ONCE(q->map->snapshots_landed));
      calculate_result_on_cpu(devinfo, q);
   }

   assert(q->ready);

   result->u64 = q->result;

   return true;
}

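/**
 * The pipe_context::get_query_result_resource driver hook: write the query
 * result (or its availability, when \p index is -1) into a buffer object,
 * computing it on the CPU if it is already known and with MI_MATH on the
 * GPU otherwise.
 */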
static void
iris_get_query_result_resource(struct pipe_context *ctx,
                               struct pipe_query *query,
                               bool wait,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *p_res,
                               unsigned offset)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   struct iris_resource *res = (void *) p_res;
   struct iris_bo *query_bo = iris_resource_bo(q->query_state_ref.res);
   struct iris_bo *dst_bo = iris_resource_bo(p_res);
   unsigned snapshots_landed_offset =
      offsetof(struct iris_query_snapshots, snapshots_landed);

   res->bind_history |= PIPE_BIND_QUERY_BUFFER;

   if (index == -1) {
      /* They're asking for the availability of the result. If we still
       * have commands queued up which produce the result, submit them
       * now so that progress happens. Either way, copy the snapshots
       * landed field to the destination resource.
       */
      if (q->syncpt == iris_batch_get_signal_syncpt(batch))
         iris_batch_flush(batch);

      ice->vtbl.copy_mem_mem(batch, dst_bo, offset,
                             query_bo, snapshots_landed_offset,
                             result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
      return;
   }

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      /* The final snapshots happen to have landed, so let's just compute
       * the result on the CPU now...
       */
      calculate_result_on_cpu(devinfo, q);
   }

   if (q->ready) {
      /* We happen to have the result on the CPU, so just copy it. */
      if (result_type <= PIPE_QUERY_TYPE_U32) {
         ice->vtbl.store_data_imm32(batch, dst_bo, offset, q->result);
      } else {
         ice->vtbl.store_data_imm64(batch, dst_bo, offset, q->result);
      }

      /* Make sure the result lands before they bind the QBO elsewhere
       * and use the result.
       */
      // XXX: Why? i965 doesn't do this.
      iris_emit_pipe_control_flush(batch,
                                   "query: unknown QBO flushing hack",
                                   PIPE_CONTROL_CS_STALL);
      return;
   }

   bool predicated = !wait && !q->stalled;

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   struct gen_mi_value result = calculate_result_on_gpu(devinfo, &b, q);
   struct gen_mi_value dst =
      result_type <= PIPE_QUERY_TYPE_U32 ? gen_mi_mem32(rw_bo(dst_bo, offset))
                                         : gen_mi_mem64(rw_bo(dst_bo, offset));

   if (predicated) {
      gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT),
                   gen_mi_mem64(ro_bo(query_bo, snapshots_landed_offset)));
      gen_mi_store_if(&b, dst, result);
   } else {
      gen_mi_store(&b, dst, result);
   }
}

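/**
 * The pipe_context::set_active_query_state driver hook, used to pause and
 * resume statistics counters. Flags the relevant state as dirty so later
 * draws can re-emit packets with counters enabled or disabled.
 */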
static void
iris_set_active_query_state(struct pipe_context *ctx, bool enable)
{
   struct iris_context *ice = (void *) ctx;

   if (ice->state.statistics_counters_enabled == enable)
      return;

   // XXX: most packets aren't paying attention to this yet, because it'd
   // have to be done dynamically at draw time, which is a pain
   ice->state.statistics_counters_enabled = enable;
   ice->state.dirty |= IRIS_DIRTY_CLIP |
                       IRIS_DIRTY_GS |
                       IRIS_DIRTY_RASTER |
                       IRIS_DIRTY_STREAMOUT |
                       IRIS_DIRTY_TCS |
                       IRIS_DIRTY_TES |
                       IRIS_DIRTY_VS |
                       IRIS_DIRTY_WM;
}

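/* Set an already-known predicate state (render or don't render). */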
static void
set_predicate_enable(struct iris_context *ice, bool value)
{
   if (value)
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
   else
      ice->state.predicate = IRIS_PREDICATE_STATE_DONT_RENDER;
}

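/**
 * Compute the predicate for a query result on the GPU, store it in
 * MI_PREDICATE_RESULT, and stash a copy in memory for later compute
 * dispatches (see the comment at the end of this function).
 */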
static void
set_predicate_for_result(struct iris_context *ice,
                         struct iris_query *q,
                         bool inverted)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   /* The CPU doesn't have the query result yet; use hardware predication */
   ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT;

   /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
   iris_emit_pipe_control_flush(batch,
                                "conditional rendering: set predicate",
                                PIPE_CONTROL_FLUSH_ENABLE);
   q->stalled = true;

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   struct gen_mi_value result;

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(&b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(&b, q);
      break;
   default: {
      /* PIPE_QUERY_OCCLUSION_* */
      struct gen_mi_value start =
         query_mem64(q, offsetof(struct iris_query_snapshots, start));
      struct gen_mi_value end =
         query_mem64(q, offsetof(struct iris_query_snapshots, end));
      result = gen_mi_isub(&b, end, start);
      break;
   }
   }

   result = inverted ? gen_mi_z(&b, result) : gen_mi_nz(&b, result);
   result = gen_mi_iand(&b, result, gen_mi_imm(1));

   /* We immediately set the predicate on the render batch, as all the
    * counters come from 3D operations. However, we may need to predicate
    * a compute dispatch, which executes in a different GEM context and has
    * a different MI_PREDICATE_RESULT register. So, we save the result to
    * memory and reload it in iris_launch_grid.
    */
   gen_mi_value_ref(&b, result);
   gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT), result);
   gen_mi_store(&b, query_mem64(q, offsetof(struct iris_query_snapshots,
                                            predicate_result)), result);
   ice->state.compute_predicate = bo;
}

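/**
 * The pipe_context::render_condition driver hook. If the result is already
 * known on the CPU, simply enables or disables rendering; otherwise, sets
 * up GPU predication based on the query's snapshots.
 */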
static void
iris_render_condition(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   /* The old condition isn't relevant; we'll update it if necessary */
   ice->state.compute_predicate = NULL;
   ice->condition.query = q;
   ice->condition.condition = condition;

   if (!q) {
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
      return;
   }

   iris_check_query_no_flush(ice, q);

   if (q->result || q->ready) {
      set_predicate_enable(ice, (q->result != 0) ^ condition);
   } else {
      if (mode == PIPE_RENDER_COND_NO_WAIT ||
          mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
         perf_debug(&ice->dbg, "Conditional rendering demoted from "
                    "\"no wait\" to \"wait\".");
      }
      set_predicate_for_result(ice, q, condition);
   }
}

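/**
 * Resolve an outstanding GPU-predicated render condition to a CPU-known
 * value, waiting for the query result if necessary; likely intended for
 * paths that cannot honor the hardware predicate bit.
 */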
static void
iris_resolve_conditional_render(struct iris_context *ice)
{
   struct pipe_context *ctx = (void *) ice;
   struct iris_query *q = ice->condition.query;
   struct pipe_query *query = (void *) q;
   union pipe_query_result result;

   if (ice->state.predicate != IRIS_PREDICATE_STATE_USE_BIT)
      return;

   assert(q);

   iris_get_query_result(ctx, query, true, &result);
   set_predicate_enable(ice, (q->result != 0) ^ ice->condition.condition);
}

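/**
 * Wire up the query-related driver hooks for this generation's variant of
 * the context.
 */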
void
genX(init_query)(struct iris_context *ice)
{
   struct pipe_context *ctx = &ice->ctx;

   ctx->create_query = iris_create_query;
   ctx->destroy_query = iris_destroy_query;
   ctx->begin_query = iris_begin_query;
   ctx->end_query = iris_end_query;
   ctx->get_query_result = iris_get_query_result;
   ctx->get_query_result_resource = iris_get_query_result_resource;
   ctx->set_active_query_state = iris_set_active_query_state;
   ctx->render_condition = iris_render_condition;

   ice->vtbl.resolve_conditional_render = iris_resolve_conditional_render;
}