iris: Annotate all BO uses with domain and sequence number information.
[mesa.git] src/gallium/drivers/iris/iris_query.c
/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_query.c
 *
 * ============================= GENXML CODE =============================
 *              [This file is compiled once per generation.]
 * =======================================================================
 *
 * Query object support.  This allows measuring various simple statistics
 * via counters on the GPU.  We use GenX code for MI_MATH calculations.
 */

#include <stdio.h>
#include <errno.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "iris_context.h"
#include "iris_defines.h"
#include "iris_fence.h"
#include "iris_monitor.h"
#include "iris_resource.h"
#include "iris_screen.h"

#include "iris_genx_macros.h"

#define SO_PRIM_STORAGE_NEEDED(n) (GENX(SO_PRIM_STORAGE_NEEDED0_num) + (n) * 8)
#define SO_NUM_PRIMS_WRITTEN(n)   (GENX(SO_NUM_PRIMS_WRITTEN0_num) + (n) * 8)

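/**
 * Driver-side state for a single query object.
 *
 * Most queries snapshot a counter at begin and end time into a small buffer
 * allocated from the query uploader (query_state_ref / map), and compute the
 * result as the difference of the two snapshots.  Batch-style performance
 * queries instead wrap an iris_monitor_object.
 */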
struct iris_query {
   enum pipe_query_type type;
   int index;

   bool ready;

   bool stalled;

   uint64_t result;

   struct iris_state_ref query_state_ref;
   struct iris_query_snapshots *map;
   struct iris_syncobj *syncobj;

   int batch_idx;

   struct iris_monitor_object *monitor;

   /* Fence for PIPE_QUERY_GPU_FINISHED. */
   struct pipe_fence_handle *fence;
};

struct iris_query_snapshots {
   /** iris_render_condition's saved MI_PREDICATE_RESULT value. */
   uint64_t predicate_result;

   /** Have the start/end snapshots landed? */
   uint64_t snapshots_landed;

   /** Starting and ending counter snapshots */
   uint64_t start;
   uint64_t end;
};

struct iris_query_so_overflow {
   uint64_t predicate_result;
   uint64_t snapshots_landed;

   struct {
      uint64_t prim_storage_needed[2];
      uint64_t num_prims[2];
   } stream[4];
};

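/**
 * Return a gen_mi_builder operand referencing a 64-bit field in the query's
 * snapshot buffer, e.g.:
 *
 *    query_mem64(q, offsetof(struct iris_query_snapshots, start))
 *
 * The address is flagged as an IRIS_DOMAIN_OTHER_WRITE access so the
 * batch's dependency tracking knows about the write.
 */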
static struct gen_mi_value
query_mem64(struct iris_query *q, uint32_t offset)
{
   struct iris_address addr = {
      .bo = iris_resource_bo(q->query_state_ref.res),
      .offset = q->query_state_ref.offset + offset,
      .write = true,
      .access = IRIS_DOMAIN_OTHER_WRITE
   };
   return gen_mi_mem64(addr);
}

/**
 * Is this type of query written by PIPE_CONTROL?
 */
static bool
iris_is_query_pipelined(struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_TIME_ELAPSED:
      return true;

   default:
      return false;
   }
}

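/**
 * Write the "snapshots landed" flag for a query.
 *
 * Non-pipelined queries can write the flag directly via store_data_imm64;
 * pipelined queries use a PIPE_CONTROL immediate write with FLUSH_ENABLE so
 * the flag lands only after the query results do.
 */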
static void
mark_available(struct iris_context *ice, struct iris_query *q)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
   unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   offset += q->query_state_ref.offset;

   if (!iris_is_query_pipelined(q)) {
      batch->screen->vtbl.store_data_imm64(batch, bo, offset, true);
   } else {
      /* Order available *after* the query results. */
      flags |= PIPE_CONTROL_FLUSH_ENABLE;
      iris_emit_pipe_control_write(batch, "query: mark available",
                                   flags, bo, offset, true);
   }
}

/**
 * Write a pipelined snapshot (PS_DEPTH_COUNT or a timestamp) to the given
 * offset in the query buffer via a PIPE_CONTROL.
 */
static void
iris_pipelined_write(struct iris_batch *batch,
                     struct iris_query *q,
                     enum pipe_control_flags flags,
                     unsigned offset)
{
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   const unsigned optional_cs_stall =
      GEN_GEN == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   iris_emit_pipe_control_write(batch, "query: pipelined snapshot write",
                                flags | optional_cs_stall,
                                bo, offset, 0ull);
}

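/**
 * Record a counter snapshot for the query at the given buffer offset.
 *
 * Occlusion and timestamp queries are written via pipelined PIPE_CONTROL
 * writes; statistics-style queries read the relevant counter register with
 * store_register_mem64.  Non-pipelined queries first stall so the counters
 * reflect all previously submitted work.
 */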
static void
write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   if (!iris_is_query_pipelined(q)) {
      iris_emit_pipe_control_flush(batch,
                                   "query: non-pipelined snapshot write",
                                   PIPE_CONTROL_CS_STALL |
                                   PIPE_CONTROL_STALL_AT_SCOREBOARD);
      q->stalled = true;
   }

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      if (GEN_GEN >= 10) {
         /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
          *  bit set prior to programming a PIPE_CONTROL with Write PS Depth
          *  Count sync operation."
          */
         iris_emit_pipe_control_flush(batch,
                                      "workaround: depth stall before writing "
                                      "PS_DEPTH_COUNT",
                                      PIPE_CONTROL_DEPTH_STALL);
      }
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_DEPTH_COUNT |
                           PIPE_CONTROL_DEPTH_STALL,
                           offset);
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_TIMESTAMP,
                           offset);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      batch->screen->vtbl.store_register_mem64(batch,
                                               q->index == 0 ?
                                               GENX(CL_INVOCATION_COUNT_num) :
                                               SO_PRIM_STORAGE_NEEDED(q->index),
                                               bo, offset, false);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      batch->screen->vtbl.store_register_mem64(batch,
                                               SO_NUM_PRIMS_WRITTEN(q->index),
                                               bo, offset, false);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: {
      static const uint32_t index_to_reg[] = {
         GENX(IA_VERTICES_COUNT_num),
         GENX(IA_PRIMITIVES_COUNT_num),
         GENX(VS_INVOCATION_COUNT_num),
         GENX(GS_INVOCATION_COUNT_num),
         GENX(GS_PRIMITIVES_COUNT_num),
         GENX(CL_INVOCATION_COUNT_num),
         GENX(CL_PRIMITIVES_COUNT_num),
         GENX(PS_INVOCATION_COUNT_num),
         GENX(HS_INVOCATION_COUNT_num),
         GENX(DS_INVOCATION_COUNT_num),
         GENX(CS_INVOCATION_COUNT_num),
      };
      const uint32_t reg = index_to_reg[q->index];

      batch->screen->vtbl.store_register_mem64(batch, reg, bo, offset, false);
      break;
   }
   default:
      assert(false);
   }
}

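/**
 * Record the begin (end == false) or end (end == true) snapshots for a
 * streamout overflow query: SO_NUM_PRIMS_WRITTEN and SO_PRIM_STORAGE_NEEDED
 * for each stream the query covers (one stream for SO_OVERFLOW_PREDICATE,
 * all four for SO_OVERFLOW_ANY_PREDICATE).
 */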
static void
write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   uint32_t offset = q->query_state_ref.offset;

   iris_emit_pipe_control_flush(batch,
                                "query: write SO overflow snapshots",
                                PIPE_CONTROL_CS_STALL |
                                PIPE_CONTROL_STALL_AT_SCOREBOARD);
   for (uint32_t i = 0; i < count; i++) {
      int s = q->index + i;
      int g_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].num_prims[end]);
      int w_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].prim_storage_needed[end]);
      batch->screen->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
                                               bo, g_idx, false);
      batch->screen->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
                                               bo, w_idx, false);
   }
}

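/**
 * Return time1 - time0, accounting for the fact that the raw GPU timestamp
 * is only TIMESTAMP_BITS (36) bits wide and wraps around.  For example, if
 * time0 is just below 2^36 and time1 has wrapped back to a small value,
 * the delta is (2^36 - time0) + time1 rather than a huge bogus number.
 */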
static uint64_t
iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
   if (time0 > time1) {
      return (1ULL << TIMESTAMP_BITS) + time1 - time0;
   } else {
      return time1 - time0;
   }
}

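/**
 * Did stream \p s overflow during the query?  It overflowed if the amount
 * of storage needed for primitives changed by a different amount than the
 * number of primitives actually written, i.e. some primitives did not fit
 * in the streamout buffers.
 */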
static bool
stream_overflowed(struct iris_query_so_overflow *so, int s)
{
   return (so->stream[s].prim_storage_needed[1] -
           so->stream[s].prim_storage_needed[0]) !=
          (so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
}

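/**
 * Compute q->result on the CPU from the mapped start/end snapshots and mark
 * the query ready.  Timestamps are scaled to nanoseconds and masked to
 * TIMESTAMP_BITS; Gen8 PS invocation counts are divided by 4 per
 * WaDividePSInvocationCountBy4.
 */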
static void
calculate_result_on_cpu(const struct gen_device_info *devinfo,
                        struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      q->result = q->map->end != q->map->start;
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* The timestamp is the single starting snapshot. */
      q->result = gen_device_info_timebase_scale(devinfo, q->map->start);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);
      q->result = gen_device_info_timebase_scale(devinfo, q->result);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      q->result = stream_overflowed((void *) q->map, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      q->result = false;
      for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
         q->result |= stream_overflowed((void *) q->map, i);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      q->result = q->map->end - q->map->start;

      /* WaDividePSInvocationCountBy4:HSW,BDW */
      if (GEN_GEN == 8 && q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
         q->result /= 4;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   default:
      q->result = q->map->end - q->map->start;
      break;
   }

   q->ready = true;
}

/**
 * Calculate the streamout overflow for stream \p idx:
 *
 * (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0])
 */
static struct gen_mi_value
calc_overflow_for_stream(struct gen_mi_builder *b,
                         struct iris_query *q,
                         int idx)
{
#define C(counter, i) query_mem64(q, \
   offsetof(struct iris_query_so_overflow, stream[idx].counter[i]))

   return gen_mi_isub(b, gen_mi_isub(b, C(num_prims, 1), C(num_prims, 0)),
                         gen_mi_isub(b, C(prim_storage_needed, 1),
                                        C(prim_storage_needed, 0)));
#undef C
}

/**
 * Calculate whether any stream has overflowed.
 */
static struct gen_mi_value
calc_overflow_any_stream(struct gen_mi_builder *b, struct iris_query *q)
{
   struct gen_mi_value stream_result[MAX_VERTEX_STREAMS];
   for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
      stream_result[i] = calc_overflow_for_stream(b, q, i);

   struct gen_mi_value result = stream_result[0];
   for (int i = 1; i < MAX_VERTEX_STREAMS; i++)
      result = gen_mi_ior(b, result, stream_result[i]);

   return result;
}

static bool
query_is_boolean(enum pipe_query_type type)
{
   switch (type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      return true;
   default:
      return false;
   }
}

/**
 * Calculate the result using MI_MATH.
 */
static struct gen_mi_value
calculate_result_on_gpu(const struct gen_device_info *devinfo,
                        struct gen_mi_builder *b,
                        struct iris_query *q)
{
   struct gen_mi_value result;
   struct gen_mi_value start_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, start));
   struct gen_mi_value end_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, end));

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(b, q);
      break;
   case PIPE_QUERY_TIMESTAMP: {
      /* TODO: This discards any fractional bits of the timebase scale.
       * We would need to do a bit of fixed point math on the CS ALU, or
       * launch an actual shader to calculate this with full precision.
       */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_iand(b, gen_mi_imm((1ull << 36) - 1),
                              gen_mi_imul_imm(b, start_val, scale));
      break;
   }
   case PIPE_QUERY_TIME_ELAPSED: {
      /* TODO: This discards fractional bits (see above). */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_imul_imm(b, gen_mi_isub(b, end_val, start_val), scale);
      break;
   }
   default:
      result = gen_mi_isub(b, end_val, start_val);
      break;
   }

   /* WaDividePSInvocationCountBy4:HSW,BDW */
   if (GEN_GEN == 8 &&
       q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      result = gen_mi_ushr32_imm(b, result, 2);

   if (query_is_boolean(q->type))
      result = gen_mi_iand(b, gen_mi_nz(b, result), gen_mi_imm(1));

   return result;
}

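/**
 * Create a normal (non-monitor) query.  Compute shader invocation statistics
 * are tracked on the compute batch; everything else lives on the render
 * batch.
 */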
static struct pipe_query *
iris_create_query(struct pipe_context *ctx,
                  unsigned query_type,
                  unsigned index)
{
   struct iris_query *q = calloc(1, sizeof(struct iris_query));

   q->type = query_type;
   q->index = index;
   q->monitor = NULL;

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_CS_INVOCATIONS)
      q->batch_idx = IRIS_BATCH_COMPUTE;
   else
      q->batch_idx = IRIS_BATCH_RENDER;
   return (struct pipe_query *) q;
}

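/**
 * Create a driver-specific "batch query" grouping several performance
 * counters; the actual counter handling is delegated to the iris_monitor
 * code.
 */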
static struct pipe_query *
iris_create_batch_query(struct pipe_context *ctx,
                        unsigned num_queries,
                        unsigned *query_types)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = calloc(1, sizeof(struct iris_query));
   if (unlikely(!q))
      return NULL;
   q->type = PIPE_QUERY_DRIVER_SPECIFIC;
   q->index = -1;
   q->monitor = iris_create_monitor_object(ice, num_queries, query_types);
   if (unlikely(!q->monitor)) {
      free(q);
      return NULL;
   }

   return (struct pipe_query *) q;
}

static void
iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
{
   struct iris_query *query = (void *) p_query;
   struct iris_screen *screen = (void *) ctx->screen;
   if (query->monitor) {
      iris_destroy_monitor_object(ctx, query->monitor);
      query->monitor = NULL;
   } else {
      iris_syncobj_reference(screen, &query->syncobj, NULL);
      screen->base.fence_reference(ctx->screen, &query->fence, NULL);
   }
   free(query);
}

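/**
 * Begin a query: allocate fresh snapshot space from the query uploader,
 * clear the "snapshots landed" flag, and record the starting snapshot(s).
 */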
static bool
iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_begin_monitor(ctx, q->monitor);

   void *ptr = NULL;
   uint32_t size;

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      size = sizeof(struct iris_query_so_overflow);
   else
      size = sizeof(struct iris_query_snapshots);

   u_upload_alloc(ice->query_buffer_uploader, 0,
                  size, size, &q->query_state_ref.offset,
                  &q->query_state_ref.res, &ptr);

   if (!iris_resource_bo(q->query_state_ref.res))
      return false;

   q->map = ptr;
   if (!q->map)
      return false;

   q->result = 0ull;
   q->ready = false;
   WRITE_ONCE(q->map->snapshots_landed, false);

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = true;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, false);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, start));

   return true;
}

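/**
 * End a query: record the ending snapshot(s) and mark availability.
 *
 * TIMESTAMP queries have no begin, so the buffer is allocated here and a
 * single snapshot is taken; GPU_FINISHED queries just grab a deferred fence
 * from the flush.
 */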
static bool
iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_end_monitor(ctx, q->monitor);

   if (q->type == PIPE_QUERY_GPU_FINISHED) {
      ctx->flush(ctx, &q->fence, PIPE_FLUSH_DEFERRED);
      return true;
   }

   struct iris_batch *batch = &ice->batches[q->batch_idx];

   if (q->type == PIPE_QUERY_TIMESTAMP) {
      iris_begin_query(ctx, query);
      iris_batch_reference_signal_syncobj(batch, &q->syncobj);
      mark_available(ice, q);
      return true;
   }

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = false;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, true);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, end));

   iris_batch_reference_signal_syncobj(batch, &q->syncobj);
   mark_available(ice, q);

   return true;
}

/**
 * See if the snapshots have landed for a query, and if so, compute the
 * result and mark it ready.  Does not flush (unlike iris_get_query_result).
 */
static void
iris_check_query_no_flush(struct iris_context *ice, struct iris_query *q)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      calculate_result_on_cpu(devinfo, q);
   }
}

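/**
 * Fetch a query result on the CPU.  If the snapshots haven't landed yet,
 * this flushes the batch that produces them (if still queued) and then
 * either waits on the query's syncobj or returns false, depending on
 * \p wait.
 */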
static bool
iris_get_query_result(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool wait,
                      union pipe_query_result *result)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_get_monitor_result(ctx, q->monitor, wait, result->batch);

   struct iris_screen *screen = (void *) ctx->screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (unlikely(screen->no_hw)) {
      result->u64 = 0;
      return true;
   }

   if (q->type == PIPE_QUERY_GPU_FINISHED) {
      struct pipe_screen *screen = ctx->screen;

      result->b = screen->fence_finish(screen, ctx, q->fence,
                                       wait ? PIPE_TIMEOUT_INFINITE : 0);
      return result->b;
   }

   if (!q->ready) {
      struct iris_batch *batch = &ice->batches[q->batch_idx];
      if (q->syncobj == iris_batch_get_signal_syncobj(batch))
         iris_batch_flush(batch);

      while (!READ_ONCE(q->map->snapshots_landed)) {
         if (wait)
            iris_wait_syncobj(ctx->screen, q->syncobj, INT64_MAX);
         else
            return false;
      }

      assert(READ_ONCE(q->map->snapshots_landed));
      calculate_result_on_cpu(devinfo, q);
   }

   assert(q->ready);

   result->u64 = q->result;

   return true;
}

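/**
 * Write a query result (or its availability, for index == -1) into a buffer
 * object.  If the result is already known on the CPU it is stored directly;
 * otherwise it is computed on the GPU with MI_MATH, optionally predicated
 * on the snapshots having landed when the caller doesn't want to wait.
 */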
static void
iris_get_query_result_resource(struct pipe_context *ctx,
                               struct pipe_query *query,
                               bool wait,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *p_res,
                               unsigned offset)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   struct iris_resource *res = (void *) p_res;
   struct iris_bo *query_bo = iris_resource_bo(q->query_state_ref.res);
   struct iris_bo *dst_bo = iris_resource_bo(p_res);
   unsigned snapshots_landed_offset =
      offsetof(struct iris_query_snapshots, snapshots_landed);

   res->bind_history |= PIPE_BIND_QUERY_BUFFER;

   if (index == -1) {
      /* They're asking for the availability of the result.  If we still
       * have commands queued up which produce the result, submit them
       * now so that progress happens.  Either way, copy the snapshots
       * landed field to the destination resource.
       */
      if (q->syncobj == iris_batch_get_signal_syncobj(batch))
         iris_batch_flush(batch);

      batch->screen->vtbl.copy_mem_mem(batch, dst_bo, offset,
                                       query_bo, snapshots_landed_offset,
                                       result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
      return;
   }

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      /* The final snapshots happen to have landed, so let's just compute
       * the result on the CPU now...
       */
      calculate_result_on_cpu(devinfo, q);
   }

   if (q->ready) {
      /* We happen to have the result on the CPU, so just copy it. */
      if (result_type <= PIPE_QUERY_TYPE_U32) {
         batch->screen->vtbl.store_data_imm32(batch, dst_bo, offset, q->result);
      } else {
         batch->screen->vtbl.store_data_imm64(batch, dst_bo, offset, q->result);
      }

      /* Make sure the result lands before they bind the QBO elsewhere
       * and use the result.
       */
      // XXX: Why?  i965 doesn't do this.
      iris_emit_pipe_control_flush(batch,
                                   "query: unknown QBO flushing hack",
                                   PIPE_CONTROL_CS_STALL);
      return;
   }

   bool predicated = !wait && !q->stalled;

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   iris_batch_sync_region_start(batch);

   struct gen_mi_value result = calculate_result_on_gpu(devinfo, &b, q);
   struct gen_mi_value dst =
      result_type <= PIPE_QUERY_TYPE_U32 ?
      gen_mi_mem32(rw_bo(dst_bo, offset, IRIS_DOMAIN_OTHER_WRITE)) :
      gen_mi_mem64(rw_bo(dst_bo, offset, IRIS_DOMAIN_OTHER_WRITE));

   if (predicated) {
      gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT),
                   gen_mi_mem64(ro_bo(query_bo, snapshots_landed_offset)));
      gen_mi_store_if(&b, dst, result);
   } else {
      gen_mi_store(&b, dst, result);
   }

   iris_batch_sync_region_end(batch);
}

static void
iris_set_active_query_state(struct pipe_context *ctx, bool enable)
{
   struct iris_context *ice = (void *) ctx;

   if (ice->state.statistics_counters_enabled == enable)
      return;

   // XXX: most packets aren't paying attention to this yet, because it'd
   // have to be done dynamically at draw time, which is a pain
   ice->state.statistics_counters_enabled = enable;
   ice->state.dirty |= IRIS_DIRTY_CLIP |
                       IRIS_DIRTY_RASTER |
                       IRIS_DIRTY_STREAMOUT |
                       IRIS_DIRTY_WM;
   ice->state.stage_dirty |= IRIS_STAGE_DIRTY_GS |
                             IRIS_STAGE_DIRTY_TCS |
                             IRIS_STAGE_DIRTY_TES |
                             IRIS_STAGE_DIRTY_VS;
}

static void
set_predicate_enable(struct iris_context *ice, bool value)
{
   if (value)
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
   else
      ice->state.predicate = IRIS_PREDICATE_STATE_DONT_RENDER;
}

static void
set_predicate_for_result(struct iris_context *ice,
                         struct iris_query *q,
                         bool inverted)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   iris_batch_sync_region_start(batch);

   /* The CPU doesn't have the query result yet; use hardware predication */
   ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT;

   /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
   iris_emit_pipe_control_flush(batch,
                                "conditional rendering: set predicate",
                                PIPE_CONTROL_FLUSH_ENABLE);
   q->stalled = true;

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   struct gen_mi_value result;

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(&b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(&b, q);
      break;
   default: {
      /* PIPE_QUERY_OCCLUSION_* */
      struct gen_mi_value start =
         query_mem64(q, offsetof(struct iris_query_snapshots, start));
      struct gen_mi_value end =
         query_mem64(q, offsetof(struct iris_query_snapshots, end));
      result = gen_mi_isub(&b, end, start);
      break;
   }
   }

   result = inverted ? gen_mi_z(&b, result) : gen_mi_nz(&b, result);
   result = gen_mi_iand(&b, result, gen_mi_imm(1));

   /* We immediately set the predicate on the render batch, as all the
    * counters come from 3D operations.  However, we may need to predicate
    * a compute dispatch, which executes in a different GEM context and has
    * a different MI_PREDICATE_RESULT register.  So, we save the result to
    * memory and reload it in iris_launch_grid.
    */
   gen_mi_value_ref(&b, result);
   gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT), result);
   gen_mi_store(&b, query_mem64(q, offsetof(struct iris_query_snapshots,
                                            predicate_result)), result);
   ice->state.compute_predicate = bo;

   iris_batch_sync_region_end(batch);
}

static void
iris_render_condition(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   /* The old condition isn't relevant; we'll update it if necessary */
   ice->state.compute_predicate = NULL;
   ice->condition.query = q;
   ice->condition.condition = condition;

   if (!q) {
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
      return;
   }

   iris_check_query_no_flush(ice, q);

   if (q->result || q->ready) {
      set_predicate_enable(ice, (q->result != 0) ^ condition);
   } else {
      if (mode == PIPE_RENDER_COND_NO_WAIT ||
          mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
         perf_debug(&ice->dbg, "Conditional rendering demoted from "
                    "\"no wait\" to \"wait\".");
      }
      set_predicate_for_result(ice, q, condition);
   }
}

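/**
 * Resolve conditional rendering on the CPU: if we're currently using
 * hardware predication (USE_BIT), wait for the query result and turn it
 * into a plain RENDER / DONT_RENDER decision.  Hooked up via
 * screen->vtbl.resolve_conditional_render.
 */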
static void
iris_resolve_conditional_render(struct iris_context *ice)
{
   struct pipe_context *ctx = (void *) ice;
   struct iris_query *q = ice->condition.query;
   struct pipe_query *query = (void *) q;
   union pipe_query_result result;

   if (ice->state.predicate != IRIS_PREDICATE_STATE_USE_BIT)
      return;

   assert(q);

   iris_get_query_result(ctx, query, true, &result);
   set_predicate_enable(ice, (q->result != 0) ^ ice->condition.condition);
}

void
genX(init_query)(struct iris_context *ice)
{
   struct pipe_context *ctx = &ice->ctx;
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;

   ctx->create_query = iris_create_query;
   ctx->create_batch_query = iris_create_batch_query;
   ctx->destroy_query = iris_destroy_query;
   ctx->begin_query = iris_begin_query;
   ctx->end_query = iris_end_query;
   ctx->get_query_result = iris_get_query_result;
   ctx->get_query_result_resource = iris_get_query_result_resource;
   ctx->set_active_query_state = iris_set_active_query_state;
   ctx->render_condition = iris_render_condition;

   screen->vtbl.resolve_conditional_render = iris_resolve_conditional_render;
}