/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
/**
 * ============================= GENXML CODE =============================
 *              [This file is compiled once per generation.]
 * =======================================================================
 *
 * Query object support.  This allows measuring various simple statistics
 * via counters on the GPU.  We use GenX code for MI_MATH calculations.
 */
#include "perf/gen_perf.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "iris_context.h"
#include "iris_defines.h"
#include "iris_fence.h"
#include "iris_monitor.h"
#include "iris_resource.h"
#include "iris_screen.h"

#include "iris_genx_macros.h"
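/* Each transform feedback stream has its own pair of 64-bit counter
 * registers, spaced 8 bytes apart, so stream <n>'s register lives at the
 * stream-0 offset plus n * 8 (e.g. SO_NUM_PRIMS_WRITTEN(2) is the stream-0
 * register address plus 16).
 */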
#define SO_PRIM_STORAGE_NEEDED(n) (GENX(SO_PRIM_STORAGE_NEEDED0_num) + (n) * 8)
#define SO_NUM_PRIMS_WRITTEN(n)   (GENX(SO_NUM_PRIMS_WRITTEN0_num) + (n) * 8)
struct iris_query {
   enum pipe_query_type type;
   int index;

   bool ready;

   bool stalled;

   uint64_t result;

   struct iris_state_ref query_state_ref;
   struct iris_query_snapshots *map;
   struct iris_syncpt *syncpt;

   int batch_idx;

   struct iris_monitor_object *monitor;

   /* Fence for PIPE_QUERY_GPU_FINISHED. */
   struct pipe_fence_handle *fence;
};
struct iris_query_snapshots {
   /** iris_render_condition's saved MI_PREDICATE_RESULT value. */
   uint64_t predicate_result;

   /** Have the start/end snapshots landed? */
   uint64_t snapshots_landed;

   /** Starting and ending counter snapshots */
   uint64_t start;
   uint64_t end;
};
struct iris_query_so_overflow {
   uint64_t predicate_result;
   uint64_t snapshots_landed;

   struct {
      uint64_t prim_storage_needed[2];
      uint64_t num_prims[2];
   } stream[4];
};
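/* In each stream's counter pairs, index [0] holds the begin-query snapshot
 * and [1] the end-query snapshot; write_overflow_values() picks the slot
 * via its "end" argument.
 */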
static struct gen_mi_value
query_mem64(struct iris_query *q, uint32_t offset)
{
   struct iris_address addr = {
      .bo = iris_resource_bo(q->query_state_ref.res),
      .offset = q->query_state_ref.offset + offset,
      .write = true
   };
   return gen_mi_mem64(addr);
}
/**
 * Is this type of query written by PIPE_CONTROL?
 */
static bool
iris_is_query_pipelined(struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_TIME_ELAPSED:
      return true;

   default:
      return false;
   }
}
static void
mark_available(struct iris_context *ice, struct iris_query *q)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
   unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   offset += q->query_state_ref.offset;

   if (!iris_is_query_pipelined(q)) {
      ice->vtbl.store_data_imm64(batch, bo, offset, true);
   } else {
      /* Order available *after* the query results. */
      flags |= PIPE_CONTROL_FLUSH_ENABLE;
      iris_emit_pipe_control_write(batch, "query: mark available",
                                   flags, bo, offset, true);
   }
}
/**
 * Write a pipelined snapshot (e.g. PS_DEPTH_COUNT or a timestamp) to the
 * query buffer at the given offset, via a PIPE_CONTROL post-sync write.
 */
static void
iris_pipelined_write(struct iris_batch *batch,
                     struct iris_query *q,
                     enum pipe_control_flags flags,
                     unsigned offset)
{
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   const unsigned optional_cs_stall =
      GEN_GEN == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   iris_emit_pipe_control_write(batch, "query: pipelined snapshot write",
                                flags | optional_cs_stall,
                                bo, offset, 0ull);
}
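/* write_value() routes each query type to the right snapshot mechanism:
 * occlusion and timestamp queries use pipelined PIPE_CONTROL post-sync
 * writes, while primitive and pipeline-statistics queries store the
 * relevant counter register (MI_STORE_REGISTER_MEM via store_register_mem64).
 * For example, PIPE_QUERY_PRIMITIVES_EMITTED on stream 1 snapshots
 * SO_NUM_PRIMS_WRITTEN(1), i.e. the stream-0 register plus 8 bytes.
 */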
static void
write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   if (!iris_is_query_pipelined(q)) {
      iris_emit_pipe_control_flush(batch,
                                   "query: non-pipelined snapshot write",
                                   PIPE_CONTROL_CS_STALL |
                                   PIPE_CONTROL_STALL_AT_SCOREBOARD);
      q->stalled = true;
   }

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      if (GEN_GEN >= 10) {
         /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
          *  bit set prior to programming a PIPE_CONTROL with Write PS Depth
          *  Count sync operation."
          */
         iris_emit_pipe_control_flush(batch,
                                      "workaround: depth stall before writing "
                                      "PS_DEPTH_COUNT",
                                      PIPE_CONTROL_DEPTH_STALL);
      }
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_DEPTH_COUNT |
                           PIPE_CONTROL_DEPTH_STALL,
                           offset);
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_TIMESTAMP,
                           offset);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      ice->vtbl.store_register_mem64(batch,
                                     q->index == 0 ?
                                     GENX(CL_INVOCATION_COUNT_num) :
                                     SO_PRIM_STORAGE_NEEDED(q->index),
                                     bo, offset, false);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      ice->vtbl.store_register_mem64(batch,
                                     SO_NUM_PRIMS_WRITTEN(q->index),
                                     bo, offset, false);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: {
      static const uint32_t index_to_reg[] = {
         GENX(IA_VERTICES_COUNT_num),
         GENX(IA_PRIMITIVES_COUNT_num),
         GENX(VS_INVOCATION_COUNT_num),
         GENX(GS_INVOCATION_COUNT_num),
         GENX(GS_PRIMITIVES_COUNT_num),
         GENX(CL_INVOCATION_COUNT_num),
         GENX(CL_PRIMITIVES_COUNT_num),
         GENX(PS_INVOCATION_COUNT_num),
         GENX(HS_INVOCATION_COUNT_num),
         GENX(DS_INVOCATION_COUNT_num),
         GENX(CS_INVOCATION_COUNT_num),
      };
      const uint32_t reg = index_to_reg[q->index];

      ice->vtbl.store_register_mem64(batch, reg, bo, offset, false);
      break;
   }
   default:
      assert(false);
   }
}
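/* The streamout overflow queries snapshot both counters for each stream:
 * PIPE_QUERY_SO_OVERFLOW_PREDICATE covers the single stream q->index,
 * while SO_OVERFLOW_ANY_PREDICATE covers all four streams (count below).
 */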
static void
write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   uint32_t offset = q->query_state_ref.offset;

   iris_emit_pipe_control_flush(batch,
                                "query: write SO overflow snapshots",
                                PIPE_CONTROL_CS_STALL |
                                PIPE_CONTROL_STALL_AT_SCOREBOARD);
   for (uint32_t i = 0; i < count; i++) {
      int s = q->index + i;
      int g_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].num_prims[end]);
      int w_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].prim_storage_needed[end]);
      ice->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
                                     bo, g_idx, false);
      ice->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
                                     bo, w_idx, false);
   }
}
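/* The raw TIMESTAMP register is a free-running 36-bit counter
 * (TIMESTAMP_BITS), so the end snapshot can be numerically smaller than
 * the start snapshot if the counter wrapped mid-query.  For example, with
 * time0 = (1ull << 36) - 100 and time1 = 50, the delta below is
 * (1ull << 36) + 50 - ((1ull << 36) - 100) = 150 ticks.
 */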
static uint64_t
iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
   if (time0 > time1) {
      return (1ULL << TIMESTAMP_BITS) + time1 - time0;
   } else {
      return time1 - time0;
   }
}
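/* A stream overflowed if the number of primitives generated (the
 * SO_PRIM_STORAGE_NEEDED delta) differs from the number actually written
 * to the streamout buffer (the SO_NUM_PRIMS_WRITTEN delta); e.g. 100
 * generated but only 96 written means 4 primitives were dropped.
 */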
static bool
stream_overflowed(struct iris_query_so_overflow *so, int s)
{
   return (so->stream[s].prim_storage_needed[1] -
           so->stream[s].prim_storage_needed[0]) !=
          (so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
}
static void
calculate_result_on_cpu(const struct gen_device_info *devinfo,
                        struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      q->result = q->map->end != q->map->start;
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* The timestamp is the single starting snapshot. */
      q->result = gen_device_info_timebase_scale(devinfo, q->map->start);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);
      q->result = gen_device_info_timebase_scale(devinfo, q->result);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      q->result = stream_overflowed((void *) q->map, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      q->result = false;
      for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
         q->result |= stream_overflowed((void *) q->map, i);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      q->result = q->map->end - q->map->start;

      /* WaDividePSInvocationCountBy4:HSW,BDW */
      if (GEN_GEN == 8 && q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
         q->result /= 4;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   default:
      q->result = q->map->end - q->map->start;
      break;
   }

   q->ready = true;
}
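/* The helpers below compute the same results on the GPU instead, using the
 * command streamer's MI_MATH ALU through the gen_mi_builder interface, so
 * predication and QBO writes can consume a query result without a CPU
 * round trip.
 */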
/**
 * Calculate the streamout overflow for stream \p idx:
 *
 * (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0])
 */
static struct gen_mi_value
calc_overflow_for_stream(struct gen_mi_builder *b,
                         struct iris_query *q,
                         int idx)
{
#define C(counter, i) query_mem64(q, \
   offsetof(struct iris_query_so_overflow, stream[idx].counter[i]))

   return gen_mi_isub(b, gen_mi_isub(b, C(num_prims, 1), C(num_prims, 0)),
                      gen_mi_isub(b, C(prim_storage_needed, 1),
                                     C(prim_storage_needed, 0)));
#undef C
}
/**
 * Calculate whether any stream has overflowed.
 */
static struct gen_mi_value
calc_overflow_any_stream(struct gen_mi_builder *b, struct iris_query *q)
{
   struct gen_mi_value stream_result[MAX_VERTEX_STREAMS];
   for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
      stream_result[i] = calc_overflow_for_stream(b, q, i);

   struct gen_mi_value result = stream_result[0];
   for (int i = 1; i < MAX_VERTEX_STREAMS; i++)
      result = gen_mi_ior(b, result, stream_result[i]);

   return result;
}
static bool
query_is_boolean(enum pipe_query_type type)
{
   switch (type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      return true;
   default:
      return false;
   }
}
/**
 * Calculate the result using MI_MATH.
 */
static struct gen_mi_value
calculate_result_on_gpu(const struct gen_device_info *devinfo,
                        struct gen_mi_builder *b,
                        struct iris_query *q)
{
   struct gen_mi_value result;
   struct gen_mi_value start_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, start));
   struct gen_mi_value end_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, end));

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(b, q);
      break;
   case PIPE_QUERY_TIMESTAMP: {
      /* TODO: This discards any fractional bits of the timebase scale.
       * We would need to do a bit of fixed point math on the CS ALU, or
       * launch an actual shader to calculate this with full precision.
       */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_iand(b, gen_mi_imm((1ull << 36) - 1),
                           gen_mi_imul_imm(b, start_val, scale));
      break;
   }
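   /* Truncating the ns-per-tick scale loses precision: e.g. at a 12 MHz
    * timestamp frequency, 1000000000 / 12000000 is 83.33 but the integer
    * scale used here is 83, undercounting by roughly 0.4%.
    */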
   case PIPE_QUERY_TIME_ELAPSED: {
      /* TODO: This discards fractional bits (see above). */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_imul_imm(b, gen_mi_isub(b, end_val, start_val), scale);
      break;
   }
   default:
      result = gen_mi_isub(b, end_val, start_val);
      break;
   }

   /* WaDividePSInvocationCountBy4:HSW,BDW */
   if (GEN_GEN == 8 &&
       q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      result = gen_mi_ushr32_imm(b, result, 2);

   if (query_is_boolean(q->type))
      result = gen_mi_iand(b, gen_mi_nz(b, result), gen_mi_imm(1));

   return result;
}
static struct pipe_query *
iris_create_query(struct pipe_context *ctx,
                  unsigned query_type,
                  unsigned index)
{
   struct iris_query *q = calloc(1, sizeof(struct iris_query));

   q->type = query_type;
   q->index = index;

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_CS_INVOCATIONS)
      q->batch_idx = IRIS_BATCH_COMPUTE;
   else
      q->batch_idx = IRIS_BATCH_RENDER;
   return (struct pipe_query *) q;
}
static struct pipe_query *
iris_create_batch_query(struct pipe_context *ctx,
                        unsigned num_queries,
                        unsigned *query_types)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = calloc(1, sizeof(struct iris_query));
   if (unlikely(!q))
      return NULL;
   q->type = PIPE_QUERY_DRIVER_SPECIFIC;
   q->index = -1;
   q->monitor = iris_create_monitor_object(ice, num_queries, query_types);
   if (unlikely(!q->monitor)) {
      free(q);
      return NULL;
   }

   return (struct pipe_query *) q;
}
static void
iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
{
   struct iris_query *query = (void *) p_query;
   struct iris_screen *screen = (void *) ctx->screen;
   if (query->monitor) {
      iris_destroy_monitor_object(ctx, query->monitor);
      query->monitor = NULL;
   } else {
      iris_syncpt_reference(screen, &query->syncpt, NULL);
      screen->base.fence_reference(ctx->screen, &query->fence, NULL);
   }
   free(query);
}
static bool
iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   void *ptr = NULL;
   uint32_t size;

   if (q->monitor)
      return iris_begin_monitor(ctx, q->monitor);

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      size = sizeof(struct iris_query_so_overflow);
   else
      size = sizeof(struct iris_query_snapshots);

   u_upload_alloc(ice->query_buffer_uploader, 0,
                  size, size, &q->query_state_ref.offset,
                  &q->query_state_ref.res, &ptr);

   if (!iris_resource_bo(q->query_state_ref.res))
      return false;

   q->map = ptr;
   if (!q->map)
      return false;

   q->result = 0ull;
   q->ready = false;
   WRITE_ONCE(q->map->snapshots_landed, false);

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = true;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, false);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, start));

   return true;
}
static bool
iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_end_monitor(ctx, q->monitor);

   if (q->type == PIPE_QUERY_GPU_FINISHED) {
      ctx->flush(ctx, &q->fence, PIPE_FLUSH_DEFERRED);
      return true;
   }

   struct iris_batch *batch = &ice->batches[q->batch_idx];
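   /* Timestamp queries have no begin-time snapshot; the single value is
    * captured at end_query time.  Reusing iris_begin_query() here allocates
    * the snapshot buffer and records the timestamp into the "start" slot,
    * which calculate_result_on_cpu() reads for PIPE_QUERY_TIMESTAMP.
    */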
   if (q->type == PIPE_QUERY_TIMESTAMP) {
      iris_begin_query(ctx, query);
      iris_batch_reference_signal_syncpt(batch, &q->syncpt);
      mark_available(ice, q);
      return true;
   }

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = false;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, true);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, end));

   iris_batch_reference_signal_syncpt(batch, &q->syncpt);
   mark_available(ice, q);

   return true;
}
/**
 * See if the snapshots have landed for a query, and if so, compute the
 * result and mark it ready.  Does not flush (unlike iris_get_query_result).
 */
static void
iris_check_query_no_flush(struct iris_context *ice, struct iris_query *q)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      calculate_result_on_cpu(devinfo, q);
   }
}
static bool
iris_get_query_result(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool wait,
                      union pipe_query_result *result)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_get_monitor_result(ctx, q->monitor, wait, result->batch);

   struct iris_screen *screen = (void *) ctx->screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (unlikely(screen->no_hw)) {
      result->u64 = 0;
      return true;
   }

   if (q->type == PIPE_QUERY_GPU_FINISHED) {
      struct pipe_screen *screen = ctx->screen;

      result->b = screen->fence_finish(screen, ctx, q->fence,
                                       wait ? PIPE_TIMEOUT_INFINITE : 0);
      return result->b;
   }

   if (!q->ready) {
      struct iris_batch *batch = &ice->batches[q->batch_idx];
      if (q->syncpt == iris_batch_get_signal_syncpt(batch))
         iris_batch_flush(batch);

      while (!READ_ONCE(q->map->snapshots_landed)) {
         if (wait)
            iris_wait_syncpt(ctx->screen, q->syncpt, INT64_MAX);
         else
            return false;
      }

      assert(READ_ONCE(q->map->snapshots_landed));
      calculate_result_on_cpu(devinfo, q);
   }

   result->u64 = q->result;

   return true;
}
static void
iris_get_query_result_resource(struct pipe_context *ctx,
                               struct pipe_query *query,
                               bool wait,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *p_res,
                               unsigned offset)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   struct iris_resource *res = (void *) p_res;
   struct iris_bo *query_bo = iris_resource_bo(q->query_state_ref.res);
   struct iris_bo *dst_bo = iris_resource_bo(p_res);
   unsigned snapshots_landed_offset =
      offsetof(struct iris_query_snapshots, snapshots_landed);

   res->bind_history |= PIPE_BIND_QUERY_BUFFER;

   if (index == -1) {
      /* They're asking for the availability of the result.  If we still
       * have commands queued up which produce the result, submit them
       * now so that progress happens.  Either way, copy the snapshots
       * landed field to the destination resource.
       */
      if (q->syncpt == iris_batch_get_signal_syncpt(batch))
         iris_batch_flush(batch);

      ice->vtbl.copy_mem_mem(batch, dst_bo, offset,
                             query_bo, snapshots_landed_offset,
                             result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
      return;
   }

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      /* The final snapshots happen to have landed, so let's just compute
       * the result on the CPU now...
       */
      calculate_result_on_cpu(devinfo, q);
   }

   if (q->ready) {
      /* We happen to have the result on the CPU, so just copy it. */
      if (result_type <= PIPE_QUERY_TYPE_U32) {
         ice->vtbl.store_data_imm32(batch, dst_bo, offset, q->result);
      } else {
         ice->vtbl.store_data_imm64(batch, dst_bo, offset, q->result);
      }

      /* Make sure the result lands before they bind the QBO elsewhere
       * and use the result.
       */
      // XXX: Why?  i965 doesn't do this.
      iris_emit_pipe_control_flush(batch,
                                   "query: unknown QBO flushing hack",
                                   PIPE_CONTROL_CS_STALL);
      return;
   }

   bool predicated = !wait && !q->stalled;
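   /* If the caller didn't ask us to wait and nothing has stalled on the
    * results, the snapshots may not have landed yet.  Load snapshots_landed
    * into MI_PREDICATE_RESULT and use a predicated store so the destination
    * is only written once the result is actually available; otherwise the
    * QBO keeps its previous contents.
    */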
   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   struct gen_mi_value result = calculate_result_on_gpu(devinfo, &b, q);
   struct gen_mi_value dst =
      result_type <= PIPE_QUERY_TYPE_U32 ? gen_mi_mem32(rw_bo(dst_bo, offset))
                                         : gen_mi_mem64(rw_bo(dst_bo, offset));

   if (predicated) {
      gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT),
                   gen_mi_mem64(ro_bo(query_bo, snapshots_landed_offset)));
      gen_mi_store_if(&b, dst, result);
   } else {
      gen_mi_store(&b, dst, result);
   }
}
static void
iris_set_active_query_state(struct pipe_context *ctx, bool enable)
{
   struct iris_context *ice = (void *) ctx;

   if (ice->state.statistics_counters_enabled == enable)
      return;

   // XXX: most packets aren't paying attention to this yet, because it'd
   // have to be done dynamically at draw time, which is a pain
   ice->state.statistics_counters_enabled = enable;
   ice->state.dirty |= IRIS_DIRTY_CLIP |
                       IRIS_DIRTY_GS |
                       IRIS_DIRTY_RASTER |
                       IRIS_DIRTY_STREAMOUT |
                       IRIS_DIRTY_TCS |
                       IRIS_DIRTY_TES |
                       IRIS_DIRTY_VS |
                       IRIS_DIRTY_WM;
}
static void
set_predicate_enable(struct iris_context *ice, bool value)
{
   if (value)
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
   else
      ice->state.predicate = IRIS_PREDICATE_STATE_DONT_RENDER;
}
static void
set_predicate_for_result(struct iris_context *ice,
                         struct iris_query *q,
                         bool inverted)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   /* The CPU doesn't have the query result yet; use hardware predication */
   ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT;

   /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
   iris_emit_pipe_control_flush(batch,
                                "conditional rendering: set predicate",
                                PIPE_CONTROL_FLUSH_ENABLE);
   q->stalled = true;

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   struct gen_mi_value result;

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(&b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(&b, q);
      break;
   default: {
      /* PIPE_QUERY_OCCLUSION_* */
      struct gen_mi_value start =
         query_mem64(q, offsetof(struct iris_query_snapshots, start));
      struct gen_mi_value end =
         query_mem64(q, offsetof(struct iris_query_snapshots, end));
      result = gen_mi_isub(&b, end, start);
      break;
   }
   }

   result = inverted ? gen_mi_z(&b, result) : gen_mi_nz(&b, result);
   result = gen_mi_iand(&b, result, gen_mi_imm(1));

   /* We immediately set the predicate on the render batch, as all the
    * counters come from 3D operations.  However, we may need to predicate
    * a compute dispatch, which executes in a different GEM context and has
    * a different MI_PREDICATE_RESULT register.  So, we save the result to
    * memory and reload it in iris_launch_grid.
    */
   gen_mi_value_ref(&b, result);
   gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT), result);
   gen_mi_store(&b, query_mem64(q, offsetof(struct iris_query_snapshots,
                                            predicate_result)), result);
   ice->state.compute_predicate = bo;
}
static void
iris_render_condition(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   /* The old condition isn't relevant; we'll update it if necessary */
   ice->state.compute_predicate = NULL;
   ice->condition.query = q;
   ice->condition.condition = condition;

   if (!q) {
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
      return;
   }

   iris_check_query_no_flush(ice, q);

   if (q->result || q->ready) {
      set_predicate_enable(ice, (q->result != 0) ^ condition);
   } else {
      if (mode == PIPE_RENDER_COND_NO_WAIT ||
          mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
         perf_debug(&ice->dbg, "Conditional rendering demoted from "
                    "\"no wait\" to \"wait\".");
      }
      set_predicate_for_result(ice, q, condition);
   }
}
static void
iris_resolve_conditional_render(struct iris_context *ice)
{
   struct pipe_context *ctx = (void *) ice;
   struct iris_query *q = ice->condition.query;
   struct pipe_query *query = (void *) q;
   union pipe_query_result result;

   if (ice->state.predicate != IRIS_PREDICATE_STATE_USE_BIT)
      return;

   assert(q);

   iris_get_query_result(ctx, query, true, &result);
   set_predicate_enable(ice, (q->result != 0) ^ ice->condition.condition);
}
void
genX(init_query)(struct iris_context *ice)
{
   struct pipe_context *ctx = &ice->ctx;

   ctx->create_query = iris_create_query;
   ctx->create_batch_query = iris_create_batch_query;
   ctx->destroy_query = iris_destroy_query;
   ctx->begin_query = iris_begin_query;
   ctx->end_query = iris_end_query;
   ctx->get_query_result = iris_get_query_result;
   ctx->get_query_result_resource = iris_get_query_result_resource;
   ctx->set_active_query_state = iris_set_active_query_state;
   ctx->render_condition = iris_render_condition;

   ice->vtbl.resolve_conditional_render = iris_resolve_conditional_render;
}