/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_query.c
 *
 * ============================= GENXML CODE =============================
 *              [This file is compiled once per generation.]
 * =======================================================================
 *
 * Query object support.  This allows measuring various simple statistics
 * via counters on the GPU.  We use GenX code for MI_MATH calculations.
 */

#include <stdio.h>
#include <errno.h>
#include "perf/gen_perf.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "iris_context.h"
#include "iris_defines.h"
#include "iris_fence.h"
#include "iris_monitor.h"
#include "iris_resource.h"
#include "iris_screen.h"

#include "iris_genx_macros.h"

#define SO_PRIM_STORAGE_NEEDED(n) (GENX(SO_PRIM_STORAGE_NEEDED0_num) + (n) * 8)
#define SO_NUM_PRIMS_WRITTEN(n)   (GENX(SO_NUM_PRIMS_WRITTEN0_num) + (n) * 8)

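/**
 * Driver representation of a query object, holding the snapshot buffer
 * reference, CPU-side result state, and synchronization information.
 */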
struct iris_query {
   enum pipe_query_type type;
   int index;

   /** Has the result been computed and stored in \c result? */
   bool ready;

   /** Did we stall or flush the pipeline when writing this query's snapshots? */
   bool stalled;

   uint64_t result;

   struct iris_state_ref query_state_ref;
   struct iris_query_snapshots *map;
   struct iris_syncpt *syncpt;

   /** Which batch (render or compute) the query runs on. */
   int batch_idx;

   /** Performance monitor for batch queries; NULL for normal queries. */
   struct iris_monitor_object *monitor;
};

struct iris_query_snapshots {
   /** iris_render_condition's saved MI_PREDICATE_RESULT value. */
   uint64_t predicate_result;

   /** Have the start/end snapshots landed? */
   uint64_t snapshots_landed;

   /** Starting and ending counter snapshots */
   uint64_t start;
   uint64_t end;
};

struct iris_query_so_overflow {
   uint64_t predicate_result;
   uint64_t snapshots_landed;

   struct {
      uint64_t prim_storage_needed[2];
      uint64_t num_prims[2];
   } stream[4];
};

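/**
 * Return an MI value pointing at a 64-bit field at \p offset within the
 * query's snapshot buffer.
 */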
static struct gen_mi_value
query_mem64(struct iris_query *q, uint32_t offset)
{
   struct iris_address addr = {
      .bo = iris_resource_bo(q->query_state_ref.res),
      .offset = q->query_state_ref.offset + offset,
      .write = true
   };
   return gen_mi_mem64(addr);
}

/**
 * Is this type of query written by PIPE_CONTROL?
 */
static bool
iris_is_query_pipelined(struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_TIME_ELAPSED:
      return true;

   default:
      return false;
   }
}

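/**
 * Set the query's "snapshots landed" availability flag, ordered after any
 * pending snapshot writes (via PIPE_CONTROL for pipelined queries).
 */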
static void
mark_available(struct iris_context *ice, struct iris_query *q)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
   unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   offset += q->query_state_ref.offset;

   if (!iris_is_query_pipelined(q)) {
      ice->vtbl.store_data_imm64(batch, bo, offset, true);
   } else {
      /* Order available *after* the query results. */
      flags |= PIPE_CONTROL_FLUSH_ENABLE;
      iris_emit_pipe_control_write(batch, "query: mark available",
                                   flags, bo, offset, true);
   }
}

/**
 * Write PS_DEPTH_COUNT or TIMESTAMP into the query's buffer at the given
 * offset via a pipelined PIPE_CONTROL write.
 */
static void
iris_pipelined_write(struct iris_batch *batch,
                     struct iris_query *q,
                     enum pipe_control_flags flags,
                     unsigned offset)
{
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   const unsigned optional_cs_stall =
      GEN_GEN == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   iris_emit_pipe_control_write(batch, "query: pipelined snapshot write",
                                flags | optional_cs_stall,
                                bo, offset, 0ull);
}

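/**
 * Write the appropriate counter snapshot for a query (the start or end
 * value, depending on \p offset) into the query's snapshot buffer.
 */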
static void
write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
{
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   if (!iris_is_query_pipelined(q)) {
      iris_emit_pipe_control_flush(batch,
                                   "query: non-pipelined snapshot write",
                                   PIPE_CONTROL_CS_STALL |
                                   PIPE_CONTROL_STALL_AT_SCOREBOARD);
      q->stalled = true;
   }

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      if (GEN_GEN >= 10) {
         /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
          *  bit set prior to programming a PIPE_CONTROL with Write PS Depth
          *  Count sync operation."
          */
         iris_emit_pipe_control_flush(batch,
                                      "workaround: depth stall before writing "
                                      "PS_DEPTH_COUNT",
                                      PIPE_CONTROL_DEPTH_STALL);
      }
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_DEPTH_COUNT |
                           PIPE_CONTROL_DEPTH_STALL,
                           offset);
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
                           PIPE_CONTROL_WRITE_TIMESTAMP,
                           offset);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      ice->vtbl.store_register_mem64(batch,
                                     q->index == 0 ?
                                     GENX(CL_INVOCATION_COUNT_num) :
                                     SO_PRIM_STORAGE_NEEDED(q->index),
                                     bo, offset, false);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      ice->vtbl.store_register_mem64(batch,
                                     SO_NUM_PRIMS_WRITTEN(q->index),
                                     bo, offset, false);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: {
      static const uint32_t index_to_reg[] = {
         GENX(IA_VERTICES_COUNT_num),
         GENX(IA_PRIMITIVES_COUNT_num),
         GENX(VS_INVOCATION_COUNT_num),
         GENX(GS_INVOCATION_COUNT_num),
         GENX(GS_PRIMITIVES_COUNT_num),
         GENX(CL_INVOCATION_COUNT_num),
         GENX(CL_PRIMITIVES_COUNT_num),
         GENX(PS_INVOCATION_COUNT_num),
         GENX(HS_INVOCATION_COUNT_num),
         GENX(DS_INVOCATION_COUNT_num),
         GENX(CS_INVOCATION_COUNT_num),
      };
      const uint32_t reg = index_to_reg[q->index];

      ice->vtbl.store_register_mem64(batch, reg, bo, offset, false);
      break;
   }
   default:
      assert(false);
   }
}

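/**
 * Snapshot the SO_NUM_PRIMS_WRITTEN and SO_PRIM_STORAGE_NEEDED registers
 * for each stream covered by a streamout overflow query.
 */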
static void
write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
   uint32_t offset = q->query_state_ref.offset;

   iris_emit_pipe_control_flush(batch,
                                "query: write SO overflow snapshots",
                                PIPE_CONTROL_CS_STALL |
                                PIPE_CONTROL_STALL_AT_SCOREBOARD);
   for (uint32_t i = 0; i < count; i++) {
      int s = q->index + i;
      int g_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].num_prims[end]);
      int w_idx = offset + offsetof(struct iris_query_so_overflow,
                                    stream[s].prim_storage_needed[end]);
      ice->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
                                     bo, g_idx, false);
      ice->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
                                     bo, w_idx, false);
   }
}

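/**
 * Return the difference between two raw timestamps, accounting for the
 * counter wrapping around at TIMESTAMP_BITS bits.
 */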
static uint64_t
iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
   if (time0 > time1) {
      return (1ULL << TIMESTAMP_BITS) + time1 - time0;
   } else {
      return time1 - time0;
   }
}

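/**
 * A stream overflowed if the number of primitives needing storage differs
 * from the number actually written during the query.
 */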
static bool
stream_overflowed(struct iris_query_so_overflow *so, int s)
{
   return (so->stream[s].prim_storage_needed[1] -
           so->stream[s].prim_storage_needed[0]) !=
          (so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
}

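/**
 * Compute q->result on the CPU from the mapped start/end snapshots and
 * mark the query as ready.
 */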
static void
calculate_result_on_cpu(const struct gen_device_info *devinfo,
                        struct iris_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      q->result = q->map->end != q->map->start;
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* The timestamp is the single starting snapshot. */
      q->result = gen_device_info_timebase_scale(devinfo, q->map->start);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);
      q->result = gen_device_info_timebase_scale(devinfo, q->result);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      q->result = stream_overflowed((void *) q->map, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      q->result = false;
      for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
         q->result |= stream_overflowed((void *) q->map, i);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      q->result = q->map->end - q->map->start;

      /* WaDividePSInvocationCountBy4:HSW,BDW */
      if (GEN_GEN == 8 && q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
         q->result /= 4;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   default:
      q->result = q->map->end - q->map->start;
      break;
   }

   q->ready = true;
}

/**
 * Calculate the streamout overflow for stream \p idx:
 *
 * (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0])
 */
static struct gen_mi_value
calc_overflow_for_stream(struct gen_mi_builder *b,
                         struct iris_query *q,
                         int idx)
{
#define C(counter, i) query_mem64(q, \
   offsetof(struct iris_query_so_overflow, stream[idx].counter[i]))

   return gen_mi_isub(b, gen_mi_isub(b, C(num_prims, 1), C(num_prims, 0)),
                         gen_mi_isub(b, C(prim_storage_needed, 1),
                                        C(prim_storage_needed, 0)));
#undef C
}

/**
 * Calculate whether any stream has overflowed.
 */
static struct gen_mi_value
calc_overflow_any_stream(struct gen_mi_builder *b, struct iris_query *q)
{
   struct gen_mi_value stream_result[MAX_VERTEX_STREAMS];
   for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
      stream_result[i] = calc_overflow_for_stream(b, q, i);

   struct gen_mi_value result = stream_result[0];
   for (int i = 1; i < MAX_VERTEX_STREAMS; i++)
      result = gen_mi_ior(b, result, stream_result[i]);

   return result;
}

static bool
query_is_boolean(enum pipe_query_type type)
{
   switch (type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      return true;
   default:
      return false;
   }
}

/**
 * Calculate the result using MI_MATH.
 */
static struct gen_mi_value
calculate_result_on_gpu(const struct gen_device_info *devinfo,
                        struct gen_mi_builder *b,
                        struct iris_query *q)
{
   struct gen_mi_value result;
   struct gen_mi_value start_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, start));
   struct gen_mi_value end_val =
      query_mem64(q, offsetof(struct iris_query_snapshots, end));

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(b, q);
      break;
   case PIPE_QUERY_TIMESTAMP: {
      /* TODO: This discards any fractional bits of the timebase scale.
       * We would need to do a bit of fixed point math on the CS ALU, or
       * launch an actual shader to calculate this with full precision.
       */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_iand(b, gen_mi_imm((1ull << 36) - 1),
                              gen_mi_imul_imm(b, start_val, scale));
      break;
   }
   case PIPE_QUERY_TIME_ELAPSED: {
      /* TODO: This discards fractional bits (see above). */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = gen_mi_imul_imm(b, gen_mi_isub(b, end_val, start_val), scale);
      break;
   }
   default:
      result = gen_mi_isub(b, end_val, start_val);
      break;
   }

   /* WaDividePSInvocationCountBy4:HSW,BDW */
   if (GEN_GEN == 8 &&
       q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      result = gen_mi_ushr32_imm(b, result, 2);

   if (query_is_boolean(q->type))
      result = gen_mi_iand(b, gen_mi_nz(b, result), gen_mi_imm(1));

   return result;
}

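/**
 * The pipe->create_query() driver hook.  Allocates a query object and
 * chooses which batch (render or compute) it will run on.
 */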
static struct pipe_query *
iris_create_query(struct pipe_context *ctx,
                  unsigned query_type,
                  unsigned index)
{
   struct iris_query *q = calloc(1, sizeof(struct iris_query));

   q->type = query_type;
   q->index = index;
   q->monitor = NULL;

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_CS_INVOCATIONS)
      q->batch_idx = IRIS_BATCH_COMPUTE;
   else
      q->batch_idx = IRIS_BATCH_RENDER;
   return (struct pipe_query *) q;
}

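/**
 * The pipe->create_batch_query() driver hook, used for groups of
 * driver-specific performance monitor counters.
 */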
static struct pipe_query *
iris_create_batch_query(struct pipe_context *ctx,
                        unsigned num_queries,
                        unsigned *query_types)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = calloc(1, sizeof(struct iris_query));
   if (unlikely(!q))
      return NULL;
   q->type = PIPE_QUERY_DRIVER_SPECIFIC;
   q->index = -1;
   q->monitor = iris_create_monitor_object(ice, num_queries, query_types);
   if (unlikely(!q->monitor)) {
      free(q);
      return NULL;
   }

   return (struct pipe_query *) q;
}

static void
iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
{
   struct iris_query *query = (void *) p_query;
   struct iris_screen *screen = (void *) ctx->screen;
   if (query->monitor) {
      iris_destroy_monitor_object(ctx, query->monitor);
      query->monitor = NULL;
   } else {
      iris_syncpt_reference(screen, &query->syncpt, NULL);
   }
   free(query);
}

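/**
 * The pipe->begin_query() driver hook.  Allocates the snapshot buffer
 * and records the starting counter values.
 */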
static bool
iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_begin_monitor(ctx, q->monitor);

   void *ptr = NULL;
   uint32_t size;

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      size = sizeof(struct iris_query_so_overflow);
   else
      size = sizeof(struct iris_query_snapshots);

   u_upload_alloc(ice->query_buffer_uploader, 0,
                  size, size, &q->query_state_ref.offset,
                  &q->query_state_ref.res, &ptr);

   if (!iris_resource_bo(q->query_state_ref.res))
      return false;

   q->map = ptr;
   if (!q->map)
      return false;

   q->result = 0ull;
   q->ready = false;
   WRITE_ONCE(q->map->snapshots_landed, false);

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = true;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, false);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, start));

   return true;
}

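/**
 * The pipe->end_query() driver hook.  Records the ending counter values
 * and arranges for the availability flag to be written.
 */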
static bool
iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_end_monitor(ctx, q->monitor);

   struct iris_batch *batch = &ice->batches[q->batch_idx];

   if (q->type == PIPE_QUERY_TIMESTAMP) {
      iris_begin_query(ctx, query);
      iris_batch_reference_signal_syncpt(batch, &q->syncpt);
      mark_available(ice, q);
      return true;
   }

   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = false;
      ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
   }

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, true);
   else
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct iris_query_snapshots, end));

   iris_batch_reference_signal_syncpt(batch, &q->syncpt);
   mark_available(ice, q);

   return true;
}

/**
 * See if the snapshots have landed for a query, and if so, compute the
 * result and mark it ready.  Does not flush (unlike iris_get_query_result).
 */
static void
iris_check_query_no_flush(struct iris_context *ice, struct iris_query *q)
{
   struct iris_screen *screen = (void *) ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      calculate_result_on_cpu(devinfo, q);
   }
}

static bool
iris_get_query_result(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool wait,
                      union pipe_query_result *result)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   if (q->monitor)
      return iris_get_monitor_result(ctx, q->monitor, wait, result->batch);

   struct iris_screen *screen = (void *) ctx->screen;
   const struct gen_device_info *devinfo = &screen->devinfo;

   if (unlikely(screen->no_hw)) {
      result->u64 = 0;
      return true;
   }

   if (!q->ready) {
      struct iris_batch *batch = &ice->batches[q->batch_idx];
      if (q->syncpt == iris_batch_get_signal_syncpt(batch))
         iris_batch_flush(batch);

      while (!READ_ONCE(q->map->snapshots_landed)) {
         if (wait)
            iris_wait_syncpt(ctx->screen, q->syncpt, INT64_MAX);
         else
            return false;
      }

      assert(READ_ONCE(q->map->snapshots_landed));
      calculate_result_on_cpu(devinfo, q);
   }

   assert(q->ready);

   result->u64 = q->result;

   return true;
}

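/**
 * The pipe->get_query_result_resource() driver hook.  Writes the query
 * result (or, for index == -1, its availability) into a buffer object,
 * using MI_MATH on the GPU if the result isn't already known on the CPU.
 */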
static void
iris_get_query_result_resource(struct pipe_context *ctx,
                               struct pipe_query *query,
                               bool wait,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *p_res,
                               unsigned offset)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;
   struct iris_batch *batch = &ice->batches[q->batch_idx];
   const struct gen_device_info *devinfo = &batch->screen->devinfo;
   struct iris_resource *res = (void *) p_res;
   struct iris_bo *query_bo = iris_resource_bo(q->query_state_ref.res);
   struct iris_bo *dst_bo = iris_resource_bo(p_res);
   unsigned snapshots_landed_offset =
      offsetof(struct iris_query_snapshots, snapshots_landed);

   res->bind_history |= PIPE_BIND_QUERY_BUFFER;

   if (index == -1) {
      /* They're asking for the availability of the result.  If we still
       * have commands queued up which produce the result, submit them
       * now so that progress happens.  Either way, copy the snapshots
       * landed field to the destination resource.
       */
      if (q->syncpt == iris_batch_get_signal_syncpt(batch))
         iris_batch_flush(batch);

      ice->vtbl.copy_mem_mem(batch, dst_bo, offset,
                             query_bo, snapshots_landed_offset,
                             result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
      return;
   }

   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      /* The final snapshots happen to have landed, so let's just compute
       * the result on the CPU now...
       */
      calculate_result_on_cpu(devinfo, q);
   }

   if (q->ready) {
      /* We happen to have the result on the CPU, so just copy it. */
      if (result_type <= PIPE_QUERY_TYPE_U32) {
         ice->vtbl.store_data_imm32(batch, dst_bo, offset, q->result);
      } else {
         ice->vtbl.store_data_imm64(batch, dst_bo, offset, q->result);
      }

      /* Make sure the result lands before they bind the QBO elsewhere
       * and use the result.
       */
      // XXX: Why? i965 doesn't do this.
      iris_emit_pipe_control_flush(batch,
                                   "query: unknown QBO flushing hack",
                                   PIPE_CONTROL_CS_STALL);
      return;
   }

   bool predicated = !wait && !q->stalled;

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   struct gen_mi_value result = calculate_result_on_gpu(devinfo, &b, q);
   struct gen_mi_value dst =
      result_type <= PIPE_QUERY_TYPE_U32 ? gen_mi_mem32(rw_bo(dst_bo, offset))
                                         : gen_mi_mem64(rw_bo(dst_bo, offset));

   if (predicated) {
      gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT),
                   gen_mi_mem64(ro_bo(query_bo, snapshots_landed_offset)));
      gen_mi_store_if(&b, dst, result);
   } else {
      gen_mi_store(&b, dst, result);
   }
}

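/**
 * The pipe->set_active_query_state() driver hook.  Toggles whether the
 * pipeline statistics counters should accumulate.
 */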
static void
iris_set_active_query_state(struct pipe_context *ctx, bool enable)
{
   struct iris_context *ice = (void *) ctx;

   if (ice->state.statistics_counters_enabled == enable)
      return;

   // XXX: most packets aren't paying attention to this yet, because it'd
   // have to be done dynamically at draw time, which is a pain
   ice->state.statistics_counters_enabled = enable;
   ice->state.dirty |= IRIS_DIRTY_CLIP |
                       IRIS_DIRTY_GS |
                       IRIS_DIRTY_RASTER |
                       IRIS_DIRTY_STREAMOUT |
                       IRIS_DIRTY_TCS |
                       IRIS_DIRTY_TES |
                       IRIS_DIRTY_VS |
                       IRIS_DIRTY_WM;
}

static void
set_predicate_enable(struct iris_context *ice, bool value)
{
   if (value)
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
   else
      ice->state.predicate = IRIS_PREDICATE_STATE_DONT_RENDER;
}

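/**
 * Calculate a conditional rendering predicate on the GPU from a query's
 * snapshots and load it into MI_PREDICATE_RESULT, also saving it to
 * memory so compute dispatches can reload it later.
 */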
static void
set_predicate_for_result(struct iris_context *ice,
                         struct iris_query *q,
                         bool inverted)
{
   struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);

   /* The CPU doesn't have the query result yet; use hardware predication */
   ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT;

   /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
   iris_emit_pipe_control_flush(batch,
                                "conditional rendering: set predicate",
                                PIPE_CONTROL_FLUSH_ENABLE);
   q->stalled = true;

   struct gen_mi_builder b;
   gen_mi_builder_init(&b, batch);

   struct gen_mi_value result;

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(&b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(&b, q);
      break;
   default: {
      /* PIPE_QUERY_OCCLUSION_* */
      struct gen_mi_value start =
         query_mem64(q, offsetof(struct iris_query_snapshots, start));
      struct gen_mi_value end =
         query_mem64(q, offsetof(struct iris_query_snapshots, end));
      result = gen_mi_isub(&b, end, start);
      break;
   }
   }

   result = inverted ? gen_mi_z(&b, result) : gen_mi_nz(&b, result);
   result = gen_mi_iand(&b, result, gen_mi_imm(1));

   /* We immediately set the predicate on the render batch, as all the
    * counters come from 3D operations.  However, we may need to predicate
    * a compute dispatch, which executes in a different GEM context and has
    * a different MI_PREDICATE_RESULT register.  So, we save the result to
    * memory and reload it in iris_launch_grid.
    */
   gen_mi_value_ref(&b, result);
   gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT), result);
   gen_mi_store(&b, query_mem64(q, offsetof(struct iris_query_snapshots,
                                            predicate_result)), result);
   ice->state.compute_predicate = bo;
}

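/**
 * The pipe->render_condition() driver hook.  Sets up conditional rendering
 * from a query's result, using the CPU value if it's already available and
 * GPU predication otherwise.
 */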
static void
iris_render_condition(struct pipe_context *ctx,
                      struct pipe_query *query,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct iris_context *ice = (void *) ctx;
   struct iris_query *q = (void *) query;

   /* The old condition isn't relevant; we'll update it if necessary */
   ice->state.compute_predicate = NULL;
   ice->condition.query = q;
   ice->condition.condition = condition;

   if (!q) {
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
      return;
   }

   iris_check_query_no_flush(ice, q);

   if (q->result || q->ready) {
      set_predicate_enable(ice, (q->result != 0) ^ condition);
   } else {
      if (mode == PIPE_RENDER_COND_NO_WAIT ||
          mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
         perf_debug(&ice->dbg, "Conditional rendering demoted from "
                    "\"no wait\" to \"wait\".");
      }
      set_predicate_for_result(ice, q, condition);
   }
}

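/**
 * Resolve an outstanding GPU-based render condition into a CPU yes/no
 * decision by waiting for the query result.
 */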
static void
iris_resolve_conditional_render(struct iris_context *ice)
{
   struct pipe_context *ctx = (void *) ice;
   struct iris_query *q = ice->condition.query;
   struct pipe_query *query = (void *) q;
   union pipe_query_result result;

   if (ice->state.predicate != IRIS_PREDICATE_STATE_USE_BIT)
      return;

   assert(q);

   iris_get_query_result(ctx, query, true, &result);
   set_predicate_enable(ice, (q->result != 0) ^ ice->condition.condition);
}

void
genX(init_query)(struct iris_context *ice)
{
   struct pipe_context *ctx = &ice->ctx;

   ctx->create_query = iris_create_query;
   ctx->create_batch_query = iris_create_batch_query;
   ctx->destroy_query = iris_destroy_query;
   ctx->begin_query = iris_begin_query;
   ctx->end_query = iris_end_query;
   ctx->get_query_result = iris_get_query_result;
   ctx->get_query_result_resource = iris_get_query_result_resource;
   ctx->set_active_query_state = iris_set_active_query_state;
   ctx->render_condition = iris_render_condition;

   ice->vtbl.resolve_conditional_render = iris_resolve_conditional_render;
}