1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file iris_query.c
25 *
26 * Query object support. This allows measuring various simple statistics
27 * via counters on the GPU.
28 */
29
30 #include <stdio.h>
31 #include <errno.h>
32 #include "pipe/p_defines.h"
33 #include "pipe/p_state.h"
34 #include "pipe/p_context.h"
35 #include "pipe/p_screen.h"
36 #include "util/fast_idiv_by_const.h"
37 #include "util/u_inlines.h"
38 #include "util/u_upload_mgr.h"
39 #include "iris_context.h"
40 #include "iris_defines.h"
41 #include "iris_fence.h"
42 #include "iris_resource.h"
43 #include "iris_screen.h"
44 #include "vulkan/util/vk_util.h"
45
46 #define IA_VERTICES_COUNT 0x2310
47 #define IA_PRIMITIVES_COUNT 0x2318
48 #define VS_INVOCATION_COUNT 0x2320
49 #define HS_INVOCATION_COUNT 0x2300
50 #define DS_INVOCATION_COUNT 0x2308
51 #define GS_INVOCATION_COUNT 0x2328
52 #define GS_PRIMITIVES_COUNT 0x2330
53 #define CL_INVOCATION_COUNT 0x2338
54 #define CL_PRIMITIVES_COUNT 0x2340
55 #define PS_INVOCATION_COUNT 0x2348
56 #define CS_INVOCATION_COUNT 0x2290
57 #define PS_DEPTH_COUNT 0x2350
58
59 #define SO_PRIM_STORAGE_NEEDED(n) (0x5240 + (n) * 8)
60
61 #define SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8)
62
63 #define MI_MATH (0x1a << 23)
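/* As used throughout this file, the low bits of MI_MATH hold the packet
 * length in dwords minus two (the usual command length bias), e.g.
 * MI_MATH | (5 - 2) for a five-dword packet.
 */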
64
65 #define MI_ALU_LOAD 0x080
66 #define MI_ALU_LOADINV 0x480
67 #define MI_ALU_LOAD0 0x081
68 #define MI_ALU_LOAD1 0x481
69 #define MI_ALU_ADD 0x100
70 #define MI_ALU_SUB 0x101
71 #define MI_ALU_AND 0x102
72 #define MI_ALU_OR 0x103
73 #define MI_ALU_XOR 0x104
74 #define MI_ALU_STORE 0x180
75 #define MI_ALU_STOREINV 0x580
76
77 #define MI_ALU_R0 0x00
78 #define MI_ALU_R1 0x01
79 #define MI_ALU_R2 0x02
80 #define MI_ALU_R3 0x03
81 #define MI_ALU_R4 0x04
82 #define MI_ALU_SRCA 0x20
83 #define MI_ALU_SRCB 0x21
84 #define MI_ALU_ACCU 0x31
85 #define MI_ALU_ZF 0x32
86 #define MI_ALU_CF 0x33
87
88 #define _MI_ALU(op, x, y) (((op) << 20) | ((x) << 10) | (y))
89
90 #define _MI_ALU0(op) _MI_ALU(MI_ALU_##op, 0, 0)
91 #define _MI_ALU1(op, x) _MI_ALU(MI_ALU_##op, x, 0)
92 #define _MI_ALU2(op, x, y) _MI_ALU(MI_ALU_##op, x, y)
93
94 #define MI_ALU0(op) _MI_ALU0(op)
95 #define MI_ALU1(op, x) _MI_ALU1(op, MI_ALU_##x)
96 #define MI_ALU2(op, x, y) _MI_ALU2(op, MI_ALU_##x, MI_ALU_##y)
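/* As an illustration, MI_ALU2(LOAD, SRCA, R0) expands to
 * _MI_ALU(MI_ALU_LOAD, MI_ALU_SRCA, MI_ALU_R0)
 *    = (0x080 << 20) | (0x20 << 10) | 0x00
 *    = 0x08008000
 * i.e. one MI_MATH ALU dword that loads GPR0 into the SRCA operand.
 */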
97
98 #define emit_lri32 ice->vtbl.load_register_imm32
99 #define emit_lri64 ice->vtbl.load_register_imm64
100 #define emit_lrr32 ice->vtbl.load_register_reg32
101
102 struct iris_query {
103 enum pipe_query_type type;
104 int index;
105
106 bool ready;
107
108 bool stalled;
109
110 uint64_t result;
111
112 struct iris_state_ref query_state_ref;
113 struct iris_query_snapshots *map;
114 struct iris_syncpt *syncpt;
115
116 int batch_idx;
117 };
118
119 struct iris_query_snapshots {
120 /** iris_render_condition's saved MI_PREDICATE_RESULT value. */
121 uint64_t predicate_result;
122
123 /** Have the start/end snapshots landed? */
124 uint64_t snapshots_landed;
125
126 /** Starting and ending counter snapshots */
127 uint64_t start;
128 uint64_t end;
129 };
130
131 struct iris_query_so_overflow {
132 uint64_t predicate_result;
133 uint64_t snapshots_landed;
134
135 struct {
136 uint64_t prim_storage_needed[2];
137 uint64_t num_prims[2];
138 } stream[4];
139 };
140
141 /**
142 * Is this type of query written by PIPE_CONTROL?
143 */
144 static bool
145 iris_is_query_pipelined(struct iris_query *q)
146 {
147 switch (q->type) {
148 case PIPE_QUERY_OCCLUSION_COUNTER:
149 case PIPE_QUERY_OCCLUSION_PREDICATE:
150 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
151 case PIPE_QUERY_TIMESTAMP:
152 case PIPE_QUERY_TIMESTAMP_DISJOINT:
153 case PIPE_QUERY_TIME_ELAPSED:
154 return true;
155
156 default:
157 return false;
158 }
159 }
160
161 static void
162 mark_available(struct iris_context *ice, struct iris_query *q)
163 {
164 struct iris_batch *batch = &ice->batches[q->batch_idx];
165 unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
166 unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);
167 struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
168 offset += q->query_state_ref.offset;
169
170 if (!iris_is_query_pipelined(q)) {
171 ice->vtbl.store_data_imm64(batch, bo, offset, true);
172 } else {
173 /* Order the availability write *after* the query results. */
174 flags |= PIPE_CONTROL_FLUSH_ENABLE;
175 iris_emit_pipe_control_write(batch, flags, bo, offset, true);
176 }
177 }
178
179 /**
180 * Write a pipelined snapshot (e.g. PS_DEPTH_COUNT) to the query BO at 'offset' via a PIPE_CONTROL.
181 */
182 static void
183 iris_pipelined_write(struct iris_batch *batch,
184 struct iris_query *q,
185 enum pipe_control_flags flags,
186 unsigned offset)
187 {
188 const struct gen_device_info *devinfo = &batch->screen->devinfo;
189 const unsigned optional_cs_stall =
190 devinfo->gen == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;
191 struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
192
193 iris_emit_pipe_control_write(batch, flags | optional_cs_stall,
194 bo, offset, 0ull);
195 }
196
197 static void
198 write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
199 {
200 struct iris_batch *batch = &ice->batches[q->batch_idx];
201 const struct gen_device_info *devinfo = &batch->screen->devinfo;
202 struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
203
204 if (!iris_is_query_pipelined(q)) {
205 iris_emit_pipe_control_flush(batch,
206 PIPE_CONTROL_CS_STALL |
207 PIPE_CONTROL_STALL_AT_SCOREBOARD);
208 q->stalled = true;
209 }
210
211 switch (q->type) {
212 case PIPE_QUERY_OCCLUSION_COUNTER:
213 case PIPE_QUERY_OCCLUSION_PREDICATE:
214 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
215 if (devinfo->gen >= 10) {
216 /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
217 * bit set prior to programming a PIPE_CONTROL with Write PS Depth
218 * Count sync operation."
219 */
220 iris_emit_pipe_control_flush(batch, PIPE_CONTROL_DEPTH_STALL);
221 }
222 iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
223 PIPE_CONTROL_WRITE_DEPTH_COUNT |
224 PIPE_CONTROL_DEPTH_STALL,
225 offset);
226 break;
227 case PIPE_QUERY_TIME_ELAPSED:
228 case PIPE_QUERY_TIMESTAMP:
229 case PIPE_QUERY_TIMESTAMP_DISJOINT:
230 iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
231 PIPE_CONTROL_WRITE_TIMESTAMP,
232 offset);
233 break;
234 case PIPE_QUERY_PRIMITIVES_GENERATED:
235 ice->vtbl.store_register_mem64(batch,
236 q->index == 0 ? CL_INVOCATION_COUNT :
237 SO_PRIM_STORAGE_NEEDED(q->index),
238 bo, offset, false);
239 break;
240 case PIPE_QUERY_PRIMITIVES_EMITTED:
241 ice->vtbl.store_register_mem64(batch,
242 SO_NUM_PRIMS_WRITTEN(q->index),
243 bo, offset, false);
244 break;
245 case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: {
246 static const uint32_t index_to_reg[] = {
247 IA_VERTICES_COUNT,
248 IA_PRIMITIVES_COUNT,
249 VS_INVOCATION_COUNT,
250 GS_INVOCATION_COUNT,
251 GS_PRIMITIVES_COUNT,
252 CL_INVOCATION_COUNT,
253 CL_PRIMITIVES_COUNT,
254 PS_INVOCATION_COUNT,
255 HS_INVOCATION_COUNT,
256 DS_INVOCATION_COUNT,
257 CS_INVOCATION_COUNT,
258 };
259 const uint32_t reg = index_to_reg[q->index];
260
261 ice->vtbl.store_register_mem64(batch, reg, bo, offset, false);
262 break;
263 }
264 default:
265 assert(false);
266 }
267 }
268
269 static void
270 write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end)
271 {
272 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
273 uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
274 struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
275 uint32_t offset = q->query_state_ref.offset;
276
277 iris_emit_pipe_control_flush(batch,
278 PIPE_CONTROL_CS_STALL |
279 PIPE_CONTROL_STALL_AT_SCOREBOARD);
280 for (uint32_t i = 0; i < count; i++) {
281 int s = q->index + i;
282 int g_idx = offset + offsetof(struct iris_query_so_overflow,
283 stream[s].num_prims[end]);
284 int w_idx = offset + offsetof(struct iris_query_so_overflow,
285 stream[s].prim_storage_needed[end]);
286 ice->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
287 bo, g_idx, false);
288 ice->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
289 bo, w_idx, false);
290 }
291 }
292
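/**
 * Scale a raw GPU timestamp (in timestamp-counter ticks) into nanoseconds
 * using the device's reported timestamp frequency.  For example, with a
 * 12 MHz timestamp counter each tick would be roughly 83.3 ns.
 */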
293 uint64_t
294 iris_timebase_scale(const struct gen_device_info *devinfo,
295 uint64_t gpu_timestamp)
296 {
297 return (1000000000ull * gpu_timestamp) / devinfo->timestamp_frequency;
298 }
299
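/**
 * Compute the number of ticks between two raw timestamp snapshots,
 * assuming at most one wrap of the TIMESTAMP_BITS-wide counter: if the
 * end snapshot is smaller than the start, a single wraparound occurred.
 */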
300 static uint64_t
301 iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)
302 {
303 if (time0 > time1) {
304 return (1ULL << TIMESTAMP_BITS) + time1 - time0;
305 } else {
306 return time1 - time0;
307 }
308 }
309
310 static bool
311 stream_overflowed(struct iris_query_so_overflow *so, int s)
312 {
313 return (so->stream[s].prim_storage_needed[1] -
314 so->stream[s].prim_storage_needed[0]) !=
315 (so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
316 }
317
318 static void
319 calculate_result_on_cpu(const struct gen_device_info *devinfo,
320 struct iris_query *q)
321 {
322 switch (q->type) {
323 case PIPE_QUERY_OCCLUSION_PREDICATE:
324 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
325 q->result = q->map->end != q->map->start;
326 break;
327 case PIPE_QUERY_TIMESTAMP:
328 case PIPE_QUERY_TIMESTAMP_DISJOINT:
329 /* The timestamp is the single starting snapshot. */
330 q->result = iris_timebase_scale(devinfo, q->map->start);
331 q->result &= (1ull << TIMESTAMP_BITS) - 1;
332 break;
333 case PIPE_QUERY_TIME_ELAPSED:
334 q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);
335 q->result = iris_timebase_scale(devinfo, q->result);
336 q->result &= (1ull << TIMESTAMP_BITS) - 1;
337 break;
338 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
339 q->result = stream_overflowed((void *) q->map, q->index);
340 break;
341 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
342 q->result = false;
343 for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
344 q->result |= stream_overflowed((void *) q->map, i);
345 break;
346 case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
347 q->result = q->map->end - q->map->start;
348
349 /* WaDividePSInvocationCountBy4:HSW,BDW */
350 if (devinfo->gen == 8 && q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
351 q->result /= 4;
352 break;
353 case PIPE_QUERY_OCCLUSION_COUNTER:
354 case PIPE_QUERY_PRIMITIVES_GENERATED:
355 case PIPE_QUERY_PRIMITIVES_EMITTED:
356 default:
357 q->result = q->map->end - q->map->start;
358 break;
359 }
360
361 q->ready = true;
362 }
363
364 static void
365 emit_alu_add(struct iris_batch *batch, unsigned dst_reg,
366 unsigned reg_a, unsigned reg_b)
367 {
368 uint32_t *math = iris_get_command_space(batch, 5 * sizeof(uint32_t));
369
370 math[0] = MI_MATH | (5 - 2);
371 math[1] = _MI_ALU2(LOAD, MI_ALU_SRCA, reg_a);
372 math[2] = _MI_ALU2(LOAD, MI_ALU_SRCB, reg_b);
373 math[3] = _MI_ALU0(ADD);
374 math[4] = _MI_ALU2(STORE, dst_reg, MI_ALU_ACCU);
375 }
376
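/*
 * Shift 'src_reg' left by 'shift' bits into 'dst_reg' by emitting 'shift'
 * back-to-back "add the register to itself" sequences in one MI_MATH
 * packet; the shift is open-coded as repeated doubling.
 */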
377 static void
378 emit_alu_shl(struct iris_batch *batch, unsigned dst_reg,
379 unsigned src_reg, unsigned shift)
380 {
381 assert(shift > 0);
382
383 int dwords = 1 + 4 * shift;
384
385 uint32_t *math = iris_get_command_space(batch, sizeof(uint32_t) * dwords);
386
387 math[0] = MI_MATH | ((1 + 4 * shift) - 2);
388
389 for (unsigned i = 0; i < shift; i++) {
390 unsigned add_src = (i == 0) ? src_reg : dst_reg;
391 math[1 + (i * 4) + 0] = _MI_ALU2(LOAD, MI_ALU_SRCA, add_src);
392 math[1 + (i * 4) + 1] = _MI_ALU2(LOAD, MI_ALU_SRCB, add_src);
393 math[1 + (i * 4) + 2] = _MI_ALU0(ADD);
394 math[1 + (i * 4) + 3] = _MI_ALU2(STORE, dst_reg, MI_ALU_ACCU);
395 }
396 }
397
398 /* Emit dwords to multiply GPR0 by N */
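/* A sketch of the expansion for N = 5 (binary 101, top_bit = 2):
 *
 *   i = 1:  R1 = R0 + R0                  (2 * R0; bit 1 of N is clear)
 *   i = 0:  ACCU = R1 + R1                (4 * R0; bit 0 of N is set, so:)
 *           R1 = ACCU;  ACCU = R0 + R1    (5 * R0)
 *           R0 = ACCU
 *
 * Each step doubles the running value and conditionally adds the original
 * GPR0, i.e. a shift-and-add multiply driven by the bits of N, top down.
 */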
399 static void
400 build_alu_multiply_gpr0(uint32_t *dw, unsigned *dw_count, uint32_t N)
401 {
402 VK_OUTARRAY_MAKE(out, dw, dw_count);
403
404 #define APPEND_ALU(op, x, y) \
405 vk_outarray_append(&out, alu_dw) *alu_dw = _MI_ALU(MI_ALU_##op, x, y)
406
407 assert(N > 0);
408 unsigned top_bit = 31 - __builtin_clz(N);
409 for (int i = top_bit - 1; i >= 0; i--) {
410 /* We get our initial data in GPR0 and we write the final data out to
411 * GPR0, but we use GPR1 as our scratch register.
412 */
413 unsigned src_reg = i == top_bit - 1 ? MI_ALU_R0 : MI_ALU_R1;
414 unsigned dst_reg = i == 0 ? MI_ALU_R0 : MI_ALU_R1;
415
416 /* Shift the current value left by 1 */
417 APPEND_ALU(LOAD, MI_ALU_SRCA, src_reg);
418 APPEND_ALU(LOAD, MI_ALU_SRCB, src_reg);
419 APPEND_ALU(ADD, 0, 0);
420
421 if (N & (1 << i)) {
422 /* Store ACCU to R1 and add R0 to R1 */
423 APPEND_ALU(STORE, MI_ALU_R1, MI_ALU_ACCU);
424 APPEND_ALU(LOAD, MI_ALU_SRCA, MI_ALU_R0);
425 APPEND_ALU(LOAD, MI_ALU_SRCB, MI_ALU_R1);
426 APPEND_ALU(ADD, 0, 0);
427 }
428
429 APPEND_ALU(STORE, dst_reg, MI_ALU_ACCU);
430 }
431
432 #undef APPEND_ALU
433 }
434
435 static void
436 emit_mul_gpr0(struct iris_batch *batch, uint32_t N)
437 {
438 uint32_t num_dwords;
439 build_alu_multiply_gpr0(NULL, &num_dwords, N);
440
441 uint32_t *math = iris_get_command_space(batch, 4 * num_dwords);
442 math[0] = MI_MATH | (num_dwords - 2);
443 build_alu_multiply_gpr0(&math[1], &num_dwords, N);
444 }
445
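/*
 * Divide the low 32 bits of GPR0 by the constant D, leaving the quotient
 * in GPR0.  Roughly speaking, the general (non-power-of-two) path below
 * computes, with the parameters from util_compute_fast_udiv_info():
 *
 *   GPR0 = (((GPR0 >> pre_shift) * multiplier
 *             + (increment ? multiplier : 0)) >> 32) >> post_shift
 *
 * where every right shift is implemented as "shift left, then take the
 * high dword" of the 64-bit GPR, since the shifts themselves are built
 * out of repeated ADDs (see emit_alu_shl).
 */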
446 void
447 iris_math_div32_gpr0(struct iris_context *ice,
448 struct iris_batch *batch,
449 uint32_t D)
450 {
451 /* Zero out the top of GPR0 */
452 emit_lri32(batch, CS_GPR(0) + 4, 0);
453
454 if (D == 0) {
455 /* Division by zero is invalid, but we should do something, so set GPR0 to 0. */
456 emit_lri32(batch, CS_GPR(0), 0);
457 } else if (util_is_power_of_two_or_zero(D)) {
458 unsigned log2_D = util_logbase2(D);
459 assert(log2_D < 32);
460 /* We right-shift by log2(D) by left-shifting by 32 - log2(D) and taking
461 * the top 32 bits of the result.
462 */
463 emit_alu_shl(batch, MI_ALU_R0, MI_ALU_R0, 32 - log2_D);
464 emit_lrr32(batch, CS_GPR(0) + 0, CS_GPR(0) + 4);
465 emit_lri32(batch, CS_GPR(0) + 4, 0);
466 } else {
467 struct util_fast_udiv_info m = util_compute_fast_udiv_info(D, 32, 32);
468 assert(m.multiplier <= UINT32_MAX);
469
470 if (m.pre_shift) {
471 /* We right-shift by m.pre_shift by left-shifting by (32 - m.pre_shift) and
472 * taking the top 32 bits of the result.
473 */
474 if (m.pre_shift < 32)
475 emit_alu_shl(batch, MI_ALU_R0, MI_ALU_R0, 32 - m.pre_shift);
476 emit_lrr32(batch, CS_GPR(0) + 0, CS_GPR(0) + 4);
477 emit_lri32(batch, CS_GPR(0) + 4, 0);
478 }
479
480 /* Do the 32x32 multiply into gpr0 */
481 emit_mul_gpr0(batch, m.multiplier);
482
483 if (m.increment) {
484 /* If we need to increment, add the multiplier (computing (n + 1) * m) */
485 emit_lri32(batch, CS_GPR(1) + 0, m.multiplier);
486 emit_lri32(batch, CS_GPR(1) + 4, 0);
487 emit_alu_add(batch, MI_ALU_R0, MI_ALU_R0, MI_ALU_R1);
488 }
489
490 /* Shift by 32 */
491 emit_lrr32(batch, CS_GPR(0) + 0, CS_GPR(0) + 4);
492 emit_lri32(batch, CS_GPR(0) + 4, 0);
493
494 if (m.post_shift) {
495 /* We right-shift by m.post_shift by left-shifting by (32 - m.post_shift) and
496 * taking the top 32 bits of the result.
497 */
498 if (m.post_shift < 32)
499 emit_alu_shl(batch, MI_ALU_R0, MI_ALU_R0, 32 - m.post_shift);
500 emit_lrr32(batch, CS_GPR(0) + 0, CS_GPR(0) + 4);
501 emit_lri32(batch, CS_GPR(0) + 4, 0);
502 }
503 }
504 }
505
506 void
507 iris_math_add32_gpr0(struct iris_context *ice,
508 struct iris_batch *batch,
509 uint32_t x)
510 {
511 emit_lri32(batch, CS_GPR(1), x);
512 emit_alu_add(batch, MI_ALU_R0, MI_ALU_R0, MI_ALU_R1);
513 }
514
515 /*
516 * GPR0 = (GPR0 == 0) ? 0 : 1;
517 */
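/*
 * Implemented by adding zero to GPR0 and storing the inverted zero flag,
 * which is nonzero iff GPR0 was nonzero; ANDing with GPR1 (preloaded with
 * 1) then normalizes the value to exactly 0 or 1.
 */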
518 static void
519 gpr0_to_bool(struct iris_context *ice)
520 {
521 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
522
523 ice->vtbl.load_register_imm64(batch, CS_GPR(1), 1ull);
524
525 static const uint32_t math[] = {
526 MI_MATH | (9 - 2),
527 MI_ALU2(LOAD, SRCA, R0),
528 MI_ALU1(LOAD0, SRCB),
529 MI_ALU0(ADD),
530 MI_ALU2(STOREINV, R0, ZF),
531 MI_ALU2(LOAD, SRCA, R0),
532 MI_ALU2(LOAD, SRCB, R1),
533 MI_ALU0(AND),
534 MI_ALU2(STORE, R0, ACCU),
535 };
536 iris_batch_emit(batch, math, sizeof(math));
537 }
538
539 static void
540 load_overflow_data_to_cs_gprs(struct iris_context *ice,
541 struct iris_query *q,
542 int idx)
543 {
544 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
545 struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
546 uint32_t offset = q->query_state_ref.offset;
547
548 ice->vtbl.load_register_mem64(batch, CS_GPR(1), bo, offset +
549 offsetof(struct iris_query_so_overflow,
550 stream[idx].prim_storage_needed[0]));
551 ice->vtbl.load_register_mem64(batch, CS_GPR(2), bo, offset +
552 offsetof(struct iris_query_so_overflow,
553 stream[idx].prim_storage_needed[1]));
554
555 ice->vtbl.load_register_mem64(batch, CS_GPR(3), bo, offset +
556 offsetof(struct iris_query_so_overflow,
557 stream[idx].num_prims[0]));
558 ice->vtbl.load_register_mem64(batch, CS_GPR(4), bo, offset +
559 offsetof(struct iris_query_so_overflow,
560 stream[idx].num_prims[1]));
561 }
562
563 /*
564 * R3 = R4 - R3;
565 * R1 = R2 - R1;
566 * R1 = R3 - R1;
567 * R0 = R0 | R1;
568 */
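/*
 * With the registers loaded by load_overflow_data_to_cs_gprs()
 * (R1/R2 = prim_storage_needed[0/1], R3/R4 = num_prims[0/1]), this ORs a
 * nonzero value into R0 whenever the primitives actually written differ
 * from the primitives that needed storage, i.e. whenever the stream
 * overflowed.
 */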
569 static void
570 calc_overflow_for_stream(struct iris_context *ice)
571 {
572 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
573 static const uint32_t maths[] = {
574 MI_MATH | (17 - 2),
575 MI_ALU2(LOAD, SRCA, R4),
576 MI_ALU2(LOAD, SRCB, R3),
577 MI_ALU0(SUB),
578 MI_ALU2(STORE, R3, ACCU),
579 MI_ALU2(LOAD, SRCA, R2),
580 MI_ALU2(LOAD, SRCB, R1),
581 MI_ALU0(SUB),
582 MI_ALU2(STORE, R1, ACCU),
583 MI_ALU2(LOAD, SRCA, R3),
584 MI_ALU2(LOAD, SRCB, R1),
585 MI_ALU0(SUB),
586 MI_ALU2(STORE, R1, ACCU),
587 MI_ALU2(LOAD, SRCA, R1),
588 MI_ALU2(LOAD, SRCB, R0),
589 MI_ALU0(OR),
590 MI_ALU2(STORE, R0, ACCU),
591 };
592
593 iris_batch_emit(batch, maths, sizeof(maths));
594 }
595
596 static void
597 overflow_result_to_gpr0(struct iris_context *ice, struct iris_query *q)
598 {
599 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
600
601 ice->vtbl.load_register_imm64(batch, CS_GPR(0), 0ull);
602
603 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
604 load_overflow_data_to_cs_gprs(ice, q, q->index);
605 calc_overflow_for_stream(ice);
606 } else {
607 for (int i = 0; i < MAX_VERTEX_STREAMS; i++) {
608 load_overflow_data_to_cs_gprs(ice, q, i);
609 calc_overflow_for_stream(ice);
610 }
611 }
612
613 gpr0_to_bool(ice);
614 }
615
616 /*
617 * GPR0 = GPR0 & ((1ull << n) -1);
618 */
619 static void
620 keep_gpr0_lower_n_bits(struct iris_context *ice, uint32_t n)
621 {
622 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
623
624 ice->vtbl.load_register_imm64(batch, CS_GPR(1), (1ull << n) - 1);
625 static const uint32_t math[] = {
626 MI_MATH | (5 - 2),
627 MI_ALU2(LOAD, SRCA, R0),
628 MI_ALU2(LOAD, SRCB, R1),
629 MI_ALU0(AND),
630 MI_ALU2(STORE, R0, ACCU),
631 };
632 iris_batch_emit(batch, math, sizeof(math));
633 }
634
635 /*
636 * GPR0 = GPR0 << 30;
637 */
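/*
 * Emitted as five MI_MATH packets of six "R0 = R0 + R0" doublings each,
 * for 30 doublings in total.
 */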
638 static void
639 shl_gpr0_by_30_bits(struct iris_context *ice)
640 {
641 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
642 /* First keep only the low 34 bits of GPR0 so the 30-bit shift below can't overflow */
643 keep_gpr0_lower_n_bits(ice, 34);
644
645 static const uint32_t shl_math[] = {
646 MI_ALU2(LOAD, SRCA, R0),
647 MI_ALU2(LOAD, SRCB, R0),
648 MI_ALU0(ADD),
649 MI_ALU2(STORE, R0, ACCU),
650 };
651
652 const uint32_t outer_count = 5;
653 const uint32_t inner_count = 6;
654 const uint32_t cmd_len = 1 + inner_count * ARRAY_SIZE(shl_math);
655 const uint32_t batch_len = cmd_len * outer_count;
656 uint32_t *map = iris_get_command_space(batch, batch_len * 4);
657 uint32_t offset = 0;
658 for (int o = 0; o < outer_count; o++) {
659 map[offset++] = MI_MATH | (cmd_len - 2);
660 for (int i = 0; i < inner_count; i++) {
661 memcpy(&map[offset], shl_math, sizeof(shl_math));
662 offset += 4;
663 }
664 }
665 }
666
667 /*
668 * GPR0 = GPR0 >> 2;
669 *
670 * Note that the upper 30 bits of GPR0 are lost!
671 */
672 static void
673 shr_gpr0_by_2_bits(struct iris_context *ice)
674 {
675 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
676 shl_gpr0_by_30_bits(ice);
677 ice->vtbl.load_register_reg32(batch, CS_GPR(0) + 4, CS_GPR(0));
678 ice->vtbl.load_register_imm32(batch, CS_GPR(0) + 4, 0);
679 }
680
681 /**
682 * Calculate the result and store it to CS_GPR0.
683 */
684 static void
685 calculate_result_on_gpu(struct iris_context *ice, struct iris_query *q)
686 {
687 struct iris_batch *batch = &ice->batches[q->batch_idx];
688 struct iris_screen *screen = (void *) ice->ctx.screen;
689 const struct gen_device_info *devinfo = &batch->screen->devinfo;
690 struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
691 uint32_t offset = q->query_state_ref.offset;
692
693 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
694 q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
695 overflow_result_to_gpr0(ice, q);
696 return;
697 }
698
699 if (q->type == PIPE_QUERY_TIMESTAMP) {
700 ice->vtbl.load_register_mem64(batch, CS_GPR(0), bo,
701 offset +
702 offsetof(struct iris_query_snapshots, start));
703 /* TODO: This discards any fractional bits of the timebase scale.
704 * We would need to do a bit of fixed point math on the CS ALU, or
705 * launch an actual shader to calculate this with full precision.
706 */
707 emit_mul_gpr0(batch, (1000000000ull / screen->devinfo.timestamp_frequency));
708 keep_gpr0_lower_n_bits(ice, 36);
709 return;
710 }
711
712 ice->vtbl.load_register_mem64(batch, CS_GPR(1), bo,
713 offset +
714 offsetof(struct iris_query_snapshots, start));
715 ice->vtbl.load_register_mem64(batch, CS_GPR(2), bo,
716 offset +
717 offsetof(struct iris_query_snapshots, end));
718
719 static const uint32_t math[] = {
720 MI_MATH | (5 - 2),
721 MI_ALU2(LOAD, SRCA, R2),
722 MI_ALU2(LOAD, SRCB, R1),
723 MI_ALU0(SUB),
724 MI_ALU2(STORE, R0, ACCU),
725 };
726 iris_batch_emit(batch, math, sizeof(math));
727
728 /* WaDividePSInvocationCountBy4:HSW,BDW */
729 if (devinfo->gen == 8 &&
730 q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
731 q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
732 shr_gpr0_by_2_bits(ice);
733
734 if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
735 q->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE)
736 gpr0_to_bool(ice);
737
738 if (q->type == PIPE_QUERY_TIME_ELAPSED) {
739 /* TODO: This discards fractional bits (see above). */
740 emit_mul_gpr0(batch, (1000000000ull / screen->devinfo.timestamp_frequency));
741 }
742 }
743
744 static struct pipe_query *
745 iris_create_query(struct pipe_context *ctx,
746 unsigned query_type,
747 unsigned index)
748 {
749 struct iris_query *q = calloc(1, sizeof(struct iris_query));
750
751 q->type = query_type;
752 q->index = index;
753
754 if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
755 q->index == PIPE_STAT_QUERY_CS_INVOCATIONS)
756 q->batch_idx = IRIS_BATCH_COMPUTE;
757 else
758 q->batch_idx = IRIS_BATCH_RENDER;
759 return (struct pipe_query *) q;
760 }
761
762 static void
763 iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
764 {
765 struct iris_query *query = (void *) p_query;
766 struct iris_screen *screen = (void *) ctx->screen;
767 iris_syncpt_reference(screen, &query->syncpt, NULL);
768 free(query);
769 }
770
771
772 static boolean
773 iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
774 {
775 struct iris_context *ice = (void *) ctx;
776 struct iris_query *q = (void *) query;
777 void *ptr = NULL;
778 uint32_t size;
779
780 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
781 q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
782 size = sizeof(struct iris_query_so_overflow);
783 else
784 size = sizeof(struct iris_query_snapshots);
785
786 u_upload_alloc(ice->query_buffer_uploader, 0,
787 size, size, &q->query_state_ref.offset,
788 &q->query_state_ref.res, &ptr);
789
790 if (!iris_resource_bo(q->query_state_ref.res))
791 return false;
792
793 q->map = ptr;
794 if (!q->map)
795 return false;
796
797 q->result = 0ull;
798 q->ready = false;
799 WRITE_ONCE(q->map->snapshots_landed, false);
800
801 if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
802 ice->state.prims_generated_query_active = true;
803 ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
804 }
805
806 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
807 q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
808 write_overflow_values(ice, q, false);
809 else
810 write_value(ice, q,
811 q->query_state_ref.offset +
812 offsetof(struct iris_query_snapshots, start));
813
814 return true;
815 }
816
817 static bool
818 iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
819 {
820 struct iris_context *ice = (void *) ctx;
821 struct iris_query *q = (void *) query;
822 struct iris_batch *batch = &ice->batches[q->batch_idx];
823
824 if (q->type == PIPE_QUERY_TIMESTAMP) {
825 iris_begin_query(ctx, query);
826 iris_batch_reference_signal_syncpt(batch, &q->syncpt);
827 mark_available(ice, q);
828 return true;
829 }
830
831 if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
832 ice->state.prims_generated_query_active = false;
833 ice->state.dirty |= IRIS_DIRTY_STREAMOUT | IRIS_DIRTY_CLIP;
834 }
835
836 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
837 q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
838 write_overflow_values(ice, q, true);
839 else
840 write_value(ice, q,
841 q->query_state_ref.offset +
842 offsetof(struct iris_query_snapshots, end));
843
844 iris_batch_reference_signal_syncpt(batch, &q->syncpt);
845 mark_available(ice, q);
846
847 return true;
848 }
849
850 /**
851 * See if the snapshots have landed for a query, and if so, compute the
852 * result and mark it ready. Does not flush (unlike iris_get_query_result).
853 */
854 static void
855 iris_check_query_no_flush(struct iris_context *ice, struct iris_query *q)
856 {
857 struct iris_screen *screen = (void *) ice->ctx.screen;
858 const struct gen_device_info *devinfo = &screen->devinfo;
859
860 if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
861 calculate_result_on_cpu(devinfo, q);
862 }
863 }
864
865 static boolean
866 iris_get_query_result(struct pipe_context *ctx,
867 struct pipe_query *query,
868 boolean wait,
869 union pipe_query_result *result)
870 {
871 struct iris_context *ice = (void *) ctx;
872 struct iris_query *q = (void *) query;
873 struct iris_screen *screen = (void *) ctx->screen;
874 const struct gen_device_info *devinfo = &screen->devinfo;
875 struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
876
877 if (!q->ready) {
878 if (iris_batch_references(&ice->batches[q->batch_idx], bo))
879 iris_batch_flush(&ice->batches[q->batch_idx]);
880
881 while (!READ_ONCE(q->map->snapshots_landed)) {
882 if (wait)
883 iris_wait_syncpt(ctx->screen, q->syncpt, INT64_MAX);
884 else
885 return false;
886 }
887
888 assert(READ_ONCE(q->map->snapshots_landed));
889 calculate_result_on_cpu(devinfo, q);
890 }
891
892 assert(q->ready);
893
894 result->u64 = q->result;
895
896 return true;
897 }
898
899 static void
900 iris_get_query_result_resource(struct pipe_context *ctx,
901 struct pipe_query *query,
902 boolean wait,
903 enum pipe_query_value_type result_type,
904 int index,
905 struct pipe_resource *p_res,
906 unsigned offset)
907 {
908 struct iris_context *ice = (void *) ctx;
909 struct iris_query *q = (void *) query;
910 struct iris_batch *batch = &ice->batches[q->batch_idx];
911 const struct gen_device_info *devinfo = &batch->screen->devinfo;
912 struct iris_resource *res = (void *) p_res;
913 struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
914 unsigned snapshots_landed_offset =
915 offsetof(struct iris_query_snapshots, snapshots_landed);
916
917 res->bind_history |= PIPE_BIND_QUERY_BUFFER;
918
919 if (index == -1) {
920 /* They're asking for the availability of the result. If we still
921 * have commands queued up which produce the result, submit them
922 * now so that progress happens. Either way, copy the snapshots
923 * landed field to the destination resource.
924 */
925 if (iris_batch_references(batch, bo))
926 iris_batch_flush(batch);
927
928 ice->vtbl.copy_mem_mem(batch, iris_resource_bo(p_res), offset,
929 bo, snapshots_landed_offset,
930 result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
931 return;
932 }
933
934 if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
935 /* The final snapshots happen to have landed, so let's just compute
936 * the result on the CPU now...
937 */
938 calculate_result_on_cpu(devinfo, q);
939 }
940
941 if (q->ready) {
942 /* We happen to have the result on the CPU, so just copy it. */
943 if (result_type <= PIPE_QUERY_TYPE_U32) {
944 ice->vtbl.store_data_imm32(batch, iris_resource_bo(p_res), offset,
945 q->result);
946 } else {
947 ice->vtbl.store_data_imm64(batch, iris_resource_bo(p_res), offset,
948 q->result);
949 }
950
951 /* Make sure the result lands before they bind the QBO elsewhere
952 * and use the result.
953 */
954 // XXX: Why? i965 doesn't do this.
955 iris_emit_pipe_control_flush(batch, PIPE_CONTROL_CS_STALL);
956 return;
957 }
958
959 /* Calculate the result to CS_GPR0 */
960 calculate_result_on_gpu(ice, q);
961
962 bool predicated = !wait && !q->stalled;
963
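/* If the caller doesn't want to wait and nothing above has stalled on
 * the results, predicate the write on the availability snapshot so an
 * unavailable result doesn't clobber the destination buffer.
 */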
964 if (predicated) {
965 ice->vtbl.load_register_imm64(batch, MI_PREDICATE_SRC1, 0ull);
966 ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, bo,
967 snapshots_landed_offset);
968 uint32_t predicate = MI_PREDICATE |
969 MI_PREDICATE_LOADOP_LOADINV |
970 MI_PREDICATE_COMBINEOP_SET |
971 MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
972 iris_batch_emit(batch, &predicate, sizeof(uint32_t));
973 }
974
975 if (result_type <= PIPE_QUERY_TYPE_U32) {
976 ice->vtbl.store_register_mem32(batch, CS_GPR(0),
977 iris_resource_bo(p_res),
978 offset, predicated);
979 } else {
980 ice->vtbl.store_register_mem64(batch, CS_GPR(0),
981 iris_resource_bo(p_res),
982 offset, predicated);
983 }
984 }
985
986 static void
987 iris_set_active_query_state(struct pipe_context *ctx, boolean enable)
988 {
989 struct iris_context *ice = (void *) ctx;
990
991 if (ice->state.statistics_counters_enabled == enable)
992 return;
993
994 // XXX: most packets aren't paying attention to this yet, because it'd
995 // have to be done dynamically at draw time, which is a pain
996 ice->state.statistics_counters_enabled = enable;
997 ice->state.dirty |= IRIS_DIRTY_CLIP |
998 IRIS_DIRTY_GS |
999 IRIS_DIRTY_RASTER |
1000 IRIS_DIRTY_STREAMOUT |
1001 IRIS_DIRTY_TCS |
1002 IRIS_DIRTY_TES |
1003 IRIS_DIRTY_VS |
1004 IRIS_DIRTY_WM;
1005 }
1006
1007 static void
1008 set_predicate_enable(struct iris_context *ice, bool value)
1009 {
1010 if (value)
1011 ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
1012 else
1013 ice->state.predicate = IRIS_PREDICATE_STATE_DONT_RENDER;
1014 }
1015
1016 static void
1017 set_predicate_for_result(struct iris_context *ice,
1018 struct iris_query *q,
1019 bool inverted)
1020 {
1021 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
1022 struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
1023
1024 /* The CPU doesn't have the query result yet; use hardware predication */
1025 ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT;
1026
1027 /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
1028 iris_emit_pipe_control_flush(batch, PIPE_CONTROL_FLUSH_ENABLE);
1029 q->stalled = true;
1030
1031 switch (q->type) {
1032 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
1033 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
1034 overflow_result_to_gpr0(ice, q);
1035
1036 ice->vtbl.load_register_reg64(batch, MI_PREDICATE_SRC0, CS_GPR(0));
1037 ice->vtbl.load_register_imm64(batch, MI_PREDICATE_SRC1, 0ull);
1038 break;
1039 default:
1040 /* PIPE_QUERY_OCCLUSION_* */
1041 ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, bo,
1042 offsetof(struct iris_query_snapshots, start) +
1043 q->query_state_ref.offset);
1044 ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC1, bo,
1045 offsetof(struct iris_query_snapshots, end) +
1046 q->query_state_ref.offset);
1047 break;
1048 }
1049
1050 uint32_t mi_predicate = MI_PREDICATE |
1051 MI_PREDICATE_COMBINEOP_SET |
1052 MI_PREDICATE_COMPAREOP_SRCS_EQUAL |
1053 (inverted ? MI_PREDICATE_LOADOP_LOAD
1054 : MI_PREDICATE_LOADOP_LOADINV);
1055 iris_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
1056
1057 /* We immediately set the predicate on the render batch, as all the
1058 * counters come from 3D operations. However, we may need to predicate
1059 * a compute dispatch, which executes in a different GEM context and has
1060 * a different MI_PREDICATE_RESULT register. So, we save the result to
1061 * memory and reload it in iris_launch_grid.
1062 */
1063 unsigned offset = q->query_state_ref.offset +
1064 offsetof(struct iris_query_snapshots, predicate_result);
1065 ice->vtbl.store_register_mem64(batch, MI_PREDICATE_RESULT,
1066 bo, offset, false);
1067 ice->state.compute_predicate = bo;
1068 }
1069
1070 static void
1071 iris_render_condition(struct pipe_context *ctx,
1072 struct pipe_query *query,
1073 boolean condition,
1074 enum pipe_render_cond_flag mode)
1075 {
1076 struct iris_context *ice = (void *) ctx;
1077 struct iris_query *q = (void *) query;
1078
1079 /* The old condition isn't relevant; we'll update it if necessary */
1080 ice->state.compute_predicate = NULL;
1081 ice->condition.query = q;
1082 ice->condition.condition = condition;
1083
1084 if (!q) {
1085 ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
1086 return;
1087 }
1088
1089 iris_check_query_no_flush(ice, q);
1090
1091 if (q->result || q->ready) {
1092 set_predicate_enable(ice, (q->result != 0) ^ condition);
1093 } else {
1094 if (mode == PIPE_RENDER_COND_NO_WAIT ||
1095 mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
1096 perf_debug(&ice->dbg, "Conditional rendering demoted from "
1097 "\"no wait\" to \"wait\".");
1098 }
1099 set_predicate_for_result(ice, q, condition);
1100 }
1101 }
1102
1103 void
1104 iris_resolve_conditional_render(struct iris_context *ice)
1105 {
1106 struct pipe_context *ctx = (void *) ice;
1107 struct iris_query *q = ice->condition.query;
1108 struct pipe_query *query = (void *) q;
1109 union pipe_query_result result;
1110
1111 if (ice->state.predicate != IRIS_PREDICATE_STATE_USE_BIT)
1112 return;
1113
1114 assert(q);
1115
1116 iris_get_query_result(ctx, query, true, &result);
1117 set_predicate_enable(ice, (q->result != 0) ^ ice->condition.condition);
1118 }
1119
1120 void
1121 iris_init_query_functions(struct pipe_context *ctx)
1122 {
1123 ctx->create_query = iris_create_query;
1124 ctx->destroy_query = iris_destroy_query;
1125 ctx->begin_query = iris_begin_query;
1126 ctx->end_query = iris_end_query;
1127 ctx->get_query_result = iris_get_query_result;
1128 ctx->get_query_result_resource = iris_get_query_result_resource;
1129 ctx->set_active_query_state = iris_set_active_query_state;
1130 ctx->render_condition = iris_render_condition;
1131 }