iris: execute compute-related queries on the compute batch.
[mesa.git] / src / gallium / drivers / iris / iris_query.c
1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file iris_query.c
25 *
26 * Query object support. This allows measuring various simple statistics
27 * via counters on the GPU.
28 */
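/*
 * From the state tracker's point of view, the flow is roughly the following
 * (a sketch of the Gallium query interface, not code from this file):
 *
 *    struct pipe_query *q =
 *       ctx->create_query(ctx, PIPE_QUERY_PRIMITIVES_GENERATED, 0);
 *    ctx->begin_query(ctx, q);
 *    ... emit draws ...
 *    ctx->end_query(ctx, q);
 *
 *    union pipe_query_result result;
 *    ctx->get_query_result(ctx, q, true (wait), &result);
 *    ctx->destroy_query(ctx, q);
 */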
29
30 #include <stdio.h>
31 #include <errno.h>
32 #include "pipe/p_defines.h"
33 #include "pipe/p_state.h"
34 #include "pipe/p_context.h"
35 #include "pipe/p_screen.h"
36 #include "util/u_inlines.h"
37 #include "iris_context.h"
38 #include "iris_defines.h"
39 #include "iris_resource.h"
40 #include "iris_screen.h"
41
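/*
 * MMIO offsets of the pipeline statistics and streamout counter registers
 * that the code below snapshots with MI_STORE_REGISTER_MEM (via
 * vtbl.store_register_mem64).
 */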
42 #define IA_VERTICES_COUNT 0x2310
43 #define IA_PRIMITIVES_COUNT 0x2318
44 #define VS_INVOCATION_COUNT 0x2320
45 #define HS_INVOCATION_COUNT 0x2300
46 #define DS_INVOCATION_COUNT 0x2308
47 #define GS_INVOCATION_COUNT 0x2328
48 #define GS_PRIMITIVES_COUNT 0x2330
49 #define CL_INVOCATION_COUNT 0x2338
50 #define CL_PRIMITIVES_COUNT 0x2340
51 #define PS_INVOCATION_COUNT 0x2348
52 #define CS_INVOCATION_COUNT 0x2290
53 #define PS_DEPTH_COUNT 0x2350
54
55 #define SO_PRIM_STORAGE_NEEDED(n) (0x5240 + (n) * 8)
56
57 #define SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8)
58
59 #define CS_GPR(n) (0x2600 + (n) * 8)
60
61 #define MI_MATH (0x1a << 23)
62
63 #define MI_ALU_LOAD 0x080
64 #define MI_ALU_LOADINV 0x480
65 #define MI_ALU_LOAD0 0x081
66 #define MI_ALU_LOAD1 0x481
67 #define MI_ALU_ADD 0x100
68 #define MI_ALU_SUB 0x101
69 #define MI_ALU_AND 0x102
70 #define MI_ALU_OR 0x103
71 #define MI_ALU_XOR 0x104
72 #define MI_ALU_STORE 0x180
73 #define MI_ALU_STOREINV 0x580
74
75 #define MI_ALU_R0 0x00
76 #define MI_ALU_R1 0x01
77 #define MI_ALU_R2 0x02
78 #define MI_ALU_R3 0x03
79 #define MI_ALU_R4 0x04
80 #define MI_ALU_SRCA 0x20
81 #define MI_ALU_SRCB 0x21
82 #define MI_ALU_ACCU 0x31
83 #define MI_ALU_ZF 0x32
84 #define MI_ALU_CF 0x33
85
86 #define MI_ALU0(op) ((MI_ALU_##op << 20))
87 #define MI_ALU1(op, x) ((MI_ALU_##op << 20) | (MI_ALU_##x << 10))
88 #define MI_ALU2(op, x, y) \
89 ((MI_ALU_##op << 20) | (MI_ALU_##x << 10) | (MI_ALU_##y))
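/*
 * Each dword following the MI_MATH header encodes one ALU instruction: the
 * operation in bits 31:20, operand 1 in bits 19:10, and operand 2 in bits
 * 9:0.  For example, MI_ALU2(LOAD, SRCA, R0) is "load GPR0 into source A".
 */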
90
91 struct iris_query {
92 enum pipe_query_type type;
93 int index;
94
95 bool ready;
96
97 uint64_t result;
98
99 struct iris_bo *bo;
100 struct iris_query_snapshots *map;
101
102 int batch_idx;
103 };
104
105 struct iris_query_snapshots {
106 uint64_t snapshots_landed;
107 uint64_t start;
108 uint64_t end;
109 };
110
111 struct iris_query_so_overflow {
112 uint64_t snapshots_landed;
113 struct {
114 uint64_t prim_storage_needed[2];
115 uint64_t num_prims[2];
116 } stream[4];
117 };
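/*
 * Both result layouts above start with snapshots_landed, so offset 0 works
 * as the availability flag regardless of which layout a given query's BO
 * actually uses.
 */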
118
119 /**
120 * Is this type of query written by PIPE_CONTROL?
121 */
122 static bool
123 iris_is_query_pipelined(struct iris_query *q)
124 {
125 switch (q->type) {
126 case PIPE_QUERY_OCCLUSION_COUNTER:
127 case PIPE_QUERY_OCCLUSION_PREDICATE:
128 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
129 case PIPE_QUERY_TIMESTAMP:
130 case PIPE_QUERY_TIMESTAMP_DISJOINT:
131 case PIPE_QUERY_TIME_ELAPSED:
132 return true;
133
134 default:
135 return false;
136 }
137 }
138
139 static void
140 mark_available(struct iris_context *ice, struct iris_query *q)
141 {
142 struct iris_batch *batch = &ice->batches[q->batch_idx];
143 unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
144 unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);
145
146 if (!iris_is_query_pipelined(q)) {
147 ice->vtbl.store_data_imm64(batch, q->bo, offset, true);
148 } else {
149 /* Order available *after* the query results. */
150 flags |= PIPE_CONTROL_FLUSH_ENABLE;
151 iris_emit_pipe_control_write(batch, flags, q->bo, offset, true);
152 }
153 }
154
155 /**
156  * Write a pipelined snapshot (PS_DEPTH_COUNT or a timestamp) to q->bo at the given offset via a PIPE_CONTROL.
157 */
158 static void
159 iris_pipelined_write(struct iris_batch *batch,
160 struct iris_query *q,
161 enum pipe_control_flags flags,
162 unsigned offset)
163 {
164 const struct gen_device_info *devinfo = &batch->screen->devinfo;
165 const unsigned optional_cs_stall =
166 devinfo->gen == 9 && devinfo->gt == 4 ? PIPE_CONTROL_CS_STALL : 0;
167
168 iris_emit_pipe_control_write(batch, flags | optional_cs_stall,
169 q->bo, offset, 0ull);
170 }
171
172 static void
173 write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
174 {
175 struct iris_batch *batch = &ice->batches[q->batch_idx];
176 const struct gen_device_info *devinfo = &batch->screen->devinfo;
177
178 if (!iris_is_query_pipelined(q)) {
179 iris_emit_pipe_control_flush(batch,
180 PIPE_CONTROL_CS_STALL |
181 PIPE_CONTROL_STALL_AT_SCOREBOARD);
182 }
183
184 switch (q->type) {
185 case PIPE_QUERY_OCCLUSION_COUNTER:
186 case PIPE_QUERY_OCCLUSION_PREDICATE:
187 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
188 if (devinfo->gen >= 10) {
189 /* "Driver must program PIPE_CONTROL with only Depth Stall Enable
190 * bit set prior to programming a PIPE_CONTROL with Write PS Depth
191 * Count sync operation."
192 */
193 iris_emit_pipe_control_flush(batch, PIPE_CONTROL_DEPTH_STALL);
194 }
195 iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
196 PIPE_CONTROL_WRITE_DEPTH_COUNT |
197 PIPE_CONTROL_DEPTH_STALL,
198 offset);
199 break;
200 case PIPE_QUERY_TIME_ELAPSED:
201 case PIPE_QUERY_TIMESTAMP:
202 case PIPE_QUERY_TIMESTAMP_DISJOINT:
203 iris_pipelined_write(&ice->batches[IRIS_BATCH_RENDER], q,
204 PIPE_CONTROL_WRITE_TIMESTAMP,
205 offset);
206 break;
207 case PIPE_QUERY_PRIMITIVES_GENERATED:
208 ice->vtbl.store_register_mem64(batch,
209 q->index == 0 ? CL_INVOCATION_COUNT :
210 SO_PRIM_STORAGE_NEEDED(q->index),
211 q->bo, offset, false);
212 break;
213 case PIPE_QUERY_PRIMITIVES_EMITTED:
214 ice->vtbl.store_register_mem64(batch,
215 SO_NUM_PRIMS_WRITTEN(q->index),
216 q->bo, offset, false);
217 break;
218 case PIPE_QUERY_PIPELINE_STATISTICS: {
219 static const uint32_t index_to_reg[] = {
220 IA_VERTICES_COUNT,
221 IA_PRIMITIVES_COUNT,
222 VS_INVOCATION_COUNT,
223 GS_INVOCATION_COUNT,
224 GS_PRIMITIVES_COUNT,
225 CL_INVOCATION_COUNT,
226 CL_PRIMITIVES_COUNT,
227 PS_INVOCATION_COUNT,
228 HS_INVOCATION_COUNT,
229 DS_INVOCATION_COUNT,
230 CS_INVOCATION_COUNT,
231 };
232 const uint32_t reg = index_to_reg[q->index];
233
234 ice->vtbl.store_register_mem64(batch, reg, q->bo, offset, false);
235 break;
236 }
237 default:
238 assert(false);
239 }
240 }
241
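/**
 * Snapshot the SO overflow counters (SO_NUM_PRIMS_WRITTEN and
 * SO_PRIM_STORAGE_NEEDED) for one stream, or all four streams for the
 * "any stream" predicate, into the start (end = false) or end (end = true)
 * slots of the overflow layout.
 */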
242 static void
243 write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end)
244 {
245 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
246 uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
247
248 iris_emit_pipe_control_flush(batch,
249 PIPE_CONTROL_CS_STALL |
250 PIPE_CONTROL_STALL_AT_SCOREBOARD);
251 for (uint32_t i = 0; i < count; i++) {
252 int s = q->index + i;
253 int g_idx = offsetof(struct iris_query_so_overflow,
254 stream[s].num_prims[end]);
255 int w_idx = offsetof(struct iris_query_so_overflow,
256 stream[s].prim_storage_needed[end]);
257 ice->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
258 q->bo, g_idx, false);
259 ice->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
260 q->bo, w_idx, false);
261 }
262 }
263
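/**
 * Convert a raw GPU timestamp into nanoseconds using the device's
 * timestamp frequency.
 */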
264 uint64_t
265 iris_timebase_scale(const struct gen_device_info *devinfo,
266 uint64_t gpu_timestamp)
267 {
268 return (1000000000ull * gpu_timestamp) / devinfo->timestamp_frequency;
269 }
270
271 static uint64_t
272 iris_raw_timestamp_delta(uint64_t time0, uint64_t time1)
273 {
274 if (time0 > time1) {
275 return (1ULL << TIMESTAMP_BITS) + time1 - time0;
276 } else {
277 return time1 - time0;
278 }
279 }
280
281 static bool
282 stream_overflowed(struct iris_query_so_overflow *so, int s)
283 {
284 return (so->stream[s].prim_storage_needed[1] -
285 so->stream[s].prim_storage_needed[0]) !=
286 (so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
287 }
288
289 static void
290 calculate_result_on_cpu(const struct gen_device_info *devinfo,
291 struct iris_query *q)
292 {
293 switch (q->type) {
294 case PIPE_QUERY_OCCLUSION_PREDICATE:
295 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
296 q->result = q->map->end != q->map->start;
297 break;
298 case PIPE_QUERY_TIMESTAMP:
299 case PIPE_QUERY_TIMESTAMP_DISJOINT:
300 /* The timestamp is the single starting snapshot. */
301 q->result = iris_timebase_scale(devinfo, q->map->start);
302 q->result &= (1ull << TIMESTAMP_BITS) - 1;
303 break;
304 case PIPE_QUERY_TIME_ELAPSED:
305 q->result = iris_raw_timestamp_delta(q->map->start, q->map->end);
306 q->result = iris_timebase_scale(devinfo, q->result);
307 q->result &= (1ull << TIMESTAMP_BITS) - 1;
308 break;
309 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
310 q->result = stream_overflowed((void *) q->map, q->index);
311 break;
312 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
313 q->result = false;
314 for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
315 q->result |= stream_overflowed((void *) q->map, i);
316 break;
317 case PIPE_QUERY_OCCLUSION_COUNTER:
318 case PIPE_QUERY_PRIMITIVES_GENERATED:
319 case PIPE_QUERY_PRIMITIVES_EMITTED:
320 case PIPE_QUERY_PIPELINE_STATISTICS:
321 default:
322 q->result = q->map->end - q->map->start;
323 break;
324 }
325
326 q->ready = true;
327 }
328
329 /*
330 * GPR0 = (GPR0 == 0) ? 0 : 1;
331 */
332 static void
333 gpr0_to_bool(struct iris_context *ice)
334 {
335 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
336
337 ice->vtbl.load_register_imm64(batch, CS_GPR(1), 1ull);
338
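/* Adding zero to GPR0 makes the ALU zero flag (ZF) reflect whether GPR0 was
 * zero; STOREINV then writes the inverted flag (all ones or zero) back to
 * GPR0, and the final AND with GPR1 (= 1, loaded above) clamps the result
 * to exactly 0 or 1.
 */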
339 static const uint32_t math[] = {
340 MI_MATH | (9 - 2),
341 MI_ALU2(LOAD, SRCA, R0),
342 MI_ALU1(LOAD0, SRCB),
343 MI_ALU0(ADD),
344 MI_ALU2(STOREINV, R0, ZF),
345 MI_ALU2(LOAD, SRCA, R0),
346 MI_ALU2(LOAD, SRCB, R1),
347 MI_ALU0(AND),
348 MI_ALU2(STORE, R0, ACCU),
349 };
350 iris_batch_emit(batch, math, sizeof(math));
351 }
352
353 static void
354 load_overflow_data_to_cs_gprs(struct iris_context *ice,
355 struct iris_query *q,
356 int idx)
357 {
358 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
359
360 ice->vtbl.load_register_mem64(batch, CS_GPR(1), q->bo,
361 offsetof(struct iris_query_so_overflow,
362 stream[idx].prim_storage_needed[0]));
363 ice->vtbl.load_register_mem64(batch, CS_GPR(2), q->bo,
364 offsetof(struct iris_query_so_overflow,
365 stream[idx].prim_storage_needed[1]));
366
367 ice->vtbl.load_register_mem64(batch, CS_GPR(3), q->bo,
368 offsetof(struct iris_query_so_overflow,
369 stream[idx].num_prims[0]));
370 ice->vtbl.load_register_mem64(batch, CS_GPR(4), q->bo,
371 offsetof(struct iris_query_so_overflow,
372 stream[idx].num_prims[1]));
373 }
374
375 /*
376 * R3 = R4 - R3;
377 * R1 = R2 - R1;
378 * R1 = R3 - R1;
379 * R0 = R0 | R1;
380 */
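/*
 * With the GPRs loaded by load_overflow_data_to_cs_gprs() (R1/R2 =
 * prim_storage_needed[0]/[1], R3/R4 = num_prims[0]/[1]), this computes
 * (num_prims delta) - (prim_storage_needed delta) and ORs the result into
 * R0, i.e. the MI_MATH counterpart of stream_overflowed() above.
 */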
381 static void
382 calc_overflow_for_stream(struct iris_context *ice)
383 {
384 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
385 static const uint32_t maths[] = {
386 MI_MATH | (17 - 2),
387 MI_ALU2(LOAD, SRCA, R4),
388 MI_ALU2(LOAD, SRCB, R3),
389 MI_ALU0(SUB),
390 MI_ALU2(STORE, R3, ACCU),
391 MI_ALU2(LOAD, SRCA, R2),
392 MI_ALU2(LOAD, SRCB, R1),
393 MI_ALU0(SUB),
394 MI_ALU2(STORE, R1, ACCU),
395 MI_ALU2(LOAD, SRCA, R3),
396 MI_ALU2(LOAD, SRCB, R1),
397 MI_ALU0(SUB),
398 MI_ALU2(STORE, R1, ACCU),
399 MI_ALU2(LOAD, SRCA, R1),
400 MI_ALU2(LOAD, SRCB, R0),
401 MI_ALU0(OR),
402 MI_ALU2(STORE, R0, ACCU),
403 };
404
405 iris_batch_emit(batch, maths, sizeof(maths));
406 }
407
408 static void
409 overflow_result_to_gpr0(struct iris_context *ice, struct iris_query *q)
410 {
411 struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
412
413 ice->vtbl.load_register_imm64(batch, CS_GPR(0), 0ull);
414
415 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
416 load_overflow_data_to_cs_gprs(ice, q, q->index);
417 calc_overflow_for_stream(ice);
418 } else {
419 for (int i = 0; i < MAX_VERTEX_STREAMS; i++) {
420 load_overflow_data_to_cs_gprs(ice, q, i);
421 calc_overflow_for_stream(ice);
422 }
423 }
424
425 gpr0_to_bool(ice);
426 }
427
428 /**
429 * Calculate the result and store it to CS_GPR0.
430 */
431 static void
432 calculate_result_on_gpu(struct iris_context *ice, struct iris_query *q)
433 {
434 struct iris_batch *batch = &ice->batches[q->batch_idx];
435
436 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
437 q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
438 overflow_result_to_gpr0(ice, q);
439 return;
440 }
441
442 ice->vtbl.load_register_mem64(batch, CS_GPR(1), q->bo,
443 offsetof(struct iris_query_snapshots, start));
444 ice->vtbl.load_register_mem64(batch, CS_GPR(2), q->bo,
445 offsetof(struct iris_query_snapshots, end));
446
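/* GPR0 = GPR2 - GPR1, i.e. end - start. */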
447 static const uint32_t math[] = {
448 MI_MATH | (5 - 2),
449 MI_ALU2(LOAD, SRCA, R2),
450 MI_ALU2(LOAD, SRCB, R1),
451 MI_ALU0(SUB),
452 MI_ALU2(STORE, R0, ACCU),
453 };
454 iris_batch_emit(batch, math, sizeof(math));
455
456 if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
457 q->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE)
458 gpr0_to_bool(ice);
459 }
460
461 static struct pipe_query *
462 iris_create_query(struct pipe_context *ctx,
463 unsigned query_type,
464 unsigned index)
465 {
466 struct iris_query *q = calloc(1, sizeof(struct iris_query));
467
468 q->type = query_type;
469 q->index = index;
470
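/* Pipeline statistics index 10 is CS_INVOCATION_COUNT (see index_to_reg and
 * iris_get_query_result below); compute work is emitted on the compute
 * batch, so that counter has to be snapshotted there.
 */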
471 if (q->type == PIPE_QUERY_PIPELINE_STATISTICS && q->index == 10)
472 q->batch_idx = IRIS_BATCH_COMPUTE;
473 else
474 q->batch_idx = IRIS_BATCH_RENDER;
475 return (struct pipe_query *) q;
476 }
477
478 static void
479 iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
480 {
481 struct iris_query *query = (void *) p_query;
482 iris_bo_unreference(query->bo);
483 free(query);
484 }
485
486
487 static boolean
488 iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
489 {
490 struct iris_screen *screen = (void *) ctx->screen;
491 struct iris_context *ice = (void *) ctx;
492 struct iris_query *q = (void *) query;
493
494 iris_bo_unreference(q->bo);
495 q->bo = iris_bo_alloc(screen->bufmgr, "query object", 4096,
496 IRIS_MEMZONE_OTHER);
497 if (!q->bo)
498 return false;
499
500 q->map = iris_bo_map(&ice->dbg, q->bo, MAP_READ | MAP_WRITE | MAP_ASYNC);
501 if (!q->map)
502 return false;
503
504 q->result = 0ull;
505 q->ready = false;
506 q->map->snapshots_landed = false;
507
508 if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
509 ice->state.prims_generated_query_active = true;
510 ice->state.dirty |= IRIS_DIRTY_STREAMOUT;
511 }
512
513 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
514 q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
515 write_overflow_values(ice, q, false);
516 else
517 write_value(ice, q, offsetof(struct iris_query_snapshots, start));
518
519 return true;
520 }
521
522 static bool
523 iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
524 {
525 struct iris_context *ice = (void *) ctx;
526 struct iris_query *q = (void *) query;
527
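/* TIMESTAMP queries have no begin_query; take the single snapshot now
 * (reusing iris_begin_query to allocate and write the BO) and mark it
 * available immediately.
 */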
528 if (q->type == PIPE_QUERY_TIMESTAMP) {
529 iris_begin_query(ctx, query);
530 mark_available(ice, q);
531 return true;
532 }
533
534 if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
535          ice->state.prims_generated_query_active = false;
536 ice->state.dirty |= IRIS_DIRTY_STREAMOUT;
537 }
538
539 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
540 q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
541 write_overflow_values(ice, q, true);
542 else
543 write_value(ice, q, offsetof(struct iris_query_snapshots, end));
544 mark_available(ice, q);
545
546 return true;
547 }
548
549 static boolean
550 iris_get_query_result(struct pipe_context *ctx,
551 struct pipe_query *query,
552 boolean wait,
553 union pipe_query_result *result)
554 {
555 struct iris_context *ice = (void *) ctx;
556 struct iris_query *q = (void *) query;
557 struct iris_screen *screen = (void *) ctx->screen;
558 const struct gen_device_info *devinfo = &screen->devinfo;
559
560 if (!q->ready) {
561 if (iris_batch_references(&ice->batches[q->batch_idx], q->bo))
562 iris_batch_flush(&ice->batches[q->batch_idx]);
563
564 if (!q->map->snapshots_landed) {
565 if (wait)
566 iris_bo_wait_rendering(q->bo);
567 else
568 return false;
569 }
570
571 assert(q->map->snapshots_landed);
572 calculate_result_on_cpu(devinfo, q);
573 }
574
575 assert(q->ready);
576
577 if (q->type == PIPE_QUERY_PIPELINE_STATISTICS) {
578 switch (q->index) {
579 case 0:
580 result->pipeline_statistics.ia_vertices = q->result;
581 break;
582 case 1:
583 result->pipeline_statistics.ia_primitives = q->result;
584 break;
585 case 2:
586 result->pipeline_statistics.vs_invocations = q->result;
587 break;
588 case 3:
589 result->pipeline_statistics.gs_invocations = q->result;
590 break;
591 case 4:
592 result->pipeline_statistics.gs_primitives = q->result;
593 break;
594 case 5:
595 result->pipeline_statistics.c_invocations = q->result;
596 break;
597 case 6:
598 result->pipeline_statistics.c_primitives = q->result;
599 break;
600 case 7:
601 result->pipeline_statistics.ps_invocations = q->result;
602 break;
603 case 8:
604 result->pipeline_statistics.hs_invocations = q->result;
605 break;
606 case 9:
607 result->pipeline_statistics.ds_invocations = q->result;
608 break;
609 case 10:
610 result->pipeline_statistics.cs_invocations = q->result;
611 break;
612 }
613 } else {
614 result->u64 = q->result;
615 }
616
617 return true;
618 }
619
620 static void
621 iris_get_query_result_resource(struct pipe_context *ctx,
622 struct pipe_query *query,
623 boolean wait,
624 enum pipe_query_value_type result_type,
625 int index,
626 struct pipe_resource *p_res,
627 unsigned offset)
628 {
629 struct iris_context *ice = (void *) ctx;
630 struct iris_query *q = (void *) query;
631 struct iris_batch *batch = &ice->batches[q->batch_idx];
632 const struct gen_device_info *devinfo = &batch->screen->devinfo;
633 struct iris_resource *res = (void *) p_res;
634 unsigned snapshots_landed_offset =
635 offsetof(struct iris_query_snapshots, snapshots_landed);
636
637 res->bind_history |= PIPE_BIND_QUERY_BUFFER;
638
639 if (index == -1) {
640 /* They're asking for the availability of the result. If we still
641 * have commands queued up which produce the result, submit them
642 * now so that progress happens. Either way, copy the snapshots
643 * landed field to the destination resource.
644 */
645 if (iris_batch_references(batch, q->bo))
646 iris_batch_flush(batch);
647
648 ice->vtbl.copy_mem_mem(batch, iris_resource_bo(p_res), offset,
649 q->bo, snapshots_landed_offset,
650 result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
651 return;
652 }
653
654 if (!q->ready && q->map->snapshots_landed) {
655 /* The final snapshots happen to have landed, so let's just compute
656 * the result on the CPU now...
657 */
658 calculate_result_on_cpu(devinfo, q);
659 }
660
661 if (q->ready) {
662 /* We happen to have the result on the CPU, so just copy it. */
663 if (result_type <= PIPE_QUERY_TYPE_U32) {
664 ice->vtbl.store_data_imm32(batch, iris_resource_bo(p_res), offset,
665 q->result);
666 } else {
667 ice->vtbl.store_data_imm64(batch, iris_resource_bo(p_res), offset,
668 q->result);
669 }
670
671       /* Make sure the result lands before they bind the QBO elsewhere
672 * and use the result.
673 */
674 // XXX: Why? i965 doesn't do this.
675 iris_emit_pipe_control_flush(batch, PIPE_CONTROL_CS_STALL);
676 return;
677 }
678
679 /* Calculate the result to CS_GPR0 */
680 calculate_result_on_gpu(ice, q);
681
682 bool predicated = !wait && iris_is_query_pipelined(q);
683
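/* When not waiting, predicate the result write below on the availability
 * snapshot; the intent is that the destination is left untouched if the
 * snapshots have not landed yet.
 */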
684 if (predicated) {
685 ice->vtbl.load_register_imm64(batch, MI_PREDICATE_SRC1, 0ull);
686 ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, q->bo,
687 snapshots_landed_offset);
688 uint32_t predicate = MI_PREDICATE |
689 MI_PREDICATE_LOADOP_LOADINV |
690 MI_PREDICATE_COMBINEOP_SET |
691 MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
692 iris_batch_emit(batch, &predicate, sizeof(uint32_t));
693 }
694
695 if (result_type <= PIPE_QUERY_TYPE_U32) {
696 ice->vtbl.store_register_mem32(batch, CS_GPR(0),
697 iris_resource_bo(p_res),
698 offset, predicated);
699 } else {
700 ice->vtbl.store_register_mem64(batch, CS_GPR(0),
701 iris_resource_bo(p_res),
702 offset, predicated);
703 }
704 }
705
706 static void
707 iris_set_active_query_state(struct pipe_context *ctx, boolean enable)
708 {
709 struct iris_context *ice = (void *) ctx;
710
711 if (ice->state.statistics_counters_enabled == enable)
712 return;
713
714 // XXX: most packets aren't paying attention to this yet, because it'd
715 // have to be done dynamically at draw time, which is a pain
716 ice->state.statistics_counters_enabled = enable;
717 ice->state.dirty |= IRIS_DIRTY_CLIP |
718 IRIS_DIRTY_GS |
719 IRIS_DIRTY_RASTER |
720 IRIS_DIRTY_STREAMOUT |
721 IRIS_DIRTY_TCS |
722 IRIS_DIRTY_TES |
723 IRIS_DIRTY_VS |
724 IRIS_DIRTY_WM;
725 }
726
727 void
728 iris_init_query_functions(struct pipe_context *ctx)
729 {
730 ctx->create_query = iris_create_query;
731 ctx->destroy_query = iris_destroy_query;
732 ctx->begin_query = iris_begin_query;
733 ctx->end_query = iris_end_query;
734 ctx->get_query_result = iris_get_query_result;
735 ctx->get_query_result_resource = iris_get_query_result_resource;
736 ctx->set_active_query_state = iris_set_active_query_state;
737 }