src/gallium/drivers/svga/svga_pipe_query.c

   1 /**********************************************************
   2  * Copyright 2008-2015 VMware, Inc.  All rights reserved.
   3  *
   4  * Permission is hereby granted, free of charge, to any person
   5  * obtaining a copy of this software and associated documentation
   6  * files (the "Software"), to deal in the Software without
   7  * restriction, including without limitation the rights to use, copy,
   8  * modify, merge, publish, distribute, sublicense, and/or sell copies
   9  * of the Software, and to permit persons to whom the Software is
  10  * furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be
  13  * included in all copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  *
  24  **********************************************************/
  25
  26 #include "pipe/p_state.h"
  27 #include "pipe/p_context.h"
  28
  29 #include "util/u_bitmask.h"
  30 #include "util/u_memory.h"
  31
  32 #include "svga_cmd.h"
  33 #include "svga_context.h"
  34 #include "svga_screen.h"
  35 #include "svga_resource_buffer.h"
  36 #include "svga_winsys.h"
  37 #include "svga_debug.h"
  38
  39
  40 /* Fixme: want a public base class for all pipe structs, even if there
  41  * isn't much in them.
  42  */
  43 struct pipe_query {
  44    int dummy;
  45 };
  46
  47 struct svga_query {
  48    struct pipe_query base;
  49    unsigned type;                  /**< PIPE_QUERY_x or SVGA_QUERY_x */
  50    SVGA3dQueryType svga_type;      /**< SVGA3D_QUERYTYPE_x or unused */
  51
  52    unsigned id;                    /** Per-context query identifier */
  53
  54    struct pipe_fence_handle *fence;
  55
  56    /** For PIPE_QUERY_OCCLUSION_COUNTER / SVGA3D_QUERYTYPE_OCCLUSION */
  57
  58    /* For VGPU9 */
  59    struct svga_winsys_buffer *hwbuf;
  60    volatile SVGA3dQueryResult *queryResult;
  61
  62    /** For VGPU10 */
  63    struct svga_winsys_gb_query *gb_query;
  64    SVGA3dDXQueryFlags flags;
  65    unsigned offset;                /**< offset to the gb_query memory */
  66    struct pipe_query *predicate;   /** The associated query that can be used for predicate */
  67
  68    /** For non-GPU SVGA_QUERY_x queries */
  69    uint64_t begin_count, end_count;
  70 };
  71
  72
  73 /** cast wrapper */
  74 static inline struct svga_query *
  75 svga_query( struct pipe_query *q )
  76 {
  77    return (struct svga_query *)q;
  78 }
  79
  80
  81 static boolean
  82 svga_get_query_result(struct pipe_context *pipe,
  83                       struct pipe_query *q,
  84                       boolean wait,
  85                       union pipe_query_result *result);
  86
  87 static enum pipe_error
  88 define_query_vgpu9(struct svga_context *svga,
  89                    struct svga_query *sq)
  90 {
  91    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
  92
  93    sq->hwbuf = svga_winsys_buffer_create(svga, 1,
  94                                          SVGA_BUFFER_USAGE_PINNED,
  95                                          sizeof *sq->queryResult);
  96    if (!sq->hwbuf)
  97       return PIPE_ERROR_OUT_OF_MEMORY;
  98
  99    sq->queryResult = (SVGA3dQueryResult *)
 100                      sws->buffer_map(sws, sq->hwbuf, PIPE_TRANSFER_WRITE);
 101    if (!sq->queryResult) {
 102       sws->buffer_destroy(sws, sq->hwbuf);
 103       return PIPE_ERROR_OUT_OF_MEMORY;
 104    }
 105
 106    sq->queryResult->totalSize = sizeof *sq->queryResult;
 107    sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
 108
 109    /* We request the buffer to be pinned and assume it is always mapped.
 110     * The reason is that we don't want to wait for fences when checking the
 111     * query status.
 112     */
 113    sws->buffer_unmap(sws, sq->hwbuf);
 114
 115    return PIPE_OK;
 116 }
 117
 118 static enum pipe_error
 119 begin_query_vgpu9(struct svga_context *svga, struct svga_query *sq)
 120 {
 121    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
 122    enum pipe_error ret = PIPE_OK;
 123
 124    if (sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) {
 125       /* The application doesn't care for the pending query result.
 126        * We cannot let go of the existing buffer and just get a new one
 127        * because its storage may be reused for other purposes and clobbered
 128        * by the host when it determines the query result.  So the only
 129        * option here is to wait for the existing query's result -- not a
 130        * big deal, given that no sane application would do this.
 131        */
 132        uint64_t result;
 133        svga_get_query_result(&svga->pipe, &sq->base, TRUE, (void*)&result);
 134        assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING);
 135    }
 136
 137    sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
 138    sws->fence_reference(sws, &sq->fence, NULL);
 139
 140    ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
 141    if (ret != PIPE_OK) {
 142       svga_context_flush(svga, NULL);
 143       ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
 144    }
 145    return ret;
 146 }
 147
 148 static enum pipe_error
 149 end_query_vgpu9(struct svga_context *svga, struct svga_query *sq)
 150 {
 151    enum pipe_error ret = PIPE_OK;
 152
 153    /* Set to PENDING before sending EndQuery. */
 154    sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING;
 155
 156    ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf);
 157    if (ret != PIPE_OK) {
 158       svga_context_flush(svga, NULL);
 159       ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf);
 160    }
 161    return ret;
 162 }
 163
 164 static boolean
 165 get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq,
 166                        boolean wait, uint64_t *result)
 167 {
 168    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
 169    enum pipe_error ret;
 170    SVGA3dQueryState state;
 171
 172    if (!sq->fence) {
 173       /* The query status won't be updated by the host unless
 174        * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause
 175        * a synchronous wait on the host.
 176        */
 177       ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf);
 178       if (ret != PIPE_OK) {
 179          svga_context_flush(svga, NULL);
 180          ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf);
 181       }
 182       assert (ret == PIPE_OK);
 183       svga_context_flush(svga, &sq->fence);
 184       assert(sq->fence);
 185    }
 186
 187    state = sq->queryResult->state;
 188    if (state == SVGA3D_QUERYSTATE_PENDING) {
 189       if (!wait)
 190          return FALSE;
 191       sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
 192       state = sq->queryResult->state;
 193    }
 194
 195    assert(state == SVGA3D_QUERYSTATE_SUCCEEDED ||
 196           state == SVGA3D_QUERYSTATE_FAILED);
 197
 198    *result = (uint64_t)sq->queryResult->result32;
 199    return TRUE;
 200 }
 201
 202
 203 /**
 204  * VGPU10
 205  *
 206  * There is one query mob allocated for each context to be shared by all
 207  * query types. The mob is used to hold queries's state and result. Since
 208  * each query result type is of different length, to ease the query allocation
 209  * management, the mob is divided into memory blocks. Each memory block
 210  * will hold queries of the same type. Multiple memory blocks can be allocated
 211  * for a particular query type.
 212  *
 213  * Currently each memory block is of 184 bytes. We support up to 128
 214  * memory blocks. The query memory size is arbitrary right now.
 215  * Each occlusion query takes about 8 bytes. One memory block can accomodate
 216  * 23 occlusion queries. 128 of those blocks can support up to 2944 occlusion
 217  * queries. That seems reasonable for now. If we think this limit is
 218  * not enough, we can increase the limit or try to grow the mob in runtime.
 219  * Note, SVGA device does not impose one mob per context for queries,
 220  * we could allocate multiple mobs for queries; however, wddm KMD does not
 221  * currently support that.
 222  *
 223  * Also note that the GL guest driver does not issue any of the
 224  * following commands: DXMoveQuery, DXBindAllQuery & DXReadbackAllQuery.
 225  */
 226 #define SVGA_QUERY_MEM_BLOCK_SIZE    (sizeof(SVGADXQueryResultUnion) * 2)
 227 #define SVGA_QUERY_MEM_SIZE          (128 * SVGA_QUERY_MEM_BLOCK_SIZE)
 228
 229 struct svga_qmem_alloc_entry
 230 {
 231    unsigned start_offset;               /* start offset of the memory block */
 232    unsigned block_index;                /* block index of the memory block */
 233    unsigned query_size;                 /* query size in this memory block */
 234    unsigned nquery;                     /* number of queries allocated */
 235    struct util_bitmask *alloc_mask;     /* allocation mask */
 236    struct svga_qmem_alloc_entry *next;  /* next memory block */
 237 };
 238
 239
 240 /**
 241  * Allocate a memory block from the query object memory
 242  * \return -1 if out of memory, else index of the query memory block
 243  */
 244 static int
 245 allocate_query_block(struct svga_context *svga)
 246 {
 247    int index;
 248    unsigned offset;
 249
 250    /* Find the next available query block */
 251    index = util_bitmask_add(svga->gb_query_alloc_mask);
 252
 253    if (index == UTIL_BITMASK_INVALID_INDEX)
 254       return -1;
 255
 256    offset = index * SVGA_QUERY_MEM_BLOCK_SIZE;
 257    if (offset >= svga->gb_query_len) {
 258       unsigned i;
 259
 260       /**
 261        * All the memory blocks are allocated, lets see if there is
 262        * any empty memory block around that can be freed up.
 263        */
 264       index = -1;
 265       for (i = 0; i < SVGA_QUERY_MAX && index == -1; i++) {
 266          struct svga_qmem_alloc_entry *alloc_entry;
 267          struct svga_qmem_alloc_entry *prev_alloc_entry = NULL;
 268
 269          alloc_entry = svga->gb_query_map[i];
 270          while (alloc_entry && index == -1) {
 271             if (alloc_entry->nquery == 0) {
 272                /* This memory block is empty, it can be recycled. */
 273                if (prev_alloc_entry) {
 274                   prev_alloc_entry->next = alloc_entry->next;
 275                } else {
 276                   svga->gb_query_map[i] = alloc_entry->next;
 277                }
 278                index = alloc_entry->block_index;
 279             } else {
 280                prev_alloc_entry = alloc_entry;
 281                alloc_entry = alloc_entry->next;
 282             }
 283          }
 284       }
 285    }
 286
 287    return index;
 288 }
 289
 290 /**
 291  * Allocate a slot in the specified memory block.
 292  * All slots in this memory block are of the same size.
 293  *
 294  * \return -1 if out of memory, else index of the query slot
 295  */
 296 static int
 297 allocate_query_slot(struct svga_context *svga,
 298                     struct svga_qmem_alloc_entry *alloc)
 299 {
 300    int index;
 301    unsigned offset;
 302
 303    /* Find the next available slot */
 304    index = util_bitmask_add(alloc->alloc_mask);
 305
 306    if (index == UTIL_BITMASK_INVALID_INDEX)
 307       return -1;
 308
 309    offset = index * alloc->query_size;
 310    if (offset >= SVGA_QUERY_MEM_BLOCK_SIZE)
 311       return -1;
 312
 313    alloc->nquery++;
 314
 315    return index;
 316 }
 317
 318 /**
 319  * Deallocate the specified slot in the memory block.
 320  * If all slots are freed up, then deallocate the memory block
 321  * as well, so it can be allocated for other query type
 322  */
 323 static void
 324 deallocate_query_slot(struct svga_context *svga,
 325                       struct svga_qmem_alloc_entry *alloc,
 326                       unsigned index)
 327 {
 328    assert(index != UTIL_BITMASK_INVALID_INDEX);
 329
 330    util_bitmask_clear(alloc->alloc_mask, index);
 331    alloc->nquery--;
 332
 333    /**
 334     * Don't worry about deallocating the empty memory block here.
 335     * The empty memory block will be recycled when no more memory block
 336     * can be allocated.
 337     */
 338 }
 339
 340 static struct svga_qmem_alloc_entry *
 341 allocate_query_block_entry(struct svga_context *svga,
 342                            unsigned len)
 343 {
 344    struct svga_qmem_alloc_entry *alloc_entry;
 345    int block_index = -1;
 346
 347    block_index = allocate_query_block(svga);
 348    if (block_index == -1)
 349       return NULL;
 350    alloc_entry = CALLOC_STRUCT(svga_qmem_alloc_entry);
 351    if (!alloc_entry)
 352       return NULL;
 353
 354    alloc_entry->block_index = block_index;
 355    alloc_entry->start_offset = block_index * SVGA_QUERY_MEM_BLOCK_SIZE;
 356    alloc_entry->nquery = 0;
 357    alloc_entry->alloc_mask = util_bitmask_create();
 358    alloc_entry->next = NULL;
 359    alloc_entry->query_size = len;
 360
 361    return alloc_entry;
 362 }
 363
 364 /**
 365  * Allocate a memory slot for a query of the specified type.
 366  * It will first search through the memory blocks that are allocated
 367  * for the query type. If no memory slot is available, it will try
 368  * to allocate another memory block within the query object memory for
 369  * this query type.
 370  */
 371 static int
 372 allocate_query(struct svga_context *svga,
 373                SVGA3dQueryType type,
 374                unsigned len)
 375 {
 376    struct svga_qmem_alloc_entry *alloc_entry;
 377    int slot_index = -1;
 378    unsigned offset;
 379
 380    assert(type < SVGA_QUERY_MAX);
 381
 382    alloc_entry = svga->gb_query_map[type];
 383
 384    if (!alloc_entry) {
 385       /**
 386        * No query memory block has been allocated for this query type,
 387        * allocate one now
 388        */
 389       alloc_entry = allocate_query_block_entry(svga, len);
 390       if (!alloc_entry)
 391          return -1;
 392       svga->gb_query_map[type] = alloc_entry;
 393    }
 394
 395    /* Allocate a slot within the memory block allocated for this query type */
 396    slot_index = allocate_query_slot(svga, alloc_entry);
 397
 398    if (slot_index == -1) {
 399       /* This query memory block is full, allocate another one */
 400       alloc_entry = allocate_query_block_entry(svga, len);
 401       if (!alloc_entry)
 402          return -1;
 403       alloc_entry->next = svga->gb_query_map[type];
 404       svga->gb_query_map[type] = alloc_entry;
 405       slot_index = allocate_query_slot(svga, alloc_entry);
 406    }
 407
 408    assert(slot_index != -1);
 409    offset = slot_index * len + alloc_entry->start_offset;
 410
 411    return offset;
 412 }
 413
 414
 415 /**
 416  * Deallocate memory slot allocated for the specified query
 417  */
 418 static void
 419 deallocate_query(struct svga_context *svga,
 420                  struct svga_query *sq)
 421 {
 422    struct svga_qmem_alloc_entry *alloc_entry;
 423    unsigned slot_index;
 424    unsigned offset = sq->offset;
 425
 426    alloc_entry = svga->gb_query_map[sq->svga_type];
 427
 428    while (alloc_entry) {
 429       if (offset >= alloc_entry->start_offset &&
 430           offset < alloc_entry->start_offset + SVGA_QUERY_MEM_BLOCK_SIZE) {
 431
 432          /* The slot belongs to this memory block, deallocate it */
 433          slot_index = (offset - alloc_entry->start_offset) /
 434                       alloc_entry->query_size;
 435          deallocate_query_slot(svga, alloc_entry, slot_index);
 436          alloc_entry = NULL;
 437       } else {
 438          alloc_entry = alloc_entry->next;
 439       }
 440    }
 441 }
 442
 443
 444 /**
 445  * Destroy the gb query object and all the related query structures
 446  */
 447 static void
 448 destroy_gb_query_obj(struct svga_context *svga)
 449 {
 450    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
 451    unsigned i;
 452
 453    for (i = 0; i < SVGA_QUERY_MAX; i++) {
 454       struct svga_qmem_alloc_entry *alloc_entry, *next;
 455       alloc_entry = svga->gb_query_map[i];
 456       while (alloc_entry) {
 457          next = alloc_entry->next;
 458          util_bitmask_destroy(alloc_entry->alloc_mask);
 459          FREE(alloc_entry);
 460          alloc_entry = next;
 461       }
 462       svga->gb_query_map[i] = NULL;
 463    }
 464
 465    if (svga->gb_query)
 466       sws->query_destroy(sws, svga->gb_query);
 467    svga->gb_query = NULL;
 468
 469    util_bitmask_destroy(svga->gb_query_alloc_mask);
 470 }
 471
 472 /**
 473  * Define query and create the gb query object if it is not already created.
 474  * There is only one gb query object per context which will be shared by
 475  * queries of all types.
 476  */
 477 static enum pipe_error
 478 define_query_vgpu10(struct svga_context *svga,
 479                     struct svga_query *sq, int resultLen)
 480 {
 481    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
 482    int qlen;
 483    enum pipe_error ret = PIPE_OK;
 484
 485    SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
 486
 487    if (svga->gb_query == NULL) {
 488       /* Create a gb query object */
 489       svga->gb_query = sws->query_create(sws, SVGA_QUERY_MEM_SIZE);
 490       if (!svga->gb_query)
 491          return PIPE_ERROR_OUT_OF_MEMORY;
 492       svga->gb_query_len = SVGA_QUERY_MEM_SIZE;
 493       memset (svga->gb_query_map, 0, sizeof(svga->gb_query_map));
 494       svga->gb_query_alloc_mask = util_bitmask_create();
 495
 496       /* Bind the query object to the context */
 497       if (svga->swc->query_bind(svga->swc, svga->gb_query,
 498                                 SVGA_QUERY_FLAG_SET) != PIPE_OK) {
 499          svga_context_flush(svga, NULL);
 500          svga->swc->query_bind(svga->swc, svga->gb_query,
 501                                SVGA_QUERY_FLAG_SET);
 502       }
 503    }
 504
 505    sq->gb_query = svga->gb_query;
 506
 507    /* Allocate an integer ID for this query */
 508    sq->id = util_bitmask_add(svga->query_id_bm);
 509    if (sq->id == UTIL_BITMASK_INVALID_INDEX)
 510       return PIPE_ERROR_OUT_OF_MEMORY;
 511
 512    /* Find a slot for this query in the gb object */
 513    qlen = resultLen + sizeof(SVGA3dQueryState);
 514    sq->offset = allocate_query(svga, sq->svga_type, qlen);
 515    if (sq->offset == -1)
 516       return PIPE_ERROR_OUT_OF_MEMORY;
 517
 518    SVGA_DBG(DEBUG_QUERY, "   query type=%d qid=0x%x offset=%d\n",
 519             sq->svga_type, sq->id, sq->offset);
 520
 521    /**
 522     * Send SVGA3D commands to define the query
 523     */
 524    ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags);
 525    if (ret != PIPE_OK) {
 526       svga_context_flush(svga, NULL);
 527       ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags);
 528    }
 529    if (ret != PIPE_OK)
 530       return PIPE_ERROR_OUT_OF_MEMORY;
 531
 532    ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id);
 533    if (ret != PIPE_OK) {
 534       svga_context_flush(svga, NULL);
 535       ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id);
 536    }
 537    assert(ret == PIPE_OK);
 538
 539    ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset);
 540    if (ret != PIPE_OK) {
 541       svga_context_flush(svga, NULL);
 542       ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset);
 543    }
 544    assert(ret == PIPE_OK);
 545
 546    return PIPE_OK;
 547 }
 548
 549 static enum pipe_error
 550 destroy_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
 551 {
 552    enum pipe_error ret;
 553
 554    ret = SVGA3D_vgpu10_DestroyQuery(svga->swc, sq->id);
 555
 556    /* Deallocate the memory slot allocated for this query */
 557    deallocate_query(svga, sq);
 558
 559    return ret;
 560 }
 561
 562
 563 /**
 564  * Rebind queryies to the context.
 565  */
 566 static void
 567 rebind_vgpu10_query(struct svga_context *svga)
 568 {
 569    if (svga->swc->query_bind(svga->swc, svga->gb_query,
 570                              SVGA_QUERY_FLAG_REF) != PIPE_OK) {
 571       svga_context_flush(svga, NULL);
 572       svga->swc->query_bind(svga->swc, svga->gb_query,
 573                             SVGA_QUERY_FLAG_REF);
 574    }
 575
 576    svga->rebind.flags.query = FALSE;
 577 }
 578
 579
 580 static enum pipe_error
 581 begin_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
 582 {
 583    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
 584    enum pipe_error ret = PIPE_OK;
 585    int status = 0;
 586
 587    sws->fence_reference(sws, &sq->fence, NULL);
 588
 589    /* Initialize the query state to NEW */
 590    status = sws->query_init(sws, sq->gb_query, sq->offset, SVGA3D_QUERYSTATE_NEW);
 591    if (status)
 592       return PIPE_ERROR;
 593
 594    if (svga->rebind.flags.query) {
 595       rebind_vgpu10_query(svga);
 596    }
 597
 598    /* Send the BeginQuery command to the device */
 599    ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id);
 600    if (ret != PIPE_OK) {
 601       svga_context_flush(svga, NULL);
 602       ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id);
 603    }
 604    return ret;
 605 }
 606
 607 static enum pipe_error
 608 end_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
 609 {
 610    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
 611    enum pipe_error ret = PIPE_OK;
 612
 613    if (svga->rebind.flags.query) {
 614       rebind_vgpu10_query(svga);
 615    }
 616
 617    ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id);
 618    if (ret != PIPE_OK) {
 619       svga_context_flush(svga, NULL);
 620       ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id);
 621    }
 622
 623    /* Finish fence is copied here from get_query_result_vgpu10. This helps
 624     * with cases where svga_begin_query might be called again before
 625     * svga_get_query_result, such as GL_TIME_ELAPSED.
 626     */
 627    if (!sq->fence) {
 628       svga_context_flush(svga, &sq->fence);
 629    }
 630    sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
 631
 632    return ret;
 633 }
 634
 635 static boolean
 636 get_query_result_vgpu10(struct svga_context *svga, struct svga_query *sq,
 637                         boolean wait, void *result, int resultLen)
 638 {
 639    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
 640    SVGA3dQueryState queryState;
 641
 642    if (svga->rebind.flags.query) {
 643       rebind_vgpu10_query(svga);
 644    }
 645
 646    sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen);
 647
 648    if (queryState == SVGA3D_QUERYSTATE_PENDING) {
 649       if (!wait)
 650          return FALSE;
 651       sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
 652       sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen);
 653    }
 654
 655    assert(queryState == SVGA3D_QUERYSTATE_SUCCEEDED ||
 656           queryState == SVGA3D_QUERYSTATE_FAILED);
 657
 658    return TRUE;
 659 }
 660
 661 static struct pipe_query *
 662 svga_create_query(struct pipe_context *pipe,
 663                   unsigned query_type,
 664                   unsigned index)
 665 {
 666    struct svga_context *svga = svga_context(pipe);
 667    struct svga_query *sq;
 668
 669    assert(query_type < SVGA_QUERY_MAX);
 670
 671    sq = CALLOC_STRUCT(svga_query);
 672    if (!sq)
 673       goto fail;
 674
 675    /* Allocate an integer ID for the query */
 676    sq->id = util_bitmask_add(svga->query_id_bm);
 677    if (sq->id == UTIL_BITMASK_INVALID_INDEX)
 678       goto fail;
 679
 680    SVGA_DBG(DEBUG_QUERY, "%s type=%d sq=0x%x id=%d\n", __FUNCTION__,
 681             query_type, sq, sq->id);
 682
 683    switch (query_type) {
 684    case PIPE_QUERY_OCCLUSION_COUNTER:
 685       sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSION;
 686       if (svga_have_vgpu10(svga)) {
 687          define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionQueryResult));
 688
 689          /**
 690           * In OpenGL, occlusion counter query can be used in conditional
 691           * rendering; however, in DX10, only OCCLUSION_PREDICATE query can
 692           * be used for predication. Hence, we need to create an occlusion
 693           * predicate query along with the occlusion counter query. So when
 694           * the occlusion counter query is used for predication, the associated
 695           * query of occlusion predicate type will be used
 696           * in the SetPredication command.
 697           */
 698          sq->predicate = svga_create_query(pipe, PIPE_QUERY_OCCLUSION_PREDICATE, index);
 699
 700       } else {
 701          define_query_vgpu9(svga, sq);
 702       }
 703       break;
 704    case PIPE_QUERY_OCCLUSION_PREDICATE:
 705       assert(svga_have_vgpu10(svga));
 706       sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE;
 707       define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionPredicateQueryResult));
 708       break;
 709    case PIPE_QUERY_PRIMITIVES_GENERATED:
 710    case PIPE_QUERY_PRIMITIVES_EMITTED:
 711    case PIPE_QUERY_SO_STATISTICS:
 712       assert(svga_have_vgpu10(svga));
 713       sq->svga_type = SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS;
 714       define_query_vgpu10(svga, sq,
 715                           sizeof(SVGADXStreamOutStatisticsQueryResult));
 716       break;
 717    case PIPE_QUERY_TIMESTAMP:
 718       assert(svga_have_vgpu10(svga));
 719       sq->svga_type = SVGA3D_QUERYTYPE_TIMESTAMP;
 720       define_query_vgpu10(svga, sq,
 721                           sizeof(SVGADXTimestampQueryResult));
 722       break;
 723    case SVGA_QUERY_NUM_DRAW_CALLS:
 724    case SVGA_QUERY_NUM_FALLBACKS:
 725    case SVGA_QUERY_NUM_FLUSHES:
 726    case SVGA_QUERY_MEMORY_USED:
 727    case SVGA_QUERY_NUM_SHADERS:
 728    case SVGA_QUERY_NUM_RESOURCES:
 729    case SVGA_QUERY_NUM_STATE_OBJECTS:
 730    case SVGA_QUERY_NUM_VALIDATIONS:
 731    case SVGA_QUERY_MAP_BUFFER_TIME:
 732    case SVGA_QUERY_NUM_SURFACE_VIEWS:
 733    case SVGA_QUERY_NUM_RESOURCES_MAPPED:
 734    case SVGA_QUERY_NUM_BYTES_UPLOADED:
 735       break;
 736    default:
 737       assert(!"unexpected query type in svga_create_query()");
 738    }
 739
 740    sq->type = query_type;
 741
 742    return &sq->base;
 743
 744 fail:
 745    FREE(sq);
 746    return NULL;
 747 }
 748
 749 static void
 750 svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
 751 {
 752    struct svga_context *svga = svga_context(pipe);
 753    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
 754    struct svga_query *sq;
 755
 756    if (!q) {
 757       destroy_gb_query_obj(svga);
 758       return;
 759    }
 760
 761    sq = svga_query(q);
 762
 763    SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__,
 764             sq, sq->id);
 765
 766    switch (sq->type) {
 767    case PIPE_QUERY_OCCLUSION_COUNTER:
 768       if (svga_have_vgpu10(svga)) {
 769          /* make sure to also destroy any associated predicate query */
 770          if (sq->predicate)
 771             svga_destroy_query(pipe, sq->predicate);
 772          destroy_query_vgpu10(svga, sq);
 773       } else {
 774          sws->buffer_destroy(sws, sq->hwbuf);
 775       }
 776       sws->fence_reference(sws, &sq->fence, NULL);
 777       break;
 778    case PIPE_QUERY_OCCLUSION_PREDICATE:
 779       assert(svga_have_vgpu10(svga));
 780       destroy_query_vgpu10(svga, sq);
 781       sws->fence_reference(sws, &sq->fence, NULL);
 782       break;
 783    case PIPE_QUERY_PRIMITIVES_GENERATED:
 784    case PIPE_QUERY_PRIMITIVES_EMITTED:
 785    case PIPE_QUERY_SO_STATISTICS:
 786    case PIPE_QUERY_TIMESTAMP:
 787       assert(svga_have_vgpu10(svga));
 788       destroy_query_vgpu10(svga, sq);
 789       sws->fence_reference(sws, &sq->fence, NULL);
 790       break;
 791    case SVGA_QUERY_NUM_DRAW_CALLS:
 792    case SVGA_QUERY_NUM_FALLBACKS:
 793    case SVGA_QUERY_NUM_FLUSHES:
 794    case SVGA_QUERY_MEMORY_USED:
 795    case SVGA_QUERY_NUM_SHADERS:
 796    case SVGA_QUERY_NUM_RESOURCES:
 797    case SVGA_QUERY_NUM_STATE_OBJECTS:
 798    case SVGA_QUERY_NUM_VALIDATIONS:
 799    case SVGA_QUERY_MAP_BUFFER_TIME:
 800    case SVGA_QUERY_NUM_SURFACE_VIEWS:
 801    case SVGA_QUERY_NUM_RESOURCES_MAPPED:
 802    case SVGA_QUERY_NUM_BYTES_UPLOADED:
 803       /* nothing */
 804       break;
 805    default:
 806       assert(!"svga: unexpected query type in svga_destroy_query()");
 807    }
 808
 809    /* Free the query id */
 810    util_bitmask_clear(svga->query_id_bm, sq->id);
 811
 812    FREE(sq);
 813 }
 814
 815
 816 static boolean
 817 svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
 818 {
 819    struct svga_context *svga = svga_context(pipe);
 820    struct svga_query *sq = svga_query(q);
 821    enum pipe_error ret;
 822
 823    assert(sq);
 824    assert(sq->type < SVGA_QUERY_MAX);
 825
 826    SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__,
 827             sq, sq->id);
 828
 829    /* Need to flush out buffered drawing commands so that they don't
 830     * get counted in the query results.
 831     */
 832    svga_hwtnl_flush_retry(svga);
 833
 834    switch (sq->type) {
 835    case PIPE_QUERY_OCCLUSION_COUNTER:
 836       if (svga_have_vgpu10(svga)) {
 837          ret = begin_query_vgpu10(svga, sq);
 838          /* also need to start the associated occlusion predicate query */
 839          if (sq->predicate) {
 840             enum pipe_error status;
 841             status = begin_query_vgpu10(svga, svga_query(sq->predicate));
 842             assert(status == PIPE_OK);
 843             (void) status;
 844          }
 845       } else {
 846          ret = begin_query_vgpu9(svga, sq);
 847       }
 848       assert(ret == PIPE_OK);
 849       (void) ret;
 850       break;
 851    case PIPE_QUERY_OCCLUSION_PREDICATE:
 852       assert(svga_have_vgpu10(svga));
 853       ret = begin_query_vgpu10(svga, sq);
 854       assert(ret == PIPE_OK);
 855       break;
 856    case PIPE_QUERY_PRIMITIVES_GENERATED:
 857    case PIPE_QUERY_PRIMITIVES_EMITTED:
 858    case PIPE_QUERY_SO_STATISTICS:
 859    case PIPE_QUERY_TIMESTAMP:
 860       assert(svga_have_vgpu10(svga));
 861       ret = begin_query_vgpu10(svga, sq);
 862       assert(ret == PIPE_OK);
 863       break;
 864    case SVGA_QUERY_NUM_DRAW_CALLS:
 865       sq->begin_count = svga->hud.num_draw_calls;
 866       break;
 867    case SVGA_QUERY_NUM_FALLBACKS:
 868       sq->begin_count = svga->hud.num_fallbacks;
 869       break;
 870    case SVGA_QUERY_NUM_FLUSHES:
 871       sq->begin_count = svga->hud.num_flushes;
 872       break;
 873    case SVGA_QUERY_NUM_VALIDATIONS:
 874       sq->begin_count = svga->hud.num_validations;
 875       break;
 876    case SVGA_QUERY_MAP_BUFFER_TIME:
 877       sq->begin_count = svga->hud.map_buffer_time;
 878       break;
 879    case SVGA_QUERY_NUM_RESOURCES_MAPPED:
 880       sq->begin_count = svga->hud.num_resources_mapped;
 881       break;
 882    case SVGA_QUERY_NUM_BYTES_UPLOADED:
 883       sq->begin_count = svga->hud.num_bytes_uploaded;
 884       break;
 885    case SVGA_QUERY_MEMORY_USED:
 886    case SVGA_QUERY_NUM_SHADERS:
 887    case SVGA_QUERY_NUM_RESOURCES:
 888    case SVGA_QUERY_NUM_STATE_OBJECTS:
 889    case SVGA_QUERY_NUM_SURFACE_VIEWS:
 890       /* nothing */
 891       break;
 892    default:
 893       assert(!"unexpected query type in svga_begin_query()");
 894    }
 895
 896    svga->sq[sq->type] = sq;
 897
 898    return true;
 899 }
 900
 901
 902 static void
 903 svga_end_query(struct pipe_context *pipe, struct pipe_query *q)
 904 {
 905    struct svga_context *svga = svga_context(pipe);
 906    struct svga_query *sq = svga_query(q);
 907    enum pipe_error ret;
 908
 909    assert(sq);
 910    assert(sq->type < SVGA_QUERY_MAX);
 911
 912    SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__,
 913             sq, sq->id);
 914
 915    if (sq->type == PIPE_QUERY_TIMESTAMP && svga->sq[sq->type] != sq)
 916       svga_begin_query(pipe, q);
 917
 918    svga_hwtnl_flush_retry(svga);
 919
 920    assert(svga->sq[sq->type] == sq);
 921
 922    switch (sq->type) {
 923    case PIPE_QUERY_OCCLUSION_COUNTER:
 924       if (svga_have_vgpu10(svga)) {
 925          ret = end_query_vgpu10(svga, sq);
 926          /* also need to end the associated occlusion predicate query */
 927          if (sq->predicate) {
 928             enum pipe_error status;
 929             status = end_query_vgpu10(svga, svga_query(sq->predicate));
 930             assert(status == PIPE_OK);
 931             (void) status;
 932          }
 933       } else {
 934          ret = end_query_vgpu9(svga, sq);
 935       }
 936       assert(ret == PIPE_OK);
 937       (void) ret;
 938       /* TODO: Delay flushing. We don't really need to flush here, just ensure
 939        * that there is one flush before svga_get_query_result attempts to get
 940        * the result.
 941        */
 942       svga_context_flush(svga, NULL);
 943       break;
 944    case PIPE_QUERY_OCCLUSION_PREDICATE:
 945       assert(svga_have_vgpu10(svga));
 946       ret = end_query_vgpu10(svga, sq);
 947       assert(ret == PIPE_OK);
 948       break;
 949    case PIPE_QUERY_PRIMITIVES_GENERATED:
 950    case PIPE_QUERY_PRIMITIVES_EMITTED:
 951    case PIPE_QUERY_SO_STATISTICS:
 952    case PIPE_QUERY_TIMESTAMP:
 953       assert(svga_have_vgpu10(svga));
 954       ret = end_query_vgpu10(svga, sq);
 955       assert(ret == PIPE_OK);
 956       break;
 957    case SVGA_QUERY_NUM_DRAW_CALLS:
 958       sq->end_count = svga->hud.num_draw_calls;
 959       break;
 960    case SVGA_QUERY_NUM_FALLBACKS:
 961       sq->end_count = svga->hud.num_fallbacks;
 962       break;
 963    case SVGA_QUERY_NUM_FLUSHES:
 964       sq->end_count = svga->hud.num_flushes;
 965       break;
 966    case SVGA_QUERY_NUM_VALIDATIONS:
 967       sq->end_count = svga->hud.num_validations;
 968       break;
 969    case SVGA_QUERY_MAP_BUFFER_TIME:
 970       sq->end_count = svga->hud.map_buffer_time;
 971       break;
 972    case SVGA_QUERY_NUM_RESOURCES_MAPPED:
 973       sq->end_count = svga->hud.num_resources_mapped;
 974       break;
 975    case SVGA_QUERY_NUM_BYTES_UPLOADED:
 976       sq->end_count = svga->hud.num_bytes_uploaded;
 977       break;
 978    case SVGA_QUERY_MEMORY_USED:
 979    case SVGA_QUERY_NUM_SHADERS:
 980    case SVGA_QUERY_NUM_RESOURCES:
 981    case SVGA_QUERY_NUM_STATE_OBJECTS:
 982    case SVGA_QUERY_NUM_SURFACE_VIEWS:
 983       /* nothing */
 984       break;
 985    default:
 986       assert(!"unexpected query type in svga_end_query()");
 987    }
 988    svga->sq[sq->type] = NULL;
 989 }
 990
 991
 992 static boolean
 993 svga_get_query_result(struct pipe_context *pipe,
 994                       struct pipe_query *q,
 995                       boolean wait,
 996                       union pipe_query_result *vresult)
 997 {
 998    struct svga_screen *svgascreen = svga_screen(pipe->screen);
 999    struct svga_context *svga = svga_context(pipe);
1000    struct svga_query *sq = svga_query(q);
1001    uint64_t *result = (uint64_t *)vresult;
1002    boolean ret = TRUE;
1003
1004    assert(sq);
1005
1006    SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d wait: %d\n",
1007             __FUNCTION__, sq, sq->id, wait);
1008
1009    switch (sq->type) {
1010    case PIPE_QUERY_OCCLUSION_COUNTER:
1011       if (svga_have_vgpu10(svga)) {
1012          SVGADXOcclusionQueryResult occResult;
1013          ret = get_query_result_vgpu10(svga, sq, wait,
1014                                        (void *)&occResult, sizeof(occResult));
1015          *result = (uint64_t)occResult.samplesRendered;
1016       } else {
1017          ret = get_query_result_vgpu9(svga, sq, wait, (uint64_t *)result);
1018       }
1019       break;
1020    case PIPE_QUERY_OCCLUSION_PREDICATE: {
1021       SVGADXOcclusionPredicateQueryResult occResult;
1022       assert(svga_have_vgpu10(svga));
1023       ret = get_query_result_vgpu10(svga, sq, wait,
1024                                     (void *)&occResult, sizeof(occResult));
1025       vresult->b = occResult.anySamplesRendered != 0;
1026       break;
1027    }
1028    case PIPE_QUERY_SO_STATISTICS: {
1029       SVGADXStreamOutStatisticsQueryResult sResult;
1030       struct pipe_query_data_so_statistics *pResult =
1031          (struct pipe_query_data_so_statistics *)vresult;
1032
1033       assert(svga_have_vgpu10(svga));
1034       ret = get_query_result_vgpu10(svga, sq, wait,
1035                                     (void *)&sResult, sizeof(sResult));
1036       pResult->num_primitives_written = sResult.numPrimitivesWritten;
1037       pResult->primitives_storage_needed = sResult.numPrimitivesRequired;
1038       break;
1039    }
1040    case PIPE_QUERY_TIMESTAMP: {
1041       SVGADXTimestampQueryResult sResult;
1042
1043       assert(svga_have_vgpu10(svga));
1044       ret = get_query_result_vgpu10(svga, sq, wait,
1045                                     (void *)&sResult, sizeof(sResult));
1046       *result = (uint64_t)sResult.timestamp;
1047       break;
1048    }
1049    case PIPE_QUERY_PRIMITIVES_GENERATED: {
1050       SVGADXStreamOutStatisticsQueryResult sResult;
1051
1052       assert(svga_have_vgpu10(svga));
1053       ret = get_query_result_vgpu10(svga, sq, wait,
1054                                     (void *)&sResult, sizeof sResult);
1055       *result = (uint64_t)sResult.numPrimitivesRequired;
1056       break;
1057    }
1058    case PIPE_QUERY_PRIMITIVES_EMITTED: {
1059       SVGADXStreamOutStatisticsQueryResult sResult;
1060
1061       assert(svga_have_vgpu10(svga));
1062       ret = get_query_result_vgpu10(svga, sq, wait,
1063                                     (void *)&sResult, sizeof sResult);
1064       *result = (uint64_t)sResult.numPrimitivesWritten;
1065       break;
1066    }
1067    /* These are per-frame counters */
1068    case SVGA_QUERY_NUM_DRAW_CALLS:
1069    case SVGA_QUERY_NUM_FALLBACKS:
1070    case SVGA_QUERY_NUM_FLUSHES:
1071    case SVGA_QUERY_NUM_VALIDATIONS:
1072    case SVGA_QUERY_NUM_RESOURCES_MAPPED:
1073    case SVGA_QUERY_NUM_BYTES_UPLOADED:
1074    case SVGA_QUERY_MAP_BUFFER_TIME:
1075       vresult->u64 = sq->end_count - sq->begin_count;
1076       break;
1077    /* These are running total counters */
1078    case SVGA_QUERY_MEMORY_USED:
1079       vresult->u64 = svgascreen->hud.total_resource_bytes;
1080       break;
1081    case SVGA_QUERY_NUM_SHADERS:
1082       vresult->u64 = svga->hud.num_shaders;
1083       break;
1084    case SVGA_QUERY_NUM_RESOURCES:
1085       vresult->u64 = svgascreen->hud.num_resources;
1086       break;
1087    case SVGA_QUERY_NUM_STATE_OBJECTS:
1088       vresult->u64 = svga->hud.num_state_objects;
1089       break;
1090    case SVGA_QUERY_NUM_SURFACE_VIEWS:
1091       vresult->u64 = svga->hud.num_surface_views;
1092       break;
1093    default:
1094       assert(!"unexpected query type in svga_get_query_result");
1095    }
1096
1097    SVGA_DBG(DEBUG_QUERY, "%s result %d\n", __FUNCTION__, *((uint64_t *)vresult));
1098
1099    return ret;
1100 }
1101
1102 static void
1103 svga_render_condition(struct pipe_context *pipe, struct pipe_query *q,
1104                       boolean condition, uint mode)
1105 {
1106    struct svga_context *svga = svga_context(pipe);
1107    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
1108    struct svga_query *sq = svga_query(q);
1109    SVGA3dQueryId queryId;
1110    enum pipe_error ret;
1111
1112    SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
1113
1114    assert(svga_have_vgpu10(svga));
1115    if (sq == NULL) {
1116       queryId = SVGA3D_INVALID_ID;
1117    }
1118    else {
1119       assert(sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION ||
1120              sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE);
1121
1122       if (sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION) {
1123          assert(sq->predicate);
1124          /**
1125           * For conditional rendering, make sure to use the associated
1126           * predicate query.
1127           */
1128          sq = svga_query(sq->predicate);
1129       }
1130       queryId = sq->id;
1131
1132       if ((mode == PIPE_RENDER_COND_WAIT ||
1133            mode == PIPE_RENDER_COND_BY_REGION_WAIT) && sq->fence) {
1134          sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
1135       }
1136    }
1137
1138    ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId,
1139                                       (uint32) condition);
1140    if (ret != PIPE_OK) {
1141       svga_context_flush(svga, NULL);
1142       ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId,
1143                                          (uint32) condition);
1144    }
1145 }
1146
1147
1148 /*
1149  * This function is a workaround because we lack the ability to query
1150  * renderer's time synchornously.
1151  */
1152 static uint64_t
1153 svga_get_timestamp(struct pipe_context *pipe)
1154 {
1155    struct pipe_query *q = svga_create_query(pipe, PIPE_QUERY_TIMESTAMP, 0);
1156    union pipe_query_result result;
1157
1158    svga_begin_query(pipe, q);
1159    svga_end_query(pipe,q);
1160    svga_get_query_result(pipe, q, TRUE, &result);
1161    svga_destroy_query(pipe, q);
1162
1163    return result.u64;
1164 }
1165
1166
1167 void
1168 svga_init_query_functions(struct svga_context *svga)
1169 {
1170    svga->pipe.create_query = svga_create_query;
1171    svga->pipe.destroy_query = svga_destroy_query;
1172    svga->pipe.begin_query = svga_begin_query;
1173    svga->pipe.end_query = svga_end_query;
1174    svga->pipe.get_query_result = svga_get_query_result;
1175    svga->pipe.render_condition = svga_render_condition;
1176    svga->pipe.get_timestamp = svga_get_timestamp;
1177 }