src/gallium/drivers/svga/svga_pipe_query.c

   1 /**********************************************************
   2  * Copyright 2008-2015 VMware, Inc.  All rights reserved.
   3  *
   4  * Permission is hereby granted, free of charge, to any person
   5  * obtaining a copy of this software and associated documentation
   6  * files (the "Software"), to deal in the Software without
   7  * restriction, including without limitation the rights to use, copy,
   8  * modify, merge, publish, distribute, sublicense, and/or sell copies
   9  * of the Software, and to permit persons to whom the Software is
  10  * furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be
  13  * included in all copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  *
  24  **********************************************************/
  25
  26 #include "pipe/p_state.h"
  27 #include "pipe/p_context.h"
  28
  29 #include "util/u_bitmask.h"
  30 #include "util/u_memory.h"
  31
  32 #include "svga_cmd.h"
  33 #include "svga_context.h"
  34 #include "svga_screen.h"
  35 #include "svga_resource_buffer.h"
  36 #include "svga_winsys.h"
  37 #include "svga_debug.h"
  38
  39
  40 /* Fixme: want a public base class for all pipe structs, even if there
  41  * isn't much in them.
  42  */
  43 struct pipe_query {
  44    int dummy;
  45 };
  46
  47 struct svga_query {
  48    struct pipe_query base;
  49    unsigned type;                  /**< PIPE_QUERY_x or SVGA_QUERY_x */
  50    SVGA3dQueryType svga_type;      /**< SVGA3D_QUERYTYPE_x or unused */
  51
  52    unsigned id;                    /** Per-context query identifier */
  53
  54    struct pipe_fence_handle *fence;
  55
  56    /** For PIPE_QUERY_OCCLUSION_COUNTER / SVGA3D_QUERYTYPE_OCCLUSION */
  57
  58    /* For VGPU9 */
  59    struct svga_winsys_buffer *hwbuf;
  60    volatile SVGA3dQueryResult *queryResult;
  61
  62    /** For VGPU10 */
  63    struct svga_winsys_gb_query *gb_query;
  64    SVGA3dDXQueryFlags flags;
  65    unsigned offset;                /**< offset to the gb_query memory */
  66    struct pipe_query *predicate;   /** The associated query that can be used for predicate */
  67
  68    /** For non-GPU SVGA_QUERY_x queries */
  69    uint64_t begin_count, end_count;
  70 };
  71
  72
  73 /** cast wrapper */
  74 static inline struct svga_query *
  75 svga_query( struct pipe_query *q )
  76 {
  77    return (struct svga_query *)q;
  78 }
  79
  80
  81 static boolean
  82 svga_get_query_result(struct pipe_context *pipe,
  83                       struct pipe_query *q,
  84                       boolean wait,
  85                       union pipe_query_result *result);
  86
  87 static enum pipe_error
  88 define_query_vgpu9(struct svga_context *svga,
  89                    struct svga_query *sq)
  90 {
  91    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
  92
  93    sq->hwbuf = svga_winsys_buffer_create(svga, 1,
  94                                          SVGA_BUFFER_USAGE_PINNED,
  95                                          sizeof *sq->queryResult);
  96    if (!sq->hwbuf)
  97       return PIPE_ERROR_OUT_OF_MEMORY;
  98
  99    sq->queryResult = (SVGA3dQueryResult *)
 100                      sws->buffer_map(sws, sq->hwbuf, PIPE_TRANSFER_WRITE);
 101    if (!sq->queryResult) {
 102       sws->buffer_destroy(sws, sq->hwbuf);
 103       return PIPE_ERROR_OUT_OF_MEMORY;
 104    }
 105
 106    sq->queryResult->totalSize = sizeof *sq->queryResult;
 107    sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
 108
 109    /* We request the buffer to be pinned and assume it is always mapped.
 110     * The reason is that we don't want to wait for fences when checking the
 111     * query status.
 112     */
 113    sws->buffer_unmap(sws, sq->hwbuf);
 114
 115    return PIPE_OK;
 116 }
 117
 118 static enum pipe_error
 119 begin_query_vgpu9(struct svga_context *svga, struct svga_query *sq)
 120 {
 121    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
 122    enum pipe_error ret = PIPE_OK;
 123
 124    if (sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) {
 125       /* The application doesn't care for the pending query result.
 126        * We cannot let go of the existing buffer and just get a new one
 127        * because its storage may be reused for other purposes and clobbered
 128        * by the host when it determines the query result.  So the only
 129        * option here is to wait for the existing query's result -- not a
 130        * big deal, given that no sane application would do this.
 131        */
 132        uint64_t result;
 133        svga_get_query_result(&svga->pipe, &sq->base, TRUE, (void*)&result);
 134        assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING);
 135    }
 136
 137    sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
 138    sws->fence_reference(sws, &sq->fence, NULL);
 139
 140    ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
 141    if (ret != PIPE_OK) {
 142       svga_context_flush(svga, NULL);
 143       ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
 144    }
 145    return ret;
 146 }
 147
 148 static enum pipe_error
 149 end_query_vgpu9(struct svga_context *svga, struct svga_query *sq)
 150 {
 151    enum pipe_error ret = PIPE_OK;
 152
 153    /* Set to PENDING before sending EndQuery. */
 154    sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING;
 155
 156    ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf);
 157    if (ret != PIPE_OK) {
 158       svga_context_flush(svga, NULL);
 159       ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf);
 160    }
 161    return ret;
 162 }
 163
 164 static boolean
 165 get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq,
 166                        boolean wait, uint64_t *result)
 167 {
 168    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
 169    enum pipe_error ret;
 170    SVGA3dQueryState state;
 171
 172    if (!sq->fence) {
 173       /* The query status won't be updated by the host unless
 174        * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause
 175        * a synchronous wait on the host.
 176        */
 177       ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf);
 178       if (ret != PIPE_OK) {
 179          svga_context_flush(svga, NULL);
 180          ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf);
 181       }
 182       assert (ret == PIPE_OK);
 183       svga_context_flush(svga, &sq->fence);
 184       assert(sq->fence);
 185    }
 186
 187    state = sq->queryResult->state;
 188    if (state == SVGA3D_QUERYSTATE_PENDING) {
 189       if (!wait)
 190          return FALSE;
 191       sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
 192       state = sq->queryResult->state;
 193    }
 194
 195    assert(state == SVGA3D_QUERYSTATE_SUCCEEDED ||
 196           state == SVGA3D_QUERYSTATE_FAILED);
 197
 198    *result = (uint64_t)sq->queryResult->result32;
 199    return TRUE;
 200 }
 201
 202
 203 /**
 204  * VGPU10
 205  *
 206  * There is one query mob allocated for each context to be shared by all
  207  * query types. The mob is used to hold queries' state and result. Since
 208  * each query result type is of different length, to ease the query allocation
 209  * management, the mob is divided into memory blocks. Each memory block
 210  * will hold queries of the same type. Multiple memory blocks can be allocated
 211  * for a particular query type.
 212  *
 213  * Currently each memory block is of 184 bytes. We support up to 128
 214  * memory blocks. The query memory size is arbitrary right now.
  215  * Each occlusion query takes about 8 bytes. One memory block can accommodate
 216  * 23 occlusion queries. 128 of those blocks can support up to 2944 occlusion
 217  * queries. That seems reasonable for now. If we think this limit is
 218  * not enough, we can increase the limit or try to grow the mob in runtime.
 219  * Note, SVGA device does not impose one mob per context for queries,
 220  * we could allocate multiple mobs for queries; however, wddm KMD does not
 221  * currently support that.
 222  *
 223  * Also note that the GL guest driver does not issue any of the
 224  * following commands: DXMoveQuery, DXBindAllQuery & DXReadbackAllQuery.
 225  */
 226 #define SVGA_QUERY_MEM_BLOCK_SIZE    (sizeof(SVGADXQueryResultUnion) * 2)
 227 #define SVGA_QUERY_MEM_SIZE          (128 * SVGA_QUERY_MEM_BLOCK_SIZE)
 228
 229 struct svga_qmem_alloc_entry
 230 {
 231    unsigned start_offset;               /* start offset of the memory block */
 232    unsigned block_index;                /* block index of the memory block */
 233    unsigned query_size;                 /* query size in this memory block */
 234    unsigned nquery;                     /* number of queries allocated */
 235    struct util_bitmask *alloc_mask;     /* allocation mask */
 236    struct svga_qmem_alloc_entry *next;  /* next memory block */
 237 };
 238
 239
 240 /**
 241  * Allocate a memory block from the query object memory
 242  * \return -1 if out of memory, else index of the query memory block
 243  */
 244 static int
 245 allocate_query_block(struct svga_context *svga)
 246 {
 247    int index;
 248    unsigned offset;
 249
 250    /* Find the next available query block */
 251    index = util_bitmask_add(svga->gb_query_alloc_mask);
 252
 253    if (index == UTIL_BITMASK_INVALID_INDEX)
 254       return -1;
 255
 256    offset = index * SVGA_QUERY_MEM_BLOCK_SIZE;
 257    if (offset >= svga->gb_query_len) {
 258       unsigned i;
 259
 260       /**
 261        * All the memory blocks are allocated, lets see if there is
 262        * any empty memory block around that can be freed up.
 263        */
 264       index = -1;
 265       for (i = 0; i < SVGA_QUERY_MAX && index == -1; i++) {
 266          struct svga_qmem_alloc_entry *alloc_entry;
 267          struct svga_qmem_alloc_entry *prev_alloc_entry = NULL;
 268
 269          alloc_entry = svga->gb_query_map[i];
 270          while (alloc_entry && index == -1) {
 271             if (alloc_entry->nquery == 0) {
 272                /* This memory block is empty, it can be recycled. */
 273                if (prev_alloc_entry) {
 274                   prev_alloc_entry->next = alloc_entry->next;
 275                } else {
 276                   svga->gb_query_map[i] = alloc_entry->next;
 277                }
 278                index = alloc_entry->block_index;
 279             } else {
 280                prev_alloc_entry = alloc_entry;
 281                alloc_entry = alloc_entry->next;
 282             }
 283          }
 284       }
 285    }
 286
 287    return index;
 288 }
 289
 290 /**
 291  * Allocate a slot in the specified memory block.
 292  * All slots in this memory block are of the same size.
 293  *
 294  * \return -1 if out of memory, else index of the query slot
 295  */
 296 static int
 297 allocate_query_slot(struct svga_context *svga,
 298                     struct svga_qmem_alloc_entry *alloc)
 299 {
 300    int index;
 301    unsigned offset;
 302
 303    /* Find the next available slot */
 304    index = util_bitmask_add(alloc->alloc_mask);
 305
 306    if (index == UTIL_BITMASK_INVALID_INDEX)
 307       return -1;
 308
 309    offset = index * alloc->query_size;
 310    if (offset >= SVGA_QUERY_MEM_BLOCK_SIZE)
 311       return -1;
 312
 313    alloc->nquery++;
 314
 315    return index;
 316 }
 317
 318 /**
 319  * Deallocate the specified slot in the memory block.
 320  * If all slots are freed up, then deallocate the memory block
 321  * as well, so it can be allocated for other query type
 322  */
 323 static void
 324 deallocate_query_slot(struct svga_context *svga,
 325                       struct svga_qmem_alloc_entry *alloc,
 326                       unsigned index)
 327 {
 328    assert(index != UTIL_BITMASK_INVALID_INDEX);
 329
 330    util_bitmask_clear(alloc->alloc_mask, index);
 331    alloc->nquery--;
 332
 333    /**
 334     * Don't worry about deallocating the empty memory block here.
 335     * The empty memory block will be recycled when no more memory block
 336     * can be allocated.
 337     */
 338 }
 339
 340 static struct svga_qmem_alloc_entry *
 341 allocate_query_block_entry(struct svga_context *svga,
 342                            unsigned len)
 343 {
 344    struct svga_qmem_alloc_entry *alloc_entry;
 345    int block_index = -1;
 346
 347    block_index = allocate_query_block(svga);
 348    if (block_index == -1)
 349       return NULL;
 350    alloc_entry = CALLOC_STRUCT(svga_qmem_alloc_entry);
 351    if (alloc_entry == NULL)
 352       return NULL;
 353
 354    alloc_entry->block_index = block_index;
 355    alloc_entry->start_offset = block_index * SVGA_QUERY_MEM_BLOCK_SIZE;
 356    alloc_entry->nquery = 0;
 357    alloc_entry->alloc_mask = util_bitmask_create();
 358    alloc_entry->next = NULL;
 359    alloc_entry->query_size = len;
 360
 361    return alloc_entry;
 362 }
 363
 364 /**
 365  * Allocate a memory slot for a query of the specified type.
 366  * It will first search through the memory blocks that are allocated
 367  * for the query type. If no memory slot is available, it will try
 368  * to allocate another memory block within the query object memory for
 369  * this query type.
 370  */
 371 static int
 372 allocate_query(struct svga_context *svga,
 373                SVGA3dQueryType type,
 374                unsigned len)
 375 {
 376    struct svga_qmem_alloc_entry *alloc_entry;
 377    int slot_index = -1;
 378    unsigned offset;
 379
 380    assert(type < SVGA_QUERY_MAX);
 381
 382    alloc_entry = svga->gb_query_map[type];
 383
 384    if (alloc_entry == NULL) {
 385       /**
 386        * No query memory block has been allocated for this query type,
 387        * allocate one now
 388        */
 389       alloc_entry = allocate_query_block_entry(svga, len);
 390       if (alloc_entry == NULL)
 391          return -1;
 392       svga->gb_query_map[type] = alloc_entry;
 393    }
 394
 395    /* Allocate a slot within the memory block allocated for this query type */
 396    slot_index = allocate_query_slot(svga, alloc_entry);
 397
 398    if (slot_index == -1) {
 399       /* This query memory block is full, allocate another one */
 400       alloc_entry = allocate_query_block_entry(svga, len);
 401       if (alloc_entry == NULL)
 402          return -1;
 403       alloc_entry->next = svga->gb_query_map[type];
 404       svga->gb_query_map[type] = alloc_entry;
 405       slot_index = allocate_query_slot(svga, alloc_entry);
 406    }
 407
 408    assert(slot_index != -1);
 409    offset = slot_index * len + alloc_entry->start_offset;
 410
 411    return offset;
 412 }
 413
 414
 415 /**
 416  * Deallocate memory slot allocated for the specified query
 417  */
 418 static void
 419 deallocate_query(struct svga_context *svga,
 420                  struct svga_query *sq)
 421 {
 422    struct svga_qmem_alloc_entry *alloc_entry;
 423    unsigned slot_index;
 424    unsigned offset = sq->offset;
 425
 426    alloc_entry = svga->gb_query_map[sq->svga_type];
 427
 428    while (alloc_entry) {
 429       if (offset >= alloc_entry->start_offset &&
 430           offset < alloc_entry->start_offset + SVGA_QUERY_MEM_BLOCK_SIZE) {
 431
 432          /* The slot belongs to this memory block, deallocate it */
 433          slot_index = (offset - alloc_entry->start_offset) /
 434                       alloc_entry->query_size;
 435          deallocate_query_slot(svga, alloc_entry, slot_index);
 436          alloc_entry = NULL;
 437       } else {
 438          alloc_entry = alloc_entry->next;
 439       }
 440    }
 441 }
 442
 443
 444 /**
 445  * Destroy the gb query object and all the related query structures
 446  */
 447 static void
 448 destroy_gb_query_obj(struct svga_context *svga)
 449 {
 450    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
 451    unsigned i;
 452
 453    for (i = 0; i < SVGA_QUERY_MAX; i++) {
 454       struct svga_qmem_alloc_entry *alloc_entry, *next;
 455       alloc_entry = svga->gb_query_map[i];
 456       while (alloc_entry) {
 457          next = alloc_entry->next;
 458          util_bitmask_destroy(alloc_entry->alloc_mask);
 459          FREE(alloc_entry);
 460          alloc_entry = next;
 461       }
 462       svga->gb_query_map[i] = NULL;
 463    }
 464
 465    if (svga->gb_query)
 466       sws->query_destroy(sws, svga->gb_query);
 467    svga->gb_query = NULL;
 468
 469    util_bitmask_destroy(svga->gb_query_alloc_mask);
 470 }
 471
 472 /**
 473  * Define query and create the gb query object if it is not already created.
 474  * There is only one gb query object per context which will be shared by
 475  * queries of all types.
 476  */
 477 static enum pipe_error
 478 define_query_vgpu10(struct svga_context *svga,
 479                     struct svga_query *sq, int resultLen)
 480 {
 481    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
 482    int qlen;
 483    enum pipe_error ret = PIPE_OK;
 484
 485    SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
 486
 487    if (svga->gb_query == NULL) {
 488       /* Create a gb query object */
 489       svga->gb_query = sws->query_create(sws, SVGA_QUERY_MEM_SIZE);
 490       if (!svga->gb_query)
 491          return PIPE_ERROR_OUT_OF_MEMORY;
 492       svga->gb_query_len = SVGA_QUERY_MEM_SIZE;
 493       memset (svga->gb_query_map, 0, sizeof(svga->gb_query_map));
 494       svga->gb_query_alloc_mask = util_bitmask_create();
 495
 496       /* Bind the query object to the context */
 497       if (svga->swc->query_bind(svga->swc, svga->gb_query,
 498                                 SVGA_QUERY_FLAG_SET) != PIPE_OK) {
 499          svga_context_flush(svga, NULL);
 500          svga->swc->query_bind(svga->swc, svga->gb_query,
 501                                SVGA_QUERY_FLAG_SET);
 502       }
 503    }
 504
 505    sq->gb_query = svga->gb_query;
 506
 507    /* Allocate an integer ID for this query */
 508    sq->id = util_bitmask_add(svga->query_id_bm);
 509    if (sq->id == UTIL_BITMASK_INVALID_INDEX)
 510       return PIPE_ERROR_OUT_OF_MEMORY;
 511
 512    /* Find a slot for this query in the gb object */
 513    qlen = resultLen + sizeof(SVGA3dQueryState);
 514    sq->offset = allocate_query(svga, sq->svga_type, qlen);
 515    if (sq->offset == -1)
 516       return PIPE_ERROR_OUT_OF_MEMORY;
 517
 518    SVGA_DBG(DEBUG_QUERY, "   query type=%d qid=0x%x offset=%d\n",
 519             sq->svga_type, sq->id, sq->offset);
 520
 521    /**
 522     * Send SVGA3D commands to define the query
 523     */
 524    ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags);
 525    if (ret != PIPE_OK) {
 526       svga_context_flush(svga, NULL);
 527       ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags);
 528    }
 529    if (ret != PIPE_OK)
 530       return PIPE_ERROR_OUT_OF_MEMORY;
 531
 532    ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id);
 533    if (ret != PIPE_OK) {
 534       svga_context_flush(svga, NULL);
 535       ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id);
 536    }
 537    assert(ret == PIPE_OK);
 538
 539    ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset);
 540    if (ret != PIPE_OK) {
 541       svga_context_flush(svga, NULL);
 542       ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset);
 543    }
 544    assert(ret == PIPE_OK);
 545
 546    return PIPE_OK;
 547 }
 548
 549 static enum pipe_error
 550 destroy_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
 551 {
 552    enum pipe_error ret;
 553
 554    ret = SVGA3D_vgpu10_DestroyQuery(svga->swc, sq->id);
 555
 556    /* Deallocate the memory slot allocated for this query */
 557    deallocate_query(svga, sq);
 558
 559    return ret;
 560 }
 561
 562
 563 /**
 564  * Rebind queryies to the context.
 565  */
 566 static void
 567 rebind_vgpu10_query(struct svga_context *svga)
 568 {
 569    if (svga->swc->query_bind(svga->swc, svga->gb_query,
 570                              SVGA_QUERY_FLAG_REF) != PIPE_OK) {
 571       svga_context_flush(svga, NULL);
 572       svga->swc->query_bind(svga->swc, svga->gb_query,
 573                             SVGA_QUERY_FLAG_REF);
 574    }
 575
 576    svga->rebind.flags.query = FALSE;
 577 }
 578
 579
 580 static enum pipe_error
 581 begin_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
 582 {
 583    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
 584    enum pipe_error ret = PIPE_OK;
 585    int status = 0;
 586
 587    sws->fence_reference(sws, &sq->fence, NULL);
 588
 589    /* Initialize the query state to NEW */
 590    status = sws->query_init(sws, sq->gb_query, sq->offset, SVGA3D_QUERYSTATE_NEW);
 591    if (status)
 592       return PIPE_ERROR;
 593
 594    if (svga->rebind.flags.query) {
 595       rebind_vgpu10_query(svga);
 596    }
 597
 598    /* Send the BeginQuery command to the device */
 599    ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id);
 600    if (ret != PIPE_OK) {
 601       svga_context_flush(svga, NULL);
 602       ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id);
 603    }
 604    return ret;
 605 }
 606
 607 static enum pipe_error
 608 end_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
 609 {
 610    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
 611    enum pipe_error ret = PIPE_OK;
 612
 613    if (svga->rebind.flags.query) {
 614       rebind_vgpu10_query(svga);
 615    }
 616
 617    ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id);
 618    if (ret != PIPE_OK) {
 619       svga_context_flush(svga, NULL);
 620       ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id);
 621    }
 622
 623    /* Finish fence is copied here from get_query_result_vgpu10. This helps
 624     * with cases where svga_begin_query might be called again before
 625     * svga_get_query_result, such as GL_TIME_ELAPSED.
 626     */
 627    if (!sq->fence) {
 628       svga_context_flush(svga, &sq->fence);
 629    }
 630    sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
 631
 632    return ret;
 633 }
 634
 635 static boolean
 636 get_query_result_vgpu10(struct svga_context *svga, struct svga_query *sq,
 637                         boolean wait, void *result, int resultLen)
 638 {
 639    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
 640    SVGA3dQueryState queryState;
 641
 642    if (svga->rebind.flags.query) {
 643       rebind_vgpu10_query(svga);
 644    }
 645
 646    sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen);
 647
 648    if (queryState == SVGA3D_QUERYSTATE_PENDING) {
 649       if (!wait)
 650          return FALSE;
 651       sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
 652       sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen);
 653    }
 654
 655    assert(queryState == SVGA3D_QUERYSTATE_SUCCEEDED ||
 656           queryState == SVGA3D_QUERYSTATE_FAILED);
 657
 658    return TRUE;
 659 }
 660
 661 static struct pipe_query *
 662 svga_create_query(struct pipe_context *pipe,
 663                   unsigned query_type,
 664                   unsigned index)
 665 {
 666    struct svga_context *svga = svga_context(pipe);
 667    struct svga_query *sq;
 668
 669    assert(query_type < SVGA_QUERY_MAX);
 670
 671    sq = CALLOC_STRUCT(svga_query);
 672    if (!sq)
 673       goto fail;
 674
 675    /* Allocate an integer ID for the query */
 676    sq->id = util_bitmask_add(svga->query_id_bm);
 677    if (sq->id == UTIL_BITMASK_INVALID_INDEX)
 678       goto fail;
 679
 680    SVGA_DBG(DEBUG_QUERY, "%s type=%d sq=0x%x id=%d\n", __FUNCTION__,
 681             query_type, sq, sq->id);
 682
 683    switch (query_type) {
 684    case PIPE_QUERY_OCCLUSION_COUNTER:
 685       sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSION;
 686       if (svga_have_vgpu10(svga)) {
 687          define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionQueryResult));
 688
 689          /**
 690           * In OpenGL, occlusion counter query can be used in conditional
 691           * rendering; however, in DX10, only OCCLUSION_PREDICATE query can
 692           * be used for predication. Hence, we need to create an occlusion
 693           * predicate query along with the occlusion counter query. So when
 694           * the occlusion counter query is used for predication, the associated
 695           * query of occlusion predicate type will be used
 696           * in the SetPredication command.
 697           */
 698          sq->predicate = svga_create_query(pipe, PIPE_QUERY_OCCLUSION_PREDICATE, index);
 699
 700       } else {
 701          define_query_vgpu9(svga, sq);
 702       }
 703       break;
 704    case PIPE_QUERY_OCCLUSION_PREDICATE:
 705       assert(svga_have_vgpu10(svga));
 706       sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE;
 707       define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionPredicateQueryResult));
 708       break;
 709    case PIPE_QUERY_PRIMITIVES_GENERATED:
 710    case PIPE_QUERY_PRIMITIVES_EMITTED:
 711    case PIPE_QUERY_SO_STATISTICS:
 712       assert(svga_have_vgpu10(svga));
 713       sq->svga_type = SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS;
 714       define_query_vgpu10(svga, sq,
 715                           sizeof(SVGADXStreamOutStatisticsQueryResult));
 716       break;
 717    case PIPE_QUERY_TIMESTAMP:
 718       assert(svga_have_vgpu10(svga));
 719       sq->svga_type = SVGA3D_QUERYTYPE_TIMESTAMP;
 720       define_query_vgpu10(svga, sq,
 721                           sizeof(SVGADXTimestampQueryResult));
 722       break;
 723    case SVGA_QUERY_NUM_DRAW_CALLS:
 724    case SVGA_QUERY_NUM_FALLBACKS:
 725    case SVGA_QUERY_NUM_FLUSHES:
 726    case SVGA_QUERY_MEMORY_USED:
 727    case SVGA_QUERY_NUM_SHADERS:
 728    case SVGA_QUERY_NUM_RESOURCES:
 729    case SVGA_QUERY_NUM_STATE_OBJECTS:
 730    case SVGA_QUERY_NUM_VALIDATIONS:
 731    case SVGA_QUERY_MAP_BUFFER_TIME:
 732    case SVGA_QUERY_NUM_SURFACE_VIEWS:
 733    case SVGA_QUERY_NUM_RESOURCES_MAPPED:
 734       break;
 735    default:
 736       assert(!"unexpected query type in svga_create_query()");
 737    }
 738
 739    sq->type = query_type;
 740
 741    return &sq->base;
 742
 743 fail:
 744    FREE(sq);
 745    return NULL;
 746 }
 747
 748 static void
 749 svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
 750 {
 751    struct svga_context *svga = svga_context(pipe);
 752    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
 753    struct svga_query *sq;
 754
 755    if (q == NULL) {
 756       return destroy_gb_query_obj(svga);
 757    }
 758
 759    sq = svga_query(q);
 760
 761    SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__,
 762             sq, sq->id);
 763
 764    switch (sq->type) {
 765    case PIPE_QUERY_OCCLUSION_COUNTER:
 766       if (svga_have_vgpu10(svga)) {
 767          /* make sure to also destroy any associated predicate query */
 768          if (sq->predicate)
 769             svga_destroy_query(pipe, sq->predicate);
 770          destroy_query_vgpu10(svga, sq);
 771       } else {
 772          sws->buffer_destroy(sws, sq->hwbuf);
 773       }
 774       sws->fence_reference(sws, &sq->fence, NULL);
 775       break;
 776    case PIPE_QUERY_OCCLUSION_PREDICATE:
 777       assert(svga_have_vgpu10(svga));
 778       destroy_query_vgpu10(svga, sq);
 779       sws->fence_reference(sws, &sq->fence, NULL);
 780       break;
 781    case PIPE_QUERY_PRIMITIVES_GENERATED:
 782    case PIPE_QUERY_PRIMITIVES_EMITTED:
 783    case PIPE_QUERY_SO_STATISTICS:
 784    case PIPE_QUERY_TIMESTAMP:
 785       assert(svga_have_vgpu10(svga));
 786       destroy_query_vgpu10(svga, sq);
 787       sws->fence_reference(sws, &sq->fence, NULL);
 788       break;
 789    case SVGA_QUERY_NUM_DRAW_CALLS:
 790    case SVGA_QUERY_NUM_FALLBACKS:
 791    case SVGA_QUERY_NUM_FLUSHES:
 792    case SVGA_QUERY_MEMORY_USED:
 793    case SVGA_QUERY_NUM_SHADERS:
 794    case SVGA_QUERY_NUM_RESOURCES:
 795    case SVGA_QUERY_NUM_STATE_OBJECTS:
 796    case SVGA_QUERY_NUM_VALIDATIONS:
 797    case SVGA_QUERY_MAP_BUFFER_TIME:
 798    case SVGA_QUERY_NUM_SURFACE_VIEWS:
 799    case SVGA_QUERY_NUM_RESOURCES_MAPPED:
 800       /* nothing */
 801       break;
 802    default:
 803       assert(!"svga: unexpected query type in svga_destroy_query()");
 804    }
 805
 806    /* Free the query id */
 807    util_bitmask_clear(svga->query_id_bm, sq->id);
 808
 809    FREE(sq);
 810 }
 811
 812
 813 static boolean
 814 svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
 815 {
 816    struct svga_context *svga = svga_context(pipe);
 817    struct svga_query *sq = svga_query(q);
 818    enum pipe_error ret;
 819
 820    assert(sq);
 821    assert(sq->type < SVGA_QUERY_MAX);
 822
 823    SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__,
 824             sq, sq->id);
 825
 826    /* Need to flush out buffered drawing commands so that they don't
 827     * get counted in the query results.
 828     */
 829    svga_hwtnl_flush_retry(svga);
 830
 831    switch (sq->type) {
 832    case PIPE_QUERY_OCCLUSION_COUNTER:
 833       if (svga_have_vgpu10(svga)) {
 834          ret = begin_query_vgpu10(svga, sq);
 835          /* also need to start the associated occlusion predicate query */
 836          if (sq->predicate) {
 837             enum pipe_error status;
 838             status = begin_query_vgpu10(svga, svga_query(sq->predicate));
 839             assert(status == PIPE_OK);
 840             (void) status;
 841          }
 842       } else {
 843          ret = begin_query_vgpu9(svga, sq);
 844       }
 845       assert(ret == PIPE_OK);
 846       (void) ret;
 847       break;
 848    case PIPE_QUERY_OCCLUSION_PREDICATE:
 849       assert(svga_have_vgpu10(svga));
 850       ret = begin_query_vgpu10(svga, sq);
 851       assert(ret == PIPE_OK);
 852       break;
 853    case PIPE_QUERY_PRIMITIVES_GENERATED:
 854    case PIPE_QUERY_PRIMITIVES_EMITTED:
 855    case PIPE_QUERY_SO_STATISTICS:
 856    case PIPE_QUERY_TIMESTAMP:
 857       assert(svga_have_vgpu10(svga));
 858       ret = begin_query_vgpu10(svga, sq);
 859       assert(ret == PIPE_OK);
 860       break;
 861    case SVGA_QUERY_NUM_DRAW_CALLS:
 862       sq->begin_count = svga->hud.num_draw_calls;
 863       break;
 864    case SVGA_QUERY_NUM_FALLBACKS:
 865       sq->begin_count = svga->hud.num_fallbacks;
 866       break;
 867    case SVGA_QUERY_NUM_FLUSHES:
 868       sq->begin_count = svga->hud.num_flushes;
 869       break;
 870    case SVGA_QUERY_NUM_VALIDATIONS:
 871       sq->begin_count = svga->hud.num_validations;
 872       break;
 873    case SVGA_QUERY_MAP_BUFFER_TIME:
 874       sq->begin_count = svga->hud.map_buffer_time;
 875       break;
 876    case SVGA_QUERY_NUM_RESOURCES_MAPPED:
 877       sq->begin_count = svga->hud.num_resources_mapped;
 878       break;
 879    case SVGA_QUERY_MEMORY_USED:
 880    case SVGA_QUERY_NUM_SHADERS:
 881    case SVGA_QUERY_NUM_RESOURCES:
 882    case SVGA_QUERY_NUM_STATE_OBJECTS:
 883    case SVGA_QUERY_NUM_SURFACE_VIEWS:
 884       /* nothing */
 885       break;
 886    default:
 887       assert(!"unexpected query type in svga_begin_query()");
 888    }
 889
 890    svga->sq[sq->type] = sq;
 891
 892    return true;
 893 }
 894
 895
 896 static void
 897 svga_end_query(struct pipe_context *pipe, struct pipe_query *q)
 898 {
 899    struct svga_context *svga = svga_context(pipe);
 900    struct svga_query *sq = svga_query(q);
 901    enum pipe_error ret;
 902
 903    assert(sq);
 904    assert(sq->type < SVGA_QUERY_MAX);
 905
 906    SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__,
 907             sq, sq->id);
 908
 909    if (sq->type == PIPE_QUERY_TIMESTAMP && svga->sq[sq->type] != sq)
 910       svga_begin_query(pipe, q);
 911
 912    svga_hwtnl_flush_retry(svga);
 913
 914    assert(svga->sq[sq->type] == sq);
 915
 916    switch (sq->type) {
 917    case PIPE_QUERY_OCCLUSION_COUNTER:
 918       if (svga_have_vgpu10(svga)) {
 919          ret = end_query_vgpu10(svga, sq);
 920          /* also need to end the associated occlusion predicate query */
 921          if (sq->predicate) {
 922             enum pipe_error status;
 923             status = end_query_vgpu10(svga, svga_query(sq->predicate));
 924             assert(status == PIPE_OK);
 925             (void) status;
 926          }
 927       } else {
 928          ret = end_query_vgpu9(svga, sq);
 929       }
 930       assert(ret == PIPE_OK);
 931       (void) ret;
 932       /* TODO: Delay flushing. We don't really need to flush here, just ensure
 933        * that there is one flush before svga_get_query_result attempts to get
 934        * the result.
 935        */
 936       svga_context_flush(svga, NULL);
 937       break;
 938    case PIPE_QUERY_OCCLUSION_PREDICATE:
 939       assert(svga_have_vgpu10(svga));
 940       ret = end_query_vgpu10(svga, sq);
 941       assert(ret == PIPE_OK);
 942       break;
 943    case PIPE_QUERY_PRIMITIVES_GENERATED:
 944    case PIPE_QUERY_PRIMITIVES_EMITTED:
 945    case PIPE_QUERY_SO_STATISTICS:
 946    case PIPE_QUERY_TIMESTAMP:
 947       assert(svga_have_vgpu10(svga));
 948       ret = end_query_vgpu10(svga, sq);
 949       assert(ret == PIPE_OK);
 950       break;
 951    case SVGA_QUERY_NUM_DRAW_CALLS:
 952       sq->end_count = svga->hud.num_draw_calls;
 953       break;
 954    case SVGA_QUERY_NUM_FALLBACKS:
 955       sq->end_count = svga->hud.num_fallbacks;
 956       break;
 957    case SVGA_QUERY_NUM_FLUSHES:
 958       sq->end_count = svga->hud.num_flushes;
 959       break;
 960    case SVGA_QUERY_NUM_VALIDATIONS:
 961       sq->end_count = svga->hud.num_validations;
 962       break;
 963    case SVGA_QUERY_MAP_BUFFER_TIME:
 964       sq->end_count = svga->hud.map_buffer_time;
 965       break;
 966    case SVGA_QUERY_NUM_RESOURCES_MAPPED:
 967       sq->end_count = svga->hud.num_resources_mapped;
 968       break;
 969    case SVGA_QUERY_MEMORY_USED:
 970    case SVGA_QUERY_NUM_SHADERS:
 971    case SVGA_QUERY_NUM_RESOURCES:
 972    case SVGA_QUERY_NUM_STATE_OBJECTS:
 973    case SVGA_QUERY_NUM_SURFACE_VIEWS:
 974       /* nothing */
 975       break;
 976    default:
 977       assert(!"unexpected query type in svga_end_query()");
 978    }
 979    svga->sq[sq->type] = NULL;
 980 }
 981
 982
 983 static boolean
 984 svga_get_query_result(struct pipe_context *pipe,
 985                       struct pipe_query *q,
 986                       boolean wait,
 987                       union pipe_query_result *vresult)
 988 {
 989    struct svga_screen *svgascreen = svga_screen(pipe->screen);
 990    struct svga_context *svga = svga_context(pipe);
 991    struct svga_query *sq = svga_query(q);
 992    uint64_t *result = (uint64_t *)vresult;
 993    boolean ret = TRUE;
 994
 995    assert(sq);
 996
 997    SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d wait: %d\n",
 998             __FUNCTION__, sq, sq->id, wait);
 999
1000    switch (sq->type) {
1001    case PIPE_QUERY_OCCLUSION_COUNTER:
1002       if (svga_have_vgpu10(svga)) {
1003          SVGADXOcclusionQueryResult occResult;
1004          ret = get_query_result_vgpu10(svga, sq, wait,
1005                                        (void *)&occResult, sizeof(occResult));
1006          *result = (uint64_t)occResult.samplesRendered;
1007       } else {
1008          ret = get_query_result_vgpu9(svga, sq, wait, (uint64_t *)result);
1009       }
1010       break;
1011    case PIPE_QUERY_OCCLUSION_PREDICATE: {
1012       SVGADXOcclusionPredicateQueryResult occResult;
1013       assert(svga_have_vgpu10(svga));
1014       ret = get_query_result_vgpu10(svga, sq, wait,
1015                                     (void *)&occResult, sizeof(occResult));
1016       vresult->b = occResult.anySamplesRendered != 0;
1017       break;
1018    }
1019    case PIPE_QUERY_SO_STATISTICS: {
1020       SVGADXStreamOutStatisticsQueryResult sResult;
1021       struct pipe_query_data_so_statistics *pResult =
1022          (struct pipe_query_data_so_statistics *)vresult;
1023
1024       assert(svga_have_vgpu10(svga));
1025       ret = get_query_result_vgpu10(svga, sq, wait,
1026                                     (void *)&sResult, sizeof(sResult));
1027       pResult->num_primitives_written = sResult.numPrimitivesWritten;
1028       pResult->primitives_storage_needed = sResult.numPrimitivesRequired;
1029       break;
1030    }
1031    case PIPE_QUERY_TIMESTAMP: {
1032       SVGADXTimestampQueryResult sResult;
1033
1034       assert(svga_have_vgpu10(svga));
1035       ret = get_query_result_vgpu10(svga, sq, wait,
1036                                     (void *)&sResult, sizeof(sResult));
1037       *result = (uint64_t)sResult.timestamp;
1038       break;
1039    }
1040    case PIPE_QUERY_PRIMITIVES_GENERATED: {
1041       SVGADXStreamOutStatisticsQueryResult sResult;
1042
1043       assert(svga_have_vgpu10(svga));
1044       ret = get_query_result_vgpu10(svga, sq, wait,
1045                                     (void *)&sResult, sizeof sResult);
1046       *result = (uint64_t)sResult.numPrimitivesRequired;
1047       break;
1048    }
1049    case PIPE_QUERY_PRIMITIVES_EMITTED: {
1050       SVGADXStreamOutStatisticsQueryResult sResult;
1051
1052       assert(svga_have_vgpu10(svga));
1053       ret = get_query_result_vgpu10(svga, sq, wait,
1054                                     (void *)&sResult, sizeof sResult);
1055       *result = (uint64_t)sResult.numPrimitivesWritten;
1056       break;
1057    }
1058    /* These are per-frame counters */
1059    case SVGA_QUERY_NUM_DRAW_CALLS:
1060    case SVGA_QUERY_NUM_FALLBACKS:
1061    case SVGA_QUERY_NUM_FLUSHES:
1062    case SVGA_QUERY_NUM_VALIDATIONS:
1063    case SVGA_QUERY_NUM_RESOURCES_MAPPED:
1064    case SVGA_QUERY_MAP_BUFFER_TIME:
1065       vresult->u64 = sq->end_count - sq->begin_count;
1066       break;
1067    /* These are running total counters */
1068    case SVGA_QUERY_MEMORY_USED:
1069       vresult->u64 = svgascreen->hud.total_resource_bytes;
1070       break;
1071    case SVGA_QUERY_NUM_SHADERS:
1072       vresult->u64 = svga->hud.num_shaders;
1073       break;
1074    case SVGA_QUERY_NUM_RESOURCES:
1075       vresult->u64 = svgascreen->hud.num_resources;
1076       break;
1077    case SVGA_QUERY_NUM_STATE_OBJECTS:
1078       vresult->u64 = svga->hud.num_state_objects;
1079       break;
1080    case SVGA_QUERY_NUM_SURFACE_VIEWS:
1081       vresult->u64 = svga->hud.num_surface_views;
1082       break;
1083    default:
1084       assert(!"unexpected query type in svga_get_query_result");
1085    }
1086
1087    SVGA_DBG(DEBUG_QUERY, "%s result %d\n", __FUNCTION__, *((uint64_t *)vresult));
1088
1089    return ret;
1090 }
1091
1092 static void
1093 svga_render_condition(struct pipe_context *pipe, struct pipe_query *q,
1094                       boolean condition, uint mode)
1095 {
1096    struct svga_context *svga = svga_context(pipe);
1097    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
1098    struct svga_query *sq = svga_query(q);
1099    SVGA3dQueryId queryId;
1100    enum pipe_error ret;
1101
1102    SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
1103
1104    assert(svga_have_vgpu10(svga));
1105    if (sq == NULL) {
1106       queryId = SVGA3D_INVALID_ID;
1107    }
1108    else {
1109       assert(sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION ||
1110              sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE);
1111
1112       if (sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION) {
1113          assert(sq->predicate);
1114          /**
1115           * For conditional rendering, make sure to use the associated
1116           * predicate query.
1117           */
1118          sq = svga_query(sq->predicate);
1119       }
1120       queryId = sq->id;
1121
1122       if ((mode == PIPE_RENDER_COND_WAIT ||
1123            mode == PIPE_RENDER_COND_BY_REGION_WAIT) && sq->fence) {
1124          sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
1125       }
1126    }
1127
1128    ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId,
1129                                       (uint32) condition);
1130    if (ret != PIPE_OK) {
1131       svga_context_flush(svga, NULL);
1132       ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId,
1133                                          (uint32) condition);
1134    }
1135 }
1136
1137
1138 /*
1139  * This function is a workaround because we lack the ability to query
1140  * renderer's time synchornously.
1141  */
1142 static uint64_t
1143 svga_get_timestamp(struct pipe_context *pipe)
1144 {
1145    struct pipe_query *q = svga_create_query(pipe, PIPE_QUERY_TIMESTAMP, 0);
1146    union pipe_query_result result;
1147
1148    svga_begin_query(pipe, q);
1149    svga_end_query(pipe,q);
1150    svga_get_query_result(pipe, q, TRUE, &result);
1151    svga_destroy_query(pipe, q);
1152
1153    return result.u64;
1154 }
1155
1156
1157 void
1158 svga_init_query_functions(struct svga_context *svga)
1159 {
1160    svga->pipe.create_query = svga_create_query;
1161    svga->pipe.destroy_query = svga_destroy_query;
1162    svga->pipe.begin_query = svga_begin_query;
1163    svga->pipe.end_query = svga_end_query;
1164    svga->pipe.get_query_result = svga_get_query_result;
1165    svga->pipe.render_condition = svga_render_condition;
1166    svga->pipe.get_timestamp = svga_get_timestamp;
1167 }