svga: add new num-readbacks HUD query
[mesa.git] / src / gallium / drivers / svga / svga_pipe_query.c
1 /**********************************************************
2 * Copyright 2008-2015 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26 #include "pipe/p_state.h"
27 #include "pipe/p_context.h"
28
29 #include "util/u_bitmask.h"
30 #include "util/u_memory.h"
31
32 #include "svga_cmd.h"
33 #include "svga_context.h"
34 #include "svga_screen.h"
35 #include "svga_resource_buffer.h"
36 #include "svga_winsys.h"
37 #include "svga_debug.h"
38
39
/*
 * FIXME: there should be a public base struct for every pipe object, even
 * if it holds next to nothing.  Until there is, this placeholder gives
 * struct svga_query something to embed as its base member.
 */
struct pipe_query {
   int dummy;
};
46
47 struct svga_query {
48 struct pipe_query base;
49 unsigned type; /**< PIPE_QUERY_x or SVGA_QUERY_x */
50 SVGA3dQueryType svga_type; /**< SVGA3D_QUERYTYPE_x or unused */
51
52 unsigned id; /** Per-context query identifier */
53
54 struct pipe_fence_handle *fence;
55
56 /** For PIPE_QUERY_OCCLUSION_COUNTER / SVGA3D_QUERYTYPE_OCCLUSION */
57
58 /* For VGPU9 */
59 struct svga_winsys_buffer *hwbuf;
60 volatile SVGA3dQueryResult *queryResult;
61
62 /** For VGPU10 */
63 struct svga_winsys_gb_query *gb_query;
64 SVGA3dDXQueryFlags flags;
65 unsigned offset; /**< offset to the gb_query memory */
66 struct pipe_query *predicate; /** The associated query that can be used for predicate */
67
68 /** For non-GPU SVGA_QUERY_x queries */
69 uint64_t begin_count, end_count;
70 };
71
72
/** Downcast a generic pipe_query pointer to the driver's svga_query. */
static inline struct svga_query *
svga_query(struct pipe_query *q)
{
   struct svga_query *sq = (struct svga_query *)q;

   return sq;
}
79
80
81 static boolean
82 svga_get_query_result(struct pipe_context *pipe,
83 struct pipe_query *q,
84 boolean wait,
85 union pipe_query_result *result);
86
87 static enum pipe_error
88 define_query_vgpu9(struct svga_context *svga,
89 struct svga_query *sq)
90 {
91 struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
92
93 sq->hwbuf = svga_winsys_buffer_create(svga, 1,
94 SVGA_BUFFER_USAGE_PINNED,
95 sizeof *sq->queryResult);
96 if (!sq->hwbuf)
97 return PIPE_ERROR_OUT_OF_MEMORY;
98
99 sq->queryResult = (SVGA3dQueryResult *)
100 sws->buffer_map(sws, sq->hwbuf, PIPE_TRANSFER_WRITE);
101 if (!sq->queryResult) {
102 sws->buffer_destroy(sws, sq->hwbuf);
103 return PIPE_ERROR_OUT_OF_MEMORY;
104 }
105
106 sq->queryResult->totalSize = sizeof *sq->queryResult;
107 sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
108
109 /* We request the buffer to be pinned and assume it is always mapped.
110 * The reason is that we don't want to wait for fences when checking the
111 * query status.
112 */
113 sws->buffer_unmap(sws, sq->hwbuf);
114
115 return PIPE_OK;
116 }
117
118 static enum pipe_error
119 begin_query_vgpu9(struct svga_context *svga, struct svga_query *sq)
120 {
121 struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
122 enum pipe_error ret = PIPE_OK;
123
124 if (sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) {
125 /* The application doesn't care for the pending query result.
126 * We cannot let go of the existing buffer and just get a new one
127 * because its storage may be reused for other purposes and clobbered
128 * by the host when it determines the query result. So the only
129 * option here is to wait for the existing query's result -- not a
130 * big deal, given that no sane application would do this.
131 */
132 uint64_t result;
133 svga_get_query_result(&svga->pipe, &sq->base, TRUE, (void*)&result);
134 assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING);
135 }
136
137 sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
138 sws->fence_reference(sws, &sq->fence, NULL);
139
140 ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
141 if (ret != PIPE_OK) {
142 svga_context_flush(svga, NULL);
143 ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
144 }
145 return ret;
146 }
147
148 static enum pipe_error
149 end_query_vgpu9(struct svga_context *svga, struct svga_query *sq)
150 {
151 enum pipe_error ret = PIPE_OK;
152
153 /* Set to PENDING before sending EndQuery. */
154 sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING;
155
156 ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf);
157 if (ret != PIPE_OK) {
158 svga_context_flush(svga, NULL);
159 ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf);
160 }
161 return ret;
162 }
163
164 static boolean
165 get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq,
166 boolean wait, uint64_t *result)
167 {
168 struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
169 enum pipe_error ret;
170 SVGA3dQueryState state;
171
172 if (!sq->fence) {
173 /* The query status won't be updated by the host unless
174 * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause
175 * a synchronous wait on the host.
176 */
177 ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf);
178 if (ret != PIPE_OK) {
179 svga_context_flush(svga, NULL);
180 ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf);
181 }
182 assert (ret == PIPE_OK);
183 svga_context_flush(svga, &sq->fence);
184 assert(sq->fence);
185 }
186
187 state = sq->queryResult->state;
188 if (state == SVGA3D_QUERYSTATE_PENDING) {
189 if (!wait)
190 return FALSE;
191 sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
192 state = sq->queryResult->state;
193 }
194
195 assert(state == SVGA3D_QUERYSTATE_SUCCEEDED ||
196 state == SVGA3D_QUERYSTATE_FAILED);
197
198 *result = (uint64_t)sq->queryResult->result32;
199 return TRUE;
200 }
201
202
/**
 * VGPU10
 *
 * There is one query mob allocated for each context to be shared by all
 * query types. The mob is used to hold the queries' state and results. Since
 * each query result type is of a different length, to ease the query
 * allocation management, the mob is divided into memory blocks. Each memory
 * block will hold queries of the same type. Multiple memory blocks can be
 * allocated for a particular query type.
 *
 * Currently each memory block is 184 bytes. We support up to 128
 * memory blocks. The query memory size is arbitrary right now.
 * Each occlusion query takes about 8 bytes. One memory block can accommodate
 * 23 occlusion queries. 128 of those blocks can support up to 2944 occlusion
 * queries. That seems reasonable for now. If we think this limit is
 * not enough, we can increase the limit or try to grow the mob at runtime.
 * Note, the SVGA device does not impose one mob per context for queries;
 * we could allocate multiple mobs for queries. However, the wddm KMD does
 * not currently support that.
 *
 * Also note that the GL guest driver does not issue any of the
 * following commands: DXMoveQuery, DXBindAllQuery & DXReadbackAllQuery.
 */
/* Size in bytes of one memory block inside the query mob. */
#define SVGA_QUERY_MEM_BLOCK_SIZE   (2 * sizeof(SVGADXQueryResultUnion))
/* Size in bytes of the whole query mob: 128 memory blocks. */
#define SVGA_QUERY_MEM_SIZE         (SVGA_QUERY_MEM_BLOCK_SIZE * 128)
228
/**
 * Bookkeeping for one memory block of the query mob.  Entries for the same
 * query type are chained through \c next, starting at
 * svga->gb_query_map[type].
 */
struct svga_qmem_alloc_entry {
   unsigned start_offset;               /* byte offset of this block in the mob */
   unsigned block_index;                /* index of this block in the mob */
   unsigned query_size;                 /* size of each query slot in this block */
   unsigned nquery;                     /* number of slots currently allocated */
   struct util_bitmask *alloc_mask;     /* one bit per allocated slot */
   struct svga_qmem_alloc_entry *next;  /* next block of the same query type */
};
238
239
240 /**
241 * Allocate a memory block from the query object memory
242 * \return -1 if out of memory, else index of the query memory block
243 */
244 static int
245 allocate_query_block(struct svga_context *svga)
246 {
247 int index;
248 unsigned offset;
249
250 /* Find the next available query block */
251 index = util_bitmask_add(svga->gb_query_alloc_mask);
252
253 if (index == UTIL_BITMASK_INVALID_INDEX)
254 return -1;
255
256 offset = index * SVGA_QUERY_MEM_BLOCK_SIZE;
257 if (offset >= svga->gb_query_len) {
258 unsigned i;
259
260 /**
261 * All the memory blocks are allocated, lets see if there is
262 * any empty memory block around that can be freed up.
263 */
264 index = -1;
265 for (i = 0; i < SVGA_QUERY_MAX && index == -1; i++) {
266 struct svga_qmem_alloc_entry *alloc_entry;
267 struct svga_qmem_alloc_entry *prev_alloc_entry = NULL;
268
269 alloc_entry = svga->gb_query_map[i];
270 while (alloc_entry && index == -1) {
271 if (alloc_entry->nquery == 0) {
272 /* This memory block is empty, it can be recycled. */
273 if (prev_alloc_entry) {
274 prev_alloc_entry->next = alloc_entry->next;
275 } else {
276 svga->gb_query_map[i] = alloc_entry->next;
277 }
278 index = alloc_entry->block_index;
279 } else {
280 prev_alloc_entry = alloc_entry;
281 alloc_entry = alloc_entry->next;
282 }
283 }
284 }
285 }
286
287 return index;
288 }
289
290 /**
291 * Allocate a slot in the specified memory block.
292 * All slots in this memory block are of the same size.
293 *
294 * \return -1 if out of memory, else index of the query slot
295 */
296 static int
297 allocate_query_slot(struct svga_context *svga,
298 struct svga_qmem_alloc_entry *alloc)
299 {
300 int index;
301 unsigned offset;
302
303 /* Find the next available slot */
304 index = util_bitmask_add(alloc->alloc_mask);
305
306 if (index == UTIL_BITMASK_INVALID_INDEX)
307 return -1;
308
309 offset = index * alloc->query_size;
310 if (offset >= SVGA_QUERY_MEM_BLOCK_SIZE)
311 return -1;
312
313 alloc->nquery++;
314
315 return index;
316 }
317
318 /**
319 * Deallocate the specified slot in the memory block.
320 * If all slots are freed up, then deallocate the memory block
321 * as well, so it can be allocated for other query type
322 */
323 static void
324 deallocate_query_slot(struct svga_context *svga,
325 struct svga_qmem_alloc_entry *alloc,
326 unsigned index)
327 {
328 assert(index != UTIL_BITMASK_INVALID_INDEX);
329
330 util_bitmask_clear(alloc->alloc_mask, index);
331 alloc->nquery--;
332
333 /**
334 * Don't worry about deallocating the empty memory block here.
335 * The empty memory block will be recycled when no more memory block
336 * can be allocated.
337 */
338 }
339
340 static struct svga_qmem_alloc_entry *
341 allocate_query_block_entry(struct svga_context *svga,
342 unsigned len)
343 {
344 struct svga_qmem_alloc_entry *alloc_entry;
345 int block_index = -1;
346
347 block_index = allocate_query_block(svga);
348 if (block_index == -1)
349 return NULL;
350 alloc_entry = CALLOC_STRUCT(svga_qmem_alloc_entry);
351 if (!alloc_entry)
352 return NULL;
353
354 alloc_entry->block_index = block_index;
355 alloc_entry->start_offset = block_index * SVGA_QUERY_MEM_BLOCK_SIZE;
356 alloc_entry->nquery = 0;
357 alloc_entry->alloc_mask = util_bitmask_create();
358 alloc_entry->next = NULL;
359 alloc_entry->query_size = len;
360
361 return alloc_entry;
362 }
363
364 /**
365 * Allocate a memory slot for a query of the specified type.
366 * It will first search through the memory blocks that are allocated
367 * for the query type. If no memory slot is available, it will try
368 * to allocate another memory block within the query object memory for
369 * this query type.
370 */
371 static int
372 allocate_query(struct svga_context *svga,
373 SVGA3dQueryType type,
374 unsigned len)
375 {
376 struct svga_qmem_alloc_entry *alloc_entry;
377 int slot_index = -1;
378 unsigned offset;
379
380 assert(type < SVGA_QUERY_MAX);
381
382 alloc_entry = svga->gb_query_map[type];
383
384 if (!alloc_entry) {
385 /**
386 * No query memory block has been allocated for this query type,
387 * allocate one now
388 */
389 alloc_entry = allocate_query_block_entry(svga, len);
390 if (!alloc_entry)
391 return -1;
392 svga->gb_query_map[type] = alloc_entry;
393 }
394
395 /* Allocate a slot within the memory block allocated for this query type */
396 slot_index = allocate_query_slot(svga, alloc_entry);
397
398 if (slot_index == -1) {
399 /* This query memory block is full, allocate another one */
400 alloc_entry = allocate_query_block_entry(svga, len);
401 if (!alloc_entry)
402 return -1;
403 alloc_entry->next = svga->gb_query_map[type];
404 svga->gb_query_map[type] = alloc_entry;
405 slot_index = allocate_query_slot(svga, alloc_entry);
406 }
407
408 assert(slot_index != -1);
409 offset = slot_index * len + alloc_entry->start_offset;
410
411 return offset;
412 }
413
414
415 /**
416 * Deallocate memory slot allocated for the specified query
417 */
418 static void
419 deallocate_query(struct svga_context *svga,
420 struct svga_query *sq)
421 {
422 struct svga_qmem_alloc_entry *alloc_entry;
423 unsigned slot_index;
424 unsigned offset = sq->offset;
425
426 alloc_entry = svga->gb_query_map[sq->svga_type];
427
428 while (alloc_entry) {
429 if (offset >= alloc_entry->start_offset &&
430 offset < alloc_entry->start_offset + SVGA_QUERY_MEM_BLOCK_SIZE) {
431
432 /* The slot belongs to this memory block, deallocate it */
433 slot_index = (offset - alloc_entry->start_offset) /
434 alloc_entry->query_size;
435 deallocate_query_slot(svga, alloc_entry, slot_index);
436 alloc_entry = NULL;
437 } else {
438 alloc_entry = alloc_entry->next;
439 }
440 }
441 }
442
443
444 /**
445 * Destroy the gb query object and all the related query structures
446 */
447 static void
448 destroy_gb_query_obj(struct svga_context *svga)
449 {
450 struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
451 unsigned i;
452
453 for (i = 0; i < SVGA_QUERY_MAX; i++) {
454 struct svga_qmem_alloc_entry *alloc_entry, *next;
455 alloc_entry = svga->gb_query_map[i];
456 while (alloc_entry) {
457 next = alloc_entry->next;
458 util_bitmask_destroy(alloc_entry->alloc_mask);
459 FREE(alloc_entry);
460 alloc_entry = next;
461 }
462 svga->gb_query_map[i] = NULL;
463 }
464
465 if (svga->gb_query)
466 sws->query_destroy(sws, svga->gb_query);
467 svga->gb_query = NULL;
468
469 util_bitmask_destroy(svga->gb_query_alloc_mask);
470 }
471
472 /**
473 * Define query and create the gb query object if it is not already created.
474 * There is only one gb query object per context which will be shared by
475 * queries of all types.
476 */
477 static enum pipe_error
478 define_query_vgpu10(struct svga_context *svga,
479 struct svga_query *sq, int resultLen)
480 {
481 struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
482 int qlen;
483 enum pipe_error ret = PIPE_OK;
484
485 SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
486
487 if (svga->gb_query == NULL) {
488 /* Create a gb query object */
489 svga->gb_query = sws->query_create(sws, SVGA_QUERY_MEM_SIZE);
490 if (!svga->gb_query)
491 return PIPE_ERROR_OUT_OF_MEMORY;
492 svga->gb_query_len = SVGA_QUERY_MEM_SIZE;
493 memset (svga->gb_query_map, 0, sizeof(svga->gb_query_map));
494 svga->gb_query_alloc_mask = util_bitmask_create();
495
496 /* Bind the query object to the context */
497 if (svga->swc->query_bind(svga->swc, svga->gb_query,
498 SVGA_QUERY_FLAG_SET) != PIPE_OK) {
499 svga_context_flush(svga, NULL);
500 svga->swc->query_bind(svga->swc, svga->gb_query,
501 SVGA_QUERY_FLAG_SET);
502 }
503 }
504
505 sq->gb_query = svga->gb_query;
506
507 /* Allocate an integer ID for this query */
508 sq->id = util_bitmask_add(svga->query_id_bm);
509 if (sq->id == UTIL_BITMASK_INVALID_INDEX)
510 return PIPE_ERROR_OUT_OF_MEMORY;
511
512 /* Find a slot for this query in the gb object */
513 qlen = resultLen + sizeof(SVGA3dQueryState);
514 sq->offset = allocate_query(svga, sq->svga_type, qlen);
515 if (sq->offset == -1)
516 return PIPE_ERROR_OUT_OF_MEMORY;
517
518 SVGA_DBG(DEBUG_QUERY, " query type=%d qid=0x%x offset=%d\n",
519 sq->svga_type, sq->id, sq->offset);
520
521 /**
522 * Send SVGA3D commands to define the query
523 */
524 ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags);
525 if (ret != PIPE_OK) {
526 svga_context_flush(svga, NULL);
527 ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags);
528 }
529 if (ret != PIPE_OK)
530 return PIPE_ERROR_OUT_OF_MEMORY;
531
532 ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id);
533 if (ret != PIPE_OK) {
534 svga_context_flush(svga, NULL);
535 ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id);
536 }
537 assert(ret == PIPE_OK);
538
539 ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset);
540 if (ret != PIPE_OK) {
541 svga_context_flush(svga, NULL);
542 ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset);
543 }
544 assert(ret == PIPE_OK);
545
546 return PIPE_OK;
547 }
548
549 static enum pipe_error
550 destroy_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
551 {
552 enum pipe_error ret;
553
554 ret = SVGA3D_vgpu10_DestroyQuery(svga->swc, sq->id);
555
556 /* Deallocate the memory slot allocated for this query */
557 deallocate_query(svga, sq);
558
559 return ret;
560 }
561
562
563 /**
564 * Rebind queryies to the context.
565 */
566 static void
567 rebind_vgpu10_query(struct svga_context *svga)
568 {
569 if (svga->swc->query_bind(svga->swc, svga->gb_query,
570 SVGA_QUERY_FLAG_REF) != PIPE_OK) {
571 svga_context_flush(svga, NULL);
572 svga->swc->query_bind(svga->swc, svga->gb_query,
573 SVGA_QUERY_FLAG_REF);
574 }
575
576 svga->rebind.flags.query = FALSE;
577 }
578
579
580 static enum pipe_error
581 begin_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
582 {
583 struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
584 enum pipe_error ret = PIPE_OK;
585 int status = 0;
586
587 sws->fence_reference(sws, &sq->fence, NULL);
588
589 /* Initialize the query state to NEW */
590 status = sws->query_init(sws, sq->gb_query, sq->offset, SVGA3D_QUERYSTATE_NEW);
591 if (status)
592 return PIPE_ERROR;
593
594 if (svga->rebind.flags.query) {
595 rebind_vgpu10_query(svga);
596 }
597
598 /* Send the BeginQuery command to the device */
599 ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id);
600 if (ret != PIPE_OK) {
601 svga_context_flush(svga, NULL);
602 ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id);
603 }
604 return ret;
605 }
606
607 static enum pipe_error
608 end_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
609 {
610 struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
611 enum pipe_error ret = PIPE_OK;
612
613 if (svga->rebind.flags.query) {
614 rebind_vgpu10_query(svga);
615 }
616
617 ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id);
618 if (ret != PIPE_OK) {
619 svga_context_flush(svga, NULL);
620 ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id);
621 }
622
623 /* Finish fence is copied here from get_query_result_vgpu10. This helps
624 * with cases where svga_begin_query might be called again before
625 * svga_get_query_result, such as GL_TIME_ELAPSED.
626 */
627 if (!sq->fence) {
628 svga_context_flush(svga, &sq->fence);
629 }
630 sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
631
632 return ret;
633 }
634
635 static boolean
636 get_query_result_vgpu10(struct svga_context *svga, struct svga_query *sq,
637 boolean wait, void *result, int resultLen)
638 {
639 struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
640 SVGA3dQueryState queryState;
641
642 if (svga->rebind.flags.query) {
643 rebind_vgpu10_query(svga);
644 }
645
646 sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen);
647
648 if (queryState == SVGA3D_QUERYSTATE_PENDING) {
649 if (!wait)
650 return FALSE;
651 sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
652 sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen);
653 }
654
655 assert(queryState == SVGA3D_QUERYSTATE_SUCCEEDED ||
656 queryState == SVGA3D_QUERYSTATE_FAILED);
657
658 return TRUE;
659 }
660
661 static struct pipe_query *
662 svga_create_query(struct pipe_context *pipe,
663 unsigned query_type,
664 unsigned index)
665 {
666 struct svga_context *svga = svga_context(pipe);
667 struct svga_query *sq;
668
669 assert(query_type < SVGA_QUERY_MAX);
670
671 sq = CALLOC_STRUCT(svga_query);
672 if (!sq)
673 goto fail;
674
675 /* Allocate an integer ID for the query */
676 sq->id = util_bitmask_add(svga->query_id_bm);
677 if (sq->id == UTIL_BITMASK_INVALID_INDEX)
678 goto fail;
679
680 SVGA_DBG(DEBUG_QUERY, "%s type=%d sq=0x%x id=%d\n", __FUNCTION__,
681 query_type, sq, sq->id);
682
683 switch (query_type) {
684 case PIPE_QUERY_OCCLUSION_COUNTER:
685 sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSION;
686 if (svga_have_vgpu10(svga)) {
687 define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionQueryResult));
688
689 /**
690 * In OpenGL, occlusion counter query can be used in conditional
691 * rendering; however, in DX10, only OCCLUSION_PREDICATE query can
692 * be used for predication. Hence, we need to create an occlusion
693 * predicate query along with the occlusion counter query. So when
694 * the occlusion counter query is used for predication, the associated
695 * query of occlusion predicate type will be used
696 * in the SetPredication command.
697 */
698 sq->predicate = svga_create_query(pipe, PIPE_QUERY_OCCLUSION_PREDICATE, index);
699
700 } else {
701 define_query_vgpu9(svga, sq);
702 }
703 break;
704 case PIPE_QUERY_OCCLUSION_PREDICATE:
705 assert(svga_have_vgpu10(svga));
706 sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE;
707 define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionPredicateQueryResult));
708 break;
709 case PIPE_QUERY_PRIMITIVES_GENERATED:
710 case PIPE_QUERY_PRIMITIVES_EMITTED:
711 case PIPE_QUERY_SO_STATISTICS:
712 assert(svga_have_vgpu10(svga));
713 sq->svga_type = SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS;
714 define_query_vgpu10(svga, sq,
715 sizeof(SVGADXStreamOutStatisticsQueryResult));
716 break;
717 case PIPE_QUERY_TIMESTAMP:
718 assert(svga_have_vgpu10(svga));
719 sq->svga_type = SVGA3D_QUERYTYPE_TIMESTAMP;
720 define_query_vgpu10(svga, sq,
721 sizeof(SVGADXTimestampQueryResult));
722 break;
723 case SVGA_QUERY_NUM_DRAW_CALLS:
724 case SVGA_QUERY_NUM_FALLBACKS:
725 case SVGA_QUERY_NUM_FLUSHES:
726 case SVGA_QUERY_NUM_VALIDATIONS:
727 case SVGA_QUERY_MAP_BUFFER_TIME:
728 case SVGA_QUERY_NUM_RESOURCES_MAPPED:
729 case SVGA_QUERY_NUM_BYTES_UPLOADED:
730 case SVGA_QUERY_COMMAND_BUFFER_SIZE:
731 case SVGA_QUERY_FLUSH_TIME:
732 case SVGA_QUERY_SURFACE_WRITE_FLUSHES:
733 case SVGA_QUERY_MEMORY_USED:
734 case SVGA_QUERY_NUM_SHADERS:
735 case SVGA_QUERY_NUM_RESOURCES:
736 case SVGA_QUERY_NUM_STATE_OBJECTS:
737 case SVGA_QUERY_NUM_SURFACE_VIEWS:
738 case SVGA_QUERY_NUM_GENERATE_MIPMAP:
739 case SVGA_QUERY_NUM_READBACKS:
740 break;
741 default:
742 assert(!"unexpected query type in svga_create_query()");
743 }
744
745 sq->type = query_type;
746
747 return &sq->base;
748
749 fail:
750 FREE(sq);
751 return NULL;
752 }
753
754 static void
755 svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
756 {
757 struct svga_context *svga = svga_context(pipe);
758 struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
759 struct svga_query *sq;
760
761 if (!q) {
762 destroy_gb_query_obj(svga);
763 return;
764 }
765
766 sq = svga_query(q);
767
768 SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__,
769 sq, sq->id);
770
771 switch (sq->type) {
772 case PIPE_QUERY_OCCLUSION_COUNTER:
773 if (svga_have_vgpu10(svga)) {
774 /* make sure to also destroy any associated predicate query */
775 if (sq->predicate)
776 svga_destroy_query(pipe, sq->predicate);
777 destroy_query_vgpu10(svga, sq);
778 } else {
779 sws->buffer_destroy(sws, sq->hwbuf);
780 }
781 sws->fence_reference(sws, &sq->fence, NULL);
782 break;
783 case PIPE_QUERY_OCCLUSION_PREDICATE:
784 assert(svga_have_vgpu10(svga));
785 destroy_query_vgpu10(svga, sq);
786 sws->fence_reference(sws, &sq->fence, NULL);
787 break;
788 case PIPE_QUERY_PRIMITIVES_GENERATED:
789 case PIPE_QUERY_PRIMITIVES_EMITTED:
790 case PIPE_QUERY_SO_STATISTICS:
791 case PIPE_QUERY_TIMESTAMP:
792 assert(svga_have_vgpu10(svga));
793 destroy_query_vgpu10(svga, sq);
794 sws->fence_reference(sws, &sq->fence, NULL);
795 break;
796 case SVGA_QUERY_NUM_DRAW_CALLS:
797 case SVGA_QUERY_NUM_FALLBACKS:
798 case SVGA_QUERY_NUM_FLUSHES:
799 case SVGA_QUERY_NUM_VALIDATIONS:
800 case SVGA_QUERY_MAP_BUFFER_TIME:
801 case SVGA_QUERY_NUM_RESOURCES_MAPPED:
802 case SVGA_QUERY_NUM_BYTES_UPLOADED:
803 case SVGA_QUERY_COMMAND_BUFFER_SIZE:
804 case SVGA_QUERY_FLUSH_TIME:
805 case SVGA_QUERY_SURFACE_WRITE_FLUSHES:
806 case SVGA_QUERY_MEMORY_USED:
807 case SVGA_QUERY_NUM_SHADERS:
808 case SVGA_QUERY_NUM_RESOURCES:
809 case SVGA_QUERY_NUM_STATE_OBJECTS:
810 case SVGA_QUERY_NUM_SURFACE_VIEWS:
811 case SVGA_QUERY_NUM_GENERATE_MIPMAP:
812 case SVGA_QUERY_NUM_READBACKS:
813 /* nothing */
814 break;
815 default:
816 assert(!"svga: unexpected query type in svga_destroy_query()");
817 }
818
819 /* Free the query id */
820 util_bitmask_clear(svga->query_id_bm, sq->id);
821
822 FREE(sq);
823 }
824
825
826 static boolean
827 svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
828 {
829 struct svga_context *svga = svga_context(pipe);
830 struct svga_query *sq = svga_query(q);
831 enum pipe_error ret;
832
833 assert(sq);
834 assert(sq->type < SVGA_QUERY_MAX);
835
836 SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__,
837 sq, sq->id);
838
839 /* Need to flush out buffered drawing commands so that they don't
840 * get counted in the query results.
841 */
842 svga_hwtnl_flush_retry(svga);
843
844 switch (sq->type) {
845 case PIPE_QUERY_OCCLUSION_COUNTER:
846 if (svga_have_vgpu10(svga)) {
847 ret = begin_query_vgpu10(svga, sq);
848 /* also need to start the associated occlusion predicate query */
849 if (sq->predicate) {
850 enum pipe_error status;
851 status = begin_query_vgpu10(svga, svga_query(sq->predicate));
852 assert(status == PIPE_OK);
853 (void) status;
854 }
855 } else {
856 ret = begin_query_vgpu9(svga, sq);
857 }
858 assert(ret == PIPE_OK);
859 (void) ret;
860 break;
861 case PIPE_QUERY_OCCLUSION_PREDICATE:
862 assert(svga_have_vgpu10(svga));
863 ret = begin_query_vgpu10(svga, sq);
864 assert(ret == PIPE_OK);
865 break;
866 case PIPE_QUERY_PRIMITIVES_GENERATED:
867 case PIPE_QUERY_PRIMITIVES_EMITTED:
868 case PIPE_QUERY_SO_STATISTICS:
869 case PIPE_QUERY_TIMESTAMP:
870 assert(svga_have_vgpu10(svga));
871 ret = begin_query_vgpu10(svga, sq);
872 assert(ret == PIPE_OK);
873 break;
874 case SVGA_QUERY_NUM_DRAW_CALLS:
875 sq->begin_count = svga->hud.num_draw_calls;
876 break;
877 case SVGA_QUERY_NUM_FALLBACKS:
878 sq->begin_count = svga->hud.num_fallbacks;
879 break;
880 case SVGA_QUERY_NUM_FLUSHES:
881 sq->begin_count = svga->hud.num_flushes;
882 break;
883 case SVGA_QUERY_NUM_VALIDATIONS:
884 sq->begin_count = svga->hud.num_validations;
885 break;
886 case SVGA_QUERY_MAP_BUFFER_TIME:
887 sq->begin_count = svga->hud.map_buffer_time;
888 break;
889 case SVGA_QUERY_NUM_RESOURCES_MAPPED:
890 sq->begin_count = svga->hud.num_resources_mapped;
891 break;
892 case SVGA_QUERY_NUM_BYTES_UPLOADED:
893 sq->begin_count = svga->hud.num_bytes_uploaded;
894 break;
895 case SVGA_QUERY_COMMAND_BUFFER_SIZE:
896 sq->begin_count = svga->hud.command_buffer_size;
897 break;
898 case SVGA_QUERY_FLUSH_TIME:
899 sq->begin_count = svga->hud.flush_time;
900 break;
901 case SVGA_QUERY_SURFACE_WRITE_FLUSHES:
902 sq->begin_count = svga->hud.surface_write_flushes;
903 break;
904 case SVGA_QUERY_NUM_READBACKS:
905 sq->begin_count = svga->hud.num_readbacks;
906 break;
907 case SVGA_QUERY_MEMORY_USED:
908 case SVGA_QUERY_NUM_SHADERS:
909 case SVGA_QUERY_NUM_RESOURCES:
910 case SVGA_QUERY_NUM_STATE_OBJECTS:
911 case SVGA_QUERY_NUM_SURFACE_VIEWS:
912 case SVGA_QUERY_NUM_GENERATE_MIPMAP:
913 /* nothing */
914 break;
915 default:
916 assert(!"unexpected query type in svga_begin_query()");
917 }
918
919 svga->sq[sq->type] = sq;
920
921 return true;
922 }
923
924
925 static void
926 svga_end_query(struct pipe_context *pipe, struct pipe_query *q)
927 {
928 struct svga_context *svga = svga_context(pipe);
929 struct svga_query *sq = svga_query(q);
930 enum pipe_error ret;
931
932 assert(sq);
933 assert(sq->type < SVGA_QUERY_MAX);
934
935 SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__,
936 sq, sq->id);
937
938 if (sq->type == PIPE_QUERY_TIMESTAMP && svga->sq[sq->type] != sq)
939 svga_begin_query(pipe, q);
940
941 svga_hwtnl_flush_retry(svga);
942
943 assert(svga->sq[sq->type] == sq);
944
945 switch (sq->type) {
946 case PIPE_QUERY_OCCLUSION_COUNTER:
947 if (svga_have_vgpu10(svga)) {
948 ret = end_query_vgpu10(svga, sq);
949 /* also need to end the associated occlusion predicate query */
950 if (sq->predicate) {
951 enum pipe_error status;
952 status = end_query_vgpu10(svga, svga_query(sq->predicate));
953 assert(status == PIPE_OK);
954 (void) status;
955 }
956 } else {
957 ret = end_query_vgpu9(svga, sq);
958 }
959 assert(ret == PIPE_OK);
960 (void) ret;
961 /* TODO: Delay flushing. We don't really need to flush here, just ensure
962 * that there is one flush before svga_get_query_result attempts to get
963 * the result.
964 */
965 svga_context_flush(svga, NULL);
966 break;
967 case PIPE_QUERY_OCCLUSION_PREDICATE:
968 assert(svga_have_vgpu10(svga));
969 ret = end_query_vgpu10(svga, sq);
970 assert(ret == PIPE_OK);
971 break;
972 case PIPE_QUERY_PRIMITIVES_GENERATED:
973 case PIPE_QUERY_PRIMITIVES_EMITTED:
974 case PIPE_QUERY_SO_STATISTICS:
975 case PIPE_QUERY_TIMESTAMP:
976 assert(svga_have_vgpu10(svga));
977 ret = end_query_vgpu10(svga, sq);
978 assert(ret == PIPE_OK);
979 break;
980 case SVGA_QUERY_NUM_DRAW_CALLS:
981 sq->end_count = svga->hud.num_draw_calls;
982 break;
983 case SVGA_QUERY_NUM_FALLBACKS:
984 sq->end_count = svga->hud.num_fallbacks;
985 break;
986 case SVGA_QUERY_NUM_FLUSHES:
987 sq->end_count = svga->hud.num_flushes;
988 break;
989 case SVGA_QUERY_NUM_VALIDATIONS:
990 sq->end_count = svga->hud.num_validations;
991 break;
992 case SVGA_QUERY_MAP_BUFFER_TIME:
993 sq->end_count = svga->hud.map_buffer_time;
994 break;
995 case SVGA_QUERY_NUM_RESOURCES_MAPPED:
996 sq->end_count = svga->hud.num_resources_mapped;
997 break;
998 case SVGA_QUERY_NUM_BYTES_UPLOADED:
999 sq->end_count = svga->hud.num_bytes_uploaded;
1000 break;
1001 case SVGA_QUERY_COMMAND_BUFFER_SIZE:
1002 sq->end_count = svga->hud.command_buffer_size;
1003 break;
1004 case SVGA_QUERY_FLUSH_TIME:
1005 sq->end_count = svga->hud.flush_time;
1006 break;
1007 case SVGA_QUERY_SURFACE_WRITE_FLUSHES:
1008 sq->end_count = svga->hud.surface_write_flushes;
1009 break;
1010 case SVGA_QUERY_NUM_READBACKS:
1011 sq->end_count = svga->hud.num_readbacks;
1012 break;
1013 case SVGA_QUERY_MEMORY_USED:
1014 case SVGA_QUERY_NUM_SHADERS:
1015 case SVGA_QUERY_NUM_RESOURCES:
1016 case SVGA_QUERY_NUM_STATE_OBJECTS:
1017 case SVGA_QUERY_NUM_SURFACE_VIEWS:
1018 case SVGA_QUERY_NUM_GENERATE_MIPMAP:
1019 /* nothing */
1020 break;
1021 default:
1022 assert(!"unexpected query type in svga_end_query()");
1023 }
1024 svga->sq[sq->type] = NULL;
1025 }
1026
1027
1028 static boolean
1029 svga_get_query_result(struct pipe_context *pipe,
1030 struct pipe_query *q,
1031 boolean wait,
1032 union pipe_query_result *vresult)
1033 {
1034 struct svga_screen *svgascreen = svga_screen(pipe->screen);
1035 struct svga_context *svga = svga_context(pipe);
1036 struct svga_query *sq = svga_query(q);
1037 uint64_t *result = (uint64_t *)vresult;
1038 boolean ret = TRUE;
1039
1040 assert(sq);
1041
1042 SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d wait: %d\n",
1043 __FUNCTION__, sq, sq->id, wait);
1044
1045 switch (sq->type) {
1046 case PIPE_QUERY_OCCLUSION_COUNTER:
1047 if (svga_have_vgpu10(svga)) {
1048 SVGADXOcclusionQueryResult occResult;
1049 ret = get_query_result_vgpu10(svga, sq, wait,
1050 (void *)&occResult, sizeof(occResult));
1051 *result = (uint64_t)occResult.samplesRendered;
1052 } else {
1053 ret = get_query_result_vgpu9(svga, sq, wait, (uint64_t *)result);
1054 }
1055 break;
1056 case PIPE_QUERY_OCCLUSION_PREDICATE: {
1057 SVGADXOcclusionPredicateQueryResult occResult;
1058 assert(svga_have_vgpu10(svga));
1059 ret = get_query_result_vgpu10(svga, sq, wait,
1060 (void *)&occResult, sizeof(occResult));
1061 vresult->b = occResult.anySamplesRendered != 0;
1062 break;
1063 }
1064 case PIPE_QUERY_SO_STATISTICS: {
1065 SVGADXStreamOutStatisticsQueryResult sResult;
1066 struct pipe_query_data_so_statistics *pResult =
1067 (struct pipe_query_data_so_statistics *)vresult;
1068
1069 assert(svga_have_vgpu10(svga));
1070 ret = get_query_result_vgpu10(svga, sq, wait,
1071 (void *)&sResult, sizeof(sResult));
1072 pResult->num_primitives_written = sResult.numPrimitivesWritten;
1073 pResult->primitives_storage_needed = sResult.numPrimitivesRequired;
1074 break;
1075 }
1076 case PIPE_QUERY_TIMESTAMP: {
1077 SVGADXTimestampQueryResult sResult;
1078
1079 assert(svga_have_vgpu10(svga));
1080 ret = get_query_result_vgpu10(svga, sq, wait,
1081 (void *)&sResult, sizeof(sResult));
1082 *result = (uint64_t)sResult.timestamp;
1083 break;
1084 }
1085 case PIPE_QUERY_PRIMITIVES_GENERATED: {
1086 SVGADXStreamOutStatisticsQueryResult sResult;
1087
1088 assert(svga_have_vgpu10(svga));
1089 ret = get_query_result_vgpu10(svga, sq, wait,
1090 (void *)&sResult, sizeof sResult);
1091 *result = (uint64_t)sResult.numPrimitivesRequired;
1092 break;
1093 }
1094 case PIPE_QUERY_PRIMITIVES_EMITTED: {
1095 SVGADXStreamOutStatisticsQueryResult sResult;
1096
1097 assert(svga_have_vgpu10(svga));
1098 ret = get_query_result_vgpu10(svga, sq, wait,
1099 (void *)&sResult, sizeof sResult);
1100 *result = (uint64_t)sResult.numPrimitivesWritten;
1101 break;
1102 }
1103 /* These are per-frame counters */
1104 case SVGA_QUERY_NUM_DRAW_CALLS:
1105 case SVGA_QUERY_NUM_FALLBACKS:
1106 case SVGA_QUERY_NUM_FLUSHES:
1107 case SVGA_QUERY_NUM_VALIDATIONS:
1108 case SVGA_QUERY_MAP_BUFFER_TIME:
1109 case SVGA_QUERY_NUM_RESOURCES_MAPPED:
1110 case SVGA_QUERY_NUM_BYTES_UPLOADED:
1111 case SVGA_QUERY_COMMAND_BUFFER_SIZE:
1112 case SVGA_QUERY_FLUSH_TIME:
1113 case SVGA_QUERY_SURFACE_WRITE_FLUSHES:
1114 case SVGA_QUERY_NUM_READBACKS:
1115 vresult->u64 = sq->end_count - sq->begin_count;
1116 break;
1117 /* These are running total counters */
1118 case SVGA_QUERY_MEMORY_USED:
1119 vresult->u64 = svgascreen->hud.total_resource_bytes;
1120 break;
1121 case SVGA_QUERY_NUM_SHADERS:
1122 vresult->u64 = svga->hud.num_shaders;
1123 break;
1124 case SVGA_QUERY_NUM_RESOURCES:
1125 vresult->u64 = svgascreen->hud.num_resources;
1126 break;
1127 case SVGA_QUERY_NUM_STATE_OBJECTS:
1128 vresult->u64 = svga->hud.num_state_objects;
1129 break;
1130 case SVGA_QUERY_NUM_SURFACE_VIEWS:
1131 vresult->u64 = svga->hud.num_surface_views;
1132 break;
1133 case SVGA_QUERY_NUM_GENERATE_MIPMAP:
1134 vresult->u64 = svga->hud.num_generate_mipmap;
1135 break;
1136 default:
1137 assert(!"unexpected query type in svga_get_query_result");
1138 }
1139
1140 SVGA_DBG(DEBUG_QUERY, "%s result %d\n", __FUNCTION__, *((uint64_t *)vresult));
1141
1142 return ret;
1143 }
1144
1145 static void
1146 svga_render_condition(struct pipe_context *pipe, struct pipe_query *q,
1147 boolean condition, uint mode)
1148 {
1149 struct svga_context *svga = svga_context(pipe);
1150 struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
1151 struct svga_query *sq = svga_query(q);
1152 SVGA3dQueryId queryId;
1153 enum pipe_error ret;
1154
1155 SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
1156
1157 assert(svga_have_vgpu10(svga));
1158 if (sq == NULL) {
1159 queryId = SVGA3D_INVALID_ID;
1160 }
1161 else {
1162 assert(sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION ||
1163 sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE);
1164
1165 if (sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION) {
1166 assert(sq->predicate);
1167 /**
1168 * For conditional rendering, make sure to use the associated
1169 * predicate query.
1170 */
1171 sq = svga_query(sq->predicate);
1172 }
1173 queryId = sq->id;
1174
1175 if ((mode == PIPE_RENDER_COND_WAIT ||
1176 mode == PIPE_RENDER_COND_BY_REGION_WAIT) && sq->fence) {
1177 sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
1178 }
1179 }
1180
1181 ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId,
1182 (uint32) condition);
1183 if (ret != PIPE_OK) {
1184 svga_context_flush(svga, NULL);
1185 ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId,
1186 (uint32) condition);
1187 }
1188 }
1189
1190
1191 /*
1192 * This function is a workaround because we lack the ability to query
1193 * renderer's time synchornously.
1194 */
1195 static uint64_t
1196 svga_get_timestamp(struct pipe_context *pipe)
1197 {
1198 struct pipe_query *q = svga_create_query(pipe, PIPE_QUERY_TIMESTAMP, 0);
1199 union pipe_query_result result;
1200
1201 svga_begin_query(pipe, q);
1202 svga_end_query(pipe,q);
1203 svga_get_query_result(pipe, q, TRUE, &result);
1204 svga_destroy_query(pipe, q);
1205
1206 return result.u64;
1207 }
1208
1209
1210 void
1211 svga_init_query_functions(struct svga_context *svga)
1212 {
1213 svga->pipe.create_query = svga_create_query;
1214 svga->pipe.destroy_query = svga_destroy_query;
1215 svga->pipe.begin_query = svga_begin_query;
1216 svga->pipe.end_query = svga_end_query;
1217 svga->pipe.get_query_result = svga_get_query_result;
1218 svga->pipe.render_condition = svga_render_condition;
1219 svga->pipe.get_timestamp = svga_get_timestamp;
1220 }