/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "r600_pipe.h"
#include "r600d.h"
#include "util/u_memory.h"
static bool r600_is_timer_query(unsigned type)
{
	return type == PIPE_QUERY_TIME_ELAPSED ||
	       type == PIPE_QUERY_TIMESTAMP ||
	       type == PIPE_QUERY_TIMESTAMP_DISJOINT;
}

static bool r600_query_needs_begin(unsigned type)
{
	return type != PIPE_QUERY_GPU_FINISHED &&
	       type != PIPE_QUERY_TIMESTAMP;
}
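
/* These two predicates drive the suspend/resume machinery below: only
 * non-timer queries are put on active_nontimer_queries and accounted in
 * num_cs_dw_nontimer_queries_suspend, and the end-only query types
 * (GPU_FINISHED, TIMESTAMP) skip the begin path entirely. */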
static struct r600_resource *r600_new_query_buffer(struct r600_context *ctx, unsigned type)
{
	unsigned j, i, num_results, buf_size = 4096;
	uint32_t *results;

	/* Non-GPU queries. */
	switch (type) {
	case R600_QUERY_DRAW_CALLS:
	case R600_QUERY_REQUESTED_VRAM:
	case R600_QUERY_REQUESTED_GTT:
	case R600_QUERY_BUFFER_WAIT_TIME:
		return NULL;
	}

	/* Queries are normally read by the CPU after
	 * being written by the gpu, hence staging is probably a good
	 * usage pattern.
	 */
	struct r600_resource *buf = (struct r600_resource*)
		pipe_buffer_create(&ctx->screen->screen, PIPE_BIND_CUSTOM,
				   PIPE_USAGE_STAGING, buf_size);

	switch (type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		results = r600_buffer_mmap_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
		memset(results, 0, buf_size);

		/* Set top bits for unused backends. */
		num_results = buf_size / (16 * ctx->max_db);
		for (j = 0; j < num_results; j++) {
			for (i = 0; i < ctx->max_db; i++) {
				if (!(ctx->backend_mask & (1<<i))) {
					results[(i * 4)+1] = 0x80000000;
					results[(i * 4)+3] = 0x80000000;
				}
			}
			results += 4 * ctx->max_db;
		}
		ctx->ws->buffer_unmap(buf->cs_buf);
		break;
	case PIPE_QUERY_TIME_ELAPSED:
	case PIPE_QUERY_TIMESTAMP:
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
	case PIPE_QUERY_PIPELINE_STATISTICS:
		results = r600_buffer_mmap_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
		memset(results, 0, buf_size);
		ctx->ws->buffer_unmap(buf->cs_buf);
		break;
	default:
		assert(0);
	}
	return buf;
}
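
/* A note on the occlusion layout prepared above: each 16-byte slot holds a
 * begin/end pair of 64-bit counters for one DB, so one ZPASS_DONE sample
 * occupies 16 * max_db bytes.  Pre-setting bit 31 of the high dwords for
 * backends absent from backend_mask makes those pairs pass the
 * test_status_bit check in r600_query_read_result() and contribute zero,
 * instead of looking like counters that were never written. */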
static void r600_update_occlusion_query_state(struct r600_context *rctx,
					      unsigned type, int diff)
{
	if (type == PIPE_QUERY_OCCLUSION_COUNTER ||
	    type == PIPE_QUERY_OCCLUSION_PREDICATE) {
		bool enable;

		rctx->num_occlusion_queries += diff;
		assert(rctx->num_occlusion_queries >= 0);

		enable = rctx->num_occlusion_queries != 0;

		if (rctx->db_misc_state.occlusion_query_enabled != enable) {
			rctx->db_misc_state.occlusion_query_enabled = enable;
			rctx->db_misc_state.atom.dirty = true;
		}
	}
}
static void r600_emit_query_begin(struct r600_context *ctx, struct r600_query *query)
{
	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
	uint64_t va;

	r600_update_occlusion_query_state(ctx, query->type, 1);
	r600_need_cs_space(ctx, query->num_cs_dw * 2, TRUE);

	/* Get a new query buffer if needed. */
	if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
		struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
		*qbuf = query->buffer;
		query->buffer.buf = r600_new_query_buffer(ctx, query->type);
		query->buffer.results_end = 0;
		query->buffer.previous = qbuf;
	}

	/* emit begin query */
	va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer.buf);
	va += query->buffer.results_end;

	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
		cs->buf[cs->cdw++] = 0;
		cs->buf[cs->cdw++] = 0;
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS:
		if (!ctx->num_pipelinestat_queries) {
			cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
			cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_START) | EVENT_INDEX(0);
		}
		ctx->num_pipelinestat_queries++;
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	default:
		assert(0);
	}
	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE);

	if (!r600_is_timer_query(query->type)) {
		ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
	}
}
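
/* Every packet above that points the GPU at the query buffer is followed by
 * a PKT3_NOP carrying the relocation from r600_context_bo_reloc(); the same
 * pattern is used in the end and predication paths.  num_cs_dw is the size
 * of one such begin-or-end sequence, which is why the begin path reserves
 * num_cs_dw * 2 dwords so the matching end query fits in the same CS. */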
static void r600_emit_query_end(struct r600_context *ctx, struct r600_query *query)
{
	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
	uint64_t va;

	/* The queries which need begin already called this in begin_query. */
	if (!r600_query_needs_begin(query->type)) {
		r600_need_cs_space(ctx, query->num_cs_dw, FALSE);
	}

	va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer.buf);

	/* emit end query */
	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		va += query->buffer.results_end + 8;
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		va += query->buffer.results_end + query->result_size/2;
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		va += query->buffer.results_end + query->result_size/2;
		/* fall through */
	case PIPE_QUERY_TIMESTAMP:
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
		cs->buf[cs->cdw++] = 0;
		cs->buf[cs->cdw++] = 0;
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS:
		assert(ctx->num_pipelinestat_queries > 0);
		ctx->num_pipelinestat_queries--;
		if (!ctx->num_pipelinestat_queries) {
			cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
			cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_STOP) | EVENT_INDEX(0);
		}
		va += query->buffer.results_end + query->result_size/2;
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	default:
		assert(0);
	}
	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE);

	query->buffer.results_end += query->result_size;

	if (r600_query_needs_begin(query->type)) {
		if (!r600_is_timer_query(query->type)) {
			ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw;
		}
	}

	r600_update_occlusion_query_state(ctx, query->type, -1);
}
static void r600_emit_query_predication(struct r600_context *ctx, struct r600_query *query,
					int operation, bool flag_wait)
{
	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;

	if (operation == PREDICATION_OP_CLEAR) {
		r600_need_cs_space(ctx, 3, FALSE);

		cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
		cs->buf[cs->cdw++] = 0;
		cs->buf[cs->cdw++] = PRED_OP(PREDICATION_OP_CLEAR);
	} else {
		struct r600_query_buffer *qbuf;
		unsigned count;
		uint32_t op;

		/* Find how many results there are. */
		count = 0;
		for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
			count += qbuf->results_end / query->result_size;
		}

		r600_need_cs_space(ctx, 5 * count, TRUE);

		op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
		     (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);

		/* emit predicate packets for all data blocks */
		for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
			unsigned results_base = 0;
			uint64_t va = r600_resource_va(&ctx->screen->screen, &qbuf->buf->b.b);

			while (results_base < qbuf->results_end) {
				cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
				cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL;
				cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF);
				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
				cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ);
				results_base += query->result_size;

				/* set CONTINUE bit for all packets except the first */
				op |= PREDICATION_CONTINUE;
			}
		}
	}
}
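
/* r600_render_condition() is the only caller of this helper: it either
 * clears the predicate (query == NULL) or walks the whole buffer chain and
 * emits one SET_PREDICATION per stored result pair, chaining all but the
 * first with the CONTINUE bit so they are evaluated as a single visibility
 * predicate. */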
static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_query *query;
	bool skip_allocation = false;

	query = CALLOC_STRUCT(r600_query);
	if (query == NULL)
		return NULL;

	query->type = query_type;

	switch (query_type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		query->result_size = 16 * rctx->max_db;
		query->num_cs_dw = 6;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		query->result_size = 16;
		query->num_cs_dw = 8;
		break;
	case PIPE_QUERY_TIMESTAMP:
		query->result_size = 8;
		query->num_cs_dw = 8;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
		query->result_size = 32;
		query->num_cs_dw = 6;
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS:
		/* 11 values on EG, 8 on R600. */
		query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16;
		query->num_cs_dw = 8;
		break;
	/* Non-GPU queries. */
	case R600_QUERY_DRAW_CALLS:
	case R600_QUERY_REQUESTED_VRAM:
	case R600_QUERY_REQUESTED_GTT:
	case R600_QUERY_BUFFER_WAIT_TIME:
		skip_allocation = true;
		break;
	default:
		assert(0);
		FREE(query);
		return NULL;
	}

	if (!skip_allocation) {
		query->buffer.buf = r600_new_query_buffer(rctx, query_type);
		if (!query->buffer.buf) {
			FREE(query);
			return NULL;
		}
	}
	return (struct pipe_query*)query;
}
static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
{
	struct r600_query *rquery = (struct r600_query*)query;
	struct r600_query_buffer *prev = rquery->buffer.previous;

	/* Release all query buffers. */
	while (prev) {
		struct r600_query_buffer *qbuf = prev;
		prev = prev->previous;
		pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
		FREE(qbuf);
	}

	pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
	FREE(query);
}
static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;
	struct r600_query_buffer *prev = rquery->buffer.previous;

	if (!r600_query_needs_begin(rquery->type)) {
		assert(0);
		return;
	}

	/* Non-GPU queries. */
	switch (rquery->type) {
	case R600_QUERY_DRAW_CALLS:
		rquery->begin_result = rctx->num_draw_calls;
		return;
	case R600_QUERY_REQUESTED_VRAM:
	case R600_QUERY_REQUESTED_GTT:
		rquery->begin_result = 0;
		return;
	case R600_QUERY_BUFFER_WAIT_TIME:
		rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS);
		return;
	}

	/* Discard the old query buffers. */
	while (prev) {
		struct r600_query_buffer *qbuf = prev;
		prev = prev->previous;
		pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
		FREE(qbuf);
	}

	/* Obtain a new buffer if the current one can't be mapped without a stall. */
	if (r600_rings_is_buffer_referenced(rctx, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
	    rctx->ws->buffer_is_busy(rquery->buffer.buf->buf, RADEON_USAGE_READWRITE)) {
		pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
		rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
	}

	rquery->buffer.results_end = 0;
	rquery->buffer.previous = NULL;

	r600_emit_query_begin(rctx, rquery);

	if (!r600_is_timer_query(rquery->type)) {
		LIST_ADDTAIL(&rquery->list, &rctx->active_nontimer_queries);
	}
}
static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;

	/* Non-GPU queries. */
	switch (rquery->type) {
	case R600_QUERY_DRAW_CALLS:
		rquery->end_result = rctx->num_draw_calls;
		return;
	case R600_QUERY_REQUESTED_VRAM:
		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_VRAM_MEMORY);
		return;
	case R600_QUERY_REQUESTED_GTT:
		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_GTT_MEMORY);
		return;
	case R600_QUERY_BUFFER_WAIT_TIME:
		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS);
		return;
	}

	r600_emit_query_end(rctx, rquery);

	if (r600_query_needs_begin(rquery->type) && !r600_is_timer_query(rquery->type)) {
		LIST_DELINIT(&rquery->list);
	}
}
static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
				       bool test_status_bit)
{
	uint32_t *current_result = (uint32_t*)map;
	uint64_t start, end;

	start = (uint64_t)current_result[start_index] |
		(uint64_t)current_result[start_index+1] << 32;
	end = (uint64_t)current_result[end_index] |
	      (uint64_t)current_result[end_index+1] << 32;

	if (!test_status_bit ||
	    ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) {
		return end - start;
	}
	return 0;
}
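
/* GPU query results are stored as {begin, end} snapshots of 64-bit
 * counters; the helper above returns end - start for one such pair and,
 * when test_status_bit is set, only trusts pairs whose top bits are set
 * (the same bit that r600_new_query_buffer() pre-sets for disabled
 * backends so that their all-zero slots still count as valid). */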
static boolean r600_get_query_buffer_result(struct r600_context *ctx,
					    struct r600_query *query,
					    struct r600_query_buffer *qbuf,
					    boolean wait,
					    union pipe_query_result *result)
{
	unsigned results_base = 0;
	char *map;

	/* Non-GPU queries. */
	switch (query->type) {
	case R600_QUERY_DRAW_CALLS:
	case R600_QUERY_REQUESTED_VRAM:
	case R600_QUERY_REQUESTED_GTT:
	case R600_QUERY_BUFFER_WAIT_TIME:
		result->u64 = query->end_result - query->begin_result;
		return TRUE;
	}

	map = r600_buffer_mmap_sync_with_rings(ctx, qbuf->buf,
					       PIPE_TRANSFER_READ |
					       (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
	if (!map)
		return FALSE;

	/* count all results across all data blocks */
	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 0, 2, true);
			results_base += 16;
		}
		break;
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		while (results_base != qbuf->results_end) {
			result->b = result->b ||
				r600_query_read_result(map + results_base, 0, 2, true) != 0;
			results_base += 16;
		}
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 0, 2, false);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_TIMESTAMP:
	{
		uint32_t *current_result = (uint32_t*)map;
		result->u64 = (uint64_t)current_result[0] |
			      (uint64_t)current_result[1] << 32;
		break;
	}
	case PIPE_QUERY_PRIMITIVES_EMITTED:
		/* SAMPLE_STREAMOUTSTATS stores this structure:
		 * {
		 *    u64 NumPrimitivesWritten;
		 *    u64 PrimitiveStorageNeeded;
		 * }
		 * We only need NumPrimitivesWritten here. */
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 2, 6, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_PRIMITIVES_GENERATED:
		/* Here we read PrimitiveStorageNeeded. */
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_SO_STATISTICS:
		while (results_base != qbuf->results_end) {
			result->so_statistics.num_primitives_written +=
				r600_query_read_result(map + results_base, 2, 6, true);
			result->so_statistics.primitives_storage_needed +=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		while (results_base != qbuf->results_end) {
			result->b = result->b ||
				r600_query_read_result(map + results_base, 2, 6, true) !=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS:
		if (ctx->chip_class >= EVERGREEN) {
			while (results_base != qbuf->results_end) {
				result->pipeline_statistics.ps_invocations +=
					r600_query_read_result(map + results_base, 0, 22, false);
				result->pipeline_statistics.c_primitives +=
					r600_query_read_result(map + results_base, 2, 24, false);
				result->pipeline_statistics.c_invocations +=
					r600_query_read_result(map + results_base, 4, 26, false);
				result->pipeline_statistics.vs_invocations +=
					r600_query_read_result(map + results_base, 6, 28, false);
				result->pipeline_statistics.gs_invocations +=
					r600_query_read_result(map + results_base, 8, 30, false);
				result->pipeline_statistics.gs_primitives +=
					r600_query_read_result(map + results_base, 10, 32, false);
				result->pipeline_statistics.ia_primitives +=
					r600_query_read_result(map + results_base, 12, 34, false);
				result->pipeline_statistics.ia_vertices +=
					r600_query_read_result(map + results_base, 14, 36, false);
				result->pipeline_statistics.hs_invocations +=
					r600_query_read_result(map + results_base, 16, 38, false);
				result->pipeline_statistics.ds_invocations +=
					r600_query_read_result(map + results_base, 18, 40, false);
				result->pipeline_statistics.cs_invocations +=
					r600_query_read_result(map + results_base, 20, 42, false);
				results_base += query->result_size;
			}
		} else {
			while (results_base != qbuf->results_end) {
				result->pipeline_statistics.ps_invocations +=
					r600_query_read_result(map + results_base, 0, 16, false);
				result->pipeline_statistics.c_primitives +=
					r600_query_read_result(map + results_base, 2, 18, false);
				result->pipeline_statistics.c_invocations +=
					r600_query_read_result(map + results_base, 4, 20, false);
				result->pipeline_statistics.vs_invocations +=
					r600_query_read_result(map + results_base, 6, 22, false);
				result->pipeline_statistics.gs_invocations +=
					r600_query_read_result(map + results_base, 8, 24, false);
				result->pipeline_statistics.gs_primitives +=
					r600_query_read_result(map + results_base, 10, 26, false);
				result->pipeline_statistics.ia_primitives +=
					r600_query_read_result(map + results_base, 12, 28, false);
				result->pipeline_statistics.ia_vertices +=
					r600_query_read_result(map + results_base, 14, 30, false);
				results_base += query->result_size;
			}
		}
#if 0 /* for testing */
		printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
		       "DS=%llu, GS=%llu, GS prims=%llu, Clipper=%llu, "
		       "Clipper prims=%llu, PS=%llu, CS=%llu\n",
		       result->pipeline_statistics.ia_vertices,
		       result->pipeline_statistics.ia_primitives,
		       result->pipeline_statistics.vs_invocations,
		       result->pipeline_statistics.hs_invocations,
		       result->pipeline_statistics.ds_invocations,
		       result->pipeline_statistics.gs_invocations,
		       result->pipeline_statistics.gs_primitives,
		       result->pipeline_statistics.c_invocations,
		       result->pipeline_statistics.c_primitives,
		       result->pipeline_statistics.ps_invocations,
		       result->pipeline_statistics.cs_invocations);
#endif
		break;
	default:
		assert(0);
	}

	ctx->ws->buffer_unmap(qbuf->buf->cs_buf);
	return TRUE;
}
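
/* The dword indices used above follow from the begin/end snapshot layout:
 * the begin snapshot starts at byte 0 of a slot and the end snapshot at
 * result_size/2 (see r600_emit_query_end), so the Evergreen pipeline
 * statistics pairs are 22 dwords apart (11 counters * 8 bytes) while the
 * R600 ones are 16 dwords apart (8 counters * 8 bytes). */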
static boolean r600_get_query_result(struct pipe_context *ctx,
				     struct pipe_query *query,
				     boolean wait, union pipe_query_result *result)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;
	struct r600_query_buffer *qbuf;

	util_query_clear_result(result, rquery->type);

	for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) {
		if (!r600_get_query_buffer_result(rctx, rquery, qbuf, wait, result)) {
			return FALSE;
		}
	}

	/* Convert the time to expected units. */
	if (rquery->type == PIPE_QUERY_TIME_ELAPSED ||
	    rquery->type == PIPE_QUERY_TIMESTAMP) {
		result->u64 = (1000000 * result->u64) / rctx->screen->info.r600_clock_crystal_freq;
	}
	return TRUE;
}
static void r600_render_condition(struct pipe_context *ctx,
				  struct pipe_query *query,
				  boolean condition,
				  uint mode)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;
	bool wait_flag = false;

	rctx->current_render_cond = query;
	rctx->current_render_cond_cond = condition;
	rctx->current_render_cond_mode = mode;

	if (query == NULL) {
		if (rctx->predicate_drawing) {
			rctx->predicate_drawing = false;
			r600_emit_query_predication(rctx, NULL, PREDICATION_OP_CLEAR, false);
		}
		return;
	}

	if (mode == PIPE_RENDER_COND_WAIT ||
	    mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
		wait_flag = true;
	}

	rctx->predicate_drawing = true;

	switch (rquery->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		r600_emit_query_predication(rctx, rquery, PREDICATION_OP_ZPASS, wait_flag);
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		r600_emit_query_predication(rctx, rquery, PREDICATION_OP_PRIMCOUNT, wait_flag);
		break;
	default:
		assert(0);
	}
}
void r600_suspend_nontimer_queries(struct r600_context *ctx)
{
	struct r600_query *query;

	LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
		r600_emit_query_end(ctx, query);
	}
	assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);
}
void r600_resume_nontimer_queries(struct r600_context *ctx)
{
	struct r600_query *query;

	assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);

	LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
		r600_emit_query_begin(ctx, query);
	}
}
void r600_init_query_functions(struct r600_context *rctx)
{
	rctx->context.create_query = r600_create_query;
	rctx->context.destroy_query = r600_destroy_query;
	rctx->context.begin_query = r600_begin_query;
	rctx->context.end_query = r600_end_query;
	rctx->context.get_query_result = r600_get_query_result;

	if (rctx->screen->info.r600_num_backends > 0)
		rctx->context.render_condition = r600_render_condition;
}