/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 * Copyright 2014 Marek Olšák <marek.olsak@amd.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "r600_cs.h"
#include "util/u_memory.h"
struct r600_query_buffer {
	/* The buffer where query results are stored. */
	struct r600_resource		*buf;
	/* Offset of the next free result after current query data */
	unsigned			results_end;
	/* If a query buffer is full, a new buffer is created and the old one
	 * is put in here. When we calculate the result, we sum up the samples
	 * from all buffers. */
	struct r600_query_buffer	*previous;
};
struct r600_query {
	/* The query buffer and how many results are in it. */
	struct r600_query_buffer	buffer;
	/* The type of query */
	unsigned			type;
	/* Size of the result in memory for both begin_query and end_query,
	 * this can be one or two numbers, or it could even be a size of a structure. */
	unsigned			result_size;
	/* The number of dwords for begin_query or end_query. */
	unsigned			num_cs_dw;
	/* linked list of queries */
	struct list_head		list;
	/* for custom non-GPU queries */
	uint64_t			begin_result;
	uint64_t			end_result;
};
static bool r600_is_timer_query(unsigned type)
{
	return type == PIPE_QUERY_TIME_ELAPSED ||
	       type == PIPE_QUERY_TIMESTAMP ||
	       type == PIPE_QUERY_TIMESTAMP_DISJOINT;
}
static bool r600_query_needs_begin(unsigned type)
{
	return type != PIPE_QUERY_GPU_FINISHED &&
	       type != PIPE_QUERY_TIMESTAMP;
}
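
/* Note: PIPE_QUERY_GPU_FINISHED and PIPE_QUERY_TIMESTAMP are "end-only"
 * queries: there is no counter to snapshot when they start, so only the
 * end_query path emits packets for them. */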
static struct r600_resource *r600_new_query_buffer(struct r600_common_context *ctx, unsigned type)
{
	unsigned j, i, num_results, buf_size = 4096;
	uint32_t *results;

	/* Non-GPU queries. */
	switch (type) {
	case R600_QUERY_DRAW_CALLS:
	case R600_QUERY_REQUESTED_VRAM:
	case R600_QUERY_REQUESTED_GTT:
	case R600_QUERY_BUFFER_WAIT_TIME:
		return NULL;
	}

	/* Queries are normally read by the CPU after
	 * being written by the GPU, hence staging is probably a good
	 * usage pattern.
	 */
	struct r600_resource *buf = (struct r600_resource*)
		pipe_buffer_create(ctx->b.screen, PIPE_BIND_CUSTOM,
				   PIPE_USAGE_STAGING, buf_size);

	switch (type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
		memset(results, 0, buf_size);

		/* Set top bits for unused backends. */
		num_results = buf_size / (16 * ctx->max_db);
		for (j = 0; j < num_results; j++) {
			for (i = 0; i < ctx->max_db; i++) {
				if (!(ctx->backend_mask & (1<<i))) {
					results[(i * 4)+1] = 0x80000000;
					results[(i * 4)+3] = 0x80000000;
				}
			}
			results += 4 * ctx->max_db;
		}
		ctx->ws->buffer_unmap(buf->cs_buf);
		break;
	case PIPE_QUERY_TIME_ELAPSED:
	case PIPE_QUERY_TIMESTAMP:
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
	case PIPE_QUERY_PIPELINE_STATISTICS:
		results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
		memset(results, 0, buf_size);
		ctx->ws->buffer_unmap(buf->cs_buf);
		break;
	default:
		assert(0);
	}
	return buf;
}
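
/* Occlusion buffer layout, as implied by the initialization above: every
 * DB writes a begin/end pair of 64-bit ZPASS counters, i.e. 16 bytes
 * (four dwords: begin_lo, begin_hi, end_lo, end_hi) per DB per result.
 * The GPU sets bit 63 of a counter once the write has landed; pre-setting
 * that bit for disabled backends lets the validity check in
 * r600_query_read_result pass even though those DBs never write. */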
static void r600_update_occlusion_query_state(struct r600_common_context *rctx,
					      unsigned type, int diff)
{
	if (type == PIPE_QUERY_OCCLUSION_COUNTER ||
	    type == PIPE_QUERY_OCCLUSION_PREDICATE) {
		bool old_enable = rctx->num_occlusion_queries != 0;
		bool enable;

		rctx->num_occlusion_queries += diff;
		assert(rctx->num_occlusion_queries >= 0);

		enable = rctx->num_occlusion_queries != 0;

		if (enable != old_enable) {
			rctx->set_occlusion_query_state(&rctx->b, enable);
		}
	}
}
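
/* num_occlusion_queries acts as a reference count: the hardware state is
 * only toggled on zero <-> nonzero transitions, so nested or overlapping
 * occlusion queries don't thrash it. */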
static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_query *query)
{
	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
	uint64_t va;

	r600_update_occlusion_query_state(ctx, query->type, 1);
	ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw * 2, TRUE);

	/* Get a new query buffer if needed. */
	if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
		struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
		*qbuf = query->buffer;
		query->buffer.buf = r600_new_query_buffer(ctx, query->type);
		query->buffer.results_end = 0;
		query->buffer.previous = qbuf;
	}

	/* emit begin query */
	va = r600_resource_va(ctx->b.screen, (void*)query->buffer.buf);
	va += query->buffer.results_end;

	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
		radeon_emit(cs, va);
		radeon_emit(cs, (va >> 32UL) & 0xFF);
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3));
		radeon_emit(cs, va);
		radeon_emit(cs, (va >> 32UL) & 0xFF);
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
		radeon_emit(cs, va);
		radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
		radeon_emit(cs, 0);
		radeon_emit(cs, 0);
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS:
		if (!ctx->num_pipelinestat_queries) {
			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
			radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_START) | EVENT_INDEX(0));
		}
		ctx->num_pipelinestat_queries++;
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
		radeon_emit(cs, va);
		radeon_emit(cs, (va >> 32UL) & 0xFF);
		break;
	default:
		assert(0);
	}
	r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE);

	if (!r600_is_timer_query(query->type)) {
		ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
	}
}
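
/* Note: begin reserves CS space for both halves of the query
 * (num_cs_dw * 2), presumably so that the matching end_query packets can
 * always be emitted into the same command stream without flushing. */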
static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_query *query)
{
	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
	uint64_t va;

	/* The queries which need begin already called this in begin_query. */
	if (!r600_query_needs_begin(query->type)) {
		ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw, FALSE);
	}

	va = r600_resource_va(ctx->b.screen, (void*)query->buffer.buf);

	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		va += query->buffer.results_end + 8;
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
		radeon_emit(cs, va);
		radeon_emit(cs, (va >> 32UL) & 0xFF);
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		va += query->buffer.results_end + query->result_size/2;
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3));
		radeon_emit(cs, va);
		radeon_emit(cs, (va >> 32UL) & 0xFF);
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		va += query->buffer.results_end + query->result_size/2;
		/* fall through */
	case PIPE_QUERY_TIMESTAMP:
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
		radeon_emit(cs, va);
		radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
		radeon_emit(cs, 0);
		radeon_emit(cs, 0);
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS:
		assert(ctx->num_pipelinestat_queries > 0);
		ctx->num_pipelinestat_queries--;
		if (!ctx->num_pipelinestat_queries) {
			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
			radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_STOP) | EVENT_INDEX(0));
		}
		va += query->buffer.results_end + query->result_size/2;
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
		radeon_emit(cs, va);
		radeon_emit(cs, (va >> 32UL) & 0xFF);
		break;
	default:
		assert(0);
	}
	r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE);

	query->buffer.results_end += query->result_size;

	if (r600_query_needs_begin(query->type)) {
		if (!r600_is_timer_query(query->type)) {
			ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw;
		}
	}

	r600_update_occlusion_query_state(ctx, query->type, -1);
}
static void r600_emit_query_predication(struct r600_common_context *ctx, struct r600_query *query,
					int operation, bool flag_wait)
{
	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;

	if (operation == PREDICATION_OP_CLEAR) {
		ctx->need_gfx_cs_space(&ctx->b, 3, FALSE);

		radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
		radeon_emit(cs, 0);
		radeon_emit(cs, PRED_OP(PREDICATION_OP_CLEAR));
	} else {
		struct r600_query_buffer *qbuf;
		unsigned count;
		uint32_t op;

		/* Find how many results there are. */
		count = 0;
		for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
			count += qbuf->results_end / query->result_size;
		}

		ctx->need_gfx_cs_space(&ctx->b, 5 * count, TRUE);

		op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
		     (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);

		/* emit predicate packets for all data blocks */
		for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
			unsigned results_base = 0;
			uint64_t va = r600_resource_va(ctx->b.screen, &qbuf->buf->b.b);

			while (results_base < qbuf->results_end) {
				radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
				radeon_emit(cs, (va + results_base) & 0xFFFFFFFFUL);
				radeon_emit(cs, op | (((va + results_base) >> 32UL) & 0xFF));
				r600_emit_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ);
				results_base += query->result_size;

				/* set CONTINUE bit for all packets except the first */
				op |= PREDICATION_CONTINUE;
			}
		}
	}
}
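
/* With PREDICATION_CONTINUE set, each subsequent SET_PREDICATION packet
 * accumulates its visibility information into the current predicate
 * instead of replacing it, which is how results spread across several
 * chained query buffers are combined into a single draw predicate. */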
static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	struct r600_query *query;
	bool skip_allocation = false;

	query = CALLOC_STRUCT(r600_query);
	if (query == NULL)
		return NULL;

	query->type = query_type;

	switch (query_type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		query->result_size = 16 * rctx->max_db;
		query->num_cs_dw = 6;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		query->result_size = 16;
		query->num_cs_dw = 8;
		break;
	case PIPE_QUERY_TIMESTAMP:
		query->result_size = 8;
		query->num_cs_dw = 8;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
		query->result_size = 32;
		query->num_cs_dw = 6;
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS:
		/* 11 values on EG, 8 on R600. */
		query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16;
		query->num_cs_dw = 8;
		break;
	/* Non-GPU queries. */
	case R600_QUERY_DRAW_CALLS:
	case R600_QUERY_REQUESTED_VRAM:
	case R600_QUERY_REQUESTED_GTT:
	case R600_QUERY_BUFFER_WAIT_TIME:
		skip_allocation = true;
		break;
	default:
		assert(0);
		FREE(query);
		return NULL;
	}

	if (!skip_allocation) {
		query->buffer.buf = r600_new_query_buffer(rctx, query_type);
		if (!query->buffer.buf) {
			FREE(query);
			return NULL;
		}
	}
	return (struct pipe_query *)query;
}
static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
{
	struct r600_query *rquery = (struct r600_query *)query;
	struct r600_query_buffer *prev = rquery->buffer.previous;

	/* Release all query buffers. */
	while (prev) {
		struct r600_query_buffer *qbuf = prev;
		prev = prev->previous;
		pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
		FREE(qbuf);
	}

	pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
	FREE(query);
}
static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;
	struct r600_query_buffer *prev = rquery->buffer.previous;

	if (!r600_query_needs_begin(rquery->type)) {
		assert(0);
		return;
	}

	/* Non-GPU queries. */
	switch (rquery->type) {
	case R600_QUERY_DRAW_CALLS:
		rquery->begin_result = rctx->num_draw_calls;
		return;
	case R600_QUERY_REQUESTED_VRAM:
	case R600_QUERY_REQUESTED_GTT:
		rquery->begin_result = 0;
		return;
	case R600_QUERY_BUFFER_WAIT_TIME:
		rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS);
		return;
	}

	/* Discard the old query buffers. */
	while (prev) {
		struct r600_query_buffer *qbuf = prev;
		prev = prev->previous;
		pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
		FREE(qbuf);
	}

	/* Obtain a new buffer if the current one can't be mapped without a stall. */
	if (r600_rings_is_buffer_referenced(rctx, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
	    rctx->ws->buffer_is_busy(rquery->buffer.buf->buf, RADEON_USAGE_READWRITE)) {
		pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
		rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
	}

	rquery->buffer.results_end = 0;
	rquery->buffer.previous = NULL;

	r600_emit_query_begin(rctx, rquery);

	if (!r600_is_timer_query(rquery->type)) {
		LIST_ADDTAIL(&rquery->list, &rctx->active_nontimer_queries);
	}
}
static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;

	/* Non-GPU queries. */
	switch (rquery->type) {
	case R600_QUERY_DRAW_CALLS:
		rquery->end_result = rctx->num_draw_calls;
		return;
	case R600_QUERY_REQUESTED_VRAM:
		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_VRAM_MEMORY);
		return;
	case R600_QUERY_REQUESTED_GTT:
		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_GTT_MEMORY);
		return;
	case R600_QUERY_BUFFER_WAIT_TIME:
		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS);
		return;
	}

	r600_emit_query_end(rctx, rquery);

	if (r600_query_needs_begin(rquery->type) && !r600_is_timer_query(rquery->type)) {
		LIST_DELINIT(&rquery->list);
	}
}
static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
				       bool test_status_bit)
{
	uint32_t *current_result = (uint32_t*)map;
	uint64_t start, end;

	start = (uint64_t)current_result[start_index] |
		(uint64_t)current_result[start_index+1] << 32;
	end = (uint64_t)current_result[end_index] |
	      (uint64_t)current_result[end_index+1] << 32;

	if (!test_status_bit ||
	    ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) {
		return end - start;
	}
	return 0;
}
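
/* Example: for one DB of an occlusion counter, start_index/end_index are
 * 0 and 2, selecting the begin and end snapshots of its ZPASS counter.
 * With test_status_bit, a sample only contributes once bit 63 is set in
 * both snapshots (i.e. the GPU has actually written both values); the
 * status bit itself cancels out in the end - start subtraction. */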
static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
					    struct r600_query *query,
					    struct r600_query_buffer *qbuf,
					    boolean wait,
					    union pipe_query_result *result)
{
	unsigned results_base = 0;
	char *map;

	/* Non-GPU queries. */
	switch (query->type) {
	case R600_QUERY_DRAW_CALLS:
	case R600_QUERY_REQUESTED_VRAM:
	case R600_QUERY_REQUESTED_GTT:
	case R600_QUERY_BUFFER_WAIT_TIME:
		result->u64 = query->end_result - query->begin_result;
		return TRUE;
	}

	map = r600_buffer_map_sync_with_rings(ctx, qbuf->buf,
					      PIPE_TRANSFER_READ |
					      (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
	if (!map)
		return FALSE;

	/* count all results across all data blocks */
	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 0, 2, true);
			results_base += 16;
		}
		break;
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		while (results_base != qbuf->results_end) {
			result->b = result->b ||
				r600_query_read_result(map + results_base, 0, 2, true) != 0;
			results_base += 16;
		}
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 0, 2, false);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_TIMESTAMP:
	{
		uint32_t *current_result = (uint32_t*)map;
		result->u64 = (uint64_t)current_result[0] |
			      (uint64_t)current_result[1] << 32;
		break;
	}
	case PIPE_QUERY_PRIMITIVES_EMITTED:
		/* SAMPLE_STREAMOUTSTATS stores this structure:
		 * {
		 *    u64 NumPrimitivesWritten;
		 *    u64 PrimitiveStorageNeeded;
		 * }
		 * We only need NumPrimitivesWritten here. */
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 2, 6, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_PRIMITIVES_GENERATED:
		/* Here we read PrimitiveStorageNeeded. */
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_SO_STATISTICS:
		while (results_base != qbuf->results_end) {
			result->so_statistics.num_primitives_written +=
				r600_query_read_result(map + results_base, 2, 6, true);
			result->so_statistics.primitives_storage_needed +=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		while (results_base != qbuf->results_end) {
			result->b = result->b ||
				r600_query_read_result(map + results_base, 2, 6, true) !=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS:
		if (ctx->chip_class >= EVERGREEN) {
			while (results_base != qbuf->results_end) {
				result->pipeline_statistics.ps_invocations +=
					r600_query_read_result(map + results_base, 0, 22, false);
				result->pipeline_statistics.c_primitives +=
					r600_query_read_result(map + results_base, 2, 24, false);
				result->pipeline_statistics.c_invocations +=
					r600_query_read_result(map + results_base, 4, 26, false);
				result->pipeline_statistics.vs_invocations +=
					r600_query_read_result(map + results_base, 6, 28, false);
				result->pipeline_statistics.gs_invocations +=
					r600_query_read_result(map + results_base, 8, 30, false);
				result->pipeline_statistics.gs_primitives +=
					r600_query_read_result(map + results_base, 10, 32, false);
				result->pipeline_statistics.ia_primitives +=
					r600_query_read_result(map + results_base, 12, 34, false);
				result->pipeline_statistics.ia_vertices +=
					r600_query_read_result(map + results_base, 14, 36, false);
				result->pipeline_statistics.hs_invocations +=
					r600_query_read_result(map + results_base, 16, 38, false);
				result->pipeline_statistics.ds_invocations +=
					r600_query_read_result(map + results_base, 18, 40, false);
				result->pipeline_statistics.cs_invocations +=
					r600_query_read_result(map + results_base, 20, 42, false);
				results_base += query->result_size;
			}
		} else {
			while (results_base != qbuf->results_end) {
				result->pipeline_statistics.ps_invocations +=
					r600_query_read_result(map + results_base, 0, 16, false);
				result->pipeline_statistics.c_primitives +=
					r600_query_read_result(map + results_base, 2, 18, false);
				result->pipeline_statistics.c_invocations +=
					r600_query_read_result(map + results_base, 4, 20, false);
				result->pipeline_statistics.vs_invocations +=
					r600_query_read_result(map + results_base, 6, 22, false);
				result->pipeline_statistics.gs_invocations +=
					r600_query_read_result(map + results_base, 8, 24, false);
				result->pipeline_statistics.gs_primitives +=
					r600_query_read_result(map + results_base, 10, 26, false);
				result->pipeline_statistics.ia_primitives +=
					r600_query_read_result(map + results_base, 12, 28, false);
				result->pipeline_statistics.ia_vertices +=
					r600_query_read_result(map + results_base, 14, 30, false);
				results_base += query->result_size;
			}
		}
#if 0 /* for testing */
		printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
		       "DS=%llu, GS=%llu, GS prims=%llu, Clipper=%llu, "
		       "Clipper prims=%llu, PS=%llu, CS=%llu\n",
		       result->pipeline_statistics.ia_vertices,
		       result->pipeline_statistics.ia_primitives,
		       result->pipeline_statistics.vs_invocations,
		       result->pipeline_statistics.hs_invocations,
		       result->pipeline_statistics.ds_invocations,
		       result->pipeline_statistics.gs_invocations,
		       result->pipeline_statistics.gs_primitives,
		       result->pipeline_statistics.c_invocations,
		       result->pipeline_statistics.c_primitives,
		       result->pipeline_statistics.ps_invocations,
		       result->pipeline_statistics.cs_invocations);
#endif
		break;
	default:
		assert(0);
	}

	ctx->ws->buffer_unmap(qbuf->buf->cs_buf);
	return TRUE;
}
static boolean r600_get_query_result(struct pipe_context *ctx,
				     struct pipe_query *query,
				     boolean wait, union pipe_query_result *result)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;
	struct r600_query_buffer *qbuf;

	util_query_clear_result(result, rquery->type);

	for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) {
		if (!r600_get_query_buffer_result(rctx, rquery, qbuf, wait, result)) {
			return FALSE;
		}
	}

	/* Convert the time to expected units. */
	if (rquery->type == PIPE_QUERY_TIME_ELAPSED ||
	    rquery->type == PIPE_QUERY_TIMESTAMP) {
		result->u64 = (1000000 * result->u64) / rctx->screen->info.r600_clock_crystal_freq;
	}
	return TRUE;
}
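
/* r600_clock_crystal_freq is reported in kHz, so ticks * 1000000 / freq
 * yields nanoseconds, the unit Gallium expects for TIMESTAMP and
 * TIME_ELAPSED results. */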
static void r600_render_condition(struct pipe_context *ctx,
				  struct pipe_query *query,
				  boolean condition,
				  uint mode)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;
	bool wait_flag = false;

	rctx->current_render_cond = query;
	rctx->current_render_cond_cond = condition;
	rctx->current_render_cond_mode = mode;

	if (query == NULL) {
		if (rctx->predicate_drawing) {
			rctx->predicate_drawing = false;
			r600_emit_query_predication(rctx, NULL, PREDICATION_OP_CLEAR, false);
		}
		return;
	}

	if (mode == PIPE_RENDER_COND_WAIT ||
	    mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
		wait_flag = true;
	}

	rctx->predicate_drawing = true;

	switch (rquery->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		r600_emit_query_predication(rctx, rquery, PREDICATION_OP_ZPASS, wait_flag);
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		r600_emit_query_predication(rctx, rquery, PREDICATION_OP_PRIMCOUNT, wait_flag);
		break;
	default:
		assert(0);
	}
}
void r600_suspend_nontimer_queries(struct r600_common_context *ctx)
{
	struct r600_query *query;

	LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
		r600_emit_query_end(ctx, query);
	}
	assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);
}
void r600_resume_nontimer_queries(struct r600_common_context *ctx)
{
	struct r600_query *query;

	assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);

	LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
		r600_emit_query_begin(ctx, query);
	}
}
/* Get backends mask */
void r600_query_init_backend_mask(struct r600_common_context *ctx)
{
	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
	struct r600_resource *buffer;
	uint32_t *results;
	unsigned num_backends = ctx->screen->info.r600_num_backends;
	unsigned i, mask = 0;
	uint64_t va;

	/* if backend_map query is supported by the kernel */
	if (ctx->screen->info.r600_backend_map_valid) {
		unsigned num_tile_pipes = ctx->screen->info.r600_num_tile_pipes;
		unsigned backend_map = ctx->screen->info.r600_backend_map;
		unsigned item_width, item_mask;

		if (ctx->chip_class >= EVERGREEN) {
			item_width = 4;
			item_mask = 0x7;
		} else {
			item_width = 2;
			item_mask = 0x3;
		}

		while(num_tile_pipes--) {
			i = backend_map & item_mask;
			mask |= (1<<i);
			backend_map >>= item_width;
		}
		if (mask != 0) {
			ctx->backend_mask = mask;
			return;
		}
	}

	/* otherwise backup path for older kernels */

	/* create buffer for event data */
	buffer = (struct r600_resource*)
		pipe_buffer_create(ctx->b.screen, PIPE_BIND_CUSTOM,
				   PIPE_USAGE_STAGING, ctx->max_db*16);
	if (!buffer)
		goto err;

	va = r600_resource_va(ctx->b.screen, (void*)buffer);

	/* initialize buffer with zeroes */
	results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE);
	if (results) {
		memset(results, 0, ctx->max_db * 4 * 4);
		ctx->ws->buffer_unmap(buffer->cs_buf);

		/* emit EVENT_WRITE for ZPASS_DONE */
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
		radeon_emit(cs, va);
		radeon_emit(cs, va >> 32);

		r600_emit_reloc(ctx, &ctx->rings.gfx, buffer, RADEON_USAGE_WRITE);

		/* analyze results */
		results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ);
		if (results) {
			for(i = 0; i < ctx->max_db; i++) {
				/* at least highest bit will be set if backend is used */
				if (results[i*4 + 1])
					mask |= (1<<i);
			}
			ctx->ws->buffer_unmap(buffer->cs_buf);
		}
	}

	pipe_resource_reference((struct pipe_resource**)&buffer, NULL);

	if (mask != 0) {
		ctx->backend_mask = mask;
		return;
	}

err:
	/* fallback to old method - set num_backends lower bits to 1 */
	ctx->backend_mask = (~((uint32_t)0))>>(32-num_backends);
	return;
}
void r600_query_init(struct r600_common_context *rctx)
{
	rctx->b.create_query = r600_create_query;
	rctx->b.destroy_query = r600_destroy_query;
	rctx->b.begin_query = r600_begin_query;
	rctx->b.end_query = r600_end_query;
	rctx->b.get_query_result = r600_get_query_result;

	if (((struct r600_common_screen*)rctx->b.screen)->info.r600_num_backends > 0)
	    rctx->b.render_condition = r600_render_condition;

	LIST_INITHEAD(&rctx->active_nontimer_queries);
}
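
/* Usage sketch (illustrative only, not part of the driver): a state
 * tracker drives these queries through the pipe_context hooks installed
 * above, roughly:
 *
 *	struct pipe_query *q = pipe->create_query(pipe, PIPE_QUERY_OCCLUSION_COUNTER);
 *	pipe->begin_query(pipe, q);
 *	// ... draw calls ...
 *	pipe->end_query(pipe, q);
 *	union pipe_query_result res;
 *	pipe->get_query_result(pipe, q, TRUE, &res);	// TRUE: block until ready
 *	pipe->destroy_query(pipe, q);
 */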