/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "r600_pipe.h"
#include "util/u_memory.h"
#include "r600_hw_context_priv.h"

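/* Allocate a 4096-byte staging buffer for query results.  For occlusion
 * queries, the top status bits of the result slots belonging to disabled
 * backends are pre-set so readback treats those slots as already written. */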
static struct r600_resource *r600_new_query_buffer(struct r600_context *ctx, unsigned type)
{
	unsigned j, i, num_results, buf_size = 4096;
	uint32_t *results;
	/* Queries are normally read by the CPU after
	 * being written by the GPU, hence staging is probably a good
	 * usage pattern.
	 */
	struct r600_resource *buf = (struct r600_resource*)
		pipe_buffer_create(&ctx->screen->screen, PIPE_BIND_CUSTOM,
				   PIPE_USAGE_STAGING, buf_size);

	switch (type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		results = ctx->ws->buffer_map(buf->buf, ctx->cs, PIPE_TRANSFER_WRITE);
		memset(results, 0, buf_size);

		/* Set top bits for unused backends. */
		num_results = buf_size / (16 * ctx->max_db);
		for (j = 0; j < num_results; j++) {
			for (i = 0; i < ctx->max_db; i++) {
				if (!(ctx->backend_mask & (1<<i))) {
					results[(i * 4)+1] = 0x80000000;
					results[(i * 4)+3] = 0x80000000;
				}
			}
			results += 4 * ctx->max_db;
		}
		ctx->ws->buffer_unmap(buf->buf);
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		results = ctx->ws->buffer_map(buf->buf, ctx->cs, PIPE_TRANSFER_WRITE);
		memset(results, 0, buf_size);
		ctx->ws->buffer_unmap(buf->buf);
		break;
	default:
		assert(0);
	}
	return buf;
}

static void r600_emit_query_begin(struct r600_context *ctx, struct r600_query *query)
{
	struct radeon_winsys_cs *cs = ctx->cs;
	uint64_t va;

	r600_need_cs_space(ctx, query->num_cs_dw * 2, TRUE);

	/* Get a new query buffer if needed. */
	if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.b.width0) {
		struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
		*qbuf = query->buffer;
		query->buffer.buf = r600_new_query_buffer(ctx, query->type);
		query->buffer.results_end = 0;
		query->buffer.previous = qbuf;
	}

	/* emit begin query */
	va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer.buf);
	va += query->buffer.results_end;

	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
		cs->buf[cs->cdw++] = 0;
		cs->buf[cs->cdw++] = 0;
		break;
	default:
		assert(0);
	}
	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer.buf, RADEON_USAGE_WRITE);

	ctx->num_cs_dw_queries_suspend += query->num_cs_dw;
}

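/* Emit the packets that write the "end" half of a result pair and advance
 * results_end past the completed slot. */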
static void r600_emit_query_end(struct r600_context *ctx, struct r600_query *query)
{
	struct radeon_winsys_cs *cs = ctx->cs;
	uint64_t va;

	va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer.buf);

	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		va += query->buffer.results_end + 8;
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
		cs->buf[cs->cdw++] = query->buffer.results_end + query->result_size/2;
		cs->buf[cs->cdw++] = 0;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		va += query->buffer.results_end + query->result_size/2;
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
		cs->buf[cs->cdw++] = 0;
		cs->buf[cs->cdw++] = 0;
		break;
	default:
		assert(0);
	}
	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer.buf, RADEON_USAGE_WRITE);

	query->buffer.results_end += query->result_size;
	ctx->num_cs_dw_queries_suspend -= query->num_cs_dw;
}

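/* Emit SET_PREDICATION packets, either clearing predication or pointing the
 * hardware at every result block accumulated for the query. */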
static void r600_emit_query_predication(struct r600_context *ctx, struct r600_query *query,
					int operation, bool flag_wait)
{
	struct radeon_winsys_cs *cs = ctx->cs;

	if (operation == PREDICATION_OP_CLEAR) {
		r600_need_cs_space(ctx, 3, FALSE);

		cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
		cs->buf[cs->cdw++] = 0;
		cs->buf[cs->cdw++] = PRED_OP(PREDICATION_OP_CLEAR);
	} else {
		struct r600_query_buffer *qbuf;
		unsigned count;
		uint32_t op;

		/* Find how many results there are. */
		count = 0;
		for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
			count += qbuf->results_end / query->result_size;
		}

		r600_need_cs_space(ctx, 5 * count, TRUE);

		op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
				(flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);

		/* emit predicate packets for all data blocks */
		for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
			unsigned results_base = 0;
			uint64_t va = r600_resource_va(&ctx->screen->screen, &qbuf->buf->b.b.b);

			while (results_base < qbuf->results_end) {
				cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
				cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL;
				cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF);
				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
				cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, qbuf->buf, RADEON_USAGE_READ);
				results_base += query->result_size;

				/* set CONTINUE bit for all packets except the first */
				op |= PREDICATION_CONTINUE;
			}
		}
	}
}

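/* Gallium create_query hook: set the per-type result size and command-stream
 * dword count, then allocate the first result buffer. */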
static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_query *query;

	query = CALLOC_STRUCT(r600_query);
	if (query == NULL)
		return NULL;

	query->type = query_type;

	switch (query_type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		query->result_size = 16 * rctx->max_db;
		query->num_cs_dw = 6;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		query->result_size = 16;
		query->num_cs_dw = 8;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
		query->result_size = 32;
		query->num_cs_dw = 6;
		break;
	default:
		assert(0);
		FREE(query);
		return NULL;
	}

	query->buffer.buf = r600_new_query_buffer(rctx, query_type);
	if (!query->buffer.buf) {
		FREE(query);
		return NULL;
	}
	return (struct pipe_query*)query;
}

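/* Gallium destroy_query hook: release the whole chain of result buffers. */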
static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
{
	struct r600_query *rquery = (struct r600_query *)query;
	struct r600_query_buffer *prev = rquery->buffer.previous;

	/* Release all query buffers. */
	while (prev) {
		struct r600_query_buffer *qbuf = prev;
		prev = prev->previous;
		pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
		FREE(qbuf);
	}

	pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
	FREE(query);
}

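/* Track the number of active occlusion queries and dirty the DB misc state
 * atom when occlusion counting has to be switched on or off. */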
static void r600_update_occlusion_query_state(struct r600_context *rctx,
					      unsigned type, int diff)
{
	if (type == PIPE_QUERY_OCCLUSION_COUNTER ||
	    type == PIPE_QUERY_OCCLUSION_PREDICATE) {
		bool enable;

		rctx->num_occlusion_queries += diff;
		assert(rctx->num_occlusion_queries >= 0);

		enable = rctx->num_occlusion_queries != 0;

		if (rctx->atom_db_misc_state.occlusion_query_enabled != enable) {
			rctx->atom_db_misc_state.occlusion_query_enabled = enable;
			r600_atom_dirty(rctx, &rctx->atom_db_misc_state.atom);
		}
	}
}

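/* Gallium begin_query hook: drop results from any previous use of the query
 * and start collecting into a buffer that can be mapped without stalling. */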
static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;
	/* Discard the old query buffers. */
	struct r600_query_buffer *prev = rquery->buffer.previous;

	while (prev) {
		struct r600_query_buffer *qbuf = prev;
		prev = prev->previous;
		pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
		FREE(qbuf);
	}

	/* Obtain a new buffer if the current one can't be mapped without a stall. */
	if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rquery->buffer.buf->cs_buf) ||
	    rctx->ws->buffer_is_busy(rquery->buffer.buf->buf, RADEON_USAGE_READWRITE)) {
		pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
		rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
	}

	rquery->buffer.results_end = 0;
	rquery->buffer.previous = NULL;

	r600_update_occlusion_query_state(rctx, rquery->type, 1);

	r600_emit_query_begin(rctx, rquery);
	LIST_ADDTAIL(&rquery->list, &rctx->active_query_list);
}

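/* Gallium end_query hook: stop result collection and remove the query from
 * the active list. */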
static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;

	r600_emit_query_end(rctx, rquery);
	LIST_DELINIT(&rquery->list);

	r600_update_occlusion_query_state(rctx, rquery->type, -1);
}

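/* Compute end - start from a pair of 64-bit counters in a mapped result
 * buffer; when test_status_bit is set, return 0 unless both counters have
 * their "written" top bit set. */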
static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
				       bool test_status_bit)
{
	uint32_t *current_result = (uint32_t*)map;
	uint64_t start, end;

	start = (uint64_t)current_result[start_index] |
		(uint64_t)current_result[start_index+1] << 32;
	end = (uint64_t)current_result[end_index] |
	      (uint64_t)current_result[end_index+1] << 32;

	if (!test_status_bit ||
	    ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) {
		return end - start;
	}
	return 0;
}

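/* Accumulate the results stored in one buffer of the chain into *result;
 * returns FALSE if the buffer could not be mapped without blocking. */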
static boolean r600_get_query_buffer_result(struct r600_context *ctx,
						struct r600_query *query,
						struct r600_query_buffer *qbuf,
						boolean wait,
						union r600_query_result *result)
{
	unsigned results_base = 0;
	char *map;

	map = ctx->ws->buffer_map(qbuf->buf->buf, ctx->cs,
				  PIPE_TRANSFER_READ |
				  (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
	if (!map)
		return FALSE;

	/* count all results across all data blocks */
	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 0, 2, true);
			results_base += 16;
		}
		break;
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		while (results_base != qbuf->results_end) {
			result->b = result->b ||
				r600_query_read_result(map + results_base, 0, 2, true) != 0;
			results_base += 16;
		}
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 0, 2, false);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
		/* SAMPLE_STREAMOUTSTATS stores this structure:
		 * {
		 *    u64 NumPrimitivesWritten;
		 *    u64 PrimitiveStorageNeeded;
		 * }
		 * We only need NumPrimitivesWritten here. */
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 2, 6, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_PRIMITIVES_GENERATED:
		/* Here we read PrimitiveStorageNeeded. */
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_SO_STATISTICS:
		while (results_base != qbuf->results_end) {
			result->so.num_primitives_written +=
				r600_query_read_result(map + results_base, 2, 6, true);
			result->so.primitives_storage_needed +=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		while (results_base != qbuf->results_end) {
			result->b = result->b ||
				r600_query_read_result(map + results_base, 2, 6, true) !=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base += query->result_size;
		}
		break;
	default:
		assert(0);
	}

	ctx->ws->buffer_unmap(qbuf->buf->buf);
	return TRUE;
}

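/* Gallium get_query_result hook: sum all chained buffers and convert the raw
 * value into the representation expected for the query type (TIME_ELAPSED is
 * scaled using the crystal clock frequency). */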
static boolean r600_get_query_result(struct pipe_context *ctx,
					struct pipe_query *query,
					boolean wait, void *vresult)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;
	boolean *result_b = (boolean *)vresult;
	uint64_t *result_u64 = (uint64_t*)vresult;
	union r600_query_result result;
	struct pipe_query_data_so_statistics *result_so =
		(struct pipe_query_data_so_statistics *)vresult;
	struct r600_query_buffer *qbuf;

	memset(&result, 0, sizeof(result));

	for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) {
		if (!r600_get_query_buffer_result(rctx, rquery, qbuf, wait, &result)) {
			return FALSE;
		}
	}

	switch (rquery->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
		*result_u64 = result.u64;
		break;
	case PIPE_QUERY_OCCLUSION_PREDICATE:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		*result_b = result.b;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		*result_u64 = (1000000 * result.u64) / rctx->screen->info.r600_clock_crystal_freq;
		break;
	case PIPE_QUERY_SO_STATISTICS:
		*result_so = result.so;
		break;
	default:
		assert(0);
	}
	return TRUE;
}

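/* Gallium render_condition hook: enable or disable predicated rendering based
 * on an occlusion or streamout query, honouring the WAIT modes. */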
static void r600_render_condition(struct pipe_context *ctx,
				  struct pipe_query *query,
				  uint mode)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;
	bool wait_flag = false;

	rctx->current_render_cond = query;
	rctx->current_render_cond_mode = mode;

	if (query == NULL) {
		if (rctx->predicate_drawing) {
			rctx->predicate_drawing = false;
			r600_emit_query_predication(rctx, NULL, PREDICATION_OP_CLEAR, false);
		}
		return;
	}

	if (mode == PIPE_RENDER_COND_WAIT ||
	    mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
		wait_flag = true;
	}

	rctx->predicate_drawing = true;

	switch (rquery->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		r600_emit_query_predication(rctx, rquery, PREDICATION_OP_ZPASS, wait_flag);
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		r600_emit_query_predication(rctx, rquery, PREDICATION_OP_PRIMCOUNT, wait_flag);
		break;
	default:
		assert(0);
	}
}

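/* Stop every query on the active list without removing it from the list. */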
void r600_suspend_queries(struct r600_context *ctx)
{
	struct r600_query *query;

	LIST_FOR_EACH_ENTRY(query, &ctx->active_query_list, list) {
		r600_emit_query_end(ctx, query);
	}
	assert(ctx->num_cs_dw_queries_suspend == 0);
}

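/* Restart result collection for every query on the active list. */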
void r600_resume_queries(struct r600_context *ctx)
{
	struct r600_query *query;

	assert(ctx->num_cs_dw_queries_suspend == 0);

	LIST_FOR_EACH_ENTRY(query, &ctx->active_query_list, list) {
		r600_emit_query_begin(ctx, query);
	}
}

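/* Hook the query entry points into the pipe_context; render_condition is only
 * installed when r600_num_backends is non-zero. */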
void r600_init_query_functions(struct r600_context *rctx)
{
	rctx->context.create_query = r600_create_query;
	rctx->context.destroy_query = r600_destroy_query;
	rctx->context.begin_query = r600_begin_query;
	rctx->context.end_query = r600_end_query;
	rctx->context.get_query_result = r600_get_query_result;

	if (rctx->screen->info.r600_num_backends > 0)
		rctx->context.render_condition = r600_render_condition;
}