2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
26 #include "r600_hw_context_priv.h"
27 #include "radeonsi_pm4.h"
28 #include "radeonsi_pipe.h"
30 #include "util/u_memory.h"
33 #define GROUP_FORCE_NEW_BLOCK 0
35 /* Get backends mask */
36 void r600_get_backend_mask(struct r600_context
*ctx
)
38 struct radeon_winsys_cs
*cs
= ctx
->cs
;
39 struct si_resource
*buffer
;
41 unsigned num_backends
= ctx
->screen
->info
.r600_num_backends
;
44 /* if backend_map query is supported by the kernel */
45 if (ctx
->screen
->info
.r600_backend_map_valid
) {
46 unsigned num_tile_pipes
= ctx
->screen
->info
.r600_num_tile_pipes
;
47 unsigned backend_map
= ctx
->screen
->info
.r600_backend_map
;
48 unsigned item_width
, item_mask
;
50 if (ctx
->chip_class
>= CAYMAN
) {
55 while(num_tile_pipes
--) {
56 i
= backend_map
& item_mask
;
58 backend_map
>>= item_width
;
61 ctx
->backend_mask
= mask
;
66 /* otherwise backup path for older kernels */
68 /* create buffer for event data */
69 buffer
= si_resource_create_custom(&ctx
->screen
->screen
,
75 /* initialize buffer with zeroes */
76 results
= ctx
->ws
->buffer_map(buffer
->cs_buf
, ctx
->cs
, PIPE_TRANSFER_WRITE
);
80 memset(results
, 0, ctx
->max_db
* 4 * 4);
81 ctx
->ws
->buffer_unmap(buffer
->cs_buf
);
83 /* emit EVENT_WRITE for ZPASS_DONE */
84 va
= r600_resource_va(&ctx
->screen
->screen
, (void *)buffer
);
85 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_EVENT_WRITE
, 2, 0);
86 cs
->buf
[cs
->cdw
++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE
) | EVENT_INDEX(1);
87 cs
->buf
[cs
->cdw
++] = va
;
88 cs
->buf
[cs
->cdw
++] = va
>> 32;
90 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_NOP
, 0, 0);
91 cs
->buf
[cs
->cdw
++] = r600_context_bo_reloc(ctx
, buffer
, RADEON_USAGE_WRITE
);
94 results
= ctx
->ws
->buffer_map(buffer
->cs_buf
, ctx
->cs
, PIPE_TRANSFER_READ
);
96 for(i
= 0; i
< ctx
->max_db
; i
++) {
97 /* at least highest bit will be set if backend is used */
101 ctx
->ws
->buffer_unmap(buffer
->cs_buf
);
105 si_resource_reference(&buffer
, NULL
);
108 ctx
->backend_mask
= mask
;
113 /* fallback to old method - set num_backends lower bits to 1 */
114 ctx
->backend_mask
= (~((uint32_t)0))>>(32-num_backends
);
119 void r600_need_cs_space(struct r600_context
*ctx
, unsigned num_dw
,
120 boolean count_draw_in
)
122 struct r600_atom
*state
;
124 /* The number of dwords we already used in the CS so far. */
125 num_dw
+= ctx
->cs
->cdw
;
128 /* The number of dwords all the dirty states would take. */
129 LIST_FOR_EACH_ENTRY(state
, &ctx
->dirty_states
, head
) {
130 num_dw
+= state
->num_dw
;
133 num_dw
+= ctx
->pm4_dirty_cdwords
;
135 /* The upper-bound of how much a draw command would take. */
136 num_dw
+= SI_MAX_DRAW_CS_DWORDS
;
139 /* Count in queries_suspend. */
140 num_dw
+= ctx
->num_cs_dw_queries_suspend
;
142 /* Count in streamout_end at the end of CS. */
143 num_dw
+= ctx
->num_cs_dw_streamout_end
;
145 /* Count in render_condition(NULL) at the end of CS. */
146 if (ctx
->predicate_drawing
) {
150 /* Count in framebuffer cache flushes at the end of CS. */
151 num_dw
+= 7; /* one SURFACE_SYNC and CACHE_FLUSH_AND_INV (r6xx-only) */
153 /* Save 16 dwords for the fence mechanism. */
156 /* Flush if there's not enough space. */
157 if (num_dw
> RADEON_MAX_CMDBUF_DWORDS
) {
158 radeonsi_flush(&ctx
->context
, NULL
, RADEON_FLUSH_ASYNC
);
162 static void r600_flush_framebuffer(struct r600_context
*ctx
, bool flush_now
)
164 if (!(ctx
->flags
& R600_CONTEXT_DST_CACHES_DIRTY
))
167 ctx
->atom_surface_sync
.flush_flags
|=
168 r600_get_cb_flush_flags(ctx
) |
169 (ctx
->framebuffer
.zsbuf
? S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1) : 0);
172 r600_emit_atom(ctx
, &ctx
->atom_surface_sync
.atom
);
174 r600_atom_dirty(ctx
, &ctx
->atom_surface_sync
.atom
);
177 ctx
->flags
&= ~R600_CONTEXT_DST_CACHES_DIRTY
;
180 void r600_context_flush(struct r600_context
*ctx
, unsigned flags
)
182 struct radeon_winsys_cs
*cs
= ctx
->cs
;
183 struct r600_block
*enable_block
= NULL
;
184 bool queries_suspended
= false;
187 bool streamout_suspended
= false;
193 /* suspend queries */
194 if (ctx
->num_cs_dw_queries_suspend
) {
195 r600_context_queries_suspend(ctx
);
196 queries_suspended
= true;
200 if (ctx
->num_cs_dw_streamout_end
) {
201 r600_context_streamout_end(ctx
);
202 streamout_suspended
= true;
206 r600_flush_framebuffer(ctx
, true);
208 /* partial flush is needed to avoid lockups on some chips with user fences */
209 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_EVENT_WRITE
, 0, 0);
210 cs
->buf
[cs
->cdw
++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH
) | EVENT_INDEX(4);
212 /* force to keep tiling flags */
213 flags
|= RADEON_FLUSH_KEEP_TILING_FLAGS
;
216 ctx
->ws
->cs_flush(ctx
->cs
, flags
);
218 ctx
->pm4_dirty_cdwords
= 0;
222 if (streamout_suspended
) {
223 ctx
->streamout_start
= TRUE
;
224 ctx
->streamout_append_bitmask
= ~0;
229 if (queries_suspended
) {
230 r600_context_queries_resume(ctx
);
233 /* set all valid group as dirty so they get reemited on
236 si_pm4_reset_emitted(ctx
);
239 void r600_context_emit_fence(struct r600_context
*ctx
, struct si_resource
*fence_bo
, unsigned offset
, unsigned value
)
241 struct radeon_winsys_cs
*cs
= ctx
->cs
;
244 r600_need_cs_space(ctx
, 10, FALSE
);
246 va
= r600_resource_va(&ctx
->screen
->screen
, (void*)fence_bo
);
247 va
= va
+ (offset
<< 2);
249 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_EVENT_WRITE
, 0, 0);
250 cs
->buf
[cs
->cdw
++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH
) | EVENT_INDEX(4);
251 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_EVENT_WRITE_EOP
, 4, 0);
252 cs
->buf
[cs
->cdw
++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT
) | EVENT_INDEX(5);
253 cs
->buf
[cs
->cdw
++] = va
& 0xFFFFFFFFUL
; /* ADDRESS_LO */
254 /* DATA_SEL | INT_EN | ADDRESS_HI */
255 cs
->buf
[cs
->cdw
++] = (1 << 29) | (0 << 24) | ((va
>> 32UL) & 0xFF);
256 cs
->buf
[cs
->cdw
++] = value
; /* DATA_LO */
257 cs
->buf
[cs
->cdw
++] = 0; /* DATA_HI */
258 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_NOP
, 0, 0);
259 cs
->buf
[cs
->cdw
++] = r600_context_bo_reloc(ctx
, fence_bo
, RADEON_USAGE_WRITE
);
/* Read one 64-bit counter pair (start/end) from a mapped query buffer and
 * return the delta.  When test_status_bit is set, only return a result if
 * the GPU has marked both values ready (top bit set); otherwise return 0.
 * NOTE(review): the start/end declarations and the return statements were
 * missing from this extraction and were restored — verify against
 * upstream. */
static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
				       bool test_status_bit)
{
	uint32_t *current_result = (uint32_t *)map;
	uint64_t start, end;

	start = (uint64_t)current_result[start_index] |
		(uint64_t)current_result[start_index + 1] << 32;
	end = (uint64_t)current_result[end_index] |
	      (uint64_t)current_result[end_index + 1] << 32;

	if (!test_status_bit ||
	    ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) {
		return end - start;
	}
	return 0;
}
280 static boolean
r600_query_result(struct r600_context
*ctx
, struct r600_query
*query
, boolean wait
)
282 unsigned results_base
= query
->results_start
;
285 map
= ctx
->ws
->buffer_map(query
->buffer
->cs_buf
, ctx
->cs
,
287 (wait
? 0 : PIPE_TRANSFER_DONTBLOCK
));
291 /* count all results across all data blocks */
292 switch (query
->type
) {
293 case PIPE_QUERY_OCCLUSION_COUNTER
:
294 while (results_base
!= query
->results_end
) {
296 r600_query_read_result(map
+ results_base
, 0, 2, true);
297 results_base
= (results_base
+ 16) % query
->buffer
->b
.b
.width0
;
300 case PIPE_QUERY_OCCLUSION_PREDICATE
:
301 while (results_base
!= query
->results_end
) {
302 query
->result
.b
= query
->result
.b
||
303 r600_query_read_result(map
+ results_base
, 0, 2, true) != 0;
304 results_base
= (results_base
+ 16) % query
->buffer
->b
.b
.width0
;
307 case PIPE_QUERY_TIME_ELAPSED
:
308 while (results_base
!= query
->results_end
) {
310 r600_query_read_result(map
+ results_base
, 0, 2, false);
311 results_base
= (results_base
+ query
->result_size
) % query
->buffer
->b
.b
.width0
;
314 case PIPE_QUERY_PRIMITIVES_EMITTED
:
315 /* SAMPLE_STREAMOUTSTATS stores this structure:
317 * u64 NumPrimitivesWritten;
318 * u64 PrimitiveStorageNeeded;
320 * We only need NumPrimitivesWritten here. */
321 while (results_base
!= query
->results_end
) {
323 r600_query_read_result(map
+ results_base
, 2, 6, true);
324 results_base
= (results_base
+ query
->result_size
) % query
->buffer
->b
.b
.width0
;
327 case PIPE_QUERY_PRIMITIVES_GENERATED
:
328 /* Here we read PrimitiveStorageNeeded. */
329 while (results_base
!= query
->results_end
) {
331 r600_query_read_result(map
+ results_base
, 0, 4, true);
332 results_base
= (results_base
+ query
->result_size
) % query
->buffer
->b
.b
.width0
;
335 case PIPE_QUERY_SO_STATISTICS
:
336 while (results_base
!= query
->results_end
) {
337 query
->result
.so
.num_primitives_written
+=
338 r600_query_read_result(map
+ results_base
, 2, 6, true);
339 query
->result
.so
.primitives_storage_needed
+=
340 r600_query_read_result(map
+ results_base
, 0, 4, true);
341 results_base
= (results_base
+ query
->result_size
) % query
->buffer
->b
.b
.width0
;
344 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
345 while (results_base
!= query
->results_end
) {
346 query
->result
.b
= query
->result
.b
||
347 r600_query_read_result(map
+ results_base
, 2, 6, true) !=
348 r600_query_read_result(map
+ results_base
, 0, 4, true);
349 results_base
= (results_base
+ query
->result_size
) % query
->buffer
->b
.b
.width0
;
356 query
->results_start
= query
->results_end
;
357 ctx
->ws
->buffer_unmap(query
->buffer
->cs_buf
);
361 void r600_query_begin(struct r600_context
*ctx
, struct r600_query
*query
)
363 struct radeon_winsys_cs
*cs
= ctx
->cs
;
364 unsigned new_results_end
, i
;
368 r600_need_cs_space(ctx
, query
->num_cs_dw
* 2, TRUE
);
370 new_results_end
= (query
->results_end
+ query
->result_size
) % query
->buffer
->b
.b
.width0
;
372 /* collect current results if query buffer is full */
373 if (new_results_end
== query
->results_start
) {
374 r600_query_result(ctx
, query
, TRUE
);
377 switch (query
->type
) {
378 case PIPE_QUERY_OCCLUSION_COUNTER
:
379 case PIPE_QUERY_OCCLUSION_PREDICATE
:
380 results
= ctx
->ws
->buffer_map(query
->buffer
->cs_buf
, ctx
->cs
, PIPE_TRANSFER_WRITE
);
382 results
= (uint32_t*)((char*)results
+ query
->results_end
);
383 memset(results
, 0, query
->result_size
);
385 /* Set top bits for unused backends */
386 for (i
= 0; i
< ctx
->max_db
; i
++) {
387 if (!(ctx
->backend_mask
& (1<<i
))) {
388 results
[(i
* 4)+1] = 0x80000000;
389 results
[(i
* 4)+3] = 0x80000000;
392 ctx
->ws
->buffer_unmap(query
->buffer
->cs_buf
);
395 case PIPE_QUERY_TIME_ELAPSED
:
397 case PIPE_QUERY_PRIMITIVES_EMITTED
:
398 case PIPE_QUERY_PRIMITIVES_GENERATED
:
399 case PIPE_QUERY_SO_STATISTICS
:
400 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
401 results
= ctx
->ws
->buffer_map(query
->buffer
->cs_buf
, ctx
->cs
, PIPE_TRANSFER_WRITE
);
402 results
= (uint32_t*)((char*)results
+ query
->results_end
);
403 memset(results
, 0, query
->result_size
);
404 ctx
->ws
->buffer_unmap(query
->buffer
->cs_buf
);
410 /* emit begin query */
411 va
= r600_resource_va(&ctx
->screen
->screen
, (void*)query
->buffer
);
412 va
+= query
->results_end
;
414 switch (query
->type
) {
415 case PIPE_QUERY_OCCLUSION_COUNTER
:
416 case PIPE_QUERY_OCCLUSION_PREDICATE
:
417 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_EVENT_WRITE
, 2, 0);
418 cs
->buf
[cs
->cdw
++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE
) | EVENT_INDEX(1);
419 cs
->buf
[cs
->cdw
++] = va
;
420 cs
->buf
[cs
->cdw
++] = (va
>> 32UL) & 0xFF;
422 case PIPE_QUERY_PRIMITIVES_EMITTED
:
423 case PIPE_QUERY_PRIMITIVES_GENERATED
:
424 case PIPE_QUERY_SO_STATISTICS
:
425 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
426 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_EVENT_WRITE
, 2, 0);
427 cs
->buf
[cs
->cdw
++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS
) | EVENT_INDEX(3);
428 cs
->buf
[cs
->cdw
++] = query
->results_end
;
429 cs
->buf
[cs
->cdw
++] = 0;
431 case PIPE_QUERY_TIME_ELAPSED
:
432 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_EVENT_WRITE_EOP
, 4, 0);
433 cs
->buf
[cs
->cdw
++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT
) | EVENT_INDEX(5);
434 cs
->buf
[cs
->cdw
++] = va
;
435 cs
->buf
[cs
->cdw
++] = (3 << 29) | ((va
>> 32UL) & 0xFF);
436 cs
->buf
[cs
->cdw
++] = 0;
437 cs
->buf
[cs
->cdw
++] = 0;
442 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_NOP
, 0, 0);
443 cs
->buf
[cs
->cdw
++] = r600_context_bo_reloc(ctx
, query
->buffer
, RADEON_USAGE_WRITE
);
445 ctx
->num_cs_dw_queries_suspend
+= query
->num_cs_dw
;
448 void r600_query_end(struct r600_context
*ctx
, struct r600_query
*query
)
450 struct radeon_winsys_cs
*cs
= ctx
->cs
;
453 va
= r600_resource_va(&ctx
->screen
->screen
, (void*)query
->buffer
);
455 switch (query
->type
) {
456 case PIPE_QUERY_OCCLUSION_COUNTER
:
457 case PIPE_QUERY_OCCLUSION_PREDICATE
:
458 va
+= query
->results_end
+ 8;
459 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_EVENT_WRITE
, 2, 0);
460 cs
->buf
[cs
->cdw
++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE
) | EVENT_INDEX(1);
461 cs
->buf
[cs
->cdw
++] = va
;
462 cs
->buf
[cs
->cdw
++] = (va
>> 32UL) & 0xFF;
464 case PIPE_QUERY_PRIMITIVES_EMITTED
:
465 case PIPE_QUERY_PRIMITIVES_GENERATED
:
466 case PIPE_QUERY_SO_STATISTICS
:
467 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
468 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_EVENT_WRITE
, 2, 0);
469 cs
->buf
[cs
->cdw
++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS
) | EVENT_INDEX(3);
470 cs
->buf
[cs
->cdw
++] = query
->results_end
+ query
->result_size
/2;
471 cs
->buf
[cs
->cdw
++] = 0;
473 case PIPE_QUERY_TIME_ELAPSED
:
474 va
+= query
->results_end
+ query
->result_size
/2;
475 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_EVENT_WRITE_EOP
, 4, 0);
476 cs
->buf
[cs
->cdw
++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT
) | EVENT_INDEX(5);
477 cs
->buf
[cs
->cdw
++] = va
;
478 cs
->buf
[cs
->cdw
++] = (3 << 29) | ((va
>> 32UL) & 0xFF);
479 cs
->buf
[cs
->cdw
++] = 0;
480 cs
->buf
[cs
->cdw
++] = 0;
485 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_NOP
, 0, 0);
486 cs
->buf
[cs
->cdw
++] = r600_context_bo_reloc(ctx
, query
->buffer
, RADEON_USAGE_WRITE
);
488 query
->results_end
= (query
->results_end
+ query
->result_size
) % query
->buffer
->b
.b
.width0
;
489 ctx
->num_cs_dw_queries_suspend
-= query
->num_cs_dw
;
492 void r600_query_predication(struct r600_context
*ctx
, struct r600_query
*query
, int operation
,
495 struct radeon_winsys_cs
*cs
= ctx
->cs
;
498 if (operation
== PREDICATION_OP_CLEAR
) {
499 r600_need_cs_space(ctx
, 3, FALSE
);
501 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_SET_PREDICATION
, 1, 0);
502 cs
->buf
[cs
->cdw
++] = 0;
503 cs
->buf
[cs
->cdw
++] = PRED_OP(PREDICATION_OP_CLEAR
);
505 unsigned results_base
= query
->results_start
;
509 /* find count of the query data blocks */
510 count
= (query
->buffer
->b
.b
.width0
+ query
->results_end
- query
->results_start
) % query
->buffer
->b
.b
.width0
;
511 count
/= query
->result_size
;
513 r600_need_cs_space(ctx
, 5 * count
, TRUE
);
515 op
= PRED_OP(operation
) | PREDICATION_DRAW_VISIBLE
|
516 (flag_wait
? PREDICATION_HINT_WAIT
: PREDICATION_HINT_NOWAIT_DRAW
);
517 va
= r600_resource_va(&ctx
->screen
->screen
, (void*)query
->buffer
);
519 /* emit predicate packets for all data blocks */
520 while (results_base
!= query
->results_end
) {
521 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_SET_PREDICATION
, 1, 0);
522 cs
->buf
[cs
->cdw
++] = (va
+ results_base
) & 0xFFFFFFFFUL
;
523 cs
->buf
[cs
->cdw
++] = op
| (((va
+ results_base
) >> 32UL) & 0xFF);
524 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_NOP
, 0, 0);
525 cs
->buf
[cs
->cdw
++] = r600_context_bo_reloc(ctx
, query
->buffer
,
527 results_base
= (results_base
+ query
->result_size
) % query
->buffer
->b
.b
.width0
;
529 /* set CONTINUE bit for all packets except the first */
530 op
|= PREDICATION_CONTINUE
;
535 struct r600_query
*r600_context_query_create(struct r600_context
*ctx
, unsigned query_type
)
537 struct r600_query
*query
;
538 unsigned buffer_size
= 4096;
540 query
= CALLOC_STRUCT(r600_query
);
544 query
->type
= query_type
;
546 switch (query_type
) {
547 case PIPE_QUERY_OCCLUSION_COUNTER
:
548 case PIPE_QUERY_OCCLUSION_PREDICATE
:
549 query
->result_size
= 16 * ctx
->max_db
;
550 query
->num_cs_dw
= 6;
552 case PIPE_QUERY_TIME_ELAPSED
:
553 query
->result_size
= 16;
554 query
->num_cs_dw
= 8;
556 case PIPE_QUERY_PRIMITIVES_EMITTED
:
557 case PIPE_QUERY_PRIMITIVES_GENERATED
:
558 case PIPE_QUERY_SO_STATISTICS
:
559 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
560 /* NumPrimitivesWritten, PrimitiveStorageNeeded. */
561 query
->result_size
= 32;
562 query
->num_cs_dw
= 6;
570 /* adjust buffer size to simplify offsets wrapping math */
571 buffer_size
-= buffer_size
% query
->result_size
;
573 /* Queries are normally read by the CPU after
574 * being written by the gpu, hence staging is probably a good
577 query
->buffer
= si_resource_create_custom(&ctx
->screen
->screen
,
580 if (!query
->buffer
) {
587 void r600_context_query_destroy(struct r600_context
*ctx
, struct r600_query
*query
)
589 si_resource_reference(&query
->buffer
, NULL
);
593 boolean
r600_context_query_result(struct r600_context
*ctx
,
594 struct r600_query
*query
,
595 boolean wait
, void *vresult
)
597 boolean
*result_b
= (boolean
*)vresult
;
598 uint64_t *result_u64
= (uint64_t*)vresult
;
599 struct pipe_query_data_so_statistics
*result_so
=
600 (struct pipe_query_data_so_statistics
*)vresult
;
602 if (!r600_query_result(ctx
, query
, wait
))
605 switch (query
->type
) {
606 case PIPE_QUERY_OCCLUSION_COUNTER
:
607 case PIPE_QUERY_PRIMITIVES_EMITTED
:
608 case PIPE_QUERY_PRIMITIVES_GENERATED
:
609 *result_u64
= query
->result
.u64
;
611 case PIPE_QUERY_OCCLUSION_PREDICATE
:
612 case PIPE_QUERY_SO_OVERFLOW_PREDICATE
:
613 *result_b
= query
->result
.b
;
615 case PIPE_QUERY_TIME_ELAPSED
:
616 *result_u64
= (1000000 * query
->result
.u64
) / ctx
->screen
->info
.r600_clock_crystal_freq
;
618 case PIPE_QUERY_SO_STATISTICS
:
619 *result_so
= query
->result
.so
;
627 void r600_context_queries_suspend(struct r600_context
*ctx
)
629 struct r600_query
*query
;
631 LIST_FOR_EACH_ENTRY(query
, &ctx
->active_query_list
, list
) {
632 r600_query_end(ctx
, query
);
634 assert(ctx
->num_cs_dw_queries_suspend
== 0);
637 void r600_context_queries_resume(struct r600_context
*ctx
)
639 struct r600_query
*query
;
641 assert(ctx
->num_cs_dw_queries_suspend
== 0);
643 LIST_FOR_EACH_ENTRY(query
, &ctx
->active_query_list
, list
) {
644 r600_query_begin(ctx
, query
);
648 void r600_context_draw_opaque_count(struct r600_context
*ctx
, struct r600_so_target
*t
)
650 struct radeon_winsys_cs
*cs
= ctx
->cs
;
651 r600_need_cs_space(ctx
, 14 + 21, TRUE
);
653 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_SET_CONTEXT_REG
, 1, 0);
654 cs
->buf
[cs
->cdw
++] = (R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET
- SI_CONTEXT_REG_OFFSET
) >> 2;
655 cs
->buf
[cs
->cdw
++] = 0;
657 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_SET_CONTEXT_REG
, 1, 0);
658 cs
->buf
[cs
->cdw
++] = (R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
- SI_CONTEXT_REG_OFFSET
) >> 2;
659 cs
->buf
[cs
->cdw
++] = t
->stride
>> 2;
662 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_COPY_DW
, 4, 0);
663 cs
->buf
[cs
->cdw
++] = COPY_DW_SRC_IS_MEM
| COPY_DW_DST_IS_REG
;
664 cs
->buf
[cs
->cdw
++] = 0; /* src address lo */
665 cs
->buf
[cs
->cdw
++] = 0; /* src address hi */
666 cs
->buf
[cs
->cdw
++] = R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
>> 2; /* dst register */
667 cs
->buf
[cs
->cdw
++] = 0; /* unused */
670 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_NOP
, 0, 0);
671 cs
->buf
[cs
->cdw
++] = r600_context_bo_reloc(ctx
, t
->filled_size
, RADEON_USAGE_READ
);
673 #if 0 /* I have not found this useful yet. */
674 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_COPY_DW
, 4, 0);
675 cs
->buf
[cs
->cdw
++] = COPY_DW_SRC_IS_REG
| COPY_DW_DST_IS_REG
;
676 cs
->buf
[cs
->cdw
++] = R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
>> 2; /* src register */
677 cs
->buf
[cs
->cdw
++] = 0; /* unused */
678 cs
->buf
[cs
->cdw
++] = R_0085F4_CP_COHER_SIZE
>> 2; /* dst register */
679 cs
->buf
[cs
->cdw
++] = 0; /* unused */
681 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_SET_CONFIG_REG
, 1, 0);
682 cs
->buf
[cs
->cdw
++] = (R_0085F0_CP_COHER_CNTL
- SI_CONFIG_REG_OFFSET
) >> 2;
683 cs
->buf
[cs
->cdw
++] = S_0085F0_SO0_DEST_BASE_ENA(1) << t
->so_index
;
685 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_SET_CONFIG_REG
, 1, 0);
686 cs
->buf
[cs
->cdw
++] = (R_0085F8_CP_COHER_BASE
- SI_CONFIG_REG_OFFSET
) >> 2;
687 cs
->buf
[cs
->cdw
++] = t
->b
.buffer_offset
>> 2;
689 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_NOP
, 0, 0);
690 cs
->buf
[cs
->cdw
++] = r600_context_bo_reloc(ctx
, (struct si_resource
*)t
->b
.buffer
,
693 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_WAIT_REG_MEM
, 5, 0);
694 cs
->buf
[cs
->cdw
++] = WAIT_REG_MEM_EQUAL
; /* wait until the register is equal to the reference value */
695 cs
->buf
[cs
->cdw
++] = R_0085FC_CP_COHER_STATUS
>> 2; /* register */
696 cs
->buf
[cs
->cdw
++] = 0;
697 cs
->buf
[cs
->cdw
++] = 0; /* reference value */
698 cs
->buf
[cs
->cdw
++] = 0xffffffff; /* mask */
699 cs
->buf
[cs
->cdw
++] = 4; /* poll interval */