/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include <stdio.h>	/* fprintf */
#include <unistd.h>	/* usleep */

#include "si_pipe.h"	/* assumed driver header providing struct si_context; not present in the extracted text */
#include "../radeon/r600_cs.h"
#include "util/u_memory.h"

#define GROUP_FORCE_NEW_BLOCK	0
/* Get backends mask */
void si_get_backend_mask(struct si_context *ctx)
{
        struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
        struct r600_resource *buffer;
        uint32_t *results;
        unsigned num_backends = ctx->screen->b.info.r600_num_backends;
        unsigned i, mask = 0;
        uint64_t va;

        /* if backend_map query is supported by the kernel */
        if (ctx->screen->b.info.r600_backend_map_valid) {
                unsigned num_tile_pipes = ctx->screen->b.info.r600_num_tile_pipes;
                unsigned backend_map = ctx->screen->b.info.r600_backend_map;
                unsigned item_width = 4, item_mask = 0x7;

                while (num_tile_pipes--) {
                        i = backend_map & item_mask;
                        mask |= (1 << i);
                        backend_map >>= item_width;
                }
                if (mask != 0) {
                        ctx->backend_mask = mask;
                        return;
                }
        }

        /* otherwise backup path for older kernels */

        /* create buffer for event data */
        buffer = si_resource_create_custom(&ctx->screen->b.b,
                                           PIPE_USAGE_STAGING,
                                           ctx->max_db * 16);
        if (!buffer)
                goto err;

        /* initialize buffer with zeroes */
        results = ctx->b.ws->buffer_map(buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
        if (results) {
                memset(results, 0, ctx->max_db * 4 * 4);
                ctx->b.ws->buffer_unmap(buffer->cs_buf);

                /* emit EVENT_WRITE for ZPASS_DONE */
                cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
                cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
                va = r600_resource_va(&ctx->screen->b.b, (void *)buffer);
                cs->buf[cs->cdw++] = va;
                cs->buf[cs->cdw++] = va >> 32;

                cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
                cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, buffer, RADEON_USAGE_WRITE);

                /* analyze results */
                results = ctx->b.ws->buffer_map(buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_READ);
                if (results) {
                        for (i = 0; i < ctx->max_db; i++) {
                                /* at least highest bit will be set if backend is used */
                                if (results[i*4 + 1])
                                        mask |= (1 << i);
                        }
                        ctx->b.ws->buffer_unmap(buffer->cs_buf);
                }
        }

        r600_resource_reference(&buffer, NULL);

        if (mask != 0) {
                ctx->backend_mask = mask;
                return;
        }

err:
        /* fallback to old method - set num_backends lower bits to 1 */
        ctx->backend_mask = (~((uint32_t)0)) >> (32 - num_backends);
}
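/*
 * Worked example of the kernel backend_map decoding in si_get_backend_mask()
 * above (illustrative values, not from real hardware): with
 * backend_map = 0x31 and num_tile_pipes = 2, the loop reads the low 3 bits
 * of each 4-bit item, yielding backend indices 1 and 3, so
 * mask = (1 << 1) | (1 << 3) = 0xA.
 */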
bool si_is_timer_query(unsigned type)
{
        return type == PIPE_QUERY_TIME_ELAPSED ||
               type == PIPE_QUERY_TIMESTAMP ||
               type == PIPE_QUERY_TIMESTAMP_DISJOINT;
}

bool si_query_needs_begin(unsigned type)
{
        return type != PIPE_QUERY_TIMESTAMP;
}
void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
                      boolean count_draw_in)
{
        int i;

        /* The number of dwords we already used in the CS so far. */
        num_dw += ctx->b.rings.gfx.cs->cdw;

        if (count_draw_in) {
                for (i = 0; i < SI_NUM_ATOMS(ctx); i++) {
                        if (ctx->atoms.array[i]->dirty) {
                                num_dw += ctx->atoms.array[i]->num_dw;
                        }
                }

                /* The number of dwords all the dirty states would take. */
                num_dw += ctx->pm4_dirty_cdwords;

                /* The upper-bound of how much a draw command would take. */
                num_dw += SI_MAX_DRAW_CS_DWORDS;
        }

        /* Count in queries_suspend. */
        num_dw += ctx->num_cs_dw_nontimer_queries_suspend;

        /* Count in streamout_end at the end of CS. */
        if (ctx->b.streamout.begin_emitted) {
                num_dw += ctx->b.streamout.num_dw_for_end;
        }

        /* Count in render_condition(NULL) at the end of CS. */
        if (ctx->predicate_drawing) {
                num_dw += 3;
        }

        /* Count in framebuffer cache flushes at the end of CS. */
        num_dw += ctx->atoms.cache_flush->num_dw;

        if (ctx->screen->trace_bo) {
                num_dw += SI_TRACE_CS_DWORDS;
        }

        /* Flush if there's not enough space. */
        if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
                si_flush(&ctx->b.b, NULL, RADEON_FLUSH_ASYNC);
        }
}
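/*
 * Usage sketch (hypothetical dword count): a caller about to emit packets
 * reserves the space first, so the implicit flush can never happen in the
 * middle of a packet:
 *
 *	si_need_cs_space(ctx, 4, FALSE);
 *	cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
 *	... three more dwords ...
 */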
void si_context_flush(struct si_context *ctx, unsigned flags)
{
        struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;

        if (!cs->cdw)
                return;

        /* suspend queries */
        ctx->nontimer_queries_suspended = false;
        if (ctx->num_cs_dw_nontimer_queries_suspend) {
                si_context_queries_suspend(ctx);
                ctx->nontimer_queries_suspended = true;
        }

        ctx->b.streamout.suspended = false;
        if (ctx->b.streamout.begin_emitted) {
                r600_emit_streamout_end(&ctx->b);
                ctx->b.streamout.suspended = true;
        }

        ctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB |
                        R600_CONTEXT_FLUSH_AND_INV_CB_META |
                        R600_CONTEXT_FLUSH_AND_INV_DB |
                        R600_CONTEXT_FLUSH_AND_INV_DB_META |
                        R600_CONTEXT_INV_TEX_CACHE;
        si_emit_cache_flush(&ctx->b, NULL);

        /* this is probably not needed anymore */
        cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
        cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);

        /* force to keep tiling flags */
        flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;

        if (ctx->screen->trace_bo) {
                struct si_screen *sscreen = ctx->screen;
                unsigned i;

                /* Dump the command stream for post-mortem debugging. */
                for (i = 0; i < cs->cdw; i++) {
                        fprintf(stderr, "[%4d] [%5d] 0x%08x\n", sscreen->cs_count, i, cs->buf[i]);
                }
                sscreen->cs_count++;
        }

        /* Flush the CS. */
        ctx->b.ws->cs_flush(ctx->b.rings.gfx.cs, flags, 0);

        if (ctx->screen->trace_bo) {
                struct si_screen *sscreen = ctx->screen;
                unsigned i;

                /* Busy-wait for the IB in up to ten intervals; the 5 ms
                 * interval is an assumption, chosen to match the "i * 5" ms
                 * report below (the original line was lost). */
                for (i = 0; i < 10; i++) {
                        usleep(5000);
                        if (!ctx->b.ws->buffer_is_busy(sscreen->trace_bo->buf, RADEON_USAGE_READWRITE)) {
                                break;
                        }
                }
                if (i == 10) {
                        fprintf(stderr, "timeout on cs lockup likely happen at cs %d dw %d\n",
                                sscreen->trace_ptr[1], sscreen->trace_ptr[0]);
                } else {
                        fprintf(stderr, "cs %d executed in %dms\n", sscreen->trace_ptr[1], i * 5);
                }
        }

        si_begin_new_cs(ctx);
}
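/*
 * si_context_flush() and si_begin_new_cs() work as a pair: the queries and
 * streamout suspended above are resumed in si_begin_new_cs(), so state that
 * spans a CS boundary is re-emitted into the new command stream.
 */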
void si_begin_new_cs(struct si_context *ctx)
{
        ctx->pm4_dirty_cdwords = 0;

        /* Flush read caches at the beginning of CS. */
        ctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
                        R600_CONTEXT_INV_CONST_CACHE |
                        R600_CONTEXT_INV_SHADER_CACHE;

        /* set all valid group as dirty so they get reemitted on
         * next draw command
         */
        si_pm4_reset_emitted(ctx);

        /* The CS initialization should be emitted before everything else. */
        si_pm4_emit(ctx, ctx->queued.named.init);
        ctx->emitted.named.init = ctx->queued.named.init;

        if (ctx->b.streamout.suspended) {
                ctx->b.streamout.append_bitmask = ctx->b.streamout.enabled_mask;
                r600_streamout_buffers_dirty(&ctx->b);
        }

        if (ctx->nontimer_queries_suspended) {
                si_context_queries_resume(ctx);
        }

        si_all_descriptors_begin_new_cs(ctx);
}
static unsigned si_query_read_result(char *map, unsigned start_index, unsigned end_index,
                                     bool test_status_bit)
{
        uint32_t *current_result = (uint32_t*)map;
        uint64_t start, end;

        start = (uint64_t)current_result[start_index] |
                (uint64_t)current_result[start_index+1] << 32;
        end = (uint64_t)current_result[end_index] |
              (uint64_t)current_result[end_index+1] << 32;

        if (!test_status_bit ||
            ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) {
                return end - start;
        }
        return 0;
}
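/*
 * The GPU writes each counter as a begin/end pair of 64-bit values whose
 * top bit doubles as a "result written" flag (see the 0x80000000 markers
 * set in si_query_begin()). Illustrative values: start = 0x8000000000000010
 * and end = 0x8000000000000025 are both valid, so the helper returns
 * end - start = 0x15 samples.
 */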
static boolean si_query_result(struct si_context *ctx, struct si_query *query, boolean wait)
{
        unsigned results_base = query->results_start;
        char *map;

        map = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs,
                                    PIPE_TRANSFER_READ |
                                    (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
        if (!map)
                return FALSE;

        /* count all results across all data blocks */
        switch (query->type) {
        case PIPE_QUERY_OCCLUSION_COUNTER:
                while (results_base != query->results_end) {
                        query->result.u64 +=
                                si_query_read_result(map + results_base, 0, 2, true);
                        results_base = (results_base + 16) % query->buffer->b.b.width0;
                }
                break;
        case PIPE_QUERY_OCCLUSION_PREDICATE:
                while (results_base != query->results_end) {
                        query->result.b = query->result.b ||
                                si_query_read_result(map + results_base, 0, 2, true) != 0;
                        results_base = (results_base + 16) % query->buffer->b.b.width0;
                }
                break;
        case PIPE_QUERY_TIMESTAMP:
        {
                uint32_t *current_result = (uint32_t*)map;
                query->result.u64 = (uint64_t)current_result[0] | (uint64_t)current_result[1] << 32;
                break;
        }
        case PIPE_QUERY_TIME_ELAPSED:
                while (results_base != query->results_end) {
                        query->result.u64 +=
                                si_query_read_result(map + results_base, 0, 2, false);
                        results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
                }
                break;
        case PIPE_QUERY_PRIMITIVES_EMITTED:
                /* SAMPLE_STREAMOUTSTATS stores this structure:
                 * {
                 *    u64 NumPrimitivesWritten;
                 *    u64 PrimitiveStorageNeeded;
                 * }
                 * We only need NumPrimitivesWritten here. */
                while (results_base != query->results_end) {
                        query->result.u64 +=
                                si_query_read_result(map + results_base, 2, 6, true);
                        results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
                }
                break;
        case PIPE_QUERY_PRIMITIVES_GENERATED:
                /* Here we read PrimitiveStorageNeeded. */
                while (results_base != query->results_end) {
                        query->result.u64 +=
                                si_query_read_result(map + results_base, 0, 4, true);
                        results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
                }
                break;
        case PIPE_QUERY_SO_STATISTICS:
                while (results_base != query->results_end) {
                        query->result.so.num_primitives_written +=
                                si_query_read_result(map + results_base, 2, 6, true);
                        query->result.so.primitives_storage_needed +=
                                si_query_read_result(map + results_base, 0, 4, true);
                        results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
                }
                break;
        case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
                while (results_base != query->results_end) {
                        query->result.b = query->result.b ||
                                si_query_read_result(map + results_base, 2, 6, true) !=
                                si_query_read_result(map + results_base, 0, 4, true);
                        results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
                }
                break;
        default:
                assert(0);
        }

        query->results_start = query->results_end;
        ctx->b.ws->buffer_unmap(query->buffer->cs_buf);
        return TRUE;
}
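/*
 * The query buffer is consumed as a ring: results_start and results_end are
 * byte offsets that advance in result_size steps and wrap modulo width0.
 * E.g. (illustrative) with width0 = 4096 and result_size = 32, the offsets
 * wrap back to 0 after 128 data blocks.
 */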
void si_query_begin(struct si_context *ctx, struct si_query *query)
{
        struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
        unsigned new_results_end, i;
        uint32_t *results;
        uint64_t va;

        si_need_cs_space(ctx, query->num_cs_dw * 2, TRUE);

        new_results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0;

        /* collect current results if query buffer is full */
        if (new_results_end == query->results_start) {
                si_query_result(ctx, query, TRUE);
        }

        switch (query->type) {
        case PIPE_QUERY_OCCLUSION_COUNTER:
        case PIPE_QUERY_OCCLUSION_PREDICATE:
                results = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
                if (results) {
                        results = (uint32_t*)((char*)results + query->results_end);
                        memset(results, 0, query->result_size);

                        /* Set top bits for unused backends */
                        for (i = 0; i < ctx->max_db; i++) {
                                if (!(ctx->backend_mask & (1<<i))) {
                                        results[(i * 4)+1] = 0x80000000;
                                        results[(i * 4)+3] = 0x80000000;
                                }
                        }
                        ctx->b.ws->buffer_unmap(query->buffer->cs_buf);
                }
                break;
        case PIPE_QUERY_TIME_ELAPSED:
                break;
        case PIPE_QUERY_PRIMITIVES_EMITTED:
        case PIPE_QUERY_PRIMITIVES_GENERATED:
        case PIPE_QUERY_SO_STATISTICS:
        case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
                results = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
                results = (uint32_t*)((char*)results + query->results_end);
                memset(results, 0, query->result_size);
                ctx->b.ws->buffer_unmap(query->buffer->cs_buf);
                break;
        default:
                assert(0);
        }

        /* emit begin query */
        va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer);
        va += query->results_end;

        switch (query->type) {
        case PIPE_QUERY_OCCLUSION_COUNTER:
        case PIPE_QUERY_OCCLUSION_PREDICATE:
                cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
                cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
                cs->buf[cs->cdw++] = va;
                cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
                break;
        case PIPE_QUERY_PRIMITIVES_EMITTED:
        case PIPE_QUERY_PRIMITIVES_GENERATED:
        case PIPE_QUERY_SO_STATISTICS:
        case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
                cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
                cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
                cs->buf[cs->cdw++] = va;
                cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
                break;
        case PIPE_QUERY_TIME_ELAPSED:
                cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
                cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
                cs->buf[cs->cdw++] = va;
                cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
                cs->buf[cs->cdw++] = 0;
                cs->buf[cs->cdw++] = 0;
                break;
        default:
                assert(0);
        }

        cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
        cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, query->buffer, RADEON_USAGE_WRITE);

        if (!si_is_timer_query(query->type)) {
                ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
        }
}
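/*
 * The num_cs_dw * 2 reservation above keeps room for the matching
 * si_query_end(), so a begin/end pair cannot be split across two command
 * streams by an implicit flush in si_need_cs_space().
 */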
void si_query_end(struct si_context *ctx, struct si_query *query)
{
        struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
        uint64_t va;
        unsigned new_results_end;

        /* The queries which need begin already called this in begin_query. */
        if (!si_query_needs_begin(query->type)) {
                si_need_cs_space(ctx, query->num_cs_dw, TRUE);

                new_results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0;

                /* collect current results if query buffer is full */
                if (new_results_end == query->results_start) {
                        si_query_result(ctx, query, TRUE);
                }
        }

        va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer);
        /* emit end query */
        switch (query->type) {
        case PIPE_QUERY_OCCLUSION_COUNTER:
        case PIPE_QUERY_OCCLUSION_PREDICATE:
                va += query->results_end + 8;
                cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
                cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
                cs->buf[cs->cdw++] = va;
                cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
                break;
        case PIPE_QUERY_PRIMITIVES_EMITTED:
        case PIPE_QUERY_PRIMITIVES_GENERATED:
        case PIPE_QUERY_SO_STATISTICS:
        case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
                va += query->results_end + query->result_size/2;
                cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
                cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
                cs->buf[cs->cdw++] = va;
                cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
                break;
        case PIPE_QUERY_TIME_ELAPSED:
                va += query->results_end + query->result_size/2;
                /* fall through */
        case PIPE_QUERY_TIMESTAMP:
                cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
                cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
                cs->buf[cs->cdw++] = va;
                cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
                cs->buf[cs->cdw++] = 0;
                cs->buf[cs->cdw++] = 0;
                break;
        default:
                assert(0);
        }

        cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
        cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, query->buffer, RADEON_USAGE_WRITE);

        query->results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0;

        if (si_query_needs_begin(query->type) && !si_is_timer_query(query->type)) {
                ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw;
        }
}
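/*
 * For the begin/end query types, only si_query_end() advances results_end:
 * the begin packet writes into the first half of the current data block and
 * the end packet into the second half (8 bytes into each per-DB pair for
 * occlusion), so a block only becomes visible to readers once its end has
 * been emitted.
 */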
void si_query_predication(struct si_context *ctx, struct si_query *query, int operation,
                          int flag_wait)
{
        struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;

        if (operation == PREDICATION_OP_CLEAR) {
                si_need_cs_space(ctx, 3, FALSE);

                cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
                cs->buf[cs->cdw++] = 0;
                cs->buf[cs->cdw++] = PRED_OP(PREDICATION_OP_CLEAR);
        } else {
                unsigned results_base = query->results_start;
                unsigned count;
                uint32_t op;
                uint64_t va;

                /* find count of the query data blocks */
                count = (query->buffer->b.b.width0 + query->results_end - query->results_start) % query->buffer->b.b.width0;
                count /= query->result_size;

                si_need_cs_space(ctx, 5 * count, TRUE);

                op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
                     (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
                va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer);

                /* emit predicate packets for all data blocks */
                while (results_base != query->results_end) {
                        cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
                        cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL;
                        cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF);
                        cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
                        cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx,
                                                                   query->buffer, RADEON_USAGE_READ);
                        results_base = (results_base + query->result_size) % query->buffer->b.b.width0;

                        /* set CONTINUE bit for all packets except the first */
                        op |= PREDICATION_CONTINUE;
                }
        }
}
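/*
 * Example of the block-count math above (illustrative offsets): with
 * width0 = 4096, results_start = 4064 and results_end = 32,
 * count = (4096 + 32 - 4064) % 4096 = 64 bytes, i.e. two 32-byte data
 * blocks, each of which gets its own SET_PREDICATION packet.
 */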
struct si_query *si_context_query_create(struct si_context *ctx, unsigned query_type)
{
        struct si_query *query;
        unsigned buffer_size = 4096;

        query = CALLOC_STRUCT(si_query);
        if (query == NULL)
                return NULL;

        query->type = query_type;

        switch (query_type) {
        case PIPE_QUERY_OCCLUSION_COUNTER:
        case PIPE_QUERY_OCCLUSION_PREDICATE:
                query->result_size = 16 * ctx->max_db;
                query->num_cs_dw = 6;
                break;
        case PIPE_QUERY_TIMESTAMP:
                query->result_size = 8;
                query->num_cs_dw = 8;
                break;
        case PIPE_QUERY_TIME_ELAPSED:
                query->result_size = 16;
                query->num_cs_dw = 8;
                break;
        case PIPE_QUERY_PRIMITIVES_EMITTED:
        case PIPE_QUERY_PRIMITIVES_GENERATED:
        case PIPE_QUERY_SO_STATISTICS:
        case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
                /* NumPrimitivesWritten, PrimitiveStorageNeeded. */
                query->result_size = 32;
                query->num_cs_dw = 6;
                break;
        default:
                assert(0);
                FREE(query);
                return NULL;
        }

        /* adjust buffer size to simplify offsets wrapping math */
        buffer_size -= buffer_size % query->result_size;

        /* Queries are normally read by the CPU after
         * being written by the gpu, hence staging is probably a good
         * usage pattern.
         */
        query->buffer = si_resource_create_custom(&ctx->screen->b.b,
                                                  PIPE_USAGE_STAGING,
                                                  buffer_size);
        if (!query->buffer) {
                FREE(query);
                return NULL;
        }
        return query;
}
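/*
 * The rounding above keeps the ring-offset math exact: e.g. with max_db = 8,
 * occlusion queries use result_size = 128 and 4096 % 128 == 0, so the full
 * buffer is used; a result_size that does not divide 4096 would trim the
 * buffer to the largest multiple that fits.
 */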
void si_context_query_destroy(struct si_context *ctx, struct si_query *query)
{
        r600_resource_reference(&query->buffer, NULL);
        FREE(query);
}
boolean si_context_query_result(struct si_context *ctx,
                                struct si_query *query,
                                boolean wait, void *vresult)
{
        boolean *result_b = (boolean*)vresult;
        uint64_t *result_u64 = (uint64_t*)vresult;
        struct pipe_query_data_so_statistics *result_so =
                (struct pipe_query_data_so_statistics*)vresult;

        if (!si_query_result(ctx, query, wait))
                return FALSE;

        switch (query->type) {
        case PIPE_QUERY_OCCLUSION_COUNTER:
        case PIPE_QUERY_PRIMITIVES_EMITTED:
        case PIPE_QUERY_PRIMITIVES_GENERATED:
                *result_u64 = query->result.u64;
                break;
        case PIPE_QUERY_OCCLUSION_PREDICATE:
        case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
                *result_b = query->result.b;
                break;
        case PIPE_QUERY_TIMESTAMP:
        case PIPE_QUERY_TIME_ELAPSED:
                /* Convert GPU clocks to nanoseconds (crystal freq is in kHz). */
                *result_u64 = (1000000 * query->result.u64) / ctx->screen->b.info.r600_clock_crystal_freq;
                break;
        case PIPE_QUERY_SO_STATISTICS:
                *result_so = query->result.so;
                break;
        default:
                assert(0);
        }
        return TRUE;
}
void si_context_queries_suspend(struct si_context *ctx)
{
        struct si_query *query;

        LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_query_list, list) {
                si_query_end(ctx, query);
        }
        assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);
}

void si_context_queries_resume(struct si_context *ctx)
{
        struct si_query *query;

        assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);

        LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_query_list, list) {
                si_query_begin(ctx, query);
        }
}
void si_trace_emit(struct si_context *sctx)
{
        struct si_screen *sscreen = sctx->screen;
        struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
        uint64_t va;

        va = r600_resource_va(&sscreen->b.b, (void*)sscreen->trace_bo);
        r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sscreen->trace_bo, RADEON_USAGE_READWRITE);
        cs->buf[cs->cdw++] = PKT3(PKT3_WRITE_DATA, 4, 0);
        cs->buf[cs->cdw++] = PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
                             PKT3_WRITE_DATA_WR_CONFIRM |
                             PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME);
        cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL;
        cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFFFFFFFUL;
        cs->buf[cs->cdw++] = cs->cdw;
        cs->buf[cs->cdw++] = sscreen->cs_count;
}
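/*
 * The last two dwords let the post-hang check in si_context_flush() report
 * where execution stopped: trace_ptr[0] holds the CS offset of the most
 * recent trace packet the GPU completed, and trace_ptr[1] the cs_count of
 * that CS, matching the "cs %d dw %d" message and the dump printed there.
 */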