r600g: move streamout state to drivers/radeon
[mesa.git] / src/gallium/drivers/r600/r600_query.c
/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "r600_pipe.h"
#include "r600d.h"
#include "util/u_memory.h"

static bool r600_is_timer_query(unsigned type)
{
	return type == PIPE_QUERY_TIME_ELAPSED ||
	       type == PIPE_QUERY_TIMESTAMP ||
	       type == PIPE_QUERY_TIMESTAMP_DISJOINT;
}

static bool r600_query_needs_begin(unsigned type)
{
	return type != PIPE_QUERY_GPU_FINISHED &&
	       type != PIPE_QUERY_TIMESTAMP;
}

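/* Allocate one 4096-byte staging buffer that begin/end snapshots are appended
 * to; buffers are chained together when one fills up. Occlusion buffers are
 * pre-initialized so that the slots belonging to disabled DBs (backends) read
 * back as valid, zero results. */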
static struct r600_resource *r600_new_query_buffer(struct r600_context *ctx, unsigned type)
{
	unsigned j, i, num_results, buf_size = 4096;
	uint32_t *results;

	/* Non-GPU queries. */
	switch (type) {
	case R600_QUERY_DRAW_CALLS:
	case R600_QUERY_REQUESTED_VRAM:
	case R600_QUERY_REQUESTED_GTT:
	case R600_QUERY_BUFFER_WAIT_TIME:
		return NULL;
	}

	/* Queries are normally read by the CPU after
	 * being written by the gpu, hence staging is probably a good
	 * usage pattern.
	 */
	struct r600_resource *buf = (struct r600_resource*)
		pipe_buffer_create(&ctx->screen->b.b, PIPE_BIND_CUSTOM,
				   PIPE_USAGE_STAGING, buf_size);

	switch (type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		results = r600_buffer_mmap_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
		memset(results, 0, buf_size);

		/* Set top bits for unused backends. */
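		/* Bit 63 of each counter is the "result written" flag tested by
		 * r600_query_read_result; pre-setting it for DBs that never
		 * write makes their slots read as complete, zero-valued results. */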
		num_results = buf_size / (16 * ctx->max_db);
		for (j = 0; j < num_results; j++) {
			for (i = 0; i < ctx->max_db; i++) {
				if (!(ctx->backend_mask & (1<<i))) {
					results[(i * 4)+1] = 0x80000000;
					results[(i * 4)+3] = 0x80000000;
				}
			}
			results += 4 * ctx->max_db;
		}
		ctx->b.ws->buffer_unmap(buf->cs_buf);
		break;
	case PIPE_QUERY_TIME_ELAPSED:
	case PIPE_QUERY_TIMESTAMP:
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
	case PIPE_QUERY_PIPELINE_STATISTICS:
		results = r600_buffer_mmap_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
		memset(results, 0, buf_size);
		ctx->b.ws->buffer_unmap(buf->cs_buf);
		break;
	default:
		assert(0);
	}
	return buf;
}

static void r600_update_occlusion_query_state(struct r600_context *rctx,
					      unsigned type, int diff)
{
	if (type == PIPE_QUERY_OCCLUSION_COUNTER ||
	    type == PIPE_QUERY_OCCLUSION_PREDICATE) {
		bool enable;

		rctx->num_occlusion_queries += diff;
		assert(rctx->num_occlusion_queries >= 0);

		enable = rctx->num_occlusion_queries != 0;

		if (rctx->db_misc_state.occlusion_query_enabled != enable) {
			rctx->db_misc_state.occlusion_query_enabled = enable;
			rctx->db_misc_state.atom.dirty = true;
		}
	}
}

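/* Write the "begin" snapshot of a query at the current results_end offset.
 * When the current buffer is full, it is pushed onto the buffer.previous
 * chain and a fresh one is allocated. Each event packet is followed by a NOP
 * packet carrying the relocation for the query buffer. */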
static void r600_emit_query_begin(struct r600_context *ctx, struct r600_query *query)
{
	struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
	uint64_t va;

	r600_update_occlusion_query_state(ctx, query->type, 1);
	r600_need_cs_space(ctx, query->num_cs_dw * 2, TRUE);

	/* Get a new query buffer if needed. */
	if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
		struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
		*qbuf = query->buffer;
		query->buffer.buf = r600_new_query_buffer(ctx, query->type);
		query->buffer.results_end = 0;
		query->buffer.previous = qbuf;
	}

	/* emit begin query */
	va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer.buf);
	va += query->buffer.results_end;

	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
		cs->buf[cs->cdw++] = 0;
		cs->buf[cs->cdw++] = 0;
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS:
		if (!ctx->num_pipelinestat_queries) {
			cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
			cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_START) | EVENT_INDEX(0);
		}
		ctx->num_pipelinestat_queries++;
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	default:
		assert(0);
	}
	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
	cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE);

	if (!r600_is_timer_query(query->type)) {
		ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
	}
}

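/* Write the "end" snapshot. It lands in the second half of the result slot:
 * +8 bytes within each per-DB pair for occlusion queries, +result_size/2 for
 * the rest. Queries without a begin (TIMESTAMP) just write their single
 * value here. */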
static void r600_emit_query_end(struct r600_context *ctx, struct r600_query *query)
{
	struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
	uint64_t va;

	/* The queries which need begin already called this in begin_query. */
	if (!r600_query_needs_begin(query->type)) {
		r600_need_cs_space(ctx, query->num_cs_dw, FALSE);
	}

	va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer.buf);
	/* emit end query */
	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		va += query->buffer.results_end + 8;
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		va += query->buffer.results_end + query->result_size/2;
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		va += query->buffer.results_end + query->result_size/2;
		/* fall through */
	case PIPE_QUERY_TIMESTAMP:
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
		cs->buf[cs->cdw++] = 0;
		cs->buf[cs->cdw++] = 0;
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS:
		assert(ctx->num_pipelinestat_queries > 0);
		ctx->num_pipelinestat_queries--;
		if (!ctx->num_pipelinestat_queries) {
			cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
			cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_STOP) | EVENT_INDEX(0);
		}
		va += query->buffer.results_end + query->result_size/2;
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	default:
		assert(0);
	}
	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
	cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE);

	query->buffer.results_end += query->result_size;

	if (r600_query_needs_begin(query->type)) {
		if (!r600_is_timer_query(query->type)) {
			ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw;
		}
	}

	r600_update_occlusion_query_state(ctx, query->type, -1);
}

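/* Emit SET_PREDICATION packets for render-condition. Each buffered result
 * costs 5 dwords (the packet plus a NOP relocation); every result after the
 * first carries PREDICATION_CONTINUE so the hardware accumulates them. */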
static void r600_emit_query_predication(struct r600_context *ctx, struct r600_query *query,
					int operation, bool flag_wait)
{
	struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;

	if (operation == PREDICATION_OP_CLEAR) {
		r600_need_cs_space(ctx, 3, FALSE);

		cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
		cs->buf[cs->cdw++] = 0;
		cs->buf[cs->cdw++] = PRED_OP(PREDICATION_OP_CLEAR);
	} else {
		struct r600_query_buffer *qbuf;
		unsigned count;
		uint32_t op;

		/* Find how many results there are. */
		count = 0;
		for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
			count += qbuf->results_end / query->result_size;
		}

		r600_need_cs_space(ctx, 5 * count, TRUE);

		op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
		     (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);

		/* emit predicate packets for all data blocks */
		for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
			unsigned results_base = 0;
			uint64_t va = r600_resource_va(&ctx->screen->b.b, &qbuf->buf->b.b);

			while (results_base < qbuf->results_end) {
				cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
				cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL;
				cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF);
				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
				cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, qbuf->buf, RADEON_USAGE_READ);
				results_base += query->result_size;

				/* set CONTINUE bit for all packets except the first */
				op |= PREDICATION_CONTINUE;
			}
		}
	}
}

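/* result_size is the number of bytes one begin/end pair occupies in the query
 * buffer; num_cs_dw is the command-stream space needed to emit one snapshot
 * (the begin path reserves twice that, so the matching end always fits). */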
static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_query *query;
	bool skip_allocation = false;

	query = CALLOC_STRUCT(r600_query);
	if (query == NULL)
		return NULL;

	query->type = query_type;

	switch (query_type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		query->result_size = 16 * rctx->max_db;
		query->num_cs_dw = 6;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		query->result_size = 16;
		query->num_cs_dw = 8;
		break;
	case PIPE_QUERY_TIMESTAMP:
		query->result_size = 8;
		query->num_cs_dw = 8;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
		query->result_size = 32;
		query->num_cs_dw = 6;
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS:
		/* 11 values on EG, 8 on R600. */
		query->result_size = (rctx->b.chip_class >= EVERGREEN ? 11 : 8) * 16;
		query->num_cs_dw = 8;
		break;
	/* Non-GPU queries. */
	case R600_QUERY_DRAW_CALLS:
	case R600_QUERY_REQUESTED_VRAM:
	case R600_QUERY_REQUESTED_GTT:
	case R600_QUERY_BUFFER_WAIT_TIME:
		skip_allocation = true;
		break;
	default:
		assert(0);
		FREE(query);
		return NULL;
	}

	if (!skip_allocation) {
		query->buffer.buf = r600_new_query_buffer(rctx, query_type);
		if (!query->buffer.buf) {
			FREE(query);
			return NULL;
		}
	}
	return (struct pipe_query*)query;
}

static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
{
	struct r600_query *rquery = (struct r600_query*)query;
	struct r600_query_buffer *prev = rquery->buffer.previous;

	/* Release all query buffers. */
	while (prev) {
		struct r600_query_buffer *qbuf = prev;
		prev = prev->previous;
		pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
		FREE(qbuf);
	}

	pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
	FREE(query);
}

static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;
	struct r600_query_buffer *prev = rquery->buffer.previous;

	if (!r600_query_needs_begin(rquery->type)) {
		assert(0);
		return;
	}

	/* Non-GPU queries. */
	switch (rquery->type) {
	case R600_QUERY_DRAW_CALLS:
		rquery->begin_result = rctx->num_draw_calls;
		return;
	case R600_QUERY_REQUESTED_VRAM:
	case R600_QUERY_REQUESTED_GTT:
		rquery->begin_result = 0;
		return;
	case R600_QUERY_BUFFER_WAIT_TIME:
		rquery->begin_result = rctx->b.ws->query_value(rctx->b.ws, RADEON_BUFFER_WAIT_TIME_NS);
		return;
	}

	/* Discard the old query buffers. */
	while (prev) {
		struct r600_query_buffer *qbuf = prev;
		prev = prev->previous;
		pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
		FREE(qbuf);
	}

	/* Obtain a new buffer if the current one can't be mapped without a stall. */
	if (r600_rings_is_buffer_referenced(rctx, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
	    rctx->b.ws->buffer_is_busy(rquery->buffer.buf->buf, RADEON_USAGE_READWRITE)) {
		pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
		rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
	}

	rquery->buffer.results_end = 0;
	rquery->buffer.previous = NULL;

	r600_emit_query_begin(rctx, rquery);

	if (!r600_is_timer_query(rquery->type)) {
		LIST_ADDTAIL(&rquery->list, &rctx->active_nontimer_queries);
	}
}

static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;

	/* Non-GPU queries. */
	switch (rquery->type) {
	case R600_QUERY_DRAW_CALLS:
		rquery->end_result = rctx->num_draw_calls;
		return;
	case R600_QUERY_REQUESTED_VRAM:
		rquery->end_result = rctx->b.ws->query_value(rctx->b.ws, RADEON_REQUESTED_VRAM_MEMORY);
		return;
	case R600_QUERY_REQUESTED_GTT:
		rquery->end_result = rctx->b.ws->query_value(rctx->b.ws, RADEON_REQUESTED_GTT_MEMORY);
		return;
	case R600_QUERY_BUFFER_WAIT_TIME:
		rquery->end_result = rctx->b.ws->query_value(rctx->b.ws, RADEON_BUFFER_WAIT_TIME_NS);
		return;
	}

	r600_emit_query_end(rctx, rquery);

	if (r600_query_needs_begin(rquery->type) && !r600_is_timer_query(rquery->type)) {
		LIST_DELINIT(&rquery->list);
	}
}

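/* Read one (begin, end) pair of 64-bit counters from a mapped result block.
 * Bit 63 of each counter is set once the value has been written; when
 * test_status_bit is set, pairs that are not yet complete contribute 0
 * instead of a bogus difference. */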
static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
				       bool test_status_bit)
{
	uint32_t *current_result = (uint32_t*)map;
	uint64_t start, end;

	start = (uint64_t)current_result[start_index] |
		(uint64_t)current_result[start_index+1] << 32;
	end = (uint64_t)current_result[end_index] |
	      (uint64_t)current_result[end_index+1] << 32;

	if (!test_status_bit ||
	    ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) {
		return end - start;
	}
	return 0;
}

static boolean r600_get_query_buffer_result(struct r600_context *ctx,
					    struct r600_query *query,
					    struct r600_query_buffer *qbuf,
					    boolean wait,
					    union pipe_query_result *result)
{
	unsigned results_base = 0;
	char *map;

	/* Non-GPU queries. */
	switch (query->type) {
	case R600_QUERY_DRAW_CALLS:
	case R600_QUERY_REQUESTED_VRAM:
	case R600_QUERY_REQUESTED_GTT:
	case R600_QUERY_BUFFER_WAIT_TIME:
		result->u64 = query->end_result - query->begin_result;
		return TRUE;
	}

	map = r600_buffer_mmap_sync_with_rings(ctx, qbuf->buf,
					       PIPE_TRANSFER_READ |
					       (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
	if (!map)
		return FALSE;

	/* count all results across all data blocks */
	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 0, 2, true);
			results_base += 16;
		}
		break;
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		while (results_base != qbuf->results_end) {
			result->b = result->b ||
				r600_query_read_result(map + results_base, 0, 2, true) != 0;
			results_base += 16;
		}
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 0, 2, false);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_TIMESTAMP:
	{
		uint32_t *current_result = (uint32_t*)map;
		result->u64 = (uint64_t)current_result[0] |
			      (uint64_t)current_result[1] << 32;
		break;
	}
	case PIPE_QUERY_PRIMITIVES_EMITTED:
		/* SAMPLE_STREAMOUTSTATS stores this structure:
		 * {
		 *    u64 NumPrimitivesWritten;
		 *    u64 PrimitiveStorageNeeded;
		 * }
		 * We only need NumPrimitivesWritten here. */
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 2, 6, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_PRIMITIVES_GENERATED:
		/* Here we read PrimitiveStorageNeeded. */
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_SO_STATISTICS:
		while (results_base != qbuf->results_end) {
			result->so_statistics.num_primitives_written +=
				r600_query_read_result(map + results_base, 2, 6, true);
			result->so_statistics.primitives_storage_needed +=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		while (results_base != qbuf->results_end) {
			result->b = result->b ||
				r600_query_read_result(map + results_base, 2, 6, true) !=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS:
		if (ctx->b.chip_class >= EVERGREEN) {
			while (results_base != qbuf->results_end) {
				result->pipeline_statistics.ps_invocations +=
					r600_query_read_result(map + results_base, 0, 22, false);
				result->pipeline_statistics.c_primitives +=
					r600_query_read_result(map + results_base, 2, 24, false);
				result->pipeline_statistics.c_invocations +=
					r600_query_read_result(map + results_base, 4, 26, false);
				result->pipeline_statistics.vs_invocations +=
					r600_query_read_result(map + results_base, 6, 28, false);
				result->pipeline_statistics.gs_invocations +=
					r600_query_read_result(map + results_base, 8, 30, false);
				result->pipeline_statistics.gs_primitives +=
					r600_query_read_result(map + results_base, 10, 32, false);
				result->pipeline_statistics.ia_primitives +=
					r600_query_read_result(map + results_base, 12, 34, false);
				result->pipeline_statistics.ia_vertices +=
					r600_query_read_result(map + results_base, 14, 36, false);
				result->pipeline_statistics.hs_invocations +=
					r600_query_read_result(map + results_base, 16, 38, false);
				result->pipeline_statistics.ds_invocations +=
					r600_query_read_result(map + results_base, 18, 40, false);
				result->pipeline_statistics.cs_invocations +=
					r600_query_read_result(map + results_base, 20, 42, false);
				results_base += query->result_size;
			}
		} else {
			while (results_base != qbuf->results_end) {
				result->pipeline_statistics.ps_invocations +=
					r600_query_read_result(map + results_base, 0, 16, false);
				result->pipeline_statistics.c_primitives +=
					r600_query_read_result(map + results_base, 2, 18, false);
				result->pipeline_statistics.c_invocations +=
					r600_query_read_result(map + results_base, 4, 20, false);
				result->pipeline_statistics.vs_invocations +=
					r600_query_read_result(map + results_base, 6, 22, false);
				result->pipeline_statistics.gs_invocations +=
					r600_query_read_result(map + results_base, 8, 24, false);
				result->pipeline_statistics.gs_primitives +=
					r600_query_read_result(map + results_base, 10, 26, false);
				result->pipeline_statistics.ia_primitives +=
					r600_query_read_result(map + results_base, 12, 28, false);
				result->pipeline_statistics.ia_vertices +=
					r600_query_read_result(map + results_base, 14, 30, false);
				results_base += query->result_size;
			}
		}
#if 0 /* for testing */
		printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
		       "DS=%llu, GS=%llu, GS prims=%llu, Clipper=%llu, "
		       "Clipper prims=%llu, PS=%llu, CS=%llu\n",
		       result->pipeline_statistics.ia_vertices,
		       result->pipeline_statistics.ia_primitives,
		       result->pipeline_statistics.vs_invocations,
		       result->pipeline_statistics.hs_invocations,
		       result->pipeline_statistics.ds_invocations,
		       result->pipeline_statistics.gs_invocations,
		       result->pipeline_statistics.gs_primitives,
		       result->pipeline_statistics.c_invocations,
		       result->pipeline_statistics.c_primitives,
		       result->pipeline_statistics.ps_invocations,
		       result->pipeline_statistics.cs_invocations);
#endif
		break;
	default:
		assert(0);
	}

	ctx->b.ws->buffer_unmap(qbuf->buf->cs_buf);
	return TRUE;
}

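/* Sum the partial results from every buffer in the chain. Time values are
 * converted from crystal-clock ticks to nanoseconds using the crystal
 * frequency reported by the winsys (r600_clock_crystal_freq, in kHz). */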
static boolean r600_get_query_result(struct pipe_context *ctx,
				     struct pipe_query *query,
				     boolean wait, union pipe_query_result *result)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;
	struct r600_query_buffer *qbuf;

	util_query_clear_result(result, rquery->type);

	for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) {
		if (!r600_get_query_buffer_result(rctx, rquery, qbuf, wait, result)) {
			return FALSE;
		}
	}

	/* Convert the time to expected units. */
	if (rquery->type == PIPE_QUERY_TIME_ELAPSED ||
	    rquery->type == PIPE_QUERY_TIMESTAMP) {
		result->u64 = (1000000 * result->u64) / rctx->screen->b.info.r600_clock_crystal_freq;
	}
	return TRUE;
}

static void r600_render_condition(struct pipe_context *ctx,
				  struct pipe_query *query,
				  boolean condition,
				  uint mode)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;
	bool wait_flag = false;

	rctx->current_render_cond = query;
	rctx->current_render_cond_cond = condition;
	rctx->current_render_cond_mode = mode;

	if (query == NULL) {
		if (rctx->predicate_drawing) {
			rctx->predicate_drawing = false;
			r600_emit_query_predication(rctx, NULL, PREDICATION_OP_CLEAR, false);
		}
		return;
	}

	if (mode == PIPE_RENDER_COND_WAIT ||
	    mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
		wait_flag = true;
	}

	rctx->predicate_drawing = true;

	switch (rquery->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		r600_emit_query_predication(rctx, rquery, PREDICATION_OP_ZPASS, wait_flag);
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		r600_emit_query_predication(rctx, rquery, PREDICATION_OP_PRIMCOUNT, wait_flag);
		break;
	default:
		assert(0);
	}
}

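/* Suspend/resume are used when the command stream is flushed: active
 * non-timer queries are ended before the flush and restarted in the new CS,
 * so their results keep accumulating across the flush. */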
void r600_suspend_nontimer_queries(struct r600_context *ctx)
{
	struct r600_query *query;

	LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
		r600_emit_query_end(ctx, query);
	}
	assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);
}

void r600_resume_nontimer_queries(struct r600_context *ctx)
{
	struct r600_query *query;

	assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);

	LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
		r600_emit_query_begin(ctx, query);
	}
}

void r600_init_query_functions(struct r600_context *rctx)
{
	rctx->b.b.create_query = r600_create_query;
	rctx->b.b.destroy_query = r600_destroy_query;
	rctx->b.b.begin_query = r600_begin_query;
	rctx->b.b.end_query = r600_end_query;
	rctx->b.b.get_query_result = r600_get_query_result;

	if (rctx->screen->b.info.r600_num_backends > 0)
		rctx->b.b.render_condition = r600_render_condition;
}