25731c290f69960c175790199a37ec55c2cd6774
[mesa.git] / src / gallium / drivers / r600 / r600_query.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "r600_pipe.h"
24 #include "r600d.h"
25 #include "util/u_memory.h"
26 #include "r600_hw_context_priv.h"
27
28 static struct r600_resource *r600_new_query_buffer(struct r600_context *ctx, unsigned type)
29 {
30 unsigned j, i, num_results, buf_size = 4096;
31 uint32_t *results;
32 /* Queries are normally read by the CPU after
33 * being written by the gpu, hence staging is probably a good
34 * usage pattern.
35 */
36 struct r600_resource *buf = (struct r600_resource*)
37 pipe_buffer_create(&ctx->screen->screen, PIPE_BIND_CUSTOM,
38 PIPE_USAGE_STAGING, buf_size);
39
40 switch (type) {
41 case PIPE_QUERY_OCCLUSION_COUNTER:
42 case PIPE_QUERY_OCCLUSION_PREDICATE:
43 results = ctx->ws->buffer_map(buf->buf, ctx->cs, PIPE_TRANSFER_WRITE);
44 memset(results, 0, buf_size);
45
46 /* Set top bits for unused backends. */
47 num_results = buf_size / (16 * ctx->max_db);
48 for (j = 0; j < num_results; j++) {
49 for (i = 0; i < ctx->max_db; i++) {
50 if (!(ctx->backend_mask & (1<<i))) {
51 results[(i * 4)+1] = 0x80000000;
52 results[(i * 4)+3] = 0x80000000;
53 }
54 }
55 results += 4 * ctx->max_db;
56 }
57 ctx->ws->buffer_unmap(buf->buf);
58 break;
59 case PIPE_QUERY_TIME_ELAPSED:
60 break;
61 case PIPE_QUERY_PRIMITIVES_EMITTED:
62 case PIPE_QUERY_PRIMITIVES_GENERATED:
63 case PIPE_QUERY_SO_STATISTICS:
64 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
65 results = ctx->ws->buffer_map(buf->buf, ctx->cs, PIPE_TRANSFER_WRITE);
66 memset(results, 0, buf_size);
67 ctx->ws->buffer_unmap(buf->buf);
68 break;
69 default:
70 assert(0);
71 }
72 return buf;
73 }
74
75 static void r600_emit_query_begin(struct r600_context *ctx, struct r600_query *query)
76 {
77 struct radeon_winsys_cs *cs = ctx->cs;
78 uint64_t va;
79
80 r600_need_cs_space(ctx, query->num_cs_dw * 2, TRUE);
81
82 /* Get a new query buffer if needed. */
83 if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.b.width0) {
84 struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
85 *qbuf = query->buffer;
86 query->buffer.buf = r600_new_query_buffer(ctx, query->type);
87 query->buffer.results_end = 0;
88 query->buffer.previous = qbuf;
89 }
90
91 /* emit begin query */
92 va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer.buf);
93 va += query->buffer.results_end;
94
95 switch (query->type) {
96 case PIPE_QUERY_OCCLUSION_COUNTER:
97 case PIPE_QUERY_OCCLUSION_PREDICATE:
98 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
99 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
100 cs->buf[cs->cdw++] = va;
101 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
102 break;
103 case PIPE_QUERY_PRIMITIVES_EMITTED:
104 case PIPE_QUERY_PRIMITIVES_GENERATED:
105 case PIPE_QUERY_SO_STATISTICS:
106 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
107 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
108 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
109 cs->buf[cs->cdw++] = va;
110 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
111 break;
112 case PIPE_QUERY_TIME_ELAPSED:
113 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
114 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
115 cs->buf[cs->cdw++] = va;
116 cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
117 cs->buf[cs->cdw++] = 0;
118 cs->buf[cs->cdw++] = 0;
119 break;
120 default:
121 assert(0);
122 }
123 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
124 cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer.buf, RADEON_USAGE_WRITE);
125
126 ctx->num_cs_dw_queries_suspend += query->num_cs_dw;
127 }
128
129 static void r600_emit_query_end(struct r600_context *ctx, struct r600_query *query)
130 {
131 struct radeon_winsys_cs *cs = ctx->cs;
132 uint64_t va;
133
134 va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer.buf);
135 /* emit end query */
136 switch (query->type) {
137 case PIPE_QUERY_OCCLUSION_COUNTER:
138 case PIPE_QUERY_OCCLUSION_PREDICATE:
139 va += query->buffer.results_end + 8;
140 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
141 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
142 cs->buf[cs->cdw++] = va;
143 cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
144 break;
145 case PIPE_QUERY_PRIMITIVES_EMITTED:
146 case PIPE_QUERY_PRIMITIVES_GENERATED:
147 case PIPE_QUERY_SO_STATISTICS:
148 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
149 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
150 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
151 cs->buf[cs->cdw++] = query->buffer.results_end + query->result_size/2;
152 cs->buf[cs->cdw++] = 0;
153 break;
154 case PIPE_QUERY_TIME_ELAPSED:
155 va += query->buffer.results_end + query->result_size/2;
156 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
157 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
158 cs->buf[cs->cdw++] = va;
159 cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
160 cs->buf[cs->cdw++] = 0;
161 cs->buf[cs->cdw++] = 0;
162 break;
163 default:
164 assert(0);
165 }
166 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
167 cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer.buf, RADEON_USAGE_WRITE);
168
169 query->buffer.results_end += query->result_size;
170 ctx->num_cs_dw_queries_suspend -= query->num_cs_dw;
171 }
172
173 static void r600_emit_query_predication(struct r600_context *ctx, struct r600_query *query,
174 int operation, bool flag_wait)
175 {
176 struct radeon_winsys_cs *cs = ctx->cs;
177
178 if (operation == PREDICATION_OP_CLEAR) {
179 r600_need_cs_space(ctx, 3, FALSE);
180
181 cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
182 cs->buf[cs->cdw++] = 0;
183 cs->buf[cs->cdw++] = PRED_OP(PREDICATION_OP_CLEAR);
184 } else {
185 struct r600_query_buffer *qbuf;
186 unsigned count;
187 uint32_t op;
188
189 /* Find how many results there are. */
190 count = 0;
191 for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
192 count += qbuf->results_end / query->result_size;
193 }
194
195 r600_need_cs_space(ctx, 5 * count, TRUE);
196
197 op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
198 (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
199
200 /* emit predicate packets for all data blocks */
201 for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
202 unsigned results_base = 0;
203 uint64_t va = r600_resource_va(&ctx->screen->screen, &qbuf->buf->b.b.b);
204
205 while (results_base < qbuf->results_end) {
206 cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
207 cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL;
208 cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF);
209 cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
210 cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, qbuf->buf, RADEON_USAGE_READ);
211 results_base += query->result_size;
212
213 /* set CONTINUE bit for all packets except the first */
214 op |= PREDICATION_CONTINUE;
215 }
216 } while (qbuf);
217 }
218 }
219
220 static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type)
221 {
222 struct r600_context *rctx = (struct r600_context *)ctx;
223
224 struct r600_query *query;
225
226 query = CALLOC_STRUCT(r600_query);
227 if (query == NULL)
228 return NULL;
229
230 query->type = query_type;
231
232 switch (query_type) {
233 case PIPE_QUERY_OCCLUSION_COUNTER:
234 case PIPE_QUERY_OCCLUSION_PREDICATE:
235 query->result_size = 16 * rctx->max_db;
236 query->num_cs_dw = 6;
237 break;
238 case PIPE_QUERY_TIME_ELAPSED:
239 query->result_size = 16;
240 query->num_cs_dw = 8;
241 break;
242 case PIPE_QUERY_PRIMITIVES_EMITTED:
243 case PIPE_QUERY_PRIMITIVES_GENERATED:
244 case PIPE_QUERY_SO_STATISTICS:
245 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
246 /* NumPrimitivesWritten, PrimitiveStorageNeeded. */
247 query->result_size = 32;
248 query->num_cs_dw = 6;
249 break;
250 default:
251 assert(0);
252 FREE(query);
253 return NULL;
254 }
255
256 query->buffer.buf = r600_new_query_buffer(rctx, query_type);
257 if (!query->buffer.buf) {
258 FREE(query);
259 return NULL;
260 }
261 return (struct pipe_query*)query;
262 }
263
264 static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
265 {
266 struct r600_query *rquery = (struct r600_query*)query;
267 struct r600_query_buffer *prev = rquery->buffer.previous;
268
269 /* Release all query buffers. */
270 while (prev) {
271 struct r600_query_buffer *qbuf = prev;
272 prev = prev->previous;
273 pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
274 FREE(qbuf);
275 }
276
277 pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
278 FREE(query);
279 }
280
281 static void r600_update_occlusion_query_state(struct r600_context *rctx,
282 unsigned type, int diff)
283 {
284 if (type == PIPE_QUERY_OCCLUSION_COUNTER ||
285 type == PIPE_QUERY_OCCLUSION_PREDICATE) {
286 bool enable;
287
288 rctx->num_occlusion_queries += diff;
289 assert(rctx->num_occlusion_queries >= 0);
290
291 enable = rctx->num_occlusion_queries != 0;
292
293 if (rctx->atom_db_misc_state.occlusion_query_enabled != enable) {
294 rctx->atom_db_misc_state.occlusion_query_enabled = enable;
295 r600_atom_dirty(rctx, &rctx->atom_db_misc_state.atom);
296 }
297 }
298 }
299
300 static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query)
301 {
302 struct r600_context *rctx = (struct r600_context *)ctx;
303 struct r600_query *rquery = (struct r600_query *)query;
304 /* Discard the old query buffers. */
305 struct r600_query_buffer *prev = rquery->buffer.previous;
306
307 while (prev) {
308 struct r600_query_buffer *qbuf = prev;
309 prev = prev->previous;
310 pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
311 FREE(qbuf);
312 }
313
314 /* Obtain a new buffer if the current one can't be mapped without a stall. */
315 if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rquery->buffer.buf->cs_buf) ||
316 rctx->ws->buffer_is_busy(rquery->buffer.buf->buf, RADEON_USAGE_READWRITE)) {
317 pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
318 rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
319 }
320
321 rquery->buffer.results_end = 0;
322 rquery->buffer.previous = NULL;
323
324 r600_update_occlusion_query_state(rctx, rquery->type, 1);
325
326 r600_emit_query_begin(rctx, rquery);
327 LIST_ADDTAIL(&rquery->list, &rctx->active_query_list);
328 }
329
330 static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
331 {
332 struct r600_context *rctx = (struct r600_context *)ctx;
333 struct r600_query *rquery = (struct r600_query *)query;
334
335 r600_emit_query_end(rctx, rquery);
336 LIST_DELINIT(&rquery->list);
337
338 r600_update_occlusion_query_state(rctx, rquery->type, -1);
339 }
340
/**
 * Compute the difference between two 64-bit values stored in a result slot.
 *
 * Each value occupies two consecutive dwords, low dword first.
 *
 * The full 64-bit difference is returned; all callers accumulate it into
 * 64-bit fields or compare it, so narrowing it to 32 bits would only lose
 * information.
 *
 * \param map              start of the result slot
 * \param start_index      dword index of the low half of the start value
 * \param end_index        dword index of the low half of the end value
 * \param test_status_bit  if true, the result only counts when bit 63 is
 *                         set in both the start and the end value
 * \return end - start, or 0 when the status bit test fails
 */
static uint64_t r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
				       bool test_status_bit)
{
	uint32_t *current_result = (uint32_t*)map;
	uint64_t start, end;

	start = (uint64_t)current_result[start_index] |
		(uint64_t)current_result[start_index+1] << 32;
	end = (uint64_t)current_result[end_index] |
	      (uint64_t)current_result[end_index+1] << 32;

	/* When bit 63 is set in both values it cancels out in the subtraction. */
	if (!test_status_bit ||
	    ((start & 0x8000000000000000ULL) && (end & 0x8000000000000000ULL))) {
		return end - start;
	}
	return 0;
}
358
359 static boolean r600_get_query_buffer_result(struct r600_context *ctx,
360 struct r600_query *query,
361 struct r600_query_buffer *qbuf,
362 boolean wait,
363 union r600_query_result *result)
364 {
365 unsigned results_base = 0;
366 char *map;
367
368 map = ctx->ws->buffer_map(qbuf->buf->buf, ctx->cs,
369 PIPE_TRANSFER_READ |
370 (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
371 if (!map)
372 return FALSE;
373
374 /* count all results across all data blocks */
375 switch (query->type) {
376 case PIPE_QUERY_OCCLUSION_COUNTER:
377 while (results_base != qbuf->results_end) {
378 result->u64 +=
379 r600_query_read_result(map + results_base, 0, 2, true);
380 results_base += 16;
381 }
382 break;
383 case PIPE_QUERY_OCCLUSION_PREDICATE:
384 while (results_base != qbuf->results_end) {
385 result->b = result->b ||
386 r600_query_read_result(map + results_base, 0, 2, true) != 0;
387 results_base += 16;
388 }
389 break;
390 case PIPE_QUERY_TIME_ELAPSED:
391 while (results_base != qbuf->results_end) {
392 result->u64 +=
393 r600_query_read_result(map + results_base, 0, 2, false);
394 results_base += query->result_size;
395 }
396 break;
397 case PIPE_QUERY_PRIMITIVES_EMITTED:
398 /* SAMPLE_STREAMOUTSTATS stores this structure:
399 * {
400 * u64 NumPrimitivesWritten;
401 * u64 PrimitiveStorageNeeded;
402 * }
403 * We only need NumPrimitivesWritten here. */
404 while (results_base != qbuf->results_end) {
405 result->u64 +=
406 r600_query_read_result(map + results_base, 2, 6, true);
407 results_base += query->result_size;
408 }
409 break;
410 case PIPE_QUERY_PRIMITIVES_GENERATED:
411 /* Here we read PrimitiveStorageNeeded. */
412 while (results_base != qbuf->results_end) {
413 result->u64 +=
414 r600_query_read_result(map + results_base, 0, 4, true);
415 results_base += query->result_size;
416 }
417 break;
418 case PIPE_QUERY_SO_STATISTICS:
419 while (results_base != qbuf->results_end) {
420 result->so.num_primitives_written +=
421 r600_query_read_result(map + results_base, 2, 6, true);
422 result->so.primitives_storage_needed +=
423 r600_query_read_result(map + results_base, 0, 4, true);
424 results_base += query->result_size;
425 }
426 break;
427 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
428 while (results_base != qbuf->results_end) {
429 result->b = result->b ||
430 r600_query_read_result(map + results_base, 2, 6, true) !=
431 r600_query_read_result(map + results_base, 0, 4, true);
432 results_base += query->result_size;
433 }
434 break;
435 default:
436 assert(0);
437 }
438
439 ctx->ws->buffer_unmap(qbuf->buf->buf);
440 return TRUE;
441 }
442
443 static boolean r600_get_query_result(struct pipe_context *ctx,
444 struct pipe_query *query,
445 boolean wait, void *vresult)
446 {
447 struct r600_context *rctx = (struct r600_context *)ctx;
448 struct r600_query *rquery = (struct r600_query *)query;
449 boolean *result_b = (boolean*)vresult;
450 uint64_t *result_u64 = (uint64_t*)vresult;
451 union r600_query_result result;
452 struct pipe_query_data_so_statistics *result_so =
453 (struct pipe_query_data_so_statistics*)vresult;
454 struct r600_query_buffer *qbuf;
455
456 memset(&result, 0, sizeof(result));
457
458 for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) {
459 if (!r600_get_query_buffer_result(rctx, rquery, qbuf, wait, &result)) {
460 return FALSE;
461 }
462 }
463
464 switch (rquery->type) {
465 case PIPE_QUERY_OCCLUSION_COUNTER:
466 case PIPE_QUERY_PRIMITIVES_EMITTED:
467 case PIPE_QUERY_PRIMITIVES_GENERATED:
468 *result_u64 = result.u64;
469 break;
470 case PIPE_QUERY_OCCLUSION_PREDICATE:
471 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
472 *result_b = result.b;
473 break;
474 case PIPE_QUERY_TIME_ELAPSED:
475 *result_u64 = (1000000 * result.u64) / rctx->screen->info.r600_clock_crystal_freq;
476 break;
477 case PIPE_QUERY_SO_STATISTICS:
478 *result_so = result.so;
479 break;
480 default:
481 assert(0);
482 }
483 return TRUE;
484 }
485
486 static void r600_render_condition(struct pipe_context *ctx,
487 struct pipe_query *query,
488 uint mode)
489 {
490 struct r600_context *rctx = (struct r600_context *)ctx;
491 struct r600_query *rquery = (struct r600_query *)query;
492 bool wait_flag = false;
493
494 rctx->current_render_cond = query;
495 rctx->current_render_cond_mode = mode;
496
497 if (query == NULL) {
498 if (rctx->predicate_drawing) {
499 rctx->predicate_drawing = false;
500 r600_emit_query_predication(rctx, NULL, PREDICATION_OP_CLEAR, false);
501 }
502 return;
503 }
504
505 if (mode == PIPE_RENDER_COND_WAIT ||
506 mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
507 wait_flag = true;
508 }
509
510 rctx->predicate_drawing = true;
511
512 switch (rquery->type) {
513 case PIPE_QUERY_OCCLUSION_COUNTER:
514 case PIPE_QUERY_OCCLUSION_PREDICATE:
515 r600_emit_query_predication(rctx, rquery, PREDICATION_OP_ZPASS, wait_flag);
516 break;
517 case PIPE_QUERY_PRIMITIVES_EMITTED:
518 case PIPE_QUERY_PRIMITIVES_GENERATED:
519 case PIPE_QUERY_SO_STATISTICS:
520 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
521 r600_emit_query_predication(rctx, rquery, PREDICATION_OP_PRIMCOUNT, wait_flag);
522 break;
523 default:
524 assert(0);
525 }
526 }
527
528 void r600_suspend_queries(struct r600_context *ctx)
529 {
530 struct r600_query *query;
531
532 LIST_FOR_EACH_ENTRY(query, &ctx->active_query_list, list) {
533 r600_emit_query_end(ctx, query);
534 }
535 assert(ctx->num_cs_dw_queries_suspend == 0);
536 }
537
538 void r600_resume_queries(struct r600_context *ctx)
539 {
540 struct r600_query *query;
541
542 assert(ctx->num_cs_dw_queries_suspend == 0);
543
544 LIST_FOR_EACH_ENTRY(query, &ctx->active_query_list, list) {
545 r600_emit_query_begin(ctx, query);
546 }
547 }
548
549 void r600_init_query_functions(struct r600_context *rctx)
550 {
551 rctx->context.create_query = r600_create_query;
552 rctx->context.destroy_query = r600_destroy_query;
553 rctx->context.begin_query = r600_begin_query;
554 rctx->context.end_query = r600_end_query;
555 rctx->context.get_query_result = r600_get_query_result;
556
557 if (rctx->screen->info.r600_num_backends > 0)
558 rctx->context.render_condition = r600_render_condition;
559 }