be363a21e6dbcc61ce84f1b070f966050bbdd0de
[mesa.git] / src / gallium / drivers / nvc0 / nvc0_query.c
1 /*
2 * Copyright 2011 Nouveau Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 *
22 * Authors: Christoph Bumiller
23 */
24
25 #include "nvc0_context.h"
26 #include "nouveau/nv_object.xml.h"
27
/* Driver-private query object backing a gallium pipe_query.
 *
 * Result data lives in a small GART sub-allocation; "data" is a CPU
 * mapping of it.  For rotating queries (rotate != 0) the write position
 * advances by "rotate" bytes on each begin, so that a new begin cannot
 * clobber a result the GPU has not finished writing yet.
 */
struct nvc0_query {
   uint32_t *data;      /* CPU view of the result buffer */
   uint32_t type;       /* PIPE_QUERY_* (or driver-private NVC0_QUERY_*) */
   uint32_t sequence;   /* written back by the GPU; used to detect completion */
   struct nouveau_bo *bo;  /* buffer object holding the results */
   uint32_t base;       /* start of our sub-allocation within bo */
   uint32_t offset;     /* base + i * rotate */
   boolean ready;       /* result known to be available (no GPU wait needed) */
   boolean active;      /* between begin_query and end_query */
   boolean is64bit;     /* hw writes 64-bit values; no 32-bit sequence word */
   uint8_t rotate;      /* bytes to advance per begin; 0 = fixed storage */
   int nesting; /* only used for occlusion queries */
   struct nouveau_mm_allocation *mm;  /* sub-allocation handle for bo/base */
};
42
43 #define NVC0_QUERY_ALLOC_SPACE 256
44
/* Cast the opaque gallium query handle to our private struct. */
static INLINE struct nvc0_query *
nvc0_query(struct pipe_query *pipe)
{
   return (struct nvc0_query *)pipe;
}
50
/* (Re)allocate the result storage of q; size == 0 just frees it.
 *
 * Any previous sub-allocation is released first: immediately when the
 * result was already read back (q->ready), otherwise deferred through
 * fence work, because the GPU may still be writing to it.
 * Returns FALSE on allocation or map failure, leaving q without storage.
 */
static boolean
nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
{
   struct nvc0_screen *screen = nvc0->screen;
   int ret;

   if (q->bo) {
      nouveau_bo_ref(NULL, &q->bo);
      if (q->mm) {
         if (q->ready)
            nouveau_mm_free(q->mm);
         else
            /* GPU writes may be pending: free only after the fence passes */
            nouveau_fence_work(screen->base.fence.current,
                               nouveau_mm_free_work, q->mm);
      }
   }
   if (size) {
      q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
      if (!q->bo)
         return FALSE;
      q->offset = q->base;

      /* NOSYNC: completion is checked/waited for explicitly elsewhere */
      ret = nouveau_bo_map_range(q->bo, q->base, size, NOUVEAU_BO_RD |
                                 NOUVEAU_BO_NOSYNC);
      if (ret) {
         nvc0_query_allocate(nvc0, q, 0);
         return FALSE;
      }
      /* keep the CPU pointer; the map/unmap just establishes it */
      q->data = q->bo->map;
      nouveau_bo_unmap(q->bo);
   }
   return TRUE;
}
84
/* pipe_context::destroy_query - release the result storage, then the
 * query object itself.
 */
static void
nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
{
   struct nvc0_query *q = nvc0_query(pq);

   nvc0_query_allocate(nvc0_context(pipe), q, 0);
   FREE(q);
}
91
92 static struct pipe_query *
93 nvc0_query_create(struct pipe_context *pipe, unsigned type)
94 {
95 struct nvc0_context *nvc0 = nvc0_context(pipe);
96 struct nvc0_query *q;
97 unsigned space = NVC0_QUERY_ALLOC_SPACE;
98
99 q = CALLOC_STRUCT(nvc0_query);
100 if (!q)
101 return NULL;
102
103 switch (type) {
104 case PIPE_QUERY_OCCLUSION_COUNTER:
105 case PIPE_QUERY_OCCLUSION_PREDICATE:
106 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
107 q->rotate = 32;
108 space = NVC0_QUERY_ALLOC_SPACE;
109 break;
110 case PIPE_QUERY_PIPELINE_STATISTICS:
111 q->is64bit = TRUE;
112 space = 512;
113 break;
114 case PIPE_QUERY_SO_STATISTICS:
115 q->is64bit = TRUE;
116 space = 64;
117 break;
118 case PIPE_QUERY_TIME_ELAPSED:
119 case PIPE_QUERY_TIMESTAMP:
120 case PIPE_QUERY_TIMESTAMP_DISJOINT:
121 case PIPE_QUERY_GPU_FINISHED:
122 case PIPE_QUERY_PRIMITIVES_GENERATED:
123 case PIPE_QUERY_PRIMITIVES_EMITTED:
124 space = 32;
125 break;
126 case NVC0_QUERY_TFB_BUFFER_OFFSETS:
127 space = 16;
128 break;
129 default:
130 FREE(q);
131 return NULL;
132 }
133 if (!nvc0_query_allocate(nvc0, q, space)) {
134 FREE(q);
135 return NULL;
136 }
137
138 q->type = type;
139
140 if (q->rotate) {
141 /* we advance before query_begin ! */
142 q->offset -= q->rotate;
143 q->data -= q->rotate / sizeof(*q->data);
144 }
145
146 return (struct pipe_query *)q;
147 }
148
/* Emit a QUERY_GET so the GPU writes the counter selected by the "get"
 * control word, together with q->sequence, to q->bo at q->offset + offset.
 */
static void
nvc0_query_get(struct nouveau_channel *chan, struct nvc0_query *q,
               unsigned offset, uint32_t get)
{
   offset += q->offset;

   MARK_RING (chan, 5, 2);  /* 5 dwords, 2 relocs */
   BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4);
   OUT_RELOCh(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
   OUT_RELOCl(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
   OUT_RING  (chan, q->sequence);
   OUT_RING  (chan, get);
}
162
/* Advance to the next slot of a rotating query's buffer; once the current
 * allocation is exhausted, switch to a fresh one (the old storage is
 * released or fence-deferred by nvc0_query_allocate).
 */
static void
nvc0_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   q->offset += q->rotate;
   q->data += q->rotate / sizeof(*q->data);
   if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
      nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
}
171
/* pipe_context::begin_query - record the counters' starting values (and
 * reset/enable hardware counting where needed) so end_query can report
 * the difference.
 */
static void
nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_channel *chan = nvc0->screen->base.channel;
   struct nvc0_query *q = nvc0_query(pq);

   const int index = 0; /* vertex stream */

   /* For occlusion queries we have to change the storage, because a previous
    * query might set the initial render condition to FALSE even *after* we re-
    * initialized it to TRUE.
    */
   if (q->rotate) {
      nvc0_query_rotate(nvc0, q);

      /* XXX: can we do this with the GPU, and sync with respect to a previous
       * query ?
       */
      q->data[1] = 1; /* initial render condition = TRUE */
      q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
      q->data[5] = 0;
   }
   if (!q->is64bit)
      q->data[0] = q->sequence++; /* the previously used one */

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      q->nesting = nvc0->screen->num_occlusion_queries_active++;
      if (q->nesting) {
         /* counting is already enabled: just snapshot the running counter */
         nvc0_query_get(chan, q, 0x10, 0x0100f002);
      } else {
         BEGIN_RING(chan, RING_3D(COUNTER_RESET), 1);
         OUT_RING  (chan, NVC0_3D_COUNTER_RESET_SAMPLECNT);
         IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 1);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nvc0_query_get(chan, q, 0x10, 0x06805002 | (index << 5));
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nvc0_query_get(chan, q, 0x10, 0x05805002 | (index << 5));
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nvc0_query_get(chan, q, 0x20, 0x05805002 | (index << 5));
      nvc0_query_get(chan, q, 0x30, 0x06805002 | (index << 5));
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_TIME_ELAPSED:
      nvc0_query_get(chan, q, 0x10, 0x00005002);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      /* "begin" snapshots at 0xc0+; end_query writes the matching set at 0
       * and the result is computed as the difference (see nvc0_query_result)
       */
      nvc0_query_get(chan, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
      nvc0_query_get(chan, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
      nvc0_query_get(chan, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
      nvc0_query_get(chan, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
      nvc0_query_get(chan, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nvc0_query_get(chan, q, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nvc0_query_get(chan, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nvc0_query_get(chan, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
      nvc0_query_get(chan, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
      nvc0_query_get(chan, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
      break;
   default:
      /* e.g. TIMESTAMP, GPU_FINISHED: nothing to snapshot at begin */
      break;
   }
   q->ready = FALSE;
   q->active = TRUE;
}
242
/* pipe_context::end_query - emit the final counter writes.  Some query
 * types are valid without a prior begin_query (e.g. GPU_FINISHED), so the
 * per-use setup is repeated here when the query was never activated.
 */
static void
nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_channel *chan = nvc0->screen->base.channel;
   struct nvc0_query *q = nvc0_query(pq);

   const int index = 0; /* for multiple vertex streams */

   if (!q->active) {
      /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
      if (q->rotate)
         nvc0_query_rotate(nvc0, q);
      else
      if (!q->is64bit)
         q->data[0] = q->sequence++;
   }
   q->ready = FALSE;
   q->active = FALSE;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      nvc0_query_get(chan, q, 0, 0x0100f002);
      /* disable hardware sample counting once no query needs it */
      if (--nvc0->screen->num_occlusion_queries_active == 0)
         IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 0);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nvc0_query_get(chan, q, 0, 0x06805002 | (index << 5));
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nvc0_query_get(chan, q, 0, 0x05805002 | (index << 5));
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nvc0_query_get(chan, q, 0x00, 0x05805002 | (index << 5));
      nvc0_query_get(chan, q, 0x10, 0x06805002 | (index << 5));
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      nvc0_query_get(chan, q, 0x00, 0x02005002 | (index << 5));
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_TIME_ELAPSED:
      nvc0_query_get(chan, q, 0, 0x00005002);
      break;
   case PIPE_QUERY_GPU_FINISHED:
      nvc0_query_get(chan, q, 0, 0x1000f010);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      /* end snapshots at offset 0; begin wrote the matching set at 0xc0+ */
      nvc0_query_get(chan, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
      nvc0_query_get(chan, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
      nvc0_query_get(chan, q, 0x20, 0x02802002); /* VP, LAUNCHES */
      nvc0_query_get(chan, q, 0x30, 0x03806002); /* GP, LAUNCHES */
      nvc0_query_get(chan, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nvc0_query_get(chan, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nvc0_query_get(chan, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nvc0_query_get(chan, q, 0x70, 0x0980a002); /* ROP, PIXELS */
      nvc0_query_get(chan, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
      nvc0_query_get(chan, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSETS:
      nvc0_query_get(chan, q, 0x00, 0x1d005002); /* TFB, BUFFER_OFFSET */
      nvc0_query_get(chan, q, 0x04, 0x1d005022);
      nvc0_query_get(chan, q, 0x08, 0x1d005042);
      nvc0_query_get(chan, q, 0x0c, 0x1d005062);
      break;
   default:
      assert(0);
      break;
   }
}
314
315 static INLINE boolean
316 nvc0_query_ready(struct nvc0_query *q)
317 {
318 return q->ready || (!q->is64bit && (q->data[0] == q->sequence));
319 }
320
321 static INLINE boolean
322 nvc0_query_wait(struct nvc0_query *q)
323 {
324 int ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD);
325 if (ret)
326 return FALSE;
327 nouveau_bo_unmap(q->bo);
328 return TRUE;
329 }
330
/* pipe_context::get_query_result - read back the raw counter values and
 * convert them into the gallium result layout.
 *
 * Returns FALSE when wait == FALSE and the result is not yet available
 * (kicking the ring if writes are still pending, so it eventually will
 * be); with wait == TRUE it blocks via a synchronizing bo map.
 * Counter-type queries report end - begin differences.
 */
static boolean
nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
                  boolean wait, void *result)
{
   struct nvc0_query *q = nvc0_query(pq);
   /* aliases of the caller's result buffer at different widths */
   uint64_t *res64 = result;
   uint32_t *res32 = result;
   boolean *res8 = result;
   uint64_t *data64 = (uint64_t *)q->data;
   unsigned i;

   if (!q->ready) /* update ? */
      q->ready = nvc0_query_ready(q);
   if (!q->ready) {
      struct nouveau_channel *chan = nvc0_context(pipe)->screen->base.channel;
      if (!wait) {
         if (nouveau_bo_pending(q->bo) & NOUVEAU_BO_WR) /* for daft apps */
            FIRE_RING(chan);
         return FALSE;
      }
      if (!nvc0_query_wait(q))
         return FALSE;
   }
   q->ready = TRUE;

   switch (q->type) {
   case PIPE_QUERY_GPU_FINISHED:
      res32[0] = 0;
      res8[0] = TRUE;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
      res64[0] = q->data[1] - q->data[5];
      break;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      res32[0] = 0;
      res8[0] = q->data[1] != q->data[5];
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
   case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
      res64[0] = data64[0] - data64[2];
      break;
   case PIPE_QUERY_SO_STATISTICS:
      res64[0] = data64[0] - data64[4]; /* num_primitives_written */
      res64[1] = data64[2] - data64[6]; /* primitives_storage_needed */
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      res32[0] = 0;
      res8[0] = !q->data[1]; /* query writes 1 if there was NO overflow */
      break;
   case PIPE_QUERY_TIMESTAMP:
      res64[0] = data64[1];
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */
      res64[0] = 1000000000; /* reported frequency: timestamps are in ns */
      res32[2] = 0;
      /* res8[8]: "disjoint" flag at byte offset 8, after the u64 frequency */
      res8[8] = (data64[1] == data64[3]) ? FALSE : TRUE;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      res64[0] = data64[1] - data64[3];
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      /* 10 counters: end values at data64[0..], begin values at data64[24..],
       * each entry occupying 2 u64 slots (value + timestamp)
       */
      for (i = 0; i < 10; ++i)
         res64[i] = data64[i * 2] - data64[24 + i * 2];
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSETS:
      res32[0] = q->data[0];
      res32[1] = q->data[1];
      res32[2] = q->data[2];
      res32[3] = q->data[3];
      break;
   default:
      return FALSE;
   }

   return TRUE;
}
407
408 static void
409 nvc0_render_condition(struct pipe_context *pipe,
410 struct pipe_query *pq, uint mode)
411 {
412 struct nvc0_context *nvc0 = nvc0_context(pipe);
413 struct nouveau_channel *chan = nvc0->screen->base.channel;
414 struct nvc0_query *q;
415 uint32_t cond;
416 boolean negated = FALSE;
417 boolean wait =
418 mode != PIPE_RENDER_COND_NO_WAIT &&
419 mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
420
421 if (!pq) {
422 IMMED_RING(chan, RING_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
423 return;
424 }
425 q = nvc0_query(pq);
426
427 /* NOTE: comparison of 2 queries only works if both have completed */
428 switch (q->type) {
429 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
430 /* query writes 1 if there was no overflow */
431 cond = negated ? NVC0_3D_COND_MODE_RES_NON_ZERO :
432 NVC0_3D_COND_MODE_EQUAL;
433 wait = TRUE;
434 break;
435 case PIPE_QUERY_OCCLUSION_COUNTER:
436 case PIPE_QUERY_OCCLUSION_PREDICATE:
437 if (likely(!negated)) {
438 if (unlikely(q->nesting))
439 cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
440 NVC0_3D_COND_MODE_ALWAYS;
441 else
442 cond = NVC0_3D_COND_MODE_RES_NON_ZERO;
443 } else {
444 cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
445 }
446 break;
447 default:
448 assert(!"render condition query not a predicate");
449 mode = NVC0_3D_COND_MODE_ALWAYS;
450 break;
451 }
452
453 if (wait) {
454 MARK_RING (chan, 5, 2);
455 BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4);
456 OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
457 OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
458 OUT_RING (chan, q->sequence);
459 OUT_RING (chan, 0x00001001);
460 }
461
462 MARK_RING (chan, 4, 2);
463 BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3);
464 OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
465 OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
466 OUT_RING (chan, cond);
467 }
468
469 void
470 nvc0_query_pushbuf_submit(struct nvc0_context *nvc0,
471 struct pipe_query *pq, unsigned result_offset)
472 {
473 struct nvc0_query *q = nvc0_query(pq);
474
475 nouveau_pushbuf_submit(nvc0->screen->base.channel,
476 q->bo, q->offset + result_offset, 4);
477 }
478
479 void
480 nvc0_init_query_functions(struct nvc0_context *nvc0)
481 {
482 struct pipe_context *pipe = &nvc0->base.pipe;
483
484 pipe->create_query = nvc0_query_create;
485 pipe->destroy_query = nvc0_query_destroy;
486 pipe->begin_query = nvc0_query_begin;
487 pipe->end_query = nvc0_query_end;
488 pipe->get_query_result = nvc0_query_result;
489 pipe->render_condition = nvc0_render_condition;
490 }