gallium: add an index argument to create_query
[mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_query.c
1 /*
2 * Copyright 2011 Nouveau Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Christoph Bumiller
23 */
24
25 #define NVC0_PUSH_EXPLICIT_SPACE_CHECKING
26
27 #include "nvc0/nvc0_context.h"
28 #include "nv_object.xml.h"
29 #include "nvc0/nve4_compute.xml.h"
30 #include "nvc0/nvc0_compute.xml.h"
31
32 #define NVC0_QUERY_STATE_READY 0
33 #define NVC0_QUERY_STATE_ACTIVE 1
34 #define NVC0_QUERY_STATE_ENDED 2
35 #define NVC0_QUERY_STATE_FLUSHED 3
36
/* Driver-side query object: a slice of a GPU buffer that receives the raw
 * counter reports, plus CPU-side bookkeeping (type, lifetime state, buffer
 * rotation for occlusion queries, and the fence used for 64-bit queries).
 */
struct nvc0_query {
   uint32_t *data;     /* CPU mapping of the result area (bo map + base) */
   uint16_t type;      /* PIPE_QUERY_* or driver-specific query type */
   uint16_t index;     /* stream / TFB-buffer index encoded into QUERY_GET */
   int8_t ctr[4];      /* counter slots assigned to this query (MP PM queries) */
   uint32_t sequence;  /* sequence number the GPU writes on completion */
   struct nouveau_bo *bo;
   uint32_t base;
   uint32_t offset; /* base + i * rotate */
   uint8_t state;      /* NVC0_QUERY_STATE_* */
   boolean is64bit;    /* 64-bit result slots; completion tracked via fence */
   uint8_t rotate;     /* bytes to advance per begin (occlusion queries only) */
   int nesting; /* only used for occlusion queries */
   union {
      struct nouveau_mm_allocation *mm; /* backing sub-allocation of bo */
      uint64_t value;                   /* cached value for driver statistics */
   } u;
   struct nouveau_fence *fence;         /* signals completion for 64-bit queries */
};
56
57 #define NVC0_QUERY_ALLOC_SPACE 256
58
59 static void nvc0_mp_pm_query_begin(struct nvc0_context *, struct nvc0_query *);
60 static void nvc0_mp_pm_query_end(struct nvc0_context *, struct nvc0_query *);
61 static boolean nvc0_mp_pm_query_result(struct nvc0_context *,
62 struct nvc0_query *, void *, boolean);
63
64 static INLINE struct nvc0_query *
65 nvc0_query(struct pipe_query *pipe)
66 {
67 return (struct nvc0_query *)pipe;
68 }
69
70 static boolean
71 nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
72 {
73 struct nvc0_screen *screen = nvc0->screen;
74 int ret;
75
76 if (q->bo) {
77 nouveau_bo_ref(NULL, &q->bo);
78 if (q->u.mm) {
79 if (q->state == NVC0_QUERY_STATE_READY)
80 nouveau_mm_free(q->u.mm);
81 else
82 nouveau_fence_work(screen->base.fence.current,
83 nouveau_mm_free_work, q->u.mm);
84 }
85 }
86 if (size) {
87 q->u.mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
88 if (!q->bo)
89 return FALSE;
90 q->offset = q->base;
91
92 ret = nouveau_bo_map(q->bo, 0, screen->base.client);
93 if (ret) {
94 nvc0_query_allocate(nvc0, q, 0);
95 return FALSE;
96 }
97 q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
98 }
99 return TRUE;
100 }
101
102 static void
103 nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
104 {
105 nvc0_query_allocate(nvc0_context(pipe), nvc0_query(pq), 0);
106 nouveau_fence_ref(NULL, &nvc0_query(pq)->fence);
107 FREE(nvc0_query(pq));
108 }
109
110 static struct pipe_query *
111 nvc0_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
112 {
113 struct nvc0_context *nvc0 = nvc0_context(pipe);
114 struct nvc0_query *q;
115 unsigned space = NVC0_QUERY_ALLOC_SPACE;
116
117 q = CALLOC_STRUCT(nvc0_query);
118 if (!q)
119 return NULL;
120
121 switch (type) {
122 case PIPE_QUERY_OCCLUSION_COUNTER:
123 case PIPE_QUERY_OCCLUSION_PREDICATE:
124 q->rotate = 32;
125 space = NVC0_QUERY_ALLOC_SPACE;
126 break;
127 case PIPE_QUERY_PIPELINE_STATISTICS:
128 q->is64bit = TRUE;
129 space = 512;
130 break;
131 case PIPE_QUERY_SO_STATISTICS:
132 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
133 q->is64bit = TRUE;
134 space = 64;
135 break;
136 case PIPE_QUERY_PRIMITIVES_GENERATED:
137 case PIPE_QUERY_PRIMITIVES_EMITTED:
138 q->is64bit = TRUE;
139 space = 32;
140 break;
141 case PIPE_QUERY_TIME_ELAPSED:
142 case PIPE_QUERY_TIMESTAMP:
143 case PIPE_QUERY_TIMESTAMP_DISJOINT:
144 case PIPE_QUERY_GPU_FINISHED:
145 space = 32;
146 break;
147 case NVC0_QUERY_TFB_BUFFER_OFFSET:
148 space = 16;
149 break;
150 default:
151 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
152 if (type >= NVC0_QUERY_DRV_STAT(0) && type <= NVC0_QUERY_DRV_STAT_LAST) {
153 space = 0;
154 q->is64bit = true;
155 q->index = type - NVC0_QUERY_DRV_STAT(0);
156 break;
157 } else
158 #endif
159 if (nvc0->screen->base.device->drm_version >= 0x01000101) {
160 if (type >= NVE4_PM_QUERY(0) && type <= NVE4_PM_QUERY_LAST) {
161 /* for each MP:
162 * [00] = WS0.C0
163 * [04] = WS0.C1
164 * [08] = WS0.C2
165 * [0c] = WS0.C3
166 * [10] = WS1.C0
167 * [14] = WS1.C1
168 * [18] = WS1.C2
169 * [1c] = WS1.C3
170 * [20] = WS2.C0
171 * [24] = WS2.C1
172 * [28] = WS2.C2
173 * [2c] = WS2.C3
174 * [30] = WS3.C0
175 * [34] = WS3.C1
176 * [38] = WS3.C2
177 * [3c] = WS3.C3
178 * [40] = MP.C4
179 * [44] = MP.C5
180 * [48] = MP.C6
181 * [4c] = MP.C7
182 * [50] = WS0.sequence
183 * [54] = WS1.sequence
184 * [58] = WS2.sequence
185 * [5c] = WS3.sequence
186 */
187 space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t);
188 break;
189 } else
190 if (type >= NVC0_PM_QUERY(0) && type <= NVC0_PM_QUERY_LAST) {
191 /* for each MP:
192 * [00] = MP.C0
193 * [04] = MP.C1
194 * [08] = MP.C2
195 * [0c] = MP.C3
196 * [10] = MP.C4
197 * [14] = MP.C5
198 * [18] = MP.C6
199 * [1c] = MP.C7
200 * [20] = MP.sequence
201 */
202 space = (8 + 1) * nvc0->screen->mp_count * sizeof(uint32_t);
203 break;
204 }
205 }
206 debug_printf("invalid query type: %u\n", type);
207 FREE(q);
208 return NULL;
209 }
210 if (!nvc0_query_allocate(nvc0, q, space)) {
211 FREE(q);
212 return NULL;
213 }
214
215 q->type = type;
216
217 if (q->rotate) {
218 /* we advance before query_begin ! */
219 q->offset -= q->rotate;
220 q->data -= q->rotate / sizeof(*q->data);
221 } else
222 if (!q->is64bit)
223 q->data[0] = 0; /* initialize sequence */
224
225 return (struct pipe_query *)q;
226 }
227
228 static void
229 nvc0_query_get(struct nouveau_pushbuf *push, struct nvc0_query *q,
230 unsigned offset, uint32_t get)
231 {
232 offset += q->offset;
233
234 PUSH_SPACE(push, 5);
235 PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
236 BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
237 PUSH_DATAh(push, q->bo->offset + offset);
238 PUSH_DATA (push, q->bo->offset + offset);
239 PUSH_DATA (push, q->sequence);
240 PUSH_DATA (push, get);
241 }
242
243 static void
244 nvc0_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q)
245 {
246 q->offset += q->rotate;
247 q->data += q->rotate / sizeof(*q->data);
248 if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
249 nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
250 }
251
/* pipe_context::begin_query — capture the query's starting counter values.
 *
 * For most types this emits QUERY_GET commands; get_query_result later
 * computes end - begin. The hex 'get' words encode the counter unit and
 * report type of the hardware QUERY_GET method (values match the blob's
 * usage — see envytools/rnndb for the 3D class).
 */
static void
nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q = nvc0_query(pq);

   /* For occlusion queries we have to change the storage, because a previous
    * query might set the initial render condition to FALSE even *after* we re-
    * initialized it to TRUE.
    */
   if (q->rotate) {
      nvc0_query_rotate(nvc0, q);

      /* XXX: can we do this with the GPU, and sync with respect to a previous
       * query ?
       */
      q->data[0] = q->sequence; /* initialize sequence */
      q->data[1] = 1; /* initial render condition = TRUE */
      q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
      q->data[5] = 0;
   }
   q->sequence++;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      /* only the outermost query resets and enables the sample counter */
      q->nesting = nvc0->screen->num_occlusion_queries_active++;
      if (q->nesting) {
         nvc0_query_get(push, q, 0x10, 0x0100f002);
      } else {
         PUSH_SPACE(push, 3);
         BEGIN_NVC0(push, NVC0_3D(COUNTER_RESET), 1);
         PUSH_DATA (push, NVC0_3D_COUNTER_RESET_SAMPLECNT);
         IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      /* q->index << 5 selects the vertex stream in the report selector */
      nvc0_query_get(push, q, 0x10, 0x09005002 | (q->index << 5));
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nvc0_query_get(push, q, 0x10, 0x05805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nvc0_query_get(push, q, 0x20, 0x05805002 | (q->index << 5));
      nvc0_query_get(push, q, 0x30, 0x06805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      nvc0_query_get(push, q, 0x10, 0x03005002 | (q->index << 5));
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      nvc0_query_get(push, q, 0x10, 0x00005002);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      /* begin snapshots land at 0xc0+; query_end writes the 0x00+ half */
      nvc0_query_get(push, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
      nvc0_query_get(push, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
      nvc0_query_get(push, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
      nvc0_query_get(push, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
      nvc0_query_get(push, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nvc0_query_get(push, q, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nvc0_query_get(push, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nvc0_query_get(push, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
      nvc0_query_get(push, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
      nvc0_query_get(push, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
      break;
   default:
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
      if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
          q->type <= NVC0_QUERY_DRV_STAT_LAST) {
         /* NOTE(review): stats with index >= 5 appear to be cumulative
          * (begin value is subtracted at end); the first ones look like
          * current-value gauges — verify against the stat table. */
         if (q->index >= 5)
            q->u.value = nvc0->screen->base.stats.v[q->index];
         else
            q->u.value = 0;
      } else
#endif
      if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
          (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
         nvc0_mp_pm_query_begin(nvc0, q);
      }
      break;
   }
   q->state = NVC0_QUERY_STATE_ACTIVE;
}
335
/* pipe_context::end_query — capture the query's final counter values.
 *
 * Writes the "end" half of the report (at lower offsets than the "begin"
 * half emitted by nvc0_query_begin). Also handles types that never call
 * begin (e.g. TIMESTAMP, GPU_FINISHED).
 */
static void
nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q = nvc0_query(pq);

   if (q->state != NVC0_QUERY_STATE_ACTIVE) {
      /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
      if (q->rotate)
         nvc0_query_rotate(nvc0, q);
      q->sequence++;
   }
   q->state = NVC0_QUERY_STATE_ENDED;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      nvc0_query_get(push, q, 0, 0x0100f002);
      /* disable sample counting once the last active query ends */
      if (--nvc0->screen->num_occlusion_queries_active == 0) {
         PUSH_SPACE(push, 1);
         IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nvc0_query_get(push, q, 0, 0x09005002 | (q->index << 5));
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nvc0_query_get(push, q, 0, 0x05805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nvc0_query_get(push, q, 0x00, 0x05805002 | (q->index << 5));
      nvc0_query_get(push, q, 0x10, 0x06805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      /* TODO: How do we sum over all streams for render condition ? */
      /* PRIMS_DROPPED doesn't write sequence, use a ZERO query to sync on */
      nvc0_query_get(push, q, 0x00, 0x03005002 | (q->index << 5));
      nvc0_query_get(push, q, 0x20, 0x00005002);
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIME_ELAPSED:
      nvc0_query_get(push, q, 0, 0x00005002);
      break;
   case PIPE_QUERY_GPU_FINISHED:
      nvc0_query_get(push, q, 0, 0x1000f010);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      /* end snapshots at 0x00+; matching begin values are at 0xc0+ */
      nvc0_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
      nvc0_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
      nvc0_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
      nvc0_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
      nvc0_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nvc0_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nvc0_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nvc0_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
      nvc0_query_get(push, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
      nvc0_query_get(push, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      /* indexed by TFB buffer instead of by vertex stream */
      nvc0_query_get(push, q, 0x00, 0x0d005002 | (q->index << 5));
      break;
   default:
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
      if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
          q->type <= NVC0_QUERY_DRV_STAT_LAST) {
         q->u.value = nvc0->screen->base.stats.v[q->index] - q->u.value;
         return;
      } else
#endif
      if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
          (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
         nvc0_mp_pm_query_end(nvc0, q);
      }
      break;
   }
   /* 64-bit queries have no sequence word; track completion via a fence */
   if (q->is64bit)
      nouveau_fence_ref(nvc0->screen->base.fence.current, &q->fence);
}
416
417 static INLINE void
418 nvc0_query_update(struct nouveau_client *cli, struct nvc0_query *q)
419 {
420 if (q->is64bit) {
421 if (nouveau_fence_signalled(q->fence))
422 q->state = NVC0_QUERY_STATE_READY;
423 } else {
424 if (q->data[0] == q->sequence)
425 q->state = NVC0_QUERY_STATE_READY;
426 }
427 }
428
/* pipe_context::get_query_result — read back and decode the query result.
 *
 * Returns FALSE if !wait and the result is not ready yet (kicking the
 * pushbuf once so it eventually becomes ready); otherwise decodes the raw
 * report buffer into the union according to the query type's layout.
 */
static boolean
nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
                  boolean wait, union pipe_query_result *result)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_query *q = nvc0_query(pq);
   /* views of the result union at different granularities */
   uint64_t *res64 = (uint64_t*)result;
   uint32_t *res32 = (uint32_t*)result;
   boolean *res8 = (boolean*)result;
   uint64_t *data64 = (uint64_t *)q->data;
   unsigned i;

#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
   if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
       q->type <= NVC0_QUERY_DRV_STAT_LAST) {
      /* CPU-side statistic, already computed at query_end */
      res64[0] = q->u.value;
      return TRUE;
   } else
#endif
   if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
       (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
      return nvc0_mp_pm_query_result(nvc0, q, result, wait);
   }

   if (q->state != NVC0_QUERY_STATE_READY)
      nvc0_query_update(nvc0->screen->base.client, q);

   if (q->state != NVC0_QUERY_STATE_READY) {
      if (!wait) {
         if (q->state != NVC0_QUERY_STATE_FLUSHED) {
            q->state = NVC0_QUERY_STATE_FLUSHED;
            /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
            PUSH_KICK(nvc0->base.pushbuf);
         }
         return FALSE;
      }
      if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->screen->base.client))
         return FALSE;
      NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1);
   }
   q->state = NVC0_QUERY_STATE_READY;

   /* layouts below: end values at low offsets, begin values after them */
   switch (q->type) {
   case PIPE_QUERY_GPU_FINISHED:
      res8[0] = TRUE;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
      res64[0] = q->data[1] - q->data[5];
      break;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      res8[0] = q->data[1] != q->data[5];
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
   case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
      res64[0] = data64[0] - data64[2];
      break;
   case PIPE_QUERY_SO_STATISTICS:
      res64[0] = data64[0] - data64[4];
      res64[1] = data64[2] - data64[6];
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      res8[0] = data64[0] != data64[2];
      break;
   case PIPE_QUERY_TIMESTAMP:
      res64[0] = data64[1];
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* res8[8] is the 'disjoint' boolean following the u64 frequency */
      res64[0] = 1000000000;
      res8[8] = FALSE;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      res64[0] = data64[1] - data64[3];
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      /* 10 counters: end at data64[0..], begin 0xc0 bytes (24 u64s) later */
      for (i = 0; i < 10; ++i)
         res64[i] = data64[i * 2] - data64[24 + i * 2];
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      res32[0] = q->data[1];
      break;
   default:
      assert(0); /* can't happen, we don't create queries with invalid type */
      return FALSE;
   }

   return TRUE;
}
516
517 void
518 nvc0_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq)
519 {
520 struct nvc0_query *q = nvc0_query(pq);
521 unsigned offset = q->offset;
522
523 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) offset += 0x20;
524
525 PUSH_SPACE(push, 5);
526 PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
527 BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
528 PUSH_DATAh(push, q->bo->offset + offset);
529 PUSH_DATA (push, q->bo->offset + offset);
530 PUSH_DATA (push, q->sequence);
531 PUSH_DATA (push, (1 << 12) |
532 NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
533 }
534
535 static void
536 nvc0_render_condition(struct pipe_context *pipe,
537 struct pipe_query *pq,
538 boolean condition, uint mode)
539 {
540 struct nvc0_context *nvc0 = nvc0_context(pipe);
541 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
542 struct nvc0_query *q;
543 uint32_t cond;
544 boolean negated = FALSE;
545 boolean wait =
546 mode != PIPE_RENDER_COND_NO_WAIT &&
547 mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
548
549 nvc0->cond_query = pq;
550 nvc0->cond_cond = condition;
551 nvc0->cond_mode = mode;
552
553 if (!pq) {
554 PUSH_SPACE(push, 1);
555 IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
556 return;
557 }
558 q = nvc0_query(pq);
559
560 /* NOTE: comparison of 2 queries only works if both have completed */
561 switch (q->type) {
562 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
563 cond = negated ? NVC0_3D_COND_MODE_EQUAL :
564 NVC0_3D_COND_MODE_NOT_EQUAL;
565 wait = TRUE;
566 break;
567 case PIPE_QUERY_OCCLUSION_COUNTER:
568 case PIPE_QUERY_OCCLUSION_PREDICATE:
569 if (likely(!negated)) {
570 if (unlikely(q->nesting))
571 cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
572 NVC0_3D_COND_MODE_ALWAYS;
573 else
574 cond = NVC0_3D_COND_MODE_RES_NON_ZERO;
575 } else {
576 cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
577 }
578 break;
579 default:
580 assert(!"render condition query not a predicate");
581 mode = NVC0_3D_COND_MODE_ALWAYS;
582 break;
583 }
584
585 if (wait)
586 nvc0_query_fifo_wait(push, pq);
587
588 PUSH_SPACE(push, 7);
589 PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
590 BEGIN_NVC0(push, NVC0_3D(COND_ADDRESS_HIGH), 3);
591 PUSH_DATAh(push, q->bo->offset + q->offset);
592 PUSH_DATA (push, q->bo->offset + q->offset);
593 PUSH_DATA (push, cond);
594 BEGIN_NVC0(push, NVC0_2D(COND_ADDRESS_HIGH), 2);
595 PUSH_DATAh(push, q->bo->offset + q->offset);
596 PUSH_DATA (push, q->bo->offset + q->offset);
597 }
598
599 void
600 nvc0_query_pushbuf_submit(struct nouveau_pushbuf *push,
601 struct pipe_query *pq, unsigned result_offset)
602 {
603 struct nvc0_query *q = nvc0_query(pq);
604
605 #define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))
606
607 nouveau_pushbuf_space(push, 0, 0, 1);
608 nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
609 NVC0_IB_ENTRY_1_NO_PREFETCH);
610 }
611
612 void
613 nvc0_so_target_save_offset(struct pipe_context *pipe,
614 struct pipe_stream_output_target *ptarg,
615 unsigned index, boolean *serialize)
616 {
617 struct nvc0_so_target *targ = nvc0_so_target(ptarg);
618
619 if (*serialize) {
620 *serialize = FALSE;
621 PUSH_SPACE(nvc0_context(pipe)->base.pushbuf, 1);
622 IMMED_NVC0(nvc0_context(pipe)->base.pushbuf, NVC0_3D(SERIALIZE), 0);
623
624 NOUVEAU_DRV_STAT(nouveau_screen(pipe->screen), gpu_serialize_count, 1);
625 }
626
627 nvc0_query(targ->pq)->index = index;
628
629 nvc0_query_end(pipe, targ->pq);
630 }
631
632
633 /* === DRIVER STATISTICS === */
634
635 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
636
/* Names for the driver-side statistics queries; order must match the
 * indices used with nvc0->screen->base.stats.v[] (NVC0_QUERY_DRV_STAT(i)).
 */
static const char *nvc0_drv_stat_names[] =
{
   "drv-tex_obj_current_count",
   "drv-tex_obj_current_bytes",
   "drv-buf_obj_current_count",
   "drv-buf_obj_current_bytes_vid",
   "drv-buf_obj_current_bytes_sys",
   "drv-tex_transfers_rd",
   "drv-tex_transfers_wr",
   "drv-tex_copy_count",
   "drv-tex_blit_count",
   "drv-tex_cache_flush_count",
   "drv-buf_transfers_rd",
   "drv-buf_transfers_wr",
   "drv-buf_read_bytes_staging_vid",
   "drv-buf_write_bytes_direct",
   "drv-buf_write_bytes_staging_vid",
   "drv-buf_write_bytes_staging_sys",
   "drv-buf_copy_bytes",
   "drv-buf_non_kernel_fence_sync_count",
   "drv-any_non_kernel_fence_sync_count",
   "drv-query_sync_count",
   "drv-gpu_serialize_count",
   "drv-draw_calls_array",
   "drv-draw_calls_indexed",
   "drv-draw_calls_fallback_count",
   "drv-user_buffer_upload_bytes",
   "drv-constbuf_upload_count",
   "drv-constbuf_upload_bytes",
   "drv-pushbuf_count",
   "drv-resource_validate_count"
};
669
670 #endif /* NOUVEAU_ENABLE_DRIVER_STATISTICS */
671
672
673 /* === PERFORMANCE MONITORING COUNTERS for NVE4+ === */
674
675 /* Code to read out MP counters: They are accessible via mmio, too, but let's
676 * just avoid mapping registers in userspace. We'd have to know which MPs are
677 * enabled/present, too, and that information is not presently exposed.
678 * We could add a kernel interface for it, but reading the counters like this
679 * has the advantage of being async (if get_result isn't called immediately).
680 */
/* Pre-assembled NVE4 (Kepler) compute kernel that dumps the 8 MP performance
 * counters (+ per-warp-scheduler counters and sequence words) to the buffer
 * whose address is in c0[0x0]/c0[0x4], laid out as described in
 * nvc0_query_create(). Do not modify the opcodes without reassembling.
 */
static const uint64_t nve4_read_mp_pm_counters_code[] =
{
   /* sched 0x20 0x20 0x20 0x20 0x20 0x20 0x20
    * mov b32 $r8 $tidx
    * mov b32 $r12 $physid
    * mov b32 $r0 $pm0
    * mov b32 $r1 $pm1
    * mov b32 $r2 $pm2
    * mov b32 $r3 $pm3
    * mov b32 $r4 $pm4
    * sched 0x20 0x20 0x23 0x04 0x20 0x04 0x2b
    * mov b32 $r5 $pm5
    * mov b32 $r6 $pm6
    * mov b32 $r7 $pm7
    * set $p0 0x1 eq u32 $r8 0x0
    * mov b32 $r10 c0[0x0]
    * ext u32 $r8 $r12 0x414
    * mov b32 $r11 c0[0x4]
    * sched 0x04 0x2e 0x04 0x20 0x20 0x28 0x04
    * ext u32 $r9 $r12 0x208
    * (not $p0) exit
    * set $p1 0x1 eq u32 $r9 0x0
    * mul $r8 u32 $r8 u32 96
    * mul $r12 u32 $r9 u32 16
    * mul $r13 u32 $r9 u32 4
    * add b32 $r9 $r8 $r13
    * sched 0x28 0x04 0x2c 0x04 0x2c 0x04 0x2c
    * add b32 $r8 $r8 $r12
    * mov b32 $r12 $r10
    * add b32 $r10 $c $r10 $r8
    * mov b32 $r13 $r11
    * add b32 $r11 $r11 0x0 $c
    * add b32 $r12 $c $r12 $r9
    * st b128 wt g[$r10d] $r0q
    * sched 0x4 0x2c 0x20 0x04 0x2e 0x00 0x00
    * mov b32 $r0 c0[0x8]
    * add b32 $r13 $r13 0x0 $c
    * $p1 st b128 wt g[$r12d+0x40] $r4q
    * st b32 wt g[$r12d+0x50] $r0
    * exit */
   0x2202020202020207ULL,
   0x2c00000084021c04ULL,
   0x2c0000000c031c04ULL,
   0x2c00000010001c04ULL,
   0x2c00000014005c04ULL,
   0x2c00000018009c04ULL,
   0x2c0000001c00dc04ULL,
   0x2c00000020011c04ULL,
   0x22b0420042320207ULL,
   0x2c00000024015c04ULL,
   0x2c00000028019c04ULL,
   0x2c0000002c01dc04ULL,
   0x190e0000fc81dc03ULL,
   0x2800400000029de4ULL,
   0x7000c01050c21c03ULL,
   0x280040001002dde4ULL,
   0x204282020042e047ULL,
   0x7000c00820c25c03ULL,
   0x80000000000021e7ULL,
   0x190e0000fc93dc03ULL,
   0x1000000180821c02ULL,
   0x1000000040931c02ULL,
   0x1000000010935c02ULL,
   0x4800000034825c03ULL,
   0x22c042c042c04287ULL,
   0x4800000030821c03ULL,
   0x2800000028031de4ULL,
   0x4801000020a29c03ULL,
   0x280000002c035de4ULL,
   0x0800000000b2dc42ULL,
   0x4801000024c31c03ULL,
   0x9400000000a01fc5ULL,
   0x200002e04202c047ULL,
   0x2800400020001de4ULL,
   0x0800000000d35c42ULL,
   0x9400000100c107c5ULL,
   0x9400000140c01f85ULL,
   0x8000000000001de7ULL
};
760
/* NOTE: intentionally using the same names as NV */
/* Order must match the NVE4_PM_QUERY_* enumeration. */
static const char *nve4_pm_query_names[] =
{
   /* MP counters */
   "prof_trigger_00",
   "prof_trigger_01",
   "prof_trigger_02",
   "prof_trigger_03",
   "prof_trigger_04",
   "prof_trigger_05",
   "prof_trigger_06",
   "prof_trigger_07",
   "warps_launched",
   "threads_launched",
   "sm_cta_launched",
   "inst_issued1",
   "inst_issued2",
   "inst_executed",
   "local_load",
   "local_store",
   "shared_load",
   "shared_store",
   "l1_local_load_hit",
   "l1_local_load_miss",
   "l1_local_store_hit",
   "l1_local_store_miss",
   "gld_request",
   "gst_request",
   "l1_global_load_hit",
   "l1_global_load_miss",
   "uncached_global_load_transaction",
   "global_store_transaction",
   "branch",
   "divergent_branch",
   "active_warps",
   "active_cycles",
   "inst_issued",
   "atom_count",
   "gred_count",
   "shared_load_replay",
   "shared_store_replay",
   "local_load_transactions",
   "local_store_transactions",
   "l1_shared_load_transactions",
   "l1_shared_store_transactions",
   "global_ld_mem_divergence_replays",
   "global_st_mem_divergence_replays",
   /* metrics, i.e. functions of the MP counters */
   "metric-ipc", /* inst_executed, clock */
   "metric-ipac", /* inst_executed, active_cycles */
   "metric-ipec", /* inst_executed, (bool)inst_executed */
   "metric-achieved_occupancy", /* active_warps, active_cycles */
   "metric-sm_efficiency", /* active_cycles, clock */
   "metric-inst_replay_overhead" /* inst_issued, inst_executed */
};
816
/* For simplicity, we will allocate as many group slots as we allocate counter
 * slots. This means that a single counter which wants to source from 2 groups
 * will have to be declared as using 2 counter slots. This shouldn't really be
 * a problem because such queries don't make much sense ... (unless someone is
 * really creative).
 */
/* Hardware configuration of a single MP performance counter slot. */
struct nvc0_mp_counter_cfg
{
   uint32_t func    : 16; /* mask or 4-bit logic op (depending on mode) */
   uint32_t mode    : 4;  /* LOGOP,B6,LOGOP_B6(_PULSE) */
   uint32_t num_src : 3;  /* number of sources (1 - 6, only for NVC0:NVE4) */
   uint32_t sig_dom : 1;  /* if 0, MP_PM_A (per warp-sched), if 1, MP_PM_B */
   uint32_t sig_sel : 8;  /* signal group */
   uint64_t src_sel;      /* signal selection for up to 6 sources (48 bit) */
};
832
833 #define NVC0_COUNTER_OPn_SUM 0
834 #define NVC0_COUNTER_OPn_OR 1
835 #define NVC0_COUNTER_OPn_AND 2
836 #define NVC0_COUNTER_OP2_REL_SUM_MM 3 /* (sum(ctr0) - sum(ctr1)) / sum(ctr0) */
837 #define NVC0_COUNTER_OP2_DIV_SUM_M0 4 /* sum(ctr0) / ctr1 of MP[0]) */
838 #define NVC0_COUNTER_OP2_AVG_DIV_MM 5 /* avg(ctr0 / ctr1) */
839 #define NVC0_COUNTER_OP2_AVG_DIV_M0 6 /* avg(ctr0) / ctr1 of MP[0]) */
840
/* A PM query: up to 4 counter configs combined by 'op', scaled by norm. */
struct nvc0_mp_pm_query_cfg
{
   struct nvc0_mp_counter_cfg ctr[4]; /* counter slots this query uses */
   uint8_t num_counters;              /* how many of ctr[] are valid */
   uint8_t op;                        /* NVC0_COUNTER_OP* combining rule */
   uint8_t norm[2]; /* normalization num,denom */
};
848
/* Table-entry helpers: _Q1A/_Q1B define single-counter queries sourced from
 * domain A (per warp scheduler) or B (per MP); _M2A/_M2B/_M2AB define
 * two-counter metrics with the given combining op and normalization.
 */
#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
   { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
   { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g1, s1 }, \
   {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
   { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g0, s0 }, \
   { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
   {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
   { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
   { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
   {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
863
/* NOTES:
 * active_warps: bit 0 alternates btw 0 and 1 for odd nr of warps
 * inst_executed etc.: we only count a single warp scheduler
 * metric-ipXc: we simply multiply by 4 to account for the 4 warp schedulers;
 * this is inaccurate !
 */
/* Counter configuration table, indexed by NVE4_PM_QUERY_* (designated
 * initializers via the _Q*/_M* macros above). */
static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] =
{
   _Q1A(PROF_TRIGGER_0, 0x0001, B6, USER, 0x00000000, 1, 1),
   _Q1A(PROF_TRIGGER_1, 0x0001, B6, USER, 0x00000004, 1, 1),
   _Q1A(PROF_TRIGGER_2, 0x0001, B6, USER, 0x00000008, 1, 1),
   _Q1A(PROF_TRIGGER_3, 0x0001, B6, USER, 0x0000000c, 1, 1),
   _Q1A(PROF_TRIGGER_4, 0x0001, B6, USER, 0x00000010, 1, 1),
   _Q1A(PROF_TRIGGER_5, 0x0001, B6, USER, 0x00000014, 1, 1),
   _Q1A(PROF_TRIGGER_6, 0x0001, B6, USER, 0x00000018, 1, 1),
   _Q1A(PROF_TRIGGER_7, 0x0001, B6, USER, 0x0000001c, 1, 1),
   _Q1A(LAUNCHED_WARPS,    0x0001, B6, LAUNCH, 0x00000004, 1, 1),
   _Q1A(LAUNCHED_THREADS,  0x003f, B6, LAUNCH, 0x398a4188, 1, 1),
   _Q1B(LAUNCHED_CTA,      0x0001, B6, WARP, 0x0000001c, 1, 1),
   _Q1A(INST_ISSUED1,  0x0001, B6, ISSUE, 0x00000004, 1, 1),
   _Q1A(INST_ISSUED2,  0x0001, B6, ISSUE, 0x00000008, 1, 1),
   _Q1A(INST_ISSUED,   0x0003, B6, ISSUE, 0x00000104, 1, 1),
   _Q1A(INST_EXECUTED, 0x0003, B6, EXEC,  0x00000398, 1, 1),
   _Q1A(LD_SHARED,   0x0001, B6, LDST, 0x00000000, 1, 1),
   _Q1A(ST_SHARED,   0x0001, B6, LDST, 0x00000004, 1, 1),
   _Q1A(LD_LOCAL,    0x0001, B6, LDST, 0x00000008, 1, 1),
   _Q1A(ST_LOCAL,    0x0001, B6, LDST, 0x0000000c, 1, 1),
   _Q1A(GLD_REQUEST, 0x0001, B6, LDST, 0x00000010, 1, 1),
   _Q1A(GST_REQUEST, 0x0001, B6, LDST, 0x00000014, 1, 1),
   _Q1B(L1_LOCAL_LOAD_HIT,   0x0001, B6, L1, 0x00000000, 1, 1),
   _Q1B(L1_LOCAL_LOAD_MISS,  0x0001, B6, L1, 0x00000004, 1, 1),
   _Q1B(L1_LOCAL_STORE_HIT,  0x0001, B6, L1, 0x00000008, 1, 1),
   _Q1B(L1_LOCAL_STORE_MISS, 0x0001, B6, L1, 0x0000000c, 1, 1),
   _Q1B(L1_GLOBAL_LOAD_HIT,  0x0001, B6, L1, 0x00000010, 1, 1),
   _Q1B(L1_GLOBAL_LOAD_MISS, 0x0001, B6, L1, 0x00000014, 1, 1),
   _Q1B(GLD_TRANSACTIONS_UNCACHED, 0x0001, B6, MEM, 0x00000000, 1, 1),
   _Q1B(GST_TRANSACTIONS,          0x0001, B6, MEM, 0x00000004, 1, 1),
   _Q1A(BRANCH,           0x0001, B6, BRANCH, 0x0000000c, 1, 1),
   _Q1A(BRANCH_DIVERGENT, 0x0001, B6, BRANCH, 0x00000010, 1, 1),
   _Q1B(ACTIVE_WARPS,  0x003f, B6, WARP, 0x31483104, 2, 1),
   _Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1),
   _Q1A(ATOM_COUNT, 0x0001, B6, BRANCH, 0x00000000, 1, 1),
   _Q1A(GRED_COUNT, 0x0001, B6, BRANCH, 0x00000008, 1, 1),
   _Q1B(LD_SHARED_REPLAY, 0x0001, B6, REPLAY, 0x00000008, 1, 1),
   _Q1B(ST_SHARED_REPLAY, 0x0001, B6, REPLAY, 0x0000000c, 1, 1),
   _Q1B(LD_LOCAL_TRANSACTIONS,    0x0001, B6, TRANSACTION, 0x00000000, 1, 1),
   _Q1B(ST_LOCAL_TRANSACTIONS,    0x0001, B6, TRANSACTION, 0x00000004, 1, 1),
   _Q1B(L1_LD_SHARED_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000008, 1, 1),
   _Q1B(L1_ST_SHARED_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x0000000c, 1, 1),
   _Q1B(GLD_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000010, 1, 1),
   _Q1B(GST_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000014, 1, 1),
   _M2AB(IPC, 0x3, B6, EXEC, 0x398, 0xffff, LOGOP, WARP, 0x0, DIV_SUM_M0, 10, 1),
   _M2AB(IPAC, 0x3, B6, EXEC, 0x398, 0x1, B6, WARP, 0x0, AVG_DIV_MM, 10, 1),
   _M2A(IPEC, 0x3, B6, EXEC, 0x398, 0xe, LOGOP, EXEC, 0x398, AVG_DIV_MM, 10, 1),
   _M2A(INST_REPLAY_OHEAD, 0x3, B6, ISSUE, 0x104, 0x3, B6, EXEC, 0x398, REL_SUM_MM, 100, 1),
   _M2B(MP_OCCUPANCY, 0x3f, B6, WARP, 0x31483104, 0x01, B6, WARP, 0x0, AVG_DIV_MM, 200, 64),
   _M2B(MP_EFFICIENCY, 0x01, B6, WARP, 0x0, 0xffff, LOGOP, WARP, 0x0, AVG_DIV_M0, 100, 1),
};

#undef _Q1A
#undef _Q1B
#undef _M2A
#undef _M2B
927
928 /* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */
/* Compute shader binary (Fermi ISA) that reads back the eight MP
 * performance counters on NVC0:NVE4.  It is launched by
 * nvc0_mp_pm_query_end() with one block per MP; only thread 0 of each
 * block ($tidx == 0) performs the stores.
 *
 * Kernel parameters (see input[] in nvc0_mp_pm_query_end):
 *   c0[0x0]/c0[0x4] : low/high 32 bits of the destination buffer address
 *   c0[0x8]         : query sequence number
 *
 * Each MP writes a 0x24-byte record (8 counter words + the sequence
 * word at +0x20), matching the (0x24 / 4) per-MP word stride used by
 * nvc0_mp_pm_query_read_data().  The MP index is extracted from
 * $physid (ext u32 ... 0x414 -- presumably the SM id field; confirm
 * against hardware docs) and scaled by 36 (= 0x24) bytes.
 */
static const uint64_t nvc0_read_mp_pm_counters_code[] =
{
   /* mov b32 $r8 $tidx
    * mov b32 $r9 $physid
    * mov b32 $r0 $pm0
    * mov b32 $r1 $pm1
    * mov b32 $r2 $pm2
    * mov b32 $r3 $pm3
    * mov b32 $r4 $pm4
    * mov b32 $r5 $pm5
    * mov b32 $r6 $pm6
    * mov b32 $r7 $pm7
    * set $p0 0x1 eq u32 $r8 0x0
    * mov b32 $r10 c0[0x0]
    * mov b32 $r11 c0[0x4]
    * ext u32 $r8 $r9 0x414
    * (not $p0) exit
    * mul $r8 u32 $r8 u32 36
    * add b32 $r10 $c $r10 $r8
    * add b32 $r11 $r11 0x0 $c
    * mov b32 $r8 c0[0x8]
    * st b128 wt g[$r10d+0x00] $r0q
    * st b128 wt g[$r10d+0x10] $r4q
    * st b32 wt g[$r10d+0x20] $r8
    * exit */
   0x2c00000084021c04ULL,
   0x2c0000000c025c04ULL,
   0x2c00000010001c04ULL,
   0x2c00000014005c04ULL,
   0x2c00000018009c04ULL,
   0x2c0000001c00dc04ULL,
   0x2c00000020011c04ULL,
   0x2c00000024015c04ULL,
   0x2c00000028019c04ULL,
   0x2c0000002c01dc04ULL,
   0x190e0000fc81dc03ULL,
   0x2800400000029de4ULL,
   0x280040001002dde4ULL,
   0x7000c01050921c03ULL,
   0x80000000000021e7ULL,
   0x1000000090821c02ULL,
   0x4801000020a29c03ULL,
   0x0800000000b2dc42ULL,
   0x2800400020021de4ULL,
   0x9400000000a01fc5ULL,
   0x9400000040a11fc5ULL,
   0x9400000080a21f85ULL,
   0x8000000000001de7ULL
};
978
/* Human-readable names of the NVC0:NVE4 MP performance queries, exposed
 * through nvc0_screen_get_driver_query_info().  The array is indexed by
 * (id - NVC0_QUERY_DRV_STAT_COUNT), i.e. entry order must match the
 * NVC0_PM_QUERY(i) numbering and the nvc0_mp_pm_queries table below --
 * do not reorder. */
static const char *nvc0_pm_query_names[] =
{
   /* MP counters */
   "inst_executed",
   "branch",
   "divergent_branch",
   "active_warps",
   "active_cycles",
   "warps_launched",
   "threads_launched",
   "shared_load",
   "shared_store",
   "local_load",
   "local_store",
   "gred_count",
   "atom_count",
   "gld_request",
   "gst_request",
   "inst_issued1_0",
   "inst_issued1_1",
   "inst_issued2_0",
   "inst_issued2_1",
   "thread_inst_executed_0",
   "thread_inst_executed_1",
   "thread_inst_executed_2",
   "thread_inst_executed_3",
   "prof_trigger_00",
   "prof_trigger_01",
   "prof_trigger_02",
   "prof_trigger_03",
   "prof_trigger_04",
   "prof_trigger_05",
   "prof_trigger_06",
   "prof_trigger_07",
};
1014
/* Builds one NVC0 MP PM query configuration:
 *   n      : NVC0_PM_QUERY_* suffix the entry is indexed by
 *   f, m   : counter function and NVC0_COMPUTE_MP_PM_OP_MODE_* mode,
 *            programmed as (func << 4) | mode via MP_PM_OP (see
 *            nvc0_mp_pm_query_begin)
 *   g      : signal group, pushed to MP_PM_SIGSEL
 *   c      : number of source signals (num_src loop bound)
 *   s0..s5 : per-source select bytes, packed LSB-first; consumed as
 *            (src_sel >> (s * 8)) & 0xff
 * Every entry uses a single counter combined with NVC0_COUNTER_OPn_SUM
 * and a 1/1 normalization.
 * NOTE(review): the positional initializer assumes the field order of
 * struct nvc0_mp_pm_query_cfg (declared elsewhere) -- confirm there.
 */
#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_PM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }

/* Keep entry order in sync with nvc0_pm_query_names above. */
static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] =
{
   _Q(INST_EXECUTED,     0xaaaa, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 0x00, 0x00),
   _Q(BRANCH,            0xaaaa, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00),
   _Q(BRANCH_DIVERGENT,  0xaaaa, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 0x00, 0x00),
   _Q(ACTIVE_WARPS,      0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
   _Q(ACTIVE_CYCLES,     0xaaaa, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(LAUNCHED_WARPS,    0xaaaa, LOGOP, 0x26, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(LAUNCHED_THREADS,  0xaaaa, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
   _Q(LD_SHARED,         0xaaaa, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(ST_SHARED,         0xaaaa, LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(LD_LOCAL,          0xaaaa, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(ST_LOCAL,          0xaaaa, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(GRED_COUNT,        0xaaaa, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(ATOM_COUNT,        0xaaaa, LOGOP, 0x63, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(GLD_REQUEST,       0xaaaa, LOGOP, 0x64, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(GST_REQUEST,       0xaaaa, LOGOP, 0x64, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED1_0,    0xaaaa, LOGOP, 0x7e, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED1_1,    0xaaaa, LOGOP, 0x7e, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED2_0,    0xaaaa, LOGOP, 0x7e, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED2_1,    0xaaaa, LOGOP, 0x7e, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(TH_INST_EXECUTED_0, 0xaaaa, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(TH_INST_EXECUTED_1, 0xaaaa, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(TH_INST_EXECUTED_2, 0xaaaa, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(TH_INST_EXECUTED_3, 0xaaaa, LOGOP, 0xa6, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(PROF_TRIGGER_0,    0xaaaa, LOGOP, 0x01, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_1,    0xaaaa, LOGOP, 0x01, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_2,    0xaaaa, LOGOP, 0x01, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_3,    0xaaaa, LOGOP, 0x01, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_4,    0xaaaa, LOGOP, 0x01, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_5,    0xaaaa, LOGOP, 0x01, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_6,    0xaaaa, LOGOP, 0x01, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_7,    0xaaaa, LOGOP, 0x01, 1, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00),
};

#undef _Q
1053
1054 static const struct nvc0_mp_pm_query_cfg *
1055 nvc0_mp_pm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q)
1056 {
1057 struct nvc0_screen *screen = nvc0->screen;
1058
1059 if (screen->base.class_3d >= NVE4_3D_CLASS)
1060 return &nve4_mp_pm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];
1061 return &nvc0_mp_pm_queries[q->type - NVC0_PM_QUERY(0)];
1062 }
1063
/* Start an MP performance counter query: claim one of the eight hardware
 * counter slots (4 per signal domain) for each signal the query needs,
 * then program and reset those counters through the compute class.
 * If not enough slots are free the query is silently dropped (it will
 * simply produce no data). */
void
nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   const struct nvc0_mp_pm_query_cfg *cfg;
   unsigned i, c;
   unsigned num_ab[2] = { 0, 0 };

   cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);

   /* check if we have enough free counter slots */
   for (i = 0; i < cfg->num_counters; ++i)
      num_ab[cfg->ctr[i].sig_dom]++;

   if (screen->pm.num_mp_pm_active[0] + num_ab[0] > 4 ||
       screen->pm.num_mp_pm_active[1] + num_ab[1] > 4) {
      NOUVEAU_ERR("Not enough free MP counter slots !\n");
      return;
   }

   assert(cfg->num_counters <= 4);
   PUSH_SPACE(push, 4 * 8 + 6);

   /* One-time enable of MP counter collection via a software method
    * (handled outside this file; magic values not documented here). */
   if (!screen->pm.mp_counters_enabled) {
      screen->pm.mp_counters_enabled = TRUE;
      BEGIN_NVC0(push, SUBC_SW(0x06ac), 1);
      PUSH_DATA (push, 0x1fcb);
   }

   /* set sequence field to 0 (used to check if result is available) */
   /* NOTE(review): stride of 10 words with offset 10 does not obviously
    * match the (0x24 / 4)-word records read by nvc0_mp_pm_query_read_data
    * nor the (0x60 / 4)-word NVE4 records -- confirm the buffer layout. */
   for (i = 0; i < screen->mp_count; ++i)
      q->data[i * 10 + 10] = 0;

   for (i = 0; i < cfg->num_counters; ++i) {
      const unsigned d = cfg->ctr[i].sig_dom;

      /* First counter in this domain: update the domain-enable mask via
       * a software method, keeping the other domain's bit if active. */
      if (!screen->pm.num_mp_pm_active[d]) {
         uint32_t m = (1 << 22) | (1 << (7 + (8 * !d)));
         if (screen->pm.num_mp_pm_active[!d])
            m |= 1 << (7 + (8 * d));
         BEGIN_NVC0(push, SUBC_SW(0x0600), 1);
         PUSH_DATA (push, m);
      }
      screen->pm.num_mp_pm_active[d]++;

      /* Grab the first free slot in this domain's group of four. */
      for (c = d * 4; c < (d * 4 + 4); ++c) {
         if (!screen->pm.mp_counter[c]) {
            q->ctr[i] = c;
            screen->pm.mp_counter[c] = (struct pipe_query *)q;
            break;
         }
      }
      assert(c <= (d * 4 + 3)); /* must succeed, already checked for space */

      /* configure and reset the counter(s) */
      if (screen->base.class_3d >= NVE4_3D_CLASS) {
         if (d == 0)
            BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_A_SIGSEL(c & 3)), 1);
         else
            BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_B_SIGSEL(c & 3)), 1);
         PUSH_DATA (push, cfg->ctr[i].sig_sel);
         BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SRCSEL(c)), 1);
         PUSH_DATA (push, cfg->ctr[i].src_sel + 0x2108421 * (c & 3));
         BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 1);
         PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
         BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SET(c)), 1);
         PUSH_DATA (push, 0);
      } else {
         unsigned s;

         /* Pre-NVE4: program each source signal separately. */
         for (s = 0; s < cfg->ctr[i].num_src; s++) {
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SIGSEL(s)), 1);
            PUSH_DATA (push, cfg->ctr[i].sig_sel);
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SRCSEL(s)), 1);
            PUSH_DATA (push, (cfg->ctr[i].src_sel >> (s * 8)) & 0xff);
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(s)), 1);
            PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SET(s)), 1);
            PUSH_DATA (push, 0);
         }
      }
   }
}
1148
/* Finish an MP PM query: freeze all counters, launch a small compute
 * kernel that copies the counter values (plus a sequence number) into
 * the query buffer, release the slots owned by @q, and re-enable the
 * counters still owned by other in-flight queries. */
static void
nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct pipe_context *pipe = &nvc0->base.pipe;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   const boolean is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
   uint32_t mask;
   uint32_t input[3];
   const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
   const uint grid[3] = { screen->mp_count, 1, 1 };
   unsigned c;
   const struct nvc0_mp_pm_query_cfg *cfg;

   cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);

   /* Lazily build the read-back compute program from the generation-
    * specific shader binary; it is shared by all MP PM queries. */
   if (unlikely(!screen->pm.prog)) {
      struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
      prog->type = PIPE_SHADER_COMPUTE;
      prog->translated = TRUE;
      prog->num_gprs = 14;
      prog->parm_size = 12;
      if (is_nve4) {
         prog->code = (uint32_t *)nve4_read_mp_pm_counters_code;
         prog->code_size = sizeof(nve4_read_mp_pm_counters_code);
      } else {
         prog->code = (uint32_t *)nvc0_read_mp_pm_counters_code;
         prog->code_size = sizeof(nvc0_read_mp_pm_counters_code);
      }
      screen->pm.prog = prog;
   }

   /* disable all counting */
   PUSH_SPACE(push, 8);
   for (c = 0; c < 8; ++c)
      if (screen->pm.mp_counter[c]) {
         if (is_nve4) {
            IMMED_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 0);
         } else {
            IMMED_NVC0(push, NVC0_COMPUTE(MP_PM_OP(c)), 0);
         }
      }
   /* release counters for this query */
   for (c = 0; c < 8; ++c) {
      if (nvc0_query(screen->pm.mp_counter[c]) == q) {
         screen->pm.num_mp_pm_active[c / 4]--;
         screen->pm.mp_counter[c] = NULL;
      }
   }

   /* Make the query buffer writable by the read-back kernel. */
   BCTX_REFN_bo(nvc0->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR,
                q->bo);

   PUSH_SPACE(push, 1);
   IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);

   /* Kernel parameters: 64-bit destination address (split into two
    * 32-bit words) and the sequence number stored with the results. */
   pipe->bind_compute_state(pipe, screen->pm.prog);
   input[0] = (q->bo->offset + q->base);
   input[1] = (q->bo->offset + q->base) >> 32;
   input[2] = q->sequence;
   pipe->launch_grid(pipe, block, grid, 0, input);

   nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY);

   /* re-activate other counters */
   PUSH_SPACE(push, 16);
   mask = 0;
   for (c = 0; c < 8; ++c) {
      unsigned i;
      /* note: q is reused here to walk the remaining active queries */
      q = nvc0_query(screen->pm.mp_counter[c]);
      if (!q)
         continue;
      cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
      for (i = 0; i < cfg->num_counters; ++i) {
         /* each query's counters are re-enabled at most once */
         if (mask & (1 << q->ctr[i]))
            break;
         mask |= 1 << q->ctr[i];
         if (is_nve4) {
            BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(q->ctr[i])), 1);
         } else {
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(q->ctr[i])), 1);
         }
         PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
      }
   }
}
1235
1236 static INLINE boolean
1237 nvc0_mp_pm_query_read_data(uint32_t count[32][4],
1238 struct nvc0_context *nvc0, boolean wait,
1239 struct nvc0_query *q,
1240 const struct nvc0_mp_pm_query_cfg *cfg,
1241 unsigned mp_count)
1242 {
1243 unsigned p, c;
1244
1245 for (p = 0; p < mp_count; ++p) {
1246 const unsigned b = (0x24 / 4) * p;
1247
1248 for (c = 0; c < cfg->num_counters; ++c) {
1249 if (q->data[b + 8] != q->sequence) {
1250 if (!wait)
1251 return FALSE;
1252 if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
1253 return FALSE;
1254 }
1255 count[p][c] = q->data[b + q->ctr[c]];
1256 }
1257 }
1258 return TRUE;
1259 }
1260
/* Gather the raw counter values of an NVE4+ MP PM query into
 * count[mp][counter].  Each MP owns a 0x60-byte record:
 *   - counters of domain A (ctr < 4) are split into four 32-bit parts
 *     (d = 0..3) at word offsets d * 4 + ctr, which are summed;
 *   - counters of domain B (ctr >= 4, i.e. ctr & ~3 nonzero) are a
 *     single 32-bit value at word offset 16 + (ctr & 3);
 *   - words 20..23 hold per-part sequence numbers that must match
 *     q->sequence before the data is valid.
 * Returns FALSE when data is not ready and @wait is false, or when
 * waiting on the buffer object fails. */
static INLINE boolean
nve4_mp_pm_query_read_data(uint32_t count[32][4],
                           struct nvc0_context *nvc0, boolean wait,
                           struct nvc0_query *q,
                           const struct nvc0_mp_pm_query_cfg *cfg,
                           unsigned mp_count)
{
   unsigned p, c, d;

   for (p = 0; p < mp_count; ++p) {
      const unsigned b = (0x60 / 4) * p;

      for (c = 0; c < cfg->num_counters; ++c) {
         count[p][c] = 0;
         /* domain B counters need only one pass, domain A needs four */
         for (d = 0; d < ((q->ctr[c] & ~3) ? 1 : 4); ++d) {
            if (q->data[b + 20 + d] != q->sequence) {
               if (!wait)
                  return FALSE;
               if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
                  return FALSE;
            }
            if (q->ctr[c] & ~0x3)
               count[p][c] = q->data[b + 16 + (q->ctr[c] & 3)];
            else
               count[p][c] += q->data[b + d * 4 + q->ctr[c]];
         }
      }
   }
   return TRUE;
}
1291
1292 /* Metric calculations:
1293 * sum(x) ... sum of x over all MPs
1294 * avg(x) ... average of x over all MPs
1295 *
1296 * IPC : sum(inst_executed) / clock
1297 * INST_REPLAY_OHEAD: (sum(inst_issued) - sum(inst_executed)) / sum(inst_issued)
1298 * MP_OCCUPANCY : avg((active_warps / 64) / active_cycles)
1299 * MP_EFFICIENCY : avg(active_cycles / clock)
1300 *
1301 * NOTE: Interpretation of IPC requires knowledge of MP count.
1302 */
1303 static boolean
1304 nvc0_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
1305 void *result, boolean wait)
1306 {
1307 uint32_t count[32][4];
1308 uint64_t value = 0;
1309 unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32);
1310 unsigned p, c;
1311 const struct nvc0_mp_pm_query_cfg *cfg;
1312 boolean ret;
1313
1314 cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
1315
1316 if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
1317 ret = nve4_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
1318 else
1319 ret = nvc0_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
1320 if (!ret)
1321 return FALSE;
1322
1323 if (cfg->op == NVC0_COUNTER_OPn_SUM) {
1324 for (c = 0; c < cfg->num_counters; ++c)
1325 for (p = 0; p < mp_count; ++p)
1326 value += count[p][c];
1327 value = (value * cfg->norm[0]) / cfg->norm[1];
1328 } else
1329 if (cfg->op == NVC0_COUNTER_OPn_OR) {
1330 uint32_t v = 0;
1331 for (c = 0; c < cfg->num_counters; ++c)
1332 for (p = 0; p < mp_count; ++p)
1333 v |= count[p][c];
1334 value = (v * cfg->norm[0]) / cfg->norm[1];
1335 } else
1336 if (cfg->op == NVC0_COUNTER_OPn_AND) {
1337 uint32_t v = ~0;
1338 for (c = 0; c < cfg->num_counters; ++c)
1339 for (p = 0; p < mp_count; ++p)
1340 v &= count[p][c];
1341 value = (v * cfg->norm[0]) / cfg->norm[1];
1342 } else
1343 if (cfg->op == NVC0_COUNTER_OP2_REL_SUM_MM) {
1344 uint64_t v[2] = { 0, 0 };
1345 for (p = 0; p < mp_count; ++p) {
1346 v[0] += count[p][0];
1347 v[1] += count[p][1];
1348 }
1349 if (v[0])
1350 value = ((v[0] - v[1]) * cfg->norm[0]) / (v[0] * cfg->norm[1]);
1351 } else
1352 if (cfg->op == NVC0_COUNTER_OP2_DIV_SUM_M0) {
1353 for (p = 0; p < mp_count; ++p)
1354 value += count[p][0];
1355 if (count[0][1])
1356 value = (value * cfg->norm[0]) / (count[0][1] * cfg->norm[1]);
1357 else
1358 value = 0;
1359 } else
1360 if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_MM) {
1361 unsigned mp_used = 0;
1362 for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
1363 if (count[p][1])
1364 value += (count[p][0] * cfg->norm[0]) / count[p][1];
1365 if (mp_used)
1366 value /= mp_used * cfg->norm[1];
1367 } else
1368 if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_M0) {
1369 unsigned mp_used = 0;
1370 for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
1371 value += count[p][0];
1372 if (count[0][1] && mp_used) {
1373 value *= cfg->norm[0];
1374 value /= count[0][1] * mp_used * cfg->norm[1];
1375 } else {
1376 value = 0;
1377 }
1378 }
1379
1380 *(uint64_t *)result = value;
1381 return TRUE;
1382 }
1383
1384 int
1385 nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
1386 unsigned id,
1387 struct pipe_driver_query_info *info)
1388 {
1389 struct nvc0_screen *screen = nvc0_screen(pscreen);
1390 int count = 0;
1391
1392 count += NVC0_QUERY_DRV_STAT_COUNT;
1393
1394 if (screen->base.device->drm_version >= 0x01000101) {
1395 if (screen->base.class_3d >= NVE4_3D_CLASS) {
1396 count += NVE4_PM_QUERY_COUNT;
1397 } else
1398 if (screen->compute) {
1399 count += NVC0_PM_QUERY_COUNT; /* NVC0_COMPUTE is not always enabled */
1400 }
1401 }
1402
1403 if (!info)
1404 return count;
1405
1406 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
1407 if (id < NVC0_QUERY_DRV_STAT_COUNT) {
1408 info->name = nvc0_drv_stat_names[id];
1409 info->query_type = NVC0_QUERY_DRV_STAT(id);
1410 info->max_value = ~0ULL;
1411 info->uses_byte_units = !!strstr(info->name, "bytes");
1412 return 1;
1413 } else
1414 #endif
1415 if (id < count) {
1416 if (screen->base.class_3d >= NVE4_3D_CLASS) {
1417 info->name = nve4_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
1418 info->query_type = NVE4_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
1419 info->max_value = (id < NVE4_PM_QUERY_METRIC_MP_OCCUPANCY) ?
1420 ~0ULL : 100;
1421 info->uses_byte_units = FALSE;
1422 return 1;
1423 } else
1424 if (screen->compute) {
1425 info->name = nvc0_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
1426 info->query_type = NVC0_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
1427 info->max_value = ~0ULL;
1428 info->uses_byte_units = FALSE;
1429 return 1;
1430 }
1431 }
1432 /* user asked for info about non-existing query */
1433 info->name = "this_is_not_the_query_you_are_looking_for";
1434 info->query_type = 0xdeadd01d;
1435 info->max_value = 0;
1436 info->uses_byte_units = FALSE;
1437 return 0;
1438 }
1439
1440 void
1441 nvc0_init_query_functions(struct nvc0_context *nvc0)
1442 {
1443 struct pipe_context *pipe = &nvc0->base.pipe;
1444
1445 pipe->create_query = nvc0_query_create;
1446 pipe->destroy_query = nvc0_query_destroy;
1447 pipe->begin_query = nvc0_query_begin;
1448 pipe->end_query = nvc0_query_end;
1449 pipe->get_query_result = nvc0_query_result;
1450 pipe->render_condition = nvc0_render_condition;
1451 }