nvc0: use 64-bit math when scaling the query results
[mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_query.c
1 /*
2 * Copyright 2011 Nouveau Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Christoph Bumiller
23 */
24
25 #define NVC0_PUSH_EXPLICIT_SPACE_CHECKING
26
27 #include "nvc0/nvc0_context.h"
28 #include "nv_object.xml.h"
29 #include "nvc0/nve4_compute.xml.h"
30 #include "nvc0/nvc0_compute.xml.h"
31
32 #define NVC0_QUERY_STATE_READY 0
33 #define NVC0_QUERY_STATE_ACTIVE 1
34 #define NVC0_QUERY_STATE_ENDED 2
35 #define NVC0_QUERY_STATE_FLUSHED 3
36
/* Driver-side query object backing a gallium pipe_query. */
struct nvc0_query {
   uint32_t *data;        /* CPU mapping of the result buffer (bo->map + base) */
   uint16_t type;         /* PIPE_QUERY_* or driver-private NVC0_QUERY_* id */
   uint16_t index;        /* vertex stream / TFB buffer / drv-stat index */
   int8_t ctr[4];         /* counter slots; presumably assigned by the MP PM
                           * query code — not used in this file's visible part */
   uint32_t sequence;     /* sequence value the GPU writes on completion */
   struct nouveau_bo *bo; /* GART buffer object holding the raw results */
   uint32_t base;         /* start of this query's region within bo */
   uint32_t offset; /* base + i * rotate */
   uint8_t state;         /* NVC0_QUERY_STATE_* */
   boolean is64bit;       /* results are 64-bit words without a sequence slot;
                           * completion is then tracked via 'fence' instead */
   uint8_t rotate;        /* bytes to advance per begin (occlusion queries) */
   int nesting; /* only used for occlusion queries */
   union {
      struct nouveau_mm_allocation *mm; /* sub-allocation backing bo/base */
      uint64_t value;                   /* CPU-computed result (driver stats) */
   } u;
   struct nouveau_fence *fence; /* completion fence for 64-bit queries */
};
56
57 #define NVC0_QUERY_ALLOC_SPACE 256
58
59 static void nvc0_mp_pm_query_begin(struct nvc0_context *, struct nvc0_query *);
60 static void nvc0_mp_pm_query_end(struct nvc0_context *, struct nvc0_query *);
61 static boolean nvc0_mp_pm_query_result(struct nvc0_context *,
62 struct nvc0_query *, void *, boolean);
63
64 static INLINE struct nvc0_query *
65 nvc0_query(struct pipe_query *pipe)
66 {
67 return (struct nvc0_query *)pipe;
68 }
69
/* (Re)allocate result storage for a query.
 *
 * Any existing buffer is released first: if the query already completed,
 * its sub-allocation can be freed immediately; otherwise the free is
 * deferred through fence work so the GPU can finish writing it.  With
 * size == 0 this degenerates into a pure free.
 *
 * Returns FALSE if allocation or CPU mapping fails (the query is left
 * without storage in that case).
 */
static boolean
nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
{
   struct nvc0_screen *screen = nvc0->screen;
   int ret;

   if (q->bo) {
      nouveau_bo_ref(NULL, &q->bo);
      if (q->u.mm) {
         if (q->state == NVC0_QUERY_STATE_READY)
            nouveau_mm_free(q->u.mm);
         else
            /* GPU may still write the old buffer: free only after fence */
            nouveau_fence_work(screen->base.fence.current,
                               nouveau_mm_free_work, q->u.mm);
      }
   }
   if (size) {
      q->u.mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
      if (!q->bo)
         return FALSE;
      q->offset = q->base;

      ret = nouveau_bo_map(q->bo, 0, screen->base.client);
      if (ret) {
         /* release what we just allocated */
         nvc0_query_allocate(nvc0, q, 0);
         return FALSE;
      }
      q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
   }
   return TRUE;
}
101
102 static void
103 nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
104 {
105 nvc0_query_allocate(nvc0_context(pipe), nvc0_query(pq), 0);
106 nouveau_fence_ref(NULL, &nvc0_query(pq)->fence);
107 FREE(nvc0_query(pq));
108 }
109
110 static struct pipe_query *
111 nvc0_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
112 {
113 struct nvc0_context *nvc0 = nvc0_context(pipe);
114 struct nvc0_query *q;
115 unsigned space = NVC0_QUERY_ALLOC_SPACE;
116
117 q = CALLOC_STRUCT(nvc0_query);
118 if (!q)
119 return NULL;
120
121 switch (type) {
122 case PIPE_QUERY_OCCLUSION_COUNTER:
123 case PIPE_QUERY_OCCLUSION_PREDICATE:
124 q->rotate = 32;
125 space = NVC0_QUERY_ALLOC_SPACE;
126 break;
127 case PIPE_QUERY_PIPELINE_STATISTICS:
128 q->is64bit = TRUE;
129 space = 512;
130 break;
131 case PIPE_QUERY_SO_STATISTICS:
132 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
133 q->is64bit = TRUE;
134 space = 64;
135 break;
136 case PIPE_QUERY_PRIMITIVES_GENERATED:
137 case PIPE_QUERY_PRIMITIVES_EMITTED:
138 q->is64bit = TRUE;
139 q->index = index;
140 space = 32;
141 break;
142 case PIPE_QUERY_TIME_ELAPSED:
143 case PIPE_QUERY_TIMESTAMP:
144 case PIPE_QUERY_TIMESTAMP_DISJOINT:
145 case PIPE_QUERY_GPU_FINISHED:
146 space = 32;
147 break;
148 case NVC0_QUERY_TFB_BUFFER_OFFSET:
149 space = 16;
150 break;
151 default:
152 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
153 if (type >= NVC0_QUERY_DRV_STAT(0) && type <= NVC0_QUERY_DRV_STAT_LAST) {
154 space = 0;
155 q->is64bit = true;
156 q->index = type - NVC0_QUERY_DRV_STAT(0);
157 break;
158 } else
159 #endif
160 if (nvc0->screen->base.device->drm_version >= 0x01000101) {
161 if (type >= NVE4_PM_QUERY(0) && type <= NVE4_PM_QUERY_LAST) {
162 /* for each MP:
163 * [00] = WS0.C0
164 * [04] = WS0.C1
165 * [08] = WS0.C2
166 * [0c] = WS0.C3
167 * [10] = WS1.C0
168 * [14] = WS1.C1
169 * [18] = WS1.C2
170 * [1c] = WS1.C3
171 * [20] = WS2.C0
172 * [24] = WS2.C1
173 * [28] = WS2.C2
174 * [2c] = WS2.C3
175 * [30] = WS3.C0
176 * [34] = WS3.C1
177 * [38] = WS3.C2
178 * [3c] = WS3.C3
179 * [40] = MP.C4
180 * [44] = MP.C5
181 * [48] = MP.C6
182 * [4c] = MP.C7
183 * [50] = WS0.sequence
184 * [54] = WS1.sequence
185 * [58] = WS2.sequence
186 * [5c] = WS3.sequence
187 */
188 space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t);
189 break;
190 } else
191 if (type >= NVC0_PM_QUERY(0) && type <= NVC0_PM_QUERY_LAST) {
192 /* for each MP:
193 * [00] = MP.C0
194 * [04] = MP.C1
195 * [08] = MP.C2
196 * [0c] = MP.C3
197 * [10] = MP.C4
198 * [14] = MP.C5
199 * [18] = MP.C6
200 * [1c] = MP.C7
201 * [20] = MP.sequence
202 */
203 space = (8 + 1) * nvc0->screen->mp_count * sizeof(uint32_t);
204 break;
205 }
206 }
207 debug_printf("invalid query type: %u\n", type);
208 FREE(q);
209 return NULL;
210 }
211 if (!nvc0_query_allocate(nvc0, q, space)) {
212 FREE(q);
213 return NULL;
214 }
215
216 q->type = type;
217
218 if (q->rotate) {
219 /* we advance before query_begin ! */
220 q->offset -= q->rotate;
221 q->data -= q->rotate / sizeof(*q->data);
222 } else
223 if (!q->is64bit)
224 q->data[0] = 0; /* initialize sequence */
225
226 return (struct pipe_query *)q;
227 }
228
/* Emit a 3D-class QUERY_GET: the GPU writes the counter selected by 'get'
 * (and, depending on the encoding, q->sequence) to q->bo at
 * q->offset + offset.
 */
static void
nvc0_query_get(struct nouveau_pushbuf *push, struct nvc0_query *q,
               unsigned offset, uint32_t get)
{
   offset += q->offset;

   PUSH_SPACE(push, 5);
   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
   BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
   PUSH_DATAh(push, q->bo->offset + offset);
   PUSH_DATA (push, q->bo->offset + offset);
   PUSH_DATA (push, q->sequence);
   PUSH_DATA (push, get);
}
243
244 static void
245 nvc0_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q)
246 {
247 q->offset += q->rotate;
248 q->data += q->rotate / sizeof(*q->data);
249 if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
250 nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
251 }
252
/* Begin a query: initialize its result storage and emit the commands that
 * record the starting counter values (for types reporting end - begin).
 */
static void
nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q = nvc0_query(pq);

   /* For occlusion queries we have to change the storage, because a previous
    * query might set the initial render condition to FALSE even *after* we re-
    * initialized it to TRUE.
    */
   if (q->rotate) {
      nvc0_query_rotate(nvc0, q);

      /* XXX: can we do this with the GPU, and sync with respect to a previous
       * query ?
       */
      q->data[0] = q->sequence; /* initialize sequence */
      q->data[1] = 1; /* initial render condition = TRUE */
      q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
      q->data[5] = 0;
   }
   q->sequence++;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      /* nesting != 0 means another occlusion query is already counting;
       * only the outermost one resets and enables the sample counter */
      q->nesting = nvc0->screen->num_occlusion_queries_active++;
      if (q->nesting) {
         nvc0_query_get(push, q, 0x10, 0x0100f002);
      } else {
         PUSH_SPACE(push, 3);
         BEGIN_NVC0(push, NVC0_3D(COUNTER_RESET), 1);
         PUSH_DATA (push, NVC0_3D_COUNTER_RESET_SAMPLECNT);
         IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nvc0_query_get(push, q, 0x10, 0x09005002 | (q->index << 5));
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nvc0_query_get(push, q, 0x10, 0x05805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nvc0_query_get(push, q, 0x20, 0x05805002 | (q->index << 5));
      nvc0_query_get(push, q, 0x30, 0x06805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      nvc0_query_get(push, q, 0x10, 0x03005002 | (q->index << 5));
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      nvc0_query_get(push, q, 0x10, 0x00005002);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      /* begin values are written at +0xc0, end values at +0x00 (see
       * nvc0_query_end); results are computed as the difference */
      nvc0_query_get(push, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
      nvc0_query_get(push, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
      nvc0_query_get(push, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
      nvc0_query_get(push, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
      nvc0_query_get(push, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nvc0_query_get(push, q, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nvc0_query_get(push, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nvc0_query_get(push, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
      nvc0_query_get(push, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
      nvc0_query_get(push, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
      break;
   default:
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
      if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
          q->type <= NVC0_QUERY_DRV_STAT_LAST) {
         /* snapshot the CPU counter; end computes the difference */
         if (q->index >= 5)
            q->u.value = nvc0->screen->base.stats.v[q->index];
         else
            q->u.value = 0;
      } else
#endif
      if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
          (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
         nvc0_mp_pm_query_begin(nvc0, q);
      }
      break;
   }
   q->state = NVC0_QUERY_STATE_ACTIVE;
}
336
/* End a query: emit the commands writing the final counter values.
 * For types that never called begin (e.g. TIMESTAMP, GPU_FINISHED) the
 * rotate/sequence bookkeeping normally done by begin happens here.
 */
static void
nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q = nvc0_query(pq);

   if (q->state != NVC0_QUERY_STATE_ACTIVE) {
      /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
      if (q->rotate)
         nvc0_query_rotate(nvc0, q);
      q->sequence++;
   }
   q->state = NVC0_QUERY_STATE_ENDED;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      nvc0_query_get(push, q, 0, 0x0100f002);
      /* disable the sample counter once the last occlusion query ends */
      if (--nvc0->screen->num_occlusion_queries_active == 0) {
         PUSH_SPACE(push, 1);
         IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nvc0_query_get(push, q, 0, 0x09005002 | (q->index << 5));
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nvc0_query_get(push, q, 0, 0x05805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nvc0_query_get(push, q, 0x00, 0x05805002 | (q->index << 5));
      nvc0_query_get(push, q, 0x10, 0x06805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      /* TODO: How do we sum over all streams for render condition ? */
      /* PRIMS_DROPPED doesn't write sequence, use a ZERO query to sync on */
      nvc0_query_get(push, q, 0x00, 0x03005002 | (q->index << 5));
      nvc0_query_get(push, q, 0x20, 0x00005002);
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIME_ELAPSED:
      nvc0_query_get(push, q, 0, 0x00005002);
      break;
   case PIPE_QUERY_GPU_FINISHED:
      nvc0_query_get(push, q, 0, 0x1000f010);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      /* end values at +0x00, matching the begin values at +0xc0 */
      nvc0_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
      nvc0_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
      nvc0_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
      nvc0_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
      nvc0_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nvc0_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nvc0_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nvc0_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
      nvc0_query_get(push, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
      nvc0_query_get(push, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      /* indexed by TFB buffer instead of by vertex stream */
      nvc0_query_get(push, q, 0x00, 0x0d005002 | (q->index << 5));
      break;
   default:
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
      if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
          q->type <= NVC0_QUERY_DRV_STAT_LAST) {
         q->u.value = nvc0->screen->base.stats.v[q->index] - q->u.value;
         return;
      } else
#endif
      if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
          (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
         nvc0_mp_pm_query_end(nvc0, q);
      }
      break;
   }
   /* 64-bit queries have no sequence slot to poll, track completion via
    * the current fence instead */
   if (q->is64bit)
      nouveau_fence_ref(nvc0->screen->base.fence.current, &q->fence);
}
417
/* Non-blocking poll for completion: 64-bit queries are ready when their
 * fence signalled, others when the GPU wrote the expected sequence value.
 * NOTE(review): the 'cli' parameter is currently unused.
 */
static INLINE void
nvc0_query_update(struct nouveau_client *cli, struct nvc0_query *q)
{
   if (q->is64bit) {
      if (nouveau_fence_signalled(q->fence))
         q->state = NVC0_QUERY_STATE_READY;
   } else {
      if (q->data[0] == q->sequence)
         q->state = NVC0_QUERY_STATE_READY;
   }
}
429
/* Fetch a query result into the caller-provided union.
 *
 * If the query hasn't completed and wait is FALSE, the pushbuf is kicked
 * once (so spinning apps make progress) and FALSE is returned; with wait
 * the function blocks on the buffer object.  Results that are recorded as
 * begin/end pairs are reduced to their difference here.
 */
static boolean
nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
                  boolean wait, union pipe_query_result *result)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_query *q = nvc0_query(pq);
   /* aliased views of the result union for the different result types */
   uint64_t *res64 = (uint64_t*)result;
   uint32_t *res32 = (uint32_t*)result;
   boolean *res8 = (boolean*)result;
   uint64_t *data64 = (uint64_t *)q->data;
   unsigned i;

#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
   if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
       q->type <= NVC0_QUERY_DRV_STAT_LAST) {
      res64[0] = q->u.value;
      return TRUE;
   } else
#endif
   if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
       (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
      return nvc0_mp_pm_query_result(nvc0, q, result, wait);
   }

   if (q->state != NVC0_QUERY_STATE_READY)
      nvc0_query_update(nvc0->screen->base.client, q);

   if (q->state != NVC0_QUERY_STATE_READY) {
      if (!wait) {
         if (q->state != NVC0_QUERY_STATE_FLUSHED) {
            q->state = NVC0_QUERY_STATE_FLUSHED;
            /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
            PUSH_KICK(nvc0->base.pushbuf);
         }
         return FALSE;
      }
      if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->screen->base.client))
         return FALSE;
      NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1);
   }
   q->state = NVC0_QUERY_STATE_READY;

   switch (q->type) {
   case PIPE_QUERY_GPU_FINISHED:
      res8[0] = TRUE;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
      res64[0] = q->data[1] - q->data[5];
      break;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      res8[0] = q->data[1] != q->data[5];
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
   case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
      res64[0] = data64[0] - data64[2];
      break;
   case PIPE_QUERY_SO_STATISTICS:
      res64[0] = data64[0] - data64[4];
      res64[1] = data64[2] - data64[6];
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      res8[0] = data64[0] != data64[2];
      break;
   case PIPE_QUERY_TIMESTAMP:
      res64[0] = data64[1];
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      res64[0] = 1000000000;
      /* byte offset 8: the boolean following the u64 frequency — presumably
       * the 'disjoint' member of the result struct; verify layout */
      res8[8] = FALSE;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      res64[0] = data64[1] - data64[3];
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      /* end values at data64[0..], begin values 0xc0 bytes in (index 24) */
      for (i = 0; i < 10; ++i)
         res64[i] = data64[i * 2] - data64[24 + i * 2];
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      res32[0] = q->data[1];
      break;
   default:
      assert(0); /* can't happen, we don't create queries with invalid type */
      return FALSE;
   }

   return TRUE;
}
517
518 void
519 nvc0_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq)
520 {
521 struct nvc0_query *q = nvc0_query(pq);
522 unsigned offset = q->offset;
523
524 if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) offset += 0x20;
525
526 PUSH_SPACE(push, 5);
527 PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
528 BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
529 PUSH_DATAh(push, q->bo->offset + offset);
530 PUSH_DATA (push, q->bo->offset + offset);
531 PUSH_DATA (push, q->sequence);
532 PUSH_DATA (push, (1 << 12) |
533 NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
534 }
535
/* Program the hardware render condition from a predicate query, or
 * disable it when pq is NULL.  The COND_MODE chosen depends on the query
 * type, the requested polarity and whether waiting is allowed.
 */
static void
nvc0_render_condition(struct pipe_context *pipe,
                      struct pipe_query *pq,
                      boolean condition, uint mode)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q;
   uint32_t cond;
   boolean wait =
      mode != PIPE_RENDER_COND_NO_WAIT &&
      mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

   if (!pq) {
      cond = NVC0_3D_COND_MODE_ALWAYS;
   }
   else {
      q = nvc0_query(pq);
      /* NOTE: comparison of 2 queries only works if both have completed */
      switch (q->type) {
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
         cond = condition ? NVC0_3D_COND_MODE_EQUAL :
                          NVC0_3D_COND_MODE_NOT_EQUAL;
         wait = TRUE;
         break;
      case PIPE_QUERY_OCCLUSION_COUNTER:
      case PIPE_QUERY_OCCLUSION_PREDICATE:
         if (likely(!condition)) {
            /* nested occlusion queries can't use RES_NON_ZERO because the
             * counter wasn't reset at begin */
            if (unlikely(q->nesting))
               cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
                             NVC0_3D_COND_MODE_ALWAYS;
            else
               cond = NVC0_3D_COND_MODE_RES_NON_ZERO;
         } else {
            cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
         }
         break;
      default:
         assert(!"render condition query not a predicate");
         cond = NVC0_3D_COND_MODE_ALWAYS;
         break;
      }
   }

   /* remember the condition so state validation can re-apply it */
   nvc0->cond_query = pq;
   nvc0->cond_cond = condition;
   nvc0->cond_condmode = cond;
   nvc0->cond_mode = mode;

   if (!pq) {
      PUSH_SPACE(push, 1);
      IMMED_NVC0(push, NVC0_3D(COND_MODE), cond);
      return;
   }

   if (wait)
      nvc0_query_fifo_wait(push, pq);

   /* point both the 3D and 2D engines at the query's result buffer */
   PUSH_SPACE(push, 7);
   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   BEGIN_NVC0(push, NVC0_3D(COND_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, q->bo->offset + q->offset);
   PUSH_DATA (push, q->bo->offset + q->offset);
   PUSH_DATA (push, cond);
   BEGIN_NVC0(push, NVC0_2D(COND_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, q->bo->offset + q->offset);
   PUSH_DATA (push, q->bo->offset + q->offset);
}
604
/* Submit 4 words of the query's result buffer directly as pushbuf data
 * (used e.g. for draw-indirect style parameters), bypassing prefetch so
 * the GPU reads the freshly written values.
 */
void
nvc0_query_pushbuf_submit(struct nouveau_pushbuf *push,
                          struct pipe_query *pq, unsigned result_offset)
{
   struct nvc0_query *q = nvc0_query(pq);

#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))

   nouveau_pushbuf_space(push, 0, 0, 1);
   nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
                        NVC0_IB_ENTRY_1_NO_PREFETCH);
}
617
618 void
619 nvc0_so_target_save_offset(struct pipe_context *pipe,
620 struct pipe_stream_output_target *ptarg,
621 unsigned index, boolean *serialize)
622 {
623 struct nvc0_so_target *targ = nvc0_so_target(ptarg);
624
625 if (*serialize) {
626 *serialize = FALSE;
627 PUSH_SPACE(nvc0_context(pipe)->base.pushbuf, 1);
628 IMMED_NVC0(nvc0_context(pipe)->base.pushbuf, NVC0_3D(SERIALIZE), 0);
629
630 NOUVEAU_DRV_STAT(nouveau_screen(pipe->screen), gpu_serialize_count, 1);
631 }
632
633 nvc0_query(targ->pq)->index = index;
634
635 nvc0_query_end(pipe, targ->pq);
636 }
637
638
639 /* === DRIVER STATISTICS === */
640
641 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
642
/* Human-readable names for the CPU-side driver statistics counters.
 * NOTE(review): order presumably matches the NVC0_QUERY_DRV_STAT(i)
 * indices / the screen's stats.v[] slots — verify against the header.
 */
static const char *nvc0_drv_stat_names[] =
{
   "drv-tex_obj_current_count",
   "drv-tex_obj_current_bytes",
   "drv-buf_obj_current_count",
   "drv-buf_obj_current_bytes_vid",
   "drv-buf_obj_current_bytes_sys",
   "drv-tex_transfers_rd",
   "drv-tex_transfers_wr",
   "drv-tex_copy_count",
   "drv-tex_blit_count",
   "drv-tex_cache_flush_count",
   "drv-buf_transfers_rd",
   "drv-buf_transfers_wr",
   "drv-buf_read_bytes_staging_vid",
   "drv-buf_write_bytes_direct",
   "drv-buf_write_bytes_staging_vid",
   "drv-buf_write_bytes_staging_sys",
   "drv-buf_copy_bytes",
   "drv-buf_non_kernel_fence_sync_count",
   "drv-any_non_kernel_fence_sync_count",
   "drv-query_sync_count",
   "drv-gpu_serialize_count",
   "drv-draw_calls_array",
   "drv-draw_calls_indexed",
   "drv-draw_calls_fallback_count",
   "drv-user_buffer_upload_bytes",
   "drv-constbuf_upload_count",
   "drv-constbuf_upload_bytes",
   "drv-pushbuf_count",
   "drv-resource_validate_count"
};
675
676 #endif /* NOUVEAU_ENABLE_DRIVER_STATISTICS */
677
678
679 /* === PERFORMANCE MONITORING COUNTERS for NVE4+ === */
680
681 /* Code to read out MP counters: They are accessible via mmio, too, but let's
682 * just avoid mapping registers in userspace. We'd have to know which MPs are
683 * enabled/present, too, and that information is not presently exposed.
684 * We could add a kernel interface for it, but reading the counters like this
685 * has the advantage of being async (if get_result isn't called immediately).
686 */
/* Pre-assembled NVE4 (Kepler) compute shader that stores the MP ($pm0-7)
 * counters plus a sequence word to the buffer addressed by c0[0x0..0x8];
 * the machine-code words below correspond to the assembly listing.
 */
static const uint64_t nve4_read_mp_pm_counters_code[] =
{
   /* sched 0x20 0x20 0x20 0x20 0x20 0x20 0x20
    * mov b32 $r8 $tidx
    * mov b32 $r12 $physid
    * mov b32 $r0 $pm0
    * mov b32 $r1 $pm1
    * mov b32 $r2 $pm2
    * mov b32 $r3 $pm3
    * mov b32 $r4 $pm4
    * sched 0x20 0x20 0x23 0x04 0x20 0x04 0x2b
    * mov b32 $r5 $pm5
    * mov b32 $r6 $pm6
    * mov b32 $r7 $pm7
    * set $p0 0x1 eq u32 $r8 0x0
    * mov b32 $r10 c0[0x0]
    * ext u32 $r8 $r12 0x414
    * mov b32 $r11 c0[0x4]
    * sched 0x04 0x2e 0x04 0x20 0x20 0x28 0x04
    * ext u32 $r9 $r12 0x208
    * (not $p0) exit
    * set $p1 0x1 eq u32 $r9 0x0
    * mul $r8 u32 $r8 u32 96
    * mul $r12 u32 $r9 u32 16
    * mul $r13 u32 $r9 u32 4
    * add b32 $r9 $r8 $r13
    * sched 0x28 0x04 0x2c 0x04 0x2c 0x04 0x2c
    * add b32 $r8 $r8 $r12
    * mov b32 $r12 $r10
    * add b32 $r10 $c $r10 $r8
    * mov b32 $r13 $r11
    * add b32 $r11 $r11 0x0 $c
    * add b32 $r12 $c $r12 $r9
    * st b128 wt g[$r10d] $r0q
    * sched 0x4 0x2c 0x20 0x04 0x2e 0x00 0x00
    * mov b32 $r0 c0[0x8]
    * add b32 $r13 $r13 0x0 $c
    * $p1 st b128 wt g[$r12d+0x40] $r4q
    * st b32 wt g[$r12d+0x50] $r0
    * exit */
   0x2202020202020207ULL,
   0x2c00000084021c04ULL,
   0x2c0000000c031c04ULL,
   0x2c00000010001c04ULL,
   0x2c00000014005c04ULL,
   0x2c00000018009c04ULL,
   0x2c0000001c00dc04ULL,
   0x2c00000020011c04ULL,
   0x22b0420042320207ULL,
   0x2c00000024015c04ULL,
   0x2c00000028019c04ULL,
   0x2c0000002c01dc04ULL,
   0x190e0000fc81dc03ULL,
   0x2800400000029de4ULL,
   0x7000c01050c21c03ULL,
   0x280040001002dde4ULL,
   0x204282020042e047ULL,
   0x7000c00820c25c03ULL,
   0x80000000000021e7ULL,
   0x190e0000fc93dc03ULL,
   0x1000000180821c02ULL,
   0x1000000040931c02ULL,
   0x1000000010935c02ULL,
   0x4800000034825c03ULL,
   0x22c042c042c04287ULL,
   0x4800000030821c03ULL,
   0x2800000028031de4ULL,
   0x4801000020a29c03ULL,
   0x280000002c035de4ULL,
   0x0800000000b2dc42ULL,
   0x4801000024c31c03ULL,
   0x9400000000a01fc5ULL,
   0x200002e04202c047ULL,
   0x2800400020001de4ULL,
   0x0800000000d35c42ULL,
   0x9400000100c107c5ULL,
   0x9400000140c01f85ULL,
   0x8000000000001de7ULL
};
766
/* NOTE: intentionally using the same names as NV */
/* NOTE(review): order presumably matches the NVE4_PM_QUERY_* ids used to
 * index nve4_mp_pm_queries — verify against the header. */
static const char *nve4_pm_query_names[] =
{
   /* MP counters */
   "prof_trigger_00",
   "prof_trigger_01",
   "prof_trigger_02",
   "prof_trigger_03",
   "prof_trigger_04",
   "prof_trigger_05",
   "prof_trigger_06",
   "prof_trigger_07",
   "warps_launched",
   "threads_launched",
   "sm_cta_launched",
   "inst_issued1",
   "inst_issued2",
   "inst_executed",
   "local_load",
   "local_store",
   "shared_load",
   "shared_store",
   "l1_local_load_hit",
   "l1_local_load_miss",
   "l1_local_store_hit",
   "l1_local_store_miss",
   "gld_request",
   "gst_request",
   "l1_global_load_hit",
   "l1_global_load_miss",
   "uncached_global_load_transaction",
   "global_store_transaction",
   "branch",
   "divergent_branch",
   "active_warps",
   "active_cycles",
   "inst_issued",
   "atom_count",
   "gred_count",
   "shared_load_replay",
   "shared_store_replay",
   "local_load_transactions",
   "local_store_transactions",
   "l1_shared_load_transactions",
   "l1_shared_store_transactions",
   "global_ld_mem_divergence_replays",
   "global_st_mem_divergence_replays",
   /* metrics, i.e. functions of the MP counters */
   "metric-ipc",                   /* inst_executed, clock */
   "metric-ipac",                  /* inst_executed, active_cycles */
   "metric-ipec",                  /* inst_executed, (bool)inst_executed */
   "metric-achieved_occupancy",    /* active_warps, active_cycles */
   "metric-sm_efficiency",         /* active_cycles, clock */
   "metric-inst_replay_overhead"   /* inst_issued, inst_executed */
};
822
823 /* For simplicity, we will allocate as many group slots as we allocate counter
824 * slots. This means that a single counter which wants to source from 2 groups
825 * will have to be declared as using 2 counter slots. This shouldn't really be
826 * a problem because such queries don't make much sense ... (unless someone is
827 * really creative).
828 */
/* Hardware configuration for a single MP performance counter slot. */
struct nvc0_mp_counter_cfg
{
   uint32_t func : 16; /* mask or 4-bit logic op (depending on mode) */
   uint32_t mode : 4; /* LOGOP,B6,LOGOP_B6(_PULSE) */
   uint32_t num_src : 3; /* number of sources (1 - 6, only for NVC0:NVE4) */
   uint32_t sig_dom : 1; /* if 0, MP_PM_A (per warp-sched), if 1, MP_PM_B */
   uint32_t sig_sel : 8; /* signal group */
   uint64_t src_sel; /* signal selection for up to 6 sources (48 bit) */
};
838
/* How to combine the per-MP counter values into a single query result. */
#define NVC0_COUNTER_OPn_SUM 0
#define NVC0_COUNTER_OPn_OR 1
#define NVC0_COUNTER_OPn_AND 2
#define NVC0_COUNTER_OP2_REL_SUM_MM 3 /* (sum(ctr0) - sum(ctr1)) / sum(ctr0) */
#define NVC0_COUNTER_OP2_DIV_SUM_M0 4 /* sum(ctr0) / ctr1 of MP[0]) */
#define NVC0_COUNTER_OP2_AVG_DIV_MM 5 /* avg(ctr0 / ctr1) */
#define NVC0_COUNTER_OP2_AVG_DIV_M0 6 /* avg(ctr0) / ctr1 of MP[0]) */
846
/* Full description of one MP performance query: up to 4 counter slots,
 * the combination op, and a num/denom pair used to normalize the result.
 */
struct nvc0_mp_pm_query_cfg
{
   struct nvc0_mp_counter_cfg ctr[4];
   uint8_t num_counters;
   uint8_t op;
   uint8_t norm[2]; /* normalization num,denom */
};
854
/* Table initializers: _Q1A/_Q1B = single-counter query in the A (per
 * warp-scheduler) / B (per MP) signal domain; _M2A/_M2B/_M2AB = metric
 * combining two counters from the respective domains.
 */
#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
   { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
   { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g1, s1 }, \
   {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
   { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g0, s0 }, \
   { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
   {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
   { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
   { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
   {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
869
870 /* NOTES:
871 * active_warps: bit 0 alternates btw 0 and 1 for odd nr of warps
872 * inst_executed etc.: we only count a single warp scheduler
873 * metric-ipXc: we simply multiply by 4 to account for the 4 warp schedulers;
874 * this is inaccurate !
875 */
/* Counter configurations, designated-indexed by NVE4_PM_QUERY(_METRIC)_*
 * via the _Q1x/_M2x macros above.
 */
static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] =
{
   _Q1A(PROF_TRIGGER_0, 0x0001, B6, USER, 0x00000000, 1, 1),
   _Q1A(PROF_TRIGGER_1, 0x0001, B6, USER, 0x00000004, 1, 1),
   _Q1A(PROF_TRIGGER_2, 0x0001, B6, USER, 0x00000008, 1, 1),
   _Q1A(PROF_TRIGGER_3, 0x0001, B6, USER, 0x0000000c, 1, 1),
   _Q1A(PROF_TRIGGER_4, 0x0001, B6, USER, 0x00000010, 1, 1),
   _Q1A(PROF_TRIGGER_5, 0x0001, B6, USER, 0x00000014, 1, 1),
   _Q1A(PROF_TRIGGER_6, 0x0001, B6, USER, 0x00000018, 1, 1),
   _Q1A(PROF_TRIGGER_7, 0x0001, B6, USER, 0x0000001c, 1, 1),
   _Q1A(LAUNCHED_WARPS, 0x0001, B6, LAUNCH, 0x00000004, 1, 1),
   _Q1A(LAUNCHED_THREADS, 0x003f, B6, LAUNCH, 0x398a4188, 1, 1),
   _Q1B(LAUNCHED_CTA, 0x0001, B6, WARP, 0x0000001c, 1, 1),
   _Q1A(INST_ISSUED1, 0x0001, B6, ISSUE, 0x00000004, 1, 1),
   _Q1A(INST_ISSUED2, 0x0001, B6, ISSUE, 0x00000008, 1, 1),
   _Q1A(INST_ISSUED, 0x0003, B6, ISSUE, 0x00000104, 1, 1),
   _Q1A(INST_EXECUTED, 0x0003, B6, EXEC, 0x00000398, 1, 1),
   _Q1A(LD_SHARED, 0x0001, B6, LDST, 0x00000000, 1, 1),
   _Q1A(ST_SHARED, 0x0001, B6, LDST, 0x00000004, 1, 1),
   _Q1A(LD_LOCAL, 0x0001, B6, LDST, 0x00000008, 1, 1),
   _Q1A(ST_LOCAL, 0x0001, B6, LDST, 0x0000000c, 1, 1),
   _Q1A(GLD_REQUEST, 0x0001, B6, LDST, 0x00000010, 1, 1),
   _Q1A(GST_REQUEST, 0x0001, B6, LDST, 0x00000014, 1, 1),
   _Q1B(L1_LOCAL_LOAD_HIT, 0x0001, B6, L1, 0x00000000, 1, 1),
   _Q1B(L1_LOCAL_LOAD_MISS, 0x0001, B6, L1, 0x00000004, 1, 1),
   _Q1B(L1_LOCAL_STORE_HIT, 0x0001, B6, L1, 0x00000008, 1, 1),
   _Q1B(L1_LOCAL_STORE_MISS, 0x0001, B6, L1, 0x0000000c, 1, 1),
   _Q1B(L1_GLOBAL_LOAD_HIT, 0x0001, B6, L1, 0x00000010, 1, 1),
   _Q1B(L1_GLOBAL_LOAD_MISS, 0x0001, B6, L1, 0x00000014, 1, 1),
   _Q1B(GLD_TRANSACTIONS_UNCACHED, 0x0001, B6, MEM, 0x00000000, 1, 1),
   _Q1B(GST_TRANSACTIONS, 0x0001, B6, MEM, 0x00000004, 1, 1),
   _Q1A(BRANCH, 0x0001, B6, BRANCH, 0x0000000c, 1, 1),
   _Q1A(BRANCH_DIVERGENT, 0x0001, B6, BRANCH, 0x00000010, 1, 1),
   _Q1B(ACTIVE_WARPS, 0x003f, B6, WARP, 0x31483104, 2, 1),
   _Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1),
   _Q1A(ATOM_COUNT, 0x0001, B6, BRANCH, 0x00000000, 1, 1),
   _Q1A(GRED_COUNT, 0x0001, B6, BRANCH, 0x00000008, 1, 1),
   _Q1B(LD_SHARED_REPLAY, 0x0001, B6, REPLAY, 0x00000008, 1, 1),
   _Q1B(ST_SHARED_REPLAY, 0x0001, B6, REPLAY, 0x0000000c, 1, 1),
   _Q1B(LD_LOCAL_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000000, 1, 1),
   _Q1B(ST_LOCAL_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000004, 1, 1),
   _Q1B(L1_LD_SHARED_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000008, 1, 1),
   _Q1B(L1_ST_SHARED_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x0000000c, 1, 1),
   _Q1B(GLD_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000010, 1, 1),
   _Q1B(GST_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000014, 1, 1),
   _M2AB(IPC, 0x3, B6, EXEC, 0x398, 0xffff, LOGOP, WARP, 0x0, DIV_SUM_M0, 10, 1),
   _M2AB(IPAC, 0x3, B6, EXEC, 0x398, 0x1, B6, WARP, 0x0, AVG_DIV_MM, 10, 1),
   _M2A(IPEC, 0x3, B6, EXEC, 0x398, 0xe, LOGOP, EXEC, 0x398, AVG_DIV_MM, 10, 1),
   _M2A(INST_REPLAY_OHEAD, 0x3, B6, ISSUE, 0x104, 0x3, B6, EXEC, 0x398, REL_SUM_MM, 100, 1),
   _M2B(MP_OCCUPANCY, 0x3f, B6, WARP, 0x31483104, 0x01, B6, WARP, 0x0, AVG_DIV_MM, 200, 64),
   _M2B(MP_EFFICIENCY, 0x01, B6, WARP, 0x0, 0xffff, LOGOP, WARP, 0x0, AVG_DIV_M0, 100, 1),
};
928
929 #undef _Q1A
930 #undef _Q1B
931 #undef _M2A
932 #undef _M2B
933
934 /* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */
/* Fermi (NVC0 ISA) compute kernel, pre-assembled, that reads the eight MP
 * performance counters ($pm0..$pm7) and stores them to the query buffer.
 * Only thread 0 of each MP does the store (the "(not $p0) exit" guard);
 * the MP index is extracted from $physid and each MP writes a 36-byte
 * record ("mul $r8 u32 $r8 u32 36"): 8 counter words followed by the
 * sequence word taken from c0[0x8].  The buffer address comes from
 * c0[0x0]/c0[0x4]; see the input[] setup in nvc0_mp_pm_query_end(). */
static const uint64_t nvc0_read_mp_pm_counters_code[] =
{
   /* mov b32 $r8 $tidx
    * mov b32 $r9 $physid
    * mov b32 $r0 $pm0
    * mov b32 $r1 $pm1
    * mov b32 $r2 $pm2
    * mov b32 $r3 $pm3
    * mov b32 $r4 $pm4
    * mov b32 $r5 $pm5
    * mov b32 $r6 $pm6
    * mov b32 $r7 $pm7
    * set $p0 0x1 eq u32 $r8 0x0
    * mov b32 $r10 c0[0x0]
    * mov b32 $r11 c0[0x4]
    * ext u32 $r8 $r9 0x414
    * (not $p0) exit
    * mul $r8 u32 $r8 u32 36
    * add b32 $r10 $c $r10 $r8
    * add b32 $r11 $r11 0x0 $c
    * mov b32 $r8 c0[0x8]
    * st b128 wt g[$r10d+0x00] $r0q
    * st b128 wt g[$r10d+0x10] $r4q
    * st b32 wt g[$r10d+0x20] $r8
    * exit */
   0x2c00000084021c04ULL,
   0x2c0000000c025c04ULL,
   0x2c00000010001c04ULL,
   0x2c00000014005c04ULL,
   0x2c00000018009c04ULL,
   0x2c0000001c00dc04ULL,
   0x2c00000020011c04ULL,
   0x2c00000024015c04ULL,
   0x2c00000028019c04ULL,
   0x2c0000002c01dc04ULL,
   0x190e0000fc81dc03ULL,
   0x2800400000029de4ULL,
   0x280040001002dde4ULL,
   0x7000c01050921c03ULL,
   0x80000000000021e7ULL,
   0x1000000090821c02ULL,
   0x4801000020a29c03ULL,
   0x0800000000b2dc42ULL,
   0x2800400020021de4ULL,
   0x9400000000a01fc5ULL,
   0x9400000040a11fc5ULL,
   0x9400000080a21f85ULL,
   0x8000000000001de7ULL
};
984
/* User-visible names of the NVC0 MP performance queries.
 * The order is load-bearing: entry i is reported for query type
 * NVC0_PM_QUERY(i) by nvc0_screen_get_driver_query_info(), so this list
 * must stay in sync with the nvc0_mp_pm_queries[] table below. */
static const char *nvc0_pm_query_names[] =
{
   /* MP counters */
   "inst_executed",
   "branch",
   "divergent_branch",
   "active_warps",
   "active_cycles",
   "warps_launched",
   "threads_launched",
   "shared_load",
   "shared_store",
   "local_load",
   "local_store",
   "gred_count",
   "atom_count",
   "gld_request",
   "gst_request",
   "inst_issued1_0",
   "inst_issued1_1",
   "inst_issued2_0",
   "inst_issued2_1",
   "thread_inst_executed_0",
   "thread_inst_executed_1",
   "thread_inst_executed_2",
   "thread_inst_executed_3",
   "prof_trigger_00",
   "prof_trigger_01",
   "prof_trigger_02",
   "prof_trigger_03",
   "prof_trigger_04",
   "prof_trigger_05",
   "prof_trigger_06",
   "prof_trigger_07",
};
1020
/* Build a single-counter NVC0 MP PM query config.
 * Arguments (mapping inferred from the field usage in
 * nvc0_mp_pm_query_begin(); confirm against struct nvc0_mp_pm_query_cfg):
 *   n          - NVC0_PM_QUERY_* suffix used as the array index
 *   f          - counter function mask (written as func << 4 | mode)
 *   m          - NVC0_COMPUTE_MP_PM_OP_MODE_* suffix
 *   g          - signal-group select (sig_sel)
 *   c          - number of sources to program (num_src)
 *   s0..s5     - per-source select bytes, packed into one 48-bit src_sel
 * Every query uses a single counter in signal domain 0 with OPn_SUM and a
 * 1/1 normalization. */
#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_PM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }

/* Hardware counter configurations for the NVC0 (Fermi) MP performance
 * queries; indexed by NVC0_PM_QUERY(i), names in nvc0_pm_query_names[]. */
static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] =
{
   _Q(INST_EXECUTED,   0xaaaa, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 0x00, 0x00),
   _Q(BRANCH,          0xaaaa, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00),
   _Q(BRANCH_DIVERGENT, 0xaaaa, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 0x00, 0x00),
   _Q(ACTIVE_WARPS,    0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
   _Q(ACTIVE_CYCLES,   0xaaaa, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(LAUNCHED_WARPS,  0xaaaa, LOGOP, 0x26, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(LAUNCHED_THREADS, 0xaaaa, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
   _Q(LD_SHARED,       0xaaaa, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(ST_SHARED,       0xaaaa, LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(LD_LOCAL,        0xaaaa, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(ST_LOCAL,        0xaaaa, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(GRED_COUNT,      0xaaaa, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(ATOM_COUNT,      0xaaaa, LOGOP, 0x63, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(GLD_REQUEST,     0xaaaa, LOGOP, 0x64, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(GST_REQUEST,     0xaaaa, LOGOP, 0x64, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED1_0,  0xaaaa, LOGOP, 0x7e, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED1_1,  0xaaaa, LOGOP, 0x7e, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED2_0,  0xaaaa, LOGOP, 0x7e, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED2_1,  0xaaaa, LOGOP, 0x7e, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(TH_INST_EXECUTED_0, 0xaaaa, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(TH_INST_EXECUTED_1, 0xaaaa, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(TH_INST_EXECUTED_2, 0xaaaa, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(TH_INST_EXECUTED_3, 0xaaaa, LOGOP, 0xa6, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(PROF_TRIGGER_0,  0xaaaa, LOGOP, 0x01, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_1,  0xaaaa, LOGOP, 0x01, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_2,  0xaaaa, LOGOP, 0x01, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_3,  0xaaaa, LOGOP, 0x01, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_4,  0xaaaa, LOGOP, 0x01, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_5,  0xaaaa, LOGOP, 0x01, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_6,  0xaaaa, LOGOP, 0x01, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_7,  0xaaaa, LOGOP, 0x01, 1, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00),
};
1057
1058 #undef _Q
1059
1060 static const struct nvc0_mp_pm_query_cfg *
1061 nvc0_mp_pm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q)
1062 {
1063 struct nvc0_screen *screen = nvc0->screen;
1064
1065 if (screen->base.class_3d >= NVE4_3D_CLASS)
1066 return &nve4_mp_pm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];
1067 return &nvc0_mp_pm_queries[q->type - NVC0_PM_QUERY(0)];
1068 }
1069
/* Start a MP performance query: reserve hardware counter slots and program
 * the selected signals into the MP PM units.
 *
 * There are 8 counter slots, 4 per signal domain (A/B).  If the request
 * cannot be satisfied together with the already-active queries, the query
 * is silently not started (only an error message is printed). */
void
nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   const boolean is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
   const struct nvc0_mp_pm_query_cfg *cfg;
   unsigned i, c;
   unsigned num_ab[2] = { 0, 0 };

   cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);

   /* check if we have enough free counter slots */
   for (i = 0; i < cfg->num_counters; ++i)
      num_ab[cfg->ctr[i].sig_dom]++;

   if (screen->pm.num_mp_pm_active[0] + num_ab[0] > 4 ||
       screen->pm.num_mp_pm_active[1] + num_ab[1] > 4) {
      NOUVEAU_ERR("Not enough free MP counter slots !\n");
      return;
   }

   assert(cfg->num_counters <= 4);
   /* worst case: 4 counters x 8 words each (x6 sources on Fermi) + enable */
   PUSH_SPACE(push, 4 * 8 * (is_nve4 ? 1 : 6) + 6);

   /* SW (kernel-handled) method; presumably enables PM counter access for
    * this channel -- NOTE(review): confirm against the nouveau kernel ABI */
   if (!screen->pm.mp_counters_enabled) {
      screen->pm.mp_counters_enabled = TRUE;
      BEGIN_NVC0(push, SUBC_SW(0x06ac), 1);
      PUSH_DATA (push, 0x1fcb);
   }

   /* set sequence field to 0 (used to check if result is available) */
   /* NOTE(review): the 10-word stride / offset here should be confirmed
    * against the result-buffer layouts read back in
    * nvc0_mp_pm_query_read_data()/nve4_mp_pm_query_read_data() */
   for (i = 0; i < screen->mp_count; ++i)
      q->data[i * 10 + 10] = 0;

   for (i = 0; i < cfg->num_counters; ++i) {
      const unsigned d = cfg->ctr[i].sig_dom;

      /* first counter in this domain: enable the domain via SW method,
       * keeping the other domain's bit set if it is already in use */
      if (!screen->pm.num_mp_pm_active[d]) {
         uint32_t m = (1 << 22) | (1 << (7 + (8 * !d)));
         if (screen->pm.num_mp_pm_active[!d])
            m |= 1 << (7 + (8 * d));
         BEGIN_NVC0(push, SUBC_SW(0x0600), 1);
         PUSH_DATA (push, m);
      }
      screen->pm.num_mp_pm_active[d]++;

      /* claim the first free slot in this domain's range [d*4, d*4+3] */
      for (c = d * 4; c < (d * 4 + 4); ++c) {
         if (!screen->pm.mp_counter[c]) {
            q->ctr[i] = c;
            screen->pm.mp_counter[c] = (struct pipe_query *)q;
            break;
         }
      }
      assert(c <= (d * 4 + 3)); /* must succeed, already checked for space */

      /* configure and reset the counter(s) */
      if (is_nve4) {
         if (d == 0)
            BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_A_SIGSEL(c & 3)), 1);
         else
            BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_B_SIGSEL(c & 3)), 1);
         PUSH_DATA (push, cfg->ctr[i].sig_sel);
         BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SRCSEL(c)), 1);
         PUSH_DATA (push, cfg->ctr[i].src_sel + 0x2108421 * (c & 3));
         BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 1);
         PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
         BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SET(c)), 1);
         PUSH_DATA (push, 0);
      } else {
         unsigned s;

         /* Fermi programs each of the counter's sources individually */
         for (s = 0; s < cfg->ctr[i].num_src; s++) {
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SIGSEL(s)), 1);
            PUSH_DATA (push, cfg->ctr[i].sig_sel);
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SRCSEL(s)), 1);
            PUSH_DATA (push, (cfg->ctr[i].src_sel >> (s * 8)) & 0xff);
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(s)), 1);
            PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SET(s)), 1);
            PUSH_DATA (push, 0);
         }
      }
   }
}
1155
/* Finish a MP performance query: stop counting, launch the readout compute
 * kernel to copy the counter values into the query buffer, release this
 * query's counter slots, and re-enable the counters that belong to other
 * still-active queries. */
static void
nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct pipe_context *pipe = &nvc0->base.pipe;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   const boolean is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
   uint32_t mask;
   uint32_t input[3];
   const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
   const uint grid[3] = { screen->mp_count, 1, 1 };
   unsigned c;
   const struct nvc0_mp_pm_query_cfg *cfg;

   cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);

   /* lazily build the chipset-specific readout program on first use;
    * it lives in the screen and is shared by all queries */
   if (unlikely(!screen->pm.prog)) {
      struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
      prog->type = PIPE_SHADER_COMPUTE;
      prog->translated = TRUE;
      prog->num_gprs = 14;
      prog->parm_size = 12; /* buffer address (lo/hi) + sequence number */
      if (is_nve4) {
         prog->code = (uint32_t *)nve4_read_mp_pm_counters_code;
         prog->code_size = sizeof(nve4_read_mp_pm_counters_code);
      } else {
         prog->code = (uint32_t *)nvc0_read_mp_pm_counters_code;
         prog->code_size = sizeof(nvc0_read_mp_pm_counters_code);
      }
      screen->pm.prog = prog;
   }

   /* disable all counting */
   PUSH_SPACE(push, 8);
   for (c = 0; c < 8; ++c)
      if (screen->pm.mp_counter[c]) {
         if (is_nve4) {
            IMMED_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 0);
         } else {
            IMMED_NVC0(push, NVC0_COMPUTE(MP_PM_OP(c)), 0);
         }
      }
   /* release counters for this query */
   for (c = 0; c < 8; ++c) {
      if (nvc0_query(screen->pm.mp_counter[c]) == q) {
         screen->pm.num_mp_pm_active[c / 4]--;
         screen->pm.mp_counter[c] = NULL;
      }
   }

   BCTX_REFN_bo(nvc0->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR,
                q->bo);

   PUSH_SPACE(push, 1);
   IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);

   /* run the readout kernel: one block per MP, writing counter values and
    * the sequence number at (q->bo->offset + q->base) */
   pipe->bind_compute_state(pipe, screen->pm.prog);
   input[0] = (q->bo->offset + q->base);
   input[1] = (q->bo->offset + q->base) >> 32;
   input[2] = q->sequence;
   pipe->launch_grid(pipe, block, grid, 0, input);

   nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY);

   /* re-activate other counters */
   PUSH_SPACE(push, 16);
   mask = 0;
   for (c = 0; c < 8; ++c) {
      unsigned i;
      q = nvc0_query(screen->pm.mp_counter[c]);
      if (!q)
         continue;
      cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
      for (i = 0; i < cfg->num_counters; ++i) {
         if (mask & (1 << q->ctr[i]))
            break;
         mask |= 1 << q->ctr[i];
         if (is_nve4) {
            BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(q->ctr[i])), 1);
         } else {
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(q->ctr[i])), 1);
         }
         PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
      }
   }
}
1242
1243 static INLINE boolean
1244 nvc0_mp_pm_query_read_data(uint32_t count[32][4],
1245 struct nvc0_context *nvc0, boolean wait,
1246 struct nvc0_query *q,
1247 const struct nvc0_mp_pm_query_cfg *cfg,
1248 unsigned mp_count)
1249 {
1250 unsigned p, c;
1251
1252 for (p = 0; p < mp_count; ++p) {
1253 const unsigned b = (0x24 / 4) * p;
1254
1255 for (c = 0; c < cfg->num_counters; ++c) {
1256 if (q->data[b + 8] != q->sequence) {
1257 if (!wait)
1258 return FALSE;
1259 if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
1260 return FALSE;
1261 }
1262 count[p][c] = q->data[b + q->ctr[c]];
1263 }
1264 }
1265 return TRUE;
1266 }
1267
/* Collect the raw NVE4+ (Kepler) MP counter values into count[][].
 *
 * Per-MP record stride is 0x60 bytes.  Judging by the indexing below:
 * domain-B counters (slot index & ~3 non-zero) have a single value at
 * b + 16 + (slot & 3), while domain-A counters (slots 0..3) are the sum
 * of 4 sub-counter values at b + d*4 + slot; the sequence words live at
 * b + 20 + d.  NOTE(review): layout inferred from this code -- confirm
 * against the NVE4 readout kernel.
 *
 * Returns FALSE when the results are not ready yet and either wait is
 * FALSE or waiting on the buffer object failed. */
static INLINE boolean
nve4_mp_pm_query_read_data(uint32_t count[32][4],
                           struct nvc0_context *nvc0, boolean wait,
                           struct nvc0_query *q,
                           const struct nvc0_mp_pm_query_cfg *cfg,
                           unsigned mp_count)
{
   unsigned p, c, d;

   for (p = 0; p < mp_count; ++p) {
      const unsigned b = (0x60 / 4) * p;

      for (c = 0; c < cfg->num_counters; ++c) {
         count[p][c] = 0;
         /* domain-B slots: one value, one iteration; domain-A: sum 4 */
         for (d = 0; d < ((q->ctr[c] & ~3) ? 1 : 4); ++d) {
            if (q->data[b + 20 + d] != q->sequence) {
               if (!wait)
                  return FALSE;
               if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
                  return FALSE;
            }
            if (q->ctr[c] & ~0x3)
               count[p][c] = q->data[b + 16 + (q->ctr[c] & 3)];
            else
               count[p][c] += q->data[b + d * 4 + q->ctr[c]];
         }
      }
   }
   return TRUE;
}
1298
1299 /* Metric calculations:
1300 * sum(x) ... sum of x over all MPs
1301 * avg(x) ... average of x over all MPs
1302 *
1303 * IPC : sum(inst_executed) / clock
1304 * INST_REPLAY_OHEAD: (sum(inst_issued) - sum(inst_executed)) / sum(inst_issued)
1305 * MP_OCCUPANCY : avg((active_warps / 64) / active_cycles)
1306 * MP_EFFICIENCY : avg(active_cycles / clock)
1307 *
1308 * NOTE: Interpretation of IPC requires knowledge of MP count.
1309 */
1310 static boolean
1311 nvc0_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
1312 void *result, boolean wait)
1313 {
1314 uint32_t count[32][4];
1315 uint64_t value = 0;
1316 unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32);
1317 unsigned p, c;
1318 const struct nvc0_mp_pm_query_cfg *cfg;
1319 boolean ret;
1320
1321 cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
1322
1323 if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
1324 ret = nve4_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
1325 else
1326 ret = nvc0_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
1327 if (!ret)
1328 return FALSE;
1329
1330 if (cfg->op == NVC0_COUNTER_OPn_SUM) {
1331 for (c = 0; c < cfg->num_counters; ++c)
1332 for (p = 0; p < mp_count; ++p)
1333 value += count[p][c];
1334 value = (value * cfg->norm[0]) / cfg->norm[1];
1335 } else
1336 if (cfg->op == NVC0_COUNTER_OPn_OR) {
1337 uint32_t v = 0;
1338 for (c = 0; c < cfg->num_counters; ++c)
1339 for (p = 0; p < mp_count; ++p)
1340 v |= count[p][c];
1341 value = ((uint64_t)v * cfg->norm[0]) / cfg->norm[1];
1342 } else
1343 if (cfg->op == NVC0_COUNTER_OPn_AND) {
1344 uint32_t v = ~0;
1345 for (c = 0; c < cfg->num_counters; ++c)
1346 for (p = 0; p < mp_count; ++p)
1347 v &= count[p][c];
1348 value = ((uint64_t)v * cfg->norm[0]) / cfg->norm[1];
1349 } else
1350 if (cfg->op == NVC0_COUNTER_OP2_REL_SUM_MM) {
1351 uint64_t v[2] = { 0, 0 };
1352 for (p = 0; p < mp_count; ++p) {
1353 v[0] += count[p][0];
1354 v[1] += count[p][1];
1355 }
1356 if (v[0])
1357 value = ((v[0] - v[1]) * cfg->norm[0]) / (v[0] * cfg->norm[1]);
1358 } else
1359 if (cfg->op == NVC0_COUNTER_OP2_DIV_SUM_M0) {
1360 for (p = 0; p < mp_count; ++p)
1361 value += count[p][0];
1362 if (count[0][1])
1363 value = (value * cfg->norm[0]) / (count[0][1] * cfg->norm[1]);
1364 else
1365 value = 0;
1366 } else
1367 if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_MM) {
1368 unsigned mp_used = 0;
1369 for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
1370 if (count[p][1])
1371 value += (count[p][0] * cfg->norm[0]) / count[p][1];
1372 if (mp_used)
1373 value /= (uint64_t)mp_used * cfg->norm[1];
1374 } else
1375 if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_M0) {
1376 unsigned mp_used = 0;
1377 for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
1378 value += count[p][0];
1379 if (count[0][1] && mp_used) {
1380 value *= cfg->norm[0];
1381 value /= (uint64_t)count[0][1] * mp_used * cfg->norm[1];
1382 } else {
1383 value = 0;
1384 }
1385 }
1386
1387 *(uint64_t *)result = value;
1388 return TRUE;
1389 }
1390
/* pipe_screen::get_driver_query_info implementation.
 *
 * With info == NULL, returns the total number of driver-specific queries
 * (driver statistics plus, when the DRM is new enough, the chipset's MP
 * performance queries).  Otherwise fills *info for query id and returns 1,
 * or returns 0 with a placeholder entry for an out-of-range id. */
int
nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
                                  unsigned id,
                                  struct pipe_driver_query_info *info)
{
   struct nvc0_screen *screen = nvc0_screen(pscreen);
   int count = 0;

   count += NVC0_QUERY_DRV_STAT_COUNT;

   /* MP counters need a DRM that supports compute/PM access */
   if (screen->base.device->drm_version >= 0x01000101) {
      if (screen->base.class_3d >= NVE4_3D_CLASS) {
         count += NVE4_PM_QUERY_COUNT;
      } else
      if (screen->compute) {
         count += NVC0_PM_QUERY_COUNT; /* NVC0_COMPUTE is not always enabled */
      }
   }

   if (!info)
      return count;

#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
   /* driver statistics occupy ids [0, NVC0_QUERY_DRV_STAT_COUNT) */
   if (id < NVC0_QUERY_DRV_STAT_COUNT) {
      info->name = nvc0_drv_stat_names[id];
      info->query_type = NVC0_QUERY_DRV_STAT(id);
      info->max_value = ~0ULL;
      info->uses_byte_units = !!strstr(info->name, "bytes");
      return 1;
   } else
#endif
   if (id < count) {
      if (screen->base.class_3d >= NVE4_3D_CLASS) {
         info->name = nve4_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
         info->query_type = NVE4_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
         /* metric queries from MP_OCCUPANCY on are percentages */
         info->max_value = (id < NVE4_PM_QUERY_METRIC_MP_OCCUPANCY) ?
            ~0ULL : 100;
         info->uses_byte_units = FALSE;
         return 1;
      } else
      if (screen->compute) {
         info->name = nvc0_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
         info->query_type = NVC0_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
         info->max_value = ~0ULL;
         info->uses_byte_units = FALSE;
         return 1;
      }
   }
   /* user asked for info about non-existing query */
   info->name = "this_is_not_the_query_you_are_looking_for";
   info->query_type = 0xdeadd01d;
   info->max_value = 0;
   info->uses_byte_units = FALSE;
   return 0;
}
1446
1447 void
1448 nvc0_init_query_functions(struct nvc0_context *nvc0)
1449 {
1450 struct pipe_context *pipe = &nvc0->base.pipe;
1451
1452 pipe->create_query = nvc0_query_create;
1453 pipe->destroy_query = nvc0_query_destroy;
1454 pipe->begin_query = nvc0_query_begin;
1455 pipe->end_query = nvc0_query_end;
1456 pipe->get_query_result = nvc0_query_result;
1457 pipe->render_condition = nvc0_render_condition;
1458 }