nv50,nvc0: Mark PIPE_QUERY_TIMESTAMP_DISJOINT as ready immediately
[mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_query.c
/*
 * Copyright 2011 Nouveau Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Christoph Bumiller
 */

#define NVC0_PUSH_EXPLICIT_SPACE_CHECKING

#include "nvc0/nvc0_context.h"
#include "nv_object.xml.h"
#include "nvc0/nve4_compute.xml.h"
#include "nvc0/nvc0_compute.xml.h"

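/* Query lifecycle: a query starts out READY, becomes ACTIVE on begin_query,
 * ENDED once nvc0_query_end has emitted its report commands, and FLUSHED
 * after the push buffer holding them was kicked; it returns to READY when
 * the report (sequence number or fence) is seen to have landed in memory.
 */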
#define NVC0_QUERY_STATE_READY   0
#define NVC0_QUERY_STATE_ACTIVE  1
#define NVC0_QUERY_STATE_ENDED   2
#define NVC0_QUERY_STATE_FLUSHED 3

struct nvc0_query {
   uint32_t *data;
   uint16_t type;
   uint16_t index;
   int8_t ctr[4];
   uint32_t sequence;
   struct nouveau_bo *bo;
   uint32_t base;
   uint32_t offset; /* base + i * rotate */
   uint8_t state;
   boolean is64bit;
   uint8_t rotate;
   int nesting; /* only used for occlusion queries */
   union {
      struct nouveau_mm_allocation *mm;
      uint64_t value;
   } u;
   struct nouveau_fence *fence;
};

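/* Query data lives in fenced GART memory, suballocated via nouveau_mm in
 * chunks of NVC0_QUERY_ALLOC_SPACE bytes. Occlusion queries additionally
 * rotate through 32-byte slots within that space (see nvc0_query_rotate),
 * so restarting one does not overwrite a result that is still in flight.
 */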
#define NVC0_QUERY_ALLOC_SPACE 256

static void nvc0_mp_pm_query_begin(struct nvc0_context *, struct nvc0_query *);
static void nvc0_mp_pm_query_end(struct nvc0_context *, struct nvc0_query *);
static boolean nvc0_mp_pm_query_result(struct nvc0_context *,
                                       struct nvc0_query *, void *, boolean);

static INLINE struct nvc0_query *
nvc0_query(struct pipe_query *pipe)
{
   return (struct nvc0_query *)pipe;
}

static boolean
nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
{
   struct nvc0_screen *screen = nvc0->screen;
   int ret;

   if (q->bo) {
      nouveau_bo_ref(NULL, &q->bo);
      if (q->u.mm) {
         if (q->state == NVC0_QUERY_STATE_READY)
            nouveau_mm_free(q->u.mm);
         else
            nouveau_fence_work(screen->base.fence.current,
                               nouveau_mm_free_work, q->u.mm);
      }
   }
   if (size) {
      q->u.mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
      if (!q->bo)
         return FALSE;
      q->offset = q->base;

      ret = nouveau_bo_map(q->bo, 0, screen->base.client);
      if (ret) {
         nvc0_query_allocate(nvc0, q, 0);
         return FALSE;
      }
      q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
   }
   return TRUE;
}

static void
nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
{
   nvc0_query_allocate(nvc0_context(pipe), nvc0_query(pq), 0);
   nouveau_fence_ref(NULL, &nvc0_query(pq)->fence);
   FREE(nvc0_query(pq));
}

static struct pipe_query *
nvc0_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_query *q;
   unsigned space = NVC0_QUERY_ALLOC_SPACE;

   q = CALLOC_STRUCT(nvc0_query);
   if (!q)
      return NULL;

   switch (type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      q->rotate = 32;
      space = NVC0_QUERY_ALLOC_SPACE;
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      q->is64bit = TRUE;
      space = 512;
      break;
   case PIPE_QUERY_SO_STATISTICS:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      q->is64bit = TRUE;
      space = 64;
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      q->is64bit = TRUE;
      q->index = index;
      space = 32;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_GPU_FINISHED:
      space = 32;
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      space = 16;
      break;
   default:
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
      if (type >= NVC0_QUERY_DRV_STAT(0) && type <= NVC0_QUERY_DRV_STAT_LAST) {
         space = 0;
         q->is64bit = TRUE;
         q->index = type - NVC0_QUERY_DRV_STAT(0);
         break;
      } else
#endif
      if (nvc0->screen->base.device->drm_version >= 0x01000101) {
         if (type >= NVE4_PM_QUERY(0) && type <= NVE4_PM_QUERY_LAST) {
            /* for each MP:
             * [00] = WS0.C0
             * [04] = WS0.C1
             * [08] = WS0.C2
             * [0c] = WS0.C3
             * [10] = WS1.C0
             * [14] = WS1.C1
             * [18] = WS1.C2
             * [1c] = WS1.C3
             * [20] = WS2.C0
             * [24] = WS2.C1
             * [28] = WS2.C2
             * [2c] = WS2.C3
             * [30] = WS3.C0
             * [34] = WS3.C1
             * [38] = WS3.C2
             * [3c] = WS3.C3
             * [40] = MP.C4
             * [44] = MP.C5
             * [48] = MP.C6
             * [4c] = MP.C7
             * [50] = WS0.sequence
             * [54] = WS1.sequence
             * [58] = WS2.sequence
             * [5c] = WS3.sequence
             */
            space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t);
            break;
         } else
         if (type >= NVC0_PM_QUERY(0) && type <= NVC0_PM_QUERY_LAST) {
            /* for each MP:
             * [00] = MP.C0
             * [04] = MP.C1
             * [08] = MP.C2
             * [0c] = MP.C3
             * [10] = MP.C4
             * [14] = MP.C5
             * [18] = MP.C6
             * [1c] = MP.C7
             * [20] = MP.sequence
             */
            space = (8 + 1) * nvc0->screen->mp_count * sizeof(uint32_t);
            break;
         }
      }
      debug_printf("invalid query type: %u\n", type);
      FREE(q);
      return NULL;
   }
   if (!nvc0_query_allocate(nvc0, q, space)) {
      FREE(q);
      return NULL;
   }

   q->type = type;

   if (q->rotate) {
      /* we advance before query_begin ! */
      q->offset -= q->rotate;
      q->data -= q->rotate / sizeof(*q->data);
   } else
   if (!q->is64bit)
      q->data[0] = 0; /* initialize sequence */

   return (struct pipe_query *)q;
}

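/* Emit a QUERY_GET: the 'get' argument is the raw parameter for the 3D
 * engine's QUERY_GET method and selects the report type and source unit,
 * e.g. 0x00005002 requests a timestamp report and 0x0100f002 the sample
 * counter used by occlusion queries (values as used throughout this file).
 */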
static void
nvc0_query_get(struct nouveau_pushbuf *push, struct nvc0_query *q,
               unsigned offset, uint32_t get)
{
   offset += q->offset;

   PUSH_SPACE(push, 5);
   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
   BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
   PUSH_DATAh(push, q->bo->offset + offset);
   PUSH_DATA (push, q->bo->offset + offset);
   PUSH_DATA (push, q->sequence);
   PUSH_DATA (push, get);
}

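/* Advance an occlusion query to its next 32-byte slot, allocating a fresh
 * buffer once the current allocation has been used up.
 */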
static void
nvc0_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   q->offset += q->rotate;
   q->data += q->rotate / sizeof(*q->data);
   if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
      nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
}

static void
nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q = nvc0_query(pq);

   /* For occlusion queries we have to change the storage, because a previous
    * query might set the initial render condition to FALSE even *after* we
    * re-initialized it to TRUE.
    */
   if (q->rotate) {
      nvc0_query_rotate(nvc0, q);

      /* XXX: can we do this with the GPU, and sync with respect to a previous
       * query ?
       */
      q->data[0] = q->sequence; /* initialize sequence */
      q->data[1] = 1; /* initial render condition = TRUE */
      q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
      q->data[5] = 0;
   }
   q->sequence++;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      q->nesting = nvc0->screen->num_occlusion_queries_active++;
      if (q->nesting) {
         nvc0_query_get(push, q, 0x10, 0x0100f002);
      } else {
         PUSH_SPACE(push, 3);
         BEGIN_NVC0(push, NVC0_3D(COUNTER_RESET), 1);
         PUSH_DATA (push, NVC0_3D_COUNTER_RESET_SAMPLECNT);
         IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nvc0_query_get(push, q, 0x10, 0x09005002 | (q->index << 5));
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nvc0_query_get(push, q, 0x10, 0x05805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nvc0_query_get(push, q, 0x20, 0x05805002 | (q->index << 5));
      nvc0_query_get(push, q, 0x30, 0x06805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      nvc0_query_get(push, q, 0x10, 0x03005002 | (q->index << 5));
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      nvc0_query_get(push, q, 0x10, 0x00005002);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      nvc0_query_get(push, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
      nvc0_query_get(push, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
      nvc0_query_get(push, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
      nvc0_query_get(push, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
      nvc0_query_get(push, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nvc0_query_get(push, q, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nvc0_query_get(push, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nvc0_query_get(push, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
      nvc0_query_get(push, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
      nvc0_query_get(push, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
      break;
   default:
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
      if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
          q->type <= NVC0_QUERY_DRV_STAT_LAST) {
         if (q->index >= 5)
            q->u.value = nvc0->screen->base.stats.v[q->index];
         else
            q->u.value = 0;
      } else
#endif
      if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
          (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
         nvc0_mp_pm_query_begin(nvc0, q);
      }
      break;
   }
   q->state = NVC0_QUERY_STATE_ACTIVE;
}

static void
nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q = nvc0_query(pq);

   if (q->state != NVC0_QUERY_STATE_ACTIVE) {
      /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
      if (q->rotate)
         nvc0_query_rotate(nvc0, q);
      q->sequence++;
   }
   q->state = NVC0_QUERY_STATE_ENDED;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      nvc0_query_get(push, q, 0, 0x0100f002);
      if (--nvc0->screen->num_occlusion_queries_active == 0) {
         PUSH_SPACE(push, 1);
         IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nvc0_query_get(push, q, 0, 0x09005002 | (q->index << 5));
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nvc0_query_get(push, q, 0, 0x05805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nvc0_query_get(push, q, 0x00, 0x05805002 | (q->index << 5));
      nvc0_query_get(push, q, 0x10, 0x06805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      /* TODO: How do we sum over all streams for render condition ? */
      /* PRIMS_DROPPED doesn't write sequence, use a ZERO query to sync on */
      nvc0_query_get(push, q, 0x00, 0x03005002 | (q->index << 5));
      nvc0_query_get(push, q, 0x20, 0x00005002);
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIME_ELAPSED:
      nvc0_query_get(push, q, 0, 0x00005002);
      break;
   case PIPE_QUERY_GPU_FINISHED:
      nvc0_query_get(push, q, 0, 0x1000f010);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      nvc0_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
      nvc0_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
      nvc0_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
      nvc0_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
      nvc0_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nvc0_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nvc0_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nvc0_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
      nvc0_query_get(push, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
      nvc0_query_get(push, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      /* indexed by TFB buffer instead of by vertex stream */
      nvc0_query_get(push, q, 0x00, 0x0d005002 | (q->index << 5));
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* This query is never issued to the GPU; disjoint is forced to FALSE,
       * so mark it as ready immediately.
       */
      q->state = NVC0_QUERY_STATE_READY;
      break;
   default:
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
      if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
          q->type <= NVC0_QUERY_DRV_STAT_LAST) {
         q->u.value = nvc0->screen->base.stats.v[q->index] - q->u.value;
         return;
      } else
#endif
      if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
          (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
         nvc0_mp_pm_query_end(nvc0, q);
      }
      break;
   }
   if (q->is64bit)
      nouveau_fence_ref(nvc0->screen->base.fence.current, &q->fence);
}

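/* Check whether a query's report has arrived: 32-bit queries write a
 * sequence number next to their data, which we compare against the value
 * expected for the most recent begin/end pair; 64-bit queries write no
 * sequence, so we rely on the fence referenced in nvc0_query_end instead.
 */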
static INLINE void
nvc0_query_update(struct nouveau_client *cli, struct nvc0_query *q)
{
   if (q->is64bit) {
      if (nouveau_fence_signalled(q->fence))
         q->state = NVC0_QUERY_STATE_READY;
   } else {
      if (q->data[0] == q->sequence)
         q->state = NVC0_QUERY_STATE_READY;
   }
}

static boolean
nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
                  boolean wait, union pipe_query_result *result)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_query *q = nvc0_query(pq);
   uint64_t *res64 = (uint64_t*)result;
   uint32_t *res32 = (uint32_t*)result;
   boolean *res8 = (boolean*)result;
   uint64_t *data64 = (uint64_t *)q->data;
   unsigned i;

#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
   if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
       q->type <= NVC0_QUERY_DRV_STAT_LAST) {
      res64[0] = q->u.value;
      return TRUE;
   } else
#endif
   if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
       (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
      return nvc0_mp_pm_query_result(nvc0, q, result, wait);
   }

   if (q->state != NVC0_QUERY_STATE_READY)
      nvc0_query_update(nvc0->screen->base.client, q);

   if (q->state != NVC0_QUERY_STATE_READY) {
      if (!wait) {
         if (q->state != NVC0_QUERY_STATE_FLUSHED) {
            q->state = NVC0_QUERY_STATE_FLUSHED;
            /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
            PUSH_KICK(nvc0->base.pushbuf);
         }
         return FALSE;
      }
      if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->screen->base.client))
         return FALSE;
      NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1);
   }
   q->state = NVC0_QUERY_STATE_READY;

   switch (q->type) {
   case PIPE_QUERY_GPU_FINISHED:
      res8[0] = TRUE;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
      res64[0] = q->data[1] - q->data[5];
      break;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      res8[0] = q->data[1] != q->data[5];
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
   case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
      res64[0] = data64[0] - data64[2];
      break;
   case PIPE_QUERY_SO_STATISTICS:
      res64[0] = data64[0] - data64[4];
      res64[1] = data64[2] - data64[6];
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      res8[0] = data64[0] != data64[2];
      break;
   case PIPE_QUERY_TIMESTAMP:
      res64[0] = data64[1];
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
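      /* Timestamps are in nanoseconds, so report a fixed 1 GHz frequency;
       * disjoint is always FALSE (see nvc0_query_end), hence the boolean at
       * byte offset 8 of pipe_query_data_timestamp_disjoint.
       */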
      res64[0] = 1000000000;
      res8[8] = FALSE;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      res64[0] = data64[1] - data64[3];
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      for (i = 0; i < 10; ++i)
         res64[i] = data64[i * 2] - data64[24 + i * 2];
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      res32[0] = q->data[1];
      break;
   default:
      assert(0); /* can't happen, we don't create queries with invalid type */
      return FALSE;
   }

   return TRUE;
}

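/* Typical usage from a state tracker, as a sketch (these are the
 * pipe_context hooks installed by nvc0_init_query_functions at the end of
 * this file):
 *
 *    struct pipe_query *q =
 *       pipe->create_query(pipe, PIPE_QUERY_OCCLUSION_COUNTER, 0);
 *    pipe->begin_query(pipe, q);
 *    ... draw ...
 *    pipe->end_query(pipe, q);
 *    union pipe_query_result res;
 *    if (pipe->get_query_result(pipe, q, TRUE, &res))
 *       ... res.u64 holds the sample count ...
 *    pipe->destroy_query(pipe, q);
 */

/* Make the command stream wait (semaphore ACQUIRE_EQUAL) until the query's
 * sequence number has been written, i.e. until its report is complete;
 * used for render conditions that must wait without stalling the CPU.
 */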
void
nvc0_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq)
{
   struct nvc0_query *q = nvc0_query(pq);
   unsigned offset = q->offset;

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) offset += 0x20;

   PUSH_SPACE(push, 5);
   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
   PUSH_DATAh(push, q->bo->offset + offset);
   PUSH_DATA (push, q->bo->offset + offset);
   PUSH_DATA (push, q->sequence);
   PUSH_DATA (push, (1 << 12) |
              NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
}

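/* Bind (or, for pq == NULL, unbind) a query as render condition: COND_MODE
 * controls whether rendering proceeds based on the values at COND_ADDRESS,
 * which we point at the query's report. Without 'wait' we may fall back to
 * COND_MODE_ALWAYS rather than risk evaluating a stale report.
 */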
static void
nvc0_render_condition(struct pipe_context *pipe,
                      struct pipe_query *pq,
                      boolean condition, uint mode)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q;
   uint32_t cond;
   boolean wait =
      mode != PIPE_RENDER_COND_NO_WAIT &&
      mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

   if (!pq) {
      cond = NVC0_3D_COND_MODE_ALWAYS;
   }
   else {
      q = nvc0_query(pq);
      /* NOTE: comparison of 2 queries only works if both have completed */
      switch (q->type) {
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
         cond = condition ? NVC0_3D_COND_MODE_EQUAL :
                            NVC0_3D_COND_MODE_NOT_EQUAL;
         wait = TRUE;
         break;
      case PIPE_QUERY_OCCLUSION_COUNTER:
      case PIPE_QUERY_OCCLUSION_PREDICATE:
         if (likely(!condition)) {
            if (unlikely(q->nesting))
               cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
                             NVC0_3D_COND_MODE_ALWAYS;
            else
               cond = NVC0_3D_COND_MODE_RES_NON_ZERO;
         } else {
            cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
         }
         break;
      default:
         assert(!"render condition query not a predicate");
         cond = NVC0_3D_COND_MODE_ALWAYS;
         break;
      }
   }

   nvc0->cond_query = pq;
   nvc0->cond_cond = condition;
   nvc0->cond_condmode = cond;
   nvc0->cond_mode = mode;

   if (!pq) {
      PUSH_SPACE(push, 1);
      IMMED_NVC0(push, NVC0_3D(COND_MODE), cond);
      return;
   }

   if (wait)
      nvc0_query_fifo_wait(push, pq);

   PUSH_SPACE(push, 7);
   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   BEGIN_NVC0(push, NVC0_3D(COND_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, q->bo->offset + q->offset);
   PUSH_DATA (push, q->bo->offset + q->offset);
   PUSH_DATA (push, cond);
   BEGIN_NVC0(push, NVC0_2D(COND_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, q->bo->offset + q->offset);
   PUSH_DATA (push, q->bo->offset + q->offset);
}

void
nvc0_query_pushbuf_submit(struct nouveau_pushbuf *push,
                          struct pipe_query *pq, unsigned result_offset)
{
   struct nvc0_query *q = nvc0_query(pq);

#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))

   nouveau_pushbuf_space(push, 0, 0, 1);
   nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
                        NVC0_IB_ENTRY_1_NO_PREFETCH);
}

void
nvc0_so_target_save_offset(struct pipe_context *pipe,
                           struct pipe_stream_output_target *ptarg,
                           unsigned index, boolean *serialize)
{
   struct nvc0_so_target *targ = nvc0_so_target(ptarg);

   if (*serialize) {
      *serialize = FALSE;
      PUSH_SPACE(nvc0_context(pipe)->base.pushbuf, 1);
      IMMED_NVC0(nvc0_context(pipe)->base.pushbuf, NVC0_3D(SERIALIZE), 0);

      NOUVEAU_DRV_STAT(nouveau_screen(pipe->screen), gpu_serialize_count, 1);
   }

   nvc0_query(targ->pq)->index = index;

   nvc0_query_end(pipe, targ->pq);
}


/* === DRIVER STATISTICS === */

#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS

static const char *nvc0_drv_stat_names[] =
{
   "drv-tex_obj_current_count",
   "drv-tex_obj_current_bytes",
   "drv-buf_obj_current_count",
   "drv-buf_obj_current_bytes_vid",
   "drv-buf_obj_current_bytes_sys",
   "drv-tex_transfers_rd",
   "drv-tex_transfers_wr",
   "drv-tex_copy_count",
   "drv-tex_blit_count",
   "drv-tex_cache_flush_count",
   "drv-buf_transfers_rd",
   "drv-buf_transfers_wr",
   "drv-buf_read_bytes_staging_vid",
   "drv-buf_write_bytes_direct",
   "drv-buf_write_bytes_staging_vid",
   "drv-buf_write_bytes_staging_sys",
   "drv-buf_copy_bytes",
   "drv-buf_non_kernel_fence_sync_count",
   "drv-any_non_kernel_fence_sync_count",
   "drv-query_sync_count",
   "drv-gpu_serialize_count",
   "drv-draw_calls_array",
   "drv-draw_calls_indexed",
   "drv-draw_calls_fallback_count",
   "drv-user_buffer_upload_bytes",
   "drv-constbuf_upload_count",
   "drv-constbuf_upload_bytes",
   "drv-pushbuf_count",
   "drv-resource_validate_count"
};

#endif /* NOUVEAU_ENABLE_DRIVER_STATISTICS */


/* === PERFORMANCE MONITORING COUNTERS for NVE4+ === */

/* Code to read out MP counters: They are accessible via mmio, too, but let's
 * just avoid mapping registers in userspace. We'd have to know which MPs are
 * enabled/present, too, and that information is not presently exposed.
 * We could add a kernel interface for it, but reading the counters like this
 * has the advantage of being async (if get_result isn't called immediately).
 */
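/* Both readout kernels below receive the destination address in
 * c0[0x0]/c0[0x4] and the expected sequence number in c0[0x8] (cf. the
 * input[] setup in nvc0_mp_pm_query_end); each MP stores its counters
 * followed by that sequence, which the CPU side then polls to know the
 * results have landed.
 */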
static const uint64_t nve4_read_mp_pm_counters_code[] =
{
   /* sched 0x20 0x20 0x20 0x20 0x20 0x20 0x20
    * mov b32 $r8 $tidx
    * mov b32 $r12 $physid
    * mov b32 $r0 $pm0
    * mov b32 $r1 $pm1
    * mov b32 $r2 $pm2
    * mov b32 $r3 $pm3
    * mov b32 $r4 $pm4
    * sched 0x20 0x20 0x23 0x04 0x20 0x04 0x2b
    * mov b32 $r5 $pm5
    * mov b32 $r6 $pm6
    * mov b32 $r7 $pm7
    * set $p0 0x1 eq u32 $r8 0x0
    * mov b32 $r10 c0[0x0]
    * ext u32 $r8 $r12 0x414
    * mov b32 $r11 c0[0x4]
    * sched 0x04 0x2e 0x04 0x20 0x20 0x28 0x04
    * ext u32 $r9 $r12 0x208
    * (not $p0) exit
    * set $p1 0x1 eq u32 $r9 0x0
    * mul $r8 u32 $r8 u32 96
    * mul $r12 u32 $r9 u32 16
    * mul $r13 u32 $r9 u32 4
    * add b32 $r9 $r8 $r13
    * sched 0x28 0x04 0x2c 0x04 0x2c 0x04 0x2c
    * add b32 $r8 $r8 $r12
    * mov b32 $r12 $r10
    * add b32 $r10 $c $r10 $r8
    * mov b32 $r13 $r11
    * add b32 $r11 $r11 0x0 $c
    * add b32 $r12 $c $r12 $r9
    * st b128 wt g[$r10d] $r0q
    * sched 0x4 0x2c 0x20 0x04 0x2e 0x00 0x00
    * mov b32 $r0 c0[0x8]
    * add b32 $r13 $r13 0x0 $c
    * $p1 st b128 wt g[$r12d+0x40] $r4q
    * st b32 wt g[$r12d+0x50] $r0
    * exit */
   0x2202020202020207ULL,
   0x2c00000084021c04ULL,
   0x2c0000000c031c04ULL,
   0x2c00000010001c04ULL,
   0x2c00000014005c04ULL,
   0x2c00000018009c04ULL,
   0x2c0000001c00dc04ULL,
   0x2c00000020011c04ULL,
   0x22b0420042320207ULL,
   0x2c00000024015c04ULL,
   0x2c00000028019c04ULL,
   0x2c0000002c01dc04ULL,
   0x190e0000fc81dc03ULL,
   0x2800400000029de4ULL,
   0x7000c01050c21c03ULL,
   0x280040001002dde4ULL,
   0x204282020042e047ULL,
   0x7000c00820c25c03ULL,
   0x80000000000021e7ULL,
   0x190e0000fc93dc03ULL,
   0x1000000180821c02ULL,
   0x1000000040931c02ULL,
   0x1000000010935c02ULL,
   0x4800000034825c03ULL,
   0x22c042c042c04287ULL,
   0x4800000030821c03ULL,
   0x2800000028031de4ULL,
   0x4801000020a29c03ULL,
   0x280000002c035de4ULL,
   0x0800000000b2dc42ULL,
   0x4801000024c31c03ULL,
   0x9400000000a01fc5ULL,
   0x200002e04202c047ULL,
   0x2800400020001de4ULL,
   0x0800000000d35c42ULL,
   0x9400000100c107c5ULL,
   0x9400000140c01f85ULL,
   0x8000000000001de7ULL
};

/* NOTE: intentionally using the same names as NV */
static const char *nve4_pm_query_names[] =
{
   /* MP counters */
   "prof_trigger_00",
   "prof_trigger_01",
   "prof_trigger_02",
   "prof_trigger_03",
   "prof_trigger_04",
   "prof_trigger_05",
   "prof_trigger_06",
   "prof_trigger_07",
   "warps_launched",
   "threads_launched",
   "sm_cta_launched",
   "inst_issued1",
   "inst_issued2",
   "inst_executed",
   "local_load",
   "local_store",
   "shared_load",
   "shared_store",
   "l1_local_load_hit",
   "l1_local_load_miss",
   "l1_local_store_hit",
   "l1_local_store_miss",
   "gld_request",
   "gst_request",
   "l1_global_load_hit",
   "l1_global_load_miss",
   "uncached_global_load_transaction",
   "global_store_transaction",
   "branch",
   "divergent_branch",
   "active_warps",
   "active_cycles",
   "inst_issued",
   "atom_count",
   "gred_count",
   "shared_load_replay",
   "shared_store_replay",
   "local_load_transactions",
   "local_store_transactions",
   "l1_shared_load_transactions",
   "l1_shared_store_transactions",
   "global_ld_mem_divergence_replays",
   "global_st_mem_divergence_replays",
   /* metrics, i.e. functions of the MP counters */
   "metric-ipc", /* inst_executed, clock */
   "metric-ipac", /* inst_executed, active_cycles */
   "metric-ipec", /* inst_executed, (bool)inst_executed */
   "metric-achieved_occupancy", /* active_warps, active_cycles */
   "metric-sm_efficiency", /* active_cycles, clock */
   "metric-inst_replay_overhead" /* inst_issued, inst_executed */
};

/* For simplicity, we will allocate as many group slots as we allocate counter
 * slots. This means that a single counter which wants to source from 2 groups
 * will have to be declared as using 2 counter slots. This shouldn't really be
 * a problem because such queries don't make much sense ... (unless someone is
 * really creative).
 */
struct nvc0_mp_counter_cfg
{
   uint32_t func : 16; /* mask or 4-bit logic op (depending on mode) */
   uint32_t mode : 4; /* LOGOP,B6,LOGOP_B6(_PULSE) */
   uint32_t num_src : 3; /* number of sources (1 - 6, only for NVC0:NVE4) */
   uint32_t sig_dom : 1; /* if 0, MP_PM_A (per warp-sched), if 1, MP_PM_B */
   uint32_t sig_sel : 8; /* signal group */
   uint64_t src_sel; /* signal selection for up to 6 sources (48 bit) */
};

#define NVC0_COUNTER_OPn_SUM 0
#define NVC0_COUNTER_OPn_OR  1
#define NVC0_COUNTER_OPn_AND 2
#define NVC0_COUNTER_OP2_REL_SUM_MM 3 /* (sum(ctr0) - sum(ctr1)) / sum(ctr0) */
#define NVC0_COUNTER_OP2_DIV_SUM_M0 4 /* sum(ctr0) / ctr1 of MP[0] */
#define NVC0_COUNTER_OP2_AVG_DIV_MM 5 /* avg(ctr0 / ctr1) */
#define NVC0_COUNTER_OP2_AVG_DIV_M0 6 /* avg(ctr0) / ctr1 of MP[0] */

struct nvc0_mp_pm_query_cfg
{
   struct nvc0_mp_counter_cfg ctr[4];
   uint8_t num_counters;
   uint8_t op;
   uint8_t norm[2]; /* normalization num,denom */
};

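/* _Q1A/_Q1B define a single-counter query sourced from signal domain A
 * (per warp scheduler) or B; _M2A/_M2B/_M2AB define two-counter metrics
 * combined at result time according to the NVC0_COUNTER_OP2_* operation.
 */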
#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
   { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
   { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g1, s1 }, \
   {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
   { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g0, s0 }, \
   { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
   {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
   { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
   { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
   {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }

/* NOTES:
 * active_warps: bit 0 alternates between 0 and 1 for an odd number of warps
 * inst_executed etc.: we only count a single warp scheduler
 * metric-ipXc: we simply multiply by 4 to account for the 4 warp schedulers;
 *  this is inaccurate!
 */
static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] =
{
   _Q1A(PROF_TRIGGER_0, 0x0001, B6, USER, 0x00000000, 1, 1),
   _Q1A(PROF_TRIGGER_1, 0x0001, B6, USER, 0x00000004, 1, 1),
   _Q1A(PROF_TRIGGER_2, 0x0001, B6, USER, 0x00000008, 1, 1),
   _Q1A(PROF_TRIGGER_3, 0x0001, B6, USER, 0x0000000c, 1, 1),
   _Q1A(PROF_TRIGGER_4, 0x0001, B6, USER, 0x00000010, 1, 1),
   _Q1A(PROF_TRIGGER_5, 0x0001, B6, USER, 0x00000014, 1, 1),
   _Q1A(PROF_TRIGGER_6, 0x0001, B6, USER, 0x00000018, 1, 1),
   _Q1A(PROF_TRIGGER_7, 0x0001, B6, USER, 0x0000001c, 1, 1),
   _Q1A(LAUNCHED_WARPS, 0x0001, B6, LAUNCH, 0x00000004, 1, 1),
   _Q1A(LAUNCHED_THREADS, 0x003f, B6, LAUNCH, 0x398a4188, 1, 1),
   _Q1B(LAUNCHED_CTA, 0x0001, B6, WARP, 0x0000001c, 1, 1),
   _Q1A(INST_ISSUED1, 0x0001, B6, ISSUE, 0x00000004, 1, 1),
   _Q1A(INST_ISSUED2, 0x0001, B6, ISSUE, 0x00000008, 1, 1),
   _Q1A(INST_ISSUED, 0x0003, B6, ISSUE, 0x00000104, 1, 1),
   _Q1A(INST_EXECUTED, 0x0003, B6, EXEC, 0x00000398, 1, 1),
   _Q1A(LD_SHARED, 0x0001, B6, LDST, 0x00000000, 1, 1),
   _Q1A(ST_SHARED, 0x0001, B6, LDST, 0x00000004, 1, 1),
   _Q1A(LD_LOCAL, 0x0001, B6, LDST, 0x00000008, 1, 1),
   _Q1A(ST_LOCAL, 0x0001, B6, LDST, 0x0000000c, 1, 1),
   _Q1A(GLD_REQUEST, 0x0001, B6, LDST, 0x00000010, 1, 1),
   _Q1A(GST_REQUEST, 0x0001, B6, LDST, 0x00000014, 1, 1),
   _Q1B(L1_LOCAL_LOAD_HIT, 0x0001, B6, L1, 0x00000000, 1, 1),
   _Q1B(L1_LOCAL_LOAD_MISS, 0x0001, B6, L1, 0x00000004, 1, 1),
   _Q1B(L1_LOCAL_STORE_HIT, 0x0001, B6, L1, 0x00000008, 1, 1),
   _Q1B(L1_LOCAL_STORE_MISS, 0x0001, B6, L1, 0x0000000c, 1, 1),
   _Q1B(L1_GLOBAL_LOAD_HIT, 0x0001, B6, L1, 0x00000010, 1, 1),
   _Q1B(L1_GLOBAL_LOAD_MISS, 0x0001, B6, L1, 0x00000014, 1, 1),
   _Q1B(GLD_TRANSACTIONS_UNCACHED, 0x0001, B6, MEM, 0x00000000, 1, 1),
   _Q1B(GST_TRANSACTIONS, 0x0001, B6, MEM, 0x00000004, 1, 1),
   _Q1A(BRANCH, 0x0001, B6, BRANCH, 0x0000000c, 1, 1),
   _Q1A(BRANCH_DIVERGENT, 0x0001, B6, BRANCH, 0x00000010, 1, 1),
   _Q1B(ACTIVE_WARPS, 0x003f, B6, WARP, 0x31483104, 2, 1),
   _Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1),
   _Q1A(ATOM_COUNT, 0x0001, B6, BRANCH, 0x00000000, 1, 1),
   _Q1A(GRED_COUNT, 0x0001, B6, BRANCH, 0x00000008, 1, 1),
   _Q1B(LD_SHARED_REPLAY, 0x0001, B6, REPLAY, 0x00000008, 1, 1),
   _Q1B(ST_SHARED_REPLAY, 0x0001, B6, REPLAY, 0x0000000c, 1, 1),
   _Q1B(LD_LOCAL_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000000, 1, 1),
   _Q1B(ST_LOCAL_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000004, 1, 1),
   _Q1B(L1_LD_SHARED_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000008, 1, 1),
   _Q1B(L1_ST_SHARED_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x0000000c, 1, 1),
   _Q1B(GLD_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000010, 1, 1),
   _Q1B(GST_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000014, 1, 1),
   _M2AB(IPC, 0x3, B6, EXEC, 0x398, 0xffff, LOGOP, WARP, 0x0, DIV_SUM_M0, 10, 1),
   _M2AB(IPAC, 0x3, B6, EXEC, 0x398, 0x1, B6, WARP, 0x0, AVG_DIV_MM, 10, 1),
   _M2A(IPEC, 0x3, B6, EXEC, 0x398, 0xe, LOGOP, EXEC, 0x398, AVG_DIV_MM, 10, 1),
   _M2A(INST_REPLAY_OHEAD, 0x3, B6, ISSUE, 0x104, 0x3, B6, EXEC, 0x398, REL_SUM_MM, 100, 1),
   _M2B(MP_OCCUPANCY, 0x3f, B6, WARP, 0x31483104, 0x01, B6, WARP, 0x0, AVG_DIV_MM, 200, 64),
   _M2B(MP_EFFICIENCY, 0x01, B6, WARP, 0x0, 0xffff, LOGOP, WARP, 0x0, AVG_DIV_M0, 100, 1),
};

#undef _Q1A
#undef _Q1B
#undef _M2A
#undef _M2B

/* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */
static const uint64_t nvc0_read_mp_pm_counters_code[] =
{
   /* mov b32 $r8 $tidx
    * mov b32 $r9 $physid
    * mov b32 $r0 $pm0
    * mov b32 $r1 $pm1
    * mov b32 $r2 $pm2
    * mov b32 $r3 $pm3
    * mov b32 $r4 $pm4
    * mov b32 $r5 $pm5
    * mov b32 $r6 $pm6
    * mov b32 $r7 $pm7
    * set $p0 0x1 eq u32 $r8 0x0
    * mov b32 $r10 c0[0x0]
    * mov b32 $r11 c0[0x4]
    * ext u32 $r8 $r9 0x414
    * (not $p0) exit
    * mul $r8 u32 $r8 u32 36
    * add b32 $r10 $c $r10 $r8
    * add b32 $r11 $r11 0x0 $c
    * mov b32 $r8 c0[0x8]
    * st b128 wt g[$r10d+0x00] $r0q
    * st b128 wt g[$r10d+0x10] $r4q
    * st b32 wt g[$r10d+0x20] $r8
    * exit */
   0x2c00000084021c04ULL,
   0x2c0000000c025c04ULL,
   0x2c00000010001c04ULL,
   0x2c00000014005c04ULL,
   0x2c00000018009c04ULL,
   0x2c0000001c00dc04ULL,
   0x2c00000020011c04ULL,
   0x2c00000024015c04ULL,
   0x2c00000028019c04ULL,
   0x2c0000002c01dc04ULL,
   0x190e0000fc81dc03ULL,
   0x2800400000029de4ULL,
   0x280040001002dde4ULL,
   0x7000c01050921c03ULL,
   0x80000000000021e7ULL,
   0x1000000090821c02ULL,
   0x4801000020a29c03ULL,
   0x0800000000b2dc42ULL,
   0x2800400020021de4ULL,
   0x9400000000a01fc5ULL,
   0x9400000040a11fc5ULL,
   0x9400000080a21f85ULL,
   0x8000000000001de7ULL
};

static const char *nvc0_pm_query_names[] =
{
   /* MP counters */
   "inst_executed",
   "branch",
   "divergent_branch",
   "active_warps",
   "active_cycles",
   "warps_launched",
   "threads_launched",
   "shared_load",
   "shared_store",
   "local_load",
   "local_store",
   "gred_count",
   "atom_count",
   "gld_request",
   "gst_request",
   "inst_issued1_0",
   "inst_issued1_1",
   "inst_issued2_0",
   "inst_issued2_1",
   "thread_inst_executed_0",
   "thread_inst_executed_1",
   "thread_inst_executed_2",
   "thread_inst_executed_3",
   "prof_trigger_00",
   "prof_trigger_01",
   "prof_trigger_02",
   "prof_trigger_03",
   "prof_trigger_04",
   "prof_trigger_05",
   "prof_trigger_06",
   "prof_trigger_07",
};

#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_PM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }

static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] =
{
   _Q(INST_EXECUTED, 0xaaaa, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 0x00, 0x00),
   _Q(BRANCH, 0xaaaa, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00),
   _Q(BRANCH_DIVERGENT, 0xaaaa, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 0x00, 0x00),
   _Q(ACTIVE_WARPS, 0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
   _Q(ACTIVE_CYCLES, 0xaaaa, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(LAUNCHED_WARPS, 0xaaaa, LOGOP, 0x26, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(LAUNCHED_THREADS, 0xaaaa, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
   _Q(LD_SHARED, 0xaaaa, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(ST_SHARED, 0xaaaa, LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(LD_LOCAL, 0xaaaa, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(ST_LOCAL, 0xaaaa, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(GRED_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(ATOM_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(GLD_REQUEST, 0xaaaa, LOGOP, 0x64, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(GST_REQUEST, 0xaaaa, LOGOP, 0x64, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED1_0, 0xaaaa, LOGOP, 0x7e, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED1_1, 0xaaaa, LOGOP, 0x7e, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED2_0, 0xaaaa, LOGOP, 0x7e, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED2_1, 0xaaaa, LOGOP, 0x7e, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(TH_INST_EXECUTED_0, 0xaaaa, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(TH_INST_EXECUTED_1, 0xaaaa, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(TH_INST_EXECUTED_2, 0xaaaa, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(TH_INST_EXECUTED_3, 0xaaaa, LOGOP, 0xa6, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(PROF_TRIGGER_0, 0xaaaa, LOGOP, 0x01, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_1, 0xaaaa, LOGOP, 0x01, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_2, 0xaaaa, LOGOP, 0x01, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_3, 0xaaaa, LOGOP, 0x01, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_4, 0xaaaa, LOGOP, 0x01, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_5, 0xaaaa, LOGOP, 0x01, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_6, 0xaaaa, LOGOP, 0x01, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_7, 0xaaaa, LOGOP, 0x01, 1, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00),
};

#undef _Q

static const struct nvc0_mp_pm_query_cfg *
nvc0_mp_pm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   struct nvc0_screen *screen = nvc0->screen;

   if (screen->base.class_3d >= NVE4_3D_CLASS)
      return &nve4_mp_pm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];
   return &nvc0_mp_pm_queries[q->type - NVC0_PM_QUERY(0)];
}

static void
nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   const boolean is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
   const struct nvc0_mp_pm_query_cfg *cfg;
   unsigned i, c;
   unsigned num_ab[2] = { 0, 0 };

   cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);

   /* check if we have enough free counter slots */
   for (i = 0; i < cfg->num_counters; ++i)
      num_ab[cfg->ctr[i].sig_dom]++;

   if (screen->pm.num_mp_pm_active[0] + num_ab[0] > 4 ||
       screen->pm.num_mp_pm_active[1] + num_ab[1] > 4) {
      NOUVEAU_ERR("Not enough free MP counter slots !\n");
      return;
   }

   assert(cfg->num_counters <= 4);
   PUSH_SPACE(push, 4 * 8 * (is_nve4 ? 1 : 6) + 6);

   if (!screen->pm.mp_counters_enabled) {
      screen->pm.mp_counters_enabled = TRUE;
      BEGIN_NVC0(push, SUBC_SW(0x06ac), 1);
      PUSH_DATA (push, 0x1fcb);
   }

   /* set sequence field to 0 (used to check if result is available) */
   for (i = 0; i < screen->mp_count; ++i)
      q->data[i * 10 + 10] = 0;

   for (i = 0; i < cfg->num_counters; ++i) {
      const unsigned d = cfg->ctr[i].sig_dom;

      if (!screen->pm.num_mp_pm_active[d]) {
         uint32_t m = (1 << 22) | (1 << (7 + (8 * !d)));
         if (screen->pm.num_mp_pm_active[!d])
            m |= 1 << (7 + (8 * d));
         BEGIN_NVC0(push, SUBC_SW(0x0600), 1);
         PUSH_DATA (push, m);
      }
      screen->pm.num_mp_pm_active[d]++;

      for (c = d * 4; c < (d * 4 + 4); ++c) {
         if (!screen->pm.mp_counter[c]) {
            q->ctr[i] = c;
            screen->pm.mp_counter[c] = (struct pipe_query *)q;
            break;
         }
      }
      assert(c <= (d * 4 + 3)); /* must succeed, already checked for space */

      /* configure and reset the counter(s) */
      if (is_nve4) {
         if (d == 0)
            BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_A_SIGSEL(c & 3)), 1);
         else
            BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_B_SIGSEL(c & 3)), 1);
         PUSH_DATA (push, cfg->ctr[i].sig_sel);
         BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SRCSEL(c)), 1);
         PUSH_DATA (push, cfg->ctr[i].src_sel + 0x2108421 * (c & 3));
         BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 1);
         PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
         BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SET(c)), 1);
         PUSH_DATA (push, 0);
      } else {
         unsigned s;

         for (s = 0; s < cfg->ctr[i].num_src; s++) {
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SIGSEL(s)), 1);
            PUSH_DATA (push, cfg->ctr[i].sig_sel);
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SRCSEL(s)), 1);
            PUSH_DATA (push, (cfg->ctr[i].src_sel >> (s * 8)) & 0xff);
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(s)), 1);
            PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SET(s)), 1);
            PUSH_DATA (push, 0);
         }
      }
   }
}

static void
nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct pipe_context *pipe = &nvc0->base.pipe;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   const boolean is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
   uint32_t mask;
   uint32_t input[3];
   const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
   const uint grid[3] = { screen->mp_count, 1, 1 };
   unsigned c;
   const struct nvc0_mp_pm_query_cfg *cfg;

   cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);

   if (unlikely(!screen->pm.prog)) {
      struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
      prog->type = PIPE_SHADER_COMPUTE;
      prog->translated = TRUE;
      prog->num_gprs = 14;
      prog->parm_size = 12;
      if (is_nve4) {
         prog->code = (uint32_t *)nve4_read_mp_pm_counters_code;
         prog->code_size = sizeof(nve4_read_mp_pm_counters_code);
      } else {
         prog->code = (uint32_t *)nvc0_read_mp_pm_counters_code;
         prog->code_size = sizeof(nvc0_read_mp_pm_counters_code);
      }
      screen->pm.prog = prog;
   }

   /* disable all counting */
   PUSH_SPACE(push, 8);
   for (c = 0; c < 8; ++c)
      if (screen->pm.mp_counter[c]) {
         if (is_nve4) {
            IMMED_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 0);
         } else {
            IMMED_NVC0(push, NVC0_COMPUTE(MP_PM_OP(c)), 0);
         }
      }
   /* release counters for this query */
   for (c = 0; c < 8; ++c) {
      if (nvc0_query(screen->pm.mp_counter[c]) == q) {
         screen->pm.num_mp_pm_active[c / 4]--;
         screen->pm.mp_counter[c] = NULL;
      }
   }

   BCTX_REFN_bo(nvc0->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR,
                q->bo);

   PUSH_SPACE(push, 1);
   IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);

   pipe->bind_compute_state(pipe, screen->pm.prog);
   input[0] = (q->bo->offset + q->base);
   input[1] = (q->bo->offset + q->base) >> 32;
   input[2] = q->sequence;
   pipe->launch_grid(pipe, block, grid, 0, input);

   nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY);

   /* re-activate other counters */
   PUSH_SPACE(push, 16);
   mask = 0;
   for (c = 0; c < 8; ++c) {
      unsigned i;
      q = nvc0_query(screen->pm.mp_counter[c]);
      if (!q)
         continue;
      cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
      for (i = 0; i < cfg->num_counters; ++i) {
         if (mask & (1 << q->ctr[i]))
            break;
         mask |= 1 << q->ctr[i];
         if (is_nve4) {
            BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(q->ctr[i])), 1);
         } else {
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(q->ctr[i])), 1);
         }
         PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
      }
   }
}

static INLINE boolean
nvc0_mp_pm_query_read_data(uint32_t count[32][4],
                           struct nvc0_context *nvc0, boolean wait,
                           struct nvc0_query *q,
                           const struct nvc0_mp_pm_query_cfg *cfg,
                           unsigned mp_count)
{
   unsigned p, c;

   for (p = 0; p < mp_count; ++p) {
      const unsigned b = (0x24 / 4) * p;

      for (c = 0; c < cfg->num_counters; ++c) {
         if (q->data[b + 8] != q->sequence) {
            if (!wait)
               return FALSE;
            if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
               return FALSE;
         }
         count[p][c] = q->data[b + q->ctr[c]];
      }
   }
   return TRUE;
}

static INLINE boolean
nve4_mp_pm_query_read_data(uint32_t count[32][4],
                           struct nvc0_context *nvc0, boolean wait,
                           struct nvc0_query *q,
                           const struct nvc0_mp_pm_query_cfg *cfg,
                           unsigned mp_count)
{
   unsigned p, c, d;

   for (p = 0; p < mp_count; ++p) {
      const unsigned b = (0x60 / 4) * p;

      for (c = 0; c < cfg->num_counters; ++c) {
         count[p][c] = 0;
         for (d = 0; d < ((q->ctr[c] & ~3) ? 1 : 4); ++d) {
            if (q->data[b + 20 + d] != q->sequence) {
               if (!wait)
                  return FALSE;
               if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
                  return FALSE;
            }
            if (q->ctr[c] & ~0x3)
               count[p][c] = q->data[b + 16 + (q->ctr[c] & 3)];
            else
               count[p][c] += q->data[b + d * 4 + q->ctr[c]];
         }
      }
   }
   return TRUE;
}

/* Metric calculations:
 * sum(x) ... sum of x over all MPs
 * avg(x) ... average of x over all MPs
 *
 * IPC              : sum(inst_executed) / clock
 * INST_REPLAY_OHEAD: (sum(inst_issued) - sum(inst_executed)) / sum(inst_issued)
 * MP_OCCUPANCY     : avg((active_warps / 64) / active_cycles)
 * MP_EFFICIENCY    : avg(active_cycles / clock)
 *
 * NOTE: Interpretation of IPC requires knowledge of MP count.
 */
static boolean
nvc0_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
                        void *result, boolean wait)
{
   uint32_t count[32][4];
   uint64_t value = 0;
   unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32);
   unsigned p, c;
   const struct nvc0_mp_pm_query_cfg *cfg;
   boolean ret;

   cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);

   if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
      ret = nve4_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
   else
      ret = nvc0_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
   if (!ret)
      return FALSE;

   if (cfg->op == NVC0_COUNTER_OPn_SUM) {
      for (c = 0; c < cfg->num_counters; ++c)
         for (p = 0; p < mp_count; ++p)
            value += count[p][c];
      value = (value * cfg->norm[0]) / cfg->norm[1];
   } else
   if (cfg->op == NVC0_COUNTER_OPn_OR) {
      uint32_t v = 0;
      for (c = 0; c < cfg->num_counters; ++c)
         for (p = 0; p < mp_count; ++p)
            v |= count[p][c];
      value = ((uint64_t)v * cfg->norm[0]) / cfg->norm[1];
   } else
   if (cfg->op == NVC0_COUNTER_OPn_AND) {
      uint32_t v = ~0;
      for (c = 0; c < cfg->num_counters; ++c)
         for (p = 0; p < mp_count; ++p)
            v &= count[p][c];
      value = ((uint64_t)v * cfg->norm[0]) / cfg->norm[1];
   } else
   if (cfg->op == NVC0_COUNTER_OP2_REL_SUM_MM) {
      uint64_t v[2] = { 0, 0 };
      for (p = 0; p < mp_count; ++p) {
         v[0] += count[p][0];
         v[1] += count[p][1];
      }
      if (v[0])
         value = ((v[0] - v[1]) * cfg->norm[0]) / (v[0] * cfg->norm[1]);
   } else
   if (cfg->op == NVC0_COUNTER_OP2_DIV_SUM_M0) {
      for (p = 0; p < mp_count; ++p)
         value += count[p][0];
      if (count[0][1])
         value = (value * cfg->norm[0]) / (count[0][1] * cfg->norm[1]);
      else
         value = 0;
   } else
   if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_MM) {
      unsigned mp_used = 0;
      for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
         if (count[p][1])
            value += (count[p][0] * cfg->norm[0]) / count[p][1];
      if (mp_used)
         value /= (uint64_t)mp_used * cfg->norm[1];
   } else
   if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_M0) {
      unsigned mp_used = 0;
      for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
         value += count[p][0];
      if (count[0][1] && mp_used) {
         value *= cfg->norm[0];
         value /= (uint64_t)count[0][1] * mp_used * cfg->norm[1];
      } else {
         value = 0;
      }
   }

   *(uint64_t *)result = value;
   return TRUE;
}

int
nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
                                  unsigned id,
                                  struct pipe_driver_query_info *info)
{
   struct nvc0_screen *screen = nvc0_screen(pscreen);
   int count = 0;

   count += NVC0_QUERY_DRV_STAT_COUNT;

   if (screen->base.device->drm_version >= 0x01000101) {
      if (screen->base.class_3d >= NVE4_3D_CLASS) {
         count += NVE4_PM_QUERY_COUNT;
      } else
      if (screen->compute) {
         count += NVC0_PM_QUERY_COUNT; /* NVC0_COMPUTE is not always enabled */
      }
   }

   if (!info)
      return count;

#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
   if (id < NVC0_QUERY_DRV_STAT_COUNT) {
      info->name = nvc0_drv_stat_names[id];
      info->query_type = NVC0_QUERY_DRV_STAT(id);
      info->max_value = ~0ULL;
      info->uses_byte_units = !!strstr(info->name, "bytes");
      return 1;
   } else
#endif
   if (id < count) {
      if (screen->base.class_3d >= NVE4_3D_CLASS) {
         info->name = nve4_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
         info->query_type = NVE4_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
         info->max_value = (id < NVE4_PM_QUERY_METRIC_MP_OCCUPANCY) ?
            ~0ULL : 100;
         info->uses_byte_units = FALSE;
         return 1;
      } else
      if (screen->compute) {
         info->name = nvc0_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
         info->query_type = NVC0_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
         info->max_value = ~0ULL;
         info->uses_byte_units = FALSE;
         return 1;
      }
   }
   /* user asked for info about non-existing query */
   info->name = "this_is_not_the_query_you_are_looking_for";
   info->query_type = 0xdeadd01d;
   info->max_value = 0;
   info->uses_byte_units = FALSE;
   return 0;
}
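
/* Enumeration sketch: a caller first asks for the number of driver queries
 * by passing info == NULL, then retrieves each record (this is the standard
 * pipe_screen hook implemented above):
 *
 *    struct pipe_driver_query_info info;
 *    int i, n = pscreen->get_driver_query_info(pscreen, 0, NULL);
 *    for (i = 0; i < n; ++i)
 *       if (pscreen->get_driver_query_info(pscreen, i, &info))
 *          printf("%s\n", info.name);
 */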

void
nvc0_init_query_functions(struct nvc0_context *nvc0)
{
   struct pipe_context *pipe = &nvc0->base.pipe;

   pipe->create_query = nvc0_query_create;
   pipe->destroy_query = nvc0_query_destroy;
   pipe->begin_query = nvc0_query_begin;
   pipe->end_query = nvc0_query_end;
   pipe->get_query_result = nvc0_query_result;
   pipe->render_condition = nvc0_render_condition;
}