nvc0: all queries use an unsigned 64-bit integer by default
[mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_query.c
1 /*
2 * Copyright 2011 Nouveau Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Christoph Bumiller
23 */
24
25 #define NVC0_PUSH_EXPLICIT_SPACE_CHECKING
26
27 #include "nvc0/nvc0_context.h"
28 #include "nv_object.xml.h"
29 #include "nvc0/nve4_compute.xml.h"
30 #include "nvc0/nvc0_compute.xml.h"
31
32 #define NVC0_QUERY_STATE_READY 0
33 #define NVC0_QUERY_STATE_ACTIVE 1
34 #define NVC0_QUERY_STATE_ENDED 2
35 #define NVC0_QUERY_STATE_FLUSHED 3
36
/* Driver-side query object: GPU-visible result storage plus CPU-side
 * bookkeeping.  Cast to/from struct pipe_query via nvc0_query().
 */
struct nvc0_query {
   uint32_t *data;        /* CPU mapping of the result buffer (bo->map + base) */
   uint16_t type;         /* PIPE_QUERY_* or driver-specific NVC0_/NVE4_ type */
   uint16_t index;        /* vertex stream / TFB buffer index for indexed queries */
   int8_t ctr[4];         /* counter slots; presumably assigned by the MP PM code
                           * (nvc0_mp_pm_query_*) — not used in this chunk */
   uint32_t sequence;     /* sequence number the GPU writes on completion */
   struct nouveau_bo *bo; /* buffer object holding the query results */
   uint32_t base;         /* byte offset of our sub-allocation within bo */
   uint32_t offset; /* base + i * rotate */
   uint8_t state;         /* NVC0_QUERY_STATE_* */
   boolean is64bit;       /* TRUE: 64-bit result slots, completion via fence */
   uint8_t rotate;        /* bytes advanced per begin (occlusion queries only) */
   int nesting; /* only used for occlusion queries */
   union {
      struct nouveau_mm_allocation *mm; /* backing sub-allocation */
      uint64_t value;                   /* CPU-computed value (driver stats) */
   } u;
   struct nouveau_fence *fence; /* completion fence for 64-bit queries */
};
56
57 #define NVC0_QUERY_ALLOC_SPACE 256
58
59 static boolean nvc0_mp_pm_query_begin(struct nvc0_context *,
60 struct nvc0_query *);
61 static void nvc0_mp_pm_query_end(struct nvc0_context *, struct nvc0_query *);
62 static boolean nvc0_mp_pm_query_result(struct nvc0_context *,
63 struct nvc0_query *, void *, boolean);
64
65 static INLINE struct nvc0_query *
66 nvc0_query(struct pipe_query *pipe)
67 {
68 return (struct nvc0_query *)pipe;
69 }
70
/* (Re)allocate GPU-visible result storage for a query.
 *
 * Any existing buffer is released first; if the query may still be in
 * flight (state != READY), the sub-allocation is freed via fence work so
 * the GPU is done with it before the memory gets reused.  Calling with
 * size == 0 only frees.  Returns FALSE on allocation or mapping failure.
 */
static boolean
nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
{
   struct nvc0_screen *screen = nvc0->screen;
   int ret;

   if (q->bo) {
      nouveau_bo_ref(NULL, &q->bo);
      if (q->u.mm) {
         if (q->state == NVC0_QUERY_STATE_READY)
            nouveau_mm_free(q->u.mm);
         else
            /* defer the free until the current fence signals */
            nouveau_fence_work(screen->base.fence.current,
                               nouveau_mm_free_work, q->u.mm);
      }
   }
   if (size) {
      q->u.mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
      if (!q->bo)
         return FALSE;
      q->offset = q->base;

      ret = nouveau_bo_map(q->bo, 0, screen->base.client);
      if (ret) {
         /* recursive call with size 0 frees what we just allocated */
         nvc0_query_allocate(nvc0, q, 0);
         return FALSE;
      }
      q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
   }
   return TRUE;
}
102
103 static void
104 nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
105 {
106 nvc0_query_allocate(nvc0_context(pipe), nvc0_query(pq), 0);
107 nouveau_fence_ref(NULL, &nvc0_query(pq)->fence);
108 FREE(nvc0_query(pq));
109 }
110
111 static struct pipe_query *
112 nvc0_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
113 {
114 struct nvc0_context *nvc0 = nvc0_context(pipe);
115 struct nvc0_query *q;
116 unsigned space = NVC0_QUERY_ALLOC_SPACE;
117
118 q = CALLOC_STRUCT(nvc0_query);
119 if (!q)
120 return NULL;
121
122 switch (type) {
123 case PIPE_QUERY_OCCLUSION_COUNTER:
124 case PIPE_QUERY_OCCLUSION_PREDICATE:
125 q->rotate = 32;
126 space = NVC0_QUERY_ALLOC_SPACE;
127 break;
128 case PIPE_QUERY_PIPELINE_STATISTICS:
129 q->is64bit = TRUE;
130 space = 512;
131 break;
132 case PIPE_QUERY_SO_STATISTICS:
133 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
134 q->is64bit = TRUE;
135 space = 64;
136 break;
137 case PIPE_QUERY_PRIMITIVES_GENERATED:
138 case PIPE_QUERY_PRIMITIVES_EMITTED:
139 q->is64bit = TRUE;
140 q->index = index;
141 space = 32;
142 break;
143 case PIPE_QUERY_TIME_ELAPSED:
144 case PIPE_QUERY_TIMESTAMP:
145 case PIPE_QUERY_TIMESTAMP_DISJOINT:
146 case PIPE_QUERY_GPU_FINISHED:
147 space = 32;
148 break;
149 case NVC0_QUERY_TFB_BUFFER_OFFSET:
150 space = 16;
151 break;
152 default:
153 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
154 if (type >= NVC0_QUERY_DRV_STAT(0) && type <= NVC0_QUERY_DRV_STAT_LAST) {
155 space = 0;
156 q->is64bit = true;
157 q->index = type - NVC0_QUERY_DRV_STAT(0);
158 break;
159 } else
160 #endif
161 if (nvc0->screen->base.device->drm_version >= 0x01000101) {
162 if (type >= NVE4_PM_QUERY(0) && type <= NVE4_PM_QUERY_LAST) {
163 /* for each MP:
164 * [00] = WS0.C0
165 * [04] = WS0.C1
166 * [08] = WS0.C2
167 * [0c] = WS0.C3
168 * [10] = WS1.C0
169 * [14] = WS1.C1
170 * [18] = WS1.C2
171 * [1c] = WS1.C3
172 * [20] = WS2.C0
173 * [24] = WS2.C1
174 * [28] = WS2.C2
175 * [2c] = WS2.C3
176 * [30] = WS3.C0
177 * [34] = WS3.C1
178 * [38] = WS3.C2
179 * [3c] = WS3.C3
180 * [40] = MP.C4
181 * [44] = MP.C5
182 * [48] = MP.C6
183 * [4c] = MP.C7
184 * [50] = WS0.sequence
185 * [54] = WS1.sequence
186 * [58] = WS2.sequence
187 * [5c] = WS3.sequence
188 */
189 space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t);
190 break;
191 } else
192 if (type >= NVC0_PM_QUERY(0) && type <= NVC0_PM_QUERY_LAST) {
193 /* for each MP:
194 * [00] = MP.C0
195 * [04] = MP.C1
196 * [08] = MP.C2
197 * [0c] = MP.C3
198 * [10] = MP.C4
199 * [14] = MP.C5
200 * [18] = MP.C6
201 * [1c] = MP.C7
202 * [20] = MP.sequence
203 */
204 space = (8 + 1) * nvc0->screen->mp_count * sizeof(uint32_t);
205 break;
206 }
207 }
208 debug_printf("invalid query type: %u\n", type);
209 FREE(q);
210 return NULL;
211 }
212 if (!nvc0_query_allocate(nvc0, q, space)) {
213 FREE(q);
214 return NULL;
215 }
216
217 q->type = type;
218
219 if (q->rotate) {
220 /* we advance before query_begin ! */
221 q->offset -= q->rotate;
222 q->data -= q->rotate / sizeof(*q->data);
223 } else
224 if (!q->is64bit)
225 q->data[0] = 0; /* initialize sequence */
226
227 return (struct pipe_query *)q;
228 }
229
/* Emit a 3D-class QUERY_GET: the GPU writes the counter selected by 'get'
 * together with q->sequence into the query buffer at q->offset + offset.
 */
static void
nvc0_query_get(struct nouveau_pushbuf *push, struct nvc0_query *q,
               unsigned offset, uint32_t get)
{
   offset += q->offset;

   PUSH_SPACE(push, 5);
   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
   BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
   PUSH_DATAh(push, q->bo->offset + offset);
   PUSH_DATA (push, q->bo->offset + offset);
   PUSH_DATA (push, q->sequence);
   PUSH_DATA (push, get);
}
244
/* Advance a rotating (occlusion) query to its next result slot; once the
 * current allocation is exhausted, grab a fresh buffer (and reset offset).
 */
static void
nvc0_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   q->offset += q->rotate;
   q->data += q->rotate / sizeof(*q->data);
   if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
      nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
}
253
254 static boolean
255 nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
256 {
257 struct nvc0_context *nvc0 = nvc0_context(pipe);
258 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
259 struct nvc0_query *q = nvc0_query(pq);
260 boolean ret = true;
261
262 /* For occlusion queries we have to change the storage, because a previous
263 * query might set the initial render conition to FALSE even *after* we re-
264 * initialized it to TRUE.
265 */
266 if (q->rotate) {
267 nvc0_query_rotate(nvc0, q);
268
269 /* XXX: can we do this with the GPU, and sync with respect to a previous
270 * query ?
271 */
272 q->data[0] = q->sequence; /* initialize sequence */
273 q->data[1] = 1; /* initial render condition = TRUE */
274 q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
275 q->data[5] = 0;
276 }
277 q->sequence++;
278
279 switch (q->type) {
280 case PIPE_QUERY_OCCLUSION_COUNTER:
281 case PIPE_QUERY_OCCLUSION_PREDICATE:
282 q->nesting = nvc0->screen->num_occlusion_queries_active++;
283 if (q->nesting) {
284 nvc0_query_get(push, q, 0x10, 0x0100f002);
285 } else {
286 PUSH_SPACE(push, 3);
287 BEGIN_NVC0(push, NVC0_3D(COUNTER_RESET), 1);
288 PUSH_DATA (push, NVC0_3D_COUNTER_RESET_SAMPLECNT);
289 IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1);
290 }
291 break;
292 case PIPE_QUERY_PRIMITIVES_GENERATED:
293 nvc0_query_get(push, q, 0x10, 0x09005002 | (q->index << 5));
294 break;
295 case PIPE_QUERY_PRIMITIVES_EMITTED:
296 nvc0_query_get(push, q, 0x10, 0x05805002 | (q->index << 5));
297 break;
298 case PIPE_QUERY_SO_STATISTICS:
299 nvc0_query_get(push, q, 0x20, 0x05805002 | (q->index << 5));
300 nvc0_query_get(push, q, 0x30, 0x06805002 | (q->index << 5));
301 break;
302 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
303 nvc0_query_get(push, q, 0x10, 0x03005002 | (q->index << 5));
304 break;
305 case PIPE_QUERY_TIME_ELAPSED:
306 nvc0_query_get(push, q, 0x10, 0x00005002);
307 break;
308 case PIPE_QUERY_PIPELINE_STATISTICS:
309 nvc0_query_get(push, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
310 nvc0_query_get(push, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
311 nvc0_query_get(push, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
312 nvc0_query_get(push, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
313 nvc0_query_get(push, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
314 nvc0_query_get(push, q, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
315 nvc0_query_get(push, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
316 nvc0_query_get(push, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
317 nvc0_query_get(push, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
318 nvc0_query_get(push, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
319 break;
320 default:
321 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
322 if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
323 q->type <= NVC0_QUERY_DRV_STAT_LAST) {
324 if (q->index >= 5)
325 q->u.value = nvc0->screen->base.stats.v[q->index];
326 else
327 q->u.value = 0;
328 } else
329 #endif
330 if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
331 (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
332 ret = nvc0_mp_pm_query_begin(nvc0, q);
333 }
334 break;
335 }
336 q->state = NVC0_QUERY_STATE_ACTIVE;
337 return ret;
338 }
339
/* End a query: record the counters' end values.  For queries that do not
 * require begin (e.g. TIMESTAMP, GPU_FINISHED) this also performs the
 * per-use setup.  64-bit queries additionally remember the current fence
 * so completion can be detected without a sequence number.
 */
static void
nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q = nvc0_query(pq);

   if (q->state != NVC0_QUERY_STATE_ACTIVE) {
      /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
      if (q->rotate)
         nvc0_query_rotate(nvc0, q);
      q->sequence++;
   }
   q->state = NVC0_QUERY_STATE_ENDED;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      nvc0_query_get(push, q, 0, 0x0100f002);
      /* disable the sample counter once the last active query ends */
      if (--nvc0->screen->num_occlusion_queries_active == 0) {
         PUSH_SPACE(push, 1);
         IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nvc0_query_get(push, q, 0, 0x09005002 | (q->index << 5));
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nvc0_query_get(push, q, 0, 0x05805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nvc0_query_get(push, q, 0x00, 0x05805002 | (q->index << 5));
      nvc0_query_get(push, q, 0x10, 0x06805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      /* TODO: How do we sum over all streams for render condition ? */
      /* PRIMS_DROPPED doesn't write sequence, use a ZERO query to sync on */
      nvc0_query_get(push, q, 0x00, 0x03005002 | (q->index << 5));
      nvc0_query_get(push, q, 0x20, 0x00005002);
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIME_ELAPSED:
      nvc0_query_get(push, q, 0, 0x00005002);
      break;
   case PIPE_QUERY_GPU_FINISHED:
      nvc0_query_get(push, q, 0, 0x1000f010);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      nvc0_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
      nvc0_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
      nvc0_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
      nvc0_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
      nvc0_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nvc0_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nvc0_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nvc0_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
      nvc0_query_get(push, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
      nvc0_query_get(push, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      /* indexed by TFB buffer instead of by vertex stream */
      nvc0_query_get(push, q, 0x00, 0x0d005002 | (q->index << 5));
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* This query is not issued on GPU because disjoint is forced to FALSE */
      q->state = NVC0_QUERY_STATE_READY;
      break;
   default:
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
      if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
          q->type <= NVC0_QUERY_DRV_STAT_LAST) {
         /* CPU-side stat: compute the delta now, no fence needed */
         q->u.value = nvc0->screen->base.stats.v[q->index] - q->u.value;
         return;
      } else
#endif
      if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
          (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
         nvc0_mp_pm_query_end(nvc0, q);
      }
      break;
   }
   if (q->is64bit)
      nouveau_fence_ref(nvc0->screen->base.fence.current, &q->fence);
}
424
/* Poll for completion without stalling: 64-bit queries are done when their
 * fence has signalled, 32-bit ones when the GPU has written the expected
 * sequence number into the first result dword.
 * NOTE(review): 'cli' is currently unused by this function.
 */
static INLINE void
nvc0_query_update(struct nouveau_client *cli, struct nvc0_query *q)
{
   if (q->is64bit) {
      if (nouveau_fence_signalled(q->fence))
         q->state = NVC0_QUERY_STATE_READY;
   } else {
      if (q->data[0] == q->sequence)
         q->state = NVC0_QUERY_STATE_READY;
   }
}
436
/* Fetch a query result into 'result'.
 *
 * If the query hasn't completed and wait is FALSE, kicks the pushbuf once
 * (so apps that spin on availability make progress) and returns FALSE.
 * With wait == TRUE, blocks on the buffer object until the GPU is done.
 * Most counters are reported as end - begin deltas; offsets below follow
 * the layouts written by query_begin/query_end.
 */
static boolean
nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
                  boolean wait, union pipe_query_result *result)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_query *q = nvc0_query(pq);
   uint64_t *res64 = (uint64_t*)result;
   uint32_t *res32 = (uint32_t*)result;
   boolean *res8 = (boolean*)result;
   uint64_t *data64 = (uint64_t *)q->data;
   unsigned i;

#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
   /* driver statistics were computed on the CPU in query_end */
   if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
       q->type <= NVC0_QUERY_DRV_STAT_LAST) {
      res64[0] = q->u.value;
      return TRUE;
   } else
#endif
   if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
       (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
      return nvc0_mp_pm_query_result(nvc0, q, result, wait);
   }

   if (q->state != NVC0_QUERY_STATE_READY)
      nvc0_query_update(nvc0->screen->base.client, q);

   if (q->state != NVC0_QUERY_STATE_READY) {
      if (!wait) {
         if (q->state != NVC0_QUERY_STATE_FLUSHED) {
            q->state = NVC0_QUERY_STATE_FLUSHED;
            /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
            PUSH_KICK(nvc0->base.pushbuf);
         }
         return FALSE;
      }
      if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->screen->base.client))
         return FALSE;
      NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1);
   }
   q->state = NVC0_QUERY_STATE_READY;

   switch (q->type) {
   case PIPE_QUERY_GPU_FINISHED:
      res8[0] = TRUE;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
      res64[0] = q->data[1] - q->data[5];
      break;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      res8[0] = q->data[1] != q->data[5];
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
   case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
      res64[0] = data64[0] - data64[2];
      break;
   case PIPE_QUERY_SO_STATISTICS:
      res64[0] = data64[0] - data64[4];
      res64[1] = data64[2] - data64[6];
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      res8[0] = data64[0] != data64[2];
      break;
   case PIPE_QUERY_TIMESTAMP:
      res64[0] = data64[1];
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* frequency in Hz; disjoint flag at byte offset 8 after the u64 —
       * NOTE(review): offset matches pipe_query_data_timestamp_disjoint,
       * confirm against p_state.h */
      res64[0] = 1000000000;
      res8[8] = FALSE;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      res64[0] = data64[1] - data64[3];
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      /* 10 counters; begin values start at qword 24 (byte 0xc0) */
      for (i = 0; i < 10; ++i)
         res64[i] = data64[i * 2] - data64[24 + i * 2];
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      res32[0] = q->data[1];
      break;
   default:
      assert(0); /* can't happen, we don't create queries with invalid type */
      return FALSE;
   }

   return TRUE;
}
524
/* Make the channel's FIFO wait (semaphore acquire, equal) until the query
 * result sequence has been written, so subsequent commands observe the
 * final value — used for render condition and stream-output restarts.
 */
void
nvc0_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq)
{
   struct nvc0_query *q = nvc0_query(pq);
   unsigned offset = q->offset;

   /* SO_OVERFLOW syncs on the ZERO query written at +0x20 (see query_end) */
   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) offset += 0x20;

   PUSH_SPACE(push, 5);
   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
   PUSH_DATAh(push, q->bo->offset + offset);
   PUSH_DATA (push, q->bo->offset + offset);
   PUSH_DATA (push, q->sequence);
   PUSH_DATA (push, (1 << 12) |
              NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
}
542
/* Set or clear the render condition: rendering is predicated on the result
 * of 'pq' according to 'condition' and 'mode'.  Passing pq == NULL disables
 * predication (COND_MODE_ALWAYS).  When waiting is requested (or required,
 * e.g. SO_OVERFLOW), a FIFO semaphore wait is emitted first so the COND
 * comparison sees the final query value.
 */
static void
nvc0_render_condition(struct pipe_context *pipe,
                      struct pipe_query *pq,
                      boolean condition, uint mode)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q;
   uint32_t cond;
   boolean wait =
      mode != PIPE_RENDER_COND_NO_WAIT &&
      mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

   if (!pq) {
      cond = NVC0_3D_COND_MODE_ALWAYS;
   }
   else {
      q = nvc0_query(pq);
      /* NOTE: comparison of 2 queries only works if both have completed */
      switch (q->type) {
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
         cond = condition ? NVC0_3D_COND_MODE_EQUAL :
                          NVC0_3D_COND_MODE_NOT_EQUAL;
         wait = TRUE;
         break;
      case PIPE_QUERY_OCCLUSION_COUNTER:
      case PIPE_QUERY_OCCLUSION_PREDICATE:
         if (likely(!condition)) {
            if (unlikely(q->nesting))
               cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
                             NVC0_3D_COND_MODE_ALWAYS;
            else
               /* outermost query: the sample count itself can be tested */
               cond = NVC0_3D_COND_MODE_RES_NON_ZERO;
         } else {
            cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
         }
         break;
      default:
         assert(!"render condition query not a predicate");
         cond = NVC0_3D_COND_MODE_ALWAYS;
         break;
      }
   }

   /* remember the condition state so it can be re-applied on validation */
   nvc0->cond_query = pq;
   nvc0->cond_cond = condition;
   nvc0->cond_condmode = cond;
   nvc0->cond_mode = mode;

   if (!pq) {
      PUSH_SPACE(push, 1);
      IMMED_NVC0(push, NVC0_3D(COND_MODE), cond);
      return;
   }

   if (wait)
      nvc0_query_fifo_wait(push, pq);

   PUSH_SPACE(push, 7);
   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   BEGIN_NVC0(push, NVC0_3D(COND_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, q->bo->offset + q->offset);
   PUSH_DATA (push, q->bo->offset + q->offset);
   PUSH_DATA (push, cond);
   /* the 2D engine has its own condition state */
   BEGIN_NVC0(push, NVC0_2D(COND_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, q->bo->offset + q->offset);
   PUSH_DATA (push, q->bo->offset + q->offset);
}
611
/* Submit 4 bytes of the query buffer (at result_offset) directly as pushbuf
 * data, marked NO_PREFETCH so the value is read at execution time rather
 * than being fetched ahead — lets a GPU-written result feed the command
 * stream (e.g. stream-output byte counts).
 */
void
nvc0_query_pushbuf_submit(struct nouveau_pushbuf *push,
                          struct pipe_query *pq, unsigned result_offset)
{
   struct nvc0_query *q = nvc0_query(pq);

/* bit 8 of the IB entry size field disables prefetching of the data */
#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))

   nouveau_pushbuf_space(push, 0, 0, 1);
   nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
                        NVC0_IB_ENTRY_1_NO_PREFETCH);
}
624
625 void
626 nvc0_so_target_save_offset(struct pipe_context *pipe,
627 struct pipe_stream_output_target *ptarg,
628 unsigned index, boolean *serialize)
629 {
630 struct nvc0_so_target *targ = nvc0_so_target(ptarg);
631
632 if (*serialize) {
633 *serialize = FALSE;
634 PUSH_SPACE(nvc0_context(pipe)->base.pushbuf, 1);
635 IMMED_NVC0(nvc0_context(pipe)->base.pushbuf, NVC0_3D(SERIALIZE), 0);
636
637 NOUVEAU_DRV_STAT(nouveau_screen(pipe->screen), gpu_serialize_count, 1);
638 }
639
640 nvc0_query(targ->pq)->index = index;
641
642 nvc0_query_end(pipe, targ->pq);
643 }
644
645
646 /* === DRIVER STATISTICS === */
647
648 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
649
/* Human-readable names for the CPU-side driver statistics counters.
 * NOTE(review): the order must match the stats.v[] indices selected by
 * NVC0_QUERY_DRV_STAT(i) — confirm against the nouveau screen stats struct.
 */
static const char *nvc0_drv_stat_names[] =
{
   "drv-tex_obj_current_count",
   "drv-tex_obj_current_bytes",
   "drv-buf_obj_current_count",
   "drv-buf_obj_current_bytes_vid",
   "drv-buf_obj_current_bytes_sys",
   "drv-tex_transfers_rd",
   "drv-tex_transfers_wr",
   "drv-tex_copy_count",
   "drv-tex_blit_count",
   "drv-tex_cache_flush_count",
   "drv-buf_transfers_rd",
   "drv-buf_transfers_wr",
   "drv-buf_read_bytes_staging_vid",
   "drv-buf_write_bytes_direct",
   "drv-buf_write_bytes_staging_vid",
   "drv-buf_write_bytes_staging_sys",
   "drv-buf_copy_bytes",
   "drv-buf_non_kernel_fence_sync_count",
   "drv-any_non_kernel_fence_sync_count",
   "drv-query_sync_count",
   "drv-gpu_serialize_count",
   "drv-draw_calls_array",
   "drv-draw_calls_indexed",
   "drv-draw_calls_fallback_count",
   "drv-user_buffer_upload_bytes",
   "drv-constbuf_upload_count",
   "drv-constbuf_upload_bytes",
   "drv-pushbuf_count",
   "drv-resource_validate_count"
};
682
683 #endif /* NOUVEAU_ENABLE_DRIVER_STATISTICS */
684
685
686 /* === PERFORMANCE MONITORING COUNTERS for NVE4+ === */
687
688 /* Code to read out MP counters: They are accessible via mmio, too, but let's
689 * just avoid mapping registers in userspace. We'd have to know which MPs are
690 * enabled/present, too, and that information is not presently exposed.
691 * We could add a kernel interface for it, but reading the counters like this
692 * has the advantage of being async (if get_result isn't called immediately).
693 */
/* Pre-assembled compute shader that each MP runs to dump its PM counters to
 * memory (layout documented at nvc0_query_create).  Keep the hex opcodes in
 * sync with the disassembly in the comment below when editing.
 */
static const uint64_t nve4_read_mp_pm_counters_code[] =
{
   /* sched 0x20 0x20 0x20 0x20 0x20 0x20 0x20
    * mov b32 $r8 $tidx
    * mov b32 $r12 $physid
    * mov b32 $r0 $pm0
    * mov b32 $r1 $pm1
    * mov b32 $r2 $pm2
    * mov b32 $r3 $pm3
    * mov b32 $r4 $pm4
    * sched 0x20 0x20 0x23 0x04 0x20 0x04 0x2b
    * mov b32 $r5 $pm5
    * mov b32 $r6 $pm6
    * mov b32 $r7 $pm7
    * set $p0 0x1 eq u32 $r8 0x0
    * mov b32 $r10 c0[0x0]
    * ext u32 $r8 $r12 0x414
    * mov b32 $r11 c0[0x4]
    * sched 0x04 0x2e 0x04 0x20 0x20 0x28 0x04
    * ext u32 $r9 $r12 0x208
    * (not $p0) exit
    * set $p1 0x1 eq u32 $r9 0x0
    * mul $r8 u32 $r8 u32 96
    * mul $r12 u32 $r9 u32 16
    * mul $r13 u32 $r9 u32 4
    * add b32 $r9 $r8 $r13
    * sched 0x28 0x04 0x2c 0x04 0x2c 0x04 0x2c
    * add b32 $r8 $r8 $r12
    * mov b32 $r12 $r10
    * add b32 $r10 $c $r10 $r8
    * mov b32 $r13 $r11
    * add b32 $r11 $r11 0x0 $c
    * add b32 $r12 $c $r12 $r9
    * st b128 wt g[$r10d] $r0q
    * sched 0x4 0x2c 0x20 0x04 0x2e 0x00 0x00
    * mov b32 $r0 c0[0x8]
    * add b32 $r13 $r13 0x0 $c
    * $p1 st b128 wt g[$r12d+0x40] $r4q
    * st b32 wt g[$r12d+0x50] $r0
    * exit */
   0x2202020202020207ULL,
   0x2c00000084021c04ULL,
   0x2c0000000c031c04ULL,
   0x2c00000010001c04ULL,
   0x2c00000014005c04ULL,
   0x2c00000018009c04ULL,
   0x2c0000001c00dc04ULL,
   0x2c00000020011c04ULL,
   0x22b0420042320207ULL,
   0x2c00000024015c04ULL,
   0x2c00000028019c04ULL,
   0x2c0000002c01dc04ULL,
   0x190e0000fc81dc03ULL,
   0x2800400000029de4ULL,
   0x7000c01050c21c03ULL,
   0x280040001002dde4ULL,
   0x204282020042e047ULL,
   0x7000c00820c25c03ULL,
   0x80000000000021e7ULL,
   0x190e0000fc93dc03ULL,
   0x1000000180821c02ULL,
   0x1000000040931c02ULL,
   0x1000000010935c02ULL,
   0x4800000034825c03ULL,
   0x22c042c042c04287ULL,
   0x4800000030821c03ULL,
   0x2800000028031de4ULL,
   0x4801000020a29c03ULL,
   0x280000002c035de4ULL,
   0x0800000000b2dc42ULL,
   0x4801000024c31c03ULL,
   0x9400000000a01fc5ULL,
   0x200002e04202c047ULL,
   0x2800400020001de4ULL,
   0x0800000000d35c42ULL,
   0x9400000100c107c5ULL,
   0x9400000140c01f85ULL,
   0x8000000000001de7ULL
};
773
774 /* NOTE: intentionally using the same names as NV */
/* NOTE: intentionally using the same names as NV */
/* NOTE(review): table order must line up with the NVE4_PM_QUERY_* values
 * indexing nve4_mp_pm_queries below — confirm when adding entries. */
static const char *nve4_pm_query_names[] =
{
   /* MP counters */
   "prof_trigger_00",
   "prof_trigger_01",
   "prof_trigger_02",
   "prof_trigger_03",
   "prof_trigger_04",
   "prof_trigger_05",
   "prof_trigger_06",
   "prof_trigger_07",
   "warps_launched",
   "threads_launched",
   "sm_cta_launched",
   "inst_issued1",
   "inst_issued2",
   "inst_executed",
   "local_load",
   "local_store",
   "shared_load",
   "shared_store",
   "l1_local_load_hit",
   "l1_local_load_miss",
   "l1_local_store_hit",
   "l1_local_store_miss",
   "gld_request",
   "gst_request",
   "l1_global_load_hit",
   "l1_global_load_miss",
   "uncached_global_load_transaction",
   "global_store_transaction",
   "branch",
   "divergent_branch",
   "active_warps",
   "active_cycles",
   "inst_issued",
   "atom_count",
   "gred_count",
   "shared_load_replay",
   "shared_store_replay",
   "local_load_transactions",
   "local_store_transactions",
   "l1_shared_load_transactions",
   "l1_shared_store_transactions",
   "global_ld_mem_divergence_replays",
   "global_st_mem_divergence_replays",
   /* metrics, i.e. functions of the MP counters */
   "metric-ipc", /* inst_executed, clock */
   "metric-ipac", /* inst_executed, active_cycles */
   "metric-ipec", /* inst_executed, (bool)inst_executed */
   "metric-achieved_occupancy", /* active_warps, active_cycles */
   "metric-sm_efficiency", /* active_cycles, clock */
   "metric-inst_replay_overhead" /* inst_issued, inst_executed */
};
829
830 /* For simplicity, we will allocate as many group slots as we allocate counter
831 * slots. This means that a single counter which wants to source from 2 groups
832 * will have to be declared as using 2 counter slots. This shouldn't really be
833 * a problem because such queries don't make much sense ... (unless someone is
834 * really creative).
835 */
/* Hardware configuration of a single MP performance counter slot. */
struct nvc0_mp_counter_cfg
{
   uint32_t func : 16; /* mask or 4-bit logic op (depending on mode) */
   uint32_t mode : 4; /* LOGOP,B6,LOGOP_B6(_PULSE) */
   uint32_t num_src : 3; /* number of sources (1 - 6, only for NVC0:NVE4) */
   uint32_t sig_dom : 1; /* if 0, MP_PM_A (per warp-sched), if 1, MP_PM_B */
   uint32_t sig_sel : 8; /* signal group */
   uint64_t src_sel; /* signal selection for up to 6 sources (48 bit) */
};
845
846 #define NVC0_COUNTER_OPn_SUM 0
847 #define NVC0_COUNTER_OPn_OR 1
848 #define NVC0_COUNTER_OPn_AND 2
849 #define NVC0_COUNTER_OP2_REL_SUM_MM 3 /* (sum(ctr0) - sum(ctr1)) / sum(ctr0) */
850 #define NVC0_COUNTER_OP2_DIV_SUM_M0 4 /* sum(ctr0) / ctr1 of MP[0]) */
851 #define NVC0_COUNTER_OP2_AVG_DIV_MM 5 /* avg(ctr0 / ctr1) */
852 #define NVC0_COUNTER_OP2_AVG_DIV_M0 6 /* avg(ctr0) / ctr1 of MP[0]) */
853
/* Full configuration of one MP performance query: up to 4 counter slots
 * combined by 'op' (NVC0_COUNTER_OP*), scaled by norm[0]/norm[1].
 */
struct nvc0_mp_pm_query_cfg
{
   struct nvc0_mp_counter_cfg ctr[4];
   uint8_t num_counters; /* how many entries of ctr[] are used */
   uint8_t op;           /* NVC0_COUNTER_OPn_* / NVC0_COUNTER_OP2_* */
   uint8_t norm[2]; /* normalization num,denom */
};
861
862 #define _Q1A(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
863 #define _Q1B(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
864 #define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
865 { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
866 { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g1, s1 }, \
867 {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
868 #define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
869 { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g0, s0 }, \
870 { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
871 {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
872 #define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
873 { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
874 { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
875 {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
876
877 /* NOTES:
878 * active_warps: bit 0 alternates btw 0 and 1 for odd nr of warps
879 * inst_executed etc.: we only count a single warp scheduler
880 * metric-ipXc: we simply multiply by 4 to account for the 4 warp schedulers;
881 * this is inaccurate !
882 */
/* Counter configurations for the NVE4+ MP performance queries, indexed by
 * NVE4_PM_QUERY_* (see the _Q1A/_Q1B/_M2* macros above for field meaning).
 */
static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] =
{
   _Q1A(PROF_TRIGGER_0, 0x0001, B6, USER, 0x00000000, 1, 1),
   _Q1A(PROF_TRIGGER_1, 0x0001, B6, USER, 0x00000004, 1, 1),
   _Q1A(PROF_TRIGGER_2, 0x0001, B6, USER, 0x00000008, 1, 1),
   _Q1A(PROF_TRIGGER_3, 0x0001, B6, USER, 0x0000000c, 1, 1),
   _Q1A(PROF_TRIGGER_4, 0x0001, B6, USER, 0x00000010, 1, 1),
   _Q1A(PROF_TRIGGER_5, 0x0001, B6, USER, 0x00000014, 1, 1),
   _Q1A(PROF_TRIGGER_6, 0x0001, B6, USER, 0x00000018, 1, 1),
   _Q1A(PROF_TRIGGER_7, 0x0001, B6, USER, 0x0000001c, 1, 1),
   _Q1A(LAUNCHED_WARPS, 0x0001, B6, LAUNCH, 0x00000004, 1, 1),
   _Q1A(LAUNCHED_THREADS, 0x003f, B6, LAUNCH, 0x398a4188, 1, 1),
   _Q1B(LAUNCHED_CTA, 0x0001, B6, WARP, 0x0000001c, 1, 1),
   _Q1A(INST_ISSUED1, 0x0001, B6, ISSUE, 0x00000004, 1, 1),
   _Q1A(INST_ISSUED2, 0x0001, B6, ISSUE, 0x00000008, 1, 1),
   _Q1A(INST_ISSUED, 0x0003, B6, ISSUE, 0x00000104, 1, 1),
   _Q1A(INST_EXECUTED, 0x0003, B6, EXEC, 0x00000398, 1, 1),
   _Q1A(LD_SHARED, 0x0001, B6, LDST, 0x00000000, 1, 1),
   _Q1A(ST_SHARED, 0x0001, B6, LDST, 0x00000004, 1, 1),
   _Q1A(LD_LOCAL, 0x0001, B6, LDST, 0x00000008, 1, 1),
   _Q1A(ST_LOCAL, 0x0001, B6, LDST, 0x0000000c, 1, 1),
   _Q1A(GLD_REQUEST, 0x0001, B6, LDST, 0x00000010, 1, 1),
   _Q1A(GST_REQUEST, 0x0001, B6, LDST, 0x00000014, 1, 1),
   _Q1B(L1_LOCAL_LOAD_HIT, 0x0001, B6, L1, 0x00000000, 1, 1),
   _Q1B(L1_LOCAL_LOAD_MISS, 0x0001, B6, L1, 0x00000004, 1, 1),
   _Q1B(L1_LOCAL_STORE_HIT, 0x0001, B6, L1, 0x00000008, 1, 1),
   _Q1B(L1_LOCAL_STORE_MISS, 0x0001, B6, L1, 0x0000000c, 1, 1),
   _Q1B(L1_GLOBAL_LOAD_HIT, 0x0001, B6, L1, 0x00000010, 1, 1),
   _Q1B(L1_GLOBAL_LOAD_MISS, 0x0001, B6, L1, 0x00000014, 1, 1),
   _Q1B(GLD_TRANSACTIONS_UNCACHED, 0x0001, B6, MEM, 0x00000000, 1, 1),
   _Q1B(GST_TRANSACTIONS, 0x0001, B6, MEM, 0x00000004, 1, 1),
   _Q1A(BRANCH, 0x0001, B6, BRANCH, 0x0000000c, 1, 1),
   _Q1A(BRANCH_DIVERGENT, 0x0001, B6, BRANCH, 0x00000010, 1, 1),
   _Q1B(ACTIVE_WARPS, 0x003f, B6, WARP, 0x31483104, 2, 1),
   _Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1),
   _Q1A(ATOM_COUNT, 0x0001, B6, BRANCH, 0x00000000, 1, 1),
   _Q1A(GRED_COUNT, 0x0001, B6, BRANCH, 0x00000008, 1, 1),
   _Q1B(LD_SHARED_REPLAY, 0x0001, B6, REPLAY, 0x00000008, 1, 1),
   _Q1B(ST_SHARED_REPLAY, 0x0001, B6, REPLAY, 0x0000000c, 1, 1),
   _Q1B(LD_LOCAL_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000000, 1, 1),
   _Q1B(ST_LOCAL_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000004, 1, 1),
   _Q1B(L1_LD_SHARED_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000008, 1, 1),
   _Q1B(L1_ST_SHARED_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x0000000c, 1, 1),
   _Q1B(GLD_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000010, 1, 1),
   _Q1B(GST_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000014, 1, 1),
   _M2AB(IPC, 0x3, B6, EXEC, 0x398, 0xffff, LOGOP, WARP, 0x0, DIV_SUM_M0, 10, 1),
   _M2AB(IPAC, 0x3, B6, EXEC, 0x398, 0x1, B6, WARP, 0x0, AVG_DIV_MM, 10, 1),
   _M2A(IPEC, 0x3, B6, EXEC, 0x398, 0xe, LOGOP, EXEC, 0x398, AVG_DIV_MM, 10, 1),
   _M2A(INST_REPLAY_OHEAD, 0x3, B6, ISSUE, 0x104, 0x3, B6, EXEC, 0x398, REL_SUM_MM, 100, 1),
   _M2B(MP_OCCUPANCY, 0x3f, B6, WARP, 0x31483104, 0x01, B6, WARP, 0x0, AVG_DIV_MM, 200, 64),
   _M2B(MP_EFFICIENCY, 0x01, B6, WARP, 0x0, 0xffff, LOGOP, WARP, 0x0, AVG_DIV_M0, 100, 1),
};
935
936 #undef _Q1A
937 #undef _Q1B
938 #undef _M2A
939 #undef _M2B
940
941 /* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */
/* Compute kernel (pre-NVE4 ISA) used to read back the MP performance
 * counters.  Per the assembly listing below: each thread reads $pm0..$pm7,
 * only lane 0 ($tidx == 0) survives the predicated exit, computes its MP's
 * slot from $physid (MP index * 36 bytes), and stores the eight counter
 * words plus a sequence word (from c0[0x8]) to the result buffer whose
 * 64-bit address is passed in c0[0x0]/c0[0x4].
 * The hex opcodes correspond 1:1 to the commented assembly; do not edit
 * them by hand. */
static const uint64_t nvc0_read_mp_pm_counters_code[] =
{
   /* mov b32 $r8 $tidx
    * mov b32 $r9 $physid
    * mov b32 $r0 $pm0
    * mov b32 $r1 $pm1
    * mov b32 $r2 $pm2
    * mov b32 $r3 $pm3
    * mov b32 $r4 $pm4
    * mov b32 $r5 $pm5
    * mov b32 $r6 $pm6
    * mov b32 $r7 $pm7
    * set $p0 0x1 eq u32 $r8 0x0
    * mov b32 $r10 c0[0x0]
    * mov b32 $r11 c0[0x4]
    * ext u32 $r8 $r9 0x414
    * (not $p0) exit
    * mul $r8 u32 $r8 u32 36
    * add b32 $r10 $c $r10 $r8
    * add b32 $r11 $r11 0x0 $c
    * mov b32 $r8 c0[0x8]
    * st b128 wt g[$r10d+0x00] $r0q
    * st b128 wt g[$r10d+0x10] $r4q
    * st b32 wt g[$r10d+0x20] $r8
    * exit */
   0x2c00000084021c04ULL,
   0x2c0000000c025c04ULL,
   0x2c00000010001c04ULL,
   0x2c00000014005c04ULL,
   0x2c00000018009c04ULL,
   0x2c0000001c00dc04ULL,
   0x2c00000020011c04ULL,
   0x2c00000024015c04ULL,
   0x2c00000028019c04ULL,
   0x2c0000002c01dc04ULL,
   0x190e0000fc81dc03ULL,
   0x2800400000029de4ULL,
   0x280040001002dde4ULL,
   0x7000c01050921c03ULL,
   0x80000000000021e7ULL,
   0x1000000090821c02ULL,
   0x4801000020a29c03ULL,
   0x0800000000b2dc42ULL,
   0x2800400020021de4ULL,
   0x9400000000a01fc5ULL,
   0x9400000040a11fc5ULL,
   0x9400000080a21f85ULL,
   0x8000000000001de7ULL
};
991
/* User-visible names of the NVC0:NVE4 MP performance queries, exposed
 * through nvc0_screen_get_driver_query_info.  Indexed by
 * (query id - NVC0_QUERY_DRV_STAT_COUNT); the order must stay in sync
 * with the NVC0_PM_QUERY_* indices used by nvc0_mp_pm_queries below.
 * These strings are part of the driver-query ABI -- do not rename. */
static const char *nvc0_pm_query_names[] =
{
   /* MP counters */
   "inst_executed",
   "branch",
   "divergent_branch",
   "active_warps",
   "active_cycles",
   "warps_launched",
   "threads_launched",
   "shared_load",
   "shared_store",
   "local_load",
   "local_store",
   "gred_count",
   "atom_count",
   "gld_request",
   "gst_request",
   "inst_issued1_0",
   "inst_issued1_1",
   "inst_issued2_0",
   "inst_issued2_1",
   "thread_inst_executed_0",
   "thread_inst_executed_1",
   "thread_inst_executed_2",
   "thread_inst_executed_3",
   "prof_trigger_00",
   "prof_trigger_01",
   "prof_trigger_02",
   "prof_trigger_03",
   "prof_trigger_04",
   "prof_trigger_05",
   "prof_trigger_06",
   "prof_trigger_07",
};
1027
/* Counter configurations for the NVC0:NVE4 MP performance queries.
 *
 * _Q(name, f, m, g, c, s0..s5) builds one entry of nvc0_mp_pm_queries:
 *  - f/m feed the MP_PM_OP function and NVC0_COMPUTE_MP_PM_OP_MODE_* mode
 *    words programmed in nvc0_mp_pm_query_begin,
 *  - g is the signal-group select (sig_sel), c the number of source
 *    signals (num_src),
 *  - s0..s5 are per-source signal selects, packed one byte each into a
 *    48-bit src_sel value (unpacked 8 bits at a time in query_begin).
 * Every NVC0 query uses a single counter, summed over all MPs
 * (NVC0_COUNTER_OPn_SUM) with a 1/1 normalization.
 * NOTE(review): field-to-struct mapping inferred from how
 * nvc0_mp_pm_query_begin consumes cfg->ctr[]; the raw select values come
 * from hardware documentation and cannot be validated here. */
#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_PM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }

static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] =
{
   _Q(INST_EXECUTED,     0xaaaa, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 0x00, 0x00),
   _Q(BRANCH,            0xaaaa, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00),
   _Q(BRANCH_DIVERGENT,  0xaaaa, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 0x00, 0x00),
   _Q(ACTIVE_WARPS,      0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
   _Q(ACTIVE_CYCLES,     0xaaaa, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(LAUNCHED_WARPS,    0xaaaa, LOGOP, 0x26, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(LAUNCHED_THREADS,  0xaaaa, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
   _Q(LD_SHARED,         0xaaaa, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(ST_SHARED,         0xaaaa, LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(LD_LOCAL,          0xaaaa, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(ST_LOCAL,          0xaaaa, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(GRED_COUNT,        0xaaaa, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(ATOM_COUNT,        0xaaaa, LOGOP, 0x63, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(GLD_REQUEST,       0xaaaa, LOGOP, 0x64, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(GST_REQUEST,       0xaaaa, LOGOP, 0x64, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED1_0,    0xaaaa, LOGOP, 0x7e, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED1_1,    0xaaaa, LOGOP, 0x7e, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED2_0,    0xaaaa, LOGOP, 0x7e, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED2_1,    0xaaaa, LOGOP, 0x7e, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(TH_INST_EXECUTED_0, 0xaaaa, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(TH_INST_EXECUTED_1, 0xaaaa, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(TH_INST_EXECUTED_2, 0xaaaa, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(TH_INST_EXECUTED_3, 0xaaaa, LOGOP, 0xa6, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(PROF_TRIGGER_0,    0xaaaa, LOGOP, 0x01, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_1,    0xaaaa, LOGOP, 0x01, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_2,    0xaaaa, LOGOP, 0x01, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_3,    0xaaaa, LOGOP, 0x01, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_4,    0xaaaa, LOGOP, 0x01, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_5,    0xaaaa, LOGOP, 0x01, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_6,    0xaaaa, LOGOP, 0x01, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_7,    0xaaaa, LOGOP, 0x01, 1, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00),
};

#undef _Q
1066
1067 static const struct nvc0_mp_pm_query_cfg *
1068 nvc0_mp_pm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q)
1069 {
1070 struct nvc0_screen *screen = nvc0->screen;
1071
1072 if (screen->base.class_3d >= NVE4_3D_CLASS)
1073 return &nve4_mp_pm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];
1074 return &nvc0_mp_pm_queries[q->type - NVC0_PM_QUERY(0)];
1075 }
1076
1077 boolean
1078 nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
1079 {
1080 struct nvc0_screen *screen = nvc0->screen;
1081 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
1082 const boolean is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
1083 const struct nvc0_mp_pm_query_cfg *cfg;
1084 unsigned i, c;
1085 unsigned num_ab[2] = { 0, 0 };
1086
1087 cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
1088
1089 /* check if we have enough free counter slots */
1090 for (i = 0; i < cfg->num_counters; ++i)
1091 num_ab[cfg->ctr[i].sig_dom]++;
1092
1093 if (screen->pm.num_mp_pm_active[0] + num_ab[0] > 4 ||
1094 screen->pm.num_mp_pm_active[1] + num_ab[1] > 4) {
1095 NOUVEAU_ERR("Not enough free MP counter slots !\n");
1096 return false;
1097 }
1098
1099 assert(cfg->num_counters <= 4);
1100 PUSH_SPACE(push, 4 * 8 * (is_nve4 ? 1 : 6) + 6);
1101
1102 if (!screen->pm.mp_counters_enabled) {
1103 screen->pm.mp_counters_enabled = TRUE;
1104 BEGIN_NVC0(push, SUBC_SW(0x06ac), 1);
1105 PUSH_DATA (push, 0x1fcb);
1106 }
1107
1108 /* set sequence field to 0 (used to check if result is available) */
1109 for (i = 0; i < screen->mp_count; ++i)
1110 q->data[i * 10 + 10] = 0;
1111
1112 for (i = 0; i < cfg->num_counters; ++i) {
1113 const unsigned d = cfg->ctr[i].sig_dom;
1114
1115 if (!screen->pm.num_mp_pm_active[d]) {
1116 uint32_t m = (1 << 22) | (1 << (7 + (8 * !d)));
1117 if (screen->pm.num_mp_pm_active[!d])
1118 m |= 1 << (7 + (8 * d));
1119 BEGIN_NVC0(push, SUBC_SW(0x0600), 1);
1120 PUSH_DATA (push, m);
1121 }
1122 screen->pm.num_mp_pm_active[d]++;
1123
1124 for (c = d * 4; c < (d * 4 + 4); ++c) {
1125 if (!screen->pm.mp_counter[c]) {
1126 q->ctr[i] = c;
1127 screen->pm.mp_counter[c] = (struct pipe_query *)q;
1128 break;
1129 }
1130 }
1131 assert(c <= (d * 4 + 3)); /* must succeed, already checked for space */
1132
1133 /* configure and reset the counter(s) */
1134 if (is_nve4) {
1135 if (d == 0)
1136 BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_A_SIGSEL(c & 3)), 1);
1137 else
1138 BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_B_SIGSEL(c & 3)), 1);
1139 PUSH_DATA (push, cfg->ctr[i].sig_sel);
1140 BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SRCSEL(c)), 1);
1141 PUSH_DATA (push, cfg->ctr[i].src_sel + 0x2108421 * (c & 3));
1142 BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 1);
1143 PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
1144 BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SET(c)), 1);
1145 PUSH_DATA (push, 0);
1146 } else {
1147 unsigned s;
1148
1149 for (s = 0; s < cfg->ctr[i].num_src; s++) {
1150 BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SIGSEL(s)), 1);
1151 PUSH_DATA (push, cfg->ctr[i].sig_sel);
1152 BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SRCSEL(s)), 1);
1153 PUSH_DATA (push, (cfg->ctr[i].src_sel >> (s * 8)) & 0xff);
1154 BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(s)), 1);
1155 PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
1156 BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SET(s)), 1);
1157 PUSH_DATA (push, 0);
1158 }
1159 }
1160 }
1161 return true;
1162 }
1163
/* Stop an MP performance counter query.
 *
 * Disables counting for every active slot, launches a small compute
 * program that copies the per-MP counter values (plus the sequence word)
 * into the query's buffer, releases the slots owned by this query, and
 * finally re-enables counting for the other queries still holding slots.
 */
static void
nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct pipe_context *pipe = &nvc0->base.pipe;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   const boolean is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
   uint32_t mask;
   uint32_t input[3];
   /* one (NVE4: four) warp(s) of 32 threads per MP; one block per MP */
   const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
   const uint grid[3] = { screen->mp_count, 1, 1 };
   unsigned c;
   const struct nvc0_mp_pm_query_cfg *cfg;

   cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);

   /* lazily wrap the read-back kernel in a compute program object */
   if (unlikely(!screen->pm.prog)) {
      struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
      prog->type = PIPE_SHADER_COMPUTE;
      prog->translated = TRUE;
      prog->num_gprs = 14;
      prog->parm_size = 12; /* 3 x 32-bit kernel inputs (see input[] below) */
      if (is_nve4) {
         prog->code = (uint32_t *)nve4_read_mp_pm_counters_code;
         prog->code_size = sizeof(nve4_read_mp_pm_counters_code);
      } else {
         prog->code = (uint32_t *)nvc0_read_mp_pm_counters_code;
         prog->code_size = sizeof(nvc0_read_mp_pm_counters_code);
      }
      screen->pm.prog = prog;
   }

   /* disable all counting */
   PUSH_SPACE(push, 8);
   for (c = 0; c < 8; ++c)
      if (screen->pm.mp_counter[c]) {
         if (is_nve4) {
            IMMED_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 0);
         } else {
            IMMED_NVC0(push, NVC0_COMPUTE(MP_PM_OP(c)), 0);
         }
      }
   /* release counters for this query */
   for (c = 0; c < 8; ++c) {
      if (nvc0_query(screen->pm.mp_counter[c]) == q) {
         screen->pm.num_mp_pm_active[c / 4]--;
         screen->pm.mp_counter[c] = NULL;
      }
   }

   BCTX_REFN_bo(nvc0->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR,
                q->bo);

   PUSH_SPACE(push, 1);
   IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);

   /* kernel inputs: 64-bit result buffer address split into two 32-bit
    * words, followed by the sequence number to store alongside the data */
   pipe->bind_compute_state(pipe, screen->pm.prog);
   input[0] = (q->bo->offset + q->base);
   input[1] = (q->bo->offset + q->base) >> 32;
   input[2] = q->sequence;
   pipe->launch_grid(pipe, block, grid, 0, input);

   nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY);

   /* re-activate other counters */
   PUSH_SPACE(push, 16);
   mask = 0;
   for (c = 0; c < 8; ++c) {
      unsigned i;
      q = nvc0_query(screen->pm.mp_counter[c]);
      if (!q)
         continue;
      cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
      for (i = 0; i < cfg->num_counters; ++i) {
         /* each query's counters only need to be re-enabled once */
         if (mask & (1 << q->ctr[i]))
            break;
         mask |= 1 << q->ctr[i];
         if (is_nve4) {
            BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(q->ctr[i])), 1);
         } else {
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(q->ctr[i])), 1);
         }
         PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
      }
   }
}
1250
1251 static INLINE boolean
1252 nvc0_mp_pm_query_read_data(uint32_t count[32][4],
1253 struct nvc0_context *nvc0, boolean wait,
1254 struct nvc0_query *q,
1255 const struct nvc0_mp_pm_query_cfg *cfg,
1256 unsigned mp_count)
1257 {
1258 unsigned p, c;
1259
1260 for (p = 0; p < mp_count; ++p) {
1261 const unsigned b = (0x24 / 4) * p;
1262
1263 for (c = 0; c < cfg->num_counters; ++c) {
1264 if (q->data[b + 8] != q->sequence) {
1265 if (!wait)
1266 return FALSE;
1267 if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
1268 return FALSE;
1269 }
1270 count[p][c] = q->data[b + q->ctr[c]];
1271 }
1272 }
1273 return TRUE;
1274 }
1275
/* Read back the raw counter values of an NVE4+ MP query.
 *
 * The result buffer holds one 0x60-byte record per MP.  Counters in the
 * first domain (slot < 4) are split over four partial values (one per
 * warp scheduler, presumably -- TODO confirm) at word offsets d*4+slot
 * and are summed; counters in the second domain have a single value at
 * word offset 16+(slot&3).  Words 20..23 are per-part sequence numbers
 * checked against q->sequence; if a part is not ready and @wait is
 * FALSE, bail out immediately, otherwise block on the BO.
 */
static INLINE boolean
nve4_mp_pm_query_read_data(uint32_t count[32][4],
                           struct nvc0_context *nvc0, boolean wait,
                           struct nvc0_query *q,
                           const struct nvc0_mp_pm_query_cfg *cfg,
                           unsigned mp_count)
{
   unsigned p, c, d;

   for (p = 0; p < mp_count; ++p) {
      const unsigned b = (0x60 / 4) * p; /* word offset of this MP's record */

      for (c = 0; c < cfg->num_counters; ++c) {
         count[p][c] = 0;
         /* domain-B counters (slot >= 4) have one part, domain-A four */
         for (d = 0; d < ((q->ctr[c] & ~3) ? 1 : 4); ++d) {
            if (q->data[b + 20 + d] != q->sequence) {
               if (!wait)
                  return FALSE;
               if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
                  return FALSE;
            }
            if (q->ctr[c] & ~0x3)
               count[p][c] = q->data[b + 16 + (q->ctr[c] & 3)];
            else
               count[p][c] += q->data[b + d * 4 + q->ctr[c]];
         }
      }
   }
   return TRUE;
}
1306
1307 /* Metric calculations:
1308 * sum(x) ... sum of x over all MPs
1309 * avg(x) ... average of x over all MPs
1310 *
1311 * IPC : sum(inst_executed) / clock
1312 * INST_REPLAY_OHEAD: (sum(inst_issued) - sum(inst_executed)) / sum(inst_issued)
1313 * MP_OCCUPANCY : avg((active_warps / 64) / active_cycles)
1314 * MP_EFFICIENCY : avg(active_cycles / clock)
1315 *
1316 * NOTE: Interpretation of IPC requires knowledge of MP count.
1317 */
1318 static boolean
1319 nvc0_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
1320 void *result, boolean wait)
1321 {
1322 uint32_t count[32][4];
1323 uint64_t value = 0;
1324 unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32);
1325 unsigned p, c;
1326 const struct nvc0_mp_pm_query_cfg *cfg;
1327 boolean ret;
1328
1329 cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
1330
1331 if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
1332 ret = nve4_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
1333 else
1334 ret = nvc0_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
1335 if (!ret)
1336 return FALSE;
1337
1338 if (cfg->op == NVC0_COUNTER_OPn_SUM) {
1339 for (c = 0; c < cfg->num_counters; ++c)
1340 for (p = 0; p < mp_count; ++p)
1341 value += count[p][c];
1342 value = (value * cfg->norm[0]) / cfg->norm[1];
1343 } else
1344 if (cfg->op == NVC0_COUNTER_OPn_OR) {
1345 uint32_t v = 0;
1346 for (c = 0; c < cfg->num_counters; ++c)
1347 for (p = 0; p < mp_count; ++p)
1348 v |= count[p][c];
1349 value = ((uint64_t)v * cfg->norm[0]) / cfg->norm[1];
1350 } else
1351 if (cfg->op == NVC0_COUNTER_OPn_AND) {
1352 uint32_t v = ~0;
1353 for (c = 0; c < cfg->num_counters; ++c)
1354 for (p = 0; p < mp_count; ++p)
1355 v &= count[p][c];
1356 value = ((uint64_t)v * cfg->norm[0]) / cfg->norm[1];
1357 } else
1358 if (cfg->op == NVC0_COUNTER_OP2_REL_SUM_MM) {
1359 uint64_t v[2] = { 0, 0 };
1360 for (p = 0; p < mp_count; ++p) {
1361 v[0] += count[p][0];
1362 v[1] += count[p][1];
1363 }
1364 if (v[0])
1365 value = ((v[0] - v[1]) * cfg->norm[0]) / (v[0] * cfg->norm[1]);
1366 } else
1367 if (cfg->op == NVC0_COUNTER_OP2_DIV_SUM_M0) {
1368 for (p = 0; p < mp_count; ++p)
1369 value += count[p][0];
1370 if (count[0][1])
1371 value = (value * cfg->norm[0]) / (count[0][1] * cfg->norm[1]);
1372 else
1373 value = 0;
1374 } else
1375 if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_MM) {
1376 unsigned mp_used = 0;
1377 for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
1378 if (count[p][1])
1379 value += (count[p][0] * cfg->norm[0]) / count[p][1];
1380 if (mp_used)
1381 value /= (uint64_t)mp_used * cfg->norm[1];
1382 } else
1383 if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_M0) {
1384 unsigned mp_used = 0;
1385 for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
1386 value += count[p][0];
1387 if (count[0][1] && mp_used) {
1388 value *= cfg->norm[0];
1389 value /= (uint64_t)count[0][1] * mp_used * cfg->norm[1];
1390 } else {
1391 value = 0;
1392 }
1393 }
1394
1395 *(uint64_t *)result = value;
1396 return TRUE;
1397 }
1398
/* Enumerate the driver-specific queries.
 *
 * With @info == NULL, returns the total number of available queries.
 * Otherwise fills in @info for query @id and returns 1, or 0 when @id
 * is out of range.  The id space is: driver statistics first (only when
 * compiled in), then the MP performance counters (NVE4+ table, or the
 * NVC0 table when the compute engine is available).
 */
int
nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
                                  unsigned id,
                                  struct pipe_driver_query_info *info)
{
   struct nvc0_screen *screen = nvc0_screen(pscreen);
   int count = 0;

   count += NVC0_QUERY_DRV_STAT_COUNT;

   /* MP counters are only exposed with a DRM version >= 1.1.1 */
   if (screen->base.device->drm_version >= 0x01000101) {
      if (screen->base.class_3d >= NVE4_3D_CLASS) {
         count += NVE4_PM_QUERY_COUNT;
      } else
      if (screen->compute) {
         count += NVC0_PM_QUERY_COUNT; /* NVC0_COMPUTE is not always enabled */
      }
   }

   if (!info)
      return count;

   /* Init default values. */
   info->name = "this_is_not_the_query_you_are_looking_for";
   info->query_type = 0xdeadd01d;
   info->max_value.u64 = 0;
   info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
   info->group_id = -1;

#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
   if (id < NVC0_QUERY_DRV_STAT_COUNT) {
      info->name = nvc0_drv_stat_names[id];
      info->query_type = NVC0_QUERY_DRV_STAT(id);
      info->max_value.u64 = 0;
      /* byte counters get a BYTES type so frontends can pretty-print them */
      if (strstr(info->name, "bytes"))
         info->type = PIPE_DRIVER_QUERY_TYPE_BYTES;
      info->group_id = NVC0_QUERY_DRV_STAT_GROUP;
      return 1;
   } else
#endif
   if (id < count) {
      if (screen->base.class_3d >= NVE4_3D_CLASS) {
         info->name = nve4_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
         info->query_type = NVE4_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
         /* metric queries are percentages capped at 100, counters are
          * unbounded.  NOTE(review): this compares the raw id (which
          * still includes the drv-stat offset) against the query enum
          * value -- verify when driver statistics are compiled in. */
         info->max_value.u64 =
            (id < NVE4_PM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100;
         info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
         return 1;
      } else
      if (screen->compute) {
         info->name = nvc0_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
         info->query_type = NVC0_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
         info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
         return 1;
      }
   }
   /* user asked for info about non-existing query */
   return 0;
}
1458
/* Enumerate the driver-specific query groups.
 *
 * With @info == NULL, returns the number of available groups.  Otherwise
 * fills in @info for group @id and returns 1, or 0 for an unknown group.
 * Possible groups: the MP performance counters (DRM >= 1.1.1 and NVE4+
 * or compute available) and, when compiled in, the driver statistics.
 */
int
nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
                                        unsigned id,
                                        struct pipe_driver_query_group_info *info)
{
   struct nvc0_screen *screen = nvc0_screen(pscreen);
   int count = 0;

#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
   count++;
#endif

   if (screen->base.device->drm_version >= 0x01000101) {
      if (screen->base.class_3d >= NVE4_3D_CLASS) {
         count++;
      } else if (screen->compute) {
         count++; /* NVC0_COMPUTE is not always enabled */
      }
   }

   if (!info)
      return count;

   if (id == NVC0_QUERY_MP_COUNTER_GROUP) {
      info->name = "MP counters";
      info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;

      if (screen->base.class_3d >= NVE4_3D_CLASS) {
         info->num_queries = NVE4_PM_QUERY_COUNT;

         /* On NVE4+, each multiprocessor have 8 hardware counters separated
          * in two distinct domains, but we allow only one active query
          * simultaneously because some of them use more than one hardware
          * counter and this will result in an undefined behaviour. */
         info->max_active_queries = 1; /* TODO: handle multiple hw counters */
         return 1;
      } else if (screen->compute) {
         info->num_queries = NVC0_PM_QUERY_COUNT;

         /* On NVC0:NVE4, each multiprocessor have 8 hardware counters
          * in a single domain. */
         info->max_active_queries = 8;
         return 1;
      }
   }
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
   else if (id == NVC0_QUERY_DRV_STAT_GROUP) {
      info->name = "Driver statistics";
      info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU;
      info->max_active_queries = NVC0_QUERY_DRV_STAT_COUNT;
      info->num_queries = NVC0_QUERY_DRV_STAT_COUNT;
      return 1;
   }
#endif

   /* user asked for info about non-existing query group */
   info->name = "this_is_not_the_query_group_you_are_looking_for";
   info->max_active_queries = 0;
   info->num_queries = 0;
   info->type = 0;
   return 0;
}
1521
1522 void
1523 nvc0_init_query_functions(struct nvc0_context *nvc0)
1524 {
1525 struct pipe_context *pipe = &nvc0->base.pipe;
1526
1527 pipe->create_query = nvc0_query_create;
1528 pipe->destroy_query = nvc0_query_destroy;
1529 pipe->begin_query = nvc0_query_begin;
1530 pipe->end_query = nvc0_query_end;
1531 pipe->get_query_result = nvc0_query_result;
1532 pipe->render_condition = nvc0_render_condition;
1533 }