nvc0: make names of performance counter queries consistent
[mesa.git] src/gallium/drivers/nouveau/nvc0/nvc0_query.c
/*
 * Copyright 2011 Nouveau Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Christoph Bumiller
 */

#define NVC0_PUSH_EXPLICIT_SPACE_CHECKING

#include "nvc0/nvc0_context.h"
#include "nv_object.xml.h"
#include "nvc0/nve4_compute.xml.h"
#include "nvc0/nvc0_compute.xml.h"

#define NVC0_QUERY_STATE_READY   0
#define NVC0_QUERY_STATE_ACTIVE  1
#define NVC0_QUERY_STATE_ENDED   2
#define NVC0_QUERY_STATE_FLUSHED 3

struct nvc0_query {
   uint32_t *data;
   uint16_t type;
   uint16_t index;
   int8_t ctr[4];
   uint32_t sequence;
   struct nouveau_bo *bo;
   uint32_t base;
   uint32_t offset; /* base + i * rotate */
   uint8_t state;
   bool is64bit;
   uint8_t rotate;
   int nesting; /* only used for occlusion queries */
   union {
      struct nouveau_mm_allocation *mm;
      uint64_t value;
   } u;
   struct nouveau_fence *fence;
};

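/* Occlusion queries rotate through their allocation in q->rotate-sized
 * (32-byte) slots: each begin advances q->offset by q->rotate so the previous
 * slot's result stays readable for render-condition evaluation, and a fresh
 * buffer is allocated once the space below is exhausted (see
 * nvc0_query_rotate()). The constant is the allocation granularity, not a
 * per-query result size.
 */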
#define NVC0_QUERY_ALLOC_SPACE 256

static boolean nvc0_mp_pm_query_begin(struct nvc0_context *,
                                      struct nvc0_query *);
static void nvc0_mp_pm_query_end(struct nvc0_context *, struct nvc0_query *);
static boolean nvc0_mp_pm_query_result(struct nvc0_context *,
                                       struct nvc0_query *, void *, boolean);

static inline struct nvc0_query *
nvc0_query(struct pipe_query *pipe)
{
   return (struct nvc0_query *)pipe;
}

static bool
nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
{
   struct nvc0_screen *screen = nvc0->screen;
   int ret;

   if (q->bo) {
      nouveau_bo_ref(NULL, &q->bo);
      if (q->u.mm) {
         if (q->state == NVC0_QUERY_STATE_READY)
            nouveau_mm_free(q->u.mm);
         else
            nouveau_fence_work(screen->base.fence.current,
                               nouveau_mm_free_work, q->u.mm);
      }
   }
   if (size) {
      q->u.mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
      if (!q->bo)
         return false;
      q->offset = q->base;

      ret = nouveau_bo_map(q->bo, 0, screen->base.client);
      if (ret) {
         nvc0_query_allocate(nvc0, q, 0);
         return false;
      }
      q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
   }
   return true;
}

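/* Note: nvc0_query_allocate() with size == 0 only releases the old buffer,
 * deferring the actual free with a fence if results might still be in flight.
 */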
static void
nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
{
   nvc0_query_allocate(nvc0_context(pipe), nvc0_query(pq), 0);
   nouveau_fence_ref(NULL, &nvc0_query(pq)->fence);
   FREE(nvc0_query(pq));
}

static struct pipe_query *
nvc0_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_query *q;
   unsigned space = NVC0_QUERY_ALLOC_SPACE;

   q = CALLOC_STRUCT(nvc0_query);
   if (!q)
      return NULL;

   switch (type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      q->rotate = 32;
      space = NVC0_QUERY_ALLOC_SPACE;
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      q->is64bit = true;
      space = 512;
      break;
   case PIPE_QUERY_SO_STATISTICS:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      q->is64bit = true;
      space = 64;
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      q->is64bit = true;
      q->index = index;
      space = 32;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_GPU_FINISHED:
      space = 32;
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      space = 16;
      break;
   default:
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
      if (type >= NVC0_QUERY_DRV_STAT(0) && type <= NVC0_QUERY_DRV_STAT_LAST) {
         space = 0;
         q->is64bit = true;
         q->index = type - NVC0_QUERY_DRV_STAT(0);
         break;
      } else
#endif
      if (nvc0->screen->base.device->drm_version >= 0x01000101) {
         if (type >= NVE4_PM_QUERY(0) && type <= NVE4_PM_QUERY_LAST) {
            /* for each MP:
             * [00] = WS0.C0
             * [04] = WS0.C1
             * [08] = WS0.C2
             * [0c] = WS0.C3
             * [10] = WS1.C0
             * [14] = WS1.C1
             * [18] = WS1.C2
             * [1c] = WS1.C3
             * [20] = WS2.C0
             * [24] = WS2.C1
             * [28] = WS2.C2
             * [2c] = WS2.C3
             * [30] = WS3.C0
             * [34] = WS3.C1
             * [38] = WS3.C2
             * [3c] = WS3.C3
             * [40] = MP.C4
             * [44] = MP.C5
             * [48] = MP.C6
             * [4c] = MP.C7
             * [50] = WS0.sequence
             * [54] = WS1.sequence
             * [58] = WS2.sequence
             * [5c] = WS3.sequence
             */
            space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t);
            break;
         } else
         if (type >= NVC0_PM_QUERY(0) && type <= NVC0_PM_QUERY_LAST) {
            /* for each MP:
             * [00] = MP.C0
             * [04] = MP.C1
             * [08] = MP.C2
             * [0c] = MP.C3
             * [10] = MP.C4
             * [14] = MP.C5
             * [18] = MP.C6
             * [1c] = MP.C7
             * [20] = MP.sequence
             */
            space = (8 + 1) * nvc0->screen->mp_count * sizeof(uint32_t);
            break;
         }
      }
      debug_printf("invalid query type: %u\n", type);
      FREE(q);
      return NULL;
   }
   if (!nvc0_query_allocate(nvc0, q, space)) {
      FREE(q);
      return NULL;
   }

   q->type = type;

   if (q->rotate) {
      /* we advance before query_begin! */
      q->offset -= q->rotate;
      q->data -= q->rotate / sizeof(*q->data);
   } else
   if (!q->is64bit)
      q->data[0] = 0; /* initialize sequence */

   return (struct pipe_query *)q;
}

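/* Emit a query "get" command: the GPU writes a report (counter value plus,
 * depending on the report type, a sequence number and timestamp) to q->bo at
 * the given offset. 'get' selects the report type; callers OR in
 * (q->index << 5), which selects the vertex stream or buffer index for the
 * streamout-related reports (an inference from the callers below).
 */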
static void
nvc0_query_get(struct nouveau_pushbuf *push, struct nvc0_query *q,
               unsigned offset, uint32_t get)
{
   offset += q->offset;

   PUSH_SPACE(push, 5);
   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
   BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
   PUSH_DATAh(push, q->bo->offset + offset);
   PUSH_DATA (push, q->bo->offset + offset);
   PUSH_DATA (push, q->sequence);
   PUSH_DATA (push, get);
}

static void
nvc0_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   q->offset += q->rotate;
   q->data += q->rotate / sizeof(*q->data);
   if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
      nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
}

static boolean
nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q = nvc0_query(pq);
   bool ret = true;

   /* For occlusion queries we have to change the storage, because a previous
    * query might set the initial render condition to false even *after* we
    * re-initialized it to true.
    */
   if (q->rotate) {
      nvc0_query_rotate(nvc0, q);

      /* XXX: can we do this with the GPU, and sync with respect to a previous
       * query?
       */
      q->data[0] = q->sequence; /* initialize sequence */
      q->data[1] = 1; /* initial render condition = true */
      q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
      q->data[5] = 0;
   }
   q->sequence++;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      q->nesting = nvc0->screen->num_occlusion_queries_active++;
      if (q->nesting) {
         nvc0_query_get(push, q, 0x10, 0x0100f002);
      } else {
         PUSH_SPACE(push, 3);
         BEGIN_NVC0(push, NVC0_3D(COUNTER_RESET), 1);
         PUSH_DATA (push, NVC0_3D_COUNTER_RESET_SAMPLECNT);
         IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nvc0_query_get(push, q, 0x10, 0x09005002 | (q->index << 5));
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nvc0_query_get(push, q, 0x10, 0x05805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nvc0_query_get(push, q, 0x20, 0x05805002 | (q->index << 5));
      nvc0_query_get(push, q, 0x30, 0x06805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      nvc0_query_get(push, q, 0x10, 0x03005002 | (q->index << 5));
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      nvc0_query_get(push, q, 0x10, 0x00005002);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      nvc0_query_get(push, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
      nvc0_query_get(push, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
      nvc0_query_get(push, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
      nvc0_query_get(push, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
      nvc0_query_get(push, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nvc0_query_get(push, q, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nvc0_query_get(push, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nvc0_query_get(push, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
      nvc0_query_get(push, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
      nvc0_query_get(push, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
      break;
   default:
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
      if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
          q->type <= NVC0_QUERY_DRV_STAT_LAST) {
         if (q->index >= 5)
            q->u.value = nvc0->screen->base.stats.v[q->index];
         else
            q->u.value = 0;
      } else
#endif
      if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
          (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
         ret = nvc0_mp_pm_query_begin(nvc0, q);
      }
      break;
   }
   q->state = NVC0_QUERY_STATE_ACTIVE;
   return ret;
}

static void
nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q = nvc0_query(pq);

   if (q->state != NVC0_QUERY_STATE_ACTIVE) {
      /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
      if (q->rotate)
         nvc0_query_rotate(nvc0, q);
      q->sequence++;
   }
   q->state = NVC0_QUERY_STATE_ENDED;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      nvc0_query_get(push, q, 0, 0x0100f002);
      if (--nvc0->screen->num_occlusion_queries_active == 0) {
         PUSH_SPACE(push, 1);
         IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nvc0_query_get(push, q, 0, 0x09005002 | (q->index << 5));
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nvc0_query_get(push, q, 0, 0x05805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nvc0_query_get(push, q, 0x00, 0x05805002 | (q->index << 5));
      nvc0_query_get(push, q, 0x10, 0x06805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      /* TODO: How do we sum over all streams for render condition? */
      /* PRIMS_DROPPED doesn't write sequence, use a ZERO query to sync on */
      nvc0_query_get(push, q, 0x00, 0x03005002 | (q->index << 5));
      nvc0_query_get(push, q, 0x20, 0x00005002);
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIME_ELAPSED:
      nvc0_query_get(push, q, 0, 0x00005002);
      break;
   case PIPE_QUERY_GPU_FINISHED:
      nvc0_query_get(push, q, 0, 0x1000f010);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      nvc0_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
      nvc0_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
      nvc0_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
      nvc0_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
      nvc0_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nvc0_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nvc0_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nvc0_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
      nvc0_query_get(push, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
      nvc0_query_get(push, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      /* indexed by TFB buffer instead of by vertex stream */
      nvc0_query_get(push, q, 0x00, 0x0d005002 | (q->index << 5));
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* This query is not issued on GPU because disjoint is forced to false */
      q->state = NVC0_QUERY_STATE_READY;
      break;
   default:
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
      if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
          q->type <= NVC0_QUERY_DRV_STAT_LAST) {
         q->u.value = nvc0->screen->base.stats.v[q->index] - q->u.value;
         return;
      } else
#endif
      if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
          (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
         nvc0_mp_pm_query_end(nvc0, q);
      }
      break;
   }
   if (q->is64bit)
      nouveau_fence_ref(nvc0->screen->base.fence.current, &q->fence);
}

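/* 32-bit queries poll the sequence number the GPU report writes next to the
 * result; the 64-bit reports have no sequence slot here, so those queries
 * take a fence reference in nvc0_query_end() and readiness is tested via the
 * fence instead.
 */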
static inline void
nvc0_query_update(struct nouveau_client *cli, struct nvc0_query *q)
{
   if (q->is64bit) {
      if (nouveau_fence_signalled(q->fence))
         q->state = NVC0_QUERY_STATE_READY;
   } else {
      if (q->data[0] == q->sequence)
         q->state = NVC0_QUERY_STATE_READY;
   }
}

static boolean
nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
                  boolean wait, union pipe_query_result *result)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_query *q = nvc0_query(pq);
   uint64_t *res64 = (uint64_t*)result;
   uint32_t *res32 = (uint32_t*)result;
   uint8_t *res8 = (uint8_t*)result;
   uint64_t *data64 = (uint64_t *)q->data;
   unsigned i;

#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
   if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
       q->type <= NVC0_QUERY_DRV_STAT_LAST) {
      res64[0] = q->u.value;
      return true;
   } else
#endif
   if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
       (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
      return nvc0_mp_pm_query_result(nvc0, q, result, wait);
   }

   if (q->state != NVC0_QUERY_STATE_READY)
      nvc0_query_update(nvc0->screen->base.client, q);

   if (q->state != NVC0_QUERY_STATE_READY) {
      if (!wait) {
         if (q->state != NVC0_QUERY_STATE_FLUSHED) {
            q->state = NVC0_QUERY_STATE_FLUSHED;
            /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
            PUSH_KICK(nvc0->base.pushbuf);
         }
         return false;
      }
      if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->screen->base.client))
         return false;
      NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1);
   }
   q->state = NVC0_QUERY_STATE_READY;

   switch (q->type) {
   case PIPE_QUERY_GPU_FINISHED:
      res8[0] = true;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
      res64[0] = q->data[1] - q->data[5];
      break;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      res8[0] = q->data[1] != q->data[5];
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
   case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
      res64[0] = data64[0] - data64[2];
      break;
   case PIPE_QUERY_SO_STATISTICS:
      res64[0] = data64[0] - data64[4];
      res64[1] = data64[2] - data64[6];
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      res8[0] = data64[0] != data64[2];
      break;
   case PIPE_QUERY_TIMESTAMP:
      res64[0] = data64[1];
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      res64[0] = 1000000000;
      res8[8] = false;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      res64[0] = data64[1] - data64[3];
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      for (i = 0; i < 10; ++i)
         res64[i] = data64[i * 2] - data64[24 + i * 2];
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      res32[0] = q->data[1];
      break;
   default:
      assert(0); /* can't happen, we don't create queries with invalid type */
      return false;
   }

   return true;
}

void
nvc0_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq)
{
   struct nvc0_query *q = nvc0_query(pq);
   unsigned offset = q->offset;

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) offset += 0x20;

   PUSH_SPACE(push, 5);
   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
   PUSH_DATAh(push, q->bo->offset + offset);
   PUSH_DATA (push, q->bo->offset + offset);
   PUSH_DATA (push, q->sequence);
   PUSH_DATA (push, (1 << 12) |
              NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
}

static void
nvc0_render_condition(struct pipe_context *pipe,
                      struct pipe_query *pq,
                      boolean condition, uint mode)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q;
   uint32_t cond;
   bool wait =
      mode != PIPE_RENDER_COND_NO_WAIT &&
      mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

   if (!pq) {
      cond = NVC0_3D_COND_MODE_ALWAYS;
   }
   else {
      q = nvc0_query(pq);
      /* NOTE: comparison of 2 queries only works if both have completed */
      switch (q->type) {
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
         cond = condition ? NVC0_3D_COND_MODE_EQUAL :
                            NVC0_3D_COND_MODE_NOT_EQUAL;
         wait = true;
         break;
      case PIPE_QUERY_OCCLUSION_COUNTER:
      case PIPE_QUERY_OCCLUSION_PREDICATE:
         if (likely(!condition)) {
            if (unlikely(q->nesting))
               cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
                             NVC0_3D_COND_MODE_ALWAYS;
            else
               cond = NVC0_3D_COND_MODE_RES_NON_ZERO;
         } else {
            cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
         }
         break;
      default:
         assert(!"render condition query not a predicate");
         cond = NVC0_3D_COND_MODE_ALWAYS;
         break;
      }
   }

   nvc0->cond_query = pq;
   nvc0->cond_cond = condition;
   nvc0->cond_condmode = cond;
   nvc0->cond_mode = mode;

   if (!pq) {
      PUSH_SPACE(push, 1);
      IMMED_NVC0(push, NVC0_3D(COND_MODE), cond);
      return;
   }

   if (wait)
      nvc0_query_fifo_wait(push, pq);

   PUSH_SPACE(push, 7);
   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   BEGIN_NVC0(push, NVC0_3D(COND_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, q->bo->offset + q->offset);
   PUSH_DATA (push, q->bo->offset + q->offset);
   PUSH_DATA (push, cond);
   BEGIN_NVC0(push, NVC0_2D(COND_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, q->bo->offset + q->offset);
   PUSH_DATA (push, q->bo->offset + q->offset);
}

void
nvc0_query_pushbuf_submit(struct nouveau_pushbuf *push,
                          struct pipe_query *pq, unsigned result_offset)
{
   struct nvc0_query *q = nvc0_query(pq);

#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))

   PUSH_REFN(push, q->bo, NOUVEAU_BO_RD | NOUVEAU_BO_GART);
   nouveau_pushbuf_space(push, 0, 0, 1);
   nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
                        NVC0_IB_ENTRY_1_NO_PREFETCH);
}

void
nvc0_so_target_save_offset(struct pipe_context *pipe,
                           struct pipe_stream_output_target *ptarg,
                           unsigned index, bool *serialize)
{
   struct nvc0_so_target *targ = nvc0_so_target(ptarg);

   if (*serialize) {
      *serialize = false;
      PUSH_SPACE(nvc0_context(pipe)->base.pushbuf, 1);
      IMMED_NVC0(nvc0_context(pipe)->base.pushbuf, NVC0_3D(SERIALIZE), 0);

      NOUVEAU_DRV_STAT(nouveau_screen(pipe->screen), gpu_serialize_count, 1);
   }

   nvc0_query(targ->pq)->index = index;

   nvc0_query_end(pipe, targ->pq);
}


/* === DRIVER STATISTICS === */

#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS

static const char *nvc0_drv_stat_names[] =
{
   "drv-tex_obj_current_count",
   "drv-tex_obj_current_bytes",
   "drv-buf_obj_current_count",
   "drv-buf_obj_current_bytes_vid",
   "drv-buf_obj_current_bytes_sys",
   "drv-tex_transfers_rd",
   "drv-tex_transfers_wr",
   "drv-tex_copy_count",
   "drv-tex_blit_count",
   "drv-tex_cache_flush_count",
   "drv-buf_transfers_rd",
   "drv-buf_transfers_wr",
   "drv-buf_read_bytes_staging_vid",
   "drv-buf_write_bytes_direct",
   "drv-buf_write_bytes_staging_vid",
   "drv-buf_write_bytes_staging_sys",
   "drv-buf_copy_bytes",
   "drv-buf_non_kernel_fence_sync_count",
   "drv-any_non_kernel_fence_sync_count",
   "drv-query_sync_count",
   "drv-gpu_serialize_count",
   "drv-draw_calls_array",
   "drv-draw_calls_indexed",
   "drv-draw_calls_fallback_count",
   "drv-user_buffer_upload_bytes",
   "drv-constbuf_upload_count",
   "drv-constbuf_upload_bytes",
   "drv-pushbuf_count",
   "drv-resource_validate_count"
};

#endif /* NOUVEAU_ENABLE_DRIVER_STATISTICS */


/* === PERFORMANCE MONITORING COUNTERS for NVE4+ === */

/* Code to read out MP counters: They are accessible via mmio, too, but let's
 * just avoid mapping registers in userspace. We'd have to know which MPs are
 * enabled/present, too, and that information is not presently exposed.
 * We could add a kernel interface for it, but reading the counters like this
 * has the advantage of being async (if get_result isn't called immediately).
 */
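/* For reference, a sketch (illustration only; the struct name is hypothetical)
 * of the per-MP record the kernel below writes, matching the layout comment
 * in nvc0_query_create() and the 0x60-byte per-MP stride used on read-back:
 *
 *    struct nve4_mp_pm_record {
 *       uint32_t ws_counter[4][4]; // [00-3c] WS0-3.C0-C3
 *       uint32_t mp_counter[4];    // [40-4c] MP.C4-C7
 *       uint32_t ws_sequence[4];   // [50-5c] WS0-3.sequence
 *    };
 */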
static const uint64_t nve4_read_mp_pm_counters_code[] =
{
   /* sched 0x20 0x20 0x20 0x20 0x20 0x20 0x20
    * mov b32 $r8 $tidx
    * mov b32 $r12 $physid
    * mov b32 $r0 $pm0
    * mov b32 $r1 $pm1
    * mov b32 $r2 $pm2
    * mov b32 $r3 $pm3
    * mov b32 $r4 $pm4
    * sched 0x20 0x20 0x23 0x04 0x20 0x04 0x2b
    * mov b32 $r5 $pm5
    * mov b32 $r6 $pm6
    * mov b32 $r7 $pm7
    * set $p0 0x1 eq u32 $r8 0x0
    * mov b32 $r10 c0[0x0]
    * ext u32 $r8 $r12 0x414
    * mov b32 $r11 c0[0x4]
    * sched 0x04 0x2e 0x04 0x20 0x20 0x28 0x04
    * ext u32 $r9 $r12 0x208
    * (not $p0) exit
    * set $p1 0x1 eq u32 $r9 0x0
    * mul $r8 u32 $r8 u32 96
    * mul $r12 u32 $r9 u32 16
    * mul $r13 u32 $r9 u32 4
    * add b32 $r9 $r8 $r13
    * sched 0x28 0x04 0x2c 0x04 0x2c 0x04 0x2c
    * add b32 $r8 $r8 $r12
    * mov b32 $r12 $r10
    * add b32 $r10 $c $r10 $r8
    * mov b32 $r13 $r11
    * add b32 $r11 $r11 0x0 $c
    * add b32 $r12 $c $r12 $r9
    * st b128 wt g[$r10d] $r0q
    * sched 0x4 0x2c 0x20 0x04 0x2e 0x00 0x00
    * mov b32 $r0 c0[0x8]
    * add b32 $r13 $r13 0x0 $c
    * $p1 st b128 wt g[$r12d+0x40] $r4q
    * st b32 wt g[$r12d+0x50] $r0
    * exit */
   0x2202020202020207ULL,
   0x2c00000084021c04ULL,
   0x2c0000000c031c04ULL,
   0x2c00000010001c04ULL,
   0x2c00000014005c04ULL,
   0x2c00000018009c04ULL,
   0x2c0000001c00dc04ULL,
   0x2c00000020011c04ULL,
   0x22b0420042320207ULL,
   0x2c00000024015c04ULL,
   0x2c00000028019c04ULL,
   0x2c0000002c01dc04ULL,
   0x190e0000fc81dc03ULL,
   0x2800400000029de4ULL,
   0x7000c01050c21c03ULL,
   0x280040001002dde4ULL,
   0x204282020042e047ULL,
   0x7000c00820c25c03ULL,
   0x80000000000021e7ULL,
   0x190e0000fc93dc03ULL,
   0x1000000180821c02ULL,
   0x1000000040931c02ULL,
   0x1000000010935c02ULL,
   0x4800000034825c03ULL,
   0x22c042c042c04287ULL,
   0x4800000030821c03ULL,
   0x2800000028031de4ULL,
   0x4801000020a29c03ULL,
   0x280000002c035de4ULL,
   0x0800000000b2dc42ULL,
   0x4801000024c31c03ULL,
   0x9400000000a01fc5ULL,
   0x200002e04202c047ULL,
   0x2800400020001de4ULL,
   0x0800000000d35c42ULL,
   0x9400000100c107c5ULL,
   0x9400000140c01f85ULL,
   0x8000000000001de7ULL
};

/* NOTE: intentionally using the same names as NV */
static const char *nve4_pm_query_names[] =
{
   /* MP counters */
   "prof_trigger_00",
   "prof_trigger_01",
   "prof_trigger_02",
   "prof_trigger_03",
   "prof_trigger_04",
   "prof_trigger_05",
   "prof_trigger_06",
   "prof_trigger_07",
   "warps_launched",
   "threads_launched",
   "sm_cta_launched",
   "inst_issued1",
   "inst_issued2",
   "inst_executed",
   "local_load",
   "local_store",
   "shared_load",
   "shared_store",
   "l1_local_load_hit",
   "l1_local_load_miss",
   "l1_local_store_hit",
   "l1_local_store_miss",
   "gld_request",
   "gst_request",
   "l1_global_load_hit",
   "l1_global_load_miss",
   "uncached_global_load_transaction",
   "global_store_transaction",
   "branch",
   "divergent_branch",
   "active_warps",
   "active_cycles",
   "inst_issued",
   "atom_count",
   "gred_count",
   "shared_load_replay",
   "shared_store_replay",
   "local_load_transactions",
   "local_store_transactions",
   "l1_shared_load_transactions",
   "l1_shared_store_transactions",
   "global_ld_mem_divergence_replays",
   "global_st_mem_divergence_replays",
   /* metrics, i.e. functions of the MP counters */
   "metric-ipc",                   /* inst_executed, clock */
   "metric-ipac",                  /* inst_executed, active_cycles */
   "metric-ipec",                  /* inst_executed, (bool)inst_executed */
   "metric-achieved_occupancy",    /* active_warps, active_cycles */
   "metric-sm_efficiency",         /* active_cycles, clock */
   "metric-inst_replay_overhead"   /* inst_issued, inst_executed */
};

/* For simplicity, we will allocate as many group slots as we allocate counter
 * slots. This means that a single counter which wants to source from 2 groups
 * will have to be declared as using 2 counter slots. This shouldn't really be
 * a problem because such queries don't make much sense ... (unless someone is
 * really creative).
 */
struct nvc0_mp_counter_cfg
{
   uint32_t func    : 16; /* mask or 4-bit logic op (depending on mode) */
   uint32_t mode    : 4;  /* LOGOP,B6,LOGOP_B6(_PULSE) */
   uint32_t num_src : 3;  /* number of sources (1 - 6, only for NVC0:NVE4) */
   uint32_t sig_dom : 1;  /* if 0, MP_PM_A (per warp-sched), if 1, MP_PM_B */
   uint32_t sig_sel : 8;  /* signal group */
   uint64_t src_sel;      /* signal selection for up to 6 sources (48 bit) */
};

#define NVC0_COUNTER_OPn_SUM 0
#define NVC0_COUNTER_OPn_OR  1
#define NVC0_COUNTER_OPn_AND 2
#define NVC0_COUNTER_OP2_REL_SUM_MM 3 /* (sum(ctr0) - sum(ctr1)) / sum(ctr0) */
#define NVC0_COUNTER_OP2_DIV_SUM_M0 4 /* sum(ctr0) / ctr1 of MP[0] */
#define NVC0_COUNTER_OP2_AVG_DIV_MM 5 /* avg(ctr0 / ctr1) */
#define NVC0_COUNTER_OP2_AVG_DIV_M0 6 /* avg(ctr0) / ctr1 of MP[0] */

struct nvc0_mp_pm_query_cfg
{
   struct nvc0_mp_counter_cfg ctr[4];
   uint8_t num_counters;
   uint8_t op;
   uint8_t norm[2]; /* normalization num,denom */
};

#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
   { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
   { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g1, s1 }, \
   {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
   { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g0, s0 }, \
   { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
   {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
   { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
   { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
   {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }

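/* As an illustration, the first _Q1A() entry in the table below expands to:
 *
 *    [NVE4_PM_QUERY_PROF_TRIGGER_0] = { {
 *       { 0x0001, NVE4_COMPUTE_MP_PM_FUNC_MODE_B6, 0, 0,
 *         NVE4_COMPUTE_MP_PM_A_SIGSEL_USER, 0x00000000 },
 *       {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }
 *
 * i.e. one counter in signal domain A, signal group USER, summed over all
 * MPs with a 1/1 normalization.
 */
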
/* NOTES:
 * active_warps: bit 0 alternates between 0 and 1 for an odd number of warps
 * inst_executed etc.: we only count a single warp scheduler
 * metric-ipXc: we simply multiply by 4 to account for the 4 warp schedulers;
 *  this is inaccurate!
 */
static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] =
{
   _Q1A(PROF_TRIGGER_0, 0x0001, B6, USER, 0x00000000, 1, 1),
   _Q1A(PROF_TRIGGER_1, 0x0001, B6, USER, 0x00000004, 1, 1),
   _Q1A(PROF_TRIGGER_2, 0x0001, B6, USER, 0x00000008, 1, 1),
   _Q1A(PROF_TRIGGER_3, 0x0001, B6, USER, 0x0000000c, 1, 1),
   _Q1A(PROF_TRIGGER_4, 0x0001, B6, USER, 0x00000010, 1, 1),
   _Q1A(PROF_TRIGGER_5, 0x0001, B6, USER, 0x00000014, 1, 1),
   _Q1A(PROF_TRIGGER_6, 0x0001, B6, USER, 0x00000018, 1, 1),
   _Q1A(PROF_TRIGGER_7, 0x0001, B6, USER, 0x0000001c, 1, 1),
   _Q1A(WARPS_LAUNCHED, 0x0001, B6, LAUNCH, 0x00000004, 1, 1),
   _Q1A(THREADS_LAUNCHED, 0x003f, B6, LAUNCH, 0x398a4188, 1, 1),
   _Q1B(SM_CTA_LAUNCHED, 0x0001, B6, WARP, 0x0000001c, 1, 1),
   _Q1A(INST_ISSUED1, 0x0001, B6, ISSUE, 0x00000004, 1, 1),
   _Q1A(INST_ISSUED2, 0x0001, B6, ISSUE, 0x00000008, 1, 1),
   _Q1A(INST_ISSUED, 0x0003, B6, ISSUE, 0x00000104, 1, 1),
   _Q1A(INST_EXECUTED, 0x0003, B6, EXEC, 0x00000398, 1, 1),
   _Q1A(SHARED_LD, 0x0001, B6, LDST, 0x00000000, 1, 1),
   _Q1A(SHARED_ST, 0x0001, B6, LDST, 0x00000004, 1, 1),
   _Q1A(LOCAL_LD, 0x0001, B6, LDST, 0x00000008, 1, 1),
   _Q1A(LOCAL_ST, 0x0001, B6, LDST, 0x0000000c, 1, 1),
   _Q1A(GLD_REQUEST, 0x0001, B6, LDST, 0x00000010, 1, 1),
   _Q1A(GST_REQUEST, 0x0001, B6, LDST, 0x00000014, 1, 1),
   _Q1B(L1_LOCAL_LD_HIT, 0x0001, B6, L1, 0x00000000, 1, 1),
   _Q1B(L1_LOCAL_LD_MISS, 0x0001, B6, L1, 0x00000004, 1, 1),
   _Q1B(L1_LOCAL_ST_HIT, 0x0001, B6, L1, 0x00000008, 1, 1),
   _Q1B(L1_LOCAL_ST_MISS, 0x0001, B6, L1, 0x0000000c, 1, 1),
   _Q1B(L1_GLD_HIT, 0x0001, B6, L1, 0x00000010, 1, 1),
   _Q1B(L1_GLD_MISS, 0x0001, B6, L1, 0x00000014, 1, 1),
   _Q1B(UNCACHED_GLD_TRANSACTIONS, 0x0001, B6, MEM, 0x00000000, 1, 1),
   _Q1B(GST_TRANSACTIONS, 0x0001, B6, MEM, 0x00000004, 1, 1),
   _Q1A(BRANCH, 0x0001, B6, BRANCH, 0x0000000c, 1, 1),
   _Q1A(DIVERGENT_BRANCH, 0x0001, B6, BRANCH, 0x00000010, 1, 1),
   _Q1B(ACTIVE_WARPS, 0x003f, B6, WARP, 0x31483104, 2, 1),
   _Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1),
   _Q1A(ATOM_COUNT, 0x0001, B6, BRANCH, 0x00000000, 1, 1),
   _Q1A(GRED_COUNT, 0x0001, B6, BRANCH, 0x00000008, 1, 1),
   _Q1B(SHARED_LD_REPLAY, 0x0001, B6, REPLAY, 0x00000008, 1, 1),
   _Q1B(SHARED_ST_REPLAY, 0x0001, B6, REPLAY, 0x0000000c, 1, 1),
   _Q1B(LOCAL_LD_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000000, 1, 1),
   _Q1B(LOCAL_ST_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000004, 1, 1),
   _Q1B(L1_SHARED_LD_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000008, 1, 1),
   _Q1B(L1_SHARED_ST_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x0000000c, 1, 1),
   _Q1B(GLD_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000010, 1, 1),
   _Q1B(GST_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000014, 1, 1),
   _M2AB(IPC, 0x3, B6, EXEC, 0x398, 0xffff, LOGOP, WARP, 0x0, DIV_SUM_M0, 10, 1),
   _M2AB(IPAC, 0x3, B6, EXEC, 0x398, 0x1, B6, WARP, 0x0, AVG_DIV_MM, 10, 1),
   _M2A(IPEC, 0x3, B6, EXEC, 0x398, 0xe, LOGOP, EXEC, 0x398, AVG_DIV_MM, 10, 1),
   _M2A(INST_REPLAY_OHEAD, 0x3, B6, ISSUE, 0x104, 0x3, B6, EXEC, 0x398, REL_SUM_MM, 100, 1),
   _M2B(MP_OCCUPANCY, 0x3f, B6, WARP, 0x31483104, 0x01, B6, WARP, 0x0, AVG_DIV_MM, 200, 64),
   _M2B(MP_EFFICIENCY, 0x01, B6, WARP, 0x0, 0xffff, LOGOP, WARP, 0x0, AVG_DIV_M0, 100, 1),
};

#undef _Q1A
#undef _Q1B
#undef _M2A
#undef _M2B
#undef _M2AB

/* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */
static const uint64_t nvc0_read_mp_pm_counters_code[] =
{
   /* mov b32 $r8 $tidx
    * mov b32 $r9 $physid
    * mov b32 $r0 $pm0
    * mov b32 $r1 $pm1
    * mov b32 $r2 $pm2
    * mov b32 $r3 $pm3
    * mov b32 $r4 $pm4
    * mov b32 $r5 $pm5
    * mov b32 $r6 $pm6
    * mov b32 $r7 $pm7
    * set $p0 0x1 eq u32 $r8 0x0
    * mov b32 $r10 c0[0x0]
    * mov b32 $r11 c0[0x4]
    * ext u32 $r8 $r9 0x414
    * (not $p0) exit
    * mul $r8 u32 $r8 u32 36
    * add b32 $r10 $c $r10 $r8
    * add b32 $r11 $r11 0x0 $c
    * mov b32 $r8 c0[0x8]
    * st b128 wt g[$r10d+0x00] $r0q
    * st b128 wt g[$r10d+0x10] $r4q
    * st b32 wt g[$r10d+0x20] $r8
    * exit */
   0x2c00000084021c04ULL,
   0x2c0000000c025c04ULL,
   0x2c00000010001c04ULL,
   0x2c00000014005c04ULL,
   0x2c00000018009c04ULL,
   0x2c0000001c00dc04ULL,
   0x2c00000020011c04ULL,
   0x2c00000024015c04ULL,
   0x2c00000028019c04ULL,
   0x2c0000002c01dc04ULL,
   0x190e0000fc81dc03ULL,
   0x2800400000029de4ULL,
   0x280040001002dde4ULL,
   0x7000c01050921c03ULL,
   0x80000000000021e7ULL,
   0x1000000090821c02ULL,
   0x4801000020a29c03ULL,
   0x0800000000b2dc42ULL,
   0x2800400020021de4ULL,
   0x9400000000a01fc5ULL,
   0x9400000040a11fc5ULL,
   0x9400000080a21f85ULL,
   0x8000000000001de7ULL
};

static const char *nvc0_pm_query_names[] =
{
   /* MP counters */
   "inst_executed",
   "branch",
   "divergent_branch",
   "active_warps",
   "active_cycles",
   "warps_launched",
   "threads_launched",
   "shared_load",
   "shared_store",
   "local_load",
   "local_store",
   "gred_count",
   "atom_count",
   "gld_request",
   "gst_request",
   "inst_issued1_0",
   "inst_issued1_1",
   "inst_issued2_0",
   "inst_issued2_1",
   "thread_inst_executed_0",
   "thread_inst_executed_1",
   "thread_inst_executed_2",
   "thread_inst_executed_3",
   "prof_trigger_00",
   "prof_trigger_01",
   "prof_trigger_02",
   "prof_trigger_03",
   "prof_trigger_04",
   "prof_trigger_05",
   "prof_trigger_06",
   "prof_trigger_07",
};

#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_PM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }

static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] =
{
   _Q(INST_EXECUTED, 0xaaaa, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 0x00, 0x00),
   _Q(BRANCH, 0xaaaa, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00),
   _Q(DIVERGENT_BRANCH, 0xaaaa, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 0x00, 0x00),
   _Q(ACTIVE_WARPS, 0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
   _Q(ACTIVE_CYCLES, 0xaaaa, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(WARPS_LAUNCHED, 0xaaaa, LOGOP, 0x26, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(THREADS_LAUNCHED, 0xaaaa, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
   _Q(SHARED_LD, 0xaaaa, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(SHARED_ST, 0xaaaa, LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(LOCAL_LD, 0xaaaa, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(LOCAL_ST, 0xaaaa, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(GRED_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(ATOM_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(GLD_REQUEST, 0xaaaa, LOGOP, 0x64, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(GST_REQUEST, 0xaaaa, LOGOP, 0x64, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED1_0, 0xaaaa, LOGOP, 0x7e, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED1_1, 0xaaaa, LOGOP, 0x7e, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED2_0, 0xaaaa, LOGOP, 0x7e, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED2_1, 0xaaaa, LOGOP, 0x7e, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(TH_INST_EXECUTED_0, 0xaaaa, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(TH_INST_EXECUTED_1, 0xaaaa, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(TH_INST_EXECUTED_2, 0xaaaa, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(TH_INST_EXECUTED_3, 0xaaaa, LOGOP, 0xa6, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(PROF_TRIGGER_0, 0xaaaa, LOGOP, 0x01, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_1, 0xaaaa, LOGOP, 0x01, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_2, 0xaaaa, LOGOP, 0x01, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_3, 0xaaaa, LOGOP, 0x01, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_4, 0xaaaa, LOGOP, 0x01, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_5, 0xaaaa, LOGOP, 0x01, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_6, 0xaaaa, LOGOP, 0x01, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_7, 0xaaaa, LOGOP, 0x01, 1, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00),
};

#undef _Q

static const struct nvc0_mp_pm_query_cfg *
nvc0_mp_pm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   struct nvc0_screen *screen = nvc0->screen;

   if (screen->base.class_3d >= NVE4_3D_CLASS)
      return &nve4_mp_pm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];
   return &nvc0_mp_pm_queries[q->type - NVC0_PM_QUERY(0)];
}

boolean
nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
   const struct nvc0_mp_pm_query_cfg *cfg;
   unsigned i, c;
   unsigned num_ab[2] = { 0, 0 };

   cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);

   /* check if we have enough free counter slots */
   for (i = 0; i < cfg->num_counters; ++i)
      num_ab[cfg->ctr[i].sig_dom]++;

   if (screen->pm.num_mp_pm_active[0] + num_ab[0] > 4 ||
       screen->pm.num_mp_pm_active[1] + num_ab[1] > 4) {
      NOUVEAU_ERR("Not enough free MP counter slots!\n");
      return false;
   }

   assert(cfg->num_counters <= 4);
   PUSH_SPACE(push, 4 * 8 * (is_nve4 ? 1 : 6) + 6);

   if (!screen->pm.mp_counters_enabled) {
      screen->pm.mp_counters_enabled = true;
      BEGIN_NVC0(push, SUBC_SW(0x06ac), 1);
      PUSH_DATA (push, 0x1fcb);
   }

   /* set sequence field to 0 (used to check if result is available) */
   for (i = 0; i < screen->mp_count; ++i)
      q->data[i * 10 + 10] = 0;

   for (i = 0; i < cfg->num_counters; ++i) {
      const unsigned d = cfg->ctr[i].sig_dom;

      if (!screen->pm.num_mp_pm_active[d]) {
         uint32_t m = (1 << 22) | (1 << (7 + (8 * !d)));
         if (screen->pm.num_mp_pm_active[!d])
            m |= 1 << (7 + (8 * d));
         BEGIN_NVC0(push, SUBC_SW(0x0600), 1);
         PUSH_DATA (push, m);
      }
      screen->pm.num_mp_pm_active[d]++;

      for (c = d * 4; c < (d * 4 + 4); ++c) {
         if (!screen->pm.mp_counter[c]) {
            q->ctr[i] = c;
            screen->pm.mp_counter[c] = (struct pipe_query *)q;
            break;
         }
      }
      assert(c <= (d * 4 + 3)); /* must succeed, already checked for space */

      /* configure and reset the counter(s) */
      if (is_nve4) {
         if (d == 0)
            BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_A_SIGSEL(c & 3)), 1);
         else
            BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_B_SIGSEL(c & 3)), 1);
         PUSH_DATA (push, cfg->ctr[i].sig_sel);
         BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SRCSEL(c)), 1);
         PUSH_DATA (push, cfg->ctr[i].src_sel + 0x2108421 * (c & 3));
         BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 1);
         PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
         BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SET(c)), 1);
         PUSH_DATA (push, 0);
      } else {
         unsigned s;

         for (s = 0; s < cfg->ctr[i].num_src; s++) {
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SIGSEL(s)), 1);
            PUSH_DATA (push, cfg->ctr[i].sig_sel);
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SRCSEL(s)), 1);
            PUSH_DATA (push, (cfg->ctr[i].src_sel >> (s * 8)) & 0xff);
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(s)), 1);
            PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SET(s)), 1);
            PUSH_DATA (push, 0);
         }
      }
   }
   return true;
}

static void
nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct pipe_context *pipe = &nvc0->base.pipe;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
   uint32_t mask;
   uint32_t input[3];
   const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
   const uint grid[3] = { screen->mp_count, 1, 1 };
   unsigned c;
   const struct nvc0_mp_pm_query_cfg *cfg;

   cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);

   if (unlikely(!screen->pm.prog)) {
      struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
      prog->type = PIPE_SHADER_COMPUTE;
      prog->translated = true;
      prog->num_gprs = 14;
      prog->parm_size = 12;
      if (is_nve4) {
         prog->code = (uint32_t *)nve4_read_mp_pm_counters_code;
         prog->code_size = sizeof(nve4_read_mp_pm_counters_code);
      } else {
         prog->code = (uint32_t *)nvc0_read_mp_pm_counters_code;
         prog->code_size = sizeof(nvc0_read_mp_pm_counters_code);
      }
      screen->pm.prog = prog;
   }

   /* disable all counting */
   PUSH_SPACE(push, 8);
   for (c = 0; c < 8; ++c)
      if (screen->pm.mp_counter[c]) {
         if (is_nve4) {
            IMMED_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 0);
         } else {
            IMMED_NVC0(push, NVC0_COMPUTE(MP_PM_OP(c)), 0);
         }
      }
   /* release counters for this query */
   for (c = 0; c < 8; ++c) {
      if (nvc0_query(screen->pm.mp_counter[c]) == q) {
         screen->pm.num_mp_pm_active[c / 4]--;
         screen->pm.mp_counter[c] = NULL;
      }
   }

   BCTX_REFN_bo(nvc0->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR,
                q->bo);

   PUSH_SPACE(push, 1);
   IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);

   pipe->bind_compute_state(pipe, screen->pm.prog);
   input[0] = (q->bo->offset + q->base);
   input[1] = (q->bo->offset + q->base) >> 32;
   input[2] = q->sequence;
   pipe->launch_grid(pipe, block, grid, 0, input);

   nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY);

   /* re-activate other counters */
   PUSH_SPACE(push, 16);
   mask = 0;
   for (c = 0; c < 8; ++c) {
      unsigned i;
      q = nvc0_query(screen->pm.mp_counter[c]);
      if (!q)
         continue;
      cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
      for (i = 0; i < cfg->num_counters; ++i) {
         if (mask & (1 << q->ctr[i]))
            break;
         mask |= 1 << q->ctr[i];
         if (is_nve4) {
            BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(q->ctr[i])), 1);
         } else {
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(q->ctr[i])), 1);
         }
         PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
      }
   }
}

static inline bool
nvc0_mp_pm_query_read_data(uint32_t count[32][4],
                           struct nvc0_context *nvc0, bool wait,
                           struct nvc0_query *q,
                           const struct nvc0_mp_pm_query_cfg *cfg,
                           unsigned mp_count)
{
   unsigned p, c;

   for (p = 0; p < mp_count; ++p) {
      const unsigned b = (0x24 / 4) * p; /* 9 words per MP: 8 counters + sequence */

      for (c = 0; c < cfg->num_counters; ++c) {
         if (q->data[b + 8] != q->sequence) {
            if (!wait)
               return false;
            if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
               return false;
         }
         count[p][c] = q->data[b + q->ctr[c]];
      }
   }
   return true;
}

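/* NVE4 layout (see nvc0_query_create()): 0x60 bytes, i.e. 24 words, per MP:
 * 16 warp-scheduler counters, 4 MP counters (C4-C7), 4 sequence words.
 * Counters C4-C7 (q->ctr[c] & ~3) are read directly; C0-C3 are summed over
 * the 4 warp schedulers.
 */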
static inline bool
nve4_mp_pm_query_read_data(uint32_t count[32][4],
                           struct nvc0_context *nvc0, bool wait,
                           struct nvc0_query *q,
                           const struct nvc0_mp_pm_query_cfg *cfg,
                           unsigned mp_count)
{
   unsigned p, c, d;

   for (p = 0; p < mp_count; ++p) {
      const unsigned b = (0x60 / 4) * p;

      for (c = 0; c < cfg->num_counters; ++c) {
         count[p][c] = 0;
         for (d = 0; d < ((q->ctr[c] & ~3) ? 1 : 4); ++d) {
            if (q->data[b + 20 + d] != q->sequence) {
               if (!wait)
                  return false;
               if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
                  return false;
            }
            if (q->ctr[c] & ~0x3)
               count[p][c] = q->data[b + 16 + (q->ctr[c] & 3)];
            else
               count[p][c] += q->data[b + d * 4 + q->ctr[c]];
         }
      }
   }
   return true;
}

/* Metric calculations:
 * sum(x) ... sum of x over all MPs
 * avg(x) ... average of x over all MPs
 *
 * IPC              : sum(inst_executed) / clock
 * INST_REPLAY_OHEAD: (sum(inst_issued) - sum(inst_executed)) / sum(inst_issued)
 * MP_OCCUPANCY     : avg((active_warps / 64) / active_cycles)
 * MP_EFFICIENCY    : avg(active_cycles / clock)
 *
 * NOTE: Interpretation of IPC requires knowledge of MP count.
 */
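/* Worked example (numbers invented for illustration): INST_REPLAY_OHEAD uses
 * op = REL_SUM_MM with norm { 100, 1 }. With sum(inst_issued) = 1200 and
 * sum(inst_executed) = 1000 over all MPs, the REL_SUM_MM branch below
 * computes ((1200 - 1000) * 100) / (1200 * 1) = 16, i.e. a truncated
 * percentage.
 */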
static boolean
nvc0_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
                        void *result, boolean wait)
{
   uint32_t count[32][4];
   uint64_t value = 0;
   unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32);
   unsigned p, c;
   const struct nvc0_mp_pm_query_cfg *cfg;
   bool ret;

   cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);

   if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
      ret = nve4_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
   else
      ret = nvc0_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
   if (!ret)
      return false;

   if (cfg->op == NVC0_COUNTER_OPn_SUM) {
      for (c = 0; c < cfg->num_counters; ++c)
         for (p = 0; p < mp_count; ++p)
            value += count[p][c];
      value = (value * cfg->norm[0]) / cfg->norm[1];
   } else
   if (cfg->op == NVC0_COUNTER_OPn_OR) {
      uint32_t v = 0;
      for (c = 0; c < cfg->num_counters; ++c)
         for (p = 0; p < mp_count; ++p)
            v |= count[p][c];
      value = ((uint64_t)v * cfg->norm[0]) / cfg->norm[1];
   } else
   if (cfg->op == NVC0_COUNTER_OPn_AND) {
      uint32_t v = ~0;
      for (c = 0; c < cfg->num_counters; ++c)
         for (p = 0; p < mp_count; ++p)
            v &= count[p][c];
      value = ((uint64_t)v * cfg->norm[0]) / cfg->norm[1];
   } else
   if (cfg->op == NVC0_COUNTER_OP2_REL_SUM_MM) {
      uint64_t v[2] = { 0, 0 };
      for (p = 0; p < mp_count; ++p) {
         v[0] += count[p][0];
         v[1] += count[p][1];
      }
      if (v[0])
         value = ((v[0] - v[1]) * cfg->norm[0]) / (v[0] * cfg->norm[1]);
   } else
   if (cfg->op == NVC0_COUNTER_OP2_DIV_SUM_M0) {
      for (p = 0; p < mp_count; ++p)
         value += count[p][0];
      if (count[0][1])
         value = (value * cfg->norm[0]) / (count[0][1] * cfg->norm[1]);
      else
         value = 0;
   } else
   if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_MM) {
      unsigned mp_used = 0;
      for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
         if (count[p][1])
            value += (count[p][0] * cfg->norm[0]) / count[p][1];
      if (mp_used)
         value /= (uint64_t)mp_used * cfg->norm[1];
   } else
   if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_M0) {
      unsigned mp_used = 0;
      for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
         value += count[p][0];
      if (count[0][1] && mp_used) {
         value *= cfg->norm[0];
         value /= (uint64_t)count[0][1] * mp_used * cfg->norm[1];
      } else {
         value = 0;
      }
   }

   *(uint64_t *)result = value;
   return true;
}

int
nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
                                  unsigned id,
                                  struct pipe_driver_query_info *info)
{
   struct nvc0_screen *screen = nvc0_screen(pscreen);
   int count = 0;

   count += NVC0_QUERY_DRV_STAT_COUNT;

   if (screen->base.device->drm_version >= 0x01000101) {
      if (screen->compute) {
         if (screen->base.class_3d == NVE4_3D_CLASS) {
            count += NVE4_PM_QUERY_COUNT;
         } else
         if (screen->base.class_3d < NVE4_3D_CLASS) {
            /* NVC0_COMPUTE is not always enabled */
            count += NVC0_PM_QUERY_COUNT;
         }
      }
   }

   if (!info)
      return count;

   /* Init default values. */
   info->name = "this_is_not_the_query_you_are_looking_for";
   info->query_type = 0xdeadd01d;
   info->max_value.u64 = 0;
   info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
   info->group_id = -1;

#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
   if (id < NVC0_QUERY_DRV_STAT_COUNT) {
      info->name = nvc0_drv_stat_names[id];
      info->query_type = NVC0_QUERY_DRV_STAT(id);
      info->max_value.u64 = 0;
      if (strstr(info->name, "bytes"))
         info->type = PIPE_DRIVER_QUERY_TYPE_BYTES;
      info->group_id = NVC0_QUERY_DRV_STAT_GROUP;
      return 1;
   } else
#endif
   if (id < count) {
      if (screen->compute) {
         if (screen->base.class_3d == NVE4_3D_CLASS) {
            info->name = nve4_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
            info->query_type = NVE4_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
            info->max_value.u64 =
               (id < NVE4_PM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100;
            info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
            return 1;
         } else
         if (screen->base.class_3d < NVE4_3D_CLASS) {
            info->name = nvc0_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
            info->query_type = NVC0_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
            info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
            return 1;
         }
      }
   }
   /* user asked for info about a non-existent query */
   return 0;
}

int
nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
                                        unsigned id,
                                        struct pipe_driver_query_group_info *info)
{
   struct nvc0_screen *screen = nvc0_screen(pscreen);
   int count = 0;

#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
   count++;
#endif

   if (screen->base.device->drm_version >= 0x01000101) {
      if (screen->compute) {
         if (screen->base.class_3d == NVE4_3D_CLASS) {
            count++;
         } else
         if (screen->base.class_3d < NVE4_3D_CLASS) {
            count++; /* NVC0_COMPUTE is not always enabled */
         }
      }
   }

   if (!info)
      return count;

   if (id == NVC0_QUERY_MP_COUNTER_GROUP) {
      if (screen->compute) {
         info->name = "MP counters";
         info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;

         if (screen->base.class_3d == NVE4_3D_CLASS) {
            info->num_queries = NVE4_PM_QUERY_COUNT;

            /* On NVE4+, each multiprocessor has 8 hardware counters separated
             * into two distinct domains, but we allow only one query to be
             * active simultaneously because some of them use more than one
             * hardware counter, which would result in undefined behaviour. */
            info->max_active_queries = 1; /* TODO: handle multiple hw counters */
            return 1;
         } else
         if (screen->base.class_3d < NVE4_3D_CLASS) {
            info->num_queries = NVC0_PM_QUERY_COUNT;

            /* On NVC0:NVE4, each multiprocessor has 8 hardware counters
             * in a single domain. */
            info->max_active_queries = 8;
            return 1;
         }
      }
   }
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
   else if (id == NVC0_QUERY_DRV_STAT_GROUP) {
      info->name = "Driver statistics";
      info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU;
      info->max_active_queries = NVC0_QUERY_DRV_STAT_COUNT;
      info->num_queries = NVC0_QUERY_DRV_STAT_COUNT;
      return 1;
   }
#endif

   /* user asked for info about a non-existent query group */
   info->name = "this_is_not_the_query_group_you_are_looking_for";
   info->max_active_queries = 0;
   info->num_queries = 0;
   info->type = 0;
   return 0;
}

void
nvc0_init_query_functions(struct nvc0_context *nvc0)
{
   struct pipe_context *pipe = &nvc0->base.pipe;

   pipe->create_query = nvc0_query_create;
   pipe->destroy_query = nvc0_query_destroy;
   pipe->begin_query = nvc0_query_begin;
   pipe->end_query = nvc0_query_end;
   pipe->get_query_result = nvc0_query_result;
   pipe->render_condition = nvc0_render_condition;
}