/*
 * Copyright 2011 Nouveau Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Christoph Bumiller
 */

#define NVC0_PUSH_EXPLICIT_SPACE_CHECKING

#include "nvc0/nvc0_context.h"
#include "nvc0/nvc0_query.h"

#include "nv_object.xml.h"
#include "nvc0/nve4_compute.xml.h"
#include "nvc0/nvc0_compute.xml.h"

#define NVC0_QUERY_STATE_READY   0
#define NVC0_QUERY_STATE_ACTIVE  1
#define NVC0_QUERY_STATE_ENDED   2
#define NVC0_QUERY_STATE_FLUSHED 3

#define NVC0_QUERY_ALLOC_SPACE 256
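
/* Occlusion queries advance through the allocation in 32-byte steps
 * (q->rotate); once all eight slots of a 256-byte allocation have been used,
 * nvc0_query_rotate() below grabs a fresh buffer, so older results that may
 * still be referenced (e.g. by a render condition) are left untouched.
 */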

static boolean nvc0_hw_sm_query_begin(struct nvc0_context *,
                                      struct nvc0_query *);
static void nvc0_hw_sm_query_end(struct nvc0_context *, struct nvc0_query *);
static boolean nvc0_hw_sm_query_result(struct nvc0_context *,
                                       struct nvc0_query *, void *, boolean);

static bool
nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
{
   struct nvc0_screen *screen = nvc0->screen;
   int ret;

   if (q->bo) {
      nouveau_bo_ref(NULL, &q->bo);
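      /* If the query may still be in flight, defer freeing the suballocation
       * until the current fence has signalled.
       */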
      if (q->u.mm) {
         if (q->state == NVC0_QUERY_STATE_READY)
            nouveau_mm_free(q->u.mm);
         else
            nouveau_fence_work(screen->base.fence.current,
                               nouveau_mm_free_work, q->u.mm);
      }
   }
   if (size) {
      q->u.mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
      if (!q->bo)
         return false;
      q->offset = q->base;

      ret = nouveau_bo_map(q->bo, 0, screen->base.client);
      if (ret) {
         nvc0_query_allocate(nvc0, q, 0);
         return false;
      }
      q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
   }
   return true;
}

static void
nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
{
   nvc0_query_allocate(nvc0_context(pipe), nvc0_query(pq), 0);
   nouveau_fence_ref(NULL, &nvc0_query(pq)->fence);
   FREE(nvc0_query(pq));
}

static struct pipe_query *
nvc0_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_query *q;
   unsigned space = NVC0_QUERY_ALLOC_SPACE;

   q = CALLOC_STRUCT(nvc0_query);
   if (!q)
      return NULL;

   switch (type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      q->rotate = 32;
      space = NVC0_QUERY_ALLOC_SPACE;
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      q->is64bit = true;
      space = 512;
      break;
   case PIPE_QUERY_SO_STATISTICS:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      q->is64bit = true;
      space = 64;
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      q->is64bit = true;
      q->index = index;
      space = 32;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_GPU_FINISHED:
      space = 32;
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      space = 16;
      break;
   default:
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
      if (type >= NVC0_QUERY_DRV_STAT(0) && type <= NVC0_QUERY_DRV_STAT_LAST) {
         space = 0;
         q->is64bit = true;
         q->index = type - NVC0_QUERY_DRV_STAT(0);
         break;
      } else
#endif
      if (nvc0->screen->base.device->drm_version >= 0x01000101) {
         if (type >= NVE4_HW_SM_QUERY(0) && type <= NVE4_HW_SM_QUERY_LAST) {
            /* for each MP:
             * [00] = WS0.C0
             * [04] = WS0.C1
             * [08] = WS0.C2
             * [0c] = WS0.C3
             * [10] = WS1.C0
             * [14] = WS1.C1
             * [18] = WS1.C2
             * [1c] = WS1.C3
             * [20] = WS2.C0
             * [24] = WS2.C1
             * [28] = WS2.C2
             * [2c] = WS2.C3
             * [30] = WS3.C0
             * [34] = WS3.C1
             * [38] = WS3.C2
             * [3c] = WS3.C3
             * [40] = MP.C4
             * [44] = MP.C5
             * [48] = MP.C6
             * [4c] = MP.C7
             * [50] = WS0.sequence
             * [54] = WS1.sequence
             * [58] = WS2.sequence
             * [5c] = WS3.sequence
             */
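            /* That is 24 words = 0x60 bytes per MP, matching the
             * (0x60 / 4) word stride used in nve4_hw_sm_query_read_data().
             */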
            space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t);
            break;
         } else
         if (type >= NVC0_HW_SM_QUERY(0) && type <= NVC0_HW_SM_QUERY_LAST) {
            /* for each MP:
             * [00] = MP.C0
             * [04] = MP.C1
             * [08] = MP.C2
             * [0c] = MP.C3
             * [10] = MP.C4
             * [14] = MP.C5
             * [18] = MP.C6
             * [1c] = MP.C7
             * [20] = MP.sequence
             */
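            /* That is 9 words = 0x24 bytes per MP, matching the
             * (0x24 / 4) word stride used in nvc0_hw_sm_query_read_data().
             */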
            space = (8 + 1) * nvc0->screen->mp_count * sizeof(uint32_t);
            break;
         }
      }
      debug_printf("invalid query type: %u\n", type);
      FREE(q);
      return NULL;
   }
   if (!nvc0_query_allocate(nvc0, q, space)) {
      FREE(q);
      return NULL;
   }

   q->type = type;

   if (q->rotate) {
      /* we advance before query_begin ! */
      q->offset -= q->rotate;
      q->data -= q->rotate / sizeof(*q->data);
   } else
   if (!q->is64bit)
      q->data[0] = 0; /* initialize sequence */

   return (struct pipe_query *)q;
}

static void
nvc0_query_get(struct nouveau_pushbuf *push, struct nvc0_query *q,
               unsigned offset, uint32_t get)
{
   offset += q->offset;

   PUSH_SPACE(push, 5);
   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
   BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
   PUSH_DATAh(push, q->bo->offset + offset);
   PUSH_DATA (push, q->bo->offset + offset);
   PUSH_DATA (push, q->sequence);
   PUSH_DATA (push, get);
}

static void
nvc0_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   q->offset += q->rotate;
   q->data += q->rotate / sizeof(*q->data);
   if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
      nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
}

static boolean
nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q = nvc0_query(pq);
   bool ret = true;

   /* For occlusion queries we have to change the storage, because a previous
    * query might set the initial render condition to false even *after* we
    * re-initialized it to true.
    */
   if (q->rotate) {
      nvc0_query_rotate(nvc0, q);

      /* XXX: can we do this with the GPU, and sync with respect to a previous
       * query ?
       */
      q->data[0] = q->sequence; /* initialize sequence */
      q->data[1] = 1; /* initial render condition = true */
      q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
      q->data[5] = 0;
   }
   q->sequence++;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      q->nesting = nvc0->screen->num_occlusion_queries_active++;
      if (q->nesting) {
         nvc0_query_get(push, q, 0x10, 0x0100f002);
      } else {
         PUSH_SPACE(push, 3);
         BEGIN_NVC0(push, NVC0_3D(COUNTER_RESET), 1);
         PUSH_DATA (push, NVC0_3D_COUNTER_RESET_SAMPLECNT);
         IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nvc0_query_get(push, q, 0x10, 0x09005002 | (q->index << 5));
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nvc0_query_get(push, q, 0x10, 0x05805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nvc0_query_get(push, q, 0x20, 0x05805002 | (q->index << 5));
      nvc0_query_get(push, q, 0x30, 0x06805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      nvc0_query_get(push, q, 0x10, 0x03005002 | (q->index << 5));
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      nvc0_query_get(push, q, 0x10, 0x00005002);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      nvc0_query_get(push, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
      nvc0_query_get(push, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
      nvc0_query_get(push, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
      nvc0_query_get(push, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
      nvc0_query_get(push, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nvc0_query_get(push, q, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nvc0_query_get(push, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nvc0_query_get(push, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
      nvc0_query_get(push, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
      nvc0_query_get(push, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
      break;
   default:
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
      if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
          q->type <= NVC0_QUERY_DRV_STAT_LAST) {
         if (q->index >= 5)
            q->u.value = nvc0->screen->base.stats.v[q->index];
         else
            q->u.value = 0;
      } else
#endif
      if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
          (q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
         ret = nvc0_hw_sm_query_begin(nvc0, q);
      }
      break;
   }
   q->state = NVC0_QUERY_STATE_ACTIVE;
   return ret;
}

static void
nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q = nvc0_query(pq);

   if (q->state != NVC0_QUERY_STATE_ACTIVE) {
      /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
      if (q->rotate)
         nvc0_query_rotate(nvc0, q);
      q->sequence++;
   }
   q->state = NVC0_QUERY_STATE_ENDED;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      nvc0_query_get(push, q, 0, 0x0100f002);
      if (--nvc0->screen->num_occlusion_queries_active == 0) {
         PUSH_SPACE(push, 1);
         IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nvc0_query_get(push, q, 0, 0x09005002 | (q->index << 5));
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nvc0_query_get(push, q, 0, 0x05805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nvc0_query_get(push, q, 0x00, 0x05805002 | (q->index << 5));
      nvc0_query_get(push, q, 0x10, 0x06805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      /* TODO: How do we sum over all streams for render condition ? */
      /* PRIMS_DROPPED doesn't write sequence, use a ZERO query to sync on */
      nvc0_query_get(push, q, 0x00, 0x03005002 | (q->index << 5));
      nvc0_query_get(push, q, 0x20, 0x00005002);
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIME_ELAPSED:
      nvc0_query_get(push, q, 0, 0x00005002);
      break;
   case PIPE_QUERY_GPU_FINISHED:
      nvc0_query_get(push, q, 0, 0x1000f010);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      nvc0_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
      nvc0_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
      nvc0_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
      nvc0_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
      nvc0_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nvc0_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nvc0_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nvc0_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
      nvc0_query_get(push, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
      nvc0_query_get(push, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      /* indexed by TFB buffer instead of by vertex stream */
      nvc0_query_get(push, q, 0x00, 0x0d005002 | (q->index << 5));
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* This query is not issued on GPU because disjoint is forced to false */
      q->state = NVC0_QUERY_STATE_READY;
      break;
   default:
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
      if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
          q->type <= NVC0_QUERY_DRV_STAT_LAST) {
         q->u.value = nvc0->screen->base.stats.v[q->index] - q->u.value;
         return;
      } else
#endif
      if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
          (q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
         nvc0_hw_sm_query_end(nvc0, q);
      }
      break;
   }
   if (q->is64bit)
      nouveau_fence_ref(nvc0->screen->base.fence.current, &q->fence);
}

static inline void
nvc0_query_update(struct nouveau_client *cli, struct nvc0_query *q)
{
   if (q->is64bit) {
      if (nouveau_fence_signalled(q->fence))
         q->state = NVC0_QUERY_STATE_READY;
   } else {
      if (q->data[0] == q->sequence)
         q->state = NVC0_QUERY_STATE_READY;
   }
}

static boolean
nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
                  boolean wait, union pipe_query_result *result)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_query *q = nvc0_query(pq);
   uint64_t *res64 = (uint64_t*)result;
   uint32_t *res32 = (uint32_t*)result;
   uint8_t *res8 = (uint8_t*)result;
   uint64_t *data64 = (uint64_t *)q->data;
   unsigned i;

#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
   if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
       q->type <= NVC0_QUERY_DRV_STAT_LAST) {
      res64[0] = q->u.value;
      return true;
   } else
#endif
   if ((q->type >= NVE4_HW_SM_QUERY(0) && q->type <= NVE4_HW_SM_QUERY_LAST) ||
       (q->type >= NVC0_HW_SM_QUERY(0) && q->type <= NVC0_HW_SM_QUERY_LAST)) {
      return nvc0_hw_sm_query_result(nvc0, q, result, wait);
   }

   if (q->state != NVC0_QUERY_STATE_READY)
      nvc0_query_update(nvc0->screen->base.client, q);

   if (q->state != NVC0_QUERY_STATE_READY) {
      if (!wait) {
         if (q->state != NVC0_QUERY_STATE_FLUSHED) {
            q->state = NVC0_QUERY_STATE_FLUSHED;
            /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
            PUSH_KICK(nvc0->base.pushbuf);
         }
         return false;
      }
      if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->screen->base.client))
         return false;
      NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1);
   }
   q->state = NVC0_QUERY_STATE_READY;

   switch (q->type) {
   case PIPE_QUERY_GPU_FINISHED:
      res8[0] = true;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
      res64[0] = q->data[1] - q->data[5];
      break;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      res8[0] = q->data[1] != q->data[5];
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
   case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
      res64[0] = data64[0] - data64[2];
      break;
   case PIPE_QUERY_SO_STATISTICS:
      res64[0] = data64[0] - data64[4];
      res64[1] = data64[2] - data64[6];
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      res8[0] = data64[0] != data64[2];
      break;
   case PIPE_QUERY_TIMESTAMP:
      res64[0] = data64[1];
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      res64[0] = 1000000000;
      res8[8] = false;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      res64[0] = data64[1] - data64[3];
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      for (i = 0; i < 10; ++i)
         res64[i] = data64[i * 2] - data64[24 + i * 2];
      break;
   case NVC0_QUERY_TFB_BUFFER_OFFSET:
      res32[0] = q->data[1];
      break;
   default:
      assert(0); /* can't happen, we don't create queries with invalid type */
      return false;
   }

   return true;
}

void
nvc0_query_fifo_wait(struct nouveau_pushbuf *push, struct nvc0_query *q)
{
   unsigned offset = q->offset;

   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) offset += 0x20;

   PUSH_SPACE(push, 5);
   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
   PUSH_DATAh(push, q->bo->offset + offset);
   PUSH_DATA (push, q->bo->offset + offset);
   PUSH_DATA (push, q->sequence);
   PUSH_DATA (push, (1 << 12) |
              NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
}

static void
nvc0_render_condition(struct pipe_context *pipe,
                      struct pipe_query *pq,
                      boolean condition, uint mode)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q;
   uint32_t cond;
   bool wait =
      mode != PIPE_RENDER_COND_NO_WAIT &&
      mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

   if (!pq) {
      cond = NVC0_3D_COND_MODE_ALWAYS;
   }
   else {
      q = nvc0_query(pq);
      /* NOTE: comparison of 2 queries only works if both have completed */
      switch (q->type) {
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
         cond = condition ? NVC0_3D_COND_MODE_EQUAL :
                            NVC0_3D_COND_MODE_NOT_EQUAL;
         wait = true;
         break;
      case PIPE_QUERY_OCCLUSION_COUNTER:
      case PIPE_QUERY_OCCLUSION_PREDICATE:
         if (likely(!condition)) {
            if (unlikely(q->nesting))
               cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
                             NVC0_3D_COND_MODE_ALWAYS;
            else
               cond = NVC0_3D_COND_MODE_RES_NON_ZERO;
         } else {
            cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
         }
         break;
      default:
         assert(!"render condition query not a predicate");
         cond = NVC0_3D_COND_MODE_ALWAYS;
         break;
      }
   }

   nvc0->cond_query = pq;
   nvc0->cond_cond = condition;
   nvc0->cond_condmode = cond;
   nvc0->cond_mode = mode;

   if (!pq) {
      PUSH_SPACE(push, 1);
      IMMED_NVC0(push, NVC0_3D(COND_MODE), cond);
      return;
   }

   if (wait)
      nvc0_query_fifo_wait(push, q);

   PUSH_SPACE(push, 7);
   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   BEGIN_NVC0(push, NVC0_3D(COND_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, q->bo->offset + q->offset);
   PUSH_DATA (push, q->bo->offset + q->offset);
   PUSH_DATA (push, cond);
   BEGIN_NVC0(push, NVC0_2D(COND_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, q->bo->offset + q->offset);
   PUSH_DATA (push, q->bo->offset + q->offset);
}

void
nvc0_query_pushbuf_submit(struct nouveau_pushbuf *push,
                          struct nvc0_query *q, unsigned result_offset)
{
#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))

   PUSH_REFN(push, q->bo, NOUVEAU_BO_RD | NOUVEAU_BO_GART);
   nouveau_pushbuf_space(push, 0, 0, 1);
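   /* NO_PREFETCH should make the pushbuf engine fetch these 4 bytes only
    * when it reaches this IB entry, so a result the GPU has just written
    * (rather than stale prefetched data) ends up in the command stream.
    */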
   nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
                        NVC0_IB_ENTRY_1_NO_PREFETCH);
}

void
nvc0_so_target_save_offset(struct pipe_context *pipe,
                           struct pipe_stream_output_target *ptarg,
                           unsigned index, bool *serialize)
{
   struct nvc0_so_target *targ = nvc0_so_target(ptarg);

   if (*serialize) {
      *serialize = false;
      PUSH_SPACE(nvc0_context(pipe)->base.pushbuf, 1);
      IMMED_NVC0(nvc0_context(pipe)->base.pushbuf, NVC0_3D(SERIALIZE), 0);

      NOUVEAU_DRV_STAT(nouveau_screen(pipe->screen), gpu_serialize_count, 1);
   }

   nvc0_query(targ->pq)->index = index;

   nvc0_query_end(pipe, targ->pq);
}


/* === DRIVER STATISTICS === */

#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS

static const char *nvc0_drv_stat_names[] =
{
   "drv-tex_obj_current_count",
   "drv-tex_obj_current_bytes",
   "drv-buf_obj_current_count",
   "drv-buf_obj_current_bytes_vid",
   "drv-buf_obj_current_bytes_sys",
   "drv-tex_transfers_rd",
   "drv-tex_transfers_wr",
   "drv-tex_copy_count",
   "drv-tex_blit_count",
   "drv-tex_cache_flush_count",
   "drv-buf_transfers_rd",
   "drv-buf_transfers_wr",
   "drv-buf_read_bytes_staging_vid",
   "drv-buf_write_bytes_direct",
   "drv-buf_write_bytes_staging_vid",
   "drv-buf_write_bytes_staging_sys",
   "drv-buf_copy_bytes",
   "drv-buf_non_kernel_fence_sync_count",
   "drv-any_non_kernel_fence_sync_count",
   "drv-query_sync_count",
   "drv-gpu_serialize_count",
   "drv-draw_calls_array",
   "drv-draw_calls_indexed",
   "drv-draw_calls_fallback_count",
   "drv-user_buffer_upload_bytes",
   "drv-constbuf_upload_count",
   "drv-constbuf_upload_bytes",
   "drv-pushbuf_count",
   "drv-resource_validate_count"
};

#endif /* NOUVEAU_ENABLE_DRIVER_STATISTICS */


/* === PERFORMANCE MONITORING COUNTERS for NVE4+ === */

/* Code to read out MP counters: They are accessible via mmio, too, but let's
 * just avoid mapping registers in userspace. We'd have to know which MPs are
 * enabled/present, too, and that information is not presently exposed.
 * We could add a kernel interface for it, but reading the counters like this
 * has the advantage of being async (if get_result isn't called immediately).
 */
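/* The compute kernel below takes three 32-bit parameters, filled in by
 * nvc0_hw_sm_query_end(): c0[0x0]/c0[0x4] are the low/high words of the
 * output buffer address, and c0[0x8] is the sequence number that gets
 * stored after the counter values so the CPU can tell when data is valid.
 */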
static const uint64_t nve4_read_hw_sm_counters_code[] =
{
   /* sched 0x20 0x20 0x20 0x20 0x20 0x20 0x20
    * mov b32 $r8 $tidx
    * mov b32 $r12 $physid
    * mov b32 $r0 $pm0
    * mov b32 $r1 $pm1
    * mov b32 $r2 $pm2
    * mov b32 $r3 $pm3
    * mov b32 $r4 $pm4
    * sched 0x20 0x20 0x23 0x04 0x20 0x04 0x2b
    * mov b32 $r5 $pm5
    * mov b32 $r6 $pm6
    * mov b32 $r7 $pm7
    * set $p0 0x1 eq u32 $r8 0x0
    * mov b32 $r10 c0[0x0]
    * ext u32 $r8 $r12 0x414
    * mov b32 $r11 c0[0x4]
    * sched 0x04 0x2e 0x04 0x20 0x20 0x28 0x04
    * ext u32 $r9 $r12 0x208
    * (not $p0) exit
    * set $p1 0x1 eq u32 $r9 0x0
    * mul $r8 u32 $r8 u32 96
    * mul $r12 u32 $r9 u32 16
    * mul $r13 u32 $r9 u32 4
    * add b32 $r9 $r8 $r13
    * sched 0x28 0x04 0x2c 0x04 0x2c 0x04 0x2c
    * add b32 $r8 $r8 $r12
    * mov b32 $r12 $r10
    * add b32 $r10 $c $r10 $r8
    * mov b32 $r13 $r11
    * add b32 $r11 $r11 0x0 $c
    * add b32 $r12 $c $r12 $r9
    * st b128 wt g[$r10d] $r0q
    * sched 0x4 0x2c 0x20 0x04 0x2e 0x00 0x00
    * mov b32 $r0 c0[0x8]
    * add b32 $r13 $r13 0x0 $c
    * $p1 st b128 wt g[$r12d+0x40] $r4q
    * st b32 wt g[$r12d+0x50] $r0
    * exit */
   0x2202020202020207ULL,
   0x2c00000084021c04ULL,
   0x2c0000000c031c04ULL,
   0x2c00000010001c04ULL,
   0x2c00000014005c04ULL,
   0x2c00000018009c04ULL,
   0x2c0000001c00dc04ULL,
   0x2c00000020011c04ULL,
   0x22b0420042320207ULL,
   0x2c00000024015c04ULL,
   0x2c00000028019c04ULL,
   0x2c0000002c01dc04ULL,
   0x190e0000fc81dc03ULL,
   0x2800400000029de4ULL,
   0x7000c01050c21c03ULL,
   0x280040001002dde4ULL,
   0x204282020042e047ULL,
   0x7000c00820c25c03ULL,
   0x80000000000021e7ULL,
   0x190e0000fc93dc03ULL,
   0x1000000180821c02ULL,
   0x1000000040931c02ULL,
   0x1000000010935c02ULL,
   0x4800000034825c03ULL,
   0x22c042c042c04287ULL,
   0x4800000030821c03ULL,
   0x2800000028031de4ULL,
   0x4801000020a29c03ULL,
   0x280000002c035de4ULL,
   0x0800000000b2dc42ULL,
   0x4801000024c31c03ULL,
   0x9400000000a01fc5ULL,
   0x200002e04202c047ULL,
   0x2800400020001de4ULL,
   0x0800000000d35c42ULL,
   0x9400000100c107c5ULL,
   0x9400000140c01f85ULL,
   0x8000000000001de7ULL
};

/* NOTE: intentionally using the same names as NV */
static const char *nve4_pm_query_names[] =
{
   /* MP counters */
   "active_cycles",
   "active_warps",
   "atom_count",
   "branch",
   "divergent_branch",
   "gld_request",
   "global_ld_mem_divergence_replays",
   "global_store_transaction",
   "global_st_mem_divergence_replays",
   "gred_count",
   "gst_request",
   "inst_executed",
   "inst_issued",
   "inst_issued1",
   "inst_issued2",
   "l1_global_load_hit",
   "l1_global_load_miss",
   "l1_local_load_hit",
   "l1_local_load_miss",
   "l1_local_store_hit",
   "l1_local_store_miss",
   "l1_shared_load_transactions",
   "l1_shared_store_transactions",
   "local_load",
   "local_load_transactions",
   "local_store",
   "local_store_transactions",
   "prof_trigger_00",
   "prof_trigger_01",
   "prof_trigger_02",
   "prof_trigger_03",
   "prof_trigger_04",
   "prof_trigger_05",
   "prof_trigger_06",
   "prof_trigger_07",
   "shared_load",
   "shared_load_replay",
   "shared_store",
   "shared_store_replay",
   "sm_cta_launched",
   "threads_launched",
   "uncached_global_load_transaction",
   "warps_launched",
   /* metrics, i.e. functions of the MP counters */
   "metric-ipc",                  /* inst_executed, clock */
   "metric-ipac",                 /* inst_executed, active_cycles */
   "metric-ipec",                 /* inst_executed, (bool)inst_executed */
   "metric-achieved_occupancy",   /* active_warps, active_cycles */
   "metric-sm_efficiency",        /* active_cycles, clock */
   "metric-inst_replay_overhead"  /* inst_issued, inst_executed */
};

/* For simplicity, we will allocate as many group slots as we allocate counter
 * slots. This means that a single counter which wants to source from 2 groups
 * will have to be declared as using 2 counter slots. This shouldn't really be
 * a problem because such queries don't make much sense ... (unless someone is
 * really creative).
 */
struct nvc0_mp_counter_cfg
{
   uint32_t func    : 16; /* mask or 4-bit logic op (depending on mode) */
   uint32_t mode    : 4;  /* LOGOP,B6,LOGOP_B6(_PULSE) */
   uint32_t num_src : 3;  /* number of sources (1 - 6, only for NVC0:NVE4) */
   uint32_t sig_dom : 1;  /* if 0, MP_PM_A (per warp-sched), if 1, MP_PM_B */
   uint32_t sig_sel : 8;  /* signal group */
   uint64_t src_sel;      /* signal selection for up to 6 sources (48 bit) */
};

#define NVC0_COUNTER_OPn_SUM        0
#define NVC0_COUNTER_OPn_OR         1
#define NVC0_COUNTER_OPn_AND        2
#define NVC0_COUNTER_OP2_REL_SUM_MM 3 /* (sum(ctr0) - sum(ctr1)) / sum(ctr0) */
#define NVC0_COUNTER_OP2_DIV_SUM_M0 4 /* sum(ctr0) / ctr1 of MP[0]) */
#define NVC0_COUNTER_OP2_AVG_DIV_MM 5 /* avg(ctr0 / ctr1) */
#define NVC0_COUNTER_OP2_AVG_DIV_M0 6 /* avg(ctr0) / ctr1 of MP[0]) */

struct nvc0_hw_sm_query_cfg
{
   struct nvc0_mp_counter_cfg ctr[4];
   uint8_t num_counters;
   uint8_t op;
   uint8_t norm[2]; /* normalization num,denom */
};

#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
   { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
   { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g1, s1 }, \
   {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
   { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g0, s0 }, \
   { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
   {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_HW_SM_QUERY_METRIC_##n] = { { \
   { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
   { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
   {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }

/* NOTES:
 * active_warps: bit 0 alternates between 0 and 1 for an odd number of warps
 * inst_executed etc.: we only count a single warp scheduler
 * metric-ipXc: we simply multiply by 4 to account for the 4 warp schedulers;
 *  this is inaccurate !
 */
static const struct nvc0_hw_sm_query_cfg nve4_hw_sm_queries[] =
{
   _Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1),
   _Q1B(ACTIVE_WARPS, 0x003f, B6, WARP, 0x31483104, 2, 1),
   _Q1A(ATOM_COUNT, 0x0001, B6, BRANCH, 0x00000000, 1, 1),
   _Q1A(BRANCH, 0x0001, B6, BRANCH, 0x0000000c, 1, 1),
   _Q1A(DIVERGENT_BRANCH, 0x0001, B6, BRANCH, 0x00000010, 1, 1),
   _Q1A(GLD_REQUEST, 0x0001, B6, LDST, 0x00000010, 1, 1),
   _Q1B(GLD_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000010, 1, 1),
   _Q1B(GST_TRANSACTIONS, 0x0001, B6, MEM, 0x00000004, 1, 1),
   _Q1B(GST_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000014, 1, 1),
   _Q1A(GRED_COUNT, 0x0001, B6, BRANCH, 0x00000008, 1, 1),
   _Q1A(GST_REQUEST, 0x0001, B6, LDST, 0x00000014, 1, 1),
   _Q1A(INST_EXECUTED, 0x0003, B6, EXEC, 0x00000398, 1, 1),
   _Q1A(INST_ISSUED, 0x0003, B6, ISSUE, 0x00000104, 1, 1),
   _Q1A(INST_ISSUED1, 0x0001, B6, ISSUE, 0x00000004, 1, 1),
   _Q1A(INST_ISSUED2, 0x0001, B6, ISSUE, 0x00000008, 1, 1),
   _Q1B(L1_GLD_HIT, 0x0001, B6, L1, 0x00000010, 1, 1),
   _Q1B(L1_GLD_MISS, 0x0001, B6, L1, 0x00000014, 1, 1),
   _Q1B(L1_LOCAL_LD_HIT, 0x0001, B6, L1, 0x00000000, 1, 1),
   _Q1B(L1_LOCAL_LD_MISS, 0x0001, B6, L1, 0x00000004, 1, 1),
   _Q1B(L1_LOCAL_ST_HIT, 0x0001, B6, L1, 0x00000008, 1, 1),
   _Q1B(L1_LOCAL_ST_MISS, 0x0001, B6, L1, 0x0000000c, 1, 1),
   _Q1B(L1_SHARED_LD_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000008, 1, 1),
   _Q1B(L1_SHARED_ST_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x0000000c, 1, 1),
   _Q1A(LOCAL_LD, 0x0001, B6, LDST, 0x00000008, 1, 1),
   _Q1B(LOCAL_LD_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000000, 1, 1),
   _Q1A(LOCAL_ST, 0x0001, B6, LDST, 0x0000000c, 1, 1),
   _Q1B(LOCAL_ST_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000004, 1, 1),
   _Q1A(PROF_TRIGGER_0, 0x0001, B6, USER, 0x00000000, 1, 1),
   _Q1A(PROF_TRIGGER_1, 0x0001, B6, USER, 0x00000004, 1, 1),
   _Q1A(PROF_TRIGGER_2, 0x0001, B6, USER, 0x00000008, 1, 1),
   _Q1A(PROF_TRIGGER_3, 0x0001, B6, USER, 0x0000000c, 1, 1),
   _Q1A(PROF_TRIGGER_4, 0x0001, B6, USER, 0x00000010, 1, 1),
   _Q1A(PROF_TRIGGER_5, 0x0001, B6, USER, 0x00000014, 1, 1),
   _Q1A(PROF_TRIGGER_6, 0x0001, B6, USER, 0x00000018, 1, 1),
   _Q1A(PROF_TRIGGER_7, 0x0001, B6, USER, 0x0000001c, 1, 1),
   _Q1A(SHARED_LD, 0x0001, B6, LDST, 0x00000000, 1, 1),
   _Q1B(SHARED_LD_REPLAY, 0x0001, B6, REPLAY, 0x00000008, 1, 1),
   _Q1A(SHARED_ST, 0x0001, B6, LDST, 0x00000004, 1, 1),
   _Q1B(SHARED_ST_REPLAY, 0x0001, B6, REPLAY, 0x0000000c, 1, 1),
   _Q1B(SM_CTA_LAUNCHED, 0x0001, B6, WARP, 0x0000001c, 1, 1),
   _Q1A(THREADS_LAUNCHED, 0x003f, B6, LAUNCH, 0x398a4188, 1, 1),
   _Q1B(UNCACHED_GLD_TRANSACTIONS, 0x0001, B6, MEM, 0x00000000, 1, 1),
   _Q1A(WARPS_LAUNCHED, 0x0001, B6, LAUNCH, 0x00000004, 1, 1),
   _M2AB(IPC, 0x3, B6, EXEC, 0x398, 0xffff, LOGOP, WARP, 0x0, DIV_SUM_M0, 10, 1),
   _M2AB(IPAC, 0x3, B6, EXEC, 0x398, 0x1, B6, WARP, 0x0, AVG_DIV_MM, 10, 1),
   _M2A(IPEC, 0x3, B6, EXEC, 0x398, 0xe, LOGOP, EXEC, 0x398, AVG_DIV_MM, 10, 1),
   _M2A(INST_REPLAY_OHEAD, 0x3, B6, ISSUE, 0x104, 0x3, B6, EXEC, 0x398, REL_SUM_MM, 100, 1),
   _M2B(MP_OCCUPANCY, 0x3f, B6, WARP, 0x31483104, 0x01, B6, WARP, 0x0, AVG_DIV_MM, 200, 64),
   _M2B(MP_EFFICIENCY, 0x01, B6, WARP, 0x0, 0xffff, LOGOP, WARP, 0x0, AVG_DIV_M0, 100, 1),
};
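
/* Worked example (my reading of the table above): MP_OCCUPANCY uses
 * OP2_AVG_DIV_MM with norm = { 200, 64 }, i.e. it reports
 * avg(active_warps / active_cycles) * 200 / 64. Since the raw active_warps
 * signal counts at half rate (hence the { 2, 1 } normalization of the plain
 * ACTIVE_WARPS query) and an SMX holds at most 64 resident warps, this
 * works out to average occupancy in percent.
 */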

#undef _Q1A
#undef _Q1B
#undef _M2A
#undef _M2B

/* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */
static const uint64_t nvc0_read_hw_sm_counters_code[] =
{
   /* mov b32 $r8 $tidx
    * mov b32 $r9 $physid
    * mov b32 $r0 $pm0
    * mov b32 $r1 $pm1
    * mov b32 $r2 $pm2
    * mov b32 $r3 $pm3
    * mov b32 $r4 $pm4
    * mov b32 $r5 $pm5
    * mov b32 $r6 $pm6
    * mov b32 $r7 $pm7
    * set $p0 0x1 eq u32 $r8 0x0
    * mov b32 $r10 c0[0x0]
    * mov b32 $r11 c0[0x4]
    * ext u32 $r8 $r9 0x414
    * (not $p0) exit
    * mul $r8 u32 $r8 u32 36
    * add b32 $r10 $c $r10 $r8
    * add b32 $r11 $r11 0x0 $c
    * mov b32 $r8 c0[0x8]
    * st b128 wt g[$r10d+0x00] $r0q
    * st b128 wt g[$r10d+0x10] $r4q
    * st b32 wt g[$r10d+0x20] $r8
    * exit */
   0x2c00000084021c04ULL,
   0x2c0000000c025c04ULL,
   0x2c00000010001c04ULL,
   0x2c00000014005c04ULL,
   0x2c00000018009c04ULL,
   0x2c0000001c00dc04ULL,
   0x2c00000020011c04ULL,
   0x2c00000024015c04ULL,
   0x2c00000028019c04ULL,
   0x2c0000002c01dc04ULL,
   0x190e0000fc81dc03ULL,
   0x2800400000029de4ULL,
   0x280040001002dde4ULL,
   0x7000c01050921c03ULL,
   0x80000000000021e7ULL,
   0x1000000090821c02ULL,
   0x4801000020a29c03ULL,
   0x0800000000b2dc42ULL,
   0x2800400020021de4ULL,
   0x9400000000a01fc5ULL,
   0x9400000040a11fc5ULL,
   0x9400000080a21f85ULL,
   0x8000000000001de7ULL
};

static const char *nvc0_pm_query_names[] =
{
   /* MP counters */
   "active_cycles",
   "active_warps",
   "atom_count",
   "branch",
   "divergent_branch",
   "gld_request",
   "gred_count",
   "gst_request",
   "inst_executed",
   "inst_issued1_0",
   "inst_issued1_1",
   "inst_issued2_0",
   "inst_issued2_1",
   "local_load",
   "local_store",
   "prof_trigger_00",
   "prof_trigger_01",
   "prof_trigger_02",
   "prof_trigger_03",
   "prof_trigger_04",
   "prof_trigger_05",
   "prof_trigger_06",
   "prof_trigger_07",
   "shared_load",
   "shared_store",
   "threads_launched",
   "thread_inst_executed_0",
   "thread_inst_executed_1",
   "thread_inst_executed_2",
   "thread_inst_executed_3",
   "warps_launched",
};

#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_HW_SM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }

static const struct nvc0_hw_sm_query_cfg nvc0_hw_sm_queries[] =
{
   _Q(ACTIVE_CYCLES, 0xaaaa, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(ACTIVE_WARPS, 0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
   _Q(ATOM_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(BRANCH, 0xaaaa, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00),
   _Q(DIVERGENT_BRANCH, 0xaaaa, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 0x00, 0x00),
   _Q(GLD_REQUEST, 0xaaaa, LOGOP, 0x64, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(GRED_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(GST_REQUEST, 0xaaaa, LOGOP, 0x64, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_EXECUTED, 0xaaaa, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED1_0, 0xaaaa, LOGOP, 0x7e, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED1_1, 0xaaaa, LOGOP, 0x7e, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED2_0, 0xaaaa, LOGOP, 0x7e, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(INST_ISSUED2_1, 0xaaaa, LOGOP, 0x7e, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(LOCAL_LD, 0xaaaa, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(LOCAL_ST, 0xaaaa, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_0, 0xaaaa, LOGOP, 0x01, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_1, 0xaaaa, LOGOP, 0x01, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_2, 0xaaaa, LOGOP, 0x01, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_3, 0xaaaa, LOGOP, 0x01, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_4, 0xaaaa, LOGOP, 0x01, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_5, 0xaaaa, LOGOP, 0x01, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_6, 0xaaaa, LOGOP, 0x01, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(PROF_TRIGGER_7, 0xaaaa, LOGOP, 0x01, 1, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(SHARED_LD, 0xaaaa, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(SHARED_ST, 0xaaaa, LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
   _Q(THREADS_LAUNCHED, 0xaaaa, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
   _Q(TH_INST_EXECUTED_0, 0xaaaa, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(TH_INST_EXECUTED_1, 0xaaaa, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(TH_INST_EXECUTED_2, 0xaaaa, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(TH_INST_EXECUTED_3, 0xaaaa, LOGOP, 0xa6, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
   _Q(WARPS_LAUNCHED, 0xaaaa, LOGOP, 0x26, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
};
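
/* Illustrative note: INST_EXECUTED above has num_src = 3 with src_sel bytes
 * 0x00, 0x11 and 0x22; nvc0_hw_sm_query_begin() programs one
 * MP_PM_SIGSEL/SRCSEL/OP triple per source, taking byte s of src_sel:
 *
 *    for (s = 0; s < cfg->ctr[i].num_src; s++)
 *       ... (cfg->ctr[i].src_sel >> (s * 8)) & 0xff ...
 */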

#undef _Q

static const struct nvc0_hw_sm_query_cfg *
nvc0_hw_sm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   struct nvc0_screen *screen = nvc0->screen;

   if (screen->base.class_3d >= NVE4_3D_CLASS)
      return &nve4_hw_sm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];
   return &nvc0_hw_sm_queries[q->type - NVC0_HW_SM_QUERY(0)];
}

boolean
nvc0_hw_sm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
   const struct nvc0_hw_sm_query_cfg *cfg;
   unsigned i, c;
   unsigned num_ab[2] = { 0, 0 };

   cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);

   /* check if we have enough free counter slots */
   for (i = 0; i < cfg->num_counters; ++i)
      num_ab[cfg->ctr[i].sig_dom]++;

   if (screen->pm.num_hw_sm_active[0] + num_ab[0] > 4 ||
       screen->pm.num_hw_sm_active[1] + num_ab[1] > 4) {
      NOUVEAU_ERR("Not enough free MP counter slots !\n");
      return false;
   }

   assert(cfg->num_counters <= 4);
   PUSH_SPACE(push, 4 * 8 * (is_nve4 ? 1 : 6) + 6);

   if (!screen->pm.mp_counters_enabled) {
      screen->pm.mp_counters_enabled = true;
      BEGIN_NVC0(push, SUBC_SW(0x06ac), 1);
      PUSH_DATA (push, 0x1fcb);
   }

   /* set sequence field to 0 (used to check if result is available) */
   for (i = 0; i < screen->mp_count; ++i)
      q->data[i * 10 + 10] = 0;

   for (i = 0; i < cfg->num_counters; ++i) {
      const unsigned d = cfg->ctr[i].sig_dom;

      if (!screen->pm.num_hw_sm_active[d]) {
         uint32_t m = (1 << 22) | (1 << (7 + (8 * !d)));
         if (screen->pm.num_hw_sm_active[!d])
            m |= 1 << (7 + (8 * d));
         BEGIN_NVC0(push, SUBC_SW(0x0600), 1);
         PUSH_DATA (push, m);
      }
      screen->pm.num_hw_sm_active[d]++;

      for (c = d * 4; c < (d * 4 + 4); ++c) {
         if (!screen->pm.mp_counter[c]) {
            q->ctr[i] = c;
            screen->pm.mp_counter[c] = (struct pipe_query *)q;
            break;
         }
      }
      assert(c <= (d * 4 + 3)); /* must succeed, already checked for space */

      /* configure and reset the counter(s) */
      if (is_nve4) {
         if (d == 0)
            BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_A_SIGSEL(c & 3)), 1);
         else
            BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_B_SIGSEL(c & 3)), 1);
         PUSH_DATA (push, cfg->ctr[i].sig_sel);
         BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SRCSEL(c)), 1);
         PUSH_DATA (push, cfg->ctr[i].src_sel + 0x2108421 * (c & 3));
         BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 1);
         PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
         BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SET(c)), 1);
         PUSH_DATA (push, 0);
      } else {
         unsigned s;

         for (s = 0; s < cfg->ctr[i].num_src; s++) {
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SIGSEL(s)), 1);
            PUSH_DATA (push, cfg->ctr[i].sig_sel);
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SRCSEL(s)), 1);
            PUSH_DATA (push, (cfg->ctr[i].src_sel >> (s * 8)) & 0xff);
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(s)), 1);
            PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SET(s)), 1);
            PUSH_DATA (push, 0);
         }
      }
   }
   return true;
}

static void
nvc0_hw_sm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct pipe_context *pipe = &nvc0->base.pipe;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
   uint32_t mask;
   uint32_t input[3];
   const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
   const uint grid[3] = { screen->mp_count, 1, 1 };
   unsigned c;
   const struct nvc0_hw_sm_query_cfg *cfg;

   cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);

   if (unlikely(!screen->pm.prog)) {
      struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
      prog->type = PIPE_SHADER_COMPUTE;
      prog->translated = true;
      prog->num_gprs = 14;
      prog->parm_size = 12;
      if (is_nve4) {
         prog->code = (uint32_t *)nve4_read_hw_sm_counters_code;
         prog->code_size = sizeof(nve4_read_hw_sm_counters_code);
      } else {
         prog->code = (uint32_t *)nvc0_read_hw_sm_counters_code;
         prog->code_size = sizeof(nvc0_read_hw_sm_counters_code);
      }
      screen->pm.prog = prog;
   }

   /* disable all counting */
   PUSH_SPACE(push, 8);
   for (c = 0; c < 8; ++c)
      if (screen->pm.mp_counter[c]) {
         if (is_nve4) {
            IMMED_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 0);
         } else {
            IMMED_NVC0(push, NVC0_COMPUTE(MP_PM_OP(c)), 0);
         }
      }
   /* release counters for this query */
   for (c = 0; c < 8; ++c) {
      if (nvc0_query(screen->pm.mp_counter[c]) == q) {
         screen->pm.num_hw_sm_active[c / 4]--;
         screen->pm.mp_counter[c] = NULL;
      }
   }

   BCTX_REFN_bo(nvc0->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR,
                q->bo);

   PUSH_SPACE(push, 1);
   IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);

   pipe->bind_compute_state(pipe, screen->pm.prog);
   input[0] = (q->bo->offset + q->base);
   input[1] = (q->bo->offset + q->base) >> 32;
   input[2] = q->sequence;
   pipe->launch_grid(pipe, block, grid, 0, input);

   nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY);

   /* re-activate other counters */
   PUSH_SPACE(push, 16);
   mask = 0;
   for (c = 0; c < 8; ++c) {
      unsigned i;
      q = nvc0_query(screen->pm.mp_counter[c]);
      if (!q)
         continue;
      cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);
      for (i = 0; i < cfg->num_counters; ++i) {
         if (mask & (1 << q->ctr[i]))
            break;
         mask |= 1 << q->ctr[i];
         if (is_nve4) {
            BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(q->ctr[i])), 1);
         } else {
            BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(q->ctr[i])), 1);
         }
         PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
      }
   }
}

static inline bool
nvc0_hw_sm_query_read_data(uint32_t count[32][4],
                           struct nvc0_context *nvc0, bool wait,
                           struct nvc0_query *q,
                           const struct nvc0_hw_sm_query_cfg *cfg,
                           unsigned mp_count)
{
   unsigned p, c;

   for (p = 0; p < mp_count; ++p) {
      const unsigned b = (0x24 / 4) * p;

      for (c = 0; c < cfg->num_counters; ++c) {
         if (q->data[b + 8] != q->sequence) {
            if (!wait)
               return false;
            if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
               return false;
         }
         count[p][c] = q->data[b + q->ctr[c]];
      }
   }
   return true;
}

static inline bool
nve4_hw_sm_query_read_data(uint32_t count[32][4],
                           struct nvc0_context *nvc0, bool wait,
                           struct nvc0_query *q,
                           const struct nvc0_hw_sm_query_cfg *cfg,
                           unsigned mp_count)
{
   unsigned p, c, d;

   for (p = 0; p < mp_count; ++p) {
      const unsigned b = (0x60 / 4) * p;

      for (c = 0; c < cfg->num_counters; ++c) {
         count[p][c] = 0;
         for (d = 0; d < ((q->ctr[c] & ~3) ? 1 : 4); ++d) {
            if (q->data[b + 20 + d] != q->sequence) {
               if (!wait)
                  return false;
               if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
                  return false;
            }
            if (q->ctr[c] & ~0x3)
               count[p][c] = q->data[b + 16 + (q->ctr[c] & 3)];
            else
               count[p][c] += q->data[b + d * 4 + q->ctr[c]];
         }
      }
   }
   return true;
}

/* Metric calculations:
 * sum(x) ... sum of x over all MPs
 * avg(x) ... average of x over all MPs
 *
 * IPC              : sum(inst_executed) / clock
 * INST_REPLAY_OHEAD: (sum(inst_issued) - sum(inst_executed)) / sum(inst_issued)
 * MP_OCCUPANCY     : avg((active_warps / 64) / active_cycles)
 * MP_EFFICIENCY    : avg(active_cycles / clock)
 *
 * NOTE: Interpretation of IPC requires knowledge of MP count.
 */
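/* Worked example of the arithmetic below (illustrative numbers): for
 * INST_REPLAY_OHEAD (OP2_REL_SUM_MM, norm = { 100, 1 }), sum(inst_issued) =
 * 1200 and sum(inst_executed) = 1000 yield
 * value = (1200 - 1000) * 100 / 1200 = 16, i.e. ~16% replay overhead.
 */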
static boolean
nvc0_hw_sm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
                        void *result, boolean wait)
{
   uint32_t count[32][4];
   uint64_t value = 0;
   unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32);
   unsigned p, c;
   const struct nvc0_hw_sm_query_cfg *cfg;
   bool ret;

   cfg = nvc0_hw_sm_query_get_cfg(nvc0, q);

   if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
      ret = nve4_hw_sm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
   else
      ret = nvc0_hw_sm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
   if (!ret)
      return false;

   if (cfg->op == NVC0_COUNTER_OPn_SUM) {
      for (c = 0; c < cfg->num_counters; ++c)
         for (p = 0; p < mp_count; ++p)
            value += count[p][c];
      value = (value * cfg->norm[0]) / cfg->norm[1];
   } else
   if (cfg->op == NVC0_COUNTER_OPn_OR) {
      uint32_t v = 0;
      for (c = 0; c < cfg->num_counters; ++c)
         for (p = 0; p < mp_count; ++p)
            v |= count[p][c];
      value = ((uint64_t)v * cfg->norm[0]) / cfg->norm[1];
   } else
   if (cfg->op == NVC0_COUNTER_OPn_AND) {
      uint32_t v = ~0;
      for (c = 0; c < cfg->num_counters; ++c)
         for (p = 0; p < mp_count; ++p)
            v &= count[p][c];
      value = ((uint64_t)v * cfg->norm[0]) / cfg->norm[1];
   } else
   if (cfg->op == NVC0_COUNTER_OP2_REL_SUM_MM) {
      uint64_t v[2] = { 0, 0 };
      for (p = 0; p < mp_count; ++p) {
         v[0] += count[p][0];
         v[1] += count[p][1];
      }
      if (v[0])
         value = ((v[0] - v[1]) * cfg->norm[0]) / (v[0] * cfg->norm[1]);
   } else
   if (cfg->op == NVC0_COUNTER_OP2_DIV_SUM_M0) {
      for (p = 0; p < mp_count; ++p)
         value += count[p][0];
      if (count[0][1])
         value = (value * cfg->norm[0]) / (count[0][1] * cfg->norm[1]);
      else
         value = 0;
   } else
   if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_MM) {
      unsigned mp_used = 0;
      for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
         if (count[p][1])
            value += (count[p][0] * cfg->norm[0]) / count[p][1];
      if (mp_used)
         value /= (uint64_t)mp_used * cfg->norm[1];
   } else
   if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_M0) {
      unsigned mp_used = 0;
      for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
         value += count[p][0];
      if (count[0][1] && mp_used) {
         value *= cfg->norm[0];
         value /= (uint64_t)count[0][1] * mp_used * cfg->norm[1];
      } else {
         value = 0;
      }
   }

   *(uint64_t *)result = value;
   return true;
}

int
nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
                                  unsigned id,
                                  struct pipe_driver_query_info *info)
{
   struct nvc0_screen *screen = nvc0_screen(pscreen);
   int count = 0;

   count += NVC0_QUERY_DRV_STAT_COUNT;

   if (screen->base.device->drm_version >= 0x01000101) {
      if (screen->compute) {
         if (screen->base.class_3d == NVE4_3D_CLASS) {
            count += NVE4_HW_SM_QUERY_COUNT;
         } else
         if (screen->base.class_3d < NVE4_3D_CLASS) {
            /* NVC0_COMPUTE is not always enabled */
            count += NVC0_HW_SM_QUERY_COUNT;
         }
      }
   }

   if (!info)
      return count;

   /* Init default values. */
   info->name = "this_is_not_the_query_you_are_looking_for";
   info->query_type = 0xdeadd01d;
   info->max_value.u64 = 0;
   info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
   info->group_id = -1;

#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
   if (id < NVC0_QUERY_DRV_STAT_COUNT) {
      info->name = nvc0_drv_stat_names[id];
      info->query_type = NVC0_QUERY_DRV_STAT(id);
      info->max_value.u64 = 0;
      if (strstr(info->name, "bytes"))
         info->type = PIPE_DRIVER_QUERY_TYPE_BYTES;
      info->group_id = NVC0_QUERY_DRV_STAT_GROUP;
      return 1;
   } else
#endif
   if (id < count) {
      if (screen->compute) {
         if (screen->base.class_3d == NVE4_3D_CLASS) {
            info->name = nve4_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
            info->query_type = NVE4_HW_SM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
            info->max_value.u64 =
               (id < NVE4_HW_SM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100;
            info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
            return 1;
         } else
         if (screen->base.class_3d < NVE4_3D_CLASS) {
            info->name = nvc0_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
            info->query_type = NVC0_HW_SM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
            info->group_id = NVC0_QUERY_MP_COUNTER_GROUP;
            return 1;
         }
      }
   }
   /* user asked for info about non-existing query */
   return 0;
}

int
nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
                                        unsigned id,
                                        struct pipe_driver_query_group_info *info)
{
   struct nvc0_screen *screen = nvc0_screen(pscreen);
   int count = 0;

#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
   count++;
#endif

   if (screen->base.device->drm_version >= 0x01000101) {
      if (screen->compute) {
         if (screen->base.class_3d == NVE4_3D_CLASS) {
            count++;
         } else
         if (screen->base.class_3d < NVE4_3D_CLASS) {
            count++; /* NVC0_COMPUTE is not always enabled */
         }
      }
   }

   if (!info)
      return count;

   if (id == NVC0_QUERY_MP_COUNTER_GROUP) {
      if (screen->compute) {
         info->name = "MP counters";
         info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;

         if (screen->base.class_3d == NVE4_3D_CLASS) {
            info->num_queries = NVE4_HW_SM_QUERY_COUNT;

            /* On NVE4+, each multiprocessor has 8 hardware counters split
             * across two distinct domains, but we allow only one active
             * query at a time because some queries use more than one
             * hardware counter, which would result in undefined behaviour. */
            info->max_active_queries = 1; /* TODO: handle multiple hw counters */
            return 1;
         } else
         if (screen->base.class_3d < NVE4_3D_CLASS) {
            info->num_queries = NVC0_HW_SM_QUERY_COUNT;

            /* On NVC0:NVE4, each multiprocessor has 8 hardware counters
             * in a single domain. */
            info->max_active_queries = 8;
            return 1;
         }
      }
   }
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
   else if (id == NVC0_QUERY_DRV_STAT_GROUP) {
      info->name = "Driver statistics";
      info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU;
      info->max_active_queries = NVC0_QUERY_DRV_STAT_COUNT;
      info->num_queries = NVC0_QUERY_DRV_STAT_COUNT;
      return 1;
   }
#endif

   /* user asked for info about non-existing query group */
   info->name = "this_is_not_the_query_group_you_are_looking_for";
   info->max_active_queries = 0;
   info->num_queries = 0;
   info->type = 0;
   return 0;
}

void
nvc0_init_query_functions(struct nvc0_context *nvc0)
{
   struct pipe_context *pipe = &nvc0->base.pipe;

   pipe->create_query = nvc0_query_create;
   pipe->destroy_query = nvc0_query_destroy;
   pipe->begin_query = nvc0_query_begin;
   pipe->end_query = nvc0_query_end;
   pipe->get_query_result = nvc0_query_result;
   pipe->render_condition = nvc0_render_condition;
}
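
/* Illustrative usage sketch (not driver code): a state tracker drives the
 * hooks installed above through the pipe_context interface roughly like
 * this; "pipe" is assumed to be a context created from this screen.
 *
 *    struct pipe_query *pq =
 *       pipe->create_query(pipe, PIPE_QUERY_OCCLUSION_COUNTER, 0);
 *    union pipe_query_result result;
 *
 *    pipe->begin_query(pipe, pq);
 *    // ... issue draw calls ...
 *    pipe->end_query(pipe, pq);
 *    if (pipe->get_query_result(pipe, pq, TRUE, &result))
 *       ; // result.u64 holds the number of samples that passed
 *    pipe->destroy_query(pipe, pq);
 */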